Skip to content

Commit d981965

Browse files
authored
Add UTF8Encoder contract (#5)
1 parent b467143 commit d981965

2 files changed

Lines changed: 50 additions & 18 deletions

File tree

contracts/UTF8Encoder.sol

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//SPDX-License-Identifier: MIT
2+
pragma solidity ^0.8.9;
3+
4+
/// @title A library for encoding UTF-8 strings
5+
/// @author Devin Stein
6+
library UTF8Encoder {
    /// @notice Get the UTF-8 string for `self`
    /// @dev Reverts with "invalid unicode code point" when `self` is greater
    ///      than U+10FFFF. Surrogate code points (U+D800..U+DFFF) are NOT
    ///      rejected; they are emitted as ordinary 3-byte sequences.
    ///      NOTE(review): RFC 3629 forbids encoding surrogates - confirm whether
    ///      callers rely on the current behaviour before tightening this.
    /// @param self The code point to encode
    /// @return The UTF-8 string for the given code point
    function encode(uint32 self) external pure returns (string memory) {
        if (self <= 0x7F) {
            // 1 byte (plain ASCII): 0xxxxxxx
            return string(bytes.concat(bytes1(uint8(self))));
        }

        if (self <= 0x07FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            // Shift and mask on the full uint32 and only then narrow to uint8.
            // Narrowing first would drop bits 8..10 of the code point and
            // produce a wrong lead byte for everything above U+00FF.
            bytes1 b0 = bytes1(uint8(((self >> 6) & 0x1F) | 0xC0));
            bytes1 b1 = bytes1(uint8((self & 0x3F) | 0x80));
            return string(bytes.concat(b0, b1));
        }

        if (self <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            bytes1 b0 = bytes1(uint8(((self >> 12) & 0x0F) | 0xE0));
            bytes1 b1 = bytes1(uint8(((self >> 6) & 0x3F) | 0x80));
            bytes1 b2 = bytes1(uint8((self & 0x3F) | 0x80));
            return string(bytes.concat(b0, b1, b2));
        }

        if (self <= 0x10FFFF) {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            bytes1 b0 = bytes1(uint8(((self >> 18) & 0x07) | 0xF0));
            bytes1 b1 = bytes1(uint8(((self >> 12) & 0x3F) | 0x80));
            bytes1 b2 = bytes1(uint8(((self >> 6) & 0x3F) | 0x80));
            bytes1 b3 = bytes1(uint8((self & 0x3F) | 0x80));
            return string(bytes.concat(b0, b1, b2, b3));
        }

        // Anything above U+10FFFF is outside the Unicode code space.
        revert("invalid unicode code point");
    }
}

scripts/contracts.ts

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,21 @@ import { ethers } from "hardhat";
22
import { Contract } from "ethers";
33

44
const UNICODE_LIBRARY = "Unicode";
5+
const UTF8_ENCODER_LIBRARY = "UTF8Encoder";
56
const UNICODE_DATA_CONTRACT = "UnicodeData";
67

7-
export const deployUnicodeData = async (): Promise<Contract> => {
8-
const contractFactory = await ethers.getContractFactory(
9-
UNICODE_DATA_CONTRACT
10-
);
8+
/**
 * Deploys the contract or library whose artifact name is `id`.
 *
 * Obtains a factory via `ethers.getContractFactory(id)`, deploys it with no
 * constructor arguments, waits for `contract.deployed()` to resolve and logs
 * the resulting address to the console.
 *
 * @param id - Artifact name passed to `ethers.getContractFactory`.
 * @returns The deployed contract instance.
 */
export const deploy = async (id: string): Promise<Contract> => {
9+
const contractFactory = await ethers.getContractFactory(id);
1110
const contract = await contractFactory.deploy();
1211
await contract.deployed();
13-
console.log(
14-
`${UNICODE_DATA_CONTRACT} contract deployed: ${contract.address}`
15-
);
16-
return contract;
17-
};
18-
19-
export const deployUnicodeLibrary = async (): Promise<Contract> => {
20-
const contractFactory = await ethers.getContractFactory(UNICODE_LIBRARY);
21-
const contract = await contractFactory.deploy();
22-
await contract.deployed();
23-
console.log(`${UNICODE_LIBRARY} contract deployed: ${contract.address}`);
12+
console.log(`${id} contract deployed: ${contract.address}`);
2413
return contract;
2514
};
2615

2716
/**
 * Deploys every contract this script knows about, one after another: the
 * `UnicodeData` contract, the `Unicode` library and the `UTF8Encoder` library.
 * Each deployment is awaited before the next one starts.
 *
 * @returns The deployed contracts as `[unicodeData, unicodeLibrary, utf8Encoder]`.
 */
export const deployAll = async (): Promise<Contract[]> => {
28-
const unicodeData = await deployUnicodeData();
29-
const unicodeLibrary = await deployUnicodeLibrary();
17+
const unicodeData = await deploy(UNICODE_DATA_CONTRACT);
18+
const unicodeLibrary = await deploy(UNICODE_LIBRARY);
19+
const utf8Encoder = await deploy(UTF8_ENCODER_LIBRARY);
3020

31-
return [unicodeData, unicodeLibrary];
21+
return [unicodeData, unicodeLibrary, utf8Encoder];
3222
};

0 commit comments

Comments
 (0)