1+
/// Returns the number of Unicode code points (scalar values) in `string`.
///
/// The result is neither the byte length nor the number of user-perceived
/// characters: a multi-byte character counts once, and a base letter followed
/// by a combining mark counts as two.
///
/// An empty string yields `0`.
pub fn length_of(string: String) -> usize {
    // A `String` is always well-formed UTF-8, so the standard character
    // iterator can be trusted to find every sequence boundary.
    string.chars().count()
}
34+
/// Reports whether the bytes of `string` form well-formed UTF-8.
///
/// The check is delegated to [`std::str::from_utf8`], which rejects truncated
/// multi-byte sequences, stray continuation bytes, overlong encodings and
/// surrogate code points.
///
/// A `String` built through safe code is always valid UTF-8, so this returns
/// `true` for every such value (including the empty string).
pub fn validate(string: String) -> bool {
    std::str::from_utf8(string.as_bytes()).is_ok()
}
73+
/// Decodes `string` into its sequence of Unicode code points.
///
/// Each element of the returned vector is the scalar value of one character,
/// in the order the characters appear. Sequences of every length (one to four
/// bytes) are decoded.
///
/// An empty string yields an empty vector. U+0000 is an ordinary value and is
/// preserved wherever it occurs, including at the start or end of the input.
pub fn convert_utf8_from(string: String) -> Vec<u32> {
    // `String` guarantees well-formed UTF-8, so no separate validation pass
    // (and no clone of the input) is needed before decoding.
    string.chars().map(u32::from).collect()
}
137+
/// Encodes a sequence of Unicode code points as a UTF-8 `String`.
///
/// Every valid scalar value in `vec` is appended in order, using one to four
/// bytes as required.
///
/// Values that are not valid scalar values are handled as follows:
/// - a value above U+10FFFF is skipped;
/// - a surrogate (U+D800..=U+DFFF) cannot be represented in UTF-8, so the
///   whole conversion is abandoned and an empty string is returned.
pub fn convert_utf8_to(vec: Vec<u32>) -> String {
    // Lower bound only: each code point needs at least one byte.
    let mut encoded = String::with_capacity(vec.len());

    for code_point in vec {
        if code_point > u32::from(char::MAX) {
            continue;
        }
        match char::from_u32(code_point) {
            Some(ch) => encoded.push(ch),
            // In range but rejected: the value is a surrogate.
            None => return String::new(),
        }
    }

    encoded
}
172+
// Round-trips a three-byte character through `convert_utf8_from` and
// `convert_utf8_to`, checking the decoded code point and the re-encoded text.
// The binary dumps are diagnostic output only (visible with `--nocapture`).
#[test]
fn test() {
    let a = "가".to_string();
    for i in a.as_bytes() {
        print!("{:#010b} ", i);
    }
    println!();

    let c = convert_utf8_from(a.clone());
    println!("{:?}", c);
    // U+AC00 is the code point of the character above.
    assert_eq!(c, vec![0xAC00]);

    let s = convert_utf8_to(c);
    for i in s.as_bytes() {
        print!("{:#010b} ", i);
    }
    println!();
    println!("{}", s);
    // Encoding the decoded code points must reproduce the input exactly.
    assert_eq!(s, a);
}
0 commit comments