1
2
3
4
5
6 using System.IO;
7 using System.Collections;
8
9 namespace System.Unicode
10 {
// Incremental UTF-8 decoder: bytes are fed one at a time with Put(); when a
// complete sequence has been consumed ResultReady() returns true and Get()
// returns the decoded code point.
//
// Internal states:
//   0      - expecting a lead byte (or a single-byte ASCII character)
//   1      - 2-byte sequence, expecting the final byte
//   2, 3   - 3-byte sequence, expecting the 2nd / final byte
//   4, 5, 6 - 4-byte sequence, expecting the 2nd / 3rd / final byte
//
// Only the bit patterns of lead and continuation bytes are checked; overlong
// encodings, surrogate code points and values above U+10FFFF are not rejected.
public class UnicodeEngine : IOBase
{
    public UnicodeEngine() : resultReady(false), result('\0'), state(0)
    {
    }
    // Consumes one byte of UTF-8 input.
    // Returns Result<bool>(true) when the byte was accepted (whether or not it
    // completed a code point), or an error result carrying the id allocated for
    // "invalid UTF-8 sequence" when the byte is not a valid lead byte or when a
    // completed sequence contains a byte that is not a continuation byte.
    // Continuation bytes are validated when the final byte of the sequence
    // arrives. After an error the decoder returns to state 0, so the next byte
    // is interpreted as the start of a new sequence.
    [nodiscard]
    public Result<bool> Put(byte x)
    {
        switch (state)
        {
            case 0:
            {
                resultReady = false;
                if ((x & 128u) == 0u)
                {
                    // 0xxxxxxx: ASCII, complete on its own.
                    result = cast<uchar>(x);
                    resultReady = true;
                }
                else if ((x & 224u) == 192u)
                {
                    // 110xxxxx: lead byte of a 2-byte sequence.
                    bytes[0] = x;
                    state = 1;
                }
                else if ((x & 240u) == 224u)
                {
                    // 1110xxxx: lead byte of a 3-byte sequence.
                    bytes[0] = x;
                    state = 2;
                }
                else if ((x & 248u) == 240u)
                {
                    // 11110xxx: lead byte of a 4-byte sequence.
                    bytes[0] = x;
                    state = 4;
                }
                else
                {
                    return Fail();
                }
                break;
            }
            case 1:
            {
                bytes[1] = x;
                if (!IsContinuation(bytes[1]))
                {
                    return Fail();
                }
                // 5 payload bits from the lead byte, 6 from the continuation byte.
                Emit((cast<uint>(bytes[0] & 31u) << 6u) | Payload(bytes[1]));
                break;
            }
            case 2:
            {
                bytes[1] = x;
                state = 3;
                break;
            }
            case 3:
            {
                bytes[2] = x;
                if (!IsContinuation(bytes[2]) || !IsContinuation(bytes[1]))
                {
                    return Fail();
                }
                // 4 payload bits from the lead byte, 6 from each continuation byte.
                Emit((cast<uint>(bytes[0] & 15u) << 12u) | (Payload(bytes[1]) << 6u) | Payload(bytes[2]));
                break;
            }
            case 4:
            {
                bytes[1] = x;
                state = 5;
                break;
            }
            case 5:
            {
                bytes[2] = x;
                state = 6;
                break;
            }
            case 6:
            {
                bytes[3] = x;
                if (!IsContinuation(bytes[3]) || !IsContinuation(bytes[2]) || !IsContinuation(bytes[1]))
                {
                    return Fail();
                }
                // 3 payload bits from the lead byte, 6 from each continuation byte.
                Emit((cast<uint>(bytes[0] & 7u) << 18u) | (Payload(bytes[1]) << 12u) | (Payload(bytes[2]) << 6u) | Payload(bytes[3]));
                break;
            }
        }
        return Result<bool>(true);
    }
    // True when the most recent Put() call completed a code point.
    public inline bool ResultReady() const
    {
        return resultReady;
    }
    // Returns the most recently decoded code point. The value is only
    // meaningful while ResultReady() is true.
    public uchar Get()
    {
        return result;
    }
    // True when b has the 10xxxxxx bit pattern of a UTF-8 continuation byte.
    private bool IsContinuation(byte b)
    {
        return (b & 192u) == 128u;
    }
    // Extracts the low six payload bits of a continuation byte.
    private uint Payload(byte b)
    {
        return cast<uint>(b & 63u);
    }
    // Publishes a completed code point and returns to the initial state.
    private void Emit(uint codePoint)
    {
        result = cast<uchar>(codePoint);
        resultReady = true;
        state = 0;
    }
    // Records an "invalid UTF-8 sequence" error on this object and resets the
    // state machine so that decoding can resume with the next byte.
    private Result<bool> Fail()
    {
        state = 0;
        int errorId = AllocateError("invalid UTF-8 sequence");
        SetErrorId(errorId);
        return Result<bool>(ErrorId(errorId));
    }
    private bool resultReady;
    private uchar result;
    private int state;
    private byte[4] bytes;
}