1
2
3
4
5
6 using System.Collections;
7
8 namespace System.Unicode
9 {
// Incremental UTF-8 decoder.
//
// Bytes are fed one at a time through Put(). After each call ResultReady() tells
// whether a complete code point has been assembled; if so, Get() returns it.
//
// Accepted lead bytes: 0xxxxxxx (1 byte), 110xxxxx (2 bytes), 1110xxxx (3 bytes)
// and 11110xxx (4 bytes). Every following byte must have the form 10xxxxxx.
// The decoder does not reject overlong encodings, surrogate code points, or
// four-byte sequences that decode to a value above U+10FFFF.
public class UnicodeEngine
{
    // Creates a decoder positioned at a sequence boundary with no result pending.
    public nothrow UnicodeEngine() : resultReady(false), result('\0'), state(0)
    {
    }
    // Consumes the next byte of the UTF-8 stream.
    //
    // Throws UnicodeException("invalid UTF-8 sequence") when x is not a valid lead
    // byte at a sequence boundary, or when the sequence that x completes contains a
    // byte that is not a continuation byte. Continuation bytes are checked when the
    // last byte of the sequence arrives, not as each one is received.
    //
    // After an exception the decoder is back at a sequence boundary, so the caller
    // may keep feeding bytes to resynchronize.
    public void Put(byte x)
    {
        switch (state)
        {
            case 0:
            {
                // Sequence boundary: x is either an ASCII byte or a lead byte.
                resultReady = false;
                if ((x & 0x80u) == 0u)
                {
                    result = cast<uchar>(x);
                    resultReady = true;
                }
                else if ((x & 0xE0u) == 0xC0u)
                {
                    bytes[0] = x;
                    state = 1;
                }
                else if ((x & 0xF0u) == 0xE0u)
                {
                    bytes[0] = x;
                    state = 2;
                }
                else if ((x & 0xF8u) == 0xF0u)
                {
                    bytes[0] = x;
                    state = 4;
                }
                else
                {
                    throw UnicodeException("invalid UTF-8 sequence");
                }
                break;
            }
            case 1:
            {
                // Second and last byte of a two-byte sequence (5 payload bits in the lead byte).
                bytes[1] = x;
                CompleteSequence(2, 0x1Fu);
                break;
            }
            case 2:
            {
                // Second byte of a three-byte sequence.
                bytes[1] = x;
                state = 3;
                break;
            }
            case 3:
            {
                // Third and last byte of a three-byte sequence (4 payload bits in the lead byte).
                bytes[2] = x;
                CompleteSequence(3, 0x0Fu);
                break;
            }
            case 4:
            {
                // Second byte of a four-byte sequence.
                bytes[1] = x;
                state = 5;
                break;
            }
            case 5:
            {
                // Third byte of a four-byte sequence.
                bytes[2] = x;
                state = 6;
                break;
            }
            case 6:
            {
                // Fourth and last byte of a four-byte sequence (3 payload bits in the lead byte).
                bytes[3] = x;
                CompleteSequence(4, 0x07u);
                break;
            }
        }
    }
    // Returns true when the most recent Put() call completed a code point.
    public inline nothrow bool ResultReady() const
    {
        return resultReady;
    }
    // Returns the most recently decoded code point; meaningful when ResultReady() is true.
    public nothrow uchar Get()
    {
        return result;
    }
    // Validates the buffered continuation bytes and assembles the code point of a
    // sequence of 'length' bytes whose lead byte carries its payload in 'leadMask'.
    // Throws UnicodeException if any byte after the lead is not of the form 10xxxxxx.
    private void CompleteSequence(int length, byte leadMask)
    {
        // Return to the sequence boundary before validating. Otherwise a malformed
        // sequence would leave the decoder in its final state with the offending
        // byte still buffered, and every subsequent Put() would throw again.
        state = 0;
        result = cast<uchar>(0);
        uint codePoint = cast<uint>(bytes[0] & leadMask);
        for (int i = 1; i < length; ++i)
        {
            byte b = bytes[i];
            if ((b & 0xC0u) != 0x80u)
            {
                throw UnicodeException("invalid UTF-8 sequence");
            }
            // Each continuation byte contributes its low six bits.
            codePoint = (codePoint << 6u) | cast<uint>(b & 0x3Fu);
        }
        result = cast<uchar>(codePoint);
        resultReady = true;
    }
    private bool resultReady;
    private uchar result;
    // 0 = sequence boundary; 1 = awaiting last byte of a 2-byte sequence;
    // 2, 3 = awaiting 2nd / 3rd byte of a 3-byte sequence;
    // 4, 5, 6 = awaiting 2nd / 3rd / 4th byte of a 4-byte sequence.
    private int state;
    // Bytes of the sequence currently being assembled, lead byte first.
    private byte[4] bytes;
}
204 }