1
2
3
4
5
6 using System;
7 using System.Collections;
8 using System.IO;
9 using System.Threading;
10
11 namespace System.Unicode
12 {
// Exception type used to report failures of the Unicode support code in this
// namespace (it is what ThrowUnicodeException throws).
public class UnicodeException : Exception
{
    // Creates the exception; 'message_' is forwarded unchanged to the Exception base class.
    public nothrow UnicodeException(const string& message_) :
        base(message_)
    {
    }
}
19
// Throws a UnicodeException constructed from 'message'.
// This function never returns normally: its only statement is the throw.
public void ThrowUnicodeException(const string& message)
{
    throw UnicodeException(message);
}
24
// Returns the path of the "unicode" subdirectory of the directory named by the
// CMAJOR_ROOT environment variable.
// Throws UnicodeException (via ThrowUnicodeException) when CMAJOR_ROOT is empty.
public string PathToUnicodeDirectory()
{
    string cmajorRoot = RtGetEnvironmentVariable("CMAJOR_ROOT");
    if (cmajorRoot.IsEmpty())
    {
        // ThrowUnicodeException unconditionally throws, so nothing after this call
        // is executed on this path; no dummy return value is needed.
        ThrowUnicodeException("CMAJOR_ROOT environment variable not set (set it to /path/to/cmajor directory)");
    }
    return Path.Combine(cmajorRoot, "unicode");
}
35
// Returns the path of the file "cmajor_ucd.bin" inside the directory reported by
// PathToUnicodeDirectory(). Any exception thrown by that call propagates to the caller.
public string PathToUcdBinFile()
{
    string unicodeDirectory = PathToUnicodeDirectory();
    return Path.Combine(unicodeDirectory, "cmajor_ucd.bin");
}
40
// Produces the canonical lookup form of a property or block name:
// every '_', ' ' and '-' character is dropped and the remainder is passed
// through ToLower. The name tables in this file use this form as their map key,
// so e.g. "Gr_Ext", "Gr Ext" and "gr-ext" all map to the same key.
public string MakeCanonicalPropertyName(const string& s)
{
    string stripped;
    for (char ch : s)
    {
        bool isSeparator = ch == '_' || ch == ' ' || ch == '-';
        if (isSeparator)
        {
            continue;
        }
        stripped.Append(ch);
    }
    return ToLower(stripped);
}
53
// Identifies one binary Unicode character property.
// Enumerators have no explicit values, so they are numbered from zero in declaration
// order; BinaryPropertyTable uses that numeric value as its id-map key.
// Do not reorder or insert in the middle.
// NOTE(review): the values may also be stored in external data such as cmajor_ucd.bin - confirm before changing.
// The trailing comment on each enumerator is the short name registered for it in BinaryPropertyTable.
public enum BinaryPropertyId : byte
{
    asciiHexDigit,                      // AHex
    alphabetic,                         // Alpha
    bidiControl,                        // Bidi C
    bidiMirrored,                       // Bidi M
    cased,                              // Cased
    compositionExclusion,               // CE
    caseIgnorable,                      // CI
    fullCompositionExclusion,           // Comp Ex
    changesWhenCasefolded,              // CWCF
    changesWhenCaseMapped,              // CWCM
    changesWhenNFKCCasefolded,          // CWKCF
    changesWhenLowercased,              // CWL
    changesWhenTitlecased,              // CWT
    changesWhenUppercased,              // CWU
    dash,                               // Dash
    deprecated,                         // Dep
    defaultIgnorableCodePoint,          // DI
    diacritic,                          // Dia
    extender,                           // Ext
    graphemeBase,                       // Gr Base
    graphemeExtend,                     // Gr Ext
    graphemeLink,                       // Gr Link
    hexDigit,                           // Hex
    hyphen,                             // Hyphen
    idContinue,                         // IDC
    ideographic,                        // Ideo
    idStart,                            // IDS
    idsBinaryOperator,                  // IDSB
    idsTrinaryOperator,                 // IDST
    joinControl,                        // Join C
    logicalOrderException,              // LOE
    lowercase,                          // Lower
    math,                               // Math
    noncharacterCodePoint,              // NChar
    otherAlphabetic,                    // OAlpha
    otherDefaultIgnorableCodePoint,     // ODI
    otherGraphemeExtend,                // OGr Ext
    otherIdContinue,                    // OIDC
    otherIdStart,                       // OIDS
    otherLowercase,                     // OLower
    otherMath,                          // OMath
    otherUppercase,                     // OUpper
    patternSyntax,                      // Pat Syn
    patternWhiteSpace,                  // Pat WS
    prependedConcatenationMark,         // PCM
    quotationMark,                      // QMark
    radical,                            // Radical
    softDotted,                         // SD
    sentenceterminal,                   // STerm (casing differs from the other enumerators; kept because the name is public)
    terminalPunctuation,                // Term
    unifiedIdeograph,                   // UIdeo
    uppercase,                          // Upper
    variationSelector,                  // VS
    whiteSpace,                         // WSpace
    xidContinue,                        // XIDC
    xidStart,                           // XIDS
    expandsOnNFC,                       // XO NFC
    expandsOnNFD,                       // XO NFD
    expandsOnNFKC,                      // XO NFKC
    expandsOnNFKD                       // XO NFKD
}
117
// Immutable description of one binary Unicode property: its id plus the short and
// long names under which it can be looked up.
//
// The accessors are declared const so that they can be called through the
// 'const BinaryProperty*' pointers handed out by BinaryPropertyTable and by the
// free GetBinaryProperty* functions; none of them modifies the object.
public class BinaryProperty
{
    // Stores copies of the given id and names.
    public nothrow BinaryProperty(BinaryPropertyId id_, const string& shortName_, const string& longName_) :
        id(id_), shortName(shortName_), longName(longName_)
    {
    }
    // Returns the property id given at construction.
    public inline nothrow BinaryPropertyId Id() const
    {
        return id;
    }
    // Returns the short name exactly as given at construction (not canonicalized).
    public inline nothrow const string& ShortName() const
    {
        return shortName;
    }
    // Returns the long name exactly as given at construction (not canonicalized).
    public inline nothrow const string& LongName() const
    {
        return longName;
    }
    private BinaryPropertyId id;
    private string shortName;
    private string longName;
}
139
// Singleton table of all binary Unicode properties known to this library.
// The single instance is created by the static constructor and owns every
// BinaryProperty object it registers (they are deleted in the destructor).
// Properties can be looked up by id, by short name or by long name; name lookups
// go through MakeCanonicalPropertyName, so they ignore case and '_', ' ', '-'.
public class BinaryPropertyTable
{
    static BinaryPropertyTable() :
        instance(new BinaryPropertyTable())
    {
    }
    // Returns the one shared table.
    public static nothrow BinaryPropertyTable& Instance()
    {
        return *instance;
    }
    // Returns the property registered for 'id', or null if there is none.
    public nothrow const BinaryProperty* GetBinaryProperty(BinaryPropertyId id) const
    {
        uint key = cast<uint>(cast<byte>(id));
        HashMap<uint, void*>.ConstIterator pos = binaryPropertyIdMap.CFind(key);
        if (pos == binaryPropertyIdMap.CEnd())
        {
            return null;
        }
        return cast<BinaryProperty*>(pos->second);
    }
    // Returns the property whose canonicalized short name equals the canonical
    // form of 'shortName', or null if there is none.
    public const BinaryProperty* GetBinaryPropertyByShortName(const string& shortName) const
    {
        string key = MakeCanonicalPropertyName(shortName);
        HashMap<string, void*>.ConstIterator pos = shortNameMap.CFind(key);
        if (pos == shortNameMap.CEnd())
        {
            return null;
        }
        return cast<BinaryProperty*>(pos->second);
    }
    // Returns the property whose canonicalized long name equals the canonical
    // form of 'longName', or null if there is none.
    public const BinaryProperty* GetBinaryPropertyByLongName(const string& longName) const
    {
        string key = MakeCanonicalPropertyName(longName);
        HashMap<string, void*>.ConstIterator pos = longNameMap.CFind(key);
        if (pos == longNameMap.CEnd())
        {
            return null;
        }
        return cast<BinaryProperty*>(pos->second);
    }
    // Registers every known binary property, in BinaryPropertyId declaration order.
    private BinaryPropertyTable() : binaryProperties(), binaryPropertyIdMap(), shortNameMap(), longNameMap()
    {
        AddBinaryProperty(BinaryPropertyId.asciiHexDigit, "AHex", "Ascii Hex Digit");
        AddBinaryProperty(BinaryPropertyId.alphabetic, "Alpha", "Alphabetic");
        AddBinaryProperty(BinaryPropertyId.bidiControl, "Bidi C", "Bidi Control");
        AddBinaryProperty(BinaryPropertyId.bidiMirrored, "Bidi M", "Bidi Mirrored");
        AddBinaryProperty(BinaryPropertyId.cased, "Cased", "Cased");
        AddBinaryProperty(BinaryPropertyId.compositionExclusion, "CE", "Composition Exclusion");
        AddBinaryProperty(BinaryPropertyId.caseIgnorable, "CI", "Case Ignorable");
        AddBinaryProperty(BinaryPropertyId.fullCompositionExclusion, "Comp Ex", "Full Composition Exclusion");
        AddBinaryProperty(BinaryPropertyId.changesWhenCasefolded, "CWCF", "Changes When Casefolded");
        AddBinaryProperty(BinaryPropertyId.changesWhenCaseMapped, "CWCM", "Changes When Casemapped");
        AddBinaryProperty(BinaryPropertyId.changesWhenNFKCCasefolded, "CWKCF", "Changes When NFKC Casefolded");
        AddBinaryProperty(BinaryPropertyId.changesWhenLowercased, "CWL", "Changes When Lowercased");
        AddBinaryProperty(BinaryPropertyId.changesWhenTitlecased, "CWT", "Changes When Titlecased");
        AddBinaryProperty(BinaryPropertyId.changesWhenUppercased, "CWU", "Changes When Uppercased");
        AddBinaryProperty(BinaryPropertyId.dash, "Dash", "Dash");
        AddBinaryProperty(BinaryPropertyId.deprecated, "Dep", "Deprecated");
        AddBinaryProperty(BinaryPropertyId.defaultIgnorableCodePoint, "DI", "Default Ignorable Code Point");
        AddBinaryProperty(BinaryPropertyId.diacritic, "Dia", "Diacritic");
        AddBinaryProperty(BinaryPropertyId.extender, "Ext", "Extender");
        AddBinaryProperty(BinaryPropertyId.graphemeBase, "Gr Base", "Grapheme Base");
        AddBinaryProperty(BinaryPropertyId.graphemeExtend, "Gr Ext", "Grapheme Extend");
        AddBinaryProperty(BinaryPropertyId.graphemeLink, "Gr Link", "Grapheme Link");
        AddBinaryProperty(BinaryPropertyId.hexDigit, "Hex", "Hex Digit");
        AddBinaryProperty(BinaryPropertyId.hyphen, "Hyphen", "Hyphen");
        AddBinaryProperty(BinaryPropertyId.idContinue, "IDC", "ID Continue");
        AddBinaryProperty(BinaryPropertyId.ideographic, "Ideo", "Ideographic");
        AddBinaryProperty(BinaryPropertyId.idStart, "IDS", "ID Start");
        AddBinaryProperty(BinaryPropertyId.idsBinaryOperator, "IDSB", "IDS Binary Operator");
        AddBinaryProperty(BinaryPropertyId.idsTrinaryOperator, "IDST", "IDS Trinary Operator");
        AddBinaryProperty(BinaryPropertyId.joinControl, "Join C", "Join Control");
        AddBinaryProperty(BinaryPropertyId.logicalOrderException, "LOE", "Logical Order Exception");
        AddBinaryProperty(BinaryPropertyId.lowercase, "Lower", "Lowercase");
        AddBinaryProperty(BinaryPropertyId.math, "Math", "Math");
        AddBinaryProperty(BinaryPropertyId.noncharacterCodePoint, "NChar", "Noncharacter Code Point");
        AddBinaryProperty(BinaryPropertyId.otherAlphabetic, "OAlpha", "Other Alphabetic");
        AddBinaryProperty(BinaryPropertyId.otherDefaultIgnorableCodePoint, "ODI", "Other Default Ignorable Code Point");
        AddBinaryProperty(BinaryPropertyId.otherGraphemeExtend, "OGr Ext", "Other Grapheme Extend");
        AddBinaryProperty(BinaryPropertyId.otherIdContinue, "OIDC", "Other ID Continue");
        AddBinaryProperty(BinaryPropertyId.otherIdStart, "OIDS", "Other ID Start");
        AddBinaryProperty(BinaryPropertyId.otherLowercase, "OLower", "Other Lowercase");
        AddBinaryProperty(BinaryPropertyId.otherMath, "OMath", "Other Math");
        AddBinaryProperty(BinaryPropertyId.otherUppercase, "OUpper", "Other Uppercase");
        AddBinaryProperty(BinaryPropertyId.patternSyntax, "Pat Syn", "Pattern Syntax");
        AddBinaryProperty(BinaryPropertyId.patternWhiteSpace, "Pat WS", "Pattern White Space");
        AddBinaryProperty(BinaryPropertyId.prependedConcatenationMark, "PCM", "Prepended Concatenation Mark");
        AddBinaryProperty(BinaryPropertyId.quotationMark, "QMark", "Quotation Mark");
        AddBinaryProperty(BinaryPropertyId.radical, "Radical", "Radical");
        AddBinaryProperty(BinaryPropertyId.softDotted, "SD", "Soft Dotted");
        AddBinaryProperty(BinaryPropertyId.sentenceterminal, "STerm", "Sentence Terminal");
        AddBinaryProperty(BinaryPropertyId.terminalPunctuation, "Term", "Terminal Punctuation");
        AddBinaryProperty(BinaryPropertyId.unifiedIdeograph, "UIdeo", "Unified Ideograph");
        AddBinaryProperty(BinaryPropertyId.uppercase, "Upper", "Uppercase");
        AddBinaryProperty(BinaryPropertyId.variationSelector, "VS", "Variation Selector");
        AddBinaryProperty(BinaryPropertyId.whiteSpace, "WSpace", "White Space");
        AddBinaryProperty(BinaryPropertyId.xidContinue, "XIDC", "XID Continue");
        AddBinaryProperty(BinaryPropertyId.xidStart, "XIDS", "XID Start");
        AddBinaryProperty(BinaryPropertyId.expandsOnNFC, "XO NFC", "Expands On NFC");
        AddBinaryProperty(BinaryPropertyId.expandsOnNFD, "XO NFD", "Expands On NFD");
        AddBinaryProperty(BinaryPropertyId.expandsOnNFKC, "XO NFKC", "Expands On NFKC");
        AddBinaryProperty(BinaryPropertyId.expandsOnNFKD, "XO NFKD", "Expands On NFKD");
    }
    // Creates one BinaryProperty, appends it to the owning list and indexes it in
    // all three maps. A later registration with an equal key replaces the earlier
    // map entry (plain operator[] assignment).
    private void AddBinaryProperty(BinaryPropertyId id, const string& shortName, const string& longName)
    {
        BinaryProperty* binaryProperty = new BinaryProperty(id, shortName, longName);
        binaryProperties.Add(binaryProperty);
        binaryPropertyIdMap[cast<uint>(cast<byte>(binaryProperty->Id()))] = binaryProperty;
        shortNameMap[MakeCanonicalPropertyName(binaryProperty->ShortName())] = binaryProperty;
        longNameMap[MakeCanonicalPropertyName(binaryProperty->LongName())] = binaryProperty;
    }
    // Releases every BinaryProperty that was allocated during construction.
    public ~BinaryPropertyTable()
    {
        for (BinaryProperty* owned : binaryProperties)
        {
            delete owned;
        }
    }
    // Owning list; the maps below hold non-owning pointers to the same objects,
    // stored as void* and cast back to BinaryProperty* on lookup.
    private List<BinaryProperty*> binaryProperties;
    private HashMap<uint, void*> binaryPropertyIdMap;
    private HashMap<string, void*> shortNameMap;
    private HashMap<string, void*> longNameMap;
    private static UniquePtr<BinaryPropertyTable> instance;
}
258
// Convenience wrapper: looks up 'id' in the shared BinaryPropertyTable.
// Returns null when the table has no entry for the id.
public nothrow const BinaryProperty* GetBinaryProperty(BinaryPropertyId id)
{
    BinaryPropertyTable& table = BinaryPropertyTable.Instance();
    return table.GetBinaryProperty(id);
}
263
// Convenience wrapper: looks up a binary property by short name in the shared
// BinaryPropertyTable. Returns null when no property matches.
public const BinaryProperty* GetBinaryPropertyByShortName(string shortName)
{
    BinaryPropertyTable& table = BinaryPropertyTable.Instance();
    return table.GetBinaryPropertyByShortName(shortName);
}
268
// Convenience wrapper: looks up a binary property by long name in the shared
// BinaryPropertyTable. Returns null when no property matches.
public const BinaryProperty* GetBinaryPropertyByLongName(string longName)
{
    BinaryPropertyTable& table = BinaryPropertyTable.Instance();
    return table.GetBinaryPropertyByLongName(longName);
}
273
// Identifies a Unicode block. 'none' is explicitly zero; every other enumerator
// takes the next value in declaration order, and BlockTable uses that value as its
// id-map key. The list is only roughly alphabetical - do not re-sort it, because
// that would renumber the ids.
// NOTE(review): the values may also be stored in external data such as cmajor_ucd.bin - confirm before changing.
public enum BlockId : ushort
{
    none = 0u,
    adlam, aegeanNumbers, ahom, alchemical, alphabeticPf, anatolianHieroglyphs,
    ancientGreekMusic, ancientGreekNumbers, ancientSymbols,
    arabic, arabicExtA, arabicMath, arabicPfA, arabicPfB, arabicSup, armenian, arrows, ascii, avestan,
    balinese, bamum, bamumSup, bassaVah, batak, bengali, bhaisuki, blockElements, bopomofo, bopomofoExt, boxDrawing,
    brahmi, braille, buginese, buhid, byzantineMusic,
    carian, caucasianAlbanian, chakma, cham, cherokee, cherokeeSup, chessSymbols,
    cjk, cjkCompat, cjkCompatForms, cjkCompatIdeographs, cjkCompatIdeographsSup,
    cjkExtA, cjkExtB, cjkExtC, cjkExtD, cjkExtE, cjkExtF,
    cjkRadicalsSup, cjkStrokes, cjkSymbols, compatJamo, controlPictures, coptic, copticEpactNumbers, countingRod, cuneiform,
    cuneiformNumbers, currencySymbols, cypriotSyllabary, cyrillic, cyrillicExtA, cyrillicExtB, cyrillicExtC, cyrillicSup,
    // NOTE(review): 'diariticalsForSymbols' looks like a misspelling of "diacriticals"; kept because the name is public.
    deseret, devanagari, devanagariExt, diacriticals, diariticalsForSymbols, diacriticalsSup, diacriticalsExt,
    dingbats, dogra, domino, duployan,
    earlyDynasticCuneiform, egyptianHieroglyphs, egyptianHieroglyphFormatControls, elbasan, elymaic, emoticons,
    enclosedAlphanum, enclosedAlphanumSup, enclosedCjk, enclosedIdeographicSup,
    ethiopic, ethiopicExt, ethiopicExtA, ethiopicSup,
    geometricShapes, geometricShapesExt, georgian, georgianExt, georgianSup, glagolitic, glagoliticSup,
    gothic, grantha, greek, greekExt, gujarati, gunjalaGondi, gurmukhi,
    halfAndFullForms, halfMarks, hangul, hanifiRohingya, hanunoo, hatran, hebrew, highPuSurrogates, highSurrogates, hiragana,
    idc, ideographicSymbols, imperialAramaic, indicNumberForms, inscriptionalPahlavi, inscriptionalParthian, ipaExt, indicSiyaqNumbers,
    jamo, jamoExtA, jamoExtB, javanese,
    kaithi, kanaExtA, smallKanaExt, kanaSup, kanbun, kangxi, kannada, katakana, katakanaExt, kayahLi,
    kharoshthi, khmer, khmerSymbols, khojki, khudawadi,
    lao, latin1Sup, latinExtA, latinExtAdditional, latinExtB, latinExtC, latinExtD, latinExtE, lepcha, letterlikeSymbols, limbu,
    linearA, linearBIdeograms, linearBSyllabary, lisu, lowSurrogates, lycian, lydian,
    mahajani, mahjong, makasar, malayalam, mandaic, manichean, marchen, masaramGondi, mathAlphanum, mathOperators,
    mayanNumerals, medefaidrin, meeteiMayek, meeteiMayekExt, mendeKikakui, meroiticCursive, meroiticHieroglyphs,
    miao, miscArrows, miscMathSymbolsA, miscMathSymbolsB, miscPictographs, miscSymbols, miscTechnical,
    modi, modifierLetters, modifierToneLetters, mongolian, mongolianSup,
    mro, music, multani, myanmar, myanmarExtA, myanmarExtB,
    nabataean, nb, nandinagari, newTaiLue, newa, nko, numberForms, nushu, nyiakengPuachueHmong,
    ocr, ogham, olChiki, oldHungarian, oldItalic, oldNorthArabian, oldPermic, oldPersian, oldSogdian, oldSouthArabian,
    oldTurkic, oriya, ornamentalDingbats, osage, osmanya, ottomanSiyaqNumbers,
    pahawhHmong, palmyrene, pauCinHau, phagsPa, phaistos, phoenician, phoneticExt, phoneticExtSup,
    playingCards, psalterPahlavi, pua, punctuation,
    rejang, rumi, runic,
    samaritan, saurashtra, sharada, shavian, shorthandFormatControls, siddham, sinhala, sinhalaArchaicNumbers,
    smallForms, sogdian, soraSompeng, soyombo, specials, sundanese, sundaneseSup,
    supArrowsA, supArrowsB, supArrowsC, supMathOperators, supPuaA, supPuaB, supPunctuation, supSymbolsAndPictographs,
    superAndSub, suttonSignWriting, sylotiNagri, symbolsAndPictographsExtA, syriac,
    syriacSup,
    tagalog, tagbanwa, tags, taiLe, taiTham, taiViet, taiXuanJing, takri, tamil, tamilSup, tangut, tangutComponents,
    telugu, thaana, thai, tibetan, tifinagh, tirhuta, transportAndMap,
    ucas, ucasExt, ugaritic,
    vai, vedicExt, verticalForms, vs, vsSup,
    wancho, warangCiti,
    yiRadicals, yiSyllables, yijing,
    zanabazarSquare
}
307
// Immutable description of one Unicode block: its id, the short and long names
// under which it can be looked up, and the 'start' and 'end' code points supplied
// at construction (the registrations in BlockTable pass the first and last code
// point of the block, i.e. an inclusive range).
//
// The accessors are declared const so that they can be called through the
// 'const Block*' pointers returned by BlockTable's lookup functions; none of them
// modifies the object.
public class Block
{
    // Stores copies of the given id, names and range bounds.
    public nothrow Block(BlockId id_, const string& shortName_, const string& longName_, uchar start_, uchar end_) :
        id(id_), shortName(shortName_), longName(longName_), start(start_), end(end_)
    {
    }
    // Returns the block id given at construction.
    public inline nothrow BlockId Id() const
    {
        return id;
    }
    // Returns the short name exactly as given at construction (not canonicalized).
    public inline nothrow const string& ShortName() const
    {
        return shortName;
    }
    // Returns the long name exactly as given at construction (not canonicalized).
    public inline nothrow const string& LongName() const
    {
        return longName;
    }
    // Returns the 'start' code point given at construction.
    public inline nothrow uchar Start() const
    {
        return start;
    }
    // Returns the 'end' code point given at construction.
    public inline nothrow uchar End() const
    {
        return end;
    }
    private BlockId id;
    private string shortName;
    private string longName;
    private uchar start;
    private uchar end;
}
340
341 public class BlockTable
342 {
// Static constructor: creates the single BlockTable and stores it in 'instance'.
static BlockTable() :
    instance(new BlockTable())
{
}
// Returns a reference to the shared BlockTable held in 'instance'.
public static BlockTable& Instance()
{
    return *instance;
}
// Returns the block stored in blockIdMap under the numeric value of 'id',
// or null when the map has no entry for it.
public inline nothrow const Block* GetBlock(BlockId id) const
{
    uint key = cast<uint>(cast<ushort>(id));
    HashMap<uint, void*>.ConstIterator pos = blockIdMap.CFind(key);
    if (pos == blockIdMap.CEnd())
    {
        return null;
    }
    // Map values are stored as void*; cast back to the Block type.
    return cast<Block*>(pos->second);
}
// Returns the block stored in shortNameMap under the canonical form of
// 'shortName' (see MakeCanonicalPropertyName), or null when there is no such entry.
public inline const Block* GetBlockByShortName(const string& shortName) const
{
    string key = MakeCanonicalPropertyName(shortName);
    HashMap<string, void*>.ConstIterator pos = shortNameMap.CFind(key);
    if (pos == shortNameMap.CEnd())
    {
        return null;
    }
    // Map values are stored as void*; cast back to the Block type.
    return cast<Block*>(pos->second);
}
// Returns the block stored in longNameMap under the canonical form of
// 'longName' (see MakeCanonicalPropertyName), or null when there is no such entry.
public inline const Block* GetBlockByLongName(const string& longName) const
{
    string key = MakeCanonicalPropertyName(longName);
    HashMap<string, void*>.ConstIterator pos = longNameMap.CFind(key);
    if (pos == longNameMap.CEnd())
    {
        return null;
    }
    // Map values are stored as void*; cast back to the Block type.
    return cast<Block*>(pos->second);
}
377 private BlockTable() : blocks(), blockIdMap(), shortNameMap(), longNameMap()
378 {
379 blocks.Add(new Block(BlockId.ascii, "ASCII", "Basic Latin", cast<uchar>(0x0000u), cast<uchar>(0x007Fu)));
380 blocks.Add(new Block(BlockId.latin1Sup, "Latin 1 Sup", "Latin-1 Supplement", cast<uchar>(0x0080u), cast<uchar>(0x00FFu)));
381 blocks.Add(new Block(BlockId.latinExtA, "Latin Ext A", "Latin Extended-A", cast<uchar>(0x0100u), cast<uchar>(0x017Fu)));
382 blocks.Add(new Block(BlockId.latinExtB, "Latin Ext B", "Latin Extended-B", cast<uchar>(0x0180u), cast<uchar>(0x0024Fu)));
383 blocks.Add(new Block(BlockId.ipaExt, "IPA Ext", "IPA Extensions", cast<uchar>(0x0250u), cast<uchar>(0x02AFu)));
384 blocks.Add(new Block(BlockId.modifierLetters, "Modifier Letters", "Spacing Modifier Letters", cast<uchar>(0x02B0u), cast<uchar>(0x02FFu)));
385 blocks.Add(new Block(BlockId.diacriticals, "Diacriticals", "Combining Diacritical Marks", cast<uchar>(0x0300u), cast<uchar>(0x036Fu)));
386 blocks.Add(new Block(BlockId.greek, "Greek", "Greek and Coptic", cast<uchar>(0x0370u), cast<uchar>(0x03FFu)));
387 blocks.Add(new Block(BlockId.cyrillic, "Cyrillic", "Cyrillic", cast<uchar>(0x0400u), cast<uchar>(0x04FFu)));
388 blocks.Add(new Block(BlockId.cyrillicSup, "Cyrillic Sup", "Cyrillic Supplement", cast<uchar>(0x0500u), cast<uchar>(0x052Fu)));
389 blocks.Add(new Block(BlockId.armenian, "Armenian", "Armenian", cast<uchar>(0x0530u), cast<uchar>(0x058Fu)));
390 blocks.Add(new Block(BlockId.hebrew, "Hebrew", "Hebrew", cast<uchar>(0x0590u), cast<uchar>(0x05FFu)));
391 blocks.Add(new Block(BlockId.arabic, "Arabic", "Arabic", cast<uchar>(0x0600u), cast<uchar>(0x06FFu)));
392 blocks.Add(new Block(BlockId.syriac, "Syriac", "Syriac", cast<uchar>(0x0700u), cast<uchar>(0x074Fu)));
393 blocks.Add(new Block(BlockId.arabicSup, "Arabic Sup", "Arabic Supplement", cast<uchar>(0x0750u), cast<uchar>(0x077Fu)));
394 blocks.Add(new Block(BlockId.thaana, "Thaana", "Thaana", cast<uchar>(0x0780u), cast<uchar>(0x07BFu)));
395 blocks.Add(new Block(BlockId.nko, "Nko", "Nko", cast<uchar>(0x07C0u), cast<uchar>(0x07FFu)));
396 blocks.Add(new Block(BlockId.samaritan, "Samaritan", "Samaritan", cast<uchar>(0x0800u), cast<uchar>(0x083Fu)));
397 blocks.Add(new Block(BlockId.mandaic, "Mandaic", "Mandaic", cast<uchar>(0x0840u), cast<uchar>(0x085Fu)));
398 blocks.Add(new Block(BlockId.syriacSup, "Syriac Sup", "Syriac Supplement", cast<uchar>(0x0860), cast<uchar>(0x086F)));
399 blocks.Add(new Block(BlockId.arabicExtA, "Arabic Ext A", "Arabic Extended-A", cast<uchar>(0x08A0u), cast<uchar>(0x08FFu)));
400 blocks.Add(new Block(BlockId.devanagari, "Devanagari", "Devanagari", cast<uchar>(0x0900u), cast<uchar>(0x097Fu)));
401 blocks.Add(new Block(BlockId.bengali, "Bengali", "Bengali", cast<uchar>(0x0980u), cast<uchar>(0x09FFu)));
402 blocks.Add(new Block(BlockId.gurmukhi, "Gurmukhi", "Gurmukhi", cast<uchar>(0x0A00u), cast<uchar>(0x0A7Fu)));
403 blocks.Add(new Block(BlockId.gujarati, "Gujarati", "Gujarati", cast<uchar>(0x0A80u), cast<uchar>(0x0AFFu)));
404 blocks.Add(new Block(BlockId.oriya, "Oriya", "Oriya", cast<uchar>(0x0B00u), cast<uchar>(0x0B7Fu)));
405 blocks.Add(new Block(BlockId.tamil, "Tamil", "Tamil", cast<uchar>(0x0B80u), cast<uchar>(0x0BFFu)));
406 blocks.Add(new Block(BlockId.telugu, "Telugu", "Telugu", cast<uchar>(0x0C00u), cast<uchar>(0x0C7Fu)));
407 blocks.Add(new Block(BlockId.kannada, "Kannada", "Kannada", cast<uchar>(0x0C80u), cast<uchar>(0x0CFFu)));
408 blocks.Add(new Block(BlockId.malayalam, "Malayalam", "Malayalam", cast<uchar>(0x0D00u), cast<uchar>(0x0D7Fu)));
409 blocks.Add(new Block(BlockId.sinhala, "Sinhala", "Sinhala", cast<uchar>(0x0D80u), cast<uchar>(0x0DFFu)));
410 blocks.Add(new Block(BlockId.thai, "Thai", "Thai", cast<uchar>(0x0E00u), cast<uchar>(0x0E7Fu)));
411 blocks.Add(new Block(BlockId.lao, "Lao", "Lao", cast<uchar>(0x0E80u), cast<uchar>(0x0EFFu)));
412 blocks.Add(new Block(BlockId.tibetan, "Tibetan", "Tibetan", cast<uchar>(0x0F00u), cast<uchar>(0x0FFFu)));
413 blocks.Add(new Block(BlockId.myanmar, "Myanmar", "Myanmar", cast<uchar>(0x1000u), cast<uchar>(0x109Fu)));
414 blocks.Add(new Block(BlockId.georgian, "Georgian", "Georgian", cast<uchar>(0x10A0u), cast<uchar>(0x10FFu)));
415 blocks.Add(new Block(BlockId.jamo, "Jamo", "Hangul Jamo", cast<uchar>(0x1100u), cast<uchar>(0x11FFu)));
416 blocks.Add(new Block(BlockId.ethiopic, "Ethiopic", "Ethiopic", cast<uchar>(0x1200u), cast<uchar>(0x137Fu)));
417 blocks.Add(new Block(BlockId.ethiopicSup, "Ethiopic Sup", "Ethiopic Supplement", cast<uchar>(0x1380u), cast<uchar>(0x139Fu)));
418 blocks.Add(new Block(BlockId.cherokee, "Cherokee", "Cherokee", cast<uchar>(0x13A0u), cast<uchar>(0x13FFu)));
419 blocks.Add(new Block(BlockId.ucas, "UCAS", "Unified Canadian Aboriginal Syllabics", cast<uchar>(0x1400u), cast<uchar>(0x167Fu)));
420 blocks.Add(new Block(BlockId.ogham, "Ogham", "Ogham", cast<uchar>(0x1680u), cast<uchar>(0x169Fu)));
421 blocks.Add(new Block(BlockId.runic, "Runic", "Runic", cast<uchar>(0x16A0u), cast<uchar>(0x16FFu)));
422 blocks.Add(new Block(BlockId.tagalog, "Tagalog", "Tagalog", cast<uchar>(0x1700u), cast<uchar>(0x171Fu)));
423 blocks.Add(new Block(BlockId.hanunoo, "Hanunoo", "Hanunoo", cast<uchar>(0x1720u), cast<uchar>(0x173Fu)));
424 blocks.Add(new Block(BlockId.buhid, "Buhid", "Buhid", cast<uchar>(0x1740u), cast<uchar>(0x175Fu)));
425 blocks.Add(new Block(BlockId.tagbanwa, "Tagbanwa", "Tagbanwa", cast<uchar>(0x1760u), cast<uchar>(0x177Fu)));
426 blocks.Add(new Block(BlockId.khmer, "Khmer", "Khmer", cast<uchar>(0x1780u), cast<uchar>(0x17FFu)));
427 blocks.Add(new Block(BlockId.mongolian, "Mongolian", "Mongolian", cast<uchar>(0x1800u), cast<uchar>(0x18AFu)));
428 blocks.Add(new Block(BlockId.ucasExt, "UCAS Ext", "Unified Canadian Aboriginal Syllabics Extended", cast<uchar>(0x18B0u), cast<uchar>(0x18FFu)));
429 blocks.Add(new Block(BlockId.limbu, "Limbu", "Limbu", cast<uchar>(0x1900u), cast<uchar>(0x194Fu)));
430 blocks.Add(new Block(BlockId.taiLe, "Tai Le", "Tai Le", cast<uchar>(0x1950u), cast<uchar>(0x197Fu)));
431 blocks.Add(new Block(BlockId.newTaiLue, "New Tai Lue", "New Tai Lue", cast<uchar>(0x1980u), cast<uchar>(0x19DFu)));
432 blocks.Add(new Block(BlockId.khmerSymbols, "Khmer Symbols", "Khmer Symbols", cast<uchar>(0x19E0u), cast<uchar>(0x19FFu)));
433 blocks.Add(new Block(BlockId.buginese, "Buginese", "Buginese", cast<uchar>(0x1A00u), cast<uchar>(0x1A1Fu)));
434 blocks.Add(new Block(BlockId.taiTham, "Tai Tham", "Tai Tham", cast<uchar>(0x1A20u), cast<uchar>(0x1AAFu)));
435 blocks.Add(new Block(BlockId.diacriticalsExt, "Diacriticals Ext", "Combining Diacritical Marks Extended", cast<uchar>(0x1AB0u), cast<uchar>(0x1AFFu)));
436 blocks.Add(new Block(BlockId.balinese, "Balinese", "Balinese", cast<uchar>(0x1B00u), cast<uchar>(0x1B7Fu)));
437 blocks.Add(new Block(BlockId.sundanese, "Sundanese", "Sundanese", cast<uchar>(0x1B80u), cast<uchar>(0x1BBFu)));
438 blocks.Add(new Block(BlockId.batak, "Batak", "Batak", cast<uchar>(0x1BC0u), cast<uchar>(0x1BFFu)));
439 blocks.Add(new Block(BlockId.lepcha, "Lepcha", "Lepcha", cast<uchar>(0x1C00u), cast<uchar>(0x1C4Fu)));
440 blocks.Add(new Block(BlockId.olChiki, "Ol Chiki", "Ol Chiki", cast<uchar>(0x1C50u), cast<uchar>(0x1C7Fu)));
441 blocks.Add(new Block(BlockId.cyrillicExtC, "Cyrillic Ext C", "Cyrillic Extended-C", cast<uchar>(0x1C80u), cast<uchar>(0x1C8Fu)));
442 blocks.Add(new Block(BlockId.georgianExt, "Georgian Ext", "Georgian Extended", cast<uchar>(0x1C90), cast<uchar>(0x1CBF)));
443 blocks.Add(new Block(BlockId.sundaneseSup, "Sundanese Sup", "Sundanese Supplement", cast<uchar>(0x1CC0u), cast<uchar>(0x1CCFu)));
444 blocks.Add(new Block(BlockId.vedicExt, "Vedic Ext", "Vedic Extensions", cast<uchar>(0x1CD0u), cast<uchar>(0x1CFFu)));
445 blocks.Add(new Block(BlockId.phoneticExt, "Phonetic Ext", "Phonetic Extensions", cast<uchar>(0x1D00u), cast<uchar>(0x1D7Fu)));
446 blocks.Add(new Block(BlockId.phoneticExtSup, "Phonetic Ext Sup", "Phonetic Extensions Supplement", cast<uchar>(0x1D80u), cast<uchar>(0x1DBFu)));
447 blocks.Add(new Block(BlockId.diacriticalsSup, "Diacriticals Sup", "Combining Diacritical Marks Supplement", cast<uchar>(0x1DC0u), cast<uchar>(0x1DFFu)));
448 blocks.Add(new Block(BlockId.latinExtAdditional, "Latin Ext Additional", "Latin Extended Additional", cast<uchar>(0x1E00u), cast<uchar>(0x1EFFu)));
449 blocks.Add(new Block(BlockId.greekExt, "Greek Ext", "Greek Extended", cast<uchar>(0x1F00u), cast<uchar>(0x1FFFu)));
450 blocks.Add(new Block(BlockId.punctuation, "Punctuation", "General Punctuation", cast<uchar>(0x2000u), cast<uchar>(0x206Fu)));
451 blocks.Add(new Block(BlockId.superAndSub, "Super And Sub", "Superscripts and Subscripts", cast<uchar>(0x2070u), cast<uchar>(0x209Fu)));
452 blocks.Add(new Block(BlockId.currencySymbols, "Currency Symbols", "Currency Symbols", cast<uchar>(0x20A0u), cast<uchar>(0x20CFu)));
453 blocks.Add(new Block(BlockId.diariticalsForSymbols, "Diacriticals For Symbols", "Combining Diacritical Marks for Symbols", cast<uchar>(0x20D0u), cast<uchar>(0x20FFu)));
454 blocks.Add(new Block(BlockId.letterlikeSymbols, "Letterlike Symbols", "Letterlike Symbols", cast<uchar>(0x2100u), cast<uchar>(0x214Fu)));
455 blocks.Add(new Block(BlockId.numberForms, "Number Forms", "Number Forms", cast<uchar>(0x2150u), cast<uchar>(0x218Fu)));
456 blocks.Add(new Block(BlockId.arrows, "Arrows", "Arrows", cast<uchar>(0x2190u), cast<uchar>(0x21FFu)));
457 blocks.Add(new Block(BlockId.mathOperators, "Math Operators", "Mathematical Operators", cast<uchar>(0x2200u), cast<uchar>(0x22FFu)));
458 blocks.Add(new Block(BlockId.miscTechnical, "Misc Technical", "Miscellaneous Technical", cast<uchar>(0x2300u), cast<uchar>(0x23FFu)));
459 blocks.Add(new Block(BlockId.controlPictures, "Control Pictures", "Control Pictures", cast<uchar>(0x2400u), cast<uchar>(0x243Fu)));
460 blocks.Add(new Block(BlockId.ocr, "OCR", "Optical Character Regognition", cast<uchar>(0x2440u), cast<uchar>(0x245Fu)));
461 blocks.Add(new Block(BlockId.enclosedAlphanum, "Enclosed Alphanum", "Enclosed Alphanumerics", cast<uchar>(0x2460u), cast<uchar>(0x24FFu)));
462 blocks.Add(new Block(BlockId.boxDrawing, "Box Drawing", "Box Drawing", cast<uchar>(0x2500u), cast<uchar>(0x257Fu)));
463 blocks.Add(new Block(BlockId.blockElements, "Block Elements", "Block Elements", cast<uchar>(0x2580u), cast<uchar>(0x259Fu)));
464 blocks.Add(new Block(BlockId.geometricShapes, "Geometric Shapes", "Geometric Shapes", cast<uchar>(0x25A0u), cast<uchar>(0x25FFu)));
465 blocks.Add(new Block(BlockId.miscSymbols, "Misc Symbols", "Miscellaneous Symbols", cast<uchar>(0x2600u), cast<uchar>(0x26FFu)));
466 blocks.Add(new Block(BlockId.dingbats, "Dingbats", "Dingbats", cast<uchar>(0x2700u), cast<uchar>(0x27BFu)));
467 blocks.Add(new Block(BlockId.miscMathSymbolsA, "Misc Math Symbols A", "Miscellaneous Mathematical Symbols - A", cast<uchar>(0x27C0u), cast<uchar>(0x27EFu)));
468 blocks.Add(new Block(BlockId.supArrowsA, "Sup Arrows A", "Supplemental Arrows-A", cast<uchar>(0x27F0u), cast<uchar>(0x27FFu)));
469 blocks.Add(new Block(BlockId.braille, "Braille", "Braille Patterns", cast<uchar>(0x2800u), cast<uchar>(0x28FFu)));
470 blocks.Add(new Block(BlockId.supArrowsB, "Sup Arrows B", "Supplemental Arrows-B", cast<uchar>(0x2900u), cast<uchar>(0x297Fu)));
471 blocks.Add(new Block(BlockId.miscMathSymbolsB, "Misc Math Symbols B", "Miscellaneous Mathematical Symbols-B", cast<uchar>(0x2980u), cast<uchar>(0x29FFu)));
472 blocks.Add(new Block(BlockId.supMathOperators, "Sup Math Operators", "Supplemental Mathematical Operators", cast<uchar>(0x2A00u), cast<uchar>(0x2AFFu)));
473 blocks.Add(new Block(BlockId.miscArrows, "Misc Arrows", "Miscellaneous Symbols and Arrows", cast<uchar>(0x2B00u), cast<uchar>(0x2BFFu)));
474 blocks.Add(new Block(BlockId.glagolitic, "Glagolitic", "Glagolitic", cast<uchar>(0x2C00u), cast<uchar>(0x2C5Fu)));
475 blocks.Add(new Block(BlockId.latinExtC, "Latin Ext C", "Latin Extended-C", cast<uchar>(0x2C60u), cast<uchar>(0x2C7Fu)));
476 blocks.Add(new Block(BlockId.coptic, "Coptic", "Coptic", cast<uchar>(0x2C80u), cast<uchar>(0x2CFFu)));
477 blocks.Add(new Block(BlockId.georgianSup, "Georgian Sup", "Georgian Supplement", cast<uchar>(0x2D00u), cast<uchar>(0x2D2Fu)));
478 blocks.Add(new Block(BlockId.tifinagh, "Tifinagh", "Tifinagh", cast<uchar>(0x2D30u), cast<uchar>(0x2D7Fu)));
479 blocks.Add(new Block(BlockId.ethiopicExt, "Ethiopic Ext", "Ethiopic Extended", cast<uchar>(0x2D80u), cast<uchar>(0x2DDFu)));
480 blocks.Add(new Block(BlockId.cyrillicExtA, "Cyrillic Ext A", "Cyrillic Extended-A", cast<uchar>(0x2DE0u), cast<uchar>(0x2DFFu)));
481 blocks.Add(new Block(BlockId.supPunctuation, "Sup Punctuation", "Supplemental Punctuation", cast<uchar>(0x2E00u), cast<uchar>(0x2E7Fu)));
482 blocks.Add(new Block(BlockId.cjkRadicalsSup, "CJK Radicals Sup", "CJK Radicals Supplement", cast<uchar>(0x2E80u), cast<uchar>(0x2EFFu)));
483 blocks.Add(new Block(BlockId.kangxi, "Kangxi", "Kangxi Radicals", cast<uchar>(0x2F00u), cast<uchar>(0x2FDFu)));
484 blocks.Add(new Block(BlockId.idc, "IDC", "Ideographic Description Characters", cast<uchar>(0x2FF0u), cast<uchar>(0x2FFFu)));
485 blocks.Add(new Block(BlockId.cjkSymbols, "CJK Symbols", "CJK Symbols and Punctuation", cast<uchar>(0x3000u), cast<uchar>(0x303Fu)));
486 blocks.Add(new Block(BlockId.hiragana, "Hiragana", "Hiragana", cast<uchar>(0x3040u), cast<uchar>(0x309Fu)));
487 blocks.Add(new Block(BlockId.katakana, "Katakana", "Katakana", cast<uchar>(0x30A0u), cast<uchar>(0x30FFu)));
488 blocks.Add(new Block(BlockId.bopomofo, "Bopomofo", "Bopomofo", cast<uchar>(0x3100u), cast<uchar>(0x312Fu)));
489 blocks.Add(new Block(BlockId.compatJamo, "Compat Jamo", "Hangul Compatibility Jamo", cast<uchar>(0x3130u), cast<uchar>(0x318Fu)));
490 blocks.Add(new Block(BlockId.kanbun, "Kanbun", "Kanbun", cast<uchar>(0x3190u), cast<uchar>(0x319Fu)));
491 blocks.Add(new Block(BlockId.bopomofoExt, "Bopomofo Ext", "Bopomofo Extended", cast<uchar>(0x31A0u), cast<uchar>(0x31BFu)));
492 blocks.Add(new Block(BlockId.cjkStrokes, "CJK Strokes", "CJK Strokes", cast<uchar>(0x31C0u), cast<uchar>(0x31EFu)));
493 blocks.Add(new Block(BlockId.katakanaExt, "Katakana Ext", "Katakana Phonetic Extensions", cast<uchar>(0x31F0u), cast<uchar>(0x31FFu)));
494 blocks.Add(new Block(BlockId.enclosedCjk, "Enclosed CJK", "Enclosed CJK Letters and Months", cast<uchar>(0x3200u), cast<uchar>(0x32FFu)));
495 blocks.Add(new Block(BlockId.cjkCompat, "CJK Compat", "CJK Compatibility", cast<uchar>(0x3300u), cast<uchar>(0x33FFu)));
496 blocks.Add(new Block(BlockId.cjkExtA, "CJK Ext A", "CJK Unified Ideographic Extension A", cast<uchar>(0x3400u), cast<uchar>(0x4DBFu)));
497 blocks.Add(new Block(BlockId.yijing, "Yijing", "Yijing Hexagram Symbols", cast<uchar>(0x4DC0u), cast<uchar>(0x4DFFu)));
498 blocks.Add(new Block(BlockId.cjk, "CJK", "CJK Unified Ideographs", cast<uchar>(0x4E00u), cast<uchar>(0x9FFFu)));
499 blocks.Add(new Block(BlockId.yiSyllables, "Yi Syllables", "Yi Syllables", cast<uchar>(0xA000u), cast<uchar>(0xA48Fu)));
500 blocks.Add(new Block(BlockId.yiRadicals, "Yi Radicals", "Yi Radicals", cast<uchar>(0xA090u), cast<uchar>(0xA4CFu)));
501 blocks.Add(new Block(BlockId.lisu, "Lisu", "Lisu", cast<uchar>(0xA0D0u), cast<uchar>(0xA4FFu)));
502 blocks.Add(new Block(BlockId.vai, "Vai", "Vai", cast<uchar>(0xA500u), cast<uchar>(0xA63Fu)));
503 blocks.Add(new Block(BlockId.cyrillicExtB, "Cyrillic Ext B", "Cyrillic Extended-B", cast<uchar>(0xA640u), cast<uchar>(0xA69Fu)));
504 blocks.Add(new Block(BlockId.bamum, "Bamum", "Bamum", cast<uchar>(0xA6A0u), cast<uchar>(0xA6FFu)));
505 blocks.Add(new Block(BlockId.modifierToneLetters, "Modifier Tone Letters", "Modifier Tone Letters", cast<uchar>(0xA700u), cast<uchar>(0xA71Fu)));
506 blocks.Add(new Block(BlockId.latinExtD, "Latin Ext D", "Latin Extended-D", cast<uchar>(0xA720u), cast<uchar>(0xA7FFu)));
507 blocks.Add(new Block(BlockId.sylotiNagri, "Syloti Nagri", "Syloti Nagri", cast<uchar>(0xA800u), cast<uchar>(0xA82Fu)));
508 blocks.Add(new Block(BlockId.indicNumberForms, "Indic Number Forms", "Common Indic Number Forms", cast<uchar>(0xA830u), cast<uchar>(0xA83Fu)));
509 blocks.Add(new Block(BlockId.phagsPa, "Phags Pa", "Phags-Pa", cast<uchar>(0xA840u), cast<uchar>(0xA87Fu)));
510 blocks.Add(new Block(BlockId.saurashtra, "Saurashtra", "Saurashtra", cast<uchar>(0xA880u), cast<uchar>(0xA8DFu)));
511 blocks.Add(new Block(BlockId.devanagariExt, "Devanagari Ext", "Devanagari Extended", cast<uchar>(0xA8E0u), cast<uchar>(0xA8FFu)));
512 blocks.Add(new Block(BlockId.kayahLi, "Kayah Li", "Kayah Li", cast<uchar>(0xA900u), cast<uchar>(0xA92Fu)));
513 blocks.Add(new Block(BlockId.rejang, "Rejang", "Rejang", cast<uchar>(0xA930u), cast<uchar>(0xA95Fu)));
514 blocks.Add(new Block(BlockId.jamoExtA, "Jamo Ext A", "Hangul Jamo Extended-A", cast<uchar>(0xA960u), cast<uchar>(0xA97Fu)));
515 blocks.Add(new Block(BlockId.javanese, "Javanese", "Javanese", cast<uchar>(0xA980u), cast<uchar>(0xA9DFu)));
516 blocks.Add(new Block(BlockId.myanmarExtB, "Myanmar Ext B", "Myanmar Extended - B", cast<uchar>(0xA9E0u), cast<uchar>(0xA9FFu)));
517 blocks.Add(new Block(BlockId.cham, "Cham", "Cham", cast<uchar>(0xAA00u), cast<uchar>(0xAA5Fu)));
518 blocks.Add(new Block(BlockId.myanmarExtA, "Myanmar Ext A", "Myanmar Extended-A", cast<uchar>(0xAA60u), cast<uchar>(0xAA7Fu)));
519 blocks.Add(new Block(BlockId.taiViet, "Tai Viet", "Tai Viet", cast<uchar>(0xAA80u), cast<uchar>(0xAADFu)));
520 blocks.Add(new Block(BlockId.meeteiMayekExt, "Meetei Mayek Ext", "Meetei Mayek Extensions", cast<uchar>(0xAAE0u), cast<uchar>(0xAAFFu)));
521 blocks.Add(new Block(BlockId.ethiopicExtA, "Ethiopic Ext A", "Ethiopic Extended-A", cast<uchar>(0xAB00u), cast<uchar>(0xAB2Fu)));
522 blocks.Add(new Block(BlockId.latinExtE, "Latin Ext E", "Latin Extended-E", cast<uchar>(0xAB30u), cast<uchar>(0xAB6Fu)));
523 blocks.Add(new Block(BlockId.cherokeeSup, "Cherokee Sup", "Cherokee Supplement", cast<uchar>(0xAB70u), cast<uchar>(0xABBFu)));
524 blocks.Add(new Block(BlockId.meeteiMayek, "Meetei Mayek", "Meetei Mayek", cast<uchar>(0xABC0u), cast<uchar>(0xABFFu)));
525 blocks.Add(new Block(BlockId.hangul, "Hangul", "Hangul Syllables", cast<uchar>(0xAC00u), cast<uchar>(0xD7AFu)));
526 blocks.Add(new Block(BlockId.jamoExtB, "Jamo Ext B", "Hangul Jamo Extended-B", cast<uchar>(0xD7B0u), cast<uchar>(0xD7FFu)));
527 blocks.Add(new Block(BlockId.highSurrogates, "High Surrogates", "High Surrogates", cast<uchar>(0xD800u), cast<uchar>(0xDB7Fu)));
528 blocks.Add(new Block(BlockId.highPuSurrogates, "High PU Surrogates", "High Private Use Surrogates", cast<uchar>(0xDB80u), cast<uchar>(0xDBFFu)));
529 blocks.Add(new Block(BlockId.lowSurrogates, "Low Surrogates", "Low Surrogates", cast<uchar>(0xDC00u), cast<uchar>(0xDFFFu)));
530 blocks.Add(new Block(BlockId.pua, "PUA", "Private Use Area", cast<uchar>(0xE000u), cast<uchar>(0xF8FFu)));
531 blocks.Add(new Block(BlockId.cjkCompatIdeographs, "CJK Compat Ideographs", "CJK Compatibility Ideographs", cast<uchar>(0xF900u), cast<uchar>(0xFAFFu)));
532 blocks.Add(new Block(BlockId.alphabeticPf, "Alphabetic PF", "Alphabetic Presentations Forms", cast<uchar>(0xFB00u), cast<uchar>(0xFB4Fu)));
533 blocks.Add(new Block(BlockId.arabicPfA, "Arabic PF A", "Arabic Presentation Forms-A", cast<uchar>(0xFB50u), cast<uchar>(0xFDFFu)));
534 blocks.Add(new Block(BlockId.vs, "VS", "Variation Selectors", cast<uchar>(0xFE00u), cast<uchar>(0xFE0Fu)));
535 blocks.Add(new Block(BlockId.verticalForms, "Vertical Forms", "Vertical Forms", cast<uchar>(0xFE10u), cast<uchar>(0xFE1Fu)));
536 blocks.Add(new Block(BlockId.halfMarks, "Half Marks", "Combining Half Marks", cast<uchar>(0xFE20u), cast<uchar>(0xFE2Fu)));
537 blocks.Add(new Block(BlockId.cjkCompatForms, "CJK Compat Forms", "CJK Compatibility Forms", cast<uchar>(0xFE30u), cast<uchar>(0xFE4Fu)));
538 blocks.Add(new Block(BlockId.smallForms, "Small Forms", "Small Form Variants", cast<uchar>(0xFE50u), cast<uchar>(0xFE6Fu)));
539 blocks.Add(new Block(BlockId.arabicPfB, "Arabic PF B", "Arabic Presentation Forms-B", cast<uchar>(0xFE70u), cast<uchar>(0xFEFFu)));
540 blocks.Add(new Block(BlockId.halfAndFullForms, "Half And Full Forms", "Halfwidth and Fullwidth Forms", cast<uchar>(0xFF00u), cast<uchar>(0xFFEFu)));
541 blocks.Add(new Block(BlockId.specials, "Specials", "Specials", cast<uchar>(0xFFF0u), cast<uchar>(0xFFFFu)));
542 blocks.Add(new Block(BlockId.linearBSyllabary, "Linear B Syllabary", "Linear B Syllabary", cast<uchar>(0x10000u), cast<uchar>(0x1007Fu)));
543 blocks.Add(new Block(BlockId.linearBIdeograms, "Linear B Ideograms", "Linear B Ideograms", cast<uchar>(0x10080u), cast<uchar>(0x100FFu)));
544 blocks.Add(new Block(BlockId.aegeanNumbers, "Aegean Numbers", "Aegean Numbers", cast<uchar>(0x10100u), cast<uchar>(0x1013Fu)));
545 blocks.Add(new Block(BlockId.ancientGreekNumbers, "Ancient Greek Numbers", "Ancient Greek Numbers", cast<uchar>(0x10140u), cast<uchar>(0x1018Fu)));
546 blocks.Add(new Block(BlockId.ancientSymbols, "Ancient Symbols", "Ancient Symbols", cast<uchar>(0x10190u), cast<uchar>(0x101CFu)));
547 blocks.Add(new Block(BlockId.phaistos, "Phaistos", "Phaistos Disc", cast<uchar>(0x101D0u), cast<uchar>(0x101FFu)));
548 blocks.Add(new Block(BlockId.lycian, "Lycian", "Lycian", cast<uchar>(0x10280u), cast<uchar>(0x1029Fu)));
549 blocks.Add(new Block(BlockId.carian, "Carian", "Carian", cast<uchar>(0x102A0u), cast<uchar>(0x102DFu)));
550 blocks.Add(new Block(BlockId.copticEpactNumbers, "Coptic Epact Numbers", "Coptic Epact Numbers", cast<uchar>(0x102E0u), cast<uchar>(0x102FFu)));
551 blocks.Add(new Block(BlockId.oldItalic, "Old Italic", "Old Italic", cast<uchar>(0x10300u), cast<uchar>(0x1032Fu)));
552 blocks.Add(new Block(BlockId.gothic, "Gothic", "Gothic", cast<uchar>(0x10330u), cast<uchar>(0x1034Fu)));
553 blocks.Add(new Block(BlockId.oldPermic, "Old Permic", "Old Permic", cast<uchar>(0x10350u), cast<uchar>(0x1037Fu)));
554 blocks.Add(new Block(BlockId.ugaritic, "Ugaritic", "Ugaritic", cast<uchar>(0x10380u), cast<uchar>(0x1039Fu)));
555 blocks.Add(new Block(BlockId.oldPersian, "Old Persian", "Old Persian", cast<uchar>(0x103A0u), cast<uchar>(0x103DFu)));
556 blocks.Add(new Block(BlockId.deseret, "Deseret", "Deseret", cast<uchar>(0x10400u), cast<uchar>(0x1044Fu)));
557 blocks.Add(new Block(BlockId.shavian, "Shavian", "Shavian", cast<uchar>(0x10450u), cast<uchar>(0x1047Fu)));
558 blocks.Add(new Block(BlockId.osmanya, "Osmanya", "Osmanya", cast<uchar>(0x10480u), cast<uchar>(0x104AFu)));
559 blocks.Add(new Block(BlockId.osage, "Osage", "Osage", cast<uchar>(0x104B0u), cast<uchar>(0x104FFu)));
560 blocks.Add(new Block(BlockId.elbasan, "Elbasan", "Elbasan", cast<uchar>(0x10500u), cast<uchar>(0x1052Fu)));
561 blocks.Add(new Block(BlockId.caucasianAlbanian, "Caucasian Albanian", "Caucasian Albanian", cast<uchar>(0x10530u), cast<uchar>(0x1056Fu)));
562 blocks.Add(new Block(BlockId.linearA, "Linear A", "Linear A", cast<uchar>(0x10600u), cast<uchar>(0x1077Fu)));
563 blocks.Add(new Block(BlockId.cypriotSyllabary, "Cypriot Syllabary", "Cypriot Syllabary", cast<uchar>(0x10800u), cast<uchar>(0x1083Fu)));
564 blocks.Add(new Block(BlockId.imperialAramaic, "Imperial Aramaic", "Imperial Aramaic", cast<uchar>(0x10840u), cast<uchar>(0x1085Fu)));
565 blocks.Add(new Block(BlockId.palmyrene, "Palmyrene", "Palmyrene", cast<uchar>(0x10860u), cast<uchar>(0x1087Fu)));
566 blocks.Add(new Block(BlockId.nabataean, "Nabataean", "Nabataean", cast<uchar>(0x10880u), cast<uchar>(0x108AFu)));
567 blocks.Add(new Block(BlockId.hatran, "Hatran", "Hatran", cast<uchar>(0x108E0u), cast<uchar>(0x108FFu)));
568 blocks.Add(new Block(BlockId.phoenician, "Phoenician", "Phoenician", cast<uchar>(0x10900u), cast<uchar>(0x1091Fu)));
569 blocks.Add(new Block(BlockId.lydian, "Lydian", "Lydian", cast<uchar>(0x10920u), cast<uchar>(0x1093Fu)));
570 blocks.Add(new Block(BlockId.meroiticHieroglyphs, "Meroitic Hieroglyphs", "Meroitic Hieroglyphs", cast<uchar>(0x10980u), cast<uchar>(0x1099Fu)));
571 blocks.Add(new Block(BlockId.meroiticCursive, "Meroitic Cursive", "Meroitic Cursive", cast<uchar>(0x109A0u), cast<uchar>(0x109FFu)));
572 blocks.Add(new Block(BlockId.kharoshthi, "Kharoshthi", "Kharoshthi", cast<uchar>(0x10A00u), cast<uchar>(0x10A5Fu)));
573 blocks.Add(new Block(BlockId.oldSouthArabian, "Old South Arabian", "Old South Arabian", cast<uchar>(0x10A60u), cast<uchar>(0x10A7Fu)));
574 blocks.Add(new Block(BlockId.oldNorthArabian, "Old North Arabian", "Old North Arabian", cast<uchar>(0x10A80u), cast<uchar>(0x10A9Fu)));
575 blocks.Add(new Block(BlockId.manichean, "Manichaean", "Manichaean", cast<uchar>(0x10AC0u), cast<uchar>(0x10AFFu)));
576 blocks.Add(new Block(BlockId.avestan, "Avestan", "Avestan", cast<uchar>(0x10B00u), cast<uchar>(0x10B3Fu)));
577 blocks.Add(new Block(BlockId.inscriptionalParthian, "Inscriptional Parthian", "Inscriptional Parthian", cast<uchar>(0x10B40u), cast<uchar>(0x10B5Fu)));
578 blocks.Add(new Block(BlockId.inscriptionalPahlavi, "Inscriptional Pahlavi", "Inscriptional Pahlavi", cast<uchar>(0x10B60u), cast<uchar>(0x10B7Fu)));
579 blocks.Add(new Block(BlockId.psalterPahlavi, "Psalter Pahlavi", "Psalter Pahlavi", cast<uchar>(0x10B80u), cast<uchar>(0x10BAFu)));
580 blocks.Add(new Block(BlockId.oldTurkic, "Old Turkic", "Old Turkic", cast<uchar>(0x10C00u), cast<uchar>(0x10C4Fu)));
581 blocks.Add(new Block(BlockId.oldHungarian, "Old Hungarian", "Old Hungarian", cast<uchar>(0x10C80u), cast<uchar>(0x10CFFu)));
582 blocks.Add(new Block(BlockId.hanifiRohingya, "Hanifi Rohingya", "Hanifi Rohingya", cast<uchar>(0x10D00u), cast<uchar>(0x10D3Fu)));
583 blocks.Add(new Block(BlockId.rumi, "Rumi", "Rumi Numeral Symbols", cast<uchar>(0x10E60u), cast<uchar>(0x10E7Fu)));
584 blocks.Add(new Block(BlockId.oldSogdian, "Old Sogdian", "Old Sogdian", cast<uchar>(0x10F00u), cast<uchar>(0x10F2Fu)));
585 blocks.Add(new Block(BlockId.sogdian, "Sogdian", "Sogdian", cast<uchar>(0x10F30u), cast<uchar>(0x10F6Fu)));
586 blocks.Add(new Block(BlockId.elymaic, "Elymaic", "Elymaic", cast<uchar>(0x10FE0), cast<uchar>(0x10FFF)));
587 blocks.Add(new Block(BlockId.brahmi, "Brahmi", "Brahmi", cast<uchar>(0x11000u), cast<uchar>(0x1107Fu)));
588 blocks.Add(new Block(BlockId.kaithi, "Kaithi", "Kaithi", cast<uchar>(0x11080u), cast<uchar>(0x110CFu)));
589 blocks.Add(new Block(BlockId.soraSompeng, "Sora Sompeng", "Sora Sompeng", cast<uchar>(0x110D0u), cast<uchar>(0x110FFu)));
590 blocks.Add(new Block(BlockId.chakma, "Chakma", "Chakma", cast<uchar>(0x11100u), cast<uchar>(0x1114Fu)));
591 blocks.Add(new Block(BlockId.mahajani, "Mahajani", "Mahajani", cast<uchar>(0x11150u), cast<uchar>(0x1117Fu)));
592 blocks.Add(new Block(BlockId.sharada, "Sharada", "Sharada", cast<uchar>(0x11180u), cast<uchar>(0x111DFu)));
593 blocks.Add(new Block(BlockId.sinhalaArchaicNumbers, "Sinhala Archaic Numbers", "Sinhala Archaic Numbers", cast<uchar>(0x111E0u), cast<uchar>(0x111FFu)));
594 blocks.Add(new Block(BlockId.khojki, "Khojki", "Khojki", cast<uchar>(0x11200u), cast<uchar>(0x1124Fu)));
595 blocks.Add(new Block(BlockId.multani, "Multani", "Multani", cast<uchar>(0x11280u), cast<uchar>(0x112AFu)));
596 blocks.Add(new Block(BlockId.khudawadi, "Khudawadi", "Khudawadi", cast<uchar>(0x112B0u), cast<uchar>(0x112FFu)));
597 blocks.Add(new Block(BlockId.grantha, "Grantha", "Grantha", cast<uchar>(0x11300u), cast<uchar>(0x1137Fu)));
598 blocks.Add(new Block(BlockId.newa, "Newa", "Newa", cast<uchar>(0x11400u), cast<uchar>(0x1147Fu)));
599 blocks.Add(new Block(BlockId.tirhuta, "Tirhuta", "Tirhuta", cast<uchar>(0x11480u), cast<uchar>(0x114DFu)));
600 blocks.Add(new Block(BlockId.siddham, "Siddham", "Siddham", cast<uchar>(0x11580u), cast<uchar>(0x115FFu)));
601 blocks.Add(new Block(BlockId.modi, "Modi", "Modi", cast<uchar>(0x11600u), cast<uchar>(0x1165Fu)));
602 blocks.Add(new Block(BlockId.mongolianSup, "Mongolian Sup", "Mongolian Supplement", cast<uchar>(0x11660u), cast<uchar>(0x1167Fu)));
603 blocks.Add(new Block(BlockId.takri, "Takri", "Takri", cast<uchar>(0x11680u), cast<uchar>(0x116CFu)));
604 blocks.Add(new Block(BlockId.ahom, "Ahom", "Ahom", cast<uchar>(0x11700u), cast<uchar>(0x1173Fu)));
605 blocks.Add(new Block(BlockId.dogra, "Dogra", "Dogra", cast<uchar>(0x11800u), cast<uchar>(0x1184Fu)));
606 blocks.Add(new Block(BlockId.warangCiti, "Warang Citi", "Warang Citi", cast<uchar>(0x118A0u), cast<uchar>(0x118FFu)));
607 blocks.Add(new Block(BlockId.nandinagari, "Nandinagari", "Nandinagari", cast<uchar>(0x119A0), cast<uchar>(0x119FF)));
608 blocks.Add(new Block(BlockId.zanabazarSquare, "Zanabazar Square", "Zanabazar Square", cast<uchar>(0x11A00u), cast<uchar>(0x11A4FFu)));
609 blocks.Add(new Block(BlockId.soyombo, "Soyombo", "Soyombo", cast<uchar>(0x11A50u), cast<uchar>(0x11AAFu)));
610 blocks.Add(new Block(BlockId.pauCinHau, "Pau Cin Hau", "Pau Cin Hau", cast<uchar>(0x11AC0u), cast<uchar>(0x11AFFu)));
611 blocks.Add(new Block(BlockId.bhaisuki, "Bhaiksuki", "Bhaiksuki", cast<uchar>(0x11C00u), cast<uchar>(0x11C6Fu)));
612 blocks.Add(new Block(BlockId.marchen, "Marchen", "Marchen", cast<uchar>(0x11C70u), cast<uchar>(0x11CBFu)));
613 blocks.Add(new Block(BlockId.masaramGondi, "Masaram Gondi", "Masaram Gondi", cast<uchar>(0x11D00), cast<uchar>(0x11D5F)));
614 blocks.Add(new Block(BlockId.gunjalaGondi, "Gunjala Gondi", "Gunjala Gondi", cast<uchar>(0x11D60), cast<uchar>(0x11DAF)));
615 blocks.Add(new Block(BlockId.makasar, "Makasar", "Makasar", cast<uchar>(0x11EE0), cast<uchar>(0x11EFF)));
616 blocks.Add(new Block(BlockId.tamilSup, "Tamil Sup", "Tamil Sup", cast<uchar>(0x11FC0), cast<uchar>(0x11FFE)));
617 blocks.Add(new Block(BlockId.cuneiform, "Cuneiform", "Cuneiform", cast<uchar>(0x12000u), cast<uchar>(0x123FFu)));
618 blocks.Add(new Block(BlockId.cuneiformNumbers, "Cuneiform Numbers", "Cuneiform Numbers and Punctuation", cast<uchar>(0x12400u), cast<uchar>(0x1247Fu)));
619 blocks.Add(new Block(BlockId.earlyDynasticCuneiform, "Early Dynastic Cuneiform", "Early Dynastic Cuneiform", cast<uchar>(0x12480u), cast<uchar>(0x1254Fu)));
620 blocks.Add(new Block(BlockId.egyptianHieroglyphs, "Egyptian Hieroglyphs", "Egyptian Hieroglyphs", cast<uchar>(0x13000u), cast<uchar>(0x1342Fu)));
621 blocks.Add(new Block(BlockId.egyptianHieroglyphFormatControls, "Egyptian Hieroglyph Format Controls", "Egyptian Hieroglyph Format Controls", cast<uchar>(0x13430), cast<uchar>(0x1343F)));
622 blocks.Add(new Block(BlockId.anatolianHieroglyphs, "Anatolian Hieroglyphs", "Anatolian Hieroglyphs", cast<uchar>(0x14400u), cast<uchar>(0x1467Fu)));
623 blocks.Add(new Block(BlockId.bamumSup, "Bamum Sup", "Bamum Supplement", cast<uchar>(0x16800u), cast<uchar>(0x16A3Fu)));
624 blocks.Add(new Block(BlockId.mro, "Mro", "Mro", cast<uchar>(0x16A40u), cast<uchar>(0x16A6Fu)));
625 blocks.Add(new Block(BlockId.bassaVah, "Bassa Vah", "Bassa Vah", cast<uchar>(0x16AD0u), cast<uchar>(0x16AFFu)));
626 blocks.Add(new Block(BlockId.pahawhHmong, "Pahawh Hmong", "Pahawh Hmong", cast<uchar>(0x16B00u), cast<uchar>(0x16B8Fu)));
627 blocks.Add(new Block(BlockId.medefaidrin, "Medefaidrin", "Medefaidrin", cast<uchar>(0x16E40u), cast<uchar>(0x16E9Fu)));
628 blocks.Add(new Block(BlockId.miao, "Miao", "Miao", cast<uchar>(0x16F00u), cast<uchar>(0x16F9Fu)));
629 blocks.Add(new Block(BlockId.ideographicSymbols, "Ideographic Symbols", "Ideographic Symbols and Punctuation", cast<uchar>(0x16FE0u), cast<uchar>(0x16FFFu)));
630 blocks.Add(new Block(BlockId.tangut, "Tangut", "Tangut", cast<uchar>(0x17000u), cast<uchar>(0x187FFu)));
631 blocks.Add(new Block(BlockId.tangutComponents, "Tangut Components", "Tangut Components", cast<uchar>(0x18800u), cast<uchar>(0x18AFFu)));
632 blocks.Add(new Block(BlockId.kanaSup, "Kana Sup", "Kana Supplement", cast<uchar>(0x1B000u), cast<uchar>(0x1B0FFu)));
633 blocks.Add(new Block(BlockId.kanaExtA, "Kana Ext A", "Kana Extended-A", cast<uchar>(0x1B100u), cast<uchar>(0x1B12Fu)));
634 blocks.Add(new Block(BlockId.smallKanaExt, "Small Kana Ext", "Small Kana Extension", cast<uchar>(0x1B130), cast<uchar>(0x1B16F)));
635 blocks.Add(new Block(BlockId.nushu, "Nushu", "Nushu", cast<uchar>(0x1B170u), cast<uchar>(0x1B2FFu)));
636 blocks.Add(new Block(BlockId.duployan, "Duployan", "Duployan", cast<uchar>(0x1BC00u), cast<uchar>(0x1BC9Fu)));
637 blocks.Add(new Block(BlockId.shorthandFormatControls, "Shorthand Format Controls", "Shorthand Format Controls", cast<uchar>(0x1BCA0u), cast<uchar>(0x1BCAFu)));
638 blocks.Add(new Block(BlockId.byzantineMusic, "Byzantine Music", "Byzantine Musical Symbols", cast<uchar>(0x1D000u), cast<uchar>(0x1D0FFu)));
639 blocks.Add(new Block(BlockId.music, "Music", "Musical Symbols", cast<uchar>(0x1D100u), cast<uchar>(0x1D1FFu)));
640 blocks.Add(new Block(BlockId.ancientGreekMusic, "Ancient Greek Music", "Ancient Greek Musical Notation", cast<uchar>(0x1D200u), cast<uchar>(0x1D24Fu)));
641 blocks.Add(new Block(BlockId.mayanNumerals, "Mayan Numerals", "Mayan Numerals", cast<uchar>(0x1D2E0u), cast<uchar>(0x1D2FFu)));
642 blocks.Add(new Block(BlockId.taiXuanJing, "Tai Xuan Jing", "Tai Xuan Jing Symbols", cast<uchar>(0x1D300u), cast<uchar>(0x1D35Fu)));
643 blocks.Add(new Block(BlockId.countingRod, "Counting Rod", "Counting Rod Numerals", cast<uchar>(0x1D360u), cast<uchar>(0x1D37Fu)));
644 blocks.Add(new Block(BlockId.mathAlphanum, "Math Alphanum", "Mathematical Alphanumeric Symbols", cast<uchar>(0x1D400u), cast<uchar>(0x1D7FFu)));
645 blocks.Add(new Block(BlockId.suttonSignWriting, "Sutton SignWriting", "Sutton SignWriting", cast<uchar>(0x1D800u), cast<uchar>(0x1DAAFu)));
646 blocks.Add(new Block(BlockId.glagoliticSup, "Glagolitic Sup", "Glagolitic Supplement", cast<uchar>(0x1E000u), cast<uchar>(0x1E02Fu)));
647 blocks.Add(new Block(BlockId.nyiakengPuachueHmong, "Nyiakeng Puachue Hmong", "Nyiakeng Puachue Hmong", cast<uchar>(0x1E100), cast<uchar>(0x1E14F)));
648 blocks.Add(new Block(BlockId.wancho, "Wancho", "Wancho", cast<uchar>(0x1E2C0), cast<uchar>(0x1E2FF)));
649 blocks.Add(new Block(BlockId.mendeKikakui, "Mende Kikakui", "Mende Kikakui", cast<uchar>(0x1E800u), cast<uchar>(0x1E8DFu)));
650 blocks.Add(new Block(BlockId.adlam, "Adlam", "Adlam", cast<uchar>(0x1E900u), cast<uchar>(0x1E95Fu)));
651 blocks.Add(new Block(BlockId.indicSiyaqNumbers, "Indic Siyaq Numbers", "Indic Siyaq Numbers", cast<uchar>(0x1EC70), cast<uchar>(0x1ECBF)));
652 blocks.Add(new Block(BlockId.ottomanSiyaqNumbers, "Ottoman Siyaq Numbers", "Ottoman Siyaq Numbers", cast<uchar>(0x1ED00), cast<uchar>(0x1ED4F)));
653 blocks.Add(new Block(BlockId.arabicMath, "Arabic Math", "Arabic Mathematical Alphabetic Symbols", cast<uchar>(0x1EE00u), cast<uchar>(0x1EEFFu)));
654 blocks.Add(new Block(BlockId.mahjong, "Mahjong", "Mahjong Tiles", cast<uchar>(0x1F000u), cast<uchar>(0x1F02Fu)));
655 blocks.Add(new Block(BlockId.domino, "Domino", "Domino Tiles", cast<uchar>(0x1F030u), cast<uchar>(0x1F09Fu)));
656 blocks.Add(new Block(BlockId.playingCards, "Playing Cards", "Playing Cards", cast<uchar>(0x1F0A0u), cast<uchar>(0x1F0FFu)));
657 blocks.Add(new Block(BlockId.enclosedAlphanumSup, "Enclosed Alphanum Sup", "Enclosed Alphanumeric Supplement", cast<uchar>(0x1F100u), cast<uchar>(0x1F1FFu)));
658 blocks.Add(new Block(BlockId.enclosedIdeographicSup, "Enclosed Ideographic Sup", "Enclosed Ideographic Supplement", cast<uchar>(0x1F200u), cast<uchar>(0x1F2FFu)));
659 blocks.Add(new Block(BlockId.miscPictographs, "Misc Pictographs", "Miscellaneous Symbols and Pictographs", cast<uchar>(0x1F300u), cast<uchar>(0x1F5FFu)));
660 blocks.Add(new Block(BlockId.emoticons, "Emoticons", "Emoticons", cast<uchar>(0x1F600u), cast<uchar>(0x1F64Fu)));
661 blocks.Add(new Block(BlockId.ornamentalDingbats, "Ornamental Dingbats", "Ornamental Dingbats", cast<uchar>(0x1F650u), cast<uchar>(0x1F67Fu)));
662 blocks.Add(new Block(BlockId.transportAndMap, "Transport And Map", "Transport and Map Symbols", cast<uchar>(0x1F680u), cast<uchar>(0x1F6FFu)));
663 blocks.Add(new Block(BlockId.alchemical, "Alchemical", "Alchemical Symbols", cast<uchar>(0x1F700u), cast<uchar>(0x1F77Fu)));
664 blocks.Add(new Block(BlockId.geometricShapesExt, "Geometric Shapes Ext", "Geometric Shapes Extended", cast<uchar>(0x1F780u), cast<uchar>(0x1F7FFu)));
665 blocks.Add(new Block(BlockId.supArrowsC, "Sup Arrows C", "Supplemental Arrows-C", cast<uchar>(0x1F800u), cast<uchar>(0x1F8FFu)));
666 blocks.Add(new Block(BlockId.supSymbolsAndPictographs, "Sup Symbols And Pictographs", "Supplemental Symbols and Pictographs", cast<uchar>(0x1F900u), cast<uchar>(0x1F9FFu)));
667 blocks.Add(new Block(BlockId.chessSymbols, "Chess Symbols", "Chess Symbols", cast<uchar>(0x1FA00u), cast<uchar>(0x1FA6Fu)));
668 blocks.Add(new Block(BlockId.symbolsAndPictographsExtA, "Symbols And Pictographs Ext A", "Symbols And Pictographs Extended A", cast<uchar>(0x1FA70), cast<uchar>(0x1FAFF)));
669 blocks.Add(new Block(BlockId.cjkExtB, "CJK Ext B", "CJK Unified Ideographs Extension B", cast<uchar>(0x20000u), cast<uchar>(0x2A6DFu)));
670 blocks.Add(new Block(BlockId.cjkExtC, "CJK Ext C", "CJK Unified Ideographs Extension C", cast<uchar>(0x2A700u), cast<uchar>(0x2B73Fu)));
671 blocks.Add(new Block(BlockId.cjkExtD, "CJK Ext D", "CJK Unified Ideographs Extension D", cast<uchar>(0x2B740u), cast<uchar>(0x2B81Fu)));
672 blocks.Add(new Block(BlockId.cjkExtE, "CJK Ext E", "CJK Unified Ideographs Extension E", cast<uchar>(0x2B820u), cast<uchar>(0x2CEAFu)));
673 blocks.Add(new Block(BlockId.cjkExtF, "CJK Ext F", "CJK Unified Ideographs Extension F", cast<uchar>(0x2CEB0u), cast<uchar>(0x2EBEFu)));
674 blocks.Add(new Block(BlockId.cjkCompatIdeographsSup, "CJK Compat Ideographs Sup", "CJK Compatibility Ideographs Supplement", cast<uchar>(0x2F800u), cast<uchar>(0x2FA1Fu)));
675 blocks.Add(new Block(BlockId.tags, "Tags", "Tags", cast<uchar>(0xE0000u), cast<uchar>(0xE007Fu)));
676 blocks.Add(new Block(BlockId.vsSup, "VS Sup", "Variation Selectors Supplement", cast<uchar>(0xE0100u), cast<uchar>(0xE01EFu)));
677 blocks.Add(new Block(BlockId.supPuaA, "Sup PUA A", "Supplementary Private Use Area-A", cast<uchar>(0xF0000u), cast<uchar>(0xFFFFFu)));
678 blocks.Add(new Block(BlockId.supPuaB, "Sup PUA B", "Supplementary Private Use Area-B", cast<uchar>(0x100000u), cast<uchar>(0x10FFFFu)));
679 for (Block* block : blocks)
680 {
681 blockIdMap[cast<uint>(cast<ushort>(block->Id()))] = block;
682 shortNameMap[MakeCanonicalPropertyName(block->ShortName())] = block;
683 longNameMap[MakeCanonicalPropertyName(block->LongName())] = block;
684 }
685 }
// Destroys the table: every Block stored in 'blocks' was allocated with 'new'
// by the constructor, so each one is deleted here.
public ~BlockTable()
{
    long blockCount = blocks.Count();
    for (long index = 0; index < blockCount; ++index)
    {
        delete blocks[index];
    }
}
693 private static UniquePtr<BlockTable> instance;
694 private List<Block*> blocks;
695 private HashMap<uint, void*> blockIdMap;
696 private HashMap<string, void*> shortNameMap;
697 private HashMap<string, void*> longNameMap;
698 }
699
// Convenience wrapper: asks the shared BlockTable instance for the block
// registered under 'id' and forwards whatever the table returns.
public const Block* GetBlock(BlockId id)
{
    const Block* found = BlockTable.Instance().GetBlock(id);
    return found;
}
704
// Convenience wrapper: asks the shared BlockTable instance for the block
// with the given short name and forwards whatever the table returns.
public const Block* GetBlockByShortName(const string& shortName)
{
    const Block* found = BlockTable.Instance().GetBlockByShortName(shortName);
    return found;
}
709
// Convenience wrapper: asks the shared BlockTable instance for the block
// with the given long name and forwards whatever the table returns.
public const Block* GetBlockByLongName(const string& longName)
{
    const Block* found = BlockTable.Instance().GetBlockByLongName(longName);
    return found;
}
714
// Identifiers for general categories, encoded as bit flags: each two-letter
// category owns a single bit, and the grouped categories (LC, L, M, N, P, S,
// Z, C, G, B) are the bitwise OR of their members.
public enum GeneralCategoryId : uint
{
    none = 0u,

    // letters
    Lu = 1u << 0u,
    Ll = 1u << 1u,
    Lt = 1u << 2u,
    Lm = 1u << 3u,
    Lo = 1u << 4u,
    LC = Lu | Ll | Lt,
    L = Lu | Ll | Lt | Lm | Lo,

    // marks
    Mn = 1u << 5u,
    Mc = 1u << 6u,
    Me = 1u << 7u,
    M = Mn | Mc | Me,

    // numbers
    Nd = 1u << 8u,
    Nl = 1u << 9u,
    No = 1u << 10u,
    N = Nd | Nl | No,

    // punctuation
    Pc = 1u << 11u,
    Pd = 1u << 12u,
    Ps = 1u << 13u,
    Pe = 1u << 14u,
    Pi = 1u << 15u,
    Pf = 1u << 16u,
    Po = 1u << 17u,
    P = Pc | Pd | Ps | Pe | Pi | Pf | Po,

    // symbols
    Sm = 1u << 18u,
    Sc = 1u << 19u,
    Sk = 1u << 20u,
    So = 1u << 21u,
    S = Sm | Sc | Sk | So,

    // separators
    Zs = 1u << 22u,
    Zl = 1u << 23u,
    Zp = 1u << 24u,
    Z = Zs | Zl | Zp,

    // other
    Cc = 1u << 25u,
    Cf = 1u << 26u,
    Cs = 1u << 27u,
    Co = 1u << 28u,
    Cn = 1u << 29u,
    C = Cc | Cf | Cs | Co | Cn,

    // composite sets: G includes marks, B does not
    G = L | M | N | P | S | Zs,
    B = L | N | P | S | Zs
}
736
// Immutable description of one general category: its bit-flag identifier
// plus the short and long names it is registered under.
public class GeneralCategory
{
    // Stores the identifier and copies of both names.
    public nothrow GeneralCategory(GeneralCategoryId id_, const string& shortName_, const string& longName_) : id(id_), shortName(shortName_), longName(longName_)
    {
    }
    // The accessors are const so that they can be called through the
    // 'const GeneralCategory*' pointers returned by the lookup functions,
    // matching the const accessors of the Age class.

    // Returns the identifier given at construction.
    public inline nothrow GeneralCategoryId Id() const
    {
        return id;
    }
    // Returns the short name given at construction.
    public inline nothrow const string& ShortName() const
    {
        return shortName;
    }
    // Returns the long name given at construction.
    public inline nothrow const string& LongName() const
    {
        return longName;
    }
    private GeneralCategoryId id;
    private string shortName;
    private string longName;
}
758
// Singleton registry of GeneralCategory objects with three lookup indexes:
// by identifier, by canonicalized short name and by canonicalized long name.
// The table owns the GeneralCategory objects and deletes them in its destructor.
public class GeneralCategoryTable
{
    // Creates the single shared instance.
    static GeneralCategoryTable() : instance(new GeneralCategoryTable())
    {
    }
    // Returns the shared instance.
    public static GeneralCategoryTable& Instance()
    {
        return *instance;
    }
    // Returns the category registered under 'id', or null if there is none.
    public nothrow const GeneralCategory* GetGeneralCategory(GeneralCategoryId id) const
    {
        HashMap<uint, void*>.ConstIterator it = generalCategoryIdMap.CFind(cast<uint>(id));
        if (it != generalCategoryIdMap.CEnd())
        {
            return cast<GeneralCategory*>(it->second);
        }
        return null;
    }
    // Returns the category whose short name matches 'shortName' after both are
    // passed through MakeCanonicalPropertyName, or null if there is none.
    public const GeneralCategory* GetGeneralCategoryByShortName(const string& shortName) const
    {
        HashMap<string, void*>.ConstIterator it = shortNameMap.CFind(MakeCanonicalPropertyName(shortName));
        if (it != shortNameMap.CEnd())
        {
            return cast<GeneralCategory*>(it->second);
        }
        return null;
    }
    // Returns the category whose long name matches 'longName' after both are
    // passed through MakeCanonicalPropertyName, or null if there is none.
    public const GeneralCategory* GetGeneralCategoryByLongName(const string& longName) const
    {
        HashMap<string, void*>.ConstIterator it = longNameMap.CFind(MakeCanonicalPropertyName(longName));
        if (it != longNameMap.CEnd())
        {
            return cast<GeneralCategory*>(it->second);
        }
        return null;
    }
    // Registers every category and then builds the three lookup maps.
    private GeneralCategoryTable() : generalCategories(), generalCategoryIdMap(), shortNameMap(), longNameMap()
    {
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Lu, "Lu", "Uppercase Letter"));
        // Fixed: this entry was registered with GeneralCategoryId.Lu, which made the
        // id lookup for Lu return "Lowercase Letter" and left Ll without an entry.
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Ll, "Ll", "Lowercase Letter"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Lt, "Lt", "Titlecase Letter"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.LC, "LC", "Cased Letter"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Lm, "Lm", "Modifier Letter"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Lo, "Lo", "Other Letter"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.L, "L", "Letter"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Mn, "Mn", "Nonspacing Mark"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Mc, "Mc", "Spacing Mark"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Me, "Me", "Enclosing Mark"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.M, "M", "Mark"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Nd, "Nd", "Decimal Number"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Nl, "Nl", "Letter Number"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.No, "No", "Other Number"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.N, "N", "Number"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Pc, "Pc", "Connector Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Pd, "Pd", "Dash Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Ps, "Ps", "Open Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Pe, "Pe", "Close Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Pi, "Pi", "Initial Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Pf, "Pf", "Final Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Po, "Po", "Other Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.P, "P", "Punctuation"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Sm, "Sm", "Math Symbol"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Sc, "Sc", "Currency Symbol"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Sk, "Sk", "Modifier Symbol"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.So, "So", "Other Symbol"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.S, "S", "Symbol"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Zs, "Zs", "Space Separator"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Zl, "Zl", "Line Separator"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Zp, "Zp", "Paragraph Separator"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Z, "Z", "Separator"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Cc, "Cc", "Control"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Cf, "Cf", "Format"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Cs, "Cs", "Surrogate"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Co, "Co", "Private Use"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.Cn, "Cn", "Unassigned"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.C, "C", "Other"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.G, "G", "Graphic"));
        generalCategories.Add(new GeneralCategory(GeneralCategoryId.B, "B", "Base"));
        for (GeneralCategory* generalCategory : generalCategories)
        {
            // Key with the same explicit uint conversion that GetGeneralCategory uses for its lookup.
            generalCategoryIdMap[cast<uint>(generalCategory->Id())] = generalCategory;
            shortNameMap[MakeCanonicalPropertyName(generalCategory->ShortName())] = generalCategory;
            longNameMap[MakeCanonicalPropertyName(generalCategory->LongName())] = generalCategory;
        }
    }
    // Deletes every GeneralCategory allocated by the constructor.
    public ~GeneralCategoryTable()
    {
        for (GeneralCategory* generalCategory : generalCategories)
        {
            delete generalCategory;
        }
    }
    private static UniquePtr<GeneralCategoryTable> instance;
    // Owning list; the maps below hold non-owning pointers to the same objects.
    private List<GeneralCategory*> generalCategories;
    private HashMap<uint, void*> generalCategoryIdMap;
    private HashMap<string, void*> shortNameMap;
    private HashMap<string, void*> longNameMap;
}
857
// Convenience wrapper around GeneralCategoryTable.Instance().GetGeneralCategory:
// yields the category registered under 'id', or null when the table has none.
public const GeneralCategory* GetGeneralCategory(GeneralCategoryId id)
{
    const GeneralCategory* found = GeneralCategoryTable.Instance().GetGeneralCategory(id);
    return found;
}
862
// Convenience wrapper around GeneralCategoryTable.Instance().GetGeneralCategoryByShortName:
// yields the category with the given short name, or null when the table has none.
public const GeneralCategory* GetGeneralCategoryByShortName(const string& shortName)
{
    const GeneralCategory* found = GeneralCategoryTable.Instance().GetGeneralCategoryByShortName(shortName);
    return found;
}
867
// Convenience wrapper around GeneralCategoryTable.Instance().GetGeneralCategoryByLongName:
// yields the category with the given long name, or null when the table has none.
public const GeneralCategory* GetGeneralCategoryByLongName(const string& longName)
{
    const GeneralCategory* found = GeneralCategoryTable.Instance().GetGeneralCategoryByLongName(longName);
    return found;
}
872
// Identifiers for the version strings registered in AgeTable ("1.1" through "12.1").
// age_unassigned (0) is the value a default-constructed CharacterInfo carries.
// The numeric values are implicit, so the order of the enumerators must not change.
public enum AgeId : byte
{
    age_unassigned = 0u,
    age_1_1,
    age_2_0,
    age_2_1,
    age_3_0,
    age_3_1,
    age_3_2,
    age_4_0,
    age_4_1,
    age_5_0,
    age_5_1,
    age_5_2,
    age_6_0,
    age_6_1,
    age_6_2,
    age_6_3,
    age_7_0,
    age_8_0,
    age_9_0,
    age_10_0,
    age_11_0,
    age_12_0,
    age_12_1
}
889
// Immutable pairing of an AgeId with its version string (for example "1.1").
public class Age
{
    private AgeId id;
    private string version;

    // Stores the identifier and a copy of the version string.
    public nothrow Age(AgeId id_, const string& version_) : id(id_), version(version_)
    {
    }
    // Returns the identifier given at construction.
    public inline nothrow AgeId Id() const
    {
        return id;
    }
    // Returns the version string given at construction.
    public inline nothrow const string& Version() const
    {
        return version;
    }
}
906
// Singleton registry of Age objects, searchable by AgeId or by version string.
// The table owns the Age objects (stored in 'ages'); both maps hold the same pointers as void*.
// AgeId.age_unassigned is not registered, so looking it up yields null.
public class AgeTable
{
    private static UniquePtr<AgeTable> instance;
    private List<Age*> ages;
    private HashMap<uint, void*> ageIdMap;
    private HashMap<string, void*> versionMap;

    // Creates the single shared instance.
    static AgeTable() : instance(new AgeTable())
    {
    }
    // Returns the shared table.
    public static AgeTable& Instance()
    {
        return *instance;
    }
    // Returns the Age registered for 'id', or null when there is none.
    public const Age* GetAge(AgeId id) const
    {
        uint key = cast<uint>(cast<byte>(id));
        HashMap<uint, void*>.ConstIterator pos = ageIdMap.CFind(key);
        if (pos == ageIdMap.CEnd())
        {
            return null;
        }
        return cast<Age*>(pos->second);
    }
    // Returns the Age whose version string equals 'version' exactly, or null when there is none.
    public const Age* GetAge(const string& version) const
    {
        HashMap<string, void*>.ConstIterator pos = versionMap.CFind(version);
        if (pos == versionMap.CEnd())
        {
            return null;
        }
        return cast<Age*>(pos->second);
    }
    // Registers every known version and indexes it by id and by version string.
    private AgeTable() : ages(), ageIdMap(), versionMap()
    {
        ages.Add(new Age(AgeId.age_1_1, "1.1"));
        ages.Add(new Age(AgeId.age_2_0, "2.0"));
        ages.Add(new Age(AgeId.age_2_1, "2.1"));
        ages.Add(new Age(AgeId.age_3_0, "3.0"));
        ages.Add(new Age(AgeId.age_3_1, "3.1"));
        ages.Add(new Age(AgeId.age_3_2, "3.2"));
        ages.Add(new Age(AgeId.age_4_0, "4.0"));
        ages.Add(new Age(AgeId.age_4_1, "4.1"));
        ages.Add(new Age(AgeId.age_5_0, "5.0"));
        ages.Add(new Age(AgeId.age_5_1, "5.1"));
        ages.Add(new Age(AgeId.age_5_2, "5.2"));
        ages.Add(new Age(AgeId.age_6_0, "6.0"));
        ages.Add(new Age(AgeId.age_6_1, "6.1"));
        ages.Add(new Age(AgeId.age_6_2, "6.2"));
        ages.Add(new Age(AgeId.age_6_3, "6.3"));
        ages.Add(new Age(AgeId.age_7_0, "7.0"));
        ages.Add(new Age(AgeId.age_8_0, "8.0"));
        ages.Add(new Age(AgeId.age_9_0, "9.0"));
        ages.Add(new Age(AgeId.age_10_0, "10.0"));
        ages.Add(new Age(AgeId.age_11_0, "11.0"));
        ages.Add(new Age(AgeId.age_12_0, "12.0"));
        ages.Add(new Age(AgeId.age_12_1, "12.1"));
        for (Age* entry : ages)
        {
            uint key = cast<uint>(cast<byte>(entry->Id()));
            ageIdMap[key] = entry;
            versionMap[entry->Version()] = entry;
        }
    }
    // Deletes the Age objects created by the constructor.
    public ~AgeTable()
    {
        for (Age* entry : ages)
        {
            delete entry;
        }
    }
}
976
// Convenience wrapper: returns the Age registered for 'id' in the shared AgeTable, or null.
public const Age* GetAge(AgeId id)
{
    AgeTable& table = AgeTable.Instance();
    return table.GetAge(id);
}
981
// Convenience wrapper: returns the Age registered for the version string in the shared AgeTable, or null.
public const Age* GetAge(const string& version)
{
    AgeTable& table = AgeTable.Instance();
    return table.GetAge(version);
}
986
// Script identifiers; each enumerator is the lower-cased four-letter short name used in ScriptTable.
// Values are implicit and CharacterInfo.Read casts a raw byte to this type,
// so the order below (including the non-alphabetical 'elba, elym, egyp, ethi') must be preserved.
public enum ScriptId : byte
{
    none = 0u,
    // a
    adlm, aghb, ahom, arab, armi, armn, avst,
    // b
    bali, bamu, bass, batk, beng, bhks, bopo, brah, brai, bugi, buhd,
    // c
    cakm, cans, cari, cham, cher, copt, cprt, cyrl,
    // d
    deva, dogr, dsrt, dupl,
    // e
    elba, elym, egyp, ethi,
    // g
    geor, glag, gong, gonm, goth, gran, grek, gujr, guru,
    // h
    hang, hani, hano, hatr, hebr, hira, hluw, hmng, hmnp, hrkt, hung,
    // i, j
    ital,
    java,
    // k
    kali, kana, khar, khmr, khoj, knda, kthi,
    // l
    lana, laoo, latn, lepc, limb, lina, linb, lisu, lyci, lydi,
    // m
    mahj, maka, mand, mani, marc, medf, mend, merc, mero, mlym, modi, mong, mroo, mtei, mult, mymr,
    // n
    nand, narb, nbat, newa, nkoo, nshu,
    // o
    ogam, olck, orkh, orya, osge, osma,
    // p
    palm, pauc, perm, phag, phli, phlp, phnx, plrd, prti,
    // q
    qaai,
    // r
    rjng, rohg, runr,
    // s
    samr, sarb, saur, sgnw, shaw, shrd, sidd, sind, sinh, sogd, sogo, sora, soyo, sund, sylo, syrc,
    // t
    tagb, takr, tale, talu, taml, tang, tavt, telu, tfng, tglg, thaa, thai, tibt, tirh,
    // u, v, w
    ugar,
    vaii,
    wara, wcho,
    // x, y
    xpeo, xsux,
    yiii,
    // z
    zanb, zinh, zyyy, zzzz
}
1017
// Immutable description of one script: its ScriptId plus a short and a long name.
public class Script
{
    private ScriptId id;
    private string shortName;
    private string longName;

    // Stores the identifier and copies of both names.
    public nothrow Script(ScriptId id_, const string& shortName_, const string& longName_) :
        id(id_), shortName(shortName_), longName(longName_)
    {
    }
    // Returns the identifier given at construction.
    public inline nothrow ScriptId Id() const
    {
        return id;
    }
    // Returns the short name given at construction (for example "Latn").
    public inline nothrow const string& ShortName() const
    {
        return shortName;
    }
    // Returns the long name given at construction (for example "Latin").
    public inline nothrow const string& LongName() const
    {
        return longName;
    }
}
1039
// Singleton registry of Script objects, searchable by ScriptId, short name or long name.
// Name lookups go through MakeCanonicalPropertyName on both the stored key and the query,
// so they ignore case, spaces, underscores and hyphens.
// The table owns the Script objects (stored in 'scripts'); the maps hold the same pointers as void*.
// ScriptId.none and ScriptId.qaai have no entry, so GetScript returns null for them.
//
// Long names follow the Script property value names of the Unicode Character Database
// (PropertyValueAliases.txt). Seven entries previously carried misspelled long names, so lookups
// using the official names failed: Bhaiksuki, Ethiopic, Meetei Mayek, Shavian, Siddham,
// Tifinagh and Wancho.
public class ScriptTable
{
    // Creates the single shared instance.
    static ScriptTable() : instance(new ScriptTable())
    {
    }
    // Returns the shared table.
    public static ScriptTable& Instance()
    {
        return *instance;
    }
    // Returns the Script registered for 'id', or null when there is none.
    public const Script* GetScript(ScriptId id) const
    {
        HashMap<uint, void*>.ConstIterator it = scriptIdMap.CFind(cast<uint>(cast<byte>(id)));
        if (it != scriptIdMap.CEnd())
        {
            return cast<Script*>(it->second);
        }
        return null;
    }
    // Returns the Script whose canonicalized short name matches 'shortName', or null.
    public const Script* GetScriptByShortName(const string& shortName) const
    {
        HashMap<string, void*>.ConstIterator it = shortNameMap.CFind(MakeCanonicalPropertyName(shortName));
        if (it != shortNameMap.CEnd())
        {
            return cast<Script*>(it->second);
        }
        return null;
    }
    // Returns the Script whose canonicalized long name matches 'longName', or null.
    public const Script* GetScriptByLongName(const string& longName) const
    {
        HashMap<string, void*>.ConstIterator it = longNameMap.CFind(MakeCanonicalPropertyName(longName));
        if (it != longNameMap.CEnd())
        {
            return cast<Script*>(it->second);
        }
        return null;
    }
    // Registers every script and indexes it by id, canonical short name and canonical long name.
    private ScriptTable() : scripts(), scriptIdMap(), shortNameMap(), longNameMap()
    {
        scripts.Add(new Script(ScriptId.adlm, "Adlm", "Adlam"));
        scripts.Add(new Script(ScriptId.aghb, "Aghb", "Caucasian Albanian"));
        scripts.Add(new Script(ScriptId.ahom, "Ahom", "Ahom"));
        scripts.Add(new Script(ScriptId.arab, "Arab", "Arabic"));
        scripts.Add(new Script(ScriptId.armi, "Armi", "Imperial Aramaic"));
        scripts.Add(new Script(ScriptId.armn, "Armn", "Armenian"));
        scripts.Add(new Script(ScriptId.avst, "Avst", "Avestan"));
        scripts.Add(new Script(ScriptId.bali, "Bali", "Balinese"));
        scripts.Add(new Script(ScriptId.bamu, "Bamu", "Bamum"));
        scripts.Add(new Script(ScriptId.bass, "Bass", "Bassa Vah"));
        scripts.Add(new Script(ScriptId.batk, "Batk", "Batak"));
        scripts.Add(new Script(ScriptId.beng, "Beng", "Bengali"));
        scripts.Add(new Script(ScriptId.bhks, "Bhks", "Bhaiksuki")); // was misspelled "Bhaisuki"
        scripts.Add(new Script(ScriptId.bopo, "Bopo", "Bopomofo"));
        scripts.Add(new Script(ScriptId.brah, "Brah", "Brahmi"));
        scripts.Add(new Script(ScriptId.brai, "Brai", "Braille"));
        scripts.Add(new Script(ScriptId.bugi, "Bugi", "Buginese"));
        scripts.Add(new Script(ScriptId.buhd, "Buhd", "Buhid"));
        scripts.Add(new Script(ScriptId.cakm, "Cakm", "Chakma"));
        scripts.Add(new Script(ScriptId.cans, "Cans", "Canadian Aboriginal"));
        scripts.Add(new Script(ScriptId.cari, "Cari", "Carian"));
        scripts.Add(new Script(ScriptId.cham, "Cham", "Cham"));
        scripts.Add(new Script(ScriptId.cher, "Cher", "Cherokee"));
        scripts.Add(new Script(ScriptId.copt, "Copt", "Coptic"));
        scripts.Add(new Script(ScriptId.cprt, "Cprt", "Cypriot"));
        scripts.Add(new Script(ScriptId.cyrl, "Cyrl", "Cyrillic"));
        scripts.Add(new Script(ScriptId.deva, "Deva", "Devanagari"));
        scripts.Add(new Script(ScriptId.dogr, "Dogr", "Dogra"));
        scripts.Add(new Script(ScriptId.dsrt, "Dsrt", "Deseret"));
        scripts.Add(new Script(ScriptId.dupl, "Dupl", "Duployan"));
        scripts.Add(new Script(ScriptId.egyp, "Egyp", "Egyptian Hieroglyphs"));
        scripts.Add(new Script(ScriptId.elba, "Elba", "Elbasan"));
        scripts.Add(new Script(ScriptId.elym, "Elym", "Elymaic"));
        scripts.Add(new Script(ScriptId.ethi, "Ethi", "Ethiopic")); // was "Ethiopian"
        scripts.Add(new Script(ScriptId.geor, "Geor", "Georgian"));
        scripts.Add(new Script(ScriptId.glag, "Glag", "Glagolitic"));
        scripts.Add(new Script(ScriptId.gong, "Gong", "Gunjala Gondi"));
        scripts.Add(new Script(ScriptId.gonm, "Gonm", "Masaram Gondi"));
        scripts.Add(new Script(ScriptId.goth, "Goth", "Gothic"));
        scripts.Add(new Script(ScriptId.gran, "Gran", "Grantha"));
        scripts.Add(new Script(ScriptId.grek, "Grek", "Greek"));
        scripts.Add(new Script(ScriptId.gujr, "Gujr", "Gujarati"));
        scripts.Add(new Script(ScriptId.guru, "Guru", "Gurmukhi"));
        scripts.Add(new Script(ScriptId.hang, "Hang", "Hangul"));
        scripts.Add(new Script(ScriptId.hani, "Hani", "Han"));
        scripts.Add(new Script(ScriptId.hano, "Hano", "Hanunoo"));
        scripts.Add(new Script(ScriptId.hatr, "Hatr", "Hatran"));
        scripts.Add(new Script(ScriptId.hebr, "Hebr", "Hebrew"));
        scripts.Add(new Script(ScriptId.hira, "Hira", "Hiragana"));
        scripts.Add(new Script(ScriptId.hluw, "Hluw", "Anatolian Hieroglyphs"));
        scripts.Add(new Script(ScriptId.hmng, "Hmng", "Pahawh Hmong"));
        scripts.Add(new Script(ScriptId.hmnp, "Hmnp", "Nyiakeng Puachue Hmong"));
        scripts.Add(new Script(ScriptId.hrkt, "Hrkt", "Katakana Or Hiragana"));
        scripts.Add(new Script(ScriptId.hung, "Hung", "Old Hungarian"));
        scripts.Add(new Script(ScriptId.ital, "Ital", "Old Italic"));
        scripts.Add(new Script(ScriptId.java, "Java", "Javanese"));
        scripts.Add(new Script(ScriptId.kali, "Kali", "Kayah Li"));
        scripts.Add(new Script(ScriptId.kana, "Kana", "Katakana"));
        scripts.Add(new Script(ScriptId.khar, "Khar", "Kharoshthi"));
        scripts.Add(new Script(ScriptId.khmr, "Khmr", "Khmer"));
        scripts.Add(new Script(ScriptId.khoj, "Khoj", "Khojki"));
        scripts.Add(new Script(ScriptId.knda, "Knda", "Kannada"));
        scripts.Add(new Script(ScriptId.kthi, "Kthi", "Kaithi"));
        scripts.Add(new Script(ScriptId.lana, "Lana", "Tai Tham"));
        scripts.Add(new Script(ScriptId.laoo, "Laoo", "Lao"));
        scripts.Add(new Script(ScriptId.latn, "Latn", "Latin"));
        scripts.Add(new Script(ScriptId.lepc, "Lepc", "Lepcha"));
        scripts.Add(new Script(ScriptId.limb, "Limb", "Limbu"));
        scripts.Add(new Script(ScriptId.lina, "Lina", "Linear A"));
        scripts.Add(new Script(ScriptId.linb, "Linb", "Linear B"));
        scripts.Add(new Script(ScriptId.lisu, "Lisu", "Lisu"));
        scripts.Add(new Script(ScriptId.lyci, "Lyci", "Lycian"));
        scripts.Add(new Script(ScriptId.lydi, "Lydi", "Lydian"));
        scripts.Add(new Script(ScriptId.mahj, "Mahj", "Mahajani"));
        scripts.Add(new Script(ScriptId.maka, "Maka", "Makasar"));
        scripts.Add(new Script(ScriptId.mand, "Mand", "Mandaic"));
        scripts.Add(new Script(ScriptId.mani, "Mani", "Manichaean"));
        scripts.Add(new Script(ScriptId.marc, "Marc", "Marchen"));
        scripts.Add(new Script(ScriptId.medf, "Medf", "Medefaidrin"));
        scripts.Add(new Script(ScriptId.mend, "Mend", "Mende Kikakui"));
        scripts.Add(new Script(ScriptId.merc, "Merc", "Meroitic Cursive"));
        scripts.Add(new Script(ScriptId.mero, "Mero", "Meroitic Hieroglyphs"));
        scripts.Add(new Script(ScriptId.mlym, "Mlym", "Malayalam"));
        scripts.Add(new Script(ScriptId.modi, "Modi", "Modi"));
        scripts.Add(new Script(ScriptId.mong, "Mong", "Mongolian"));
        scripts.Add(new Script(ScriptId.mroo, "Mroo", "Mro"));
        scripts.Add(new Script(ScriptId.mtei, "Mtei", "Meetei Mayek")); // was misspelled "Meetei Mayak"
        scripts.Add(new Script(ScriptId.mult, "Mult", "Multani"));
        scripts.Add(new Script(ScriptId.mymr, "Mymr", "Myanmar"));
        scripts.Add(new Script(ScriptId.nand, "Nand", "Nandinagari"));
        scripts.Add(new Script(ScriptId.narb, "Narb", "Old North Arabian"));
        scripts.Add(new Script(ScriptId.nbat, "Nbat", "Nabataean"));
        scripts.Add(new Script(ScriptId.newa, "Newa", "Newa"));
        scripts.Add(new Script(ScriptId.nkoo, "Nkoo", "Nko"));
        scripts.Add(new Script(ScriptId.nshu, "Nshu", "Nushu"));
        scripts.Add(new Script(ScriptId.ogam, "Ogam", "Ogham"));
        scripts.Add(new Script(ScriptId.olck, "Olck", "Ol Chiki"));
        scripts.Add(new Script(ScriptId.orkh, "Orkh", "Old Turkic"));
        scripts.Add(new Script(ScriptId.orya, "Orya", "Oriya"));
        scripts.Add(new Script(ScriptId.osge, "Osge", "Osage"));
        scripts.Add(new Script(ScriptId.osma, "Osma", "Osmanya"));
        scripts.Add(new Script(ScriptId.palm, "Palm", "Palmyrene"));
        scripts.Add(new Script(ScriptId.pauc, "Pauc", "Pau Cin Hau"));
        scripts.Add(new Script(ScriptId.perm, "Perm", "Old Permic"));
        scripts.Add(new Script(ScriptId.phag, "Phag", "Phags Pa"));
        scripts.Add(new Script(ScriptId.phli, "Phli", "Inscriptional Pahlavi"));
        scripts.Add(new Script(ScriptId.phlp, "Phlp", "Psalter Pahlavi"));
        scripts.Add(new Script(ScriptId.phnx, "Phnx", "Phoenician"));
        scripts.Add(new Script(ScriptId.plrd, "Plrd", "Miao"));
        scripts.Add(new Script(ScriptId.prti, "Prti", "Inscriptional Parthian"));
        scripts.Add(new Script(ScriptId.rjng, "Rjng", "Rejang"));
        scripts.Add(new Script(ScriptId.rohg, "Rohg", "Hanifi Rohingya"));
        scripts.Add(new Script(ScriptId.runr, "Runr", "Runic"));
        scripts.Add(new Script(ScriptId.samr, "Samr", "Samaritan"));
        scripts.Add(new Script(ScriptId.sarb, "Sarb", "Old South Arabian"));
        scripts.Add(new Script(ScriptId.saur, "Saur", "Saurashtra"));
        scripts.Add(new Script(ScriptId.sgnw, "Sgnw", "SignWriting"));
        scripts.Add(new Script(ScriptId.shaw, "Shaw", "Shavian")); // was misspelled "Shawian"
        scripts.Add(new Script(ScriptId.shrd, "Shrd", "Sharada"));
        scripts.Add(new Script(ScriptId.sidd, "Sidd", "Siddham")); // was misspelled "Shiddham"
        scripts.Add(new Script(ScriptId.sind, "Sind", "Khudawadi"));
        scripts.Add(new Script(ScriptId.sinh, "Sinh", "Sinhala"));
        scripts.Add(new Script(ScriptId.sogd, "Sogd", "Sogdian"));
        scripts.Add(new Script(ScriptId.sogo, "Sogo", "Old Sogdian"));
        scripts.Add(new Script(ScriptId.sora, "Sora", "Sora Sompeng"));
        scripts.Add(new Script(ScriptId.soyo, "Soyo", "Soyombo"));
        scripts.Add(new Script(ScriptId.sund, "Sund", "Sundanese"));
        scripts.Add(new Script(ScriptId.sylo, "Sylo", "Syloti Nagri"));
        scripts.Add(new Script(ScriptId.syrc, "Syrc", "Syriac"));
        scripts.Add(new Script(ScriptId.tagb, "Tagb", "Tagbanwa"));
        scripts.Add(new Script(ScriptId.takr, "Takr", "Takri"));
        scripts.Add(new Script(ScriptId.tale, "Tale", "Tai Le"));
        scripts.Add(new Script(ScriptId.talu, "Talu", "New Tai Lue"));
        scripts.Add(new Script(ScriptId.taml, "Taml", "Tamil"));
        scripts.Add(new Script(ScriptId.tang, "Tang", "Tangut"));
        scripts.Add(new Script(ScriptId.tavt, "Tavt", "Tai Viet"));
        scripts.Add(new Script(ScriptId.telu, "Telu", "Telugu"));
        scripts.Add(new Script(ScriptId.tfng, "Tfng", "Tifinagh")); // was misspelled "Tifinag"
        scripts.Add(new Script(ScriptId.tglg, "Tglg", "Tagalog"));
        scripts.Add(new Script(ScriptId.thaa, "Thaa", "Thaana"));
        scripts.Add(new Script(ScriptId.thai, "Thai", "Thai"));
        scripts.Add(new Script(ScriptId.tibt, "Tibt", "Tibetan"));
        scripts.Add(new Script(ScriptId.tirh, "Tirh", "Tirhuta"));
        scripts.Add(new Script(ScriptId.ugar, "Ugar", "Ugaritic"));
        scripts.Add(new Script(ScriptId.vaii, "Vaii", "Vai"));
        scripts.Add(new Script(ScriptId.wara, "Wara", "Warang Citi"));
        scripts.Add(new Script(ScriptId.wcho, "Wcho", "Wancho")); // long name was the short name "Wcho"
        scripts.Add(new Script(ScriptId.xpeo, "Xpeo", "Old Persian"));
        scripts.Add(new Script(ScriptId.xsux, "Xsux", "Cuneiform"));
        scripts.Add(new Script(ScriptId.yiii, "Yiii", "Yi"));
        scripts.Add(new Script(ScriptId.zanb, "Zanb", "Zanabazar Square"));
        scripts.Add(new Script(ScriptId.zinh, "Zinh", "Inherited"));
        scripts.Add(new Script(ScriptId.zyyy, "Zyyy", "Common"));
        scripts.Add(new Script(ScriptId.zzzz, "Zzzz", "Unknown"));
        for (Script* script : scripts)
        {
            scriptIdMap[cast<uint>(cast<byte>(script->Id()))] = script;
            shortNameMap[MakeCanonicalPropertyName(script->ShortName())] = script;
            longNameMap[MakeCanonicalPropertyName(script->LongName())] = script;
        }
    }
    // Deletes the Script objects created by the constructor.
    public ~ScriptTable()
    {
        for (Script* script : scripts)
        {
            delete script;
        }
    }
    private static UniquePtr<ScriptTable> instance;
    private List<Script*> scripts;
    private HashMap<uint, void*> scriptIdMap;
    private HashMap<string, void*> shortNameMap;
    private HashMap<string, void*> longNameMap;
}
1252
// Convenience wrapper: returns the Script registered for 'id' in the shared ScriptTable, or null.
public const Script* GetScript(ScriptId id)
{
    ScriptTable& table = ScriptTable.Instance();
    return table.GetScript(id);
}
1257
// Convenience wrapper: short-name lookup in the shared ScriptTable; null when no script matches.
public const Script* GetScriptByShortName(const string& shortName)
{
    ScriptTable& table = ScriptTable.Instance();
    return table.GetScriptByShortName(shortName);
}
1262
// Convenience wrapper: long-name lookup in the shared ScriptTable; null when no script matches.
public const Script* GetScriptByLongName(const string& longName)
{
    ScriptTable& table = ScriptTable.Instance();
    return table.GetScriptByLongName(longName);
}
1267
// Per-character property record: a 64-bit set of binary properties, the general category,
// four case-related characters, and the block, age and script identifiers.
// A default-constructed record has every field zero or 'none'; Read() fills it from a BinaryReader.
public class CharacterInfo
{
    private ulong binaryProperties;
    private GeneralCategoryId generalCategory;
    private uchar upper;
    private uchar lower;
    private uchar title;
    private uchar folding;
    private BlockId block;
    private AgeId age;
    private ScriptId script;

    // Initializes every field to its zero / 'none' value.
    public nothrow CharacterInfo() :
        binaryProperties(0u),
        generalCategory(GeneralCategoryId.none),
        upper(cast<uchar>(0u)),
        lower(cast<uchar>(0u)),
        title(cast<uchar>(0u)),
        folding(cast<uchar>(0u)),
        block(BlockId.none),
        age(AgeId.age_unassigned),
        script(ScriptId.none)
    {
    }
    // Tests the bit of 'binaryProperties' whose index is the numeric value of 'binaryPropertyId'.
    public inline nothrow bool GetBinaryProperty(BinaryPropertyId binaryPropertyId) const
    {
        ulong bit = cast<ulong>(1u) << cast<ulong>(cast<byte>(binaryPropertyId));
        return (binaryProperties & bit) != 0u;
    }
    // Returns true when the stored category shares at least one bit with 'generalCategory_'.
    public inline nothrow bool HasGeneralCategory(GeneralCategoryId generalCategory_) const
    {
        return (generalCategory & generalCategory_) != GeneralCategoryId.none;
    }
    // Returns the stored general category value.
    public inline nothrow GeneralCategoryId GeneralCategoryValue() const
    {
        return generalCategory;
    }
    // Returns the stored 'upper' character.
    public inline nothrow uchar Upper() const
    {
        return upper;
    }
    // Returns the stored 'lower' character.
    public inline nothrow uchar Lower() const
    {
        return lower;
    }
    // Returns the stored 'title' character.
    public inline nothrow uchar Title() const
    {
        return title;
    }
    // Returns the stored 'folding' character.
    public inline nothrow uchar Folding() const
    {
        return folding;
    }
    // Returns the stored block identifier.
    public inline nothrow BlockId BlockValue() const
    {
        return block;
    }
    // Returns the stored age identifier.
    public inline nothrow AgeId AgeValue() const
    {
        return age;
    }
    // Returns the stored script identifier.
    public inline nothrow ScriptId ScriptValue() const
    {
        return script;
    }
    // Loads all fields from 'reader'. The read order and widths define the record layout
    // (ulong, uint, four uchars, ushort, byte, byte) and must not be rearranged.
    public void Read(BinaryReader& reader)
    {
        binaryProperties = reader.ReadULong();
        generalCategory = cast<GeneralCategoryId>(reader.ReadUInt());
        upper = reader.ReadUChar();
        lower = reader.ReadUChar();
        title = reader.ReadUChar();
        folding = reader.ReadUChar();
        block = cast<BlockId>(reader.ReadUShort());
        age = cast<AgeId>(reader.ReadByte());
        script = cast<ScriptId>(reader.ReadByte());
    }
}
1338
// Number of CharacterInfo records grouped into one page.
public const long numInfosInPage = 128;
// Size of one CharacterInfo record; presumably in bytes of the serialized form read by CharacterInfo.Read - TODO confirm.
public const uint characterInfoSize = 32u;
// Size of one page: record size times records per page.
public const uint characterInfoPageSize = characterInfoSize * cast<uint>(numInfosInPage);
1342
// Identifiers for the numeric types registered in NumericTypeTable; the enumerators mirror
// the short names "De", "Di" and "Nu" used there.
public enum NumericTypeId : byte
{
    none = 0u,
    de,
    di,
    nu
}
1348
// Immutable description of one numeric type: its id plus a short and a long name.
public class NumericType
{
    private NumericTypeId id;
    private string shortName;
    private string longName;

    // Stores the identifier and copies of both names.
    public nothrow NumericType(NumericTypeId id_, const string& shortName_, const string& longName_) :
        id(id_), shortName(shortName_), longName(longName_)
    {
    }
    // Returns the identifier given at construction.
    public inline nothrow NumericTypeId Id() const
    {
        return id;
    }
    // Returns the short name given at construction (for example "De").
    public inline nothrow const string& ShortName() const
    {
        return shortName;
    }
    // Returns the long name given at construction (for example "Decimal").
    public inline nothrow const string& LongName() const
    {
        return longName;
    }
}
1370
// Singleton registry of NumericType objects, searchable by id, short name or long name.
// Name keys and name queries are both passed through MakeCanonicalPropertyName.
// The table owns the NumericType objects; the maps hold the same pointers as void*.
public class NumericTypeTable
{
    private List<NumericType*> numericTypes;
    private HashMap<uint, void*> numericTypeIdMap;
    private HashMap<string, void*> shortNameMap;
    private HashMap<string, void*> longNameMap;
    private static UniquePtr<NumericTypeTable> instance;

    // Creates the single shared instance.
    static NumericTypeTable() : instance(new NumericTypeTable())
    {
    }
    // Returns the shared table.
    public static NumericTypeTable& Instance()
    {
        return *instance;
    }
    // Returns the NumericType registered for 'id', or null when there is none.
    public const NumericType* GetNumericType(NumericTypeId id) const
    {
        uint key = cast<uint>(cast<byte>(id));
        HashMap<uint, void*>.ConstIterator pos = numericTypeIdMap.CFind(key);
        if (pos == numericTypeIdMap.CEnd())
        {
            return null;
        }
        return cast<NumericType*>(pos->second);
    }
    // Returns the NumericType whose canonicalized short name matches 'shortName', or null.
    public const NumericType* GetNumericTypeByShortName(const string& shortName) const
    {
        string key = MakeCanonicalPropertyName(shortName);
        HashMap<string, void*>.ConstIterator pos = shortNameMap.CFind(key);
        if (pos == shortNameMap.CEnd())
        {
            return null;
        }
        return cast<NumericType*>(pos->second);
    }
    // Returns the NumericType whose canonicalized long name matches 'longName', or null.
    public const NumericType* GetNumericTypeByLongName(const string& longName) const
    {
        string key = MakeCanonicalPropertyName(longName);
        HashMap<string, void*>.ConstIterator pos = longNameMap.CFind(key);
        if (pos == longNameMap.CEnd())
        {
            return null;
        }
        return cast<NumericType*>(pos->second);
    }
    // Registers the four numeric types and indexes them by id and by canonical names.
    private NumericTypeTable() : numericTypes(), numericTypeIdMap(), shortNameMap(), longNameMap()
    {
        numericTypes.Add(new NumericType(NumericTypeId.none, "None", "None"));
        numericTypes.Add(new NumericType(NumericTypeId.de, "De", "Decimal"));
        numericTypes.Add(new NumericType(NumericTypeId.di, "Di", "Digit"));
        numericTypes.Add(new NumericType(NumericTypeId.nu, "Nu", "Numeric"));
        for (NumericType* entry : numericTypes)
        {
            uint idKey = cast<uint>(cast<byte>(entry->Id()));
            numericTypeIdMap[idKey] = entry;
            shortNameMap[MakeCanonicalPropertyName(entry->ShortName())] = entry;
            longNameMap[MakeCanonicalPropertyName(entry->LongName())] = entry;
        }
    }
    // Deletes the NumericType objects created by the constructor.
    public ~NumericTypeTable()
    {
        for (NumericType* entry : numericTypes)
        {
            delete entry;
        }
    }
}
1433
// Convenience wrapper: returns the NumericType registered for 'id' in the shared table, or null.
public const NumericType* GetNumericType(NumericTypeId id)
{
    NumericTypeTable& table = NumericTypeTable.Instance();
    return table.GetNumericType(id);
}
1438
// Convenience wrapper: short-name lookup in the shared NumericTypeTable; null when nothing matches.
public const NumericType* GetNumericTypeByShortName(const string& shortName)
{
    NumericTypeTable& table = NumericTypeTable.Instance();
    return table.GetNumericTypeByShortName(shortName);
}
1443
// Convenience wrapper: long-name lookup in the shared NumericTypeTable; null when nothing matches.
public const NumericType* GetNumericTypeByLongName(const string& longName)
{
    NumericTypeTable& table = NumericTypeTable.Instance();
    return table.GetNumericTypeByLongName(longName);
}
1448
// Identifiers for the bidi classes registered in BidiClassTable; each enumerator is the
// lower-cased short name used there. Values are implicit, so the order must not change.
public enum BidiClassId : byte
{
    none = 0u,
    al,
    an,
    b,
    bn,
    cs,
    en,
    es,
    et,
    fsi,
    l,
    lre,
    lri,
    lro,
    nsm,
    on,
    pdf,
    pdi,
    r,
    rle,
    rli,
    rlo,
    s,
    ws
}
1454
// Immutable description of one bidi class: its id plus a short and a long name.
public class BidiClass
{
    private BidiClassId id;
    private string shortName;
    private string longName;

    // Stores the identifier and copies of both names.
    public nothrow BidiClass(BidiClassId id_, const string& shortName_, const string& longName_) :
        id(id_), shortName(shortName_), longName(longName_)
    {
    }
    // Returns the identifier given at construction.
    public inline nothrow BidiClassId Id() const
    {
        return id;
    }
    // Returns the short name given at construction (for example "AL").
    public inline nothrow const string& ShortName() const
    {
        return shortName;
    }
    // Returns the long name given at construction (for example "Arabic Letter").
    public inline nothrow const string& LongName() const
    {
        return longName;
    }
}
1476
// Singleton registry of BidiClass objects, searchable by id, short name or long name.
// Name keys and name queries are both passed through MakeCanonicalPropertyName.
// The table owns the BidiClass objects; the maps hold the same pointers as void*.
// BidiClassId.none is not registered, so looking it up yields null.
public class BidiClassTable
{
    private static UniquePtr<BidiClassTable> instance;
    private List<BidiClass*> bidiClasses;
    private HashMap<uint, void*> bidiClassIdMap;
    private HashMap<string, void*> shortNameMap;
    private HashMap<string, void*> longNameMap;

    // Creates the single shared instance.
    static BidiClassTable() : instance(new BidiClassTable())
    {
    }
    // Returns the shared table.
    public static BidiClassTable& Instance()
    {
        return *instance;
    }
    // Returns the BidiClass registered for 'id', or null when there is none.
    public const BidiClass* GetBidiClass(BidiClassId id) const
    {
        uint key = cast<uint>(cast<byte>(id));
        HashMap<uint, void*>.ConstIterator pos = bidiClassIdMap.CFind(key);
        if (pos == bidiClassIdMap.CEnd())
        {
            return null;
        }
        return cast<BidiClass*>(pos->second);
    }
    // Returns the BidiClass whose canonicalized short name matches 'shortName', or null.
    public const BidiClass* GetBidiClassByShortName(const string& shortName) const
    {
        string key = MakeCanonicalPropertyName(shortName);
        HashMap<string, void*>.ConstIterator pos = shortNameMap.CFind(key);
        if (pos == shortNameMap.CEnd())
        {
            return null;
        }
        return cast<BidiClass*>(pos->second);
    }
    // Returns the BidiClass whose canonicalized long name matches 'longName', or null.
    public const BidiClass* GetBidiClassByLongName(const string& longName) const
    {
        string key = MakeCanonicalPropertyName(longName);
        HashMap<string, void*>.ConstIterator pos = longNameMap.CFind(key);
        if (pos == longNameMap.CEnd())
        {
            return null;
        }
        return cast<BidiClass*>(pos->second);
    }
    // Registers every bidi class and indexes it by id and by canonical names.
    private BidiClassTable() : bidiClasses(), bidiClassIdMap(), shortNameMap(), longNameMap()
    {
        bidiClasses.Add(new BidiClass(BidiClassId.al, "AL", "Arabic Letter"));
        bidiClasses.Add(new BidiClass(BidiClassId.an, "AN", "Arabic Number"));
        bidiClasses.Add(new BidiClass(BidiClassId.b, "B", "Paragraph Separator"));
        bidiClasses.Add(new BidiClass(BidiClassId.bn, "BN", "Boundary Neutral"));
        bidiClasses.Add(new BidiClass(BidiClassId.cs, "CS", "Common Separator"));
        bidiClasses.Add(new BidiClass(BidiClassId.en, "EN", "European Number"));
        bidiClasses.Add(new BidiClass(BidiClassId.es, "ES", "European Separator"));
        bidiClasses.Add(new BidiClass(BidiClassId.et, "ET", "European Terminator"));
        bidiClasses.Add(new BidiClass(BidiClassId.fsi, "FSI", "First Strong Isolate"));
        bidiClasses.Add(new BidiClass(BidiClassId.l, "L", "Left To Right"));
        bidiClasses.Add(new BidiClass(BidiClassId.lre, "LRE", "Left To Right Embedding"));
        bidiClasses.Add(new BidiClass(BidiClassId.lri, "LRI", "Left To Right Isolate"));
        bidiClasses.Add(new BidiClass(BidiClassId.lro, "LRO", "Left To Right Override"));
        bidiClasses.Add(new BidiClass(BidiClassId.nsm, "NSM", "Nonspacing Mark"));
        bidiClasses.Add(new BidiClass(BidiClassId.on, "ON", "Other Neutral"));
        bidiClasses.Add(new BidiClass(BidiClassId.pdf, "PDF", "Pop Directional Format"));
        bidiClasses.Add(new BidiClass(BidiClassId.pdi, "PDI", "Pop Directional Isolate"));
        bidiClasses.Add(new BidiClass(BidiClassId.r, "R", "Right To Left"));
        bidiClasses.Add(new BidiClass(BidiClassId.rle, "RLE", "Right To Left Embedding"));
        bidiClasses.Add(new BidiClass(BidiClassId.rli, "RLI", "Right To Left Isolate"));
        bidiClasses.Add(new BidiClass(BidiClassId.rlo, "RLO", "Right To Left Override"));
        bidiClasses.Add(new BidiClass(BidiClassId.s, "S", "Segment Separator"));
        bidiClasses.Add(new BidiClass(BidiClassId.ws, "WS", "White Space"));
        for (BidiClass* entry : bidiClasses)
        {
            uint idKey = cast<uint>(cast<byte>(entry->Id()));
            bidiClassIdMap[idKey] = entry;
            shortNameMap[MakeCanonicalPropertyName(entry->ShortName())] = entry;
            longNameMap[MakeCanonicalPropertyName(entry->LongName())] = entry;
        }
    }
    // Deletes the BidiClass objects created by the constructor.
    public ~BidiClassTable()
    {
        for (BidiClass* entry : bidiClasses)
        {
            delete entry;
        }
    }
}
1558
// Convenience wrapper: returns the BidiClass registered for 'id' in the shared table, or null.
public const BidiClass* GetBidiClass(BidiClassId id)
{
    BidiClassTable& table = BidiClassTable.Instance();
    return table.GetBidiClass(id);
}
1563
// Convenience wrapper: short-name lookup in the shared BidiClassTable; null when nothing matches.
public const BidiClass* GetBidiClassByShortName(const string& shortName)
{
    BidiClassTable& table = BidiClassTable.Instance();
    return table.GetBidiClassByShortName(shortName);
}
1568
// Convenience wrapper: long-name lookup in the shared BidiClassTable; null when nothing matches.
public const BidiClass* GetBidiClassByLongName(const string& longName)
{
    BidiClassTable& table = BidiClassTable.Instance();
    return table.GetBidiClassByLongName(longName);
}
1573
// Identifiers for the bracket types registered in BidiPairedBracketTypeTable:
// 'o' is registered as "Open", 'c' as "Close" and 'none' as "None".
public enum BidiPairedBracketTypeId : byte
{
    none = 0u,
    o,
    c
}
1579
// Immutable description of one bidi paired bracket type: its id plus a short and a long name.
public class BidiPairedBracketType
{
    private BidiPairedBracketTypeId id;
    private string shortName;
    private string longName;

    // Stores the identifier and copies of both names.
    public nothrow BidiPairedBracketType(BidiPairedBracketTypeId id_, const string& shortName_, const string& longName_) :
        id(id_),
        shortName(shortName_),
        longName(longName_)
    {
    }
    // Returns the identifier given at construction.
    public inline nothrow BidiPairedBracketTypeId Id() const
    {
        return id;
    }
    // Returns the short name given at construction (for example "O").
    public inline nothrow const string& ShortName() const
    {
        return shortName;
    }
    // Returns the long name given at construction (for example "Open").
    public inline nothrow const string& LongName() const
    {
        return longName;
    }
}
1602
// Singleton registry of BidiPairedBracketType objects, searchable by id, short name or long name.
// Name keys are stored through MakeCanonicalPropertyName, and every name lookup canonicalizes
// its argument the same way, so lookups ignore case, spaces, underscores and hyphens.
// The table owns the registered objects; the maps hold the same pointers as void*.
public class BidiPairedBracketTypeTable
{
    // Creates the single shared instance.
    static BidiPairedBracketTypeTable() : instance(new BidiPairedBracketTypeTable())
    {
    }
    // Returns the shared table.
    public static BidiPairedBracketTypeTable& Instance()
    {
        return *instance;
    }
    // Returns the bracket type registered for 'id', or null when there is none.
    public const BidiPairedBracketType* GetBidiPairedBracketType(BidiPairedBracketTypeId id) const
    {
        HashMap<uint, void*>.ConstIterator it = bidiPairedBracketTypeIdMap.CFind(cast<uint>(cast<byte>(id)));
        if (it != bidiPairedBracketTypeIdMap.CEnd())
        {
            return cast<BidiPairedBracketType*>(it->second);
        }
        return null;
    }
    // Returns the bracket type whose canonicalized short name matches 'shortName', or null.
    public const BidiPairedBracketType* GetBidiPairedBracketTypeByShortName(const string& shortName) const
    {
        HashMap<string, void*>.ConstIterator it = shortNameMap.CFind(MakeCanonicalPropertyName(shortName));
        if (it != shortNameMap.CEnd())
        {
            return cast<BidiPairedBracketType*>(it->second);
        }
        return null;
    }
    // Returns the bracket type whose canonicalized long name matches 'longName', or null.
    public const BidiPairedBracketType* GetBidiPairedBracketTypeByLongName(const string& longName) const
    {
        // The keys of longNameMap are canonicalized in the constructor, so the query must be
        // canonicalized too; a raw lookup of "Open", "Close" or "None" would never match.
        HashMap<string, void*>.ConstIterator it = longNameMap.CFind(MakeCanonicalPropertyName(longName));
        if (it != longNameMap.CEnd())
        {
            return cast<BidiPairedBracketType*>(it->second);
        }
        return null;
    }
    // Registers the three bracket types and indexes them by id and by canonical names.
    private BidiPairedBracketTypeTable() : bidiPairedBracketTypes(), bidiPairedBracketTypeIdMap(), shortNameMap(), longNameMap()
    {
        bidiPairedBracketTypes.Add(new BidiPairedBracketType(BidiPairedBracketTypeId.o, "O", "Open"));
        bidiPairedBracketTypes.Add(new BidiPairedBracketType(BidiPairedBracketTypeId.c, "C", "Close"));
        bidiPairedBracketTypes.Add(new BidiPairedBracketType(BidiPairedBracketTypeId.none, "N", "None"));
        for (BidiPairedBracketType* bidiPairedBracketType : bidiPairedBracketTypes)
        {
            bidiPairedBracketTypeIdMap[cast<uint>(cast<byte>(bidiPairedBracketType->Id()))] = bidiPairedBracketType;
            shortNameMap[MakeCanonicalPropertyName(bidiPairedBracketType->ShortName())] = bidiPairedBracketType;
            longNameMap[MakeCanonicalPropertyName(bidiPairedBracketType->LongName())] = bidiPairedBracketType;
        }
    }
    // Deletes the objects created by the constructor.
    public ~BidiPairedBracketTypeTable()
    {
        for (BidiPairedBracketType* bidiPairedBracketType : bidiPairedBracketTypes)
        {
            delete bidiPairedBracketType;
        }
    }
    private static UniquePtr<BidiPairedBracketTypeTable> instance;
    private List<BidiPairedBracketType*> bidiPairedBracketTypes;
    private HashMap<uint, void*> bidiPairedBracketTypeIdMap;
    private HashMap<string, void*> shortNameMap;
    private HashMap<string, void*> longNameMap;
}
1664
// Convenience wrapper: returns the bracket type registered for 'id' in the shared table, or null.
public const BidiPairedBracketType* GetBidiPairedBracketType(BidiPairedBracketTypeId id)
{
    BidiPairedBracketTypeTable& table = BidiPairedBracketTypeTable.Instance();
    return table.GetBidiPairedBracketType(id);
}
1669
// Convenience wrapper: short-name lookup in the shared BidiPairedBracketTypeTable; null when nothing matches.
public const BidiPairedBracketType* GetBidiPairedBracketTypeByShortName(const string& shortName)
{
    BidiPairedBracketTypeTable& table = BidiPairedBracketTypeTable.Instance();
    return table.GetBidiPairedBracketTypeByShortName(shortName);
}
1674
// Convenience wrapper: forwards the long-name lookup to the shared BidiPairedBracketTypeTable.
public const BidiPairedBracketType* GetBidiPairedBracketTypeByLongName(const string& longName)
{
    BidiPairedBracketTypeTable& table = BidiPairedBracketTypeTable.Instance();
    return table.GetBidiPairedBracketTypeByLongName(longName);
}
1679
// Identifiers for the alias types registered in AliasTypeTable; each enumerator other than
// 'none' has the same spelling as its registered name. Values are implicit, so order matters.
public enum AliasTypeId : byte
{
    none = 0u,
    correction,
    control,
    alternate,
    figment,
    abbreviation
}
1685
// Immutable pairing of an AliasTypeId with its name (for example "correction").
public class AliasType
{
    private AliasTypeId id;
    private string name;

    // Stores the identifier and a copy of the name.
    public nothrow AliasType(AliasTypeId id_, const string& name_) : id(id_), name(name_)
    {
    }
    // Returns the identifier given at construction.
    public inline nothrow AliasTypeId Id() const
    {
        return id;
    }
    // Returns the name given at construction.
    public inline nothrow const string& Name() const
    {
        return name;
    }
}
1702
// Singleton registry of the known alias types, searchable by id and by name.
// The table owns its AliasType objects: they are allocated in the constructor and
// deleted in the destructor. The two maps store the same pointers as void*.
public class AliasTypeTable
{
    // Creates the single instance when the class is first used.
    static AliasTypeTable() : instance(new AliasTypeTable())
    {
    }
    // Returns the single table instance.
    public static AliasTypeTable& Instance()
    {
        return *instance;
    }
    // Returns the alias type registered for 'id', or null when none is registered
    // (AliasTypeId.none is never registered).
    public const AliasType* GetAliasType(AliasTypeId id) const
    {
        uint key = cast<uint>(cast<byte>(id));
        HashMap<uint, void*>.ConstIterator pos = aliasTypeIdMap.CFind(key);
        if (pos == aliasTypeIdMap.CEnd())
        {
            return null;
        }
        return cast<AliasType*>(pos->second);
    }
    // Returns the alias type registered under exactly 'name', or null when there is none.
    // The name is used as the map key as given; it is not canonicalized first.
    public const AliasType* GetAliasType(const string& name) const
    {
        HashMap<string, void*>.ConstIterator pos = nameMap.CFind(name);
        if (pos == nameMap.CEnd())
        {
            return null;
        }
        return cast<AliasType*>(pos->second);
    }
    // Registers the five named alias types and indexes each one by id and by name.
    private AliasTypeTable() : aliasTypes(), aliasTypeIdMap(), nameMap()
    {
        aliasTypes.Add(new AliasType(AliasTypeId.abbreviation, "abbreviation"));
        aliasTypes.Add(new AliasType(AliasTypeId.alternate, "alternate"));
        aliasTypes.Add(new AliasType(AliasTypeId.control, "control"));
        aliasTypes.Add(new AliasType(AliasTypeId.correction, "correction"));
        aliasTypes.Add(new AliasType(AliasTypeId.figment, "figment"));
        for (AliasType* entry : aliasTypes)
        {
            uint idKey = cast<uint>(cast<byte>(entry->Id()));
            aliasTypeIdMap[idKey] = entry;
            nameMap[entry->Name()] = entry;
        }
    }
    // Deletes every alias type allocated by the constructor.
    public ~AliasTypeTable()
    {
        for (AliasType* entry : aliasTypes)
        {
            delete entry;
        }
    }
    private static UniquePtr<AliasTypeTable> instance;
    private List<AliasType*> aliasTypes;
    private HashMap<uint, void*> aliasTypeIdMap;
    private HashMap<string, void*> nameMap;
}
1755
// Returns the alias type registered for 'id' in the AliasTypeTable singleton,
// or null when the table has no entry for it.
public const AliasType* GetAliasType(AliasTypeId id)
{
    AliasTypeTable& table = AliasTypeTable.Instance();
    return table.GetAliasType(id);
}
1760
// Returns the alias type registered under 'name' in the AliasTypeTable singleton,
// or null when the table has no entry for it.
public const AliasType* GetAliasType(const string& name)
{
    AliasTypeTable& table = AliasTypeTable.Instance();
    return table.GetAliasType(name);
}
1765
// One alias of a character: an alias type id together with the alias name.
public class Alias
{
    // Creates an empty alias: type AliasTypeId.none and an empty name.
    public nothrow Alias() : typeId(AliasTypeId.none), name()
    {
    }
    // Creates an alias with the given type and a copy of the given name.
    public nothrow Alias(AliasTypeId typeId_, const string& name_) : typeId(typeId_), name(name_)
    {
    }
    // Returns the alias type id.
    public inline nothrow AliasTypeId TypeId() const
    {
        return typeId;
    }
    // Returns the alias name.
    public inline nothrow const string& Name() const
    {
        return name;
    }
    // Replaces the contents of this alias with data from 'reader':
    // one byte holding the AliasTypeId, followed by the name string.
    public void Read(BinaryReader& reader)
    {
        byte rawTypeId = reader.ReadByte();
        typeId = cast<AliasTypeId>(rawTypeId);
        name = reader.ReadString();
    }
    private AliasTypeId typeId;
    private string name;
}
1790
// Extended properties of a single code point: names, canonical combining class,
// full case mappings, bidi data, numeric data and name aliases.
// An instance starts out empty and is filled in by Read.
public class ExtendedCharacterInfo
{
    // Creates an empty record: empty strings and alias list, zero combining class and
    // code points, and the 'none' member of each enumerated property.
    public nothrow ExtendedCharacterInfo() :
        characterName(), unicode1Name(), canonicalCombiningClass(0u), fullUpper(), fullLower(), fullTitle(), fullFolding(), bidiClass(BidiClassId.none),
        numericType(NumericTypeId.none), numericValue(), aliases(), bidiMirroringGlyph(cast<uchar>(0u)), bidiPairedBracketType(BidiPairedBracketTypeId.none),
        bidiPairedBracket(cast<uchar>(0u))
    {
    }
    public inline nothrow const string& CharacterName() const
    {
        return characterName;
    }
    public inline nothrow const string& Unicode1Name() const
    {
        return unicode1Name;
    }
    public inline nothrow byte CanonicalCombiningClass() const
    {
        return canonicalCombiningClass;
    }
    public inline nothrow const ustring& FullUpper() const
    {
        return fullUpper;
    }
    public inline nothrow const ustring& FullLower() const
    {
        return fullLower;
    }
    public inline nothrow const ustring& FullTitle() const
    {
        return fullTitle;
    }
    public inline nothrow const ustring& FullFolding() const
    {
        return fullFolding;
    }
    public inline nothrow BidiClassId BidiClassValue() const
    {
        return bidiClass;
    }
    public inline nothrow NumericTypeId NumericTypeValue() const
    {
        return numericType;
    }
    public inline nothrow const ustring& NumericValue() const
    {
        return numericValue;
    }
    public inline nothrow const List<Alias>& Aliases() const
    {
        return aliases;
    }
    public inline nothrow uchar BidiMirroringGlyph() const
    {
        return bidiMirroringGlyph;
    }
    public inline nothrow BidiPairedBracketTypeId BidiPairedBracketTypeValue() const
    {
        return bidiPairedBracketType;
    }
    // Declared const like every other accessor, so that it can be called through the
    // const reference returned by GetExtendedCharacterInfo.
    public inline nothrow uchar BidiPairedBracket() const
    {
        return bidiPairedBracket;
    }
    // Fills this record from 'reader'. Fields are read in this fixed order:
    //   character name, Unicode 1 name (strings), canonical combining class (byte),
    //   full upper, lower, title and folding mappings (each: count byte + that many uchars),
    //   bidi class (byte), numeric type (byte), numeric value (ustring),
    //   aliases (count byte + that many Alias records),
    //   bidi mirroring glyph (uchar), bidi paired bracket type (byte), bidi paired bracket (uchar).
    // The mappings and aliases are appended to the current contents, so Read is meant to be
    // called once on a freshly constructed object.
    public void Read(BinaryReader& reader)
    {
        characterName = reader.ReadString();
        unicode1Name = reader.ReadString();
        canonicalCombiningClass = reader.ReadByte();
        byte nu = reader.ReadByte();
        for (byte i = 0u; i < nu; ++i;)
        {
            fullUpper.Append(reader.ReadUChar());
        }
        byte nl = reader.ReadByte();
        for (byte i = 0u; i < nl; ++i;)
        {
            fullLower.Append(reader.ReadUChar());
        }
        byte nt = reader.ReadByte();
        for (byte i = 0u; i < nt; ++i;)
        {
            fullTitle.Append(reader.ReadUChar());
        }
        byte nf = reader.ReadByte();
        for (byte i = 0u; i < nf; ++i;)
        {
            fullFolding.Append(reader.ReadUChar());
        }
        bidiClass = cast<BidiClassId>(reader.ReadByte());
        numericType = cast<NumericTypeId>(reader.ReadByte());
        numericValue = reader.ReadUString();
        byte na = reader.ReadByte();
        for (byte i = 0u; i < na; ++i;)
        {
            Alias alias;
            alias.Read(reader);
            aliases.Add(alias);
        }
        bidiMirroringGlyph = reader.ReadUChar();
        bidiPairedBracketType = cast<BidiPairedBracketTypeId>(reader.ReadByte());
        bidiPairedBracket = reader.ReadUChar();
    }
    private string characterName;
    private string unicode1Name;
    private byte canonicalCombiningClass;
    private ustring fullUpper;
    private ustring fullLower;
    private ustring fullTitle;
    private ustring fullFolding;
    private BidiClassId bidiClass;
    private NumericTypeId numericType;
    private ustring numericValue;
    private List<Alias> aliases;
    private uchar bidiMirroringGlyph;
    private BidiPairedBracketTypeId bidiPairedBracketType;
    private uchar bidiPairedBracket;
}
1909
// A page of numInfosInPage CharacterInfo records, read from the binary file as one unit.
public class CharacterInfoPage
{
    // Sizes the slot list to numInfosInPage entries; the records themselves are created by Read.
    public CharacterInfoPage() : characterInfos()
    {
        characterInfos.Resize(numInfosInPage);
    }
    // Returns the record in slot 'index'. The slot is dereferenced without any check,
    // so this must only be used after Read has filled the page and with an index
    // in the range 0 .. numInfosInPage - 1.
    public const CharacterInfo& operator[](long index) const
    {
        return *characterInfos[index];
    }
    // Reads numInfosInPage consecutive records from 'reader' into the slots, in order.
    public void Read(BinaryReader& reader)
    {
        for (long i = 0; i < numInfosInPage; ++i;)
        {
            // Hold the new record in a UniquePtr while it is being read, so that it is
            // freed instead of leaked if reading throws.
            UniquePtr<CharacterInfo> characterInfo(new CharacterInfo());
            characterInfo->Read(reader);
            characterInfos[i].Reset(characterInfo.Release());
        }
    }
    private List<UniquePtr<CharacterInfo>> characterInfos;
}
1931
// A page of numInfosInPage ExtendedCharacterInfo records, read from the binary file as one unit.
public class ExtendedCharacterInfoPage
{
    // Sizes the slot list to numInfosInPage entries; the records themselves are created by Read.
    public ExtendedCharacterInfoPage() : extendedCharacterInfos()
    {
        extendedCharacterInfos.Resize(numInfosInPage);
    }
    // Returns the record in slot 'index'. The slot is dereferenced without any check,
    // so this must only be used after Read has filled the page and with an index
    // in the range 0 .. numInfosInPage - 1.
    public const ExtendedCharacterInfo& operator[](long index) const
    {
        return *extendedCharacterInfos[index];
    }
    // Reads numInfosInPage consecutive records from 'reader' into the slots, in order.
    public void Read(BinaryReader& reader)
    {
        for (long i = 0; i < numInfosInPage; ++i;)
        {
            // Hold the new record in a UniquePtr while it is being read, so that it is
            // freed instead of leaked if reading throws.
            UniquePtr<ExtendedCharacterInfo> extendedCharacterInfo(new ExtendedCharacterInfo());
            extendedCharacterInfo->Read(reader);
            extendedCharacterInfos[i].Reset(extendedCharacterInfo.Release());
        }
    }
    private List<UniquePtr<ExtendedCharacterInfo>> extendedCharacterInfos;
}
1953
// Table of uint values, one per extended page. CharacterTable seeks to the value stored
// for a page index before reading that extended page, i.e. each value is a file position.
public class ExtendedCharacterInfoHeader
{
    // Creates an empty table; Read fills it.
    public ExtendedCharacterInfoHeader() : extendedPageStarts()
    {
    }
    // Returns the start value stored for page 'index'. The index is not range-checked here.
    public uint operator[](long index) const
    {
        return extendedPageStarts[index];
    }
    // Reads the table from 'reader': a uint entry count followed by that many uint values.
    public void Read(BinaryReader& reader)
    {
        uint pageCount = reader.ReadUInt();
        extendedPageStarts.Resize(pageCount);
        uint pageNumber = 0u;
        while (pageNumber < pageCount)
        {
            extendedPageStarts[pageNumber] = reader.ReadUInt();
            ++pageNumber;
        }
    }
    private List<uint> extendedPageStarts;
}
1974
// Version markers of the cmajor_ucd.bin file format, stored as the ASCII digit characters '1' to '3'.
public const byte cmajor_ucd_version_1 = cast<byte>('1');
public const byte cmajor_ucd_version_2 = cast<byte>('2');
public const byte cmajor_ucd_version_3 = cast<byte>('3');
// The version this library expects: CharacterTable places it in the eighth byte of its
// expected header magic and rejects a file whose eighth header byte differs.
public const byte current_cmajor_ucd_version = cmajor_ucd_version_3;
1979
// Singleton that serves CharacterInfo and ExtendedCharacterInfo records from the
// cmajor_ucd.bin file. Records are grouped into pages of numInfosInPage entries; a page is
// read from the file the first time one of its code points is requested and is then kept
// in memory for the lifetime of the table.
public class CharacterTable
{
    // Creates the single instance when the class is first used.
    static CharacterTable() : instance(new CharacterTable())
    {
    }
    // Returns the single table instance.
    public static CharacterTable& Instance()
    {
        return *instance;
    }
    // Returns the basic character info of 'codePoint', loading its page on first use.
    // Throws UnicodeException when the code point is above 0x10FFFF, when the database file
    // does not exist, or when the file header is invalid.
    public const CharacterInfo& GetCharacterInfo(uchar codePoint)
    {
        if (codePoint > cast<uchar>(0x10FFFF))
        {
            throw UnicodeException("invalid Unicode code point " + ToHexString(cast<uint>(codePoint)));
        }
        long pageIndex = cast<long>(codePoint) / numInfosInPage;
        // NOTE(review): this size check and the slot read below run without the lock and may
        // overlap with another thread growing the list under the lock - confirm this is acceptable.
        if (pages.Count() <= pageIndex)
        {
            LockGuard<RecursiveMutex> lock(characterTableLock);
            // Grow the slot list with empty slots until pageIndex is a valid index.
            while (pages.Count() <= pageIndex)
            {
                pages.Add(UniquePtr<CharacterInfoPage>());
            }
        }
        CharacterInfoPage* page = pages[pageIndex].Get();
        if (page == null)
        {
            LockGuard<RecursiveMutex> lock(characterTableLock);
            // Re-read the slot now that the lock is held: another thread may have loaded the
            // page between the unlocked check above and acquiring the lock. Testing the stale
            // local instead would load the page twice and delete the copy the other thread is using.
            page = pages[pageIndex].Get();
            if (page == null)
            {
                string pathToUcdBinFile = PathToUcdBinFile();
                if (!File.Exists(pathToUcdBinFile))
                {
                    throw UnicodeException("Error: Cmajor Unicode database file '" + pathToUcdBinFile + "' does not exist.");
                }
                BinaryReader reader = File.OpenBinary(pathToUcdBinFile);
                if (!headerRead)
                {
                    ReadHeader(reader);
                }
                // Basic pages have a fixed size and follow the fixed-size header area.
                uint pageStart = headerSize + characterInfoPageSize * cast<uint>(pageIndex);
                reader.Seek(cast<long>(pageStart), Origin.seekSet);
                // Own the page while it is being read so that it is freed if reading throws.
                UniquePtr<CharacterInfoPage> loadedPage(new CharacterInfoPage());
                loadedPage->Read(reader);
                page = loadedPage.Release();
                pages[pageIndex].Reset(page);
            }
        }
        long infoIndex = cast<long>(codePoint) % numInfosInPage;
        return (*page)[infoIndex];
    }
    // Returns the extended character info of 'codePoint', loading its page on first use.
    // Extended pages are located through the extended header, which is itself read on first use.
    // Throws UnicodeException when the code point is above 0x10FFFF, when the database file
    // does not exist, or when the file header is invalid.
    public const ExtendedCharacterInfo& GetExtendedCharacterInfo(uchar codePoint)
    {
        if (codePoint > cast<uchar>(0x10FFFF))
        {
            throw UnicodeException("invalid Unicode code point " + ToHexString(cast<uint>(codePoint)));
        }
        long pageIndex = cast<long>(codePoint) / numInfosInPage;
        // NOTE(review): this size check and the slot read below run without the lock and may
        // overlap with another thread growing the list under the lock - confirm this is acceptable.
        if (extendedPages.Count() <= pageIndex)
        {
            LockGuard<RecursiveMutex> lock(characterTableLock);
            // Grow the slot list with empty slots until pageIndex is a valid index.
            while (extendedPages.Count() <= pageIndex)
            {
                extendedPages.Add(UniquePtr<ExtendedCharacterInfoPage>());
            }
        }
        ExtendedCharacterInfoPage* extendedPage = extendedPages[pageIndex].Get();
        if (extendedPage == null)
        {
            LockGuard<RecursiveMutex> lock(characterTableLock);
            // Re-read the slot now that the lock is held: another thread may have loaded the
            // page between the unlocked check above and acquiring the lock. Testing the stale
            // local instead would load the page twice and delete the copy the other thread is using.
            extendedPage = extendedPages[pageIndex].Get();
            if (extendedPage == null)
            {
                string pathToUcdBinFile = PathToUcdBinFile();
                if (!File.Exists(pathToUcdBinFile))
                {
                    throw UnicodeException("Error: Cmajor Unicode database file '" + pathToUcdBinFile + "' does not exist.");
                }
                BinaryReader reader = File.OpenBinary(pathToUcdBinFile);
                if (!headerRead)
                {
                    ReadHeader(reader);
                }
                if (extendedHeader.IsNull())
                {
                    reader.Seek(cast<long>(extendedHeaderStart), Origin.seekSet);
                    ReadExtendedHeader(reader);
                }
                reader.Seek(cast<long>((*extendedHeader)[pageIndex]), Origin.seekSet);
                // Own the page while it is being read so that it is freed if reading throws.
                UniquePtr<ExtendedCharacterInfoPage> loadedPage(new ExtendedCharacterInfoPage());
                loadedPage->Read(reader);
                extendedPage = loadedPage.Release();
                extendedPages[pageIndex].Reset(extendedPage);
            }
        }
        long infoIndex = cast<long>(codePoint) % numInfosInPage;
        return (*extendedPage)[infoIndex];
    }
    // Builds the expected 8-byte header magic: the characters "CMAJUCD" followed by the
    // current format version byte. No file access happens here.
    private CharacterTable() : headerRead(false), pages(), extendedHeaderStart(0u), extendedHeaderEnd(0u), extendedHeader(), extendedPages(), characterTableLock()
    {
        headerMagic.Resize(8);
        string hdrmgc = "CMAJUCD";
        for (long i = 0; i < 8; ++i;)
        {
            if (i < 7)
            {
                headerMagic[i] = cast<byte>(hdrmgc[i]);
            }
            else
            {
                headerMagic[i] = current_cmajor_ucd_version;
            }
        }
    }
    // Reads and validates the file header from the current position of 'reader':
    // 8 magic bytes (7 signature bytes + 1 version byte), then the start and end
    // positions of the extended header as two uints.
    // Throws UnicodeException when the signature or the version does not match.
    private void ReadHeader(BinaryReader& reader)
    {
        List<byte> magic;
        magic.Resize(8);
        for (long i = 0; i < 8; ++i;)
        {
            magic[i] = reader.ReadByte();
        }
        for (long i = 0; i < 7; ++i;)
        {
            if (magic[i] != cast<byte>(headerMagic[i]))
            {
                throw UnicodeException("invalid cmajor_ucd.bin header magic: 'CMAJUCD' expected");
            }
        }
        if (magic[7] != headerMagic[7])
        {
            throw UnicodeException("invalid cmajor_ucd.bin version: version " +
                string(cast<char>(headerMagic[7]), 1) + " expected, version " +
                string(cast<char>(magic[7]), 1) + " read");
        }
        extendedHeaderStart = reader.ReadUInt();
        extendedHeaderEnd = reader.ReadUInt();
        // Mark the header as read only after everything succeeded. Setting the flag first
        // would make a later call skip validation and continue with extendedHeaderStart
        // still zero after an invalid file had been reported once.
        headerRead = true;
    }
    // Reads the extended header from the current position of 'reader'.
    // The header is installed only after it has been read completely, so a failed read
    // leaves extendedHeader null and the next request tries again.
    private void ReadExtendedHeader(BinaryReader& reader)
    {
        UniquePtr<ExtendedCharacterInfoHeader> header(new ExtendedCharacterInfoHeader());
        header->Read(reader);
        extendedHeader.Reset(header.Release());
    }
    public ~CharacterTable()
    {
    }
    // Offset added in front of the first basic page when computing a page's file position.
    private const uint headerSize = 4096u;
    private static UniquePtr<CharacterTable> instance;
    private List<byte> headerMagic;
    private bool headerRead;
    private List<UniquePtr<CharacterInfoPage>> pages;
    private uint extendedHeaderStart;
    private uint extendedHeaderEnd;
    private UniquePtr<ExtendedCharacterInfoHeader> extendedHeader;
    private List<UniquePtr<ExtendedCharacterInfoPage>> extendedPages;
    private RecursiveMutex characterTableLock;
}
2135
// Returns the basic character info of 'codePoint' from the CharacterTable singleton.
// Exceptions thrown by CharacterTable.GetCharacterInfo propagate to the caller.
public const CharacterInfo& GetCharacterInfo(uchar codePoint)
{
    CharacterTable& table = CharacterTable.Instance();
    return table.GetCharacterInfo(codePoint);
}
2140
// Returns the extended character info of 'codePoint' from the CharacterTable singleton.
// Exceptions thrown by CharacterTable.GetExtendedCharacterInfo propagate to the caller.
public const ExtendedCharacterInfo& GetExtendedCharacterInfo(uchar codePoint)
{
    CharacterTable& table = CharacterTable.Instance();
    return table.GetExtendedCharacterInfo(codePoint);
}
2145
// Returns the general category value stored in the character info of 'c'.
public GeneralCategoryId GetGeneralCategory(uchar c)
{
    const CharacterInfo& info = GetCharacterInfo(c);
    return info.GeneralCategoryValue();
}
2150
// Asks the character info of 'c' whether it has the given general category.
public bool HasGeneralCategory(uchar c, GeneralCategoryId generalCategory)
{
    const CharacterInfo& info = GetCharacterInfo(c);
    return info.HasGeneralCategory(generalCategory);
}
2155 }