1
2
3
4
5
6 #ifndef SOULNG_UNICODE_UNICODE_INCLUDED
7 #define SOULNG_UNICODE_UNICODE_INCLUDED
8 #include <soulng/util/BinaryWriter.hpp>
9 #include <soulng/util/BinaryReader.hpp>
10 #include <string>
11 #include <vector>
12 #include <unordered_map>
13 #include <memory>
14 #include <stdexcept>
15 #include <stdint.h>
16 #include <cstdio>
17
18 namespace soulng { namespace unicode {
19
20 using namespace soulng::util;
21
/// Exception type declared by this Unicode library.
/// It derives from std::runtime_error, so it can be caught as a
/// std::runtime_error or std::exception.
class UnicodeException : public std::runtime_error
{
public:
    /// Builds the exception from a message string.
    /// The constructor is defined out of line.
    UnicodeException(const std::string& message_);
};
27
/// Byte-at-a-time UTF-8 to UTF-32 conversion engine.
/// Bytes are supplied through Put(); the class exposes a "result ready" flag
/// and a char32_t result. The conversion logic itself is in the out-of-line
/// Put() and is not visible in this header.
class Utf8ToUtf32Engine
{
public:
    Utf8ToUtf32Engine();
    /// Feeds one input byte to the engine (defined out of line).
    void Put(uint8_t x);
    /// Returns the current value of the "result ready" flag.
    bool ResultReady() const { return resultReady; }
    /// Historical, misspelled name of ResultReady(); kept so existing callers
    /// continue to compile.
    bool ResulReady() const { return ResultReady(); }
    /// Returns the stored result code point.
    /// NOTE(review): presumably only meaningful while ResultReady() is true - confirm in Put().
    char32_t Result() const { return result; }
private:
    int state;
    bool resultReady;
    char32_t result;
    uint8_t bytes[4];
};
41
// String conversion entry points (declarations only; defined out of line).
// Each overload takes one string type by const reference and returns a new
// string of the type named in the function.

// Overloads returning std::u32string.
std::u32string ToUtf32(const std::string& utf8Str);
std::u32string ToUtf32(const std::u16string& utf16Str);

// Overloads returning std::u16string.
std::u16string ToUtf16(const std::u32string& utf32Str);
std::u16string ToUtf16(const std::string& utf8Str);

// Overloads returning std::string.
std::string ToUtf8(const std::u32string& utf32Str);
std::string ToUtf8(const std::u16string& utf16Str);
48
// Case conversion of UTF-32 strings (declarations only; defined out of line).
// Both take the input by const reference and return a new std::u32string.
std::u32string ToUpper(const std::u32string& s);
std::u32string ToLower(const std::u32string& s);
51
// Returns a canonical form of a property name (declaration only; defined out of line).
// NOTE(review): the exact canonicalization rules are not visible in this header.
std::string MakeCanonicalPropertyName(const std::string& s);
53
// Identifiers of the binary (boolean) character properties.
// Values are implicit and sequential starting at 0, so each id's numeric value
// is its position in this list. CharacterInfo uses the value as a bit index
// into a 64-bit mask, so the list must stay below 64 entries and must not be
// reordered.
enum class BinaryPropertyId : uint8_t
{
    // 0..9
    asciiHexDigit = 0, alphabetic, bidiControl, bidiMirrored, cased,
    compositionExclusion, caseIgnorable, fullCompositionExclusion, changesWhenCasefolded, changesWhenCaseMapped,
    // 10..19
    changesWhenNFKCCasefolded, changesWhenLowercased, changesWhenTitlecased, changesWhenUppercased, dash,
    deprecated, defaultIgnorableCodePoint, diacritic, extender, graphemeBase,
    // 20..29
    graphemeExtend, graphemeLink, hexDigit, hyphen, idContinue,
    ideographic, idStart, idsBinaryOperator, idsTrinaryOperator, joinControl,
    // 30..39
    logicalOrderException, lowercase, math, noncharacterCodePoint, otherAlphabetic,
    otherDefaultIgnorableCodePoint, otherGraphemeExtend, otherIdContinue, otherIdStart, otherLowercase,
    // 40..49
    otherMath, otherUppercase, patternSyntax, patternWhiteSpace, prependedConcatenationMark,
    quotationMark, radical, softDotted, sentenceterminal, terminalPunctuation,
    // 50..59
    unifiedIdeograph, uppercase, variationSelector, whiteSpace, xidContinue,
    xidStart, expandsOnNFC, expandsOnNFD, expandsOnNFKC, expandsOnNFKD
};
117
118 class BinaryProperty
119 {
120 public:
121 BinaryProperty(BinaryPropertyId id_, const std::string& shortName_, const std::string& longName_);
122 BinaryPropertyId Id() const { return id; }
123 const std::string& ShortName() const { return shortName; }
124 const std::string& LongName() const { return longName; }
125 private:
126 BinaryPropertyId id;
127 std::string shortName;
128 std::string longName;
129 };
130
131 struct BinaryPropertyIdHash
132 {
133 size_t operator()(BinaryPropertyId id) const
134 {
135 return std::hash<uint8_t>()(static_cast<uint8_t>(id));
136 }
137 };
138
139 class BinaryPropertyTable
140 {
141 public:
142 static void Init();
143 static void Done();
144 static BinaryPropertyTable& Instance() { return *instance; }
145 const BinaryProperty& GetBinaryProperty(BinaryPropertyId binaryPropertyId) const;
146 bool IsBinaryProperty(const std::string& shortName) const;
147 const BinaryProperty& GetBinaryPropertyByShortName(const std::string& shortName) const;
148 const BinaryProperty& GetBinaryPropertyByLongName(const std::string& longName) const;
149 const std::std::vector<BinaryProperty>&BinaryProperties() const{returnbinaryProperties;}
150 private:
151 BinaryPropertyTable();
152 static std::unique_ptr<BinaryPropertyTable> instance;
153 std::unordered_map<BinaryPropertyId, const BinaryProperty*, BinaryPropertyIdHash> binaryPropertyIdMap;
154 std::unordered_map<std::string, const BinaryProperty*> shortNameMap;
155 std::unordered_map<std::string, const BinaryProperty*> longNameMap;
156 std::vector<BinaryProperty> binaryProperties;
157 };
158
159 inline const BinaryProperty& GetBinaryProperty(BinaryPropertyId id)
160 {
161 return BinaryPropertyTable::Instance().GetBinaryProperty(id);
162 }
163
164 inline const BinaryProperty& GetBinaryPropertyByShortName(const std::string& shortName)
165 {
166 return BinaryPropertyTable::Instance().GetBinaryPropertyByShortName(shortName);
167 }
168
169 inline const BinaryProperty& GetBinaryPropertyByLongName(const std::string& longName)
170 {
171 return BinaryPropertyTable::Instance().GetBinaryPropertyByLongName(longName);
172 }
173
// Identifiers of the blocks known to this library, stored in a uint16_t.
// `none` is pinned to 0; every other enumerator gets the next sequential value,
// so the numeric value of each id depends on its position in this list.
// NOTE(review): CharacterInfo stores a BlockId and declares Write/Read members, so these
// values are presumably persisted - confirm before inserting or reordering enumerators.
enum class BlockId : uint16_t
{
none= 0,
adlam, aegeanNumbers, ahom, alchemical, alphabeticPf, anatolianHieroglyphs, ancientGreekMusic, ancientGreekNumbers, ancientSymbols,
arabic, arabicExtA, arabicMath, arabicPfA, arabicPfB, arabicSup, armenian, arrows, ascii, avestan,
balinese, bamum, bamumSup, bassaVah, batak, bengali, bhaisuki, blockElements, bopomofo, bopomofoExt, boxDrawing,
brahmi, braille, buginese, buhid, byzantineMusic,
carian, caucasianAlbanian, chakma, cham, cherokee, cherokeeSup, chessSymbols,
cjk, cjkCompat, cjkCompatForms, cjkCompatIdeographs, cjkCompatIdeographsSup, cjkExtA, cjkExtB, cjkExtC, cjkExtD, cjkExtE, cjkExtF,
cjkRadicalsSup, cjkStrokes, cjkSymbols, compatJamo, controlPictures, coptic, copticEpactNumbers, countingRod, cuneiform,
cuneiformNumbers, currencySymbols, cypriotSyllabary, cyrillic, cyrillicExtA, cyrillicExtB, cyrillicExtC, cyrillicSup,
deseret, devanagari, devanagariExt, diacriticals, diariticalsForSymbols, diacriticalsSup, diacriticalsExt, dingbats, dogra, domino, duployan,
earlyDynasticCuneiform, egyptianHieroglyphs, egyptianHieroglyphFormatControls, elbasan, elymaic, emoticons, enclosedAlphanum, enclosedAlphanumSup, enclosedCjk, enclosedIdeographicSup,
ethiopic, ethiopicExt, ethiopicExtA, ethiopicSup,
geometricShapes, geometricShapesExt, georgian, georgianExt, georgianSup, glagolitic, glagoliticSup, gothic, grantha, greek, greekExt, gujarati, gunjalaGondi, gurmukhi,
halfAndFullForms, halfMarks, hangul, hanifiRohingya, hanunoo, hatran, hebrew, highPuSurrogates, highSurrogates, hiragana,
idc, ideographicSymbols, imperialAramaic, indicNumberForms, inscriptionalPahlavi, inscriptionalParthian, ipaExt, indicSiyaqNumbers,
jamo, jamoExtA, jamoExtB, javanese, kaithi, kanaExtA, smallKanaExt, kanaSup, kanbun, kangxi, kannada, katakana, katakanaExt, kayahLi, kharoshthi, khmer, khmerSymbols, khojki, khudawadi,
lao, latin1Sup, latinExtA, latinExtAdditional, latinExtB, latinExtC, latinExtD, latinExtE, lepcha, letterlikeSymbols, limbu,
linearA, linearBIdeograms, linearBSyllabary, lisu, lowSurrogates, lycian, lydian,
mahajani, mahjong, makasar, malayalam, mandaic, manichean, marchen, masaramGondi, mathAlphanum, mathOperators, mayanNumerals, medefaidrin, meeteiMayek, meeteiMayekExt, mendeKikakui, meroiticCursive, meroiticHieroglyphs,
miao, miscArrows, miscMathSymbolsA, miscMathSymbolsB, miscPictographs, miscSymbols, miscTechnical, modi, modifierLetters, modifierToneLetters, mongolian, mongolianSup,
mro, music, multani, myanmar, myanmarExtA, myanmarExtB,
nabataean, nb, nandinagari, newTaiLue, newa, nko, numberForms, nushu, nyiakengPuachueHmong,
ocr, ogham, olChiki, oldHungarian, oldItalic, oldNorthArabian, oldPermic, oldPersian, oldSogdian, oldSouthArabian, oldTurkic, oriya, ornamentalDingbats, osage, osmanya, ottomanSiyaqNumbers,
pahawhHmong, palmyrene, pauCinHau, phagsPa, phaistos, phoenician, phoneticExt, phoneticExtSup, playingCards, psalterPahlavi, pua, punctuation,
rejang, rumi, runic,
samaritan, saurashtra, sharada, shavian, shorthandFormatControls, siddham, sinhala, sinhalaArchaicNumbers, smallForms, sogdian, soraSompeng, soyombo, specials, sundanese, sundaneseSup,
supArrowsA, supArrowsB, supArrowsC, supMathOperators, supPuaA, supPuaB, supPunctuation, supSymbolsAndPictographs, superAndSub, suttonSignWriting, sylotiNagri, symbolsAndPictographsExtA, syriac,
syriacSup,
tagalog, tagbanwa, tags, taiLe, taiTham, taiViet, taiXuanJing, takri, tamil, tamilSup, tangut, tangutComponents, telugu, thaana, thai, tibetan, tifinagh, tirhuta, transportAndMap,
ucas, ucasExt, ugaritic, vai, vedicExt, verticalForms, vs, vsSup, wancho, warangCiti, yiRadicals, yiSyllables, yijing, zanabazarSquare
};
207
208 struct BlockIdHash
209 {
210 size_t operator()(BlockId id) const
211 {
212 return std::hash<uint16_t>()(static_cast<uint16_t>(id));
213 }
214 };
215
216 class Block
217 {
218 public:
219 Block(BlockId id_, const std::string& shortName_, const std::string& longName_, char32_t start, char32_t end_);
220 BlockId Id() const { return id; }
221 const std::string& ShortName() const { return shortName; }
222 const std::string& LongName() const { return longName; }
223 char32_t Start() const { return start; }
224 char32_t End() const { return end; }
225 private:
226 BlockId id;
227 std::string shortName;
228 std::string longName;
229 char32_t start;
230 char32_t end;
231 };
232
233 class BlockTable
234 {
235 public:
236 static void Init();
237 static void Done();
238 static BlockTable& Instance() { return *instance; }
239 const Block& GetBlock(BlockId blockId) const;
240 const Block& GetBlockByShortName(const std::string& shortName) const;
241 const Block& GetBlockByLongName(const std::string& longName) const;
242 const std::std::vector<Block>&Blocks() const{returnblocks;}
243 private:
244 BlockTable();
245 static std::unique_ptr<BlockTable> instance;
246 std::unordered_map<BlockId, const Block*, BlockIdHash> blockIdMap;
247 std::unordered_map<std::string, const Block*> shortNameMap;
248 std::unordered_map<std::string, const Block*> longNameMap;
249 std::vector<Block> blocks;
250 };
251
252 inline const Block& GetBlock(BlockId blockId)
253 {
254 return BlockTable::Instance().GetBlock(blockId);
255 }
256
257 inline const Block& GetBlockByShortName(const std::string& shortName)
258 {
259 return BlockTable::Instance().GetBlockByShortName(shortName);
260 }
261
262 inline const Block& GetBlockByLongName(const std::string& longName)
263 {
264 return BlockTable::Instance().GetBlockByLongName(longName);
265 }
266
// General category identifiers encoded as a bit set.
// Each two-letter category occupies one bit (bits 0..29). The one-letter and
// composite entries (LC, L, M, N, P, S, Z, C, G, B) are unions of those bits,
// so membership can be tested with operator&.
enum class GeneralCategoryId : uint32_t
{
    none = 0,

    // Bits 0..4
    Lu = 1 << 0,
    Ll = 1 << 1,
    Lt = 1 << 2,
    Lm = 1 << 3,
    Lo = 1 << 4,
    LC = Lu | Ll | Lt,
    L = Lu | Ll | Lt | Lm | Lo,

    // Bits 5..7
    Mn = 1 << 5,
    Mc = 1 << 6,
    Me = 1 << 7,
    M = Mn | Mc | Me,

    // Bits 8..10
    Nd = 1 << 8,
    Nl = 1 << 9,
    No = 1 << 10,
    N = Nd | Nl | No,

    // Bits 11..17
    Pc = 1 << 11,
    Pd = 1 << 12,
    Ps = 1 << 13,
    Pe = 1 << 14,
    Pi = 1 << 15,
    Pf = 1 << 16,
    Po = 1 << 17,
    P = Pc | Pd | Ps | Pe | Pi | Pf | Po,

    // Bits 18..21
    Sm = 1 << 18,
    Sc = 1 << 19,
    Sk = 1 << 20,
    So = 1 << 21,
    S = Sm | Sc | Sk | So,

    // Bits 22..24
    Zs = 1 << 22,
    Zl = 1 << 23,
    Zp = 1 << 24,
    Z = Zs | Zl | Zp,

    // Bits 25..29
    Cc = 1 << 25,
    Cf = 1 << 26,
    Cs = 1 << 27,
    Co = 1 << 28,
    Cn = 1 << 29,
    C = Cc | Cf | Cs | Co | Cn,

    // Composite sets built from the groups above.
    G = L | M | N | P | S | Zs,
    B = L | N | P | S | Zs
};
288
289 inline GeneralCategoryId operator&(GeneralCategoryId left, GeneralCategoryId right)
290 {
291 return GeneralCategoryId(uint32_t(left) & uint32_t(right));
292 }
293
294 inline GeneralCategoryId operator|(GeneralCategoryId left, GeneralCategoryId right)
295 {
296 return GeneralCategoryId(uint32_t(left) | uint32_t(right));
297 }
298
299 struct GeneralCategoryIdHash
300 {
301 size_t operator()(GeneralCategoryId id) const
302 {
303 return std::hash<uint32_t>()(static_cast<uint32_t>(id));
304 }
305 };
306
307 class GeneralCategory
308 {
309 public:
310 GeneralCategory(GeneralCategoryId id_, const std::string& shortName_, const std::string& longName_);
311 GeneralCategoryId Id() const { return id; }
312 const std::string& ShortName() const { return shortName; }
313 const std::string& LongName() const { return longName; }
314 private:
315 GeneralCategoryId id;
316 std::string shortName;
317 std::string longName;
318 };
319
320 class GeneralCategoryTable
321 {
322 public:
323 static void Init();
324 static void Done();
325 static GeneralCategoryTable& Instance() { return *instance; }
326 const GeneralCategory& GetGeneralCategory(GeneralCategoryId generalCategoryId) const;
327 const GeneralCategory& GetGeneralCategoryByShortName(const std::string& shortName) const;
328 const GeneralCategory& GetGeneralCategoryByLongName(const std::string& shortName) const;
329 const std::std::vector<GeneralCategory>&GeneralCategories() const{returngeneralCategories;}
330 private:
331 GeneralCategoryTable();
332 static std::unique_ptr<GeneralCategoryTable> instance;
333 std::unordered_map<GeneralCategoryId, const GeneralCategory*, GeneralCategoryIdHash> generalCategoryIdMap;
334 std::unordered_map<std::string, const GeneralCategory*> shortNameMap;
335 std::unordered_map<std::string, const GeneralCategory*> longNameMap;
336 std::vector<GeneralCategory> generalCategories;
337 };
338
339 inline const GeneralCategory& GetGeneralCategory(GeneralCategoryId id)
340 {
341 return GeneralCategoryTable::Instance().GetGeneralCategory(id);
342 }
343
344 inline const GeneralCategory& GetGeneralCategoryByShortName(const std::string& shortName)
345 {
346 return GeneralCategoryTable::Instance().GetGeneralCategoryByShortName(shortName);
347 }
348
349 inline const GeneralCategory& GetGeneralCategoryByLongName(const std::string& longName)
350 {
351 return GeneralCategoryTable::Instance().GetGeneralCategoryByLongName(longName);
352 }
353
// Identifiers of the "age" values, one per version number encoded in the enumerator name.
// `age_unassigned` is pinned to 0; the remaining values are implicit and
// sequential, so each id's numeric value depends on its position in this list.
enum class AgeId : uint8_t
{
    age_unassigned = 0,
    age_1_1,
    age_2_0,
    age_2_1,
    age_3_0,
    age_3_1,
    age_3_2,
    age_4_0,
    age_4_1,
    age_5_0,
    age_5_1,
    age_5_2,
    age_6_0,
    age_6_1,
    age_6_2,
    age_6_3,
    age_7_0,
    age_8_0,
    age_9_0,
    age_10_0,
    age_11_0,
    age_12_0,
    age_12_1
};
370
371 struct AgeIdHash
372 {
373 size_t operator()(AgeId id) const
374 {
375 return std::hash<uint8_t>()(static_cast<uint8_t>(id));
376 }
377 };
378
379 class Age
380 {
381 public:
382 Age(AgeId id_, const std::string& version_);
383 AgeId Id() const { return id; }
384 const std::string& Version() const { return version; }
385 private:
386 AgeId id;
387 std::string version;
388 };
389
390 class AgeTable
391 {
392 public:
393 static void Init();
394 static void Done();
395 static AgeTable& Instance() { return *instance; }
396 const Age& GetAge(AgeId id) const;
397 const Age& GetAge(const std::string& version) const;
398 const std::std::vector<Age>&Ages() const{returnages;}
399 private:
400 static std::unique_ptr<AgeTable> instance;
401 AgeTable();
402 std::unordered_map<AgeId, const Age*, AgeIdHash> ageIdMap;
403 std::unordered_map<std::string, const Age*> versionMap;
404 std::vector<Age> ages;
405 };
406
407 inline const Age& GetAge(AgeId id)
408 {
409 return AgeTable::Instance().GetAge(id);
410 }
411
412 inline const Age& GetAge(const std::string& version)
413 {
414 return AgeTable::Instance().GetAge(version);
415 }
416
// Identifiers of the scripts known to this library, stored in a uint8_t.
// The enumerators are four-letter lowercase codes. `none` is pinned to 0; every
// other enumerator gets the next sequential value, so the numeric value of each
// id depends on its position in this list.
// NOTE(review): CharacterInfo stores a ScriptId and declares Write/Read members, so these
// values are presumably persisted - confirm before inserting or reordering enumerators.
// The underlying type limits the list to 256 entries.
enum class ScriptId : uint8_t
{
none= 0,
adlm, aghb, ahom, arab, armi, armn, avst,
bali, bamu, bass, batk, beng, bhks, bopo, brah, brai, bugi, buhd,
cakm, cans, cari, cham, cher, copt, cprt, cyrl,
deva, dogr, dsrt, dupl,
elba, elym, egyp, ethi,
geor, glag, gong, gonm, goth, gran, grek, gujr, guru,
hang, hani, hano, hatr, hebr, hira, hluw, hmng, hmnp, hrkt, hung,
ital,
java,
kali, kana, khar, khmr, khoj, knda, kthi,
lana, laoo, latn, lepc, limb, lina, linb, lisu, lyci, lydi,
mahj, maka, mand, mani, marc, medf, mend, merc, mero, mlym, modi, mong, mroo, mtei, mult, mymr,
nand, narb, nbat, newa, nkoo, nshu,
ogam, olck, orkh, orya, osge, osma,
palm, pauc, perm, phag, phli, phlp, phnx, plrd, prti,
qaai,
rjng, rohg, runr,
samr, sarb, saur, sgnw, shaw, shrd, sidd, sind, sinh, sogd, sogo, sora, soyo, sund, sylo, syrc,
tagb, takr, tale, talu, taml, tang, tavt, telu, tfng, tglg, thaa, thai, tibt, tirh,
ugar,
vaii,
wara,
wcho,
xpeo, xsux,
yiii, zanb,
zinh, zyyy, zzzz
};
447
448 struct ScriptIdHash
449 {
450 size_t operator()(ScriptId id) const
451 {
452 return std::hash<uint8_t>()(static_cast<uint8_t>(id));
453 }
454 };
455
456 class Script
457 {
458 public:
459 Script(ScriptId id_, const std::string& shortName_, const std::string& longName_);
460 ScriptId Id() const { return id; }
461 const std::string& ShortName() const { return shortName; }
462 const std::string& LongName() const { return longName; }
463 private:
464 ScriptId id;
465 std::string shortName;
466 std::string longName;
467 };
468
469 class ScriptTable
470 {
471 public:
472 static void Init();
473 static void Done();
474 static ScriptTable& Instance() { return *instance; }
475 const Script& GetScript(ScriptId id) const;
476 const Script& GetScriptByShortName(const std::string& shortName) const;
477 const Script& GetScriptByLongName(const std::string& longName) const;
478 const std::std::vector<Script>&Scripts() const{returnscripts;}
479 private:
480 static std::unique_ptr<ScriptTable> instance;
481 ScriptTable();
482 std::vector<Script> scripts;
483 std::unordered_map<ScriptId, const Script*, ScriptIdHash> scriptIdMap;
484 std::unordered_map<std::string, const Script*> shortNameMap;
485 std::unordered_map<std::string, const Script*> longNameMap;
486 };
487
488 inline const Script& GetScript(ScriptId id)
489 {
490 return ScriptTable::Instance().GetScript(id);
491 }
492
493 inline const Script& GetScriptByShortName(const std::string& shortName)
494 {
495 return ScriptTable::Instance().GetScriptByShortName(shortName);
496 }
497
498 inline const Script& GetScriptByLongName(const std::string& longName)
499 {
500 return ScriptTable::Instance().GetScriptByLongName(longName);
501 }
502
503 class CharacterInfo
504 {
505 public:
506 CharacterInfo();
507 bool GetBinaryProperty(BinaryPropertyId binaryPropertyId) const
508 {
509 uint64_t mask = static_cast<uint64_t>(1) << static_cast<uint64_t>(binaryPropertyId);
510 return (binaryProperties & mask) != 0;
511 }
512 void SetBinaryPropery(BinaryPropertyId binaryPropertyId, bool value)
513 {
514 uint64_t bit = static_cast<uint64_t>(1) << static_cast<uint64_t>(binaryPropertyId);
515 if (value)
516 {
517 binaryProperties = binaryProperties | bit;
518 }
519 else
520 {
521 binaryProperties = binaryProperties & ~bit;
522 }
523 }
524 GeneralCategoryId GetGeneralCategory() const
525 {
526 return generalCategory;
527 }
528 bool HasGeneralCategory(GeneralCategoryId generalCategory_) const
529 {
530 return (generalCategory & generalCategory_) != GeneralCategoryId::none;
531 }
532 void SetGeneralCategory(GeneralCategoryId generalCategory_)
533 {
534 generalCategory = generalCategory_;
535 }
536 char32_t Upper() const
537 {
538 return upper;
539 }
540 void SetUpper(char32_t upper_)
541 {
542 upper = upper_;
543 }
544 char32_t Lower() const
545 {
546 return lower;
547 }
548 void SetLower(char32_t lower_)
549 {
550 lower = lower_;
551 }
552 char32_t Title() const
553 {
554 return title;
555 }
556 void SetTitle(char32_t title_)
557 {
558 title = title_;
559 }
560 char32_t Folding() const
561 {
562 return folding;
563 }
564 void SetFolding(char32_t folding_)
565 {
566 folding = folding_;
567 }
568 BlockId GetBlock() const
569 {
570 return block;
571 }
572 void SetBlock(BlockId block_)
573 {
574 block = block_;
575 }
576 AgeId GetAge() const
577 {
578 return age;
579 }
580 void SetAge(AgeId age_)
581 {
582 age = age_;
583 }
584 ScriptId GetScript() const
585 {
586 return script;
587 }
588 void SetScript(ScriptId script_)
589 {
590 script = script_;
591 }
592 void Write(BinaryWriter& writer);
593 void Read(BinaryReader& reader);
594 private:
595 uint64_t binaryProperties;
596 GeneralCategoryId generalCategory;
597 char32_t upper;
598 char32_t lower;
599 char32_t title;
600 char32_t folding;
601 BlockId block;
602 AgeId age;
603 ScriptId script;
604 };
605
606 constexpr int numInfosInPage = 128;
607 constexpr size_t characterInfoSize = sizeof(uint64_t) + sizeof(BlockId) + sizeof(GeneralCategoryId) + sizeof(AgeId) + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint32_t) +
608 sizeof(uint32_t) + sizeof(ScriptId);
609 constexpr size_t characterInfoPageSize = numInfosInPage * characterInfoSize;
610
// Identifiers of the numeric types.
// `none` is pinned to 0; the remaining values are implicit and sequential.
enum class NumericTypeId : uint8_t
{
    none = 0,
    de,
    di,
    nu
};
616
617 struct NumericTypeIdHash
618 {
619 size_t operator()(NumericTypeId id) const
620 {
621 return std::hash<uint8_t>()(static_cast<uint8_t>(id));
622 }
623 };
624
625 class NumericType
626 {
627 public:
628 NumericType(NumericTypeId id_, const std::string& shortName_, const std::string& longName_);
629 NumericTypeId Id() const { return id; }
630 const std::string& ShortName() const { return shortName; }
631 const std::string& LongName() const { return longName; }
632 private:
633 NumericTypeId id;
634 std::string shortName;
635 std::string longName;
636 };
637
638 class NumericTypeTable
639 {
640 public:
641 static void Init();
642 static void Done();
643 static NumericTypeTable& Instance() { return *instance; }
644 const NumericType& GetNumericType(NumericTypeId id) const;
645 const NumericType& GetNumericTypeByShortName(const std::string& shortName) const;
646 const NumericType& GetNumericTypeByLongName(const std::string& longName) const;
647 const std::std::vector<NumericType>&NumericTypes() const{returnnumericTypes;}
648 private:
649 static std::unique_ptr<NumericTypeTable> instance;
650 NumericTypeTable();
651 std::vector<NumericType> numericTypes;
652 std::unordered_map<NumericTypeId, const NumericType*, NumericTypeIdHash> numericTypeMap;
653 std::unordered_map<std::string, const NumericType*> shortNameMap;
654 std::unordered_map<std::string, const NumericType*> longNameMap;
655 };
656
657 inline const NumericType& GetNumericType(NumericTypeId id)
658 {
659 return NumericTypeTable::Instance().GetNumericType(id);
660 }
661
662 inline const NumericType& GetNumericTypeByShortName(const std::string& shortName)
663 {
664 return NumericTypeTable::Instance().GetNumericTypeByShortName(shortName);
665 }
666
667 inline const NumericType& GetNumericTypeByLongName(const std::string& longName)
668 {
669 return NumericTypeTable::Instance().GetNumericTypeByLongName(longName);
670 }
671
// Identifiers of the bidi classes.
// `none` is pinned to 0; the remaining values are implicit and sequential,
// so each id's numeric value depends on its position in this list.
enum class BidiClassId : uint8_t
{
    none = 0,
    al,
    an,
    b,
    bn,
    cs,
    en,
    es,
    et,
    fsi,
    l,
    lre,
    lri,
    lro,
    nsm,
    on,
    pdf,
    pdi,
    r,
    rle,
    rli,
    rlo,
    s,
    ws
};
677
678 struct BidiClassIdHash
679 {
680 size_t operator()(BidiClassId id) const
681 {
682 return std::hash<uint8_t>()(static_cast<uint8_t>(id));
683 }
684 };
685
686 class BidiClass
687 {
688 public:
689 BidiClass(BidiClassId id_, const std::string& shortName_, const std::string& longName_);
690 BidiClassId Id() const { return id; }
691 const std::string& ShortName() const { return shortName; }
692 const std::string& LongName() const { return longName; }
693 private:
694 BidiClassId id;
695 std::string shortName;
696 std::string longName;
697 };
698
699 class BidiClassTable
700 {
701 public:
702 static void Init();
703 static void Done();
704 static BidiClassTable& Instance() { return *instance; }
705 const BidiClass& GetBidiClass(BidiClassId id) const;
706 const BidiClass& GetBidiClassByShortName(const std::string& shortName) const;
707 const BidiClass& GetBidiClassByLongName(const std::string& longName) const;
708 const std::std::vector<BidiClass>&BidiClasses() const{returnbidiClasses;}
709 private:
710 static std::unique_ptr<BidiClassTable> instance;
711 BidiClassTable();
712 std::vector<BidiClass> bidiClasses;
713 std::unordered_map<BidiClassId, const BidiClass*, BidiClassIdHash> bidiClassMap;
714 std::unordered_map<std::string, const BidiClass*> shortNameMap;
715 std::unordered_map<std::string, const BidiClass*> longNameMap;
716 };
717
718 inline const BidiClass& GetBidiClass(BidiClassId id)
719 {
720 return BidiClassTable::Instance().GetBidiClass(id);
721 }
722
723 inline const BidiClass& GetBidiClassByShortName(const std::string& shortName)
724 {
725 return BidiClassTable::Instance().GetBidiClassByShortName(shortName);
726 }
727
728 inline const BidiClass& GetBidiClassByLongName(const std::string& longtName)
729 {
730 return BidiClassTable::Instance().GetBidiClassByLongName(longtName);
731 }
732
// Identifiers of the bidi paired bracket types.
// `none` is pinned to 0; the remaining values are implicit and sequential.
enum class BidiPairedBracketTypeId : uint8_t
{
    none = 0,
    o,
    c
};
738
739 struct BidiPairedBracketTypeIdHash
740 {
741 size_t operator()(BidiPairedBracketTypeId id) const
742 {
743 return std::hash<uint8_t>()(static_cast<uint8_t>(id));
744 }
745 };
746
747 class BidiPairedBracketType
748 {
749 public:
750 BidiPairedBracketType(BidiPairedBracketTypeId id_, const std::string& shortName_, const std::string& longName_);
751 BidiPairedBracketTypeId Id() const { return id; }
752 const std::string& ShortName() const { return shortName; }
753 const std::string& LongName() const { return longName; }
754 private:
755 BidiPairedBracketTypeId id;
756 std::string shortName;
757 std::string longName;
758 };
759
760 class BidiPairedBracketTypeTable
761 {
762 public:
763 static void Init();
764 static void Done();
765 static BidiPairedBracketTypeTable& Instance() { return *instance; }
766 const BidiPairedBracketType& GetBidiPairedBracketType(BidiPairedBracketTypeId id) const;
767 const BidiPairedBracketType& GetBidiPairedBracketTypeByShortName(const std::string& shortName) const;
768 const BidiPairedBracketType& GetBidiPairedBracketTypeByLongName(const std::string& longName) const;
769 const std::std::vector<BidiPairedBracketType>&BidiPairedBracketTypes() const{returnbidiPairedBracketTypes;}
770 private:
771 static std::unique_ptr<BidiPairedBracketTypeTable> instance;
772 BidiPairedBracketTypeTable();
773 std::vector<BidiPairedBracketType> bidiPairedBracketTypes;
774 std::unordered_map<BidiPairedBracketTypeId, const BidiPairedBracketType*, BidiPairedBracketTypeIdHash> typeMap;
775 std::unordered_map<std::string, const BidiPairedBracketType*> shortNameMap;
776 std::unordered_map<std::string, const BidiPairedBracketType*> longNameMap;
777 };
778
779 inline const BidiPairedBracketType& GetBidiPairedBracketType(BidiPairedBracketTypeId id)
780 {
781 return BidiPairedBracketTypeTable::Instance().GetBidiPairedBracketType(id);
782 }
783
784 inline const BidiPairedBracketType& GetBidiPairedBracketTypeByShortName(const std::string& shortName)
785 {
786 return BidiPairedBracketTypeTable::Instance().GetBidiPairedBracketTypeByShortName(shortName);
787 }
788
789 inline const BidiPairedBracketType& GetBidiPairedBracketTypeByLongName(const std::string& longName)
790 {
791 return BidiPairedBracketTypeTable::Instance().GetBidiPairedBracketTypeByLongName(longName);
792 }
793
// Identifiers of the alias types.
// `none` is pinned to 0; the remaining values are implicit and sequential.
enum class AliasTypeId : uint8_t
{
    none = 0,
    correction,
    control,
    alternate,
    figment,
    abbreviation
};
799
800 struct AliasTypeIdHash
801 {
802 size_t operator()(AliasTypeId id) const
803 {
804 return std::hash<uint8_t>()(static_cast<uint8_t>(id));
805 }
806 };
807
808 class AliasType
809 {
810 public:
811 AliasType(AliasTypeId id_, const std::string& name_);
812 AliasTypeId Id() const { return id; }
813 const std::string& Name() const { return name; }
814 private:
815 AliasTypeId id;
816 std::string name;
817 };
818
819 class AliasTypeTable
820 {
821 public:
822 static void Init();
823 static void Done();
824 static AliasTypeTable& Instance() { return *instance; }
825 const AliasType& GetAliasType(AliasTypeId id) const;
826 const AliasType& GetAliasType(const std::string& typeName) const;
827 const std::std::vector<AliasType>&AliasTypes() const{returnaliasTypes;}
828 private:
829 static std::unique_ptr<AliasTypeTable> instance;
830 AliasTypeTable();
831 std::vector<AliasType> aliasTypes;
832 std::unordered_map<AliasTypeId, const AliasType*, AliasTypeIdHash> aliasTypeMap;
833 std::unordered_map<std::string, const AliasType*> typeNameMap;
834 };
835
836 inline const AliasType& GetAliasType(AliasTypeId id)
837 {
838 return AliasTypeTable::Instance().GetAliasType(id);
839 }
840
841 inline const AliasType& GetAliasType(const std::string& typeName)
842 {
843 return AliasTypeTable::Instance().GetAliasType(typeName);
844 }
845
846 class Alias
847 {
848 public:
849 Alias();
850 Alias(AliasTypeId typeId_, const std::string& name_);
851 AliasTypeId TypeId() const { return typeId; }
852 const std::string& Name() const { return name; }
853 void Write(BinaryWriter& writer);
854 void Read(BinaryReader& reader);
855 private:
856 AliasTypeId typeId;
857 std::string name;
858 };
859
860 class ExtendedCharacterInfo
861 {
862 public:
863 ExtendedCharacterInfo();
864 const std::string& CharacterName() const
865 {
866 return characterName;
867 }
868 void SetCharacterName(const std::string& characterName_);
869 const std::string& Unicode1Name() const
870 {
871 return unicode1Name;
872 }
873 void SetUnicode1Name(const std::string& unicode1Name_);
874 uint8_t GetCanonicalCombiningClass() const
875 {
876 return canonicalCombiningClass;
877 }
878 void SetCanonicalCombiningClass(uint8_t canonicalCombiningClass_)
879 {
880 canonicalCombiningClass = canonicalCombiningClass_;
881 }
882 const std::u32string& FullUpper() const
883 {
884 return fullUpper;
885 }
886 std::u32string& FullUpper()
887 {
888 return fullUpper;
889 }
890 const std::u32string& FullLower() const
891 {
892 return fullLower;
893 }
894 std::u32string& FullLower()
895 {
896 return fullLower;
897 }
898 const std::u32string& FullTitle() const
899 {
900 return fullTitle;
901 }
902 std::u32string& FullTitle()
903 {
904 return fullTitle;
905 }
906 const std::u32string& FullFolding() const
907 {
908 return fullFolding;
909 }
910 std::u32string& FullFolding()
911 {
912 return fullFolding;
913 }
914 BidiClassId GetBidiClass() const
915 {
916 return bidiClass;
917 }
918 void SetBidiClass(BidiClassId bidiClass_)
919 {
920 bidiClass = bidiClass_;
921 }
922 NumericTypeId GetNumericType() const
923 {
924 return numericType;
925 }
926 void SetNumericType(NumericTypeId numericType_)
927 {
928 numericType = numericType_;
929 }
930 const std::string& GetNumericValue() const
931 {
932 return numericValue;
933 }
934 void SetNumericValue(const std::string& numericValue_)
935 {
936 numericValue = numericValue_;
937 }
938 const std::std::vector<Alias>&Aliases() const
939 {
940 return aliases;
941 }
942 std::std::vector<Alias>&Aliases()
943 {
944 return aliases;
945 }
946 char32_t GetBidiMirroringGlyph() const
947 {
948 return bidiMirroringGlyph;
949 }
950 void SetBidiMirroringGlyph(char32_t bidiMirroringGlyph_)
951 {
952 bidiMirroringGlyph = bidiMirroringGlyph_;
953 }
954 BidiPairedBracketTypeId GetBidiPairedBracketType() const
955 {
956 return bidiPairedBracketType;
957 }
958 void SetBidiPairedBracketType(BidiPairedBracketTypeId bidiPairedBracketType_)
959 {
960 bidiPairedBracketType = bidiPairedBracketType_;
961 }
962 char32_t GetBidiPairedBracket() const
963 {
964 return bidiPairedBracket;
965 }
966 void SetBidiPairedBracket(char32_t bidiPairedBracket_)
967 {
968 bidiPairedBracket = bidiPairedBracket_;
969 }
970 void Write(BinaryWriter& writer);
971 void Read(BinaryReader& reader);
972 private:
973 std::string characterName;
974 std::string unicode1Name;
975 uint8_t canonicalCombiningClass;
976 std::u32string fullUpper;
977 std::u32string fullLower;
978 std::u32string fullTitle;
979 std::u32string fullFolding;
980 BidiClassId bidiClass;
981 NumericTypeId numericType;
982 std::string numericValue;
983 std::vector<Alias> aliases;
984 char32_t bidiMirroringGlyph;
985 BidiPairedBracketTypeId bidiPairedBracketType;
986 char32_t bidiPairedBracket;
987 };
988
989 class CharacterInfoPage
990 {
991 public:
992 CharacterInfoPage();
993 CharacterInfoPage(const CharacterInfoPage&) = delete;
994 CharacterInfoPage& operator=(const CharacterInfoPage&) = delete;
995 const CharacterInfo& GetCharacterInfo(int index) const;
996 CharacterInfo& GetCharacterInfo(int index);
997 void Write(BinaryWriter& writer);
998 void Read(BinaryReader& reader);
999 private:
1000 std::vector<CharacterInfo> characterInfos;
1001 };
1002
1003 class ExtendedCharacterInfoPage
1004 {
1005 public:
1006 ExtendedCharacterInfoPage();
1007 ExtendedCharacterInfoPage(const ExtendedCharacterInfoPage&) = delete;
1008 ExtendedCharacterInfoPage& operator=(const ExtendedCharacterInfoPage&) = delete;
1009 const ExtendedCharacterInfo& GetExtendedCharacterInfo(int index) const;
1010 ExtendedCharacterInfo& GetExtendedCharacterInfo(int index);
1011 void Write(BinaryWriter& writer);
1012 void Read(BinaryReader& reader);
1013 private:
1014 std::vector<ExtendedCharacterInfo> extendedCharacterInfos;
1015 };
1016
1017 class ExtendedCharacterInfoHeader
1018 {
1019 public:
1020 void AllocatePages(int numExtendedPages);
1021 void Write(BinaryWriter& writer);
1022 void Read(BinaryReader& reader);
1023 uint32_t GetPageStart(int pageIndex) const;
1024 void SetPageStart(int pageIndex, uint32_t extendedPageStart);
1025 private:
1026 std::vector<uint32_t> extendedPageStarts;
1027 };
1028
// Version tags, each stored as the character code of a digit ('1', '2', '3').
constexpr uint8_t cmajor_ucd_version_1 = '1';
constexpr uint8_t cmajor_ucd_version_2 = '2';
constexpr uint8_t cmajor_ucd_version_3 = '3';
// The tag currently in use is version 3.
constexpr uint8_t current_cmajor_ucd_version = cmajor_ucd_version_3;
1033
1034 class CharacterTable
1035 {
1036 public:
1037 CharacterTable(const CharacterTable&) = delete;
1038 CharacterTable& operator=(const CharacterTable&) = delete;
1039 static void Init();
1040 static void Done();
1041 static CharacterTable& Instance() { return *instance; }
1042 const CharacterInfo& GetCharacterInfo(char32_t codePoint);
1043 CharacterInfo& CreateCharacterInfo(char32_t codePoint);
1044 const ExtendedCharacterInfo& GetExtendedCharacterInfo(char32_t codePoint);
1045 ExtendedCharacterInfo& CreateExtendedCharacterInfo(char32_t codePoint);
1046 void Write();
1047 private:
1048 static std::unique_ptr<CharacterTable> instance;
1049 CharacterTable();
1050 bool headerRead;
1051 std::vector<std::std::unique_ptr<CharacterInfoPage>>pages;
1052 uint32_t extendedHeaderStart;
1053 uint32_t extendedHeaderEnd;
1054 bool extendedHeaderRead;
1055 ExtendedCharacterInfoHeader extendedHeader;
1056 std::vector<std::std::unique_ptr<ExtendedCharacterInfoPage>>extendedPages;
1057 void WriteHeader(BinaryWriter& writer);
1058 void ReadHeader(BinaryReader& reader);
1059 void ReadExtendedHeader(BinaryReader& reader);
1060 const size_t headerSize = 4096;
1061 };
1062
1063 inline const CharacterInfo& GetCharacterInfo(char32_t codePoint) { return CharacterTable::Instance().GetCharacterInfo(codePoint); }
1064 inline CharacterInfo& CreateCharacterInfo(char32_t codePoint) { return CharacterTable::Instance().CreateCharacterInfo(codePoint); }
1065 inline const ExtendedCharacterInfo& GetExtendedCharacterInfo(char32_t codePoint) { return CharacterTable::Instance().GetExtendedCharacterInfo(codePoint); }
1066 inline ExtendedCharacterInfo& CreateExtendedCharacterInfo(char32_t codePoint) { return CharacterTable::Instance().CreateExtendedCharacterInfo(codePoint); }
1067
1068 inline GeneralCategoryId GetGeneralCategory(char32_t c) { return GetCharacterInfo(c).GetGeneralCategory(); }
1069 inline bool HasGeneralCategory(char32_t c, GeneralCategoryId generalCategory) { return GetCharacterInfo(c).HasGeneralCategory(generalCategory); }
1070
1071 inline bool IsUpperLetter(char32_t c)
1072 {
1073 return GetGeneralCategory(c) == GeneralCategoryId::Lu;
1074 }
1075
1076 inline bool IsLowerLetter(char32_t c)
1077 {
1078 return GetGeneralCategory(c) == GeneralCategoryId::Ll;
1079 }
1080
1081 inline bool IsTitleLetter(char32_t c)
1082 {
1083 return GetGeneralCategory(c) == GeneralCategoryId::Lt;
1084 }
1085
1086 inline bool IsModifierLetter(char32_t c)
1087 {
1088 return GetGeneralCategory(c) == GeneralCategoryId::Lm;
1089 }
1090
1091 inline bool IsOtherLetter(char32_t c)
1092 {
1093 return GetGeneralCategory(c) == GeneralCategoryId::Lo;
1094 }
1095
1096 inline bool IsCasedLetter(char32_t c)
1097 {
1098 return HasGeneralCategory(c, GeneralCategoryId::LC);
1099 }
1100
1101 inline bool IsLetter(char32_t c)
1102 {
1103 return HasGeneralCategory(c, GeneralCategoryId::L);
1104 }
1105
1106 inline bool IsNonspacingMark(char32_t c)
1107 {
1108 return GetGeneralCategory(c) == GeneralCategoryId::Mn;
1109 }
1110
1111 inline bool IsSpacingMark(char32_t c)
1112 {
1113 return GetGeneralCategory(c) == GeneralCategoryId::Mc;
1114 }
1115
1116 inline bool IsEnclosingMark(char32_t c)
1117 {
1118 return GetGeneralCategory(c) == GeneralCategoryId::Me;
1119 }
1120
1121 inline bool IsMark(char32_t c)
1122 {
1123 return HasGeneralCategory(c, GeneralCategoryId::M);
1124 }
1125
1126 inline bool IsDecimalNumber(char32_t c)
1127 {
1128 return GetGeneralCategory(c) == GeneralCategoryId::Nd;
1129 }
1130
1131 inline bool IsLetterNumber(char32_t c)
1132 {
1133 return GetGeneralCategory(c) == GeneralCategoryId::Nl;
1134 }
1135
1136 inline bool IsOtherNumber(char32_t c)
1137 {
1138 return GetGeneralCategory(c) == GeneralCategoryId::No;
1139 }
1140
1141 inline bool IsNumber(char32_t c)
1142 {
1143 return HasGeneralCategory(c, GeneralCategoryId::N);
1144 }
1145
1146 inline bool IsConnectorPunctuation(char32_t c)
1147 {
1148 return GetGeneralCategory(c) == GeneralCategoryId::Pc;
1149 }
1150
1151 inline bool IsDashPunctuation(char32_t c)
1152 {
1153 return GetGeneralCategory(c) == GeneralCategoryId::Pd;
1154 }
1155
1156 inline bool IsOpenPunctuation(char32_t c)
1157 {
1158 return GetGeneralCategory(c) == GeneralCategoryId::Ps;
1159 }
1160
1161 inline bool IsClosePunctuation(char32_t c)
1162 {
1163 return GetGeneralCategory(c) == GeneralCategoryId::Pe;
1164 }
1165
1166 inline bool IsInitialPunctuation(char32_t c)
1167 {
1168 return GetGeneralCategory(c) == GeneralCategoryId::Pi;
1169 }
1170
1171 inline bool IsFinalPunctuation(char32_t c)
1172 {
1173 return GetGeneralCategory(c) == GeneralCategoryId::Pf;
1174 }
1175
1176 inline bool IsOtherPunctuation(char32_t c)
1177 {
1178 return GetGeneralCategory(c) == GeneralCategoryId::Po;
1179 }
1180
1181 inline bool IsPunctuation(char32_t c)
1182 {
1183 return HasGeneralCategory(c, GeneralCategoryId::P);
1184 }
1185
1186 inline bool IsMathSymbol(char32_t c)
1187 {
1188 return GetGeneralCategory(c) == GeneralCategoryId::Sm;
1189 }
1190
1191 inline bool IsCurrencySymbol(char32_t c)
1192 {
1193 return GetGeneralCategory(c) == GeneralCategoryId::Sc;
1194 }
1195
1196 inline bool IsModifierSymbol(char32_t c)
1197 {
1198 return GetGeneralCategory(c) == GeneralCategoryId::Sk;
1199 }
1200
1201 inline bool IsOtherSymbol(char32_t c)
1202 {
1203 return GetGeneralCategory(c) == GeneralCategoryId::So;
1204 }
1205
1206 inline bool IsSymbol(char32_t c)
1207 {
1208 return HasGeneralCategory(c, GeneralCategoryId::S);
1209 }
1210
1211 inline bool IsSpaceSeparator(char32_t c)
1212 {
1213 return GetGeneralCategory(c) == GeneralCategoryId::Zs;
1214 }
1215
1216 inline bool IsLineSeparator(char32_t c)
1217 {
1218 return GetGeneralCategory(c) == GeneralCategoryId::Zl;
1219 }
1220
1221 inline bool IsParagraphSeparator(char32_t c)
1222 {
1223 return GetGeneralCategory(c) == GeneralCategoryId::Zp;
1224 }
1225
1226 inline bool IsSeparator(char32_t c)
1227 {
1228 return HasGeneralCategory(c, GeneralCategoryId::Z);
1229 }
1230
1231 inline bool IsControl(char32_t c)
1232 {
1233 return GetGeneralCategory(c) == GeneralCategoryId::Cc;
1234 }
1235
1236 inline bool IsFormat(char32_t c)
1237 {
1238 return GetGeneralCategory(c) == GeneralCategoryId::Cf;
1239 }
1240
1241 inline bool IsSurrogate(char32_t c)
1242 {
1243 return GetGeneralCategory(c) == GeneralCategoryId::Cs;
1244 }
1245
1246 inline bool IsPrivateUse(char32_t c)
1247 {
1248 return GetGeneralCategory(c) == GeneralCategoryId::Co;
1249 }
1250
1251 inline bool IsUnassigned(char32_t c)
1252 {
1253 return GetGeneralCategory(c) == GeneralCategoryId::Cn;
1254 }
1255
1256 inline bool IsOther(char32_t c)
1257 {
1258 return HasGeneralCategory(c, GeneralCategoryId::C);
1259 }
1260
1261 inline bool IsGraphic(char32_t c)
1262 {
1263 return HasGeneralCategory(c, GeneralCategoryId::G);
1264 }
1265
1266 inline bool IsBase(char32_t c)
1267 {
1268 return HasGeneralCategory(c, GeneralCategoryId::B);
1269 }
1270
1271 inline bool IsCombining(char32_t c)
1272 {
1273 return IsMark(c);
1274 }
1275
1276 inline char32_t ToUpper(char32_t c)
1277 {
1278 return GetCharacterInfo(c).Upper();
1279 }
1280
1281 inline char32_t ToLower(char32_t c)
1282 {
1283 return GetCharacterInfo(c).Lower();
1284 }
1285
1286 inline char32_t ToTitle(char32_t c)
1287 {
1288 return GetCharacterInfo(c).Title();
1289 }
1290
1291 inline char32_t ToFolding(char32_t c)
1292 {
1293 return GetCharacterInfo(c).Folding();
1294 }
1295
1296 inline const std::u32string& FullUpper(char32_t c)
1297 {
1298 return GetExtendedCharacterInfo(c).FullUpper();
1299 }
1300
1301 inline const std::u32string& FullLower(char32_t c)
1302 {
1303 return GetExtendedCharacterInfo(c).FullLower();
1304 }
1305
1306 inline const std::u32string& FullTitle(char32_t c)
1307 {
1308 return GetExtendedCharacterInfo(c).FullTitle();
1309 }
1310
1311 inline const std::u32string& FullFolding(char32_t c)
1312 {
1313 return GetExtendedCharacterInfo(c).FullFolding();
1314 }
1315
1316 inline bool IsWhiteSpace(char32_t c)
1317 {
1318 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::whiteSpace);
1319 }
1320
1321 inline bool IsAlphabetic(char32_t c)
1322 {
1323 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::alphabetic);
1324 }
1325
1326 inline bool IsAsciiHexDigit(char32_t c)
1327 {
1328 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::asciiHexDigit);
1329 }
1330
// Declared here, defined out of line (unlike the neighbouring inline predicates).
// NOTE(review): presumably tests for '0'..'9' — confirm against the definition.
bool IsAsciiDigit(char32_t c);
1332
1333 inline bool IsUppercase(char32_t c)
1334 {
1335 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::uppercase);
1336 }
1337
1338 inline bool IsLowercase(char32_t c)
1339 {
1340 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::lowercase);
1341 }
1342
1343 inline bool IsIdStart(char32_t c)
1344 {
1345 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::idStart);
1346 }
1347
1348 inline bool IsIdCont(char32_t c)
1349 {
1350 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::idContinue);
1351 }
1352
1353 inline bool IsGraphemeBase(char32_t c)
1354 {
1355 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::graphemeBase);
1356 }
1357
1358 inline bool IsGraphemeExtender(char32_t c)
1359 {
1360 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::graphemeExtend);
1361 }
1362
1363 inline bool IsOtherLower(char32_t c)
1364 {
1365 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::otherLowercase);
1366 }
1367
1368 inline bool IsOtherUpper(char32_t c)
1369 {
1370 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::otherUppercase);
1371 }
1372
1373 inline const std::string& GetCharacterName(char32_t c)
1374 {
1375 return GetExtendedCharacterInfo(c).CharacterName();
1376 }
1377
1378 inline const std::string& GetUnicode1Name(char32_t c)
1379 {
1380 return GetExtendedCharacterInfo(c).Unicode1Name();
1381 }
1382
1383 inline NumericTypeId GetNumericType(char32_t c)
1384 {
1385 return GetExtendedCharacterInfo(c).GetNumericType();
1386 }
1387
1388 inline const std::string& GetNumericValue(char32_t c)
1389 {
1390 return GetExtendedCharacterInfo(c).GetNumericValue();
1391 }
1392
1393 inline bool IsBidiMirrored(char32_t c)
1394 {
1395 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::bidiMirrored);
1396 }
1397
1398 inline bool IsBidiControl(char32_t c)
1399 {
1400 return GetCharacterInfo(c).GetBinaryProperty(BinaryPropertyId::bidiControl);
1401 }
1402
1403 inline char32_t GetBidiMirroringGlyph(char32_t c)
1404 {
1405 return GetExtendedCharacterInfo(c).GetBidiMirroringGlyph();
1406 }
1407
1408 inline BidiPairedBracketTypeId GetBidiPairedBracketType(char32_t c)
1409 {
1410 return GetExtendedCharacterInfo(c).GetBidiPairedBracketType();
1411 }
1412
1413 inline char32_t GetBidiPairedBracket(char32_t c)
1414 {
1415 return GetExtendedCharacterInfo(c).GetBidiPairedBracket();
1416 }
1417
1418 inline const std::std::vector<Alias>&Aliases(char32_tc)
1419 {
1420 return GetExtendedCharacterInfo(c).Aliases();
1421 }
1422
// Library-level initialization / shutdown entry points; declared only, defined out of line.
// NOTE(review): presumably these wrap CharacterTable::Init() / CharacterTable::Done() — confirm in the implementation file.
void UnicodeInit();
void UnicodeDone();
1425
1426 } }
1427
1428 #endif // SOULNG_UNICODE_UNICODE_INCLUDED