1 #include <sngcpp/lexer/CppLexerApi.hpp>
2
// Name of the class map associated with this lexer.
// NOTE(review): presumably the generator emits the character class map under this name - confirm.
classmap CppClassMap;

// NOTE(review): presumably the include-path prefix used for the generated lexer files - confirm.
prefix "sngcpp/lexer";
6
// Token table of the C++ lexer. Each entry pairs a token identifier with a
// human-readable description string. Keyword and operator descriptions are
// wrapped in single quotes; literal/identifier descriptions are plain words.
// NOTE(review): entry order is deliberately left unchanged; token values are
// presumably derived from the position in this list - confirm before reordering.
tokens CppTokens
{
    // Keywords.
    (ALIGNAS, "'alignas'"), (ALIGNOF, "'alignof'"), (ASM, "'asm'"), (AUTO, "'auto'"), (BOOL, "'bool'"), (BREAK, "'break'"), (CASE, "'case'"), (CATCH, "'catch'"),
    (CHAR, "'char'"), (CHAR8_T, "'char8_t'"), (CHAR16_T, "'char16_t'"), (CHAR32_T, "'char32_t'"), (CLASS, "'class'"), (CONCEPT, "'concept'"), (CONST, "'const'"),
    (CONSTEVAL, "'consteval'"), (CONSTEXPR, "'constexpr'"),
    (CONSTINIT, "'constinit'"), (CONST_CAST, "'const_cast'"), (CONTINUE, "'continue'"), (CO_AWAIT, "'co_await'"), (CO_RETURN, "'co_return'"), (CO_YIELD, "'co_yield'"),
    (DECLTYPE, "'decltype'"), (DEFAULT, "'default'"), (DELETE, "'delete'"), (DO, "'do'"), (DOUBLE, "'double'"), (DYNAMIC_CAST, "'dynamic_cast'"), (ELSE, "'else'"),
    (ENUM, "'enum'"), (EXPLICIT, "'explicit'"), (EXPORT, "'export'"), (EXTERN, "'extern'"),
    (FALSE, "'false'"), (FLOAT, "'float'"), (FOR, "'for'"), (FRIEND, "'friend'"), (GOTO, "'goto'"), (IF, "'if'"), (INLINE, "'inline'"), (INT, "'int'"), (LONG, "'long'"),
    (MUTABLE, "'mutable'"), (NAMESPACE, "'namespace'"), (NEW, "'new'"), (NOEXCEPT, "'noexcept'"), (NULLPTR, "'nullptr'"), (OPERATOR, "'operator'"), (PRIVATE, "'private'"),
    (PROTECTED, "'protected'"),
    (PUBLIC, "'public'"), (REGISTER, "'register'"), (REINTERPRET_CAST, "'reinterpret_cast'"), (REQUIRES, "'requires'"), (RETURN, "'return'"), (SHORT, "'short'"), (SIGNED, "'signed'"),
    (SIZEOF, "'sizeof'"), (STATIC, "'static'"), (STATIC_ASSERT, "'static_assert'"), (STATIC_CAST, "'static_cast'"), (STRUCT, "'struct'"), (SWITCH, "'switch'"), (TEMPLATE, "'template'"),
    (THIS, "'this'"), (THREAD_LOCAL, "'thread_local'"), (THROW, "'throw'"),
    (TRUE, "'true'"), (TRY, "'try'"), (TYPEDEF, "'typedef'"), (TYPEID, "'typeid'"), (TYPENAME, "'typename'"), (UNION, "'union'"), (UNSIGNED, "'unsigned'"), (USING, "'using'"),
    (VIRTUAL, "'virtual'"), (VOID, "'void'"), (VOLATILE, "'volatile'"), (WCHAR_T, "'wchar_t'"), (WHILE, "'while'"),

    // 'override' / 'final' and the double-underscore extension keywords.
    (OVERRIDE, "'override'"), (FINAL, "'final'"), (__DECLSPEC, "'__declspec'"), (__THREAD, "'__thread'"),

    // Literals.
    (FLOATLIT, "floating literal"), (INTLIT, "integer literal"), (CHARLIT, "character literal"), (STRINGLIT, "string literal"),

    // Identifier, operators and punctuation.
    (ID, "identifier"), (COLONCOLON, "'::'"), (COMMA, "','"), (ASSIGN, "'='"), (MULASSIGN, "'*='"), (DIVASSIGN, "'/='"), (REMASSIGN, "'%='"), (ADDASSIGN, "'+='"), (SUBASSIGN, "'-='"),
    (SHIFTRIGHTASSIGN, "'>>='"), (SHIFTLEFTASSIGN, "'<<='"), (ANDASSIGN, "'&='"), (XORASSIGN, "'^='"), (ORASSIGN, "'|='"), (QUEST, "'?'"), (COLON, "':'"),
    // SPACESHIP description is quoted like every other operator description in this table.
    (OROR, "'||'"), (AMPAMP, "'&&'"), (OR, "'|'"), (XOR, "'^'"), (AMP, "'&'"), (EQ, "'=='"), (NEQ, "'!='"), (LEQ, "'<='"), (GEQ, "'>='"), (SPACESHIP, "'<=>'"), (LANGLE, "'<'"), (RANGLE, "'>'"),
    (SHIFTLEFT, "'<<'"), (SHIFTRIGHT, "'>>'"), (PLUS, "'+'"), (MINUS, "'-'"), (STAR, "'*'"), (DIV, "'/'"), (MOD, "'%'"), (DOTSTAR, "'.*'"), (ARROWSTAR, "'->*'"), (LPAREN, "'('"), (RPAREN, "')'"),
    (PLUSPLUS, "'++'"), (MINUSMINUS, "'--'"), (EXCLAMATION, "'!'"), (TILDE, "'~'"),
    (LBRACKET, "'['"), (RBRACKET, "']'"), (LBRACE, "'{'"), (RBRACE, "'}'"), (DOT, "'.'"), (ARROW, "'->'"), (SEMICOLON, "';'"), (ELLIPSES, "'...'"),

    // Final entry; no lexer rule in this file returns MAX.
    (MAX, "max")
}
33
// Keyword table: maps the exact spelling of each keyword to its token identifier.
// The identifier rule of the lexer looks matched text up here via GetKeywordToken.
// Entries keep their original sequence; rows are split by initial letter.
keywords CppKeywords
{
    ("alignas", ALIGNAS), ("alignof", ALIGNOF), ("asm", ASM), ("auto", AUTO),
    ("bool", BOOL), ("break", BREAK),
    ("case", CASE), ("catch", CATCH), ("char", CHAR), ("char8_t", CHAR8_T), ("char16_t", CHAR16_T), ("char32_t", CHAR32_T),
    ("class", CLASS), ("concept", CONCEPT), ("const", CONST), ("consteval", CONSTEVAL), ("constexpr", CONSTEXPR),
    ("constinit", CONSTINIT), ("const_cast", CONST_CAST), ("continue", CONTINUE),
    ("co_await", CO_AWAIT), ("co_return", CO_RETURN), ("co_yield", CO_YIELD),
    ("decltype", DECLTYPE), ("default", DEFAULT), ("delete", DELETE), ("do", DO), ("double", DOUBLE), ("dynamic_cast", DYNAMIC_CAST),
    ("else", ELSE), ("enum", ENUM), ("explicit", EXPLICIT), ("export", EXPORT), ("extern", EXTERN),
    ("false", FALSE), ("float", FLOAT), ("for", FOR), ("friend", FRIEND),
    ("goto", GOTO),
    ("if", IF), ("inline", INLINE), ("int", INT),
    ("long", LONG),
    ("mutable", MUTABLE),
    ("namespace", NAMESPACE), ("new", NEW), ("noexcept", NOEXCEPT), ("nullptr", NULLPTR),
    ("operator", OPERATOR),
    ("private", PRIVATE), ("protected", PROTECTED), ("public", PUBLIC),
    ("register", REGISTER), ("reinterpret_cast", REINTERPRET_CAST), ("requires", REQUIRES), ("return", RETURN),
    ("short", SHORT), ("signed", SIGNED), ("sizeof", SIZEOF), ("static", STATIC), ("static_assert", STATIC_ASSERT),
    ("static_cast", STATIC_CAST), ("struct", STRUCT), ("switch", SWITCH),
    ("template", TEMPLATE), ("this", THIS), ("thread_local", THREAD_LOCAL), ("throw", THROW), ("true", TRUE), ("try", TRY),
    ("typedef", TYPEDEF), ("typeid", TYPEID), ("typename", TYPENAME),
    ("union", UNION), ("unsigned", UNSIGNED), ("using", USING),
    ("virtual", VIRTUAL), ("void", VOID), ("volatile", VOLATILE),
    ("wchar_t", WCHAR_T), ("while", WHILE),
    ("override", OVERRIDE), ("final", FINAL),
    ("__thread", __THREAD), ("__declspec", __DECLSPEC)
}
52
// Named regular expressions used by the lexer rules. A name in braces, e.g.
// {digitsequence}, refers to another expression.
// NOTE(review): {idstart} and {idcont} are not defined in this file; presumably
// they are classes predefined by the lexer generator - confirm.
expressions
{
    // --- Whitespace, comments and preprocessor lines (all skipped as separators) ---
    ws = "[\n\r\t ]";
    newline = "\r\n|\n|\r";
    // The terminating newline is not part of the comment; it is consumed by {ws}
    // inside {separators}. This lets a comment on the last line of a file that has
    // no trailing newline still match.
    linecomment = "//[^\n\r]*";
    // Runs of '*' are handled explicitly so that comments such as "/***/" or
    // "/* text **/" end at their own "*/" instead of failing to match or running
    // on to a later "*/".
    blockcomment = "/\*([^*]|\*+[^*/])*\*+/";
    // As with linecomment, the line terminator is left to {ws}, so a directive on
    // the last line of a file without a trailing newline is still recognized.
    ppline = "[\t ]*#[^\x0\r\n]*";
    separators = "({ppline}|{linecomment}|{blockcomment}|{ws})+";

    // --- Identifiers ---
    id = "{idstart}{idcont}*";

    // --- Floating literals ---
    decimaldigit = "[0-9]";
    digitsequence = "{decimaldigit}+";
    fractionalconstant = "{digitsequence}?\.{digitsequence}|{digitsequence}\.";
    sign = "\+|-";
    exponentpart = "[eE]{sign}?{digitsequence}";
    floatingsuffix = "[fF]|[lL]";
    floatingliteral = "({fractionalconstant}{exponentpart}?|{digitsequence}{exponentpart}){floatingsuffix}?";

    // --- Integer literals (hexadecimal, octal, decimal) ---
    hexdigit = "[0-9a-fA-F]";
    hexadecimalliteral = "(0x|0X){hexdigit}+";
    octaldigit = "[0-7]";
    octalliteral = "0{octaldigit}*";
    decimalliteral = "[1-9]{decimaldigit}*";
    integersuffix = "[uU](ll|LL)?|[uU][lL]|(ll|LL)[uU]?|[lL][uU]?";
    integerliteral = "{hexadecimalliteral}{integersuffix}?|{octalliteral}{integersuffix}?|{decimalliteral}{integersuffix}?";

    // --- Escape sequences and universal character names ---
    hex4 = "{hexdigit}{hexdigit}{hexdigit}{hexdigit}";
    hex8 = "{hex4}{hex4}";
    simpleescape = "\\['\"\?\\abfnrtv]";
    octalescape = "\\{octaldigit}|\\{octaldigit}{octaldigit}|\\{octaldigit}{octaldigit}{octaldigit}";
    hexescape = "\\x{hexdigit}+";
    escape = "{simpleescape}|{octalescape}|{hexescape}";

    // --- Character and string literals ---
    cchar = "[^\r\n\\']|{escape}|\\u{hex4}|\\U{hex8}";
    characterliteral = "[uUL]?'{cchar}+'";
    encodingprefix = "u8|u|U|L";
    schar = "[^\r\n\\\"]|{escape}|\\u{hex4}|\\U{hex8}";
    // Second alternative: an R-prefixed string whose body may contain anything
    // except a double quote or a line break.
    stringliteral = "{encodingprefix}?\"{schar}*\"|{encodingprefix}?R\"[^\n\r\"]*\"";
}
88
// Lexer rules: each rule is a pattern followed by the action executed for a match.
// Rule order is unchanged from the original on purpose.
// NOTE(review): order presumably decides between rules matching text of the same length - confirm.
lexer api(SNGCPP_LEXER_API) CppLexer
{
    // Whitespace, comments and preprocessor lines produce no token.
    "{separators}"          {}

    // Identifier or keyword: text found in the keyword table yields the keyword
    // token, everything else is a plain identifier.
    "{id}"
    {
        int keywordToken = GetKeywordToken(token.match);
        if (keywordToken != INVALID_TOKEN) return keywordToken;
        return ID;
    }

    // Literals.
    "{floatingliteral}"     { return FLOATLIT; }
    "{integerliteral}"      { return INTLIT; }
    "{characterliteral}"    { return CHARLIT; }
    "{stringliteral}"       { return STRINGLIT; }

    // Scope resolution, comma and assignment operators.
    "::"                    { return COLONCOLON; }
    ","                     { return COMMA; }
    "="                     { return ASSIGN; }
    "\*="                   { return MULASSIGN; }
    "/="                    { return DIVASSIGN; }
    "%="                    { return REMASSIGN; }
    "\+="                   { return ADDASSIGN; }
    "-="                    { return SUBASSIGN; }
    ">>="                   { return SHIFTRIGHTASSIGN; }
    "<<="                   { return SHIFTLEFTASSIGN; }
    "&="                    { return ANDASSIGN; }
    "^="                    { return XORASSIGN; }
    "\|="                   { return ORASSIGN; }

    // Conditional, logical and bitwise operators.
    "\?"                    { return QUEST; }
    ":"                     { return COLON; }
    "\|\|"                  { return OROR; }
    "&&"                    { return AMPAMP; }
    "\|"                    { return OR; }
    "^"                     { return XOR; }
    "&"                     { return AMP; }

    // Comparison operators and angle brackets.
    "=="                    { return EQ; }
    "!="                    { return NEQ; }
    "<="                    { return LEQ; }
    ">="                    { return GEQ; }
    "<=>"                   { return SPACESHIP; }
    "<"                     { return LANGLE; }
    ">"                     { return RANGLE; }

    // Shift operators. The ">>" rule carries action $(1), defined in the
    // 'actions' section below.
    "<<"                    { return SHIFTLEFT; }
    ">>" $(1)               { return SHIFTRIGHT; }

    // Arithmetic operators.
    "\+"                    { return PLUS; }
    "-"                     { return MINUS; }
    "\*"                    { return STAR; }
    "/"                     { return DIV; }
    "%"                     { return MOD; }

    // Member access through pointers to members, parentheses, unary operators.
    "\.\*"                  { return DOTSTAR; }
    "->\*"                  { return ARROWSTAR; }
    "\("                    { return LPAREN; }
    "\)"                    { return RPAREN; }
    "\+\+"                  { return PLUSPLUS; }
    "--"                    { return MINUSMINUS; }
    "!"                     { return EXCLAMATION; }
    "~"                     { return TILDE; }

    // Brackets, member access, statement punctuation and braces.
    "\["                    { return LBRACKET; }
    "\]"                    { return RBRACKET; }
    "\."                    { return DOT; }
    "->"                    { return ARROW; }
    ";"                     { return SEMICOLON; }
    "\.\.\."                { return ELLIPSES; }
    "\{"                    { return LBRACE; }
    "\}"                    { return RBRACE; }

    variables
    {
        // Read by action $(1); never assigned in this file.
        // NOTE(review): presumably maintained by the parser while it is inside
        // template argument lists - confirm against the parser sources.
        int langleCount;
    }

    actions
    {
        // While langleCount is positive the ">>" rule yields INVALID_TOKEN instead
        // of SHIFTRIGHT.
        // NOTE(review): presumably so that ">>" closing nested template argument
        // lists is then read as two '>' tokens - confirm.
        $(1) = { if (langleCount > 0) return INVALID_TOKEN; }
    }
}