1 #include <sngcpp/lexer/CppLexerApi.hpp>
2 #include <sngcpp/ast/SourceCodeWriter.hpp>
3
// Names the class map associated with this lexer specification.
// NOTE(review): presumably the lexer generator emits a character-class map under this name — confirm against the generator docs.
4 classmap CppTokenLexerClassMap;
5
// Path prefix string for this lexer.
// NOTE(review): presumably used as the include/output path prefix of the generated files — TODO confirm.
6 prefix "sngcpp/lexer";
7
// Token set for the C++ token lexer.
// Each entry is (TOKEN_ID, "display string"). Keywords, operators and punctuation use the
// spelling wrapped in single quotes as display string; literal/comment/category tokens use a
// plain description. The final MAX entry is a sentinel-style last entry.
// Fix: SPACESHIP's display string is now quoted ('<=>') like every other operator entry.
8 tokens CppTokenLexerTokens
9 {
10 (ALIGNAS, "'alignas'"), (ALIGNOF, "'alignof'"), (ASM, "'asm'"), (AUTO, "'auto'"), (BOOL, "'bool'"), (BREAK, "'break'"), (CASE, "'case'"), (CATCH, "'catch'"),
11 (CHAR, "'char'"), (CHAR8_T, "'char8_t'"), (CHAR16_T, "'char16_t'"), (CHAR32_T, "'char32_t'"), (CLASS, "'class'"), (CONCEPT, "'concept'"), (CONST, "'const'"),
12 (CONSTEVAL, "'consteval'"), (CONSTEXPR, "'constexpr'"),
13 (CONSTINIT, "'constinit'"), (CONST_CAST, "'const_cast'"), (CONTINUE, "'continue'"), (CO_AWAIT, "'co_await'"), (CO_RETURN, "'co_return'"), (CO_YIELD, "'co_yield'"),
14 (DECLTYPE, "'decltype'"), (DEFAULT, "'default'"), (DELETE, "'delete'"), (DO, "'do'"), (DOUBLE, "'double'"), (DYNAMIC_CAST, "'dynamic_cast'"), (ELSE, "'else'"),
15 (ENUM, "'enum'"), (EXPLICIT, "'explicit'"), (EXPORT, "'export'"), (EXTERN, "'extern'"),
16 (FALSE, "'false'"), (FLOAT, "'float'"), (FOR, "'for'"), (FRIEND, "'friend'"), (GOTO, "'goto'"), (IF, "'if'"), (INLINE, "'inline'"), (INT, "'int'"), (LONG, "'long'"),
17 (MUTABLE, "'mutable'"), (NAMESPACE, "'namespace'"), (NEW, "'new'"), (NOEXCEPT, "'noexcept'"), (NULLPTR, "'nullptr'"), (OPERATOR, "'operator'"), (PRIVATE, "'private'"),
18 (PROTECTED, "'protected'"),
19 (PUBLIC, "'public'"), (REGISTER, "'register'"), (REINTERPRET_CAST, "'reinterpret_cast'"), (REQUIRES, "'requires'"), (RETURN, "'return'"), (SHORT, "'short'"), (SIGNED, "'signed'"),
20 (SIZEOF, "'sizeof'"), (STATIC, "'static'"), (STATIC_ASSERT, "'static_assert'"), (STATIC_CAST, "'static_cast'"), (STRUCT, "'struct'"), (SWITCH, "'switch'"), (TEMPLATE, "'template'"),
21 (THIS, "'this'"), (THREAD_LOCAL, "'thread_local'"), (THROW, "'throw'"),
22 (TRUE, "'true'"), (TRY, "'try'"), (TYPEDEF, "'typedef'"), (TYPEID, "'typeid'"), (TYPENAME, "'typename'"), (UNION, "'union'"), (UNSIGNED, "'unsigned'"), (USING, "'using'"),
23 (VIRTUAL, "'virtual'"), (VOID, "'void'"), (VOLATILE, "'volatile'"), (WCHAR_T, "'wchar_t'"), (WHILE, "'while'"),
24 (OVERRIDE, "'override'"), (FINAL, "'final'"), (__DECLSPEC, "'__declspec'"), (__THREAD, "'__thread'"),
25 (FLOATLIT, "floating literal"), (INTLIT, "integer literal"), (CHARLIT, "character literal"), (STRINGLIT, "string literal"),
26 (COLONCOLON, "'::'"), (COMMA, "','"), (ASSIGN, "'='"), (MULASSIGN, "'*='"), (DIVASSIGN, "'/='"), (REMASSIGN, "'%='"), (ADDASSIGN, "'+='"), (SUBASSIGN, "'-='"),
27 (SHIFTRIGHTASSIGN, "'>>='"), (SHIFTLEFTASSIGN, "'<<='"), (ANDASSIGN, "'&='"), (XORASSIGN, "'^='"), (ORASSIGN, "'|='"), (QUEST, "'?'"), (COLON, "':'"),
28 (OROR, "'||'"), (AMPAMP, "'&&'"), (OR, "'|'"), (XOR, "'^'"), (AMP, "'&'"), (EQ, "'=='"), (NEQ, "'!='"), (LEQ, "'<='"), (GEQ, "'>='"), (SPACESHIP, "'<=>'"), (LANGLE, "'<'"), (RANGLE, "'>'"),
29 (SHIFTLEFT, "'<<'"), (SHIFTRIGHT, "'>>'"), (PLUS, "'+'"), (MINUS, "'-'"), (STAR, "'*'"), (DIV, "'/'"), (MOD, "'%'"), (DOTSTAR, "'.*'"), (ARROWSTAR, "'->*'"), (LPAREN, "'('"), (RPAREN, "')'"),
30 (PLUSPLUS, "'++'"), (MINUSMINUS, "'--'"), (EXCLAMATION, "'!'"), (TILDE, "'~'"),
31 (LBRACKET, "'['"), (RBRACKET, "']'"), (LBRACE, "'{'"), (RBRACE, "'}'"), (DOT, "'.'"), (ARROW, "'->'"), (SEMICOLON, "';'"), (ELLIPSES, "'...'"),
32 (WS, "white space"), (LINECOMMENT, "line comment"), (BLOCKCOMMENT, "block comment"), (BLOCKCOMMENTLINE, "block comment line"),
33 (KEYWORD, "keyword"), (ID, "identifier"), (NUMBER, "number"), (PP, "pp"), (OTHER, "other"),
34 (MAX, "max")
35 }
36
// Keyword table for the C++ token lexer.
// Each entry is ("spelling", TOKEN_ID) and maps a reserved word to one of the token ids
// declared in the tokens block. Besides the standard keywords it lists the contextual
// identifiers "override" and "final" and the vendor extensions "__thread" and "__declspec".
// The lexer's identifier rule looks spellings up with GetKeywordToken and reports any hit as
// the generic KEYWORD token, so the per-keyword ids here act only as a membership test there.
37 keywords CppTokenLexerKeywords
38 {
39 ("alignas", ALIGNAS), ("alignof", ALIGNOF), ("asm", ASM), ("auto", AUTO), ("bool", BOOL), ("break", BREAK), ("case", CASE), ("catch", CATCH),
40 ("char", CHAR), ("char8_t", CHAR8_T), ("char16_t", CHAR16_T), ("char32_t", CHAR32_T), ("class", CLASS), ("concept", CONCEPT), ("const", CONST),
41 ("consteval", CONSTEVAL), ("constexpr", CONSTEXPR),
42 ("constinit", CONSTINIT), ("const_cast", CONST_CAST), ("continue", CONTINUE), ("co_await", CO_AWAIT), ("co_return", CO_RETURN), ("co_yield", CO_YIELD),
43 ("decltype", DECLTYPE), ("default", DEFAULT), ("delete", DELETE), ("do", DO), ("double", DOUBLE), ("dynamic_cast", DYNAMIC_CAST), ("else", ELSE),
44 ("enum", ENUM), ("explicit", EXPLICIT), ("export", EXPORT), ("extern", EXTERN),
45 ("false", FALSE), ("float", FLOAT), ("for", FOR), ("friend", FRIEND), ("goto", GOTO), ("if", IF), ("inline", INLINE), ("int", INT), ("long", LONG),
46 ("mutable", MUTABLE), ("namespace", NAMESPACE), ("new", NEW), ("noexcept", NOEXCEPT), ("nullptr", NULLPTR), ("operator", OPERATOR), ("private", PRIVATE),
47 ("protected", PROTECTED),
48 ("public", PUBLIC), ("register", REGISTER), ("reinterpret_cast", REINTERPRET_CAST), ("requires", REQUIRES), ("return", RETURN), ("short", SHORT), ("signed", SIGNED),
49 ("sizeof", SIZEOF), ("static", STATIC), ("static_assert", STATIC_ASSERT), ("static_cast", STATIC_CAST), ("struct", STRUCT), ("switch", SWITCH), ("template", TEMPLATE),
50 ("this", THIS), ("thread_local", THREAD_LOCAL), ("throw", THROW),
51 ("true", TRUE), ("try", TRY), ("typedef", TYPEDEF), ("typeid", TYPEID), ("typename", TYPENAME), ("union", UNION), ("unsigned", UNSIGNED), ("using", USING),
52 ("virtual", VIRTUAL), ("void", VOID), ("volatile", VOLATILE), ("wchar_t", WCHAR_T), ("while", WHILE), ("override", OVERRIDE), ("final", FINAL),
53 ("__thread", __THREAD), ("__declspec", __DECLSPEC)
54 }
55
// Named regular expressions referenced as {name} by the lexer rules and by each other.
// {idstart} and {idcont} are not defined in this block.
// NOTE(review): presumably they are built-in classes of the lexer generator — confirm.
56 expressions
57 {
58 ws = "[\t ]+";
59 newline = "\r\n|\n|\r";
// A line comment includes its terminating newline; a comment that ends at end of input
// without a newline is therefore not matched by this expression.
60 linecomment = "//[^\n\r]*{newline}";
// Block comments are matched one physical line at a time: the match ends either at the
// closing "*/" or at the end of the line. 'blockcomment' is the opening line (starts with
// "/*"); 'blockcommentline' is the same pattern without the opener, for continuation lines.
// Fix: a run of stars is consumed as a whole (\*+) and must be followed by a character
// other than '*' or '/', so closers preceded by extra stars ("/***/", "**/") terminate the
// comment, and a line ending in '*' followed by a newline matches as well.
61 blockcomment = "/\*([^*\n\r]|\*+[^*/\n\r])*(\*+/|\**{newline})";
62 blockcommentline = "([^*\n\r]|\*+[^*/\n\r])*(\*+/|\**{newline})";
63 id = "{idstart}{idcont}*";
64 digit = "[0-9]";
65 nondigit = "[a-zA-Z_]";
66 hexdigit = "[0-9a-fA-F]";
67 hexquad = "{hexdigit}{hexdigit}{hexdigit}{hexdigit}";
68 universalcharactername = "\\u{hexquad}|\\U{hexquad}{hexquad}";
69 identifiernondigit = "{nondigit}|{universalcharactername}";
70 sign = "\+|-";
// Preprocessing-number style pattern: digits with an optional dot (or a dot followed by
// digits), then any mix of signed exponent markers and identifier characters.
71 ppnumber = "({digit}+\.?|\.{digit}+)([eE]{sign}|{identifiernondigit})*";
72 octaldigit = "[0-7]";
73 simpleescape = "\\['\"\?\\abfnrtv]";
74 octalescape = "\\{octaldigit}|\\{octaldigit}{octaldigit}|\\{octaldigit}{octaldigit}{octaldigit}";
75 hexescape = "\\x{hexdigit}+";
76 escape = "{simpleescape}|{octalescape}|{hexescape}";
77 cchar = "[^\r\n\\']|{escape}|{universalcharactername}";
// Fix: accept the u8 prefix (u8'a') in addition to u, U and L. The alternatives are written
// inline so this definition does not depend on 'encodingprefix', which is defined below.
78 characterliteral = "(u8|u|U|L)?'{cchar}+'";
79 encodingprefix = "u8|u|U|L";
80 schar = "[^\r\n\\\"]|{escape}|{universalcharactername}";
// Second alternative: simplified raw-string form R"..." that stops at the first '"' on the line.
81 stringliteral = "{encodingprefix}?\"{schar}*\"|{encodingprefix}?R\"[^\n\r\"]*\"";
82 }
83
// Lexer definition: each rule pairs a pattern with a C++ action that returns a token id.
// The rules return only coarse categories: WS, LINECOMMENT, BLOCKCOMMENT, BLOCKCOMMENTLINE,
// KEYWORD, ID, CHARLIT, STRINGLIT, NUMBER, PP and OTHER. Every operator and punctuation
// pattern returns OTHER, so the per-operator token ids of the tokens block are not produced here.
// NOTE(review): rule order may matter for equally long matches — confirm the generator's tie-breaking before reordering.
84 lexer api(SNGCPP_LEXER_API) CppTokenLexer
85 {
86 "{ws}" { return WS; }
// Newlines have an empty action: no token id is returned for them.
87 "{newline}" { }
88 "{linecomment}" { return LINECOMMENT; }
89 "{blockcomment}" { return BLOCKCOMMENT; }
// Guarded by action $(1) below: rejected with INVALID_TOKEN unless inBlockComment is set.
90 "{blockcommentline}" $(1) { return BLOCKCOMMENTLINE; }
// Identifiers found in the keyword table are reported as KEYWORD, all others as ID.
91 "{id}" { if (GetKeywordToken(token.match) != INVALID_TOKEN) return KEYWORD; else return ID; }
92 "{characterliteral}" { return CHARLIT; }
93 "{stringliteral}" { return STRINGLIT; }
94 "{ppnumber}" { return NUMBER; }
// Preprocessor line: optional leading white space, '#', then everything up to (not including)
// the end of the line or a NUL character.
95 "{ws}*#[^\x0\r\n]*" { return PP; }
96 "::" { return OTHER; }
97 "," { return OTHER; }
98 "=" { return OTHER; }
99 "\*=" { return OTHER; }
100 "/=" { return OTHER; }
101 "%=" { return OTHER; }
102 "\+=" { return OTHER; }
103 "-=" { return OTHER; }
104 ">>=" { return OTHER; }
105 "<<=" { return OTHER; }
106 "&=" { return OTHER; }
107 "^=" { return OTHER; }
108 "\|=" { return OTHER; }
109 "\?" { return OTHER; }
110 ":" { return OTHER; }
111 "\|\|" { return OTHER; }
112 "&&" { return OTHER; }
113 "\|" { return OTHER; }
114 "^" { return OTHER; }
115 "&" { return OTHER; }
116 "==" { return OTHER; }
117 "!=" { return OTHER; }
118 "<=" { return OTHER; }
119 ">=" { return OTHER; }
120 "<=>" { return OTHER; }
121 "<" { return OTHER; }
122 ">" { return OTHER; }
123 "<<" { return OTHER; }
124 ">>" { return OTHER; }
125 "\+" { return OTHER; }
126 "-" { return OTHER; }
127 "\*" { return OTHER; }
128 "/" { return OTHER; }
129 "%" { return OTHER; }
130 "\.\*" { return OTHER; }
131 "->\*" { return OTHER; }
132 "\(" { return OTHER; }
133 "\)" { return OTHER; }
134 "\+\+" { return OTHER; }
135 "--" { return OTHER; }
136 "!" { return OTHER; }
137 "~" { return OTHER; }
138 "\[" { return OTHER; }
139 "\]" { return OTHER; }
140 "\." { return OTHER; }
141 "->" { return OTHER; }
142 ";" { return OTHER; }
143 "\.\.\." { return OTHER; }
144 "\{" { return OTHER; }
145 "\}" { return OTHER; }
146
// Numbered actions that rules attach with $(n).
147 actions
148 {
// $(1): returns INVALID_TOKEN when the lexer is not inside a block comment, so the
// block-comment continuation rule only applies while inBlockComment is true.
149 $(1)={ if (!inBlockComment) return INVALID_TOKEN; }
150 }
151
// Member variables of the lexer.
152 variables
153 {
// Read by action $(1); never assigned in this file.
// NOTE(review): presumably set by the client code that drives the lexer — confirm.
154 bool inBlockComment;
// Not referenced by any rule or action in this file.
// NOTE(review): presumably used by client code — confirm.
155 sngcpp::ast::SourceCodeWriter* writer;
156 }
157 }