1
2
3
4
5
6 #include <sngcm/cmlexer/TokenValueParsers.hpp>
7 #include <soulng/util/Unicode.hpp>
8 #include <sstream>
9
10 using namespace soulng::unicode;
11
12 void ParseFloatingLiteral(const std::string& fileName, const Token& token, double& floatingLit, bool& floatingLitFloat)
13 {
14 floatingLit = 0.0;
15 floatingLitFloat = false;
16 const char32_t* p = token.match.begin;
17 const char32_t* e = token.match.end;
18 std::string str;
19 while (p != e&&( (*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E' || *p == '-' || *p == '+'))
20 {
21 str.append(1, static_cast<unsigned char>(*p));
22 ++p;
23 }
24 if (p != e&&( *p == 'f' || *p == 'F'))
25 {
26 ++p;
27 floatingLitFloat = true;
28 }
29 if (p != e)
30 {
31 throw std::runtime_error("invalid floating literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
32 }
33 std::stringstream s;
34 s.str(str);
35 s >> floatingLit;
36 if (s.fail() || s.bad())
37 {
38 throw std::runtime_error("invalid floating literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
39 }
40 }
41
42 void ParseIntegerLiteral(const std::string& fileName, const Token& token, uint64_t& intLit, bool& intLitUnsigned)
43 {
44 intLit = 0;
45 intLitUnsigned = false;
46 const char32_t* p = token.match.begin;
47 const char32_t* e = token.match.end;
48 if (p != e && *p == '0')
49 {
50 ++p;
51 if (p != e&&( *p == 'x' || *p == 'X'))
52 {
53 ++p;
54 while (p != e&&( (*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || (*p >= 'A' && *p <= 'F')))
55 {
56 switch (*p)
57 {
58 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
59 {
60 intLit = 16 * intLit + *p - '0';
61 break;
62 }
63 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
64 {
65 intLit = 16 * intLit + 10 + *p - 'A';
66 break;
67 }
68 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
69 {
70 intLit = 16 * intLit + 10 + *p - 'a';
71 break;
72 }
73 }
74 ++p;
75 }
76 }
77 else
78 {
79 while (p != e && *p >= '0' && *p <= '7')
80 {
81 intLit = 8 * intLit + (*p - '0');
82 ++p;
83 }
84 }
85 }
86 else if (p != e && *p >= '1' && *p <= '9')
87 {
88 while (p != e && *p >= '0' && *p <= '9')
89 {
90 intLit = 10 * intLit + (*p - '0');
91 ++p;
92 }
93 }
94 else
95 {
96 throw std::runtime_error("invalid integer literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
97 }
98 if (p != e&&( *p == 'u' || *p == 'U'))
99 {
100 ++p;
101 intLitUnsigned = true;
102 }
103 if (p != e)
104 {
105 throw std::runtime_error("invalid integer literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
106 }
107 }
108
109 void ParseHexChar(const std::string& fileName, char32_t& value, const char32_t*& p, const char32_t* e, const Token& token)
110 {
111 if (p != e)
112 {
113 switch (*p)
114 {
115 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
116 {
117 value = 16 * value + *p - '0';
118 break;
119 }
120 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
121 {
122 value = 16 * value + 10 + *p - 'A';
123 break;
124 }
125 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
126 {
127 value = 16 * value + 10 + *p - 'a';
128 break;
129 }
130 }
131 ++p;
132 }
133 else
134 {
135 throw std::runtime_error("hex character expected at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
136 }
137 }
138
139 char32_t ParseEscape(const std::string& fileName, const char32_t*& p, const char32_t* e, const Token& token)
140 {
141 char32_t value = '\0';
142 if (p != e&&( *p == 'x' || *p == 'X'))
143 {
144 ++p;
145 while (p != e&&( (*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || (*p >= 'A' && *p <= 'F')))
146 {
147 ParseHexChar(fileName, value, p, e, token);
148 }
149 }
150 else if (p != e&&( *p == 'd' || *p == 'D'))
151 {
152 ++p;
153 while (p != e && *p >= '0' && *p <= '9')
154 {
155 value = 10 * value + (*p - '0');
156 ++p;
157 }
158 }
159 else if (p != e&&( *p >= '0' && *p <= '7'))
160 {
161 while (p != e && *p >= '0' && *p <= '7')
162 {
163 value = 8 * value + (*p - '0');
164 ++p;
165 }
166 }
167 else if (p != e && *p == 'u')
168 {
169 ++p;
170 ParseHexChar(fileName, value, p, e, token);
171 ParseHexChar(fileName, value, p, e, token);
172 ParseHexChar(fileName, value, p, e, token);
173 ParseHexChar(fileName, value, p, e, token);
174 }
175 else if (p != e && *p == 'U')
176 {
177 ++p;
178 ParseHexChar(fileName, value, p, e, token);
179 ParseHexChar(fileName, value, p, e, token);
180 ParseHexChar(fileName, value, p, e, token);
181 ParseHexChar(fileName, value, p, e, token);
182 ParseHexChar(fileName, value, p, e, token);
183 ParseHexChar(fileName, value, p, e, token);
184 ParseHexChar(fileName, value, p, e, token);
185 ParseHexChar(fileName, value, p, e, token);
186 }
187 else if (p != e)
188 {
189 switch (*p)
190 {
191 case 'a': value = '\a'; break;
192 case 'b': value = '\b'; break;
193 case 'f': value = '\f'; break;
194 case 'n': value = '\n'; break;
195 case 'r': value = '\r'; break;
196 case 't': value = '\t'; break;
197 case 'v': value = '\v'; break;
198 default: value = *p; break;
199 }
200 ++p;
201 }
202 return value;
203 }
204
205 void ParseCharacterLiteral(const std::string& fileName, const Token& token, char32_t& charLit, int& charLitPrefix)
206 {
207 charLit = '\0';
208 charLitPrefix = noPrefix;
209 const char32_t* p = token.match.begin;
210 const char32_t* e = token.match.end;
211 if (p != e && *p == 'w')
212 {
213 charLitPrefix = utf16Prefix;
214 ++p;
215 }
216 else if (p != e && *p == 'u')
217 {
218 charLitPrefix = utf32Prefix;
219 ++p;
220 }
221 if (p != e && *p == '\'')
222 {
223 ++p;
224 if (p != e && *p == '\\')
225 {
226 ++p;
227 charLit = ParseEscape(fileName, p, e, token);
228 }
229 else
230 {
231 std::string s;
232 while (p != e && *p != '\r' && *p != '\n' && *p != '\'')
233 {
234 s.append(1, static_cast<unsigned char>(*p));
235 ++p;
236 }
237 std::u32string u = ToUtf32(s);
238 if (u.size() != 1)
239 {
240 throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
241 }
242 charLit = u.front();
243 }
244 if (p != e && *p == '\'')
245 {
246 ++p;
247 }
248 if (p != e)
249 {
250 throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
251 }
252 }
253 else
254 {
255 throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
256 }
257 }
258
259 void ParseStringLiteral(const std::string& fileName, const Token& token, std::u32string& stringLit, int& stringLitPrefix)
260 {
261 stringLit.clear();
262 stringLitPrefix = noPrefix;
263 const char32_t* p = token.match.begin;
264 const char32_t* e = token.match.end;
265 if (p != e && *p == 'w')
266 {
267 stringLitPrefix = utf16Prefix;
268 ++p;
269 }
270 else if (p != e && *p == 'u')
271 {
272 stringLitPrefix = utf32Prefix;
273 ++p;
274 }
275 if (p != e && *p == '@')
276 {
277 ++p;
278 if (p != e && *p == '"')
279 {
280 ++p;
281 while (p != e && *p != '"')
282 {
283 stringLit.append(1, *p);
284 ++p;
285 }
286 if (p != e && *p == '"')
287 {
288 ++p;
289 }
290 if (p != e)
291 {
292 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
293 }
294 }
295 }
296 else
297 {
298 if (p != e && *p == '"')
299 {
300 ++p;
301 while (p != e && *p != '\r' && *p != '\n' && *p != '"')
302 {
303 if (*p == '\\')
304 {
305 ++p;
306 stringLit.append(1, ParseEscape(fileName, p, e, token));
307 }
308 else
309 {
310 stringLit.append(1, *p);
311 ++p;
312 }
313 }
314 if (p != e && *p == '"')
315 {
316 ++p;
317 }
318 if (p != e)
319 {
320 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
321 }
322 }
323 else
324 {
325 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
326 }
327 }
328 }
329
330 std::string MakeFilePath(const Lexeme& lexeme)
331 {
332 std::u32string s;
333 const char32_t* p = lexeme.begin;
334 const char32_t* e = lexeme.end;
335 if (p != e && *p == '<')
336 {
337 ++p;
338 }
339 while (p != e && *p != '>')
340 {
341 s.append(1, *p);
342 ++p;
343 }
344 if (p != e && *p == '>')
345 {
346 ++p;
347 }
348 if (p != e)
349 {
350 throw std::runtime_error("invalid file path '" + ToUtf8(std::u32string(lexeme.begin, lexeme.end)));
351 }
352 return ToUtf8(s);
353 }