1
2
3
4
5
#include <sngcm/cmlexer/TokenValueParsers.hpp>
#include <soulng/util/Unicode.hpp>
#include <locale>
#include <sstream>
9
10 using namespace soulng::unicode;
11
12 void ParseFloatingLiteral(const std::string& fileName, const Token& token, double& floatingLit, bool& floatingLitFloat)
13 {
14 floatingLit = 0.0;
15 floatingLitFloat = false;
16 const char32_t* p = token.match.begin;
17 const char32_t* e = token.match.end;
18 std::string str;
19 while (p != e&&( (*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E' || *p == '-' || *p == '+'))
20 {
21 str.append(1, static_cast<unsigned char>(*p));
22 ++p;
23 }
24 if (p != e&&( *p == 'f' || *p == 'F'))
25 {
26 ++p;
27 floatingLitFloat = true;
28 }
29 if (p != e)
30 {
31 throw std::runtime_error("invalid floating literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
32 }
33 std::stringstream s;
34 s.str(str);
35 s >> floatingLit;
36 if (s.fail() || s.bad())
37 {
38 throw std::runtime_error("invalid floating literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
39 }
40 }
41
42 void ParseIntegerLiteral(const std::string& fileName, const Token& token, uint64_t& intLit, bool& intLitUnsigned)
43 {
44 intLit = 0;
45 intLitUnsigned = false;
46 const char32_t* p = token.match.begin;
47 const char32_t* e = token.match.end;
48 if (p != e && *p == '0')
49 {
50 ++p;
51 if (p != e&&( *p == 'x' || *p == 'X'))
52 {
53 ++p;
54 while (p != e&&( (*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || (*p >= 'A' && *p <= 'F')))
55 {
56 switch (*p)
57 {
58 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
59 {
60 intLit = 16 * intLit + *p - '0';
61 break;
62 }
63 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
64 {
65 intLit = 16 * intLit + 10 + *p - 'A';
66 break;
67 }
68 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
69 {
70 intLit = 16 * intLit + 10 + *p - 'a';
71 break;
72 }
73 }
74 ++p;
75 }
76 }
77 else
78 {
79 while (p != e && *p >= '0' && *p <= '7')
80 {
81 intLit = 8 * intLit + (*p - '0');
82 ++p;
83 }
84 }
85 }
86 else if (p != e && *p >= '1' && *p <= '9')
87 {
88 while (p != e && *p >= '0' && *p <= '9')
89 {
90 intLit = 10 * intLit + (*p - '0');
91 ++p;
92 }
93 }
94 else
95 {
96 throw std::runtime_error("invalid integer literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
97 }
98 if (p != e&&( *p == 'u' || *p == 'U'))
99 {
100 ++p;
101 intLitUnsigned = true;
102 }
103 if (p != e)
104 {
105 throw std::runtime_error("invalid integer literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
106 }
107 }
108
109 void ParseHexChar(const std::string& fileName, char32_t& value, const char32_t*& p, const char32_t* e, const Token& token)
110 {
111 if (p != e)
112 {
113 bool notHex = false;
114 switch (*p)
115 {
116 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
117 {
118 value = 16 * value + *p - '0';
119 break;
120 }
121 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
122 {
123 value = 16 * value + 10 + *p - 'A';
124 break;
125 }
126 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
127 {
128 value = 16 * value + 10 + *p - 'a';
129 break;
130 }
131 default:
132 {
133 notHex = true;
134 break;
135 }
136 }
137 if (notHex)
138 {
139 throw std::runtime_error("hex character expected at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
140 }
141 ++p;
142 }
143 else
144 {
145 throw std::runtime_error("hex character expected at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
146 }
147 }
148
149 char32_t ParseEscape(const std::string& fileName, const char32_t*& p, const char32_t* e, const Token& token)
150 {
151 char32_t value = '\0';
152 if (p != e&&( *p == 'x' || *p == 'X'))
153 {
154 ++p;
155 while (p != e&&( (*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || (*p >= 'A' && *p <= 'F')))
156 {
157 ParseHexChar(fileName, value, p, e, token);
158 }
159 }
160 else if (p != e&&( *p == 'd' || *p == 'D'))
161 {
162 ++p;
163 while (p != e && *p >= '0' && *p <= '9')
164 {
165 value = 10 * value + (*p - '0');
166 ++p;
167 }
168 }
169 else if (p != e&&( *p >= '0' && *p <= '7'))
170 {
171 while (p != e && *p >= '0' && *p <= '7')
172 {
173 value = 8 * value + (*p - '0');
174 ++p;
175 }
176 }
177 else if (p != e && *p == 'u')
178 {
179 ++p;
180 ParseHexChar(fileName, value, p, e, token);
181 ParseHexChar(fileName, value, p, e, token);
182 ParseHexChar(fileName, value, p, e, token);
183 ParseHexChar(fileName, value, p, e, token);
184 }
185 else if (p != e && *p == 'U')
186 {
187 ++p;
188 ParseHexChar(fileName, value, p, e, token);
189 ParseHexChar(fileName, value, p, e, token);
190 ParseHexChar(fileName, value, p, e, token);
191 ParseHexChar(fileName, value, p, e, token);
192 ParseHexChar(fileName, value, p, e, token);
193 ParseHexChar(fileName, value, p, e, token);
194 ParseHexChar(fileName, value, p, e, token);
195 ParseHexChar(fileName, value, p, e, token);
196 }
197 else if (p != e)
198 {
199 switch (*p)
200 {
201 case 'a': value = '\a'; break;
202 case 'b': value = '\b'; break;
203 case 'f': value = '\f'; break;
204 case 'n': value = '\n'; break;
205 case 'r': value = '\r'; break;
206 case 't': value = '\t'; break;
207 case 'v': value = '\v'; break;
208 default: value = *p; break;
209 }
210 ++p;
211 }
212 return value;
213 }
214
215 void ParseCharacterLiteral(const std::string& fileName, const Token& token, char32_t& charLit, int& charLitPrefix)
216 {
217 charLit = '\0';
218 charLitPrefix = noPrefix;
219 const char32_t* p = token.match.begin;
220 const char32_t* e = token.match.end;
221 if (p != e && *p == 'w')
222 {
223 charLitPrefix = utf16Prefix;
224 ++p;
225 }
226 else if (p != e && *p == 'u')
227 {
228 charLitPrefix = utf32Prefix;
229 ++p;
230 }
231 if (p != e && *p == '\'')
232 {
233 ++p;
234 if (p != e && *p == '\\')
235 {
236 ++p;
237 charLit = ParseEscape(fileName, p, e, token);
238 }
239 else
240 {
241 std::u32string s;
242 while (p != e && *p != '\r' && *p != '\n' && *p != '\'')
243 {
244 s.append(1, *p);
245 ++p;
246 }
247 std::u32string u = s;
248 if (u.size() != 1)
249 {
250 throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
251 }
252 charLit = u.front();
253 }
254 if (p != e && *p == '\'')
255 {
256 ++p;
257 }
258 if (p != e)
259 {
260 throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
261 }
262 }
263 else
264 {
265 throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
266 }
267 }
268
269 void ParseStringLiteral(const std::string& fileName, const Token& token, std::u32string& stringLit, int& stringLitPrefix)
270 {
271 stringLit.clear();
272 stringLitPrefix = noPrefix;
273 const char32_t* p = token.match.begin;
274 const char32_t* e = token.match.end;
275 if (p != e && *p == 'w')
276 {
277 stringLitPrefix = utf16Prefix;
278 ++p;
279 }
280 else if (p != e && *p == 'u')
281 {
282 stringLitPrefix = utf32Prefix;
283 ++p;
284 }
285 if (p != e && *p == '@')
286 {
287 ++p;
288 if (p != e && *p == '"')
289 {
290 ++p;
291 while (p != e && *p != '"')
292 {
293 stringLit.append(1, *p);
294 ++p;
295 }
296 if (p != e && *p == '"')
297 {
298 ++p;
299 }
300 if (p != e)
301 {
302 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
303 }
304 }
305 }
306 else
307 {
308 if (p != e && *p == '"')
309 {
310 ++p;
311 while (p != e && *p != '\r' && *p != '\n' && *p != '"')
312 {
313 if (*p == '\\')
314 {
315 ++p;
316 stringLit.append(1, ParseEscape(fileName, p, e, token));
317 }
318 else
319 {
320 stringLit.append(1, *p);
321 ++p;
322 }
323 }
324 if (p != e && *p == '"')
325 {
326 ++p;
327 }
328 if (p != e)
329 {
330 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
331 }
332 }
333 else
334 {
335 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
336 }
337 }
338 }
339
340 std::string MakeFilePath(const Lexeme& lexeme)
341 {
342 std::u32string s;
343 const char32_t* p = lexeme.begin;
344 const char32_t* e = lexeme.end;
345 if (p != e && *p == '<')
346 {
347 ++p;
348 }
349 while (p != e && *p != '>')
350 {
351 s.append(1, *p);
352 ++p;
353 }
354 if (p != e && *p == '>')
355 {
356 ++p;
357 }
358 if (p != e)
359 {
360 throw std::runtime_error("invalid file path '" + ToUtf8(std::u32string(lexeme.begin, lexeme.end)));
361 }
362 return ToUtf8(s);
363 }