1 // =================================
  2 // Copyright (c) 2021 Seppo Laakko
  3 // Distributed under the MIT license
  4 // =================================
  5
#include <sngcm/cmlexer/TokenValueParsers.hpp>
#include <soulng/util/Unicode.hpp>
#include <limits>
#include <locale>
#include <sstream>
  9
10 using namespace soulng::unicode;
11
12 void ParseFloatingLiteral(const std::string& fileName, const Token& token, double& floatingLit, bool& floatingLitFloat)
13 {
14     floatingLit = 0.0;
15     floatingLitFloat = false;
16     const char32_t* p = token.match.begin;
17     const char32_t* e = token.match.end;
18     std::string str;
19     while (p != e&&(  (*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E' || *p == '-' || *p == '+'))
20     {
21         str.append(1, static_cast<unsigned char>(*p));
22         ++p;
23     }
24     if (p != e&&(  *p == 'f' || *p == 'F'))
25     {
26         ++p;
27         floatingLitFloat = true;
28     }
29     if (p != e)
30     {
31         throw std::runtime_error("invalid floating literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
32     }
33     std::stringstream s;
34     s.str(str);
35     s >> floatingLit;
36     if (s.fail() || s.bad())
37     {
38         throw std::runtime_error("invalid floating literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
39     }
40 }
41
42 void ParseIntegerLiteral(const std::string& fileName, const Token& token, uint64_t& intLit, bool& intLitUnsigned)
43 {
44     intLit = 0;
45     intLitUnsigned = false;
46     const char32_t* p = token.match.begin;
47     const char32_t* e = token.match.end;
48     if (p != e && *p == '0')
49     {
50         ++p;
51         if (p != e&&(  *p == 'x' || *p == 'X'))
52         {
53             ++p;
54             while (p != e&&(  (*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || (*p >= 'A' && *p <= 'F')))
55             {
56                 switch (*p)
57                 {
58                     case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
59                     {
60                         intLit = 16 * intLit + *p - '0';
61                         break;
62                     }
63                     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
64                     {
65                         intLit = 16 * intLit + 10 + *p - 'A';
66                         break;
67                     }
68                     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
69                     {
70                         intLit = 16 * intLit + 10 + *p - 'a';
71                         break;
72                     }
73                 }
74                 ++p;
75             }
76         }
77         else
78         {
79             while (p != e && *p >= '0' && *p <= '7')
80             {
81                 intLit = 8 * intLit + (*p - '0');
82                 ++p;
83             }
84         }
85     }
86     else if (p != e && *p >= '1' && *p <= '9')
87     {
88         while (p != e && *p >= '0' && *p <= '9')
89         {
90             intLit = 10 * intLit + (*p - '0');
91             ++p;
92         }
93     }
94     else
95     {
96         throw std::runtime_error("invalid integer literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
97     }
98     if (p != e&&(  *p == 'u' || *p == 'U'))
99     {
100         ++p;
101         intLitUnsigned = true;
102     }
103     if (p != e)
104     {
105         throw std::runtime_error("invalid integer literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
106     }
107 }
108
109 void ParseHexChar(const std::string& fileName, char32_t& value, const char32_t*& p, const char32_t* e, const Token& token)
110 {
111     if (p != e)
112     {
113         bool notHex = false;
114         switch (*p)
115         {
116             case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
117             {
118                 value = 16 * value + *p - '0';
119                 break;
120             }
121             case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
122             {
123                 value = 16 * value + 10 + *p - 'A';
124                 break;
125             }
126             case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
127             {
128                 value = 16 * value + 10 + *p - 'a';
129                 break;
130             }
131             default:
132             {
133                 notHex = true;
134                 break;
135             }
136         }
137         if (notHex)
138         {
139             throw std::runtime_error("hex character expected at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
140         }
141         ++p;
142     }
143     else
144     {
145         throw std::runtime_error("hex character expected at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
146     }
147 }
148
149 char32_t ParseEscape(const std::string& fileName, const char32_t*& p, const char32_t* e, const Token& token)
150 {
151     char32_t value = '\0';
152     if (p != e&&(  *p == 'x' || *p == 'X'))
153     {
154         ++p;
155         while (p != e&&(  (*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || (*p >= 'A' && *p <= 'F')))
156         {
157             ParseHexChar(fileName, value, p, e, token);
158         }
159     }
160     else if (p != e&&(  *p == 'd' || *p == 'D'))
161     {
162         ++p;
163         while (p != e && *p >= '0' && *p <= '9')
164         {
165             value = 10 * value + (*p - '0');
166             ++p;
167         }
168     }
169     else if (p != e&&(  *p >= '0' && *p <= '7'))
170     {
171         while (p != e && *p >= '0' && *p <= '7')
172         {
173             value = 8 * value + (*p - '0');
174             ++p;
175         }
176     }
177     else if (p != e && *p == 'u')
178     {
179         ++p;
180         ParseHexChar(fileName, value, p, e, token);
181         ParseHexChar(fileName, value, p, e, token);
182         ParseHexChar(fileName, value, p, e, token);
183         ParseHexChar(fileName, value, p, e, token);
184     }
185     else if (p != e && *p == 'U')
186     {
187         ++p;
188         ParseHexChar(fileName, value, p, e, token);
189         ParseHexChar(fileName, value, p, e, token);
190         ParseHexChar(fileName, value, p, e, token);
191         ParseHexChar(fileName, value, p, e, token);
192         ParseHexChar(fileName, value, p, e, token);
193         ParseHexChar(fileName, value, p, e, token);
194         ParseHexChar(fileName, value, p, e, token);
195         ParseHexChar(fileName, value, p, e, token);
196     }
197     else if (p != e)
198     {
199         switch (*p)
200         {
201             case 'a': value = '\a'; break;
202             case 'b': value = '\b'; break;
203             case 'f': value = '\f'; break;
204             case 'n': value = '\n'; break;
205             case 'r': value = '\r'; break;
206             case 't': value = '\t'; break;
207             case 'v': value = '\v'; break;
208             default: value = *p; break;
209         }
210         ++p;
211     }
212     return value;
213 }
214
215 void ParseCharacterLiteral(const std::string& fileName, const Token& token, char32_t& charLit, int& charLitPrefix)
216 {
217     charLit = '\0';
218     charLitPrefix = noPrefix;
219     const char32_t* p = token.match.begin;
220     const char32_t* e = token.match.end;
221     if (p != e && *p == 'w')
222     {
223         charLitPrefix = utf16Prefix;
224         ++p;
225     }
226     else if (p != e && *p == 'u')
227     {
228         charLitPrefix = utf32Prefix;
229         ++p;
230     }
231     if (p != e && *p == '\'')
232     {
233         ++p;
234         if (p != e && *p == '\\')
235         {
236             ++p;
237             charLit = ParseEscape(fileName, p, e, token);
238         }
239         else
240         {
241             std::u32string s;
242             while (p != e && *p != '\r' && *p != '\n' && *p != '\'')
243             {
244                 s.append(1, *p);
245                 ++p;
246             }
247             std::u32string u = s;
248             if (u.size() != 1)
249             {
250                 throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
251             }
252             charLit = u.front();
253         }
254         if (p != e && *p == '\'')
255         {
256             ++p;
257         }
258         if (p != e)
259         {
260             throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
261         }
262     }
263     else
264     {
265         throw std::runtime_error("invalid character literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
266     }
267 }
268
269 void ParseStringLiteral(const std::string& fileName, const Token& token, std::u32string& stringLit, int& stringLitPrefix)
270 {
271     stringLit.clear();
272     stringLitPrefix = noPrefix;
273     const char32_t* p = token.match.begin;
274     const char32_t* e = token.match.end;
275     if (p != e && *p == 'w')
276     {
277         stringLitPrefix = utf16Prefix;
278         ++p;
279     }
280     else if (p != e && *p == 'u')
281     {
282         stringLitPrefix = utf32Prefix;
283         ++p;
284     }
285     if (p != e && *p == '@')
286     {
287         ++p;
288         if (p != e && *p == '"')
289         {
290             ++p;
291             while (p != e && *p != '"')
292             {
293                 stringLit.append(1, *p);
294                 ++p;
295             }
296             if (p != e && *p == '"')
297             {
298                 ++p;
299             }
300             if (p != e)
301             {
302                 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
303             }
304         }
305     }
306     else
307     {
308         if (p != e && *p == '"')
309         {
310             ++p;
311             while (p != e && *p != '\r' && *p != '\n' && *p != '"')
312             {
313                 if (*p == '\\')
314                 {
315                     ++p;
316                     stringLit.append(1, ParseEscape(fileName, p, e, token));
317                 }
318                 else
319                 {
320                     stringLit.append(1, *p);
321                     ++p;
322                 }
323             }
324             if (p != e && *p == '"')
325             {
326                 ++p;
327             }
328             if (p != e)
329             {
330                 throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
331             }
332         }
333         else
334         {
335             throw std::runtime_error("invalid string literal at " + fileName + ":" + std::to_string(token.line) + ": " + ToUtf8(std::u32string(token.match.begin, token.match.end)));
336         }
337     }
338 }
339
340 std::string MakeFilePath(const Lexeme& lexeme)
341 {
342     std::u32string s;
343     const char32_t* p = lexeme.begin;
344     const char32_t* e = lexeme.end;
345     if (p != e && *p == '<')
346     {
347         ++p;
348     }
349     while (p != e && *p != '>')
350     {
351         s.append(1, *p);
352         ++p;
353     }
354     if (p != e && *p == '>')
355     {
356         ++p;
357     }
358     if (p != e)
359     {
360         throw std::runtime_error("invalid file path '" + ToUtf8(std::u32string(lexeme.begin, lexeme.end)));
361     }
362     return ToUtf8(s);
363 }