using System;
using System.Collections;

// this file has been semiautomatically generated from 'D:/work/soulng-project/soulng/lexer/Lexer.hpp' using cpp2cm version 1.0.0

// this file has been semiautomatically generated from 'D:/work/soulng-project/soulng/lexer/Lexer.cpp' using cpp2cm version 1.0.0

namespace System.Lex
{
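    // LexerFlags is a set of bit flags that control lexing and error recovery;
    // flags are tested and combined with GetFlag/SetFlag/ResetFlag below.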
    public enum LexerFlags : sbyte
    {
        none = 0,
        synchronize = 1 << 0,
        synchronized = 1 << 1,
        synchronizedAtLeastOnce = 1 << 2,
        cursorSeen = 1 << 3,
        farthestError = 1 << 4
    }

    public class Lexer
    {
        public Lexer(const ustring& content_, const string& fileName_, int fileIndex_) :
            content(content_), fileName(fileName_), fileIndex(fileIndex_), line(1), keywordMap(null), start(content.Chars()), end(content.Chars() + content.Length()),
            pos(start), current(tokens.End()), log(null), countLines(true), separatorChar('\0'),
            commentTokenId(-1), farthestPos(GetPos())
        {
        }
        public Lexer(const uchar* start_, const uchar* end_, const string& fileName_, int fileIndex_) :
            content(), fileName(fileName_), fileIndex(fileIndex_), line(1), keywordMap(null), start(start_), end(end_), pos(start), current(tokens.End()), log(null), countLines(true), separatorChar('\0'),
            commentTokenId(-1), farthestPos(GetPos())
        {
        }
        suppress Lexer(const Lexer&);
        suppress void operator=(const Lexer&);
        public void SetBlockCommentStates(const Set<int>& blockCommentStates_)
        {
            blockCommentStates = blockCommentStates_;
        }
        public nothrow const Set<int>& BlockCommentStates() const
        {
            return blockCommentStates;
        }
        public nothrow void SetCommentTokenId(int commentTokenId_)
        {
            commentTokenId = commentTokenId_;
        }
        protected virtual nothrow int GetCommentTokenId() const
        {
            return -1;
        }
        public virtual ~Lexer()
        {
        }
        public int operator*() const
        {
            return current->id;
        }
        public void SetKeywordMap(KeywordMap* keywordMap_)
        {
            keywordMap = keywordMap_;
        }
        public KeywordMap* GetKeywordMap()
        {
            return keywordMap;
        }
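        // Moves the end of the current token's match back to the current scan
        // position, giving back characters that were tentatively consumed.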
        public void Retract()
        {
            token.match.end = pos;
        }
        public const string& FileName() const
        {
            return fileName;
        }
        public Span GetSpan() const
        {
            return Span(fileIndex, line, cast<int>(GetPos()));
        }
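        // PushSpan/PopSpan maintain a stack of source spans so that nested parsing
        // rules can each accumulate their own span via SetSpan.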
        public void PushSpan()
        {
            spanStack.Push(currentSpan);
            currentSpan = Span(fileIndex, line, -1, -1);
        }
        public Span PopSpan()
        {
            Span s = currentSpan;
            currentSpan = spanStack.Pop();
            return s;
        }
        public void SetSpan(long pos)
        {
            if (currentSpan.start == -1)
            {
                currentSpan.line = line;
                currentSpan.start = cast<int>(pos);
            }
            else
            {
                currentSpan.end = cast<int>(pos);
            }
        }
        public inline nothrow Span GetCurrentSpan() const
        {
            return currentSpan;
        }
        public void SetLine(int line_)
        {
            line = line_;
        }
        public void SetCountLines(bool countLines_)
        {
            countLines = countLines_;
        }
        public Token token;
        public const uchar* Start() const
        {
            return start;
        }
        public const uchar* End() const
        {
            return end;
        }
        public const uchar* Pos() const
        {
            return pos;
        }
        public void SetLog(ParsingLog* log_)
        {
            log = log_;
        }
        public ParsingLog* Log() const
        {
            return log;
        }
        public void SetSeparatorChar(uchar separatorChar_)
        {
            separatorChar = separatorChar_;
        }
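        // Advances to the next token: reuses a buffered token when one is available,
        // otherwise scans a new one; when farthest-error tracking is on, records the
        // farthest position reached together with the active rule context.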
        public void operator++()
        {
            if (current != tokens.End())
            {
                ++current;
            }
            if (current == tokens.End())
            {
                NextToken();
            }
            else
            {
                line = current->line;
            }
            if (GetFlag(LexerFlags.farthestError))
            {
                long p = GetPos();
                if (p > farthestPos)
                {
                    farthestPos = p;
                    farthestRuleContext = ruleContext;
                }
            }
        }
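        // A lexer position packs the current line into the upper 32 bits and the
        // index of the current token into the lower 32 bits of a long.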
        public long GetPos() const
        {
            int p = cast<int>(current - tokens.Begin());
            return (cast<long>(line) << 32) | cast<long>(p);
        }
        public void SetPos(long pos)
        {
            current = tokens.Begin() + cast<int>(pos);
            line = cast<int>(pos >> 32);
        }
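        // Runs the generated DFA: feeds characters to NextState until it rejects (-1),
        // then emits the last accepted match as the current token. CONTINUE_TOKEN
        // restarts matching (e.g. after skipped input), and an END_TOKEN sentinel is
        // appended when the end of input is reached.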
        public void NextToken()
        {
            int state = 0;
            while (true)
            {
                uchar c = separatorChar;
                if (pos != end)
                {
                    c = *pos;
                }
                else if (c == '\0')
                {
                    break;
                }
                if (state == 0)
                {
                    lexeme.begin = pos;
                    token.id = INVALID_TOKEN;
                    token.line = line;
                }
                if (pos == end)
                {
                    lexeme.end = end;
                }
                else
                {
                    lexeme.end = pos + 1;
                }
                state = NextState(state, c);
                if (state == -1)
                {
                    if (token.id == CONTINUE_TOKEN)
                    {
                        if (pos == end)
                        {
                            break;
                        }
                        else
                        {
                            pos = token.match.end;
                        }
                        state = 0;
                        continue;
                    }
                    else if (token.id == INVALID_TOKEN)
                    {
                        if (pos == end)
                        {
                            break;
                        }
                        else
                        {
                            throw Exception("System.Lex.Lexer.NextToken(): error: invalid character \'" + ToUtf8(ustring(c, 1)) + "\' in file \'" + fileName + "\' at line " + ToString(line));
                        }
                    }
                    else
                    {
                        tokens.Add(token);
                        current = tokens.End() - 1;
                        pos = token.match.end;
                        return;
                    }
                }
                if (c == '\n' && countLines)
                {
                    ++line;
                }
                ++pos;
            }
            token.id = INVALID_TOKEN;
            state = NextState(state, '\0');
            long p = -1;
            if (token.id != INVALID_TOKEN && token.id != CONTINUE_TOKEN)
            {
                tokens.Add(token);
                current = tokens.End() - 1;
                p = GetPos();
            }
            Token endToken(END_TOKEN);
            endToken.match.begin = end;
            endToken.match.end = end;
            tokens.Add(endToken);
            if (p == -1)
            {
                current = tokens.End() - 1;
                p = GetPos();
            }
            SetPos(p);
        }
        public int GetKeywordToken(const Lexeme& lexeme) const
        {
            if (keywordMap != null)
            {
                return keywordMap->GetKeywordToken(lexeme);
            }
            else
            {
                return INVALID_TOKEN;
            }
        }
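        // Converts a span measured in token indices into a span of character offsets
        // from the start of the input, for consumers that work on the raw text.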
        public void ConvertExternal(Span& span)
        {
            Token startToken = GetToken(span.start);
            span.start = cast<int>(startToken.match.begin - start);
            Token endToken = startToken;
            if (span.end != -1)
            {
                endToken = GetToken(span.end);
            }
            span.end = cast<int>(endToken.match.end - start);
        }
        public Token GetToken(long pos) const
        {
            int tokenIndex = cast<int>(pos);
            if (tokenIndex >= 0 && tokenIndex < tokens.Count())
            {
                return tokens[tokenIndex];
            }
            else
            {
                throw Exception("invalid token index");
            }
        }
        public char GetChar(long pos) const
        {
            Token t = GetToken(pos);
            return cast<char>(*t.match.begin);
        }
        public wchar GetWChar(long pos) const
        {
            Token t = GetToken(pos);
            return cast<wchar>(*t.match.begin);
        }
        public uchar GetUChar(long pos) const
        {
            Token t = GetToken(pos);
            return *t.match.begin;
        }
        public int GetInt(long pos) const
        {
            Token t = GetToken(pos);
            return ParseInt(ToUtf8(t.match.ToString()));
        }
        public double GetDouble(long pos) const
        {
            Token t = GetToken(pos);
            return ParseDouble(ToUtf8(t.match.ToString()));
        }
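        // Installs a pre-tokenized list instead of scanning. The first token is added
        // twice, apparently so that the first operator++ still lands on it, and an
        // END_TOKEN sentinel terminates the list.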
        public void SetTokens(const List<Token>& tokens_)
        {
            if (!tokens_.IsEmpty())
            {
                tokens.Add(tokens_.Front());
            }
            else
            {
                tokens.Add(Token(END_TOKEN, Lexeme(end, end), 1));
            }
            for (const Token& token : tokens_)
            {
                tokens.Add(token);
            }
            tokens.Add(Token(END_TOKEN, Lexeme(end, end), 1));
            current = tokens.Begin();
        }
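        // Reconstructs the matched source text for a token span, padding the gaps
        // between consecutive tokens with spaces.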
        public ustring GetMatch(const Span& span) const
        {
            ustring match;
            Token startToken = GetToken(span.start);
            match.Append(startToken.match.ToString());
            const uchar* e = startToken.match.end;
            for (int i = span.start + 1; i <= span.end; ++i)
            {
                Token token = GetToken(i);
                match.Append(ustring(' ', token.match.begin - e));
                match.Append(token.match.ToString());
                e = token.match.end;
            }
            return match;
        }
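        // Renders the source line containing the token followed by a second line
        // that underlines the token's match with '^' characters.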
        public ustring ErrorLines(const Token& token) const
        {
            ustring lines;
            const uchar* lineStart = LineStart(start, token.match.begin);
            const uchar* lineEnd = LineEnd(end, token.match.end);
            lines.Append(ustring(lineStart, token.match.begin));
            lines.Append(token.match.ToString());
            lines.Append(ustring(token.match.end, lineEnd));
            lines.Append('\n', 1);
            lines.Append(' ', token.match.begin - lineStart);
            lines.Append('^', Max(cast<long>(1), token.match.end - token.match.begin));
            lines.Append(' ', lineEnd - token.match.end);
            lines.Append('\n', 1);
            return lines;
        }
        public ustring ErrorLines(const Span& span) const
        {
            ustring lines;
            Token startToken = GetToken(span.start);
            Token endToken = startToken;
            const uchar* lineStart = LineStart(start, startToken.match.begin);
            if (span.end != -1 && span.end != span.start)
            {
                endToken = GetToken(span.end);
            }
            const uchar* lineEnd = LineEnd(end, endToken.match.end);
            lines.Append(ustring(lineStart, startToken.match.begin));
            lines.Append(startToken.match.ToString());
            const uchar* s = startToken.match.begin;
            const uchar* e = startToken.match.end;
            for (int i = span.start + 1; i <= span.end; ++i)
            {
                Token token = GetToken(i);
                lines.Append(ustring(' ', token.match.begin - e));
                lines.Append(token.match.ToString());
                e = token.match.end;
            }
            lines.Append(ustring(e, lineEnd));
            lines.Append('\n', 1);
            lines.Append(' ', s - lineStart);
            lines.Append('^', Max(cast<long>(1), e - s));
            lines.Append(' ', lineEnd - e);
            lines.Append('\n', 1);
            return lines;
        }
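        // Computes 1-based start and end columns for a token span on its source line.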
        public void GetColumns(const Span& span, int& startCol, int& endCol) const
        {
            Token startToken = GetToken(span.start);
            Token endToken = startToken;
            const uchar* lineStart = LineStart(start, startToken.match.begin);
            if (span.end != -1 && span.end != span.start)
            {
                endToken = GetToken(span.end);
            }
            int cols = cast<int>(startToken.match.begin - lineStart);
            if (cols < 0)
            {
                cols = 0;
            }
            startCol = cols + 1;
            const uchar* lineEnd = LineEnd(end, endToken.match.end);
            if (lineEnd < lineStart)
            {
                lineEnd = lineStart;
            }
            int lineLength = cast<int>(lineEnd - lineStart);
            int spanCols = Max(cast<int>(1), Min(span.end - span.start, lineLength - cols));
            endCol = startCol + spanCols;
        }
        public void ThrowExpectationFailure(const Span& span, const ustring& name)
        {
            Token token = GetToken(span.start);
            throw ParsingException("parsing error in \'" + fileName + ":" + ToString(token.line) + "\': " + ToUtf8(name) + " expected:\n" + ToUtf8(ErrorLines(span)), fileName, span);
        }
        public string GetFarthestError() const
        {
            Token token = GetToken(farthestPos);
            string parserStateStr = GetParserStateStr();
            return "parsing error at '" + fileName + ":" + ToString(token.line) + "':\n" + ToUtf8(ErrorLines(token)) + parserStateStr;
        }
        public void ThrowFarthestError()
        {
            throw ParsingException(GetFarthestError(), fileName);
        }
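        // While recovering (synchronize and synchronized flags set) the error is
        // swallowed and only noted via synchronizedAtLeastOnce; otherwise it is
        // collected in the error list.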
        public void AddError(const Span& span, const ustring& name)
        {
            if (GetFlag(LexerFlags.synchronize) && GetFlag(LexerFlags.synchronized))
            {
                SetFlag(LexerFlags.synchronizedAtLeastOnce);
            }
            else
            {
                Token token = GetToken(span.start);
                ParsingException* error = new ParsingException("parsing error in '" + fileName + ":" + ToString(token.line) + "': " + ToUtf8(name) + " expected:\n" + ToUtf8(ErrorLines(span)), fileName, span);
                errors.Add(UniquePtr<Exception>(error));
            }
        }
        public nothrow List<UniquePtr<Exception>> Errors()
        {
            return Rvalue(errors);
        }
        public ustring RestOfLine(int maxLineLength)
        {
            ustring restOfLine(current->match.ToString() + ustring(current->match.end, pos) + ustring(pos, LineEnd(end, pos)));
            if (maxLineLength != 0)
            {
                restOfLine = restOfLine.Substring(0, maxLineLength);
            }
            return restOfLine;
        }
        public virtual int NextState(int state, uchar c)
        {
            return -1;
        }
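        // Tokenizes a single line for editor-style highlighting: starts the DFA in
        // startState, collects tokens into a TokenLine, and reports the end state so
        // multi-line constructs such as block comments can continue on the next line.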
        public TokenLine TokenizeLine(const ustring& line, int lineNumber, int startState)
        {
            pos = line.Chars();
            end = line.Chars() + line.Length();
            TokenLine tokenLine;
            tokenLine.startState = startState;
            lexeme.begin = pos;
            lexeme.end = end;
            token.match = lexeme;
            token.id = INVALID_TOKEN;
            token.line = lineNumber;
            int state = startState;
            while (pos != end)
            {
                uchar c = *pos;
                if (state == 0)
                {
                    lexeme.begin = pos;
                    token.id = INVALID_TOKEN;
                    token.line = lineNumber;
                }
                lexeme.end = pos + 1;
                int prevState = state;
                state = NextState(state, c);
                if (state == -1)
                {
                    if (prevState == 0)
                    {
                        break;
                    }
                    state = 0;
                    pos = token.match.end;
                    tokenLine.tokens.Add(token);
                    lexeme.begin = lexeme.end;
                }
                else
                {
                    ++pos;
                }
            }
            if (state != 0 && state != -1)
            {
                state = NextState(state, '\r');
            }
            if (state != 0 && state != -1)
            {
                state = NextState(state, '\n');
            }
            if (state != 0 && state != -1)
            {
                if (blockCommentStates.CFind(state) != blockCommentStates.CEnd())
                {
                    token.id = commentTokenId;
                    token.match.end = end;
                    tokenLine.tokens.Add(token);
                    tokenLine.endState = state;
                    return tokenLine;
                }
            }
            if (lexeme.begin != lexeme.end)
            {
                token.match = lexeme;
                tokenLine.tokens.Add(token);
            }
            if (state == -1)
            {
                state = 0;
            }
            tokenLine.endState = state;
            return tokenLine;
        }
        public nothrow void SetSyncTokens(const List<int>& syncTokens_)
        {
            syncTokens = syncTokens_;
        }
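        // Error recovery: when the synchronize flag is set, skips tokens until one of
        // the registered sync tokens is reached; returns true on success.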
        public nothrow bool Synchronize()
        {
            if (GetFlag(LexerFlags.synchronize))
            {
                if (GetFlag(LexerFlags.synchronized)) return false;
                SetFlag(LexerFlags.synchronized);
                while (pos != end)
                {
                    int curToken = token.id;
                    for (int syncToken : syncTokens)
                    {
                        if (curToken == syncToken)
                        {
                            return true;
                        }
                    }
                    Lexer& lexer = *this;
                    ++lexer;
                }
            }
            return false;
        }
        public inline nothrow LexerFlags Flags() const
        {
            return flags;
        }
        public inline nothrow bool GetFlag(LexerFlags flag) const
        {
            return (flags & flag) != LexerFlags.none;
        }
        public inline nothrow void SetFlag(LexerFlags flag)
        {
            flags = cast<LexerFlags>(flags | flag);
        }
        public inline nothrow void ResetFlag(LexerFlags flag)
        {
            flags = cast<LexerFlags>(flags & ~flag);
        }
        public nothrow const List<int>& RuleContext() const
        {
            return ruleContext;
        }
        public nothrow const List<int>& FarthestRuleContext() const
        {
            return farthestRuleContext;
        }
        public nothrow void SetRuleNameListPtr(List<string>* ruleNameListPtr_)
        {
            ruleNameListPtr = ruleNameListPtr_;
        }
        public nothrow string GetParserStateStr() const
        {
            string parserStateStr;
            long n = farthestRuleContext.Count();
            if (ruleNameListPtr != null && n > 0)
            {
                parserStateStr.Append("\nParser state:\n");
                for (long i = 0; i < n; ++i)
                {
                    int ruleId = farthestRuleContext[i];
                    if (ruleId >= 0 && ruleId < ruleNameListPtr->Count())
                    {
                        string ruleName = (*ruleNameListPtr)[ruleId];
                        parserStateStr.Append(ruleName).Append("\n");
                    }
                }
            }
            return parserStateStr;
        }
        public void PushRule(int ruleId)
        {
            ruleContext.Add(ruleId);
        }
        public void PopRule()
        {
            ruleContext.RemoveLast();
        }
        protected Lexeme lexeme;
        protected int line;
        private ustring content;
        private string fileName;
        private int fileIndex;
        private KeywordMap* keywordMap;
        private const uchar* start;
        private const uchar* end;
        private const uchar* pos;
        private List<Token> tokens;
        private List<Token>.Iterator current;
        private List<UniquePtr<Exception>> errors;
        private List<int> syncTokens;
        private ParsingLog* log;
        private bool countLines;
        private uchar separatorChar;
        private Stack<Span> spanStack;
        private Span currentSpan;
        private Set<int> blockCommentStates;
        private int commentTokenId;
        private LexerFlags flags;
        private long farthestPos;
        private List<int> ruleContext;
        private List<int> farthestRuleContext;
        private List<string>* ruleNameListPtr;
    }
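    // Typical driver loop (a minimal sketch; MyLexer and ID are hypothetical names
    // standing in for a generated lexer subclass and one of its token constants,
    // which this file does not define):
    //
    //   MyLexer lexer(content, fileName, 0);
    //   ++lexer;                 // scan the first token
    //   while (*lexer != END_TOKEN)
    //   {
    //       if (*lexer == ID) { /* use lexer.token */ }
    //       ++lexer;             // advance to the next token
    //   }

    // Returns a pointer to the first character of the line containing p.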
    public const uchar* LineStart(const uchar* start, const uchar* p)
    {
        while (p != start && *p != '\n' && *p != '\r')
        {
            --p;
        }
        if (p != start)
        {
            ++p;
        }
        return p;
    }
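    // Returns a pointer to the end of the line containing p: the terminating
    // newline or carriage return, or the end of input.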
    public const uchar* LineEnd(const uchar* end, const uchar* p)
    {
        while (p != end && *p != '\n' && *p != '\r')
        {
            ++p;
        }
        return p;
    }
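    // Free-function variants of the error helpers above that operate on character
    // offsets into a raw buffer instead of token indices.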
    public ustring GetErrorLines(const uchar* start, const uchar* end, const Span& externalSpan)
    {
        const uchar* startPos = start + externalSpan.start;
        if (startPos < start || startPos >= end)
        {
            return ustring();
        }
        const uchar* lineStart = LineStart(start, startPos);
        int cols = cast<int>(startPos - lineStart);
        if (cols < 0)
        {
            cols = 0;
        }
        const uchar* lineEnd = LineEnd(end, startPos);
        if (lineEnd < lineStart)
        {
            lineEnd = lineStart;
        }
        int lineLength = cast<int>(lineEnd - lineStart);
        ustring lines(lineStart, lineEnd);
        int spanCols = Max(cast<int>(1), Min(externalSpan.end - externalSpan.start, lineLength - cols));
        lines.Append('\n', 1).Append(ustring(' ', cols)).Append('^', spanCols);
        return lines;
    }
    public void GetColumns(const uchar* start, const uchar* end, const Span& externalSpan, int& startCol, int& endCol)
    {
        startCol = 0;
        endCol = 0;
        const uchar* startPos = start + externalSpan.start;
        if (startPos < start || startPos >= end)
        {
            return;
        }
        const uchar* lineStart = LineStart(start, startPos);
        int cols = cast<int>(startPos - lineStart);
        if (cols < 0)
        {
            cols = 0;
        }
        startCol = cols + 1;
        const uchar* lineEnd = LineEnd(end, startPos);
        if (lineEnd < lineStart)
        {
            lineEnd = lineStart;
        }
        int lineLength = cast<int>(lineEnd - lineStart);
        int spanCols = Max(cast<int>(1), Min(externalSpan.end - externalSpan.start, lineLength - cols));
        endCol = startCol + spanCols;
    }
    public void WriteBeginRuleToLog(Lexer& lexer, const ustring& ruleName)
    {
        lexer.Log()->WriteBeginRule(ruleName);
        lexer.Log()->IncIndent();
        lexer.Log()->WriteTry(lexer.RestOfLine(lexer.Log()->MaxLineLength()));
        lexer.Log()->IncIndent();
    }
    public void WriteSuccessToLog(Lexer& lexer, const Span& matchSpan, const ustring& ruleName)
    {
        lexer.Log()->DecIndent();
        lexer.Log()->WriteSuccess(lexer.GetMatch(matchSpan));
        lexer.Log()->DecIndent();
        lexer.Log()->WriteEndRule(ruleName);
    }
    public void WriteFailureToLog(Lexer& lexer, const ustring& ruleName)
    {
        lexer.Log()->DecIndent();
        lexer.Log()->WriteFail();
        lexer.Log()->DecIndent();
        lexer.Log()->WriteEndRule(ruleName);
    }

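    // RuleGuard is an RAII helper that pushes a rule id onto the lexer's rule
    // context on construction and pops it on destruction, so the farthest-error
    // report can name the rules that were active.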
    public class RuleGuard
    {
        public nothrow RuleGuard(Lexer& lexer_, int ruleId_) : lexer(lexer_)
        {
            lexer.PushRule(ruleId_);
        }
        public ~RuleGuard()
        {
            lexer.PopRule();
        }
        private Lexer& lexer;
    }

} // namespace System.Lex