1 using System;
  2 using System.Collections;
  3 
  4 // this file has been semiautomatically generated from 'D:/work/soulng-project/soulng/lexer/Lexer.hpp' using cpp2cm version 1.0.0
  5 
  6 // this file has been semiautomatically generated from 'D:/work/soulng-project/soulng/lexer/Lexer.cpp' using cpp2cm version 1.0.0
  7 
  8 namespace System.Lex
  9 {
 10     public enum LexerFlags : sbyte
 11     {
        none = 0, synchronize = 1 << 0, synchronized = 1 << 1, synchronizedAtLeastOnce = 1 << 2, cursorSeen = 1 << 3, farthestError = 1 << 4
 13     }
 14 
 15     public class Lexer
 16     {
        public Lexer(const ustring& content_, const string& fileName_, int fileIndex_) :
            content(content_), fileName(fileName_), fileIndex(fileIndex_), line(1), keywordMap(null), start(content.Chars()), end(content.Chars() + content.Length()), pos(start), current(tokens.End()), log(null), countLines(true), separatorChar('\0'),
            commentTokenId(-1), farthestPos(GetPos())
 20         {
 21         }
        public Lexer(const uchar* start_, const uchar* end_, const string& fileName_, int fileIndex_) :
            content(), fileName(fileName_), fileIndex(fileIndex_), line(1), keywordMap(null), start(start_), end(end_), pos(start), current(tokens.End()), log(null), countLines(true), separatorChar('\0'),
            commentTokenId(-1), farthestPos(GetPos())
 25         {
 26         }
 27         suppress Lexer(const Lexer&);
 28         suppress void operator=(const Lexer&);
        public void SetBlockCommentStates(const Set<int>& blockCommentStates_)
 30         {
 31             blockCommentStates = blockCommentStates_;
 32         }
 33         public nothrow const Set<int>& BlockCommentStates() const
 34         {
 35             return blockCommentStates;
 36         }
 37         public nothrow void SetCommentTokenId(int commentTokenId_)
 38         {
 39             commentTokenId = commentTokenId_;
 40         }
 41         protected virtual nothrow int GetCommentTokenId() const
 42         {
 43             return -1;
 44         }
 45         public virtual ~Lexer()
 46         {
 47         }
 48         public int operator*() const
 49         {
 50             return current->id;
 51         }
 52         public void SetKeywordMap(KeywordMap* keywordMap_)
 53         {
 54             keywordMap = keywordMap_;
 55         }
 56         public KeywordMap* GetKeywordMap()
 57         {
 58             return keywordMap;
 59         }
 60         public void Retract()
 61         {
 62             token.match.end = pos;
 63         }
 64         public const string& FileName() const
 65         {
 66             return fileName;
 67         }
 68         public Span GetSpan() const
 69         {
            return Span(fileIndex, line, cast<int>(GetPos()));
 71         }
 72         public void PushSpan()
 73         {
 74             spanStack.Push(currentSpan);
            currentSpan = Span(fileIndex, line, -1, -1);
 76         }
 77         public Span PopSpan()
 78         {
 79             Span s = currentSpan;
 80             currentSpan = spanStack.Pop();
 81             return s;
 82         }
 83         public void SetSpan(long pos)
 84         {
 85             if (currentSpan.start == -1)
 86             {
 87                 currentSpan.line = line;
 88                 currentSpan.start = cast<int>(pos);
 89             }
 90             else
 91             {
 92                 currentSpan.end = cast<int>(pos);
 93             }
 94         }
 95         public inline nothrow Span GetCurrentSpan() const
 96         {
 97             return currentSpan;
 98         }
 99         public void SetLine(int line_)
100         {
101             line = line_;
102         }
103         public void SetCountLines(bool countLines_)
104         {
105             countLines = countLines_;
106         }
107         public Token token;
108         public const uchar* Start() const
109         {
110             return start;
111         }
112         public const uchar* End() const
113         {
114             return end;
115         }
116         public const uchar* Pos() const
117         {
118             return pos;
119         }
120         public void SetLog(ParsingLog* log_)
121         {
122             log = log_;
123         }
124         public ParsingLog* Log() const
125         {
126             return log;
127         }
128         public void SetSeparatorChar(uchar separatorChar_)
129         {
130             separatorChar = separatorChar_;
131         }
132         public void operator++()
133         {
134             if (current != tokens.End())
135             {
136                 ++current;
137             }
138             if (current == tokens.End())
139             {
140                 NextToken();
141             }
142             else
143             {
144                 line = current->line;
145             }
146             if (GetFlag(LexerFlags.farthestError))
147             {
148                 long p = GetPos();
149                 if (p > farthestPos)
150                 {
151                     farthestPos = p;
152                     farthestRuleContext = ruleContext;
153                 }
154             }
155         }
156         public long GetPos() const
157         {
158             int p = cast<int>(current - tokens.Begin());
159             return (cast<long>(line) << 32) | cast<long>(p);
160         }
161         public void SetPos(long pos)
162         {
163             current = tokens.Begin() + cast<int>(pos);
164             line = cast<int>(pos >> 32);
165         }
166         public void NextToken()
167         {
168             int state = 0;
169             while (true)
170             {
171                 uchar c = separatorChar;
172                 if (pos != end)
173                 {
174                     c = *pos;
175                 }
176                 else if (c == '\0')
177                 {
178                     break;
179                 }
180                 if (state == 0)
181                 {
182                     lexeme.begin = pos;
183                     token.id = INVALID_TOKEN;
184                     token.line = line;
185                 }
186                 if (pos == end)
187                 {
188                     lexeme.end = end;
189                 }
190                 else
191                 {
192                     lexeme.end = pos + 1;
193                 }
                state = NextState(state, c);
195                 if (state == -1)
196                 {
197                     if (token.id == CONTINUE_TOKEN)
198                     {
199                         if (pos == end)
200                         {
201                             break;
202                         }
203                         else
204                         {
205                             pos = token.match.end;
206                         }
207                         state = 0;
208                         continue;
209                     }
210                     else if (token.id == INVALID_TOKEN)
211                     {
212                         if (pos == end)
213                         {
214                             break;
215                         }
216                         else
217                         {
                            throw Exception("System.Lex.Lexer.NextToken(): error: invalid character \'" + ToUtf8(ustring(c, 1)) + "\' in file \'" + fileName + "\' at line " + ToString(line));
219                         }
220                     }
221                     else
222                     {
223                         tokens.Add(token);
224                         current = tokens.End() - 1;
225                         pos = token.match.end;
226                         return;
227                     }
228                 }
229                 if (c == '\n' && countLines)
230                 {
231                     ++line;
232                 }
233                 ++pos;
234             }
235             token.id = INVALID_TOKEN;
            state = NextState(state, '\0');
237             long p = -1;
238             if (token.id != INVALID_TOKEN && token.id != CONTINUE_TOKEN)
239             {
240                 tokens.Add(token);
241                 current = tokens.End() - 1;
242                 p = GetPos();
243             }
244             Token endToken(END_TOKEN);
245             endToken.match.begin = end;
246             endToken.match.end = end;
247             tokens.Add(endToken);
248             if (p == -1)
249             {
250                 current = tokens.End() - 1;
251                 p = GetPos();
252             }
253             SetPos(p);
254         }
255         public int GetKeywordToken(const Lexeme& lexeme) const
256         {
            if (keywordMap != null)
258             {
259                 return keywordMap->GetKeywordToken(lexeme);
260             }
261             else
262             {
263                 return INVALID_TOKEN;
264             }
265         }
266         public void ConvertExternal(Span& span)
267         {
268             Token startToken = GetToken(span.start);
269             span.start = cast<int>(startToken.match.begin - start);
270             Token endToken = startToken;
271             if (span.end != -1)
272             {
273                 endToken = GetToken(span.end);
274             }
275             span.end = cast<int>(endToken.match.end - start);
276         }
277         public Token GetToken(long pos) const
278         {
279             int tokenIndex = cast<int>(pos);
280             if (tokenIndex >= 0 && tokenIndex < tokens.Count())
281             {
282                 return tokens[tokenIndex];
283             }
284             else
285             {
286                 throw Exception("invalid token index");
287             }
288         }
289         public char GetChar(long pos) const
290         {
291             Token t = GetToken(pos);
292             return cast<char>(*t.match.begin);
293         }
294         public wchar GetWChar(long pos) const
295         {
296             Token t = GetToken(pos);
297             return cast<wchar>(*t.match.begin);
298         }
299         public uchar GetUChar(long pos) const
300         {
301             Token t = GetToken(pos);
302             return *t.match.begin;
303         }
304         public int GetInt(long pos) const
305         {
306             Token t = GetToken(pos);
307             return ParseInt(ToUtf8(t.match.ToString()));
308         }
309         public double GetDouble(long pos) const
310         {
311             Token t = GetToken(pos);
312             return ParseDouble(ToUtf8(t.match.ToString()));
313         }
314         public void SetTokens(const List<Token>& tokens_)
315         {
316             if (!tokens_.IsEmpty())
317             {
318                 tokens.Add(tokens_.Front());
319             }
320             else
321             {
                tokens.Add(Token(END_TOKEN, Lexeme(end, end), 1));
323             }
324             for (const Token& token : tokens_)
325             {
326                 tokens.Add(token);
327             }
            tokens.Add(Token(END_TOKEN, Lexeme(end, end), 1));
329             current = tokens.Begin();
330         }
331         public ustring GetMatch(const Span& span) const
332         {
333             ustring match;
334             Token startToken = GetToken(span.start);
335             match.Append(startToken.match.ToString());
336             const uchar* e = startToken.match.end;
            for (int i = span.start + 1; i <= span.end; ++i)
338             {
339                 Token token = GetToken(i);
                match.Append(ustring(' ', token.match.begin - e));
341                 match.Append(token.match.ToString());
342                 e = token.match.end;
343             }
344             return match;
345         }
346         public ustring ErrorLines(const Token& token) const
347         {
348             ustring lines;
            const uchar* lineStart = LineStart(start, token.match.begin);
            const uchar* lineEnd = LineEnd(end, token.match.end);
            lines.Append(ustring(lineStart, token.match.begin));
            lines.Append(token.match.ToString());
            lines.Append(ustring(token.match.end, lineEnd));
            lines.Append('\n', 1);
            lines.Append(' ', token.match.begin - lineStart);
            lines.Append('^', Max(cast<long>(1), token.match.end - token.match.begin));
            lines.Append(' ', lineEnd - token.match.end);
            lines.Append('\n', 1);
359             return lines;
360         }
361         public ustring ErrorLines(const Span& span) const
362         {
363             ustring lines;
364             Token startToken = GetToken(span.start);
365             Token endToken = startToken;
            const uchar* lineStart = LineStart(start, startToken.match.begin);
367             if (span.end != -1 && span.end != span.start)
368             {
369                 endToken = GetToken(span.end);
370             }
            const uchar* lineEnd = LineEnd(end, endToken.match.end);
            lines.Append(ustring(lineStart, startToken.match.begin));
373             lines.Append(startToken.match.ToString());
374             const uchar* s = startToken.match.begin;
375             const uchar* e = startToken.match.end;
            for (int i = span.start + 1; i <= span.end; ++i)
377             {
378                 Token token = GetToken(i);
                lines.Append(ustring(' ', token.match.begin - e));
380                 lines.Append(token.match.ToString());
381                 e = token.match.end;
382             }
            lines.Append(ustring(e, lineEnd));
            lines.Append('\n', 1);
            lines.Append(' ', s - lineStart);
            lines.Append('^', Max(cast<long>(1), e - s));
            lines.Append(' ', lineEnd - e);
            lines.Append('\n', 1);
389             return lines;
390         }
        public void GetColumns(const Span& span, int& startCol, int& endCol) const
392         {
393             Token startToken = GetToken(span.start);
394             Token endToken = startToken;
            const uchar* lineStart = LineStart(start, startToken.match.begin);
396             if (span.end != -1 && span.end != span.start)
397             {
398                 endToken = GetToken(span.end);
399             }
400             int cols = cast<int>(startToken.match.begin - lineStart);
401             if (cols < 0)
402             {
403                 cols = 0;
404             }
405             startCol = cols + 1;
            const uchar* lineEnd = LineEnd(end, endToken.match.end);
407             if (lineEnd < lineStart)
408             {
409                 lineEnd = lineStart;
410             }
411             int lineLength = cast<int>(lineEnd - lineStart);
            int spanCols = Max(cast<int>(1), Min(span.end - span.start, lineLength - cols));
413             endCol = startCol + spanCols;
414         }
        public void ThrowExpectationFailure(const Span& span, const ustring& name)
416         {
417             Token token = GetToken(span.start);
            throw ParsingException("parsing error in \'" + fileName + ":" + ToString(token.line) + "\': " + ToUtf8(name) + " expected:\n" + ToUtf8(ErrorLines(span)), fileName, span);
419         }
420         public string GetFarthestError() const
421         {
422             Token token = GetToken(farthestPos);
423             string parserStateStr = GetParserStateStr();
424             return "parsing error at '" + fileName + ":" + ToString(token.line) + "':\n" + ToUtf8(ErrorLines(token)) + parserStateStr;
425         }
426         public void ThrowFarthestError()
427         {
            throw ParsingException(GetFarthestError(), fileName);
429         }
        public void AddError(const Span& span, const ustring& name)
431         {
432             if (GetFlag(LexerFlags.synchronize) && GetFlag(LexerFlags.synchronized))
433             {
434                 SetFlag(LexerFlags.synchronizedAtLeastOnce);
435             }
436             else
437             {
438                 Token token = GetToken(span.start);
                ParsingException* error(new ParsingException("parsing error in '" + fileName + ":" + ToString(token.line) + "': " + ToUtf8(name) + " expected:\n" + ToUtf8(ErrorLines(span)), fileName, span));
440                 errors.Add(UniquePtr<Exception>(error));
441             }
442         }
443         public nothrow List<UniquePtr<Exception>> Errors()
444         {
445             return Rvalue(errors);
446         }
447         public ustring RestOfLine(int maxLineLength)
448         {
            ustring restOfLine(current->match.ToString() + ustring(current->match.end, pos) + ustring(pos, LineEnd(end, pos)));
450             if (maxLineLength != 0)
451             {
                restOfLine = restOfLine.Substring(0, maxLineLength);
453             }
454             return restOfLine;
455         }
        public virtual int NextState(int state, uchar c)
457         {
458             return -1;
459         }
        public TokenLine TokenizeLine(const ustring& line, int lineNumber, int startState)
461         {
462             pos = line.Chars();
463             end = line.Chars() + line.Length();
464             TokenLine tokenLine;
465             tokenLine.startState = startState;
466             lexeme.begin = pos;
467             lexeme.end = end;
468             token.match = lexeme;
469             token.id = INVALID_TOKEN;
470             token.line = lineNumber;
471             int state = startState;
472             while (pos != end)
473             {
474                 uchar c = *pos;
475                 if (state == 0)
476                 {
477                     lexeme.begin = pos;
478                     token.id = INVALID_TOKEN;
479                     token.line = lineNumber;
480                 }
481                 lexeme.end = pos + 1;
482                 int prevState = state;
                state = NextState(state, c);
484                 if (state == -1)
485                 {
486                     if (prevState == 0)
487                     {
488                         break;
489                     }
490                     state = 0;
491                     pos = token.match.end;
492                     tokenLine.tokens.Add(token);
493                     lexeme.begin = lexeme.end;
494                 }
495                 else
496                 {
497                     ++pos;
498                 }
499             }
500             if (state != 0 && state != -1)
501             {
                state = NextState(state, '\r');
503             }
504             if (state != 0 && state != -1)
505             {
                state = NextState(state, '\n');
507             }
508             if (state != 0 && state != -1)
509             {
510                 if (blockCommentStates.CFind(state) != blockCommentStates.CEnd())
511                 {
512                     token.id = commentTokenId;
513                     token.match.end = end;
514                     tokenLine.tokens.Add(token);
515                     tokenLine.endState = state;
516                     return tokenLine;
517                 }
518             }
519             if (lexeme.begin != lexeme.end)
520             {
521                 token.match = lexeme;
522                 tokenLine.tokens.Add(token);
523             }
524             if (state == -1)
525             {
526                 state = 0;
527             }
528             tokenLine.endState = state;
529             return tokenLine;
530         }
531         public nothrow void SetSyncTokens(const List<int>& syncTokens_)
532         {
533             syncTokens = syncTokens_;
534         }
535         public nothrow bool Synchronize()
536         {
537             if (GetFlag(LexerFlags.synchronize))
538             {
539                 if (GetFlag(LexerFlags.synchronized)) return false;
540                 SetFlag(LexerFlags.synchronized);
541                 while (pos != end)
542                 {
543                     int curToken = token.id;
544                     for (int syncToken : syncTokens)
545                     {
546                         if (curToken == syncToken)
547                         {
548                             return true;
549                         }
550                     }
551                     Lexer& lexer = *this;
552                     ++lexer;
553                 }
554             }
555             return false;
556         }
557         public inline nothrow LexerFlags Flags() const
558         {
559             return flags;
560         }
561         public inline nothrow bool GetFlag(LexerFlags flag) const
562         {
563             return (flags & flag) != LexerFlags.none;
564         }
565         public inline nothrow void SetFlag(LexerFlags flag)
566         {
567             flags = cast<LexerFlags>(flags | flag);
568         }
569         public inline nothrow void ResetFlag(LexerFlags flag)
570         {
571             flags = cast<LexerFlags>(flags & ~flag);
572         }
573         public nothrow const List<int>& RuleContext() const
574         {
575             return ruleContext;
576         }
577         public nothrow const List<int>& FarthestRuleContext() const
578         {
579             return farthestRuleContext;
580         }
581         public nothrow void SetRuleNameListPtr(List<string>* ruleNameListPtr_)
582         {
583             ruleNameListPtr = ruleNameListPtr_;
584         }
585         public nothrow string GetParserStateStr() const
586         {
587             string parserStateStr;
588             long n = farthestRuleContext.Count();
589             if (ruleNameListPtr != null && n > 0)
590             {
591                 parserStateStr.Append("\nParser state:\n");
                for (long i = 0; i < n; ++i)
593                 {
594                     int ruleId = farthestRuleContext[i];
595                     if (ruleId >= 0 && ruleId < ruleNameListPtr->Count())
596                     {
597                         string ruleName = (*ruleNameListPtr)[ruleId];
598                         parserStateStr.Append(ruleName).Append("\n");
599                     }
600                 }
601             }
602             return parserStateStr;
603         }
604         public void PushRule(int ruleId)
605         {
606             ruleContext.Add(ruleId);
607         }
608         public void PopRule()
609         {
610             ruleContext.RemoveLast();
611         }
612         protected Lexeme lexeme;
613         protected int line;
614         private ustring content;
615         private string fileName;
616         private int fileIndex;
617         private KeywordMap* keywordMap;
618         private const uchar* start;
619         private const uchar* end;
620         private const uchar* pos;
621         private List<Token> tokens;
622         private List<Token>.Iterator current;
623         private List<UniquePtr<Exception>> errors;
624         private List<int> syncTokens;
625         private ParsingLog* log;
626         private bool countLines;
627         private uchar separatorChar;
628         private Stack<Span> spanStack;
629         private Span currentSpan;
630         private Set<int> blockCommentStates;
631         private int commentTokenId;
632         private LexerFlags flags;
633         private long farthestPos;
634         private List<int> ruleContext;
635         private List<int> farthestRuleContext;
636         private List<string>* ruleNameListPtr;
637     }
    public const uchar* LineStart(const uchar* start, const uchar* p)
639     {
640         while (p != start && *p != '\n' && *p != '\r')
641         {
642             --p;
643         }
644         if (p != start)
645         {
646             ++p;
647         }
648         return p;
649     }
    public const uchar* LineEnd(const uchar* end, const uchar* p)
651     {
652         while (p != end && *p != '\n' && *p != '\r')
653         {
654             ++p;
655         }
656         return p;
657     }
    public ustring GetErrorLines(const uchar* start, const uchar* end, const Span& externalSpan)
659     {
660         const uchar* startPos = start + externalSpan.start;
661         if (startPos < start || startPos >= end)
662         {
663             return ustring();
664         }
        const uchar* lineStart = LineStart(start, startPos);
666         int cols = cast<int>(startPos - lineStart);
667         if (cols < 0)
668         {
669             cols = 0;
670         }
        const uchar* lineEnd = LineEnd(end, startPos);
672         if (lineEnd < lineStart)
673         {
674             lineEnd = lineStart;
675         }
676         int lineLength = cast<int>(lineEnd - lineStart);
        ustring lines(lineStart, lineEnd);
        int spanCols = Max(cast<int>(1), Min(externalSpan.end - externalSpan.start, lineLength - cols));
        lines.Append('\n', 1).Append(ustring(' ', cols)).Append('^', spanCols);
680         return lines;
681     }
    public void GetColumns(const uchar* start, const uchar* end, const Span& externalSpan, int& startCol, int& endCol)
683     {
684         startCol = 0;
685         endCol = 0;
686         const uchar* startPos = start + externalSpan.start;
687         if (startPos < start || startPos >= end)
688         {
689             return;
690         }
        const uchar* lineStart = LineStart(start, startPos);
692         int cols = cast<int>(startPos - lineStart);
693         if (cols < 0)
694         {
695             cols = 0;
696         }
697         startCol = cols + 1;
        const uchar* lineEnd = LineEnd(end, startPos);
699         if (lineEnd < lineStart)
700         {
701             lineEnd = lineStart;
702         }
703         int lineLength = cast<int>(lineEnd - lineStart);
        int spanCols = Max(cast<int>(1), Min(externalSpan.end - externalSpan.start, lineLength - cols));
705         endCol = startCol + spanCols;
706     }
    public void WriteBeginRuleToLog(Lexer& lexer, const ustring& ruleName)
708     {
709         lexer.Log()->WriteBeginRule(ruleName);
710         lexer.Log()->IncIndent();
711         lexer.Log()->WriteTry(lexer.RestOfLine(lexer.Log()->MaxLineLength()));
712         lexer.Log()->IncIndent();
713     }
    public void WriteSuccessToLog(Lexer& lexer, const Span& matchSpan, const ustring& ruleName)
715     {
716         lexer.Log()->DecIndent();
717         lexer.Log()->WriteSuccess(lexer.GetMatch(matchSpan));
718         lexer.Log()->DecIndent();
719         lexer.Log()->WriteEndRule(ruleName);
720     }
    public void WriteFailureToLog(Lexer& lexer, const ustring& ruleName)
722     {
723         lexer.Log()->DecIndent();
724         lexer.Log()->WriteFail();
725         lexer.Log()->DecIndent();
726         lexer.Log()->WriteEndRule(ruleName);
727     }
728 
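    // RAII guard that pushes a parser rule id onto the lexer's rule context on
    // construction and pops it on destruction, keeping the context balanced.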
729     public class RuleGuard
730     {
        public nothrow RuleGuard(Lexer& lexer_, int ruleId_) : lexer(lexer_)
732         {
733             lexer.PushRule(ruleId_);
734         }
735         public ~RuleGuard()
736         {
737             lexer.PopRule();
738         }
739         private Lexer& lexer;
740     }
741 
742 } // namespace System.Lex