1
2
3
4
5
6 #ifndef SOULNG_LEXER_LEXER_INCLUDED
7 #define SOULNG_LEXER_LEXER_INCLUDED
8 #include <soulng/lexer/Keyword.hpp>
9 #include <soulng/lexer/Span.hpp>
10 #include <soulng/lexer/ParsingLog.hpp>
11 #include <set>
12 #include <vector>
13 #include <list>
14 #include <map>
15 #include <memory>
16 #include <stdint.h>
17
18 namespace soulng { namespace lexer {
19
// Bit flags describing lexer state. Each enumerator other than `none` occupies
// its own bit, so values can be combined and tested with the |, & and ~
// operators declared for this type. The underlying type is int8_t.
// NOTE(review): the exact meaning of each flag is defined by the code that
// sets and tests it (not visible in this header) — confirm before relying on it.
enum class LexerFlags : int8_t
{
    none = 0,                          // no bits set
    synchronize = 1 << 0,              // bit 0
    synchronized = 1 << 1,             // bit 1
    synchronizedAtLeastOnce = 1 << 2,  // bit 2
    cursorSeen = 1 << 3                // bit 3
};
24
25 inline LexerFlags operator|(LexerFlags left, LexerFlags right)
26 {
27 return static_cast<LexerFlags>(static_cast<int8_t>(left) | static_cast<int8_t>(right));
28 }
29
30 inline LexerFlags operator&(LexerFlags left, LexerFlags right)
31 {
32 return static_cast<LexerFlags>(static_cast<int8_t>(left) & static_cast<int8_t>(right));
33 }
34
35 inline LexerFlags operator~(LexerFlags flag)
36 {
37 return static_cast<LexerFlags>(~static_cast<int8_t>(flag));
38 }
39
40 class Lexer
41 {
42 public:
43 Lexer(const std::u32string& content_, const std::string& fileName_, int fileIndex_);
44 Lexer(const char32_t* start_, const char32_t* end_, const std::string& fileName_, int fileIndex_);
45 Lexer(const Lexer&) = delete;
46 Lexer(Lexer&&) = delete;
47 Lexer& operator=(const Lexer&) = delete;
48 Lexer& operator=(Lexer&&) = delete;
49 virtual ~Lexer();
50 int operator*() const { return current->id; }
51 void operator++();
52 int64_t GetPos() const;
53 void SetPos(int64_t pos);
54 virtual int NextState(int state, char32_t c);
55 void SetKeywordMap(KeywordMap* keywordMap_) { keywordMap = keywordMap_; }
56 KeywordMap* GetKeywordMap() { return keywordMap; }
57 int GetKeywordToken(const Lexeme& lexeme) const;
58 void Retract() { token.match.end = pos; }
59 const std::string& FileName() const { return fileName; }
60 Span GetSpan() const { return Span(fileIndex, line, static_cast<int32_t>(GetPos())); }
61 void ConvertExternal(Span& span);
62 Token GetToken(int64_t pos) const;
63 void SetTokens(const std::std::vector<Token>&tokens_);
64 void SetLine(int line_) { line = line_; }
65 void SetCountLines(bool countLines_) { countLines = countLines_; }
66 Token token;
67 std::u32string GetMatch(const Span& span) const;
68 std::u32string ErrorLines(const Token& token) const;
69 std::u32string ErrorLines(const Span& span) const;
70 void GetColumns(const Span& span, int32_t& startCol, int32_t& endCol) const;
71 void ThrowExpectationFailure(const Span& span, const std::u32string& name);
72 void AddError(const Span& span, const std::u32string& name);
73 std::std::vector<std::std::unique_ptr<std::exception>>Errors(){returnstd::move(errors);}
74 const char32_t* Start() const { return start; }
75 const char32_t* End() const { return end; }
76 const char32_t* Pos() const { return pos; }
77 void SetLog(ParsingLog* log_) { log = log_; }
78 ParsingLog* Log() const { return log; }
79 std::u32string RestOfLine(int maxLineLength);
80 void SetSeparatorChar(char32_t separatorChar_) { separatorChar = separatorChar_; }
81 TokenLine TokenizeLine(const std::u32string& line, int lineNumber, int startState);
82 void SetSyncTokens(const std::std::vector<int>&syncTokens_);
83 bool Synchronize();
84 void SetBlockCommentStates(const std::std::set<int>&blockCommentStates_);
85 const std::std::set<int>&BlockCommentStates() const;
86 void SetCommentTokenId(int commentTokenId_) { commentTokenId = commentTokenId_; }
87 LexerFlags Flags() const { return flags; }
88 bool GetFlag(LexerFlags flag) const { return (flags & flag) != LexerFlags::none; }
89 void SetFlag(LexerFlags flag) { flags = flags | flag; }
90 void ResetFlag(LexerFlags flag) { flags = flags & ~flag; }
91 protected:
92 Lexeme lexeme;
93 int32_t line;
94 virtual int GetCommentTokenId() const { return -1; }
95 private:
96 std::u32string content;
97 std::string fileName;
98 int fileIndex;
99 KeywordMap* keywordMap;
100 const char32_t* start;
101 const char32_t* end;
102 const char32_t* pos;
103 std::vector<Token> tokens;
104 std::vector<Token>::iterator current;
105 std::vector<std::std::unique_ptr<std::exception>>errors;
106 std::vector<int> syncTokens;
107 ParsingLog* log;
108 bool countLines;
109 char32_t separatorChar;
110 LexerFlags flags;
111 std::set<int> blockCommentStates;
112 int commentTokenId;
113 void NextToken();
114 };
115
116 std::u32string GetErrorLines(const char32_t* start, const char32_t* end, const Span& externalSpan);
117 void GetColumns(const char32_t* start, const char32_t* end, const Span& externalSpan, int32_t& startCol, int32_t& endCol);
118 void WriteBeginRuleToLog(Lexer& lexer, const std::u32string& ruleName);
119 void WriteSuccessToLog(Lexer& lexer, const Span& matchSpan, const std::u32string& ruleName);
120 void WriteFailureToLog(Lexer& lexer, const std::u32string& ruleName);
121
122 } }
123
124 #endif // SOULNG_LEXER_LEXER_INCLUDED