1 using System;
2 using System.Collections;
3 using System.Text;
4
5
6
7
8
9 namespace soulcm.scmlg
10 {
11 public const uchar eps = '\0';
12 public abstract class Symbol
13 {
14 public Symbol() :
15 contained(false), dontSetContained(false)
16 {
17 }
18 public virtual ~Symbol()
19 {
20 }
21 public const string& Name() const
22 {
23 return name;
24 }
25 public bool Contained() const
26 {
27 return contained;
28 }
29 public void SetContained()
30 {
31 if (!dontSetContained) contained = true;
32 }
33 public void DontSetContained()
34 {
35 dontSetContained = true;
36 }
37 public void SetName(const string& name_)
38 {
39 name = name_;
40 }
41 public abstract bool Match(uchar c);
42 public abstract void Accept(Visitor& visitor);
43 public virtual bool IsClass() const
44 {
45 return false;
46 }
47 public virtual bool IsChar() const
48 {
49 return false;
50 }
51 public virtual bool IsAny() const
52 {
53 return false;
54 }
55 public virtual bool IsRange() const
56 {
57 return false;
58 }
59 private string name;
60 private bool contained;
61 private bool dontSetContained;
62 }
63 public class Char : Symbol
64 {
65 public Char(uchar chr_) :
66 chr(chr_)
67 {
68 SetName("(" + ToUtf8(ustring(chr, 1)) + ")");
69 }
70 public uchar Chr() const
71 {
72 return chr;
73 }
74 public override bool IsChar() const
75 {
76 return true;
77 }
78 public override bool Match(uchar c)
79 {
80 return chr == c;
81 }
82 public override void Accept(Visitor& visitor)
83 {
84 visitor.Visit(*this);
85 }
86 private uchar chr;
87 }
88 public class Any : Symbol
89 {
90 public Any()
91 {
92 SetName("(*)");
93 }
94 public override bool IsAny() const
95 {
96 return true;
97 }
98 public override bool Match(uchar c)
99 {
100 return true;
101 }
102 public override void Accept(Visitor& visitor)
103 {
104 visitor.Visit(*this);
105 }
106 }
107 public class Range : Symbol
108 {
109 public Range(uchar start_, uchar end_) :
110 start(start_), end(end_)
111 {
112 SetName("(" + ToUtf8(ustring(start, 1)) + "-" + ToUtf8(ustring(end, 1)) + ")");
113 }
114 public bool IsEmpty() const
115 {
116 return start > end;
117 }
118 public uchar Start() const
119 {
120 return start;
121 }
122 public uchar End() const
123 {
124 return end;
125 }
126 public void Print(CodeFormatter& formatter)
127 {
128 formatter.Write(ToUtf8(CharStr(start)));
129 formatter.Write("-");
130 formatter.Write(ToUtf8(CharStr(end)));
131 }
132 public override bool IsRange() const
133 {
134 return true;
135 }
136 public override bool Match(uchar c)
137 {
138 return c >= start && c <= end;
139 }
140 public override void Accept(Visitor& visitor)
141 {
142 visitor.Visit(*this);
143 }
144 private uchar start;
145 private uchar end;
146 }
147 public inline bool operator==(const Range& left, const Range& right)
148 {
149 return left.Start() == right.Start() && left.End() == right.End();
150 }
151 public inline bool operator<(const Range& left, const Range& right)
152 {
153 if (left.Start() < right.Start()) return true;
154 if (left.Start() > right.Start()) return false;
155 return left.End() < right.End();
156 }
157 public class Class : Symbol
158 {
159 public Class(int index_) :
160 index(index_), inverse(false)
161 {
162 SetName("[" + ToString(index) + "]");
163 }
164 public bool Inverse() const
165 {
166 return inverse;
167 }
168 public void SetInverse()
169 {
170 inverse = true;
171 }
172 public const List<Symbol*>& Symbols() const
173 {
174 return symbols;
175 }
176 public int Index() const
177 {
178 return index;
179 }
180 public const List<uchar>& Chars() const
181 {
182 return chars;
183 }
184 public bool IsEmpty() const
185 {
186 return symbols.IsEmpty();
187 }
188 public const LinkedList<Range>& Ranges() const
189 {
190 return ranges;
191 }
192 public LinkedList<Range>& Ranges()
193 {
194 return ranges;
195 }
196 public void SetIndex(int index_)
197 {
198 index = index_;
199 SetName("[" + ToString(index) + "]");
200 }
201 public Class* MakeCanonical(LexerContext& lexerContext)
202 {
203 List<Range> rangeVec;
204 Class* canonicalClass = new Class(-1);
205 for (Symbol* symbol : symbols)
206 {
207 if (symbol->IsChar())
208 {
209 Char* chr = cast<Char*>(symbol);
210 rangeVec.Add(Range(chr->Chr(), chr->Chr()));
211 }
212 else if (symbol->IsRange())
213 {
214 Range* range = cast<Range*>(symbol);
215 rangeVec.Add(*range);
216 }
217 else if (symbol->IsAny())
218 {
219 throw Exception("class contains any");
220 }
221 else if (symbol->IsClass())
222 {
223 throw Exception("class contains class");
224 }
225 }
226 for (const Range& range : rangeVec)
227 {
228 canonicalClass->ranges.Add(range);
229 }
230 if (inverse)
231 {
232 canonicalClass->MakeInverse(lexerContext);
233 }
234 return canonicalClass;
235 }
236 public void MakeMinimal(LexerContext& lexerContext)
237 {
238 List<Range> rangeVec;
239 for (const Range& range : ranges)
240 {
241 rangeVec.Add(range);
242 }
243 ranges.Clear();
244 Sort(rangeVec.Begin(), rangeVec.End());
245 rangeVec.Resize(Unique(rangeVec.Begin(), rangeVec.End()) - rangeVec.Begin());
246 bool changed = true;
247 while (changed)
248 {
249 changed = false;
250 List<Range> combinedRanges;
251 for (int i = 0; i < cast<int>(rangeVec.Count()); ++i;)
252 {
253 bool combined = false;
254 Range current = rangeVec[i];
255 if (i > 0)
256 {
257 Range left = rangeVec[i - 1];
258 if (cast<int>(left.End()) + 1 == cast<int>(current.Start()))
259 {
260 combinedRanges.Add(Range(left.Start(), current.End()));
261 combined = true;
262 }
263 }
264 if (i < rangeVec.Count() - 1)
265 {
266 Range right = rangeVec[i + 1];
267 if (cast<int>(current.End()) + 1 == cast<int>(right.Start()))
268 {
269 combinedRanges.Add(Range(current.Start(), right.End()));
270 combined = true;
271 }
272 }
273 if (combined)
274 {
275 changed = true;
276 }
277 else
278 {
279 combinedRanges.Add(current);
280 }
281 }
282 Sort(combinedRanges.Begin(), combinedRanges.End());
283 combinedRanges.Resize(Unique(combinedRanges.Begin(), combinedRanges.End()) - combinedRanges.Begin());
284 Swap(rangeVec, combinedRanges);
285 }
286 symbols.Clear();
287 chars.Clear();
288 for (const Range& range : rangeVec)
289 {
290 if (range.IsEmpty()) continue;
291 ranges.Add(range);
292 if (chars.IsEmpty())
293 {
294 AddChar(range.Start());
295 }
296 }
297 }
298 public void MakeInverse(LexerContext& lexerContext)
299 {
300 List<Range> rangeVec;
301 for (const Range& range : ranges)
302 {
303 rangeVec.Add(range);
304 }
305 ranges.Clear();
306 List<Range> inverseRanges;
307 inverseRanges.Add(Range(cast<uchar>(1), cast<uchar>(1114111)));
308 for (const Range& range : rangeVec)
309 {
310 List<Range> newInverse;
311 for (const Range& i : inverseRanges)
312 {
313 if (Intersect(range, i))
314 {
315 Range intersection = range & i;
316 Range left = Range(i.Start(), cast<uchar>(cast<int>(intersection.Start()) - 1));
317 if (!left.IsEmpty())
318 {
319 newInverse.Add(left);
320 }
321 Range right = Range(cast<uchar>(cast<int>(intersection.End()) + 1), i.End());
322 if (!right.IsEmpty())
323 {
324 newInverse.Add(right);
325 }
326 }
327 else
328 {
329 newInverse.Add(i);
330 }
331 }
332 Swap(inverseRanges, newInverse);
333 }
334 symbols.Clear();
335 chars.Clear();
336 for (const Range& range : inverseRanges)
337 {
338 ranges.Add(range);
339 if (chars.IsEmpty())
340 {
341 AddChar(range.Start());
342 }
343 }
344 MakeMinimal(lexerContext);
345 inverse = false;
346 }
347 public Class* Clone()
348 {
349 Class* cls = new Class(-1);
350 for (Symbol* symbol : symbols)
351 {
352 cls->AddSymbol(symbol);
353 }
354 for (uchar c : chars)
355 {
356 cls->AddChar(c);
357 }
358 return cls;
359 }
360 public void AddSymbol(Symbol* symbol)
361 {
362 symbol->SetContained();
363 symbols.Add(symbol);
364 }
365 public void AddChar(uchar c)
366 {
367 chars.Add(c);
368 }
369 public void Print(CodeFormatter& formatter)
370 {
371 formatter.Write("{");
372 for (Range& range : ranges)
373 {
374 range.Print(formatter);
375 }
376 formatter.Write("}");
377 }
378 public override bool IsClass() const
379 {
380 return true;
381 }
382 public override bool Match(uchar c)
383 {
384 bool match = false;
385 for (Symbol* symbol : symbols)
386 {
387 if (symbol->Match(c))
388 {
389 match = true;
390 break;
391 }
392 }
393 return match != inverse;
394 }
395 public override void Accept(Visitor& visitor)
396 {
397 visitor.Visit(*this);
398 }
399 private int index;
400 private bool inverse;
401 private List<Symbol*> symbols;
402 private List<uchar> chars;
403 private LinkedList<Range> ranges;
404 }
405 public bool Intersect(const Range& left, const Range& right)
406 {
407 if (left.IsEmpty() || right.IsEmpty()) return false;
408 if (left.Start() <= right.Start())
409 {
410 return right.Start() >= left.Start() && right.Start() <= left.End();
411 }
412 else
413 {
414 return left.Start() >= right.Start() && left.Start() <= right.End();
415 }
416 }
417 public Range operator&(const Range& left, const Range& right)
418 {
419 if (Intersect(left, right))
420 {
421 Range intersection(Max(left.Start(), right.Start()), Min(left.End(), right.End()));
422 return intersection;
423 }
424 return Range(cast<uchar>(1), cast<uchar>(0));
425 }
426 public List<Range> operator-(const Range& left, const Range& right)
427 {
428 List<Range> ranges;
429 if (right.Start() <= left.Start() && right.End() >= left.End())
430 {
431 return ranges;
432 }
433 else if (right.Start() > left.End() || right.End() < left.Start())
434 {
435 ranges.Add(left);
436 }
437 else
438 {
439 if (right.Start() >= left.Start() && right.Start() <= left.End())
440 {
441 if (left.Start() <= cast<uchar>(cast<int>(right.Start()) - 1))
442 {
443 ranges.Add(Range(left.Start(), cast<uchar>(cast<int>(right.Start()) - 1)));
444 }
445 }
446 if (right.End() >= left.Start() && right.End() <= left.End())
447 {
448 if (cast<uchar>(cast<int>(right.End()) + 1) <= left.End())
449 {
450 ranges.Add(Range(cast<uchar>(cast<int>(right.End()) + 1), left.End()));
451 }
452 }
453 }
454 return ranges;
455 }
456 public List<Range> operator~(const Range& that)
457 {
458 List<Range> result;
459 if (that.Start() > cast<uchar>(1))
460 {
461 result.Add(Range(cast<uchar>(1), cast<uchar>(cast<int>(that.Start()) - 1)));
462 }
463 if (cast<int>(that.End()) < 1114112)
464 {
465 result.Add(Range(cast<uchar>(cast<int>(that.End()) + 1), cast<uchar>(1114111)));
466 }
467 return result;
468 }
469 public bool operator==(const Class& left, const Class& right)
470 {
471 if (left.Symbols().Count() != right.Symbols().Count()) return false;
472 int n = cast<int>(left.Symbols().Count());
473 for (int i = 0; i < n; ++i;)
474 {
475 Symbol* leftSymbol = left.Symbols()[i];
476 Symbol* rightSymbol = right.Symbols()[i];
477 if (leftSymbol->IsRange() && rightSymbol->IsRange())
478 {
479 const Range* leftRange = cast<const Range*>(leftSymbol);
480 const Range* rightRange = cast<const Range*>(rightSymbol);
481 if (!(*leftRange == *rightRange))
482 {
483 return false;
484 }
485 }
486 else
487 {
488 return false;
489 }
490 }
491 return true;
492 }
493 public class RangeEndLess : Rel<Range>
494 {
495 public bool operator()(const Range& left, const Range& right) const
496 {
497 if (left.End() < right.End()) return true;
498 if (left.End() > right.End()) return false;
499 return left.Start() < right.Start();
500 }
501 }
502 public bool Intersect(const Class& left, const Class& right)
503 {
504 for (Symbol* leftSymbol : left.Symbols())
505 {
506 if (leftSymbol->IsRange())
507 {
508 Range* leftRange = cast<Range*>(leftSymbol);
509 for (Symbol* rightSymbol : right.Symbols())
510 {
511 if (rightSymbol->IsRange())
512 {
513 Range* rightRange = cast<Range*>(rightSymbol);
514 if (Intersect(*leftRange, *rightRange))
515 {
516 return true;
517 }
518 }
519 }
520 }
521 }
522 return false;
523 }
524 public Class* MakeIntertersection(const Class& left, const Class& right, LexerContext& lexerContext)
525 {
526 List<Range> leftRanges;
527 for (Symbol* leftSymbol : left.Symbols())
528 {
529 if (leftSymbol->IsRange())
530 {
531 Range* leftRange = cast<Range*>(leftSymbol);
532 leftRanges.Add(*leftRange);
533 }
534 }
535 List<Range> rightRanges;
536 for (Symbol* rightSymbol : right.Symbols())
537 {
538 if (rightSymbol->IsRange())
539 {
540 Range* rightRange = cast<Range*>(rightSymbol);
541 rightRanges.Add(*rightRange);
542 }
543 }
544 List<Range> intersection;
545 for (const Range& left : leftRanges)
546 {
547 List<Range>.Iterator start = LowerBound(rightRanges.Begin(), rightRanges.End(), left);
548 List<Range>.Iterator end = UpperBound(rightRanges.Begin(), rightRanges.End(), left, RangeEndLess());
549 if (start != rightRanges.Begin()) --start;
550 if (end < rightRanges.End()) ++end;
551 for (List<Range>.Iterator i = start; i != end; ++i;)
552 {
553 const Range& right = *i;
554 if (left == right)
555 {
556 intersection.Add(left);
557 break;
558 }
559 else if (Intersect(left, right))
560 {
561 intersection.Add(left & right);
562 }
563 }
564 }
565 Sort(intersection.Begin(), intersection.End());
566 intersection.Resize(Unique(intersection.Begin(), intersection.End()) - intersection.Begin());
567 Class* cls = lexerContext.MakeClass();
568 for (const Range& i : intersection)
569 {
570 cls->AddSymbol(lexerContext.MakeRange(i.Start(), i.End()));
571 if (cls->Chars().IsEmpty())
572 {
573 cls->AddChar(i.Start());
574 }
575 }
576 return cls;
577 }
578 public Class* MakeDifference(const Class& left, const Class& right, LexerContext& lexerContext)
579 {
580 List<Range> leftRanges;
581 for (Symbol* leftSymbol : left.Symbols())
582 {
583 if (leftSymbol->IsRange())
584 {
585 Range* leftRange = cast<Range*>(leftSymbol);
586 leftRanges.Add(*leftRange);
587 }
588 }
589 List<Range> rightRanges;
590 for (Symbol* rightSymbol : right.Symbols())
591 {
592 if (rightSymbol->IsRange())
593 {
594 Range* rightRange = cast<Range*>(rightSymbol);
595 rightRanges.Add(*rightRange);
596 }
597 }
598 List<Range> difference;
599 for (const Range& left : leftRanges)
600 {
601 bool found = false;
602 List<Range> diffs;
603 List<Range>.Iterator start = LowerBound(rightRanges.Begin(), rightRanges.End(), left);
604 List<Range>.Iterator end = UpperBound(rightRanges.Begin(), rightRanges.End(), left, RangeEndLess());
605 if (start != rightRanges.Begin()) --start;
606 if (end < rightRanges.End()) ++end;
607 for (List<Range>.Iterator i = start; i != end; ++i;)
608 {
609 const Range& right = *i;
610 if (left == right)
611 {
612 found = true;
613 break;
614 }
615 else
616 {
617 if (Intersect(left, right))
618 {
619 Range intersection = left & right;
620 Range l = Range(left.Start(), cast<uchar>(cast<int>(intersection.Start()) - 1));
621 if (!l.IsEmpty())
622 {
623 diffs.Add(l);
624 }
625 Range r = Range(cast<uchar>(cast<int>(intersection.End()) + 1), left.End());
626 if (!r.IsEmpty())
627 {
628 diffs.Add(r);
629 }
630 }
631 }
632 }
633 if (!found)
634 {
635 if (diffs.IsEmpty())
636 {
637 difference.Add(left);
638 }
639 else
640 {
641 for (const Range& diff : diffs)
642 {
643 difference.Add(diff);
644 }
645 }
646 }
647 }
648 Class* d = lexerContext.MakeClass();
649 for (const Range& r : difference)
650 {
651 d->AddSymbol(lexerContext.MakeRange(r.Start(), r.End()));
652 if (d->Chars().IsEmpty())
653 {
654 d->AddChar(r.Start());
655 }
656 }
657 d->MakeMinimal(lexerContext);
658 return d;
659 }
660 }