1 using System;
2 using System.Collections;
3 using System.Text;
4
5 namespace soulcm.scm2html
6 {
7 public const uchar eps = '\0';
8
9 public abstract class Symbol
10 {
11 public Symbol() :
12 contained(false), dontSetContained(false)
13 {
14 }
15 public virtual ~Symbol()
16 {
17 }
18 public const string& Name() const
19 {
20 return name;
21 }
22 public bool Contained() const
23 {
24 return contained;
25 }
26 public void SetContained()
27 {
28 if (!dontSetContained) contained = true;
29 }
30 public void DontSetContained()
31 {
32 dontSetContained = true;
33 }
34 public void SetName(const string& name_)
35 {
36 name = name_;
37 }
38 public abstract bool Match(uchar c);
39 public abstract void Accept(Visitor& visitor);
40 public virtual bool IsClass() const
41 {
42 return false;
43 }
44 public virtual bool IsChar() const
45 {
46 return false;
47 }
48 public virtual bool IsAny() const
49 {
50 return false;
51 }
52 public virtual bool IsRange() const
53 {
54 return false;
55 }
56 private string name;
57 private bool contained;
58 private bool dontSetContained;
59 }
60
61 public class Char : Symbol
62 {
63 public Char(uchar chr_) :
64 chr(chr_)
65 {
66 SetName("(" + ToUtf8(ustring(chr, 1)) + ")");
67 }
68 public uchar Chr() const
69 {
70 return chr;
71 }
72 public override bool IsChar() const
73 {
74 return true;
75 }
76 public override bool Match(uchar c)
77 {
78 return chr == c;
79 }
80 public override void Accept(Visitor& visitor)
81 {
82 visitor.Visit(*this);
83 }
84 private uchar chr;
85 }
86
87 public class Any : Symbol
88 {
89 public Any()
90 {
91 SetName("(*)");
92 }
93 public override bool IsAny() const
94 {
95 return true;
96 }
97 public override bool Match(uchar c)
98 {
99 return true;
100 }
101 public override void Accept(Visitor& visitor)
102 {
103 visitor.Visit(*this);
104 }
105 }
106
107 public class Range : Symbol
108 {
109 public Range(uchar start_, uchar end_) :
110 start(start_), end(end_)
111 {
112 SetName("(" + ToUtf8(ustring(start, 1)) + "-" + ToUtf8(ustring(end, 1)) + ")");
113 }
114 public bool IsEmpty() const
115 {
116 return start > end;
117 }
118 public uchar Start() const
119 {
120 return start;
121 }
122 public uchar End() const
123 {
124 return end;
125 }
126 public void Print(CodeFormatter& formatter)
127 {
128 formatter.Write(ToUtf8(CharStr(start)));
129 formatter.Write("-");
130 formatter.Write(ToUtf8(CharStr(end)));
131 }
132 public override bool IsRange() const
133 {
134 return true;
135 }
136 public override bool Match(uchar c)
137 {
138 return c >= start && c <= end;
139 }
140 public override void Accept(Visitor& visitor)
141 {
142 visitor.Visit(*this);
143 }
144 private uchar start;
145 private uchar end;
146 }
147
148 public inline bool operator==(const Range& left, const Range& right)
149 {
150 return left.Start() == right.Start() && left.End() == right.End();
151 }
152
153 public inline bool operator<(const Range& left, const Range& right)
154 {
155 if (left.Start() < right.Start()) return true;
156 if (left.Start() > right.Start()) return false;
157 return left.End() < right.End();
158 }
159
160 public class Class : Symbol
161 {
162 public Class(int index_) :
163 index(index_), inverse(false)
164 {
165 SetName("[" + ToString(index) + "]");
166 }
167 public bool Inverse() const
168 {
169 return inverse;
170 }
171 public void SetInverse()
172 {
173 inverse = true;
174 }
175 public const List<Symbol*>& Symbols() const
176 {
177 return symbols;
178 }
179 public int Index() const
180 {
181 return index;
182 }
183 public const List<uchar>& Chars() const
184 {
185 return chars;
186 }
187 public bool IsEmpty() const
188 {
189 return symbols.IsEmpty();
190 }
191 public const LinkedList<Range>& Ranges() const
192 {
193 return ranges;
194 }
195 public LinkedList<Range>& Ranges()
196 {
197 return ranges;
198 }
199 public void SetIndex(int index_)
200 {
201 index = index_;
202 SetName("[" + ToString(index) + "]");
203 }
204 public Class* MakeCanonical(LexerContext& lexerContext)
205 {
206 List<Range> rangeVec;
207 Class* canonicalClass = new Class(-1);
208 for (Symbol* symbol : symbols)
209 {
210 if (symbol->IsChar())
211 {
212 Char* chr = cast<Char*>(symbol);
213 rangeVec.Add(Range(chr->Chr(), chr->Chr()));
214 }
215 else if (symbol->IsRange())
216 {
217 Range* range = cast<Range*>(symbol);
218 rangeVec.Add(*range);
219 }
220 else if (symbol->IsAny())
221 {
222 throw Exception("class contains any");
223 }
224 else if (symbol->IsClass())
225 {
226 throw Exception("class contains class");
227 }
228 }
229 for (const Range& range : rangeVec)
230 {
231 canonicalClass->ranges.Add(range);
232 }
233 if (inverse)
234 {
235 canonicalClass->MakeInverse(lexerContext);
236 }
237 return canonicalClass;
238 }
239 public void MakeMinimal(LexerContext& lexerContext)
240 {
241 List<Range> rangeVec;
242 for (const Range& range : ranges)
243 {
244 rangeVec.Add(range);
245 }
246 ranges.Clear();
247 Sort(rangeVec.Begin(), rangeVec.End());
248 rangeVec.Resize(Unique(rangeVec.Begin(), rangeVec.End()) - rangeVec.Begin());
249 bool changed = true;
250 while (changed)
251 {
252 changed = false;
253 List<Range> combinedRanges;
254 for (int i = 0; i < cast<int>(rangeVec.Count()); ++i;)
255 {
256 bool combined = false;
257 Range current = rangeVec[i];
258 if (i > 0)
259 {
260 Range left = rangeVec[i - 1];
261 if (cast<int>(left.End()) + 1 == cast<int>(current.Start()))
262 {
263 combinedRanges.Add(Range(left.Start(), current.End()));
264 combined = true;
265 }
266 }
267 if (i < rangeVec.Count() - 1)
268 {
269 Range right = rangeVec[i + 1];
270 if (cast<int>(current.End()) + 1 == cast<int>(right.Start()))
271 {
272 combinedRanges.Add(Range(current.Start(), right.End()));
273 combined = true;
274 }
275 }
276 if (combined)
277 {
278 changed = true;
279 }
280 else
281 {
282 combinedRanges.Add(current);
283 }
284 }
285 Sort(combinedRanges.Begin(), combinedRanges.End());
286 combinedRanges.Resize(Unique(combinedRanges.Begin(), combinedRanges.End()) - combinedRanges.Begin());
287 Swap(rangeVec, combinedRanges);
288 }
289 symbols.Clear();
290 chars.Clear();
291 for (const Range& range : rangeVec)
292 {
293 if (range.IsEmpty()) continue;
294 ranges.Add(range);
295 if (chars.IsEmpty())
296 {
297 AddChar(range.Start());
298 }
299 }
300 }
301 public void MakeInverse(LexerContext& lexerContext)
302 {
303 List<Range> rangeVec;
304 for (const Range& range : ranges)
305 {
306 rangeVec.Add(range);
307 }
308 ranges.Clear();
309 List<Range> inverseRanges;
310 inverseRanges.Add(Range(cast<uchar>(1), cast<uchar>(1114111)));
311 for (const Range& range : rangeVec)
312 {
313 List<Range> newInverse;
314 for (const Range& i : inverseRanges)
315 {
316 if (Intersect(range, i))
317 {
318 Range intersection = range & i;
319 Range left = Range(i.Start(), cast<uchar>(cast<int>(intersection.Start()) - 1));
320 if (!left.IsEmpty())
321 {
322 newInverse.Add(left);
323 }
324 Range right = Range(cast<uchar>(cast<int>(intersection.End()) + 1), i.End());
325 if (!right.IsEmpty())
326 {
327 newInverse.Add(right);
328 }
329 }
330 else
331 {
332 newInverse.Add(i);
333 }
334 }
335 Swap(inverseRanges, newInverse);
336 }
337 symbols.Clear();
338 chars.Clear();
339 for (const Range& range : inverseRanges)
340 {
341 ranges.Add(range);
342 if (chars.IsEmpty())
343 {
344 AddChar(range.Start());
345 }
346 }
347 MakeMinimal(lexerContext);
348 inverse = false;
349 }
350 public Class* Clone()
351 {
352 Class* cls = new Class(-1);
353 for (Symbol* symbol : symbols)
354 {
355 cls->AddSymbol(symbol);
356 }
357 for (uchar c : chars)
358 {
359 cls->AddChar(c);
360 }
361 return cls;
362 }
363 public void AddSymbol(Symbol* symbol)
364 {
365 symbol->SetContained();
366 symbols.Add(symbol);
367 }
368 public void AddChar(uchar c)
369 {
370 chars.Add(c);
371 }
372 public void Print(CodeFormatter& formatter)
373 {
374 formatter.Write("{");
375 for (Range& range : ranges)
376 {
377 range.Print(formatter);
378 }
379 formatter.Write("}");
380 }
381 public override bool IsClass() const
382 {
383 return true;
384 }
385 public override bool Match(uchar c)
386 {
387 bool match = false;
388 for (Symbol* symbol : symbols)
389 {
390 if (symbol->Match(c))
391 {
392 match = true;
393 break;
394 }
395 }
396 return match != inverse;
397 }
398 public override void Accept(Visitor& visitor)
399 {
400 visitor.Visit(*this);
401 }
402 private int index;
403 private bool inverse;
404 private List<Symbol*> symbols;
405 private List<uchar> chars;
406 private LinkedList<Range> ranges;
407 }
408
409 public bool Intersect(const Range& left, const Range& right)
410 {
411 if (left.IsEmpty() || right.IsEmpty()) return false;
412 if (left.Start() <= right.Start())
413 {
414 return right.Start() >= left.Start() && right.Start() <= left.End();
415 }
416 else
417 {
418 return left.Start() >= right.Start() && left.Start() <= right.End();
419 }
420 }
421
422 public Range operator&(const Range& left, const Range& right)
423 {
424 if (Intersect(left, right))
425 {
426 Range intersection(Max(left.Start(), right.Start()), Min(left.End(), right.End()));
427 return intersection;
428 }
429 return Range(cast<uchar>(1), cast<uchar>(0));
430 }
431
432 public List<Range> operator-(const Range& left, const Range& right)
433 {
434 List<Range> ranges;
435 if (right.Start() <= left.Start() && right.End() >= left.End())
436 {
437 return ranges;
438 }
439 else if (right.Start() > left.End() || right.End() < left.Start())
440 {
441 ranges.Add(left);
442 }
443 else
444 {
445 if (right.Start() >= left.Start() && right.Start() <= left.End())
446 {
447 if (left.Start() <= cast<uchar>(cast<int>(right.Start()) - 1))
448 {
449 ranges.Add(Range(left.Start(), cast<uchar>(cast<int>(right.Start()) - 1)));
450 }
451 }
452 if (right.End() >= left.Start() && right.End() <= left.End())
453 {
454 if (cast<uchar>(cast<int>(right.End()) + 1) <= left.End())
455 {
456 ranges.Add(Range(cast<uchar>(cast<int>(right.End()) + 1), left.End()));
457 }
458 }
459 }
460 return ranges;
461 }
462
463 public List<Range> operator~(const Range& that)
464 {
465 List<Range> result;
466 if (that.Start() > cast<uchar>(1))
467 {
468 result.Add(Range(cast<uchar>(1), cast<uchar>(cast<int>(that.Start()) - 1)));
469 }
470 if (cast<int>(that.End()) < 1114112)
471 {
472 result.Add(Range(cast<uchar>(cast<int>(that.End()) + 1), cast<uchar>(1114111)));
473 }
474 return result;
475 }
476
477 public bool operator==(const Class& left, const Class& right)
478 {
479 if (left.Symbols().Count() != right.Symbols().Count()) return false;
480 int n = cast<int>(left.Symbols().Count());
481 for (int i = 0; i < n; ++i;)
482 {
483 Symbol* leftSymbol = left.Symbols()[i];
484 Symbol* rightSymbol = right.Symbols()[i];
485 if (leftSymbol->IsRange() && rightSymbol->IsRange())
486 {
487 const Range* leftRange = cast<const Range*>(leftSymbol);
488 const Range* rightRange = cast<const Range*>(rightSymbol);
489 if (!(*leftRange == *rightRange))
490 {
491 return false;
492 }
493 }
494 else
495 {
496 return false;
497 }
498 }
499 return true;
500 }
501
502 public class RangeEndLess : Rel<Range>
503 {
504 public bool operator()(const Range& left, const Range& right) const
505 {
506 if (left.End() < right.End()) return true;
507 if (left.End() > right.End()) return false;
508 return left.Start() < right.Start();
509 }
510 }
511
512 public bool Intersect(const Class& left, const Class& right)
513 {
514 for (Symbol* leftSymbol : left.Symbols())
515 {
516 if (leftSymbol->IsRange())
517 {
518 Range* leftRange = cast<Range*>(leftSymbol);
519 for (Symbol* rightSymbol : right.Symbols())
520 {
521 if (rightSymbol->IsRange())
522 {
523 Range* rightRange = cast<Range*>(rightSymbol);
524 if (Intersect(*leftRange, *rightRange))
525 {
526 return true;
527 }
528 }
529 }
530 }
531 }
532 return false;
533 }
534
535 public Class* MakeIntertersection(const Class& left, const Class& right, LexerContext& lexerContext)
536 {
537 List<Range> leftRanges;
538 for (Symbol* leftSymbol : left.Symbols())
539 {
540 if (leftSymbol->IsRange())
541 {
542 Range* leftRange = cast<Range*>(leftSymbol);
543 leftRanges.Add(*leftRange);
544 }
545 }
546 List<Range> rightRanges;
547 for (Symbol* rightSymbol : right.Symbols())
548 {
549 if (rightSymbol->IsRange())
550 {
551 Range* rightRange = cast<Range*>(rightSymbol);
552 rightRanges.Add(*rightRange);
553 }
554 }
555 List<Range> intersection;
556 for (const Range& left : leftRanges)
557 {
558 List<Range>.Iterator start = LowerBound(rightRanges.Begin(), rightRanges.End(), left);
559 List<Range>.Iterator end = UpperBound(rightRanges.Begin(), rightRanges.End(), left, RangeEndLess());
560 if (start != rightRanges.Begin()) --start;
561 if (end < rightRanges.End()) ++end;
562 for (List<Range>.Iterator i = start; i != end; ++i;)
563 {
564 const Range& right = *i;
565 if (left == right)
566 {
567 intersection.Add(left);
568 break;
569 }
570 else if (Intersect(left, right))
571 {
572 intersection.Add(left & right);
573 }
574 }
575 }
576 Sort(intersection.Begin(), intersection.End());
577 intersection.Resize(Unique(intersection.Begin(), intersection.End()) - intersection.Begin());
578 Class* cls = lexerContext.MakeClass();
579 for (const Range& i : intersection)
580 {
581 cls->AddSymbol(lexerContext.MakeRange(i.Start(), i.End()));
582 if (cls->Chars().IsEmpty())
583 {
584 cls->AddChar(i.Start());
585 }
586 }
587 return cls;
588 }
589
590 public Class* MakeDifference(const Class& left, const Class& right, LexerContext& lexerContext)
591 {
592 List<Range> leftRanges;
593 for (Symbol* leftSymbol : left.Symbols())
594 {
595 if (leftSymbol->IsRange())
596 {
597 Range* leftRange = cast<Range*>(leftSymbol);
598 leftRanges.Add(*leftRange);
599 }
600 }
601 List<Range> rightRanges;
602 for (Symbol* rightSymbol : right.Symbols())
603 {
604 if (rightSymbol->IsRange())
605 {
606 Range* rightRange = cast<Range*>(rightSymbol);
607 rightRanges.Add(*rightRange);
608 }
609 }
610 List<Range> difference;
611 for (const Range& left : leftRanges)
612 {
613 bool found = false;
614 List<Range> diffs;
615 List<Range>.Iterator start = LowerBound(rightRanges.Begin(), rightRanges.End(), left);
616 List<Range>.Iterator end = UpperBound(rightRanges.Begin(), rightRanges.End(), left, RangeEndLess());
617 if (start != rightRanges.Begin()) --start;
618 if (end < rightRanges.End()) ++end;
619 for (List<Range>.Iterator i = start; i != end; ++i;)
620 {
621 const Range& right = *i;
622 if (left == right)
623 {
624 found = true;
625 break;
626 }
627 else
628 {
629 if (Intersect(left, right))
630 {
631 Range intersection = left & right;
632 Range l = Range(left.Start(), cast<uchar>(cast<int>(intersection.Start()) - 1));
633 if (!l.IsEmpty())
634 {
635 diffs.Add(l);
636 }
637 Range r = Range(cast<uchar>(cast<int>(intersection.End()) + 1), left.End());
638 if (!r.IsEmpty())
639 {
640 diffs.Add(r);
641 }
642 }
643 }
644 }
645 if (!found)
646 {
647 if (diffs.IsEmpty())
648 {
649 difference.Add(left);
650 }
651 else
652 {
653 for (const Range& diff : diffs)
654 {
655 difference.Add(diff);
656 }
657 }
658 }
659 }
660 Class* d = lexerContext.MakeClass();
661 for (const Range& r : difference)
662 {
663 d->AddSymbol(lexerContext.MakeRange(r.Start(), r.End()));
664 if (d->Chars().IsEmpty())
665 {
666 d->AddChar(r.Start());
667 }
668 }
669 d->MakeMinimal(lexerContext);
670 return d;
671 }
672 }