1 using System;
  2 using System.Collections;
  3 using System.Text;
  4 
  5 namespace soulcm.scm2html
  6 {
  7     public const uchar eps = '\0';
  8 
  9     public abstract class Symbol
 10     {
 11         public Symbol() : 
 12             contained(false)dontSetContained(false)
 13         {
 14         }
 15         public virtual ~Symbol()
 16         {
 17         }
 18         public const string& Name() const
 19         {
 20             return name;
 21         }
 22         public bool Contained() const
 23         {
 24             return contained;
 25         }
 26         public void SetContained()
 27         {
 28             if (!dontSetContained) contained = true;
 29         }
 30         public void DontSetContained()
 31         {
 32             dontSetContained = true;
 33         }
 34         public void SetName(const string& name_)
 35         {
 36             name = name_;
 37         }
 38         public abstract bool Match(uchar c);
 39         public abstract void Accept(Visitor& visitor);
 40         public virtual bool IsClass() const
 41         {
 42             return false;
 43         }
 44         public virtual bool IsChar() const
 45         {
 46             return false;
 47         }
 48         public virtual bool IsAny() const
 49         {
 50             return false;
 51         }
 52         public virtual bool IsRange() const
 53         {
 54             return false;
 55         }
 56         private string name;
 57         private bool contained;
 58         private bool dontSetContained;
 59     }
 60 
 61     public class Char : Symbol
 62     {
 63         public Char(uchar chr_) : 
 64             chr(chr_)
 65         {
 66             SetName("(" + ToUtf8(ustring(chr1)) + ")");
 67         }
 68         public uchar Chr() const
 69         {
 70             return chr;
 71         }
 72         public override bool IsChar() const
 73         {
 74             return true;
 75         }
 76         public override bool Match(uchar c)
 77         {
 78             return chr == c;
 79         }
 80         public override void Accept(Visitor& visitor)
 81         {
 82             visitor.Visit(*this);
 83         }
 84         private uchar chr;
 85     }
 86 
 87     public class Any : Symbol
 88     {
 89         public Any()
 90         {
 91             SetName("(*)");
 92         }
 93         public override bool IsAny() const
 94         {
 95             return true;
 96         }
 97         public override bool Match(uchar c)
 98         {
 99             return true;
100         }
101         public override void Accept(Visitor& visitor)
102         {
103             visitor.Visit(*this);
104         }
105     }
106 
107     public class Range : Symbol
108     {
109         public Range(uchar start_uchar end_) : 
110             start(start_)end(end_)
111         {
112             SetName("(" + ToUtf8(ustring(start1)) + "-" + ToUtf8(ustring(end1)) + ")");
113         }
114         public bool IsEmpty() const
115         {
116             return start > end;
117         }
118         public uchar Start() const
119         {
120             return start;
121         }
122         public uchar End() const
123         {
124             return end;
125         }
126         public void Print(CodeFormatter& formatter)
127         {
128             formatter.Write(ToUtf8(CharStr(start)));
129             formatter.Write("-");
130             formatter.Write(ToUtf8(CharStr(end)));
131         }
132         public override bool IsRange() const
133         {
134             return true;
135         }
136         public override bool Match(uchar c)
137         {
138             return c >= start && c <= end;
139         }
140         public override void Accept(Visitor& visitor)
141         {
142             visitor.Visit(*this);
143         }
144         private uchar start;
145         private uchar end;
146     }
147 
148     public inline bool operator==(const Range& leftconst Range& right)
149     {
150         return left.Start() == right.Start() && left.End() == right.End();
151     }
152 
153     public inline bool operator<(const Range& leftconst Range& right)
154     {
155         if (left.Start() < right.Start()) return true;
156         if (left.Start() > right.Start()) return false;
157         return left.End() < right.End();
158     }
159 
160     public class Class : Symbol
161     {
162         public Class(int index_) : 
163             index(index_)inverse(false)
164         {
165             SetName("[" + ToString(index) + "]");
166         }
167         public bool Inverse() const
168         {
169             return inverse;
170         }
171         public void SetInverse()
172         {
173             inverse = true;
174         }
175         public const List<Symbol*>& Symbols() const
176         {
177             return symbols;
178         }
179         public int Index() const
180         {
181             return index;
182         }
183         public const List<uchar>& Chars() const
184         {
185             return chars;
186         }
187         public bool IsEmpty() const
188         {
189             return symbols.IsEmpty();
190         }
191         public const LinkedList<Range>& Ranges() const
192         {
193             return ranges;
194         }
195         public LinkedList<Range>& Ranges()
196         {
197             return ranges;
198         }
199         public void SetIndex(int index_)
200         {
201             index = index_;
202             SetName("[" + ToString(index) + "]");
203         }
204         public Class* MakeCanonical(LexerContext& lexerContext)
205         {
206             List<Range> rangeVec;
207             Class* canonicalClass = new Class(-1);
208             for (Symbol* symbol : symbols)
209             {
210                 if (symbol->IsChar())
211                 {
212                     Char* chr = cast<Char*>(symbol);
213                     rangeVec.Add(Range(chr->Chr()chr->Chr()));
214                 }
215                 else if (symbol->IsRange())
216                 {
217                     Range* range = cast<Range*>(symbol);
218                     rangeVec.Add(*range);
219                 }
220                 else if (symbol->IsAny())
221                 {
222                     throw Exception("class contains any");
223                 }
224                 else if (symbol->IsClass())
225                 {
226                     throw Exception("class contains class");
227                 }
228             }
229             for (const Range& range : rangeVec)
230             {
231                 canonicalClass->ranges.Add(range);
232             }
233             if (inverse)
234             {
235                 canonicalClass->MakeInverse(lexerContext);
236             }
237             return canonicalClass;
238         }
239         public void MakeMinimal(LexerContext& lexerContext)
240         {
241             List<Range> rangeVec;
242             for (const Range& range : ranges)
243             {
244                 rangeVec.Add(range);
245             }
246             ranges.Clear();
247             Sort(rangeVec.Begin()rangeVec.End());
248             rangeVec.Resize(Unique(rangeVec.Begin()rangeVec.End()) - rangeVec.Begin());
249             bool changed = true;
250             while (changed)
251             {
252                 changed = false;
253                 List<Range> combinedRanges;
254                 for (int i = 0; i < cast<int>(rangeVec.Count()); ++i;)
255                 {
256                     bool combined = false;
257                     Range current = rangeVec[i];
258                     if (i > 0)
259                     {
260                         Range left = rangeVec[i - 1];
261                         if (cast<int>(left.End()) + 1 == cast<int>(current.Start()))
262                         {
263                             combinedRanges.Add(Range(left.Start()current.End()));
264                             combined = true;
265                         }
266                     }
267                     if (i < rangeVec.Count() - 1)
268                     {
269                         Range right = rangeVec[i + 1];
270                         if (cast<int>(current.End()) + 1 == cast<int>(right.Start()))
271                         {
272                             combinedRanges.Add(Range(current.Start()right.End()));
273                             combined = true;
274                         }
275                     }
276                     if (combined)
277                     {
278                         changed = true;
279                     }
280                     else
281                     {
282                         combinedRanges.Add(current);
283                     }
284                 }
285                 Sort(combinedRanges.Begin()combinedRanges.End());
286                 combinedRanges.Resize(Unique(combinedRanges.Begin()combinedRanges.End()) - combinedRanges.Begin());
287                 Swap(rangeVeccombinedRanges);
288             }
289             symbols.Clear();
290             chars.Clear();
291             for (const Range& range : rangeVec)
292             {
293                 if (range.IsEmpty()) continue;
294                 ranges.Add(range);
295                 if (chars.IsEmpty())
296                 {
297                     AddChar(range.Start());
298                 }
299             }
300         }
301         public void MakeInverse(LexerContext& lexerContext)
302         {
303             List<Range> rangeVec;
304             for (const Range& range : ranges)
305             {
306                 rangeVec.Add(range);
307             }
308             ranges.Clear();
309             List<Range> inverseRanges;
310             inverseRanges.Add(Range(cast<uchar>(1)cast<uchar>(1114111)));
311             for (const Range& range : rangeVec)
312             {
313                 List<Range> newInverse;
314                 for (const Range& i : inverseRanges)
315                 {
316                     if (Intersect(rangei))
317                     {
318                         Range intersection = range & i;
319                         Range left = Range(i.Start()cast<uchar>(cast<int>(intersection.Start()) - 1));
320                         if (!left.IsEmpty())
321                         {
322                             newInverse.Add(left);
323                         }
324                         Range right = Range(cast<uchar>(cast<int>(intersection.End()) + 1)i.End());
325                         if (!right.IsEmpty())
326                         {
327                             newInverse.Add(right);
328                         }
329                     }
330                     else
331                     {
332                         newInverse.Add(i);
333                     }
334                 }
335                 Swap(inverseRangesnewInverse);
336             }
337             symbols.Clear();
338             chars.Clear();
339             for (const Range& range : inverseRanges)
340             {
341                 ranges.Add(range);
342                 if (chars.IsEmpty())
343                 {
344                     AddChar(range.Start());
345                 }
346             }
347             MakeMinimal(lexerContext);
348             inverse = false;
349         }
350         public Class* Clone()
351         {
352             Class* cls = new Class(-1);
353             for (Symbol* symbol : symbols)
354             {
355                 cls->AddSymbol(symbol);
356             }
357             for (uchar c : chars)
358             {
359                 cls->AddChar(c);
360             }
361             return cls;
362         }
363         public void AddSymbol(Symbol* symbol)
364         {
365             symbol->SetContained();
366             symbols.Add(symbol);
367         }
368         public void AddChar(uchar c)
369         {
370             chars.Add(c);
371         }
372         public void Print(CodeFormatter& formatter)
373         {
374             formatter.Write("{");
375             for (Range& range : ranges)
376             {
377                 range.Print(formatter);
378             }
379             formatter.Write("}");
380         }
381         public override bool IsClass() const
382         {
383             return true;
384         }
385         public override bool Match(uchar c)
386         {
387             bool match = false;
388             for (Symbol* symbol : symbols)
389             {
390                 if (symbol->Match(c))
391                 {
392                     match = true;
393                     break;
394                 }
395             }
396             return match != inverse;
397         }
398         public override void Accept(Visitor& visitor)
399         {
400             visitor.Visit(*this);
401         }
402         private int index;
403         private bool inverse;
404         private List<Symbol*> symbols;
405         private List<uchar> chars;
406         private LinkedList<Range> ranges;
407     }
408 
409     public bool Intersect(const Range& leftconst Range& right)
410     {
411         if (left.IsEmpty() || right.IsEmpty()) return false;
412         if (left.Start() <= right.Start())
413         {
414             return right.Start() >= left.Start() && right.Start() <= left.End();
415         }
416         else
417         {
418             return left.Start() >= right.Start() && left.Start() <= right.End();
419         }
420     }
421 
422     public Range operator&(const Range& leftconst Range& right)
423     {
424         if (Intersect(leftright))
425         {
426             Range intersection(Max(left.Start()right.Start())Min(left.End()right.End()));
427             return intersection;
428         }
429         return Range(cast<uchar>(1)cast<uchar>(0));
430     }
431 
432     public List<Range> operator-(const Range& leftconst Range& right)
433     {
434         List<Range> ranges;
435         if (right.Start() <= left.Start() && right.End() >= left.End())
436         {
437             return ranges;
438         }
439         else if (right.Start() > left.End() || right.End() < left.Start())
440         {
441             ranges.Add(left);
442         }
443         else
444         {
445             if (right.Start() >= left.Start() && right.Start() <= left.End())
446             {
447                 if (left.Start() <= cast<uchar>(cast<int>(right.Start()) - 1))
448                 {
449                     ranges.Add(Range(left.Start()cast<uchar>(cast<int>(right.Start()) - 1)));
450                 }
451             }
452             if (right.End() >= left.Start() && right.End() <= left.End())
453             {
454                 if (cast<uchar>(cast<int>(right.End()) + 1) <= left.End())
455                 {
456                     ranges.Add(Range(cast<uchar>(cast<int>(right.End()) + 1)left.End()));
457                 }
458             }
459         }
460         return ranges;
461     }
462 
463     public List<Range> operator~(const Range& that)
464     {
465         List<Range> result;
466         if (that.Start() > cast<uchar>(1))
467         {
468             result.Add(Range(cast<uchar>(1)cast<uchar>(cast<int>(that.Start()) - 1)));
469         }
470         if (cast<int>(that.End()) < 1114112)
471         {
472             result.Add(Range(cast<uchar>(cast<int>(that.End()) + 1)cast<uchar>(1114111)));
473         }
474         return result;
475     }
476 
477     public bool operator==(const Class& leftconst Class& right)
478     {
479         if (left.Symbols().Count() != right.Symbols().Count()) return false;
480         int n = cast<int>(left.Symbols().Count());
481         for (int i = 0; i < n; ++i;)
482         {
483             Symbol* leftSymbol = left.Symbols()[i];
484             Symbol* rightSymbol = right.Symbols()[i];
485             if (leftSymbol->IsRange() && rightSymbol->IsRange())
486             {
487                 const Range* leftRange = cast<const Range*>(leftSymbol);
488                 const Range* rightRange = cast<const Range*>(rightSymbol);
489                 if (!(*leftRange == *rightRange))
490                 {
491                     return false;
492                 }
493             }
494             else
495             {
496                 return false;
497             }
498         }
499         return true;
500     }
501 
502     public class RangeEndLess : Rel<Range>
503     {
504         public bool operator()(const Range& leftconst Range& right) const
505         {
506             if (left.End() < right.End()) return true;
507             if (left.End() > right.End()) return false;
508             return left.Start() < right.Start();
509         }
510     }
511 
512     public bool Intersect(const Class& leftconst Class& right)
513     {
514         for (Symbol* leftSymbol : left.Symbols())
515         {
516             if (leftSymbol->IsRange())
517             {
518                 Range* leftRange = cast<Range*>(leftSymbol);
519                 for (Symbol* rightSymbol : right.Symbols())
520                 {
521                     if (rightSymbol->IsRange())
522                     {
523                         Range* rightRange = cast<Range*>(rightSymbol);
524                         if (Intersect(*leftRange*rightRange))
525                         {
526                             return true;
527                         }
528                     }
529                 }
530             }
531         }
532         return false;
533     }
534 
535     public Class* MakeIntertersection(const Class& leftconst Class& rightLexerContext& lexerContext)
536     {
537         List<Range> leftRanges;
538         for (Symbol* leftSymbol : left.Symbols())
539         {
540             if (leftSymbol->IsRange())
541             {
542                 Range* leftRange = cast<Range*>(leftSymbol);
543                 leftRanges.Add(*leftRange);
544             }
545         }
546         List<Range> rightRanges;
547         for (Symbol* rightSymbol : right.Symbols())
548         {
549             if (rightSymbol->IsRange())
550             {
551                 Range* rightRange = cast<Range*>(rightSymbol);
552                 rightRanges.Add(*rightRange);
553             }
554         }
555         List<Range> intersection;
556         for (const Range& left : leftRanges)
557         {
558             List<Range>.Iterator start = LowerBound(rightRanges.Begin()rightRanges.End()left);
559             List<Range>.Iterator end = UpperBound(rightRanges.Begin()rightRanges.End()leftRangeEndLess());
560             if (start != rightRanges.Begin()) --start;
561             if (end < rightRanges.End()) ++end;
562             for (List<Range>.Iterator i = start; i != end; ++i;)
563             {
564                 const Range& right = *i;
565                 if (left == right)
566                 {
567                     intersection.Add(left);
568                     break;
569                 }
570                 else if (Intersect(leftright))
571                 {
572                     intersection.Add(left & right);
573                 }
574             }
575         }
576         Sort(intersection.Begin()intersection.End());
577         intersection.Resize(Unique(intersection.Begin()intersection.End()) - intersection.Begin());
578         Class* cls = lexerContext.MakeClass();
579         for (const Range& i : intersection)
580         {
581             cls->AddSymbol(lexerContext.MakeRange(i.Start()i.End()));
582             if (cls->Chars().IsEmpty())
583             {
584                 cls->AddChar(i.Start());
585             }
586         }
587         return cls;
588     }
589 
590     public Class* MakeDifference(const Class& leftconst Class& rightLexerContext& lexerContext)
591     {
592         List<Range> leftRanges;
593         for (Symbol* leftSymbol : left.Symbols())
594         {
595             if (leftSymbol->IsRange())
596             {
597                 Range* leftRange = cast<Range*>(leftSymbol);
598                 leftRanges.Add(*leftRange);
599             }
600         }
601         List<Range> rightRanges;
602         for (Symbol* rightSymbol : right.Symbols())
603         {
604             if (rightSymbol->IsRange())
605             {
606                 Range* rightRange = cast<Range*>(rightSymbol);
607                 rightRanges.Add(*rightRange);
608             }
609         }
610         List<Range> difference;
611         for (const Range& left : leftRanges)
612         {
613             bool found = false;
614             List<Range> diffs;
615             List<Range>.Iterator start = LowerBound(rightRanges.Begin()rightRanges.End()left);
616             List<Range>.Iterator end = UpperBound(rightRanges.Begin()rightRanges.End()leftRangeEndLess());
617             if (start != rightRanges.Begin()) --start;
618             if (end < rightRanges.End()) ++end;
619             for (List<Range>.Iterator i = start; i != end; ++i;)
620             {
621                 const Range& right = *i;
622                 if (left == right)
623                 {
624                     found = true;
625                     break;
626                 }
627                 else
628                 {
629                     if (Intersect(leftright))
630                     {
631                         Range intersection = left & right;
632                         Range l = Range(left.Start()cast<uchar>(cast<int>(intersection.Start()) - 1));
633                         if (!l.IsEmpty())
634                         {
635                             diffs.Add(l);
636                         }
637                         Range r = Range(cast<uchar>(cast<int>(intersection.End()) + 1)left.End());
638                         if (!r.IsEmpty())
639                         {
640                             diffs.Add(r);
641                         }
642                     }
643                 }
644             }
645             if (!found)
646             {
647                 if (diffs.IsEmpty())
648                 {
649                     difference.Add(left);
650                 }
651                 else
652                 {
653                     for (const Range& diff : diffs)
654                     {
655                         difference.Add(diff);
656                     }
657                 }
658             }
659         }
660         Class* d = lexerContext.MakeClass();
661         for (const Range& r : difference)
662         {
663             d->AddSymbol(lexerContext.MakeRange(r.Start()r.End()));
664             if (d->Chars().IsEmpty())
665             {
666                 d->AddChar(r.Start());
667             }
668         }
669         d->MakeMinimal(lexerContext);
670         return d;
671     }
672 } // namespace soulcm.scm2html