1 // =================================
  2 // Copyright (c) 2024 Seppo Laakko
  3 // Distributed under the MIT license
  4 // =================================
  5 
  6 using System;
  7 using System.Collections;
  8 
  9 namespace System.Xml
 10 {
 11     public class Entity
 12     {
 13         public virtual ~Entity()
 14         {
 15         }
 16         public virtual bool IsInternalEntity() const
 17         {
 18             return false;
 19         }
 20     }
 21 
 22     public class InternalEntity : Entity
 23     {
 24         public InternalEntity(const ustring& value_) : 
 25             value(value_)
 26         {
 27         }
 28         public const ustring& Value() const
 29         {
 30             return value;
 31         }
 32         public override bool IsInternalEntity() const
 33         {
 34             return true;
 35         }
 36         private ustring value;
 37     }
 38 
 39     public class XmlProcessor
 40     {
 41         public XmlProcessor(System.Lex.TrivialLexer& lexer_XmlContentHandler* contentHandler_) : 
 42             lexer(lexer_)lineStarts(lexer.GetLineStartIndeces())contentHandler(contentHandler_)attValue()errorId(0)
 43         {
 44             InternalEntity* quot = new InternalEntity(u"\"");
 45             entityMap[u"quot"] = quot;
 46             InternalEntity* amp = new InternalEntity(u"&");
 47             entityMap[u"amp"] = amp;
 48             InternalEntity* apos = new InternalEntity(u"\'");
 49             entityMap[u"apos"] = apos;
 50             InternalEntity* lt = new InternalEntity(u"<");
 51             entityMap[u"lt"] = lt;
 52             InternalEntity* gt = new InternalEntity(u">");
 53             entityMap[u"gt"] = gt;
 54             entities.Add(UniquePtr<Entity>(quot));
 55             entities.Add(UniquePtr<Entity>(amp));
 56             entities.Add(UniquePtr<Entity>(apos));
 57             entities.Add(UniquePtr<Entity>(lt));
 58             entities.Add(UniquePtr<Entity>(gt));
 59         }
 60         public inline bool Error() const
 61         {
 62             return errorId != 0;
 63         }
 64         public inline int GetErrorId() const
 65         {
 66             return errorId;
 67         }
 68         public const ustring& AttValue() const
 69         {
 70             return attValue;
 71         }
 72         public ustring& AttValue()
 73         {
 74             return attValue;
 75         }
 76         public System.Lex.TrivialLexer& Lexer() const
 77         {
 78             return lexer;
 79         }
 80         [nodiscard]
 81         public Result<bool> StartDocument(const System.Lex.Span& spanint fileIndex)
 82         {
 83             if (Error())
 84             {
 85                 return Result<bool>(GetErrorId());
 86             }
 87             auto result = contentHandler->StartDocument(spanfileIndex);
 88             if (result.Error())
 89             {
 90                 SetErrorId(result.GetErrorId());
 91                 return Result<bool>(ErrorId(result.GetErrorId()));
 92             }
 93             return Result<bool>(true);
 94         }
 95         [nodiscard]
 96         public Result<bool> EndDocument()
 97         {
 98             if (Error())
 99             {
100                 return Result<bool>(GetErrorId());
101             }
102             auto result = contentHandler->EndDocument();
103             if (result.Error())
104             {
105                 SetErrorId(result.GetErrorId());
106                 return Result<bool>(ErrorId(result.GetErrorId()));
107             }
108             return Result<bool>(true);
109         }
110         [nodiscard]
111         public Result<bool> Text(const System.Lex.Span& spanint fileIndexconst ustring& text)
112         {
113             if (Error())
114             {
115                 return Result<bool>(GetErrorId());
116             }
117             auto result = contentHandler->HandleText(spanfileIndextext);
118             if (result.Error())
119             {
120                 SetErrorId(result.GetErrorId());
121                 return Result<bool>(ErrorId(result.GetErrorId()));
122             }
123             return Result<bool>(true);
124         }
125         [nodiscard]
126         public Result<bool> Comment(const System.Lex.Span& spanint fileIndexconst ustring& text)
127         {
128             if (Error())
129             {
130                 return Result<bool>(GetErrorId());
131             }
132             auto result = contentHandler->HandleComment(spanfileIndextext);
133             if (result.Error())
134             {
135                 SetErrorId(result.GetErrorId());
136                 return Result<bool>(ErrorId(result.GetErrorId()));
137             }
138             return Result<bool>(true);
139         }
140         [nodiscard]
141         public Result<bool> PI(const System.Lex.Span& spanint fileIndexconst ustring& targetconst ustring& data)
142         {
143             if (Error())
144             {
145                 return Result<bool>(GetErrorId());
146             }
147             auto result = contentHandler->HandlePI(spanfileIndextargetdata);
148             if (result.Error())
149             {
150                 SetErrorId(result.GetErrorId());
151                 return Result<bool>(ErrorId(result.GetErrorId()));
152             }
153             return Result<bool>(true);
154         }
155         [nodiscard]
156         public Result<bool> CDataSection(const System.Lex.Span& spanint fileIndexconst ustring& cdata)
157         {
158             if (Error())
159             {
160                 return Result<bool>(GetErrorId());
161             }
162             auto result = contentHandler->HandleCDataSection(spanfileIndexcdata);
163             if (result.Error())
164             {
165                 SetErrorId(result.GetErrorId());
166                 return Result<bool>(ErrorId(result.GetErrorId()));
167             }
168             return Result<bool>(true);
169         }
170         [nodiscard]
171         public Result<bool> Version(const ustring& version)
172         {
173             if (Error())
174             {
175                 return Result<bool>(GetErrorId());
176             }
177             auto result = contentHandler->Version(version);
178             if (result.Error())
179             {
180                 SetErrorId(result.GetErrorId());
181                 return Result<bool>(ErrorId(result.GetErrorId()));
182             }
183             return Result<bool>(true);
184         }
185         [nodiscard]
186         public Result<bool> Standalone(bool standalone)
187         {
188             if (Error())
189             {
190                 return Result<bool>(GetErrorId());
191             }
192             auto result = contentHandler->Standalone(standalone);
193             if (result.Error())
194             {
195                 SetErrorId(result.GetErrorId());
196                 return Result<bool>(ErrorId(result.GetErrorId()));
197             }
198             return Result<bool>(true);
199         }
200         [nodiscard]
201         public Result<bool> Encoding(const ustring& encoding)
202         {
203             if (Error())
204             {
205                 return Result<bool>(GetErrorId());
206             }
207             auto result = contentHandler->Encoding(encoding);
208             if (result.Error())
209             {
210                 SetErrorId(result.GetErrorId());
211                 return Result<bool>(ErrorId(result.GetErrorId()));
212             }
213             return Result<bool>(true);
214         }
215         [nodiscard]
216         public Result<bool> BeginStartTag(const ustring& tagName)
217         {
218             if (Error())
219             {
220                 return Result<bool>(GetErrorId());
221             }
222             tagStack.Push(currentTagName);
223             currentTagName = tagName;
224             namespaceUriStack.Push(currentNamespaceUri);
225             namespacePrefixStack.Push(currentNamespacePrefix);
226             attributes.Clear();
227             return Result<bool>(true);
228         }
229         [nodiscard]
230         public Result<bool> EndStartTag(const System.Lex.Span& spanint fileIndexconst string& systemIdlong pos)
231         {
232             if (Error())
233             {
234                 return Result<bool>(GetErrorId());
235             }
236             ustring localName;
237             ustring prefix;
238             auto parseResult = ParseQualifiedName(currentTagNamelocalNameprefixspansystemIdpos);
239             if (parseResult.Error())
240             {
241                 SetErrorId(parseResult.GetErrorId());
242                 return Result<bool>(ErrorId(parseResult.GetErrorId()));
243             }
244             if (prefix == u"xmlns")
245             {
246                 string errorMessage = MakeErrorMessage("\'xmlns\' prefix cannot be declared for an element"systemIdspanpos);
247                 int errorId = AllocateError(errorMessage);
248                 SetErrorId(errorId);
249                 return Result<bool>(ErrorId(errorId));
250             }
251             auto uriResult = GetNamespaceUri(prefixspansystemIdpos);
252             if (uriResult.Error())
253             {
254                 SetErrorId(uriResult.GetErrorId());
255                 return Result<bool>(ErrorId(uriResult.GetErrorId()));
256             }
257             auto result = contentHandler->StartElement(spanfileIndexuriResult.Value()localNamecurrentTagNameattributes);
258             if (result.Error())
259             {
260                 SetErrorId(result.GetErrorId());
261                 return Result<bool>(ErrorId(result.GetErrorId()));
262             }
263             return Result<bool>(true);
264         }
265         [nodiscard]
266         public Result<bool> EndTag(const ustring& tagNameconst System.Lex.Span& spanconst string& systemIdlong pos)
267         {
268             if (Error())
269             {
270                 return Result<bool>(GetErrorId());
271             }
272             if (tagStack.IsEmpty())
273             {
274                 auto tagNameResult = ToUtf8(tagName);
275                 if (tagNameResult.Error())
276                 {
277                     SetErrorId(tagNameResult.GetErrorId());
278                     return Result<bool>(ErrorId(tagNameResult.GetErrorId()));
279                 }
280                 string errorMessage = MakeErrorMessage("end tag \'" + tagNameResult.Value() + "\' has no corresponding start tag"systemIdspanpos);
281                 int errorId = AllocateError(errorMessage);
282                 SetErrorId(errorId);
283                 return Result<bool>(ErrorId(errorId));
284             }
285             if (tagName != currentTagName)
286             {
287                 auto tagNameResult = ToUtf8(tagName);
288                 if (tagNameResult.Error())
289                 {
290                     SetErrorId(tagNameResult.GetErrorId());
291                     return Result<bool>(ErrorId(tagNameResult.GetErrorId()));
292                 }
293                 auto currentTagNameResult = ToUtf8(currentTagName);
294                 if (currentTagNameResult.Error())
295                 {
296                     SetErrorId(currentTagNameResult.GetErrorId());
297                     return Result<bool>(ErrorId(currentTagNameResult.GetErrorId()));
298                 }
299                 string errorMessage = MakeErrorMessage("end tag \'" + tagNameResult.Value() + "\' does not match start tag \'" + currentTagNameResult.Value() + "\'"
300                     systemIdspanpos);
301                 int errorId = AllocateError(errorMessage);
302                 SetErrorId(errorId);
303                 return Result<bool>(ErrorId(errorId));
304             }
305             ustring localName;
306             ustring prefix;
307             auto parseResult = ParseQualifiedName(currentTagNamelocalNameprefixspansystemIdpos);
308             if (parseResult.Error())
309             {
310                 SetErrorId(parseResult.GetErrorId());
311                 return Result<bool>(ErrorId(parseResult.GetErrorId()));
312             }
313             if (prefix == u"xmlns")
314             {
315                 string errorMessage = MakeErrorMessage("\'xmlns\' prefix cannot be declared for an element"systemIdspanpos);
316                 int errorId = AllocateError(errorMessage);
317                 SetErrorId(errorId);
318                 return Result<bool>(ErrorId(errorId));
319             }
320             auto uriResult = GetNamespaceUri(prefixspansystemIdpos);
321             if (uriResult.Error())
322             {
323                 SetErrorId(uriResult.GetErrorId());
324                 return Result<bool>(ErrorId(uriResult.GetErrorId()));
325             }
326             auto result = contentHandler->EndElement(uriResult.Value()localNamecurrentTagName);
327             if (result.Error())
328             {
329                 SetErrorId(result.GetErrorId());
330                 return Result<bool>(ErrorId(result.GetErrorId()));
331             }
332             if (namespaceUriStack.IsEmpty())
333             {
334                 string errorMessage = MakeErrorMessage("namespace URI stack is empty"systemIdspanpos);
335                 int errorId = AllocateError(errorMessage);
336                 SetErrorId(errorId);
337                 return Result<bool>(ErrorId(errorId));
338             }
339             currentNamespaceUri = namespaceUriStack.Pop();
340             namespacePrefixMap.Remove(currentNamespacePrefix);
341             if (namespacePrefixStack.IsEmpty())
342             {
343                 string errorMessage = MakeErrorMessage("namespace prefix stack is empty"systemIdspanpos);
344                 int errorId = AllocateError(errorMessage);
345                 SetErrorId(errorId);
346                 return Result<bool>(ErrorId(errorId));
347             }
348             currentNamespacePrefix = namespacePrefixStack.Pop();
349             namespacePrefixMap[currentNamespacePrefix] = currentNamespaceUri;
350             if (tagStack.IsEmpty())
351             {
352                 string errorMessage = MakeErrorMessage("tag stack is empty"systemIdspanpos);
353                 int errorId = AllocateError(errorMessage);
354                 SetErrorId(errorId);
355                 return Result<bool>(ErrorId(errorId));
356             }
357             currentTagName = tagStack.Pop();
358             return Result<bool>(true);
359         }
360         [nodiscard]
361         public Result<bool> AddAttribute(const ustring& attNameconst ustring& attValueconst System.Lex.Span& spanconst string& systemIdlong pos)
362         {
363             if (Error())
364             {
365                 return Result<bool>(GetErrorId());
366             }
367             ustring localName;
368             ustring prefix;
369             auto parseResult = ParseQualifiedName(attNamelocalNameprefixspansystemIdpos);
370             if (parseResult.Error())
371             {
372                 SetErrorId(parseResult.GetErrorId());
373                 return Result<bool>(ErrorId(parseResult.GetErrorId()));
374             }
375             if (prefix == u"xmlns")
376             {
377                 currentNamespacePrefix = localName;
378                 currentNamespaceUri = attValue;
379                 namespacePrefixMap[currentNamespacePrefix] = currentNamespaceUri;
380             }
381             else if (localName == u"xmlns")
382             {
383                 currentNamespacePrefix.Clear();
384                 currentNamespaceUri = attValue;
385             }
386             else
387             {
388                 auto uriResult = GetNamespaceUri(prefixspansystemIdpos);
389                 if (uriResult.Error())
390                 {
391                     SetErrorId(uriResult.GetErrorId());
392                     return Result<bool>(ErrorId(uriResult.GetErrorId()));
393                 }
394                 attributes.Add(Attribute(spanuriResult.Value()localNameattNameattValue));
395             }
396             return Result<bool>(true);
397         }
398         [nodiscard]
399         public Result<bool> EntityRef(const ustring& entityNameconst System.Lex.Span& spanint fileIndexconst string& systemId)
400         {
401             if (Error())
402             {
403                 return Result<bool>(GetErrorId());
404             }
405             auto it = entityMap.CFind(entityName);
406             if (it != entityMap.CEnd())
407             {
408                 Entity* entity = it->second;
409                 if (entity->IsInternalEntity())
410                 {
411                     InternalEntity* internalEntity = cast<InternalEntity*>(entity);
412                     const ustring& entityValue = internalEntity->Value();
413                     if (!attValueStack.IsEmpty())
414                     {
415                         attValue.Append(entityValue);
416                     }
417                     else
418                     {
419                         return Text(spanfileIndexentityValue);
420                     }
421                 }
422                 else
423                 {
424                     auto result = contentHandler->SkippedEntity(entityName);
425                     if (result.Error())
426                     {
427                         SetErrorId(result.GetErrorId());
428                         return Result<bool>(ErrorId(result.GetErrorId()));
429                     }
430                 }
431             }
432             else
433             {
434                 auto result = contentHandler->SkippedEntity(entityName);
435                 if (result.Error())
436                 {
437                     SetErrorId(result.GetErrorId());
438                     return Result<bool>(ErrorId(result.GetErrorId()));
439                 }
440             }
441             return Result<bool>(true);
442         }
443         [nodiscard]
444         public Result<bool> BeginAttributeValue()
445         {
446             if (Error())
447             {
448                 return Result<bool>(GetErrorId());
449             }
450             attValueStack.Push(attValue);
451             attValue.Clear();
452             return Result<bool>(true);
453         }
454         [nodiscard]
455         public Result<bool> EndAttributeValue()
456         {
457             if (Error())
458             {
459                 return Result<bool>(GetErrorId());
460             }
461             #assert(!attValueStack.IsEmpty());
462             attValue = attValueStack.Top();
463             attValueStack.Pop();
464             return Result<bool>(true);
465         }
466         [nodiscard]
467         public Result<ustring> GetNamespaceUri(const ustring& namespacePrefixconst System.Lex.Span& spanconst string& systemIdlong pos)
468         {
469             if (Error())
470             {
471                 return Result<ustring>(GetErrorId());
472             }
473             if (namespacePrefix.IsEmpty())
474             {
475                 return Result<ustring>(currentNamespaceUri);
476             }
477             auto it = namespacePrefixMap.CFind(namespacePrefix);
478             if (it != namespacePrefixMap.CEnd())
479             {
480                 return Result<ustring>(it->second);
481             }
482             else
483             {
484                 auto namespacePrefixResult = ToUtf8(namespacePrefix);
485                 if (namespacePrefixResult.Error())
486                 {
487                     SetErrorId(namespacePrefixResult.GetErrorId());
488                     return Result<ustring>(ErrorId(namespacePrefixResult.GetErrorId()));
489                 }
490                 string errorMessage = MakeErrorMessage("namespace prefix \'" + namespacePrefixResult.Value() + "\' not bound to any namespace URI"systemIdspanpos);
491                 int errorId = AllocateError(errorMessage);
492                 return Result<ustring>(ErrorId(errorId));
493             }
494             return Result<ustring>(ustring());
495         }
496         [nodiscard]
497         public Result<bool> ParseQualifiedName(const ustring& qualifiedNameustring& localNameustring& prefix
498             const System.Lex.Span& spanconst string& systemIdlong pos)
499         {
500             if (Error())
501             {
502                 return Result<bool>(GetErrorId());
503             }
504             List<ustring> parts = qualifiedName.Split(':');
505             if (parts.Count() > 2)
506             {
507                 auto qualifiedNameResult = ToUtf8(qualifiedName);
508                 if (qualifiedNameResult.Error())
509                 {
510                     SetErrorId(qualifiedNameResult.GetErrorId());
511                     return Result<bool>(ErrorId(qualifiedNameResult.GetErrorId()));
512                 }
513                 string errorMessage = MakeErrorMessage("qualified name \'" + qualifiedNameResult.Value() + "\' has more than one \':\' character"systemIdspanpos);
514                 int errorId = AllocateError(errorMessage);
515                 SetErrorId(errorId);
516                 return Result<bool>(ErrorId(errorId));
517             }
518             else if (parts.Count() == 2)
519             {
520                 prefix = parts[0];
521                 localName = parts[1];
522             }
523             else
524             {
525                 prefix.Clear();
526                 localName = qualifiedName;
527             }
528             return Result<bool>(true);
529         }
530         public string MakeErrorMessage(const string& messageconst string& systemIdconst System.Lex.Span& spanlong pos) const
531         {
532             System.Lex.LineColLen lineColLen = System.Lex.SpanToLineColLen(spanlineStarts);
533             ustring errorLines = lexer.ErrorLines(pos);
534             auto utf8Result = ToUtf8(errorLines);
535             if (!utf8Result.Error())
536             {
537                 return "error: " + message + " in \'" + systemId + "\' at line " + ToString(lineColLen.line) + ":\n" + utf8Result.Value();
538             }
539             return "error: " + message + " in \'" + systemId + "\' at line " + ToString(lineColLen.line);
540         }
541         public void SetErrorId(int errorId_)
542         {
543             errorId = errorId_;
544         }
545         private System.Lex.TrivialLexer& lexer;
546         private List<int> lineStarts;
547         private XmlContentHandler* contentHandler;
548         private Stack<ustring> tagStack;
549         private ustring currentTagName;
550         private Stack<ustring> attValueStack;
551         private ustring attValue;
552         private Stack<ustring> namespaceUriStack;
553         private ustring currentNamespaceUri;
554         private Stack<ustring> namespacePrefixStack;
555         private ustring currentNamespacePrefix;
556         private Map<ustringustring> namespacePrefixMap;
557         private Attributes attributes;
558         private Map<ustringEntity*> entityMap;
559         private List<UniquePtr<Entity>> entities;
560         private int errorId;
561     }
562