// XmlParser grammar: recognizes an XML document and reports what it finds
// to a System.Xml.XmlProcessor.

// Grammar for XML documents. Rules that take a System.Xml.XmlProcessor* report
// what they recognize to that processor through the calls visible in their
// semantic actions; rules without a processor parameter only recognize input.
parser XmlParser
{
    uselexer TrivialLexer;
    farthest_error;
    state;
    main;

    // Entry rule: prolog, one root element, then any number of Misc items.
    // StartDocument() is called before anything is consumed and EndDocument()
    // after the whole document has matched.
    Document(System.Xml.XmlProcessor* processor)
        ::= empty{ processor->StartDocument(); }
            (Prolog(processor):prolog Element(processor):element (Misc(processor):misc)*){ processor->EndDocument(); }
        ;

    // One character from the permitted XML character ranges; returns the id of
    // the token at the match position cast to uchar.
    Char : uchar
        ::= ("[\x9\xA\xD\x20-\xD7FF\xE000-\xFFFD\x10000-\x10FFFF]"){ Token token = lexer.GetToken(pos); return cast<uchar>(token.id); }
        ;

    // One or more white space characters (space, tab, CR, LF).
    S
        ::= "[\x20\x9\xD\xA]"+
        ;

    // A character that may begin a name.
    NameStartChar
        ::= "[:A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\x2FF\x370-\x37D\x37F-\x1FFF\x200C-\x200D\x2070-\x218F\x2C00-\x2FEF\x3001-\xD7FF\xF900-\xFDCF\xFDF0-\xFFFD\x10000-\xEFFFF]"
        ;

    // A character that may appear after the first character of a name.
    NameChar
        ::= NameStartChar:nameStartChar | "[-.0-9\xB7\x300-\x36F\x203F-\x2040]"
        ;

    // A name: one NameStartChar followed by any number of NameChars.
    // The span s is grown over every matched character and the matched text
    // is returned.
    Name(var Span s) : ustring
        ::= (NameStartChar:nameStartChar{ s = span; } (NameChar:nameChar{ s.end = span.end; })*)
        {
            return lexer.GetMatch(s);
        }
        ;

    // Names separated by single space characters.
    Names
        ::= Name:name1 ('\x20' Name:name2)*
        ;

    // A name token: one or more NameChars.
    Nmtoken
        ::= NameChar:nameChar+
        ;

    // Name tokens separated by single space characters.
    Nmtokens
        ::= Nmtoken:nmToken1 ('\x20' Nmtoken:nmToken2)*
        ;

    // Everything before the root element: optional XML declaration, Misc items,
    // and an optional document type declaration followed by more Misc items.
    Prolog(System.Xml.XmlProcessor* processor)
        ::= XMLDecl(processor):xmlDecl? Misc(processor):m1* (DocTypeDecl(processor):docTypeDecl Misc(processor):m2*)?
        ;

    // XML declaration: "<?xml", version info, optional encoding and standalone
    // declarations, optional white space, "?>".
    XMLDecl(System.Xml.XmlProcessor* processor)
        ::= "<?xml" VersionInfo(processor):versionInfo EncodingDecl(processor):encodingDecl? SDDecl(processor):sdDecl? S:s? "?>"
        ;

    // The version pseudo-attribute of the XML declaration.
    VersionInfo(System.Xml.XmlProcessor* processor)
        ::= S:s "version" Eq:eq VersionNumber(processor):versionNumber
        ;

    // Version number of the form 1.<digits>; the matched text is passed to
    // processor->Version().
    VersionNum(System.Xml.XmlProcessor* processor, var Span s)
        ::= ('1'{ s = span; } '.' ("[0-9]"{ s.end = span.end; })+)
        {
            processor->Version(lexer.GetMatch(s));
        }
        ;

    // Version number in double quotes.
    VersionNumDQ(System.Xml.XmlProcessor* processor)
        ::= '"' VersionNum(processor):versionNum '"'
        ;

    // Version number in single quotes.
    VersionNumSQ(System.Xml.XmlProcessor* processor)
        ::= '\'' VersionNum(processor):versionNum '\''
        ;

    // Quoted version number, either quoting style.
    VersionNumber(System.Xml.XmlProcessor* processor)
        ::= VersionNumDQ(processor):versionNumDQ
        |   VersionNumSQ(processor):versionNumSQ
        ;

    // The encoding pseudo-attribute of the XML declaration.
    EncodingDecl(System.Xml.XmlProcessor* processor)
        ::= S:s "encoding" Eq:eq EncName(processor):encName
        ;

    // Encoding name: a Latin letter followed by letters, digits, '.', '_' or
    // '-'; the matched text is passed to processor->Encoding().
    EncodingName(System.Xml.XmlProcessor* processor, var Span s)
        ::= ("[A-Za-z]"{ s = span; } ("[A-Za-z0-9._-]"{ s.end = span.end; })*)
        {
            processor->Encoding(lexer.GetMatch(s));
        }
        ;

    // Encoding name in double quotes.
    EncNameDQ(System.Xml.XmlProcessor* processor)
        ::= '"' EncodingName(processor):encodingName '"'
        ;

    // Encoding name in single quotes.
    EncNameSQ(System.Xml.XmlProcessor* processor)
        ::= '\'' EncodingName(processor):encodingName '\''
        ;

    // Quoted encoding name, either quoting style.
    EncName(System.Xml.XmlProcessor* processor)
        ::= EncNameDQ(processor):encNameDQ
        |   EncNameSQ(processor):encNameSQ
        ;

    // The standalone pseudo-attribute; the parsed flag is passed to
    // processor->Standalone().
    SDDecl(System.Xml.XmlProcessor* processor)
        ::= S:s "standalone" Eq:eq YesNo:yn{ processor->Standalone(yn); }
        ;

    // Document type declaration with optional external ID and optional
    // bracketed internal subset. Nothing is reported to the processor here.
    DocTypeDecl(System.Xml.XmlProcessor* processor)
        ::= "<!DOCTYPE" S:s1 Name:rootElementName (S:s2 ExternalID:extID)? S:s3? ('[' IntSubset(processor):intSubset ']' S:s4?)? '>'
        ;

    // External identifier: SYSTEM with a system literal, or PUBLIC with a
    // public identifier literal and a system literal.
    ExternalID
        ::= ("SYSTEM" S:s1 SystemLiteral:s2)
        |   ("PUBLIC" S:s3 PubidLiteral:p2 S:s4 SystemLiteral:s5)
        ;

    // Quoted literal that may contain anything except its own quote character.
    SystemLiteral
        ::= ('"' ("[^\"]"*) '"') | ('\'' ("[^']"*) '\'')
        ;

    // Quoted public identifier; the single-quoted form excludes the apostrophe
    // from the allowed characters.
    PubidLiteral
        ::= '"' (PubidChar:p1*) '"' | '\'' ((PubidChar:p2 - '\'')*) '\''
        ;

    // A character allowed inside a public identifier literal.
    PubidChar
        ::= "[\x20\xD\xA]" | "[a-zA-Z0-9]" | "[-'()+,./:=?;!*#@$_%]"
        ;

    // Internal DTD subset: any mix of markup declarations and separators.
    IntSubset(System.Xml.XmlProcessor* processor)
        ::= (MarkupDecl(processor):mdecl | DeclSep(processor):declsep)*
        ;

    // One markup declaration, processing instruction or comment inside the
    // internal subset.
    MarkupDecl(System.Xml.XmlProcessor* processor)
        ::= ElementDecl(processor):elementDecl | AttlistDecl(processor):attlistDecl | EntityDecl(processor):entityDecl | NotationDecl(processor):notationDecl | PI(processor):pi | Comment(processor):comment
        ;

    // Separator between declarations: a parameter-entity reference or white
    // space.
    DeclSep(System.Xml.XmlProcessor* processor)
        ::= PEReference(processor):peref | S:s
        ;

    // Element type declaration.
    ElementDecl(System.Xml.XmlProcessor* processor)
        ::= "<!ELEMENT" S:s1 Name:elementName S:s2 ContentSpec:contentSpec S:s3? '>'
        ;

    // Content specification of an element type declaration.
    ContentSpec
        ::= "EMPTY" | "ANY" | Mixed:mixed | Children:children
        ;

    // Element content model: a choice or sequence with an optional occurrence
    // indicator.
    Children
        ::= (Choice:choice | Seq:seq) ('?' | '*' | '+')?
        ;

    // Content particle: a name, choice or sequence with an optional occurrence
    // indicator.
    CP
        ::= (Name:name | Choice:choice | Seq:seq) ('?' | '*' | '+')?
        ;

    // Parenthesized list of two or more content particles separated by '|'.
    Choice
        ::= '(' S:s1? CP:cp1 (S:s2? '|' S:s3? CP:cp2)+ S:s4? ')'
        ;

    // Parenthesized list of one or more content particles separated by ','.
    Seq
        ::= '(' S:s1? CP:cp1 (S:s2? ',' S:s3? CP:cp2)* S:s4? ')'
        ;

    // Mixed content model: "#PCDATA" optionally followed by '|'-separated
    // element names and closed by ")*", or "#PCDATA" alone closed by ')'.
    Mixed
        ::= '(' S:s1? "#PCDATA" (S:s2? '|' S:s3? Name:name)* S:s4? ")*"
        |   '(' S:s5? "#PCDATA" S:s6? ')'
        ;

    // Attribute list declaration.
    AttlistDecl(System.Xml.XmlProcessor* processor)
        ::= "<!ATTLIST" S:s1 Name:name AttDef(processor):attdef* S:s2? '>'
        ;

    // One attribute definition: name, type and default declaration.
    AttDef(System.Xml.XmlProcessor* processor)
        ::= S:s Name:name S:s2 AttType:attType S:s3 DefaultDecl(processor):defaultDecl
        ;

    // Attribute type.
    AttType
        ::= StringType:stringType | TokenizedType:tokenizedType | EnumeratedType:enumeratedType
        ;

    // The string attribute type keyword.
    StringType
        ::= "CDATA"
        ;

    // Tokenized attribute type keywords. They are listed longest-first so that
    // a keyword is never shadowed by an earlier alternative that is one of its
    // own prefixes ("ID" / "IDREF" / "IDREFS", "NMTOKEN" / "NMTOKENS").
    TokenizedType
        ::= "IDREFS"
        |   "IDREF"
        |   "ID"
        |   "ENTITIES"
        |   "ENTITY"
        |   "NMTOKENS"
        |   "NMTOKEN"
        ;

    // Enumerated attribute type: a notation type or a plain enumeration.
    EnumeratedType
        ::= NotationType:notationType | Enumeration:enumeration
        ;

    // "NOTATION" followed by a parenthesized, '|'-separated list of names.
    NotationType
        ::= "NOTATION" S:s1 '(' S:s2? Name:f (S:s3? '|' S:s4? Name:n)* S:s5? ')'
        ;

    // Parenthesized, '|'-separated list of name tokens.
    Enumeration
        ::= '(' S:s1? Nmtoken:nmtoken (S:s2? '|' S:s3? Nmtoken:nmtoken2)* S:s4? ')'
        ;

    // Attribute default: "#REQUIRED", "#IMPLIED", or an attribute value that
    // may be preceded by "#FIXED".
    DefaultDecl(System.Xml.XmlProcessor* processor)
        ::= "#REQUIRED" | "#IMPLIED" | (("#FIXED" S:s)? AttValue(processor):attVAlue)
        ;

    // Entity declaration: general or parameter entity.
    EntityDecl(System.Xml.XmlProcessor* processor)
        ::= GEDecl(processor):gedecl | PEDecl(processor):pedecl
        ;

    // General entity declaration.
    GEDecl(System.Xml.XmlProcessor* processor)
        ::= "<!ENTITY" S:s0 Name:entityName S:s1 EntityDef(processor):entityValue S:s2? '>'
        ;

    // Parameter entity declaration (marked by '%' before the name).
    PEDecl(System.Xml.XmlProcessor* processor)
        ::= "<!ENTITY" S:s0 '%' S:s1 Name:peName S:s2 PEDef(processor):peValue S:s3? '>'
        ;

    // Definition of a general entity: a literal value, or an external ID with
    // an optional NDATA declaration.
    EntityDef(System.Xml.XmlProcessor* processor)
        ::= EntityValue(processor):entityValue | (ExternalID:extID NDataDecl:notation?)
        ;

    // Definition of a parameter entity: a literal value or an external ID.
    PEDef(System.Xml.XmlProcessor* processor)
        ::= EntityValue(processor):entityValue | ExternalID:extID
        ;

    // Quoted entity value made of ordinary characters, parameter-entity
    // references and references.
    EntityValue(System.Xml.XmlProcessor* processor)
        ::= '"'
            (   "[^%&\"]"
            |   PEReference(processor):pr1
            |   Reference(processor):ref1
            )* '"'
        |   '\''
            (   "[^%&']"
            |   PEReference(processor):pr2
            |   Reference(processor):ref2
            )* '\''
        ;

    // NDATA declaration naming a notation.
    NDataDecl
        ::= S:s1 "NDATA" S:s2 Name:name
        ;

    // Parameter-entity reference: '%' name ';'. Recognized only; nothing is
    // reported to the processor.
    PEReference(System.Xml.XmlProcessor* processor)
        ::= '%' Name:name ';'
        ;

    // Notation declaration with an external or public identifier.
    NotationDecl(System.Xml.XmlProcessor* processor)
        ::= "<!NOTATION" S:s Name:name S:s2 (ExternalID:extID | PublicID:pubID) S:s3? '>'
        ;

    // Public identifier without a system literal.
    PublicID
        ::= "PUBLIC" S:s PubidLiteral:pl
        ;

    // An element: either an empty-element tag, or a start tag, content and an
    // end tag. The tag name is remembered in tagName so that the empty-element
    // form can report EndTag() with the same name right after EndStartTag().
    Element(System.Xml.XmlProcessor* processor, var ustring tagName)
        ::= '<' Name:name{ tagName = name; processor->BeginStartTag(tagName); } (S:s Attribute(processor):attribute)* S:s?
            (   "/>"{ processor->EndStartTag(span, lexer.FileName()); processor->EndTag(tagName, span, lexer.FileName()); }
            |   '>'{ processor->EndStartTag(span, lexer.FileName()); } Content(processor):content ETag(processor):etag
            )
        ;

    // End tag; the name is reported through processor->EndTag().
    ETag(System.Xml.XmlProcessor* processor)
        ::= "</" Name:name{ processor->EndTag(name, span, lexer.FileName()); } S:s? '>'
        ;

    // Element content: character data interleaved with child elements,
    // references, CDATA sections, processing instructions and comments.
    Content(System.Xml.XmlProcessor* processor)
        ::= CharData(processor):cd1? ((Element(processor):element | Reference(processor):reference | CDSect(processor):cdsect | PI(processor):pi | Comment(processor):comment) CharData(processor):cd2?)*
        ;

    // One character of character data (anything except '<' and '&'); returns
    // the id of the token at the match position cast to uchar.
    CharDataChar : uchar
        ::= "[^<&]"{ Token token = lexer.GetToken(pos); return cast<uchar>(token.id); }
        ;

    // A run of character data that does not contain "]]>"; the collected text
    // is passed to processor->Text().
    CharData(System.Xml.XmlProcessor* processor, var ustring s)
        ::= ((CharDataChar:chr{ s.Append(chr); })* - ("[^<&]"* "]]>" "[^<&]"*)){ processor->Text(s); }
        ;

    // CDATA section: every character up to the closing "]]>" is collected and
    // passed to processor->CDataSection().
    CDSect(System.Xml.XmlProcessor* processor, var ustring s)
        ::= ("<![CDATA[" ((Char:chr - "]]>"){ s.Append(chr); })* "]]>")
        {
            processor->CDataSection(s);
        }
        ;

    // One attribute: name, '=', quoted value; reported through
    // processor->AddAttribute().
    Attribute(System.Xml.XmlProcessor* processor)
        ::= Name:attName Eq:eq AttValue(processor):attValue{ processor->AddAttribute(attName, attValue, span, lexer.FileName()); }
        ;

    // Double-quoted attribute value. Ordinary characters are appended to
    // processor->AttValue() between BeginAttributeValue() and
    // EndAttributeValue(); the value is copied out before EndAttributeValue()
    // is called and then returned.
    AttValueDQ(System.Xml.XmlProcessor* processor) : ustring
        ::= '"'{ processor->BeginAttributeValue(); }
            (   "[^<&\"]"{ processor->AttValue().Append(lexer.GetMatch(span)); }
            |   Reference(processor):reference)* '"'
            {
                ustring value = processor->AttValue();
                processor->EndAttributeValue();
                return value;
            }
        ;

    // Single-quoted attribute value; handled exactly like the double-quoted
    // form.
    AttValueSQ(System.Xml.XmlProcessor* processor) : ustring
        ::= '\''{ processor->BeginAttributeValue(); }
            (   "[^<&\']"{ processor->AttValue().Append(lexer.GetMatch(span)); }
            |   Reference(processor):reference)* '\''
            {
                ustring value = processor->AttValue();
                processor->EndAttributeValue();
                return value;
            }
        ;

    // Quoted attribute value, either quoting style; returns the value text.
    AttValue(System.Xml.XmlProcessor* processor) : ustring
        ::= AttValueDQ(processor):attValueDQ{ return attValueDQ; }
        |   AttValueSQ(processor):attValueSQ{ return attValueSQ; }
        ;

    // Entity reference: '&' name ';'; reported through processor->EntityRef().
    EntityRef(System.Xml.XmlProcessor* processor)
        ::= ('&' Name:name ';'){ processor->EntityRef(name, span, lexer.FileName()); }
        ;

    // Decimal code point: one or more decimal digits accumulated into val,
    // which is returned.
    DecCodePoint(var uint val) : uint
        ::= (("[0-9]"{ Token token = lexer.GetToken(pos); val = 10u * val + cast<uint>(token.id) - cast<uint>('0'); })+)
        {
            return val;
        }
        ;

    // Hexadecimal code point: one or more hex digits (either case) accumulated
    // into val, which is returned.
    HexCodePoint(var uint val) : uint
        ::=
        (   ("[0-9a-fA-F]"
                {
                    Token token = lexer.GetToken(pos);
                    if (token.id >= cast<int>('0') && token.id <= cast<int>('9')) val = 16u * val + cast<uint>(token.id) - cast<uint>('0');
                    else if (token.id >= cast<int>('a') && token.id <= cast<int>('f')) val = 16u * val + 10u + cast<uint>(token.id) - cast<uint>('a');
                    else if (token.id >= cast<int>('A') && token.id <= cast<int>('F')) val = 16u * val + 10u + cast<uint>(token.id) - cast<uint>('A');
                }
            )+
        )
        {
            return val;
        }
        ;

    // Character reference in decimal ("&#...;") or hexadecimal ("&#x...;")
    // form; the referenced character is passed to processor->Text() as a
    // one-character string.
    CharRef(System.Xml.XmlProcessor* processor)
        ::= ("&#" DecCodePoint:decCodePoint ';'){ processor->Text(ustring(cast<uchar>(decCodePoint))); }
        |   ("&#x" HexCodePoint:hexCodePoint ';'){ processor->Text(ustring(cast<uchar>(hexCodePoint))); }
        ;

    // Entity reference or character reference.
    Reference(System.Xml.XmlProcessor* processor)
        ::= EntityRef(processor):entityRef
        |   CharRef(processor):charRef
        ;

    // Comment, processing instruction or white space.
    Misc(System.Xml.XmlProcessor* processor)
        ::= Comment(processor):comment | PI(processor):pi | S:s
        ;

    // Comment: text between "<!--" and "-->". A '-' inside the text is accepted
    // only when it is followed by a character other than '-'. The collected
    // text is passed to processor->Comment().
    Comment(System.Xml.XmlProcessor* processor, var ustring s)
        ::=
        (   "<!--" ((Char:chr - '-'){ s.Append(chr); } | '-' (Char:chr - '-'){ s.Append('-').Append(chr); })* "-->"
        )
        {
            processor->Comment(s);
        }
        ;

    // Processing instruction: "<?" target, then optionally white space
    // followed by data, then "?>". The white space and data are optional as a
    // unit so that an instruction consisting of a target alone ("<?target?>")
    // is accepted; data stays empty in that case. Target and data are passed
    // to processor->PI().
    PI(System.Xml.XmlProcessor* processor, var ustring data)
        ::= ("<?" PITarget:target (S:s ((Char:chr - "?>"){ data.Append(chr); })*)? "?>"){ processor->PI(target, data); }
        ;

    // Processing instruction target: a name, with the Xml rule subtracted;
    // returns the name.
    PITarget : ustring
        ::= (Name:name - Xml:xml){ return name; }
        ;

    // The letters x, m, l in any combination of upper and lower case.
    Xml
        ::= "[xX]" "[mM]" "[lL]"
        ;

    // '=' with optional white space on either side.
    Eq
        ::= S:s? '=' S:s?
        ;

    // Quoted "yes" or "no" in double or single quotes; returns true for yes
    // and false for no.
    YesNo : bool
        ::= "\"yes\""{ return true; }
        |   "\"no\""{ return false; }
        |   "'yes'"{ return true; }
        |   "'no'"{ return false; }
        ;
}