1
2
3
4
5
6 [hpp]#include <sngxml/xml/XmlProcessor.hpp>
7 [cpp]#include <soulng/lexer/TrivialLexer.hpp>
8 [cpp]#include <soulng/parser/Range.hpp>
9
10 using namespace soulng::lexer;
11
12 parser api(SNGXML_XML_API) XmlParser
13 {
14 uselexer TrivialLexer;
15 main;
16
// Entry rule. Signals StartDocument to the processor before anything is parsed,
// then matches the prolog, one element and any trailing Misc items, and finally
// signals EndDocument.
Document(sngxml::xml::XmlProcessor* processor)
    ::= empty{ processor->StartDocument(); }
        (
            Prolog(processor):prolog
            Element(processor):element
            (Misc(processor):misc)*
        )
        { processor->EndDocument(); }
    ;
21
// Matches a single character from the listed code point ranges and returns it.
// The id of the token at the current position is converted to the rule's
// declared return type; the cast needs its explicit target type to be valid C++.
Char : char32_t
    ::= ("[\x9\xA\xD\x20-\xD7FF\xE000-\xFFFD\x10000-\x10FFFF]")
        {
            Token token = lexer.GetToken(pos);
            return static_cast<char32_t>(token.id);
        }
    ;
25
// White space: one or more of space (x20), tab (x9), CR (xD) or LF (xA).
S
    ::= "[\x20\x9\xD\xA]"+
    ;
29
// A single character that may begin a Name (see the character class below).
NameStartChar
    ::= "[:A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\x2FF\x370-\x37D\x37F-\x1FFF\x200C-\x200D\x2070-\x218F\x2C00-\x2FEF\x3001-\xD7FF\xF900-\xFDCF\xFDF0-\xFFFD\x10000-\xEFFFF]"
    ;
33
// A single character that may continue a Name: any NameStartChar, or one of
// the additional characters in the second class.
NameChar
    ::= NameStartChar:nameStartChar
    |   "[-.0-9\xB7\x300-\x36F\x203F-\x2040]"
    ;
37
// A NameStartChar followed by zero or more NameChars. The span `s` starts at
// the first character and its end is advanced with every further character;
// the matched text for that span is returned.
Name(var Span s) : std::u32string
    ::= (
            NameStartChar:nameStartChar{ s = span; }
            (NameChar:nameChar{ s.end = span.end; })*
        )
        {
            return lexer.GetMatch(s);
        }
    ;
44
// One Name followed by any number of further Names, each preceded by exactly
// one space character (x20).
Names
    ::= Name:name1
        ('\x20' Name:name2)*
    ;
48
// A name token: one or more NameChars.
Nmtoken
    ::= NameChar:nameChar+
    ;
52
// One Nmtoken followed by any number of further Nmtokens, each preceded by
// exactly one space character (x20).
Nmtokens
    ::= Nmtoken:nmToken1
        ('\x20' Nmtoken:nmToken2)*
    ;
56
// Document prolog: optional XML declaration, any Misc items, then optionally a
// document type declaration followed by further Misc items.
Prolog(sngxml::xml::XmlProcessor* processor)
    ::= XMLDecl(processor):xmlDecl?
        Misc(processor):m1*
        (
            DocTypeDecl(processor):docTypeDecl
            Misc(processor):m2*
        )?
    ;
60
// XML declaration: "<?xml", mandatory version info, optional encoding and
// standalone declarations, optional white space, then "?>".
XMLDecl(sngxml::xml::XmlProcessor* processor)
    ::= "<?xml"
        VersionInfo(processor):versionInfo
        EncodingDecl(processor):encodingDecl?
        SDDecl(processor):sdDecl?
        S:s?
        "?>"
    ;
64
// White space, the keyword "version", an equals sign and a quoted version number.
VersionInfo(sngxml::xml::XmlProcessor* processor)
    ::= S:s
        "version"
        Eq:eq
        VersionNumber(processor):versionNumber
    ;
68
// Unquoted version number: '1', '.', then one or more decimal digits. The span
// `s` is started at the '1' and extended over each digit; the matched text is
// handed to processor->Version.
VersionNum(sngxml::xml::XmlProcessor* processor, var Span s)
    ::= (
            '1'{ s = span; }
            '.'
            ("[0-9]"{ s.end = span.end; })+
        )
        {
            processor->Version(lexer.GetMatch(s));
        }
    ;
75
// Version number enclosed in double quotes.
VersionNumDQ(sngxml::xml::XmlProcessor* processor)
    ::= '"'
        VersionNum(processor):versionNum
        '"'
    ;
79
// Version number enclosed in single quotes.
VersionNumSQ(sngxml::xml::XmlProcessor* processor)
    ::= '\''
        VersionNum(processor):versionNum
        '\''
    ;
83
// Quoted version number: the double-quoted form is tried first, then the
// single-quoted form.
VersionNumber(sngxml::xml::XmlProcessor* processor)
    ::= VersionNumDQ(processor):versionNumDQ
    |   VersionNumSQ(processor):versionNumSQ
    ;
88
// White space, the keyword "encoding", an equals sign and a quoted encoding name.
EncodingDecl(sngxml::xml::XmlProcessor* processor)
    ::= S:s
        "encoding"
        Eq:eq
        EncName(processor):encName
    ;
92
// Unquoted encoding name: an ASCII letter followed by any number of letters,
// digits, '.', '_' or '-'. The span `s` covers the whole name; its text is
// handed to processor->Encoding.
EncodingName(sngxml::xml::XmlProcessor* processor, var Span s)
    ::= (
            "[A-Za-z]"{ s = span; }
            ("[A-Za-z0-9._-]"{ s.end = span.end; })*
        )
        {
            processor->Encoding(lexer.GetMatch(s));
        }
    ;
99
// Encoding name enclosed in double quotes.
EncNameDQ(sngxml::xml::XmlProcessor* processor)
    ::= '"'
        EncodingName(processor):encodingName
        '"'
    ;
103
// Encoding name enclosed in single quotes.
EncNameSQ(sngxml::xml::XmlProcessor* processor)
    ::= '\''
        EncodingName(processor):encodingName
        '\''
    ;
107
// Quoted encoding name: the double-quoted form is tried first, then the
// single-quoted form.
EncName(sngxml::xml::XmlProcessor* processor)
    ::= EncNameDQ(processor):encNameDQ
    |   EncNameSQ(processor):encNameSQ
    ;
112
// Standalone declaration: white space, "standalone", an equals sign and a
// YesNo value, which is passed on to processor->Standalone.
SDDecl(sngxml::xml::XmlProcessor* processor)
    ::= S:s
        "standalone"
        Eq:eq
        YesNo:yn{ processor->Standalone(yn); }
    ;
116
// Document type declaration: "<!DOCTYPE", the root element name, an optional
// external ID, an optional internal subset in square brackets, then '>'.
// Nothing is reported to the processor by this rule itself.
DocTypeDecl(sngxml::xml::XmlProcessor* processor)
    ::= "<!DOCTYPE"
        S:s1
        Name:rootElementName
        (S:s2 ExternalID:extID)?
        S:s3?
        (
            '['
            IntSubset(processor):intSubset
            ']'
            S:s4?
        )?
        '>'
    ;
120
// External identifier: either SYSTEM with a system literal, or PUBLIC with a
// public-id literal followed by a system literal.
ExternalID
    ::= (
            "SYSTEM" S:s1 SystemLiteral:s2
        )
    |   (
            "PUBLIC" S:s3 PubidLiteral:p2 S:s4 SystemLiteral:s5
        )
    ;
125
// Quoted system literal: any characters other than the delimiting quote,
// enclosed in double or single quotes.
SystemLiteral
    ::= ('"' ("[^\"]"*) '"')
    |   ('\'' ("[^']"*) '\'')
    ;
129
// Quoted public identifier literal. In the single-quoted form the apostrophe
// is excluded from the allowed PubidChars so that it can end the literal.
PubidLiteral
    ::= '"' (PubidChar:p1*) '"'
    |   '\'' ((PubidChar:p2 - '\'')*) '\''
    ;
133
// A single character permitted in a public identifier: space/CR/LF, an ASCII
// letter or digit, or one of the listed punctuation characters.
PubidChar
    ::= "[\x20\xD\xA]"
    |   "[a-zA-Z0-9]"
    |   "[-'()+,./:=?;!*#@$_%]"
    ;
137
// Internal DTD subset: any sequence of markup declarations and declaration
// separators.
IntSubset(sngxml::xml::XmlProcessor* processor)
    ::= (
            MarkupDecl(processor):mdecl
        |   DeclSep(processor):declsep
        )*
    ;
141
// One markup declaration: an element, attribute-list, entity or notation
// declaration, a processing instruction, or a comment (tried in that order).
MarkupDecl(sngxml::xml::XmlProcessor* processor)
    ::= ElementDecl(processor):elementDecl
    |   AttlistDecl(processor):attlistDecl
    |   EntityDecl(processor):entityDecl
    |   NotationDecl(processor):notationDecl
    |   PI(processor):pi
    |   Comment(processor):comment
    ;
145
// Declaration separator: a parameter-entity reference or white space.
DeclSep(sngxml::xml::XmlProcessor* processor)
    ::= PEReference(processor):peref
    |   S:s
    ;
149
// Element type declaration: "<!ELEMENT", element name, content specification,
// optional white space and '>'.
ElementDecl(sngxml::xml::XmlProcessor* processor)
    ::= "<!ELEMENT"
        S:s1
        Name:elementName
        S:s2
        ContentSpec:contentSpec
        S:s3?
        '>'
    ;
153
// Content specification of an element declaration: EMPTY, ANY, mixed content
// or element children.
ContentSpec
    ::= "EMPTY"
    |   "ANY"
    |   Mixed:mixed
    |   Children:children
    ;
157
// Element content model: a choice or sequence group with an optional
// occurrence indicator ('?', '*' or '+').
Children
    ::= (
            Choice:choice
        |   Seq:seq
        )
        ('?' | '*' | '+')?
    ;
161
// Content particle: a name, a choice group or a sequence group, with an
// optional occurrence indicator ('?', '*' or '+').
CP
    ::= (
            Name:name
        |   Choice:choice
        |   Seq:seq
        )
        ('?' | '*' | '+')?
    ;
165
// Parenthesized choice group: a content particle followed by one or more
// '|'-separated alternatives; white space is optional around each part.
Choice
    ::= '('
        S:s1?
        CP:cp1
        (S:s2? '|' S:s3? CP:cp2)+
        S:s4?
        ')'
    ;
169
// Parenthesized sequence group: a content particle followed by zero or more
// ','-separated particles; white space is optional around each part.
Seq
    ::= '('
        S:s1?
        CP:cp1
        (S:s2? ',' S:s3? CP:cp2)*
        S:s4?
        ')'
    ;
173
// Mixed content declaration. First form: "#PCDATA" with any number of
// '|'-separated names, closed by ")*". Second form: "#PCDATA" alone, closed
// by ')'.
Mixed
    ::= '('
        S:s1?
        "#PCDATA"
        (S:s2? '|' S:s3? Name:name)*
        S:s4?
        ")*"
    |   '('
        S:s5?
        "#PCDATA"
        S:s6?
        ')'
    ;
178
// Attribute-list declaration: "<!ATTLIST", element name, any number of
// attribute definitions, optional white space and '>'.
AttlistDecl(sngxml::xml::XmlProcessor* processor)
    ::= "<!ATTLIST"
        S:s1
        Name:name
        AttDef(processor):attdef*
        S:s2?
        '>'
    ;
182
// One attribute definition: white space, attribute name, attribute type and
// default declaration, each separated by mandatory white space.
AttDef(sngxml::xml::XmlProcessor* processor)
    ::= S:s
        Name:name
        S:s2
        AttType:attType
        S:s3
        DefaultDecl(processor):defaultDecl
    ;
186
// Attribute type: string type, tokenized type or enumerated type.
AttType
    ::= StringType:stringType
    |   TokenizedType:tokenizedType
    |   EnumeratedType:enumeratedType
    ;
190
// The string attribute type keyword.
StringType
    ::= "CDATA"
    ;
194
// Tokenized attribute type keyword.
// Keywords that share a prefix are listed longest first. Alternatives are tried
// in order and the first one that matches is taken, so with "ID" ahead of
// "IDREF"/"IDREFS" (or "NMTOKEN" ahead of "NMTOKENS") the shorter keyword would
// always win and the rest of the longer keyword would be left unconsumed,
// making the enclosing attribute definition fail.
// NOTE(review): relies on soulng's first-match alternative semantics - confirm
// against the generated parser.
TokenizedType
    ::= "IDREFS"
    |   "IDREF"
    |   "ID"
    |   "ENTITIES"
    |   "ENTITY"
    |   "NMTOKENS"
    |   "NMTOKEN"
    ;
204
// Enumerated attribute type: a notation type or a plain enumeration.
EnumeratedType
    ::= NotationType:notationType
    |   Enumeration:enumeration
    ;
208
// "NOTATION" followed by a parenthesized, '|'-separated list of one or more
// names; white space is optional inside the parentheses.
NotationType
    ::= "NOTATION"
        S:s1
        '('
        S:s2?
        Name:f
        (S:s3? '|' S:s4? Name:n)*
        S:s5?
        ')'
    ;
212
// Parenthesized, '|'-separated list of one or more name tokens; white space is
// optional inside the parentheses.
Enumeration
    ::= '('
        S:s1?
        Nmtoken:nmtoken
        (S:s2? '|' S:s3? Nmtoken:nmtoken2)*
        S:s4?
        ')'
    ;
216
// Attribute default: "#REQUIRED", "#IMPLIED", or an attribute value that may
// be preceded by "#FIXED" and white space.
DefaultDecl(sngxml::xml::XmlProcessor* processor)
    ::= "#REQUIRED"
    |   "#IMPLIED"
    |   (
            ("#FIXED" S:s)?
            AttValue(processor):attVAlue
        )
    ;
220
// Entity declaration: a general entity declaration is tried first, then a
// parameter entity declaration.
EntityDecl(sngxml::xml::XmlProcessor* processor)
    ::= GEDecl(processor):gedecl
    |   PEDecl(processor):pedecl
    ;
224
// General entity declaration: "<!ENTITY", entity name, entity definition,
// optional white space and '>'.
GEDecl(sngxml::xml::XmlProcessor* processor)
    ::= "<!ENTITY"
        S:s0
        Name:entityName
        S:s1
        EntityDef(processor):entityValue
        S:s2?
        '>'
    ;
228
// Parameter entity declaration: "<!ENTITY", '%', entity name, parameter entity
// definition, optional white space and '>'.
PEDecl(sngxml::xml::XmlProcessor* processor)
    ::= "<!ENTITY"
        S:s0
        '%'
        S:s1
        Name:peName
        S:s2
        PEDef(processor):peValue
        S:s3?
        '>'
    ;
232
// General entity definition: a literal entity value, or an external ID with an
// optional NDATA declaration.
EntityDef(sngxml::xml::XmlProcessor* processor)
    ::= EntityValue(processor):entityValue
    |   (
            ExternalID:extID
            NDataDecl:notation?
        )
    ;
236
// Parameter entity definition: a literal entity value or an external ID.
PEDef(sngxml::xml::XmlProcessor* processor)
    ::= EntityValue(processor):entityValue
    |   ExternalID:extID
    ;
240
// Quoted entity value. Between the quotes any mix of ordinary characters
// (anything but '%', '&' and the delimiting quote), parameter-entity
// references and references is accepted. Double- and single-quoted forms are
// handled by the two alternatives.
EntityValue(sngxml::xml::XmlProcessor* processor)
    ::= '"'
        (   "[^%&\"]"
        |   PEReference(processor):pr1
        |   Reference(processor):ref1
        )*
        '"'
    |   '\''
        (   "[^%&']"
        |   PEReference(processor):pr2
        |   Reference(processor):ref2
        )*
        '\''
    ;
253
// NDATA declaration: white space, "NDATA", white space and a notation name.
NDataDecl
    ::= S:s1
        "NDATA"
        S:s2
        Name:name
    ;
257
// Parameter-entity reference: '%', a name and ';'. The reference is only
// recognized; nothing is reported to the processor here.
PEReference(sngxml::xml::XmlProcessor* processor)
    ::= '%'
        Name:name
        ';'
    ;
261
// Notation declaration: "<!NOTATION", notation name, an external ID or a
// public ID, optional white space and '>'.
NotationDecl(sngxml::xml::XmlProcessor* processor)
    ::= "<!NOTATION"
        S:s
        Name:name
        S:s2
        (
            ExternalID:extID
        |   PublicID:pubID
        )
        S:s3?
        '>'
    ;
265
// Public identifier: "PUBLIC", white space and a public-id literal.
PublicID
    ::= "PUBLIC"
        S:s
        PubidLiteral:pl
    ;
269
// An element. After '<' and the tag name, BeginStartTag is reported, then any
// attributes are parsed. An empty-element tag ("/>") reports EndStartTag
// followed directly by EndTag for the same name; otherwise '>' reports
// EndStartTag and the content and end tag are parsed.
Element(sngxml::xml::XmlProcessor* processor, var std::u32string tagName)
    ::= '<'
        Name:name
        {
            tagName = name;
            processor->BeginStartTag(tagName);
        }
        (S:s Attribute(processor):attribute)*
        S:s?
        (   "/>"
            {
                processor->EndStartTag(span, lexer.FileName());
                processor->EndTag(tagName, span, lexer.FileName());
            }
        |   '>'
            {
                processor->EndStartTag(span, lexer.FileName());
            }
            Content(processor):content
            ETag(processor):etag
        )
    ;
276
// End tag: "</", the tag name (reported through processor->EndTag as soon as
// it is matched), optional white space and '>'.
ETag(sngxml::xml::XmlProcessor* processor)
    ::= "</"
        Name:name
        {
            processor->EndTag(name, span, lexer.FileName());
        }
        S:s?
        '>'
    ;
280
// Element content: optional character data, then any number of child items
// (element, reference, CDATA section, processing instruction or comment),
// each optionally followed by more character data.
Content(sngxml::xml::XmlProcessor* processor)
    ::= CharData(processor):cd1?
        (
            (   Element(processor):element
            |   Reference(processor):reference
            |   CDSect(processor):cdsect
            |   PI(processor):pi
            |   Comment(processor):comment
            )
            CharData(processor):cd2?
        )*
    ;
284
// Matches a single character other than '<' and '&' and returns it.
// The id of the token at the current position is converted to the rule's
// declared return type; the cast needs its explicit target type to be valid C++.
CharDataChar : char32_t
    ::= "[^<&]"
        {
            Token token = lexer.GetToken(pos);
            return static_cast<char32_t>(token.id);
        }
    ;
288
// Character data: a run of CharDataChars, collected into `s`, that does not
// contain the sequence "]]>". The collected text is passed to processor->Text.
CharData(sngxml::xml::XmlProcessor* processor, var std::u32string s)
    ::= (
            (CharDataChar:chr{ s.append(1, chr); })*
        -   ("[^<&]"* "]]>" "[^<&]"*)
        )
        {
            processor->Text(s);
        }
    ;
292
// CDATA section: "<![CDATA[", then characters up to (not including) the
// terminating "]]>". The characters are collected into `s` and passed to
// processor->CDataSection.
CDSect(sngxml::xml::XmlProcessor* processor, var std::u32string s)
    ::= (
            "<![CDATA["
            ((Char:chr - "]]>"){ s.append(1, chr); })*
            "]]>"
        )
        {
            processor->CDataSection(s);
        }
    ;
299
// One attribute: name, equals sign and quoted value. The name/value pair is
// reported through processor->AddAttribute together with the span and file name.
Attribute(sngxml::xml::XmlProcessor* processor)
    ::= Name:attName
        Eq:eq
        AttValue(processor):attValue
        {
            processor->AddAttribute(attName, attValue, span, lexer.FileName());
        }
    ;
303
// Double-quoted attribute value. BeginAttributeValue is called at the opening
// quote; ordinary characters are appended to processor->AttValue() and
// references are handled by the Reference rule. At the closing quote the
// accumulated value is copied out before EndAttributeValue is called, and the
// copy is returned.
AttValueDQ(sngxml::xml::XmlProcessor* processor) : std::u32string
    ::= '"'{ processor->BeginAttributeValue(); }
        (   "[^<&\"]"{ processor->AttValue().append(lexer.GetMatch(span)); }
        |   Reference(processor):reference
        )*
        '"'
        {
            std::u32string result = processor->AttValue();
            processor->EndAttributeValue();
            return result;
        }
    ;
314
// Single-quoted attribute value. BeginAttributeValue is called at the opening
// quote; ordinary characters are appended to processor->AttValue() and
// references are handled by the Reference rule. At the closing quote the
// accumulated value is copied out before EndAttributeValue is called, and the
// copy is returned.
AttValueSQ(sngxml::xml::XmlProcessor* processor) : std::u32string
    ::= '\''{ processor->BeginAttributeValue(); }
        (   "[^<&\']"{ processor->AttValue().append(lexer.GetMatch(span)); }
        |   Reference(processor):reference
        )*
        '\''
        {
            std::u32string result = processor->AttValue();
            processor->EndAttributeValue();
            return result;
        }
    ;
325
// Quoted attribute value: returns the value of whichever quoted form matches,
// trying the double-quoted form first.
AttValue(sngxml::xml::XmlProcessor* processor) : std::u32string
    ::= AttValueDQ(processor):attValueDQ
        {
            return attValueDQ;
        }
    |   AttValueSQ(processor):attValueSQ
        {
            return attValueSQ;
        }
    ;
330
// Entity reference: '&', a name and ';'. The name is reported through
// processor->EntityRef together with the span and file name.
EntityRef(sngxml::xml::XmlProcessor* processor)
    ::= (
            '&'
            Name:name
            ';'
        )
        {
            processor->EntityRef(name, span, lexer.FileName());
        }
    ;
334
// One or more decimal digits, accumulated into `val` (val = 10 * val + digit)
// as each digit is matched; the accumulated value is returned.
DecCodePoint(var uint32_t val) : uint32_t
    ::= (
            (   "[0-9]"
                {
                    Token token = lexer.GetToken(pos);
                    val = 10 * val + token.id - '0';
                }
            )+
        )
        {
            return val;
        }
    ;
341
// One or more hexadecimal digits (either case), accumulated into `val`
// (val = 16 * val + digit value) as each digit is matched; the accumulated
// value is returned.
HexCodePoint(var uint32_t val) : uint32_t
    ::= (
            (   "[0-9a-fA-F]"
                {
                    Token token = lexer.GetToken(pos);
                    if (token.id >= '0' && token.id <= '9')
                    {
                        val = 16 * val + token.id - '0';
                    }
                    else if (token.id >= 'a' && token.id <= 'f')
                    {
                        val = 16 * val + 10 + token.id - 'a';
                    }
                    else if (token.id >= 'A' && token.id <= 'F')
                    {
                        val = 16 * val + 10 + token.id - 'A';
                    }
                }
            )+
        )
        {
            return val;
        }
    ;
357
// Character reference in decimal ("&#" digits ';') or hexadecimal
// ("&#x" hex-digits ';') form. The parsed code point is converted to a
// one-character UTF-32 string and passed to processor->Text. The cast names
// char32_t explicitly; without a target type it is not valid C++.
CharRef(sngxml::xml::XmlProcessor* processor)
    ::= ("&#" DecCodePoint:decCodePoint ';')
        {
            processor->Text(std::u32string(1, static_cast<char32_t>(decCodePoint)));
        }
    |   ("&#x" HexCodePoint:hexCodePoint ';')
        {
            processor->Text(std::u32string(1, static_cast<char32_t>(hexCodePoint)));
        }
    ;
362
// A reference: an entity reference is tried first, then a character reference.
Reference(sngxml::xml::XmlProcessor* processor)
    ::= EntityRef(processor):entityRef
    |   CharRef(processor):charRef
    ;
367
// Miscellaneous item: a comment, a processing instruction or white space.
Misc(sngxml::xml::XmlProcessor* processor)
    ::= Comment(processor):comment
    |   PI(processor):pi
    |   S:s
    ;
371
// Comment: "<!--", body, "-->". The body is built from single non-'-'
// characters and from a '-' followed by a non-'-' character, so two
// consecutive hyphens cannot occur inside it. The body text is collected into
// `s` and passed to processor->Comment.
Comment(sngxml::xml::XmlProcessor* processor, var std::u32string s)
    ::= (
            "<!--"
            (   (Char:chr - '-'){ s.append(1, chr); }
            |   '-' (Char:chr - '-'){ s.append(1, '-').append(1, chr); }
            )*
            "-->"
        )
        {
            processor->Comment(s);
        }
    ;
380
// Processing instruction: "<?", target, white space, then characters up to
// (not including) the terminating "?>". The characters are collected into
// `data`; target and data are passed to processor->PI.
PI(sngxml::xml::XmlProcessor* processor, var std::u32string data)
    ::= (
            "<?"
            PITarget:target
            S:s
            ((Char:chr - "?>"){ data.append(1, chr); })*
            "?>"
        )
        {
            processor->PI(target, data);
        }
    ;
384
// Processing-instruction target: a Name that is not matched by the Xml rule.
// Returns the name.
PITarget : std::u32string
    ::= (Name:name - Xml:xml)
        {
            return name;
        }
    ;
388
// The three letters x, m, l in any combination of upper and lower case.
Xml
    ::= "[xX]"
        "[mM]"
        "[lL]"
    ;
392
// An equals sign with optional white space on either side.
Eq
    ::= S:before?
        '='
        S:after?
    ;
396
// Quoted "yes"/"no" value of the standalone declaration; returns true for yes
// and false for no. Both double-quoted and single-quoted spellings are
// accepted: XML 1.0 allows either quote style here, and the version and
// encoding rules in this grammar already accept both.
YesNo : bool
    ::= "\"yes\""{ return true; }
    |   "\"no\""{ return false; }
    |   "'yes'"{ return true; }
    |   "'no'"{ return false; }
    ;
401 }