1
2
3
4
5
6 #include <sngxml/xml/XmlProcessor.hpp>
7 #include <soulng/util/Util.hpp>
8 #include <soulng/util/Unicode.hpp>
9
10 namespace sngxml { namespace xml {
11
12 using namespace soulng::util;
13 using namespace soulng::unicode;
14
15 XmlProcessingException::XmlProcessingException(const std::string& message_) : std::runtime_error(message_)
16 {
17 }
18
19 Entity::~Entity()
20 {
21 }
22
23 InternalEntity::InternalEntity(const std::u32string& value_) : value(value_)
24 {
25 }
26
27 XmlProcessor::XmlProcessor(TrivialLexer& lexer_, XmlContentHandler* contentHandler_) : lexer(lexer_), contentHandler(contentHandler_), attValue()
28 {
29 entityMap[U"quot"] = std::unique_ptr<Entity>(new InternalEntity(U"\""));
30 entityMap[U"amp"] = std::unique_ptr<Entity>(new InternalEntity(U"&"));
31 entityMap[U"apos"] = std::unique_ptr<Entity>(new InternalEntity(U"'"));
32 entityMap[U"lt"] = std::unique_ptr<Entity>(new InternalEntity(U"<"));
33 entityMap[U"gt"] = std::unique_ptr<Entity>(new InternalEntity(U">"));
34 }
35
36 void XmlProcessor::StartDocument()
37 {
38 contentHandler->StartDocument();
39 }
40
41 void XmlProcessor::EndDocument()
42 {
43 contentHandler->EndDocument();
44 }
45
46 void XmlProcessor::Text(const std::u32string& text)
47 {
48 contentHandler->Text(text);
49 }
50
51 void XmlProcessor::Comment(const std::u32string& text)
52 {
53 contentHandler->Comment(text);
54 }
55
56 void XmlProcessor::PI(const std::u32string& target, const std::u32string& data)
57 {
58 contentHandler->PI(target, data);
59 }
60
61 void XmlProcessor::CDataSection(const std::u32string& cdata)
62 {
63 contentHandler->CDataSection(cdata);
64 }
65
66 void XmlProcessor::Version(const std::u32string& version)
67 {
68 contentHandler->Version(version);
69 }
70
71 void XmlProcessor::Standalone(bool standalone)
72 {
73 contentHandler->Standalone(standalone);
74 }
75
76 void XmlProcessor::Encoding(const std::u32string& encoding)
77 {
78 contentHandler->Encoding(encoding);
79 }
80
81 void XmlProcessor::BeginStartTag(const std::u32string& tagName)
82 {
83 tagStack.push(currentTagName);
84 currentTagName = tagName;
85 namespaceUriStack.push(currentNamespaceUri);
86 namespacePrefixStack.push(currentNamespacePrefix);
87 attributes.Clear();
88 }
89
90 void XmlProcessor::EndStartTag(const soulng::lexer::Span& span, const std::string& systemId)
91 {
92 std::u32string localName;
93 std::u32string prefix;
94 ParseQualifiedName(currentTagName, localName, prefix, span, systemId);
95 if (prefix == U"xmlns")
96 {
97 throw XmlProcessingException(GetErrorLocationStr(systemId, span) + ": 'xmlns' prefix cannot be declared for an element");
98 }
99 contentHandler->StartElement(GetNamespaceUri(prefix, span, systemId), localName, currentTagName, attributes);
100 }
101
102 void XmlProcessor::EndTag(const std::u32string& tagName, const soulng::lexer::Span& span, const std::string& systemId)
103 {
104 if (tagStack.empty())
105 {
106 throw XmlProcessingException(GetErrorLocationStr(systemId, span) + ": end tag '" + ToUtf8(tagName) + "' has no corresponding start tag");
107 }
108 if (tagName != currentTagName)
109 {
110 throw XmlProcessingException(GetErrorLocationStr(systemId, span) + ": end tag '" + ToUtf8(tagName) + "' does not match start tag '" + ToUtf8(currentTagName) + "'");
111 }
112 std::u32string localName;
113 std::u32string prefix;
114 ParseQualifiedName(currentTagName, localName, prefix, span, systemId);
115 if (prefix == U"xmlns")
116 {
117 throw XmlProcessingException(GetErrorLocationStr(systemId, span) + ": 'xmlns' prefix cannot be declared for an element");
118 }
119 contentHandler->EndElement(GetNamespaceUri(prefix, span, systemId), localName, currentTagName);
120 if (namespaceUriStack.empty())
121 {
122 throw std::runtime_error("namespace URI stack is empty");
123 }
124 currentNamespaceUri = namespaceUriStack.top();
125 namespaceUriStack.pop();
126 namespacePrefixMap.erase(currentNamespacePrefix);
127 if (namespacePrefixStack.empty())
128 {
129 throw std::runtime_error("namespace prefix stack is empty");
130 }
131 currentNamespacePrefix = namespacePrefixStack.top();
132 namespacePrefixStack.pop();
133 namespacePrefixMap[currentNamespacePrefix] = currentNamespaceUri;
134 currentTagName = tagStack.top();
135 tagStack.pop();
136 }
137
138 void XmlProcessor::AddAttribute(const std::u32string& attName, const std::u32string& attValue, const soulng::lexer::Span& span, const std::string& systemId)
139 {
140 std::u32string localName;
141 std::u32string prefix;
142 ParseQualifiedName(attName, localName, prefix, span, systemId);
143 if (prefix == U"xmlns")
144 {
145 currentNamespacePrefix = localName;
146 currentNamespaceUri = attValue;
147 namespacePrefixMap[currentNamespacePrefix] = currentNamespaceUri;
148 }
149 else if (localName == U"xmlns")
150 {
151 currentNamespacePrefix.clear();
152 currentNamespaceUri = attValue;
153 }
154 else
155 {
156 attributes.Add(Attribute(GetNamespaceUri(prefix, span, systemId), localName, attName, attValue));
157 }
158 }
159
160 void XmlProcessor::EntityRef(const std::u32string& entityName, const soulng::lexer::Span& span, const std::string& systemId)
161 {
162 std::unordered_map<std::u32string, std::std::unique_ptr<Entity>>::const_iteratorit=entityMap.find(entityName);
163 if (it != entityMap.cend())
164 {
165 const std::std::unique_ptr<Entity>&entity=it->second;
166 if (entity->IsInternalEntity())
167 {
168 InternalEntity* internalEntity = static_cast<InternalEntity*>(entity.get());
169 const std::u32string& entityValue = internalEntity->Value();
170 if (!attValueStack.empty())
171 {
172 attValue.append(entityValue);
173 }
174 else
175 {
176 Text(entityValue);
177 }
178 }
179 else
180 {
181 contentHandler->SkippedEntity(entityName);
182 }
183 }
184 else
185 {
186 contentHandler->SkippedEntity(entityName);
187 }
188 }
189
190 void XmlProcessor::BeginAttributeValue()
191 {
192 attValueStack.push(attValue);
193 attValue.clear();
194 }
195
196 void XmlProcessor::EndAttributeValue()
197 {
198 if (attValueStack.empty())
199 {
200 throw std::runtime_error("attribute value stack is empty");
201 }
202 attValue = attValueStack.top();
203 attValueStack.pop();
204 }
205
206 std::u32string XmlProcessor::GetNamespaceUri(const std::u32string& namespacePrefix, const soulng::lexer::Span& span, const std::string& systemId)
207 {
208 if (namespacePrefix.empty())
209 {
210 return currentNamespaceUri;
211 }
212 std::unordered_map<std::u32string, std::u32string>::const_iterator it = namespacePrefixMap.find(namespacePrefix);
213 if (it != namespacePrefixMap.cend())
214 {
215 return it->second;
216 }
217 else
218 {
219 throw XmlProcessingException(GetErrorLocationStr(systemId, span) + ": namespace prefix '" + ToUtf8(namespacePrefix) + "' not bound to any namespace URI");
220 }
221 }
222
223 void XmlProcessor::ParseQualifiedName(const std::u32string& qualifiedName, std::u32string& localName, std::u32string& prefix, const soulng::lexer::Span& span, const std::string& systemId)
224 {
225 std::vector<std::u32string> parts = Split(qualifiedName, ':');
226 if (parts.size() > 2)
227 {
228 throw XmlProcessingException(GetErrorLocationStr(systemId, span) + ": qualified name '" + ToUtf8(qualifiedName) + "' has more than one ':' character");
229 }
230 else if (parts.size() == 2)
231 {
232 prefix = parts[0];
233 localName = parts[1];
234 }
235 else
236 {
237 prefix.clear();
238 localName = qualifiedName;
239 }
240 }
241
242 int XmlProcessor::GetErrorColumn(int index) const
243 {
244 int errorColumn = 0;
245 while (index > 0 && lexer.Start()[index] != '\n' && lexer.Start()[index] != '\r')
246 {
247 ++errorColumn;
248 --index;
249 }
250 if (errorColumn == 0)
251 {
252 errorColumn = 1;
253 }
254 return errorColumn;
255 }
256
257 std::string XmlProcessor::GetErrorLocationStr(const std::string& systemId, const soulng::lexer::Span& span) const
258 {
259 soulng::lexer::Token token = lexer.GetToken(span.start);
260 int errorColumn = GetErrorColumn(token.match.begin - lexer.Start());
261 return "error in '" + systemId + "' at line " + std::to_string(span.line) + " column " + std::to_string(errorColumn);
262 }
263
264 } }