1#![cfg(any(feature = "xml", feature = "xml2json", feature = "xq"))]
2use quick_xml::events::Event;
3use quick_xml::Reader;
4use serde_json::{to_value, Map, Value};
5use std::io::BufRead;
6use std::mem::take;
7
8pub fn wrap_xml_reader<R: BufRead>(reader: R) -> Value {
9 let mut xml_reader = Reader::from_reader(reader);
10 let config = xml_reader.config_mut();
11 config.expand_empty_elements = true;
12 config.trim_text(true);
13 read(&mut xml_reader)
14}
15
/// Helpers for a JSON object that collects the attributes of an XML element.
trait AttrMap {
    /// Combines the collected attributes with the element's content `value`.
    ///
    /// Returns the map converted to a JSON value (leaving the map itself empty) when it holds
    /// at least one entry, and `None` when it is empty so the caller can keep `value` as is.
    fn insert_text(&mut self, value: &Value) -> Option<Value>;
    /// Stores `value` in the map as the element's text node.
    fn insert_text_node(&mut self, value: Value);
}
20
21impl AttrMap for Map<String, Value> {
22 fn insert_text(&mut self, value: &Value) -> Option<Value> {
23 if !self.is_empty() {
24 if value.is_string() {
25 self.insert_text_node(value.clone());
26 }
27 if let Ok(attrs) = to_value(take(self)) {
28 return Some(attrs);
29 }
30 }
31 None
32 }
33
34 fn insert_text_node(&mut self, value: Value) {
35 self.insert("$text".to_string(), value);
36 }
37}
38
/// Accumulates the content found at one nesting level while `read` walks the XML events.
struct NodeValues {
    /// Child elements seen since the last text chunk, keyed by tag name.
    node: Map<String, Value>,
    /// Earlier `node` maps that were set aside, oldest first.
    nodes: Vec<Map<String, Value>>,
    /// One flag per entry pushed to `nodes` (`true`) or `values` (`false`), in push order;
    /// used to interleave both lists again when the final value is built.
    nodes_are_map: Vec<bool>,
    /// Text chunks, oldest first.
    values: Vec<Value>,
}
45
46impl NodeValues {
47 fn new() -> Self {
48 Self {
49 values: Vec::new(),
50 node: Map::new(),
51 nodes: Vec::new(),
52 nodes_are_map: Vec::new(),
53 }
54 }
55
56 fn insert(&mut self, key: String, value: Value) {
57 self.node.insert(key, value);
58 }
59
60 fn insert_text(&mut self, text: &str) {
61 if !self.node.is_empty() {
62 self.nodes.push(take(&mut self.node));
63 self.nodes_are_map.push(true);
64 }
65
66 self.values.push(Value::String(text.to_string()));
67 self.nodes_are_map.push(false);
68 }
69
70 fn remove_entry(&mut self, key: &String) -> Option<Value> {
71 if self.node.contains_key(key) {
72 if let Some((_, existing)) = self.node.remove_entry(key) {
73 return Some(existing);
74 }
75 }
76 None
77 }
78
79 fn get_value(&mut self) -> Value {
80 if !self.node.is_empty() {
81 self.nodes.push(take(&mut self.node));
82 self.nodes_are_map.push(true);
83 }
84
85 if !self.nodes.is_empty() {
86 if self.nodes.len() == 1 && self.values.len() <= 1 {
90 if self.values.len() == 1 {
91 self.nodes[0].insert_text_node(self.values.remove(0));
92 }
93 return to_value(&self.nodes[0]).expect("Failed to #to_value() a node!");
94 }
95 for (index, node_is_map) in self.nodes_are_map.iter().enumerate() {
96 if *node_is_map {
97 self.values
98 .insert(index, Value::Object(self.nodes.remove(0)));
99 }
100 }
101 }
102
103 match self.values.len() {
104 0 => Value::Null,
105 1 => self.values.pop().unwrap(),
106 _ => Value::Array(take(&mut self.values)),
107 }
108 }
109}
110
111fn read<R: BufRead>(reader: &mut Reader<R>) -> Value {
134 let mut buf = Vec::new();
135 let mut nodes = NodeValues::new();
136
137 loop {
138 match reader.read_event_into(&mut buf) {
139 Ok(Event::Start(ref e)) => {
140 if let Ok(name) = String::from_utf8(e.name().into_inner().to_vec()) {
141 let mut child = read(reader);
142 let mut attrs = Map::new();
143
144 let _ = e
145 .attributes()
146 .map(|a| {
147 if let Ok(attr) = a {
148 let key = String::from_utf8(attr.key.into_inner().to_vec());
149 let value = String::from_utf8(attr.value.to_vec());
150
151 if let (Ok(key), Ok(value)) = (key, value) {
153 let key = format!("@{key}");
154 let value = Value::String(value);
155
156 if child.is_object() {
158 child.as_object_mut().unwrap().insert(key, value);
159 } else {
160 attrs.insert(key, value);
161 }
162 }
163 }
164 })
165 .collect::<Vec<_>>();
166
167 if let Some(mut existing) = nodes.remove_entry(&name) {
168 let mut entries: Vec<Value> = vec![];
169
170 if existing.is_array() {
171 let existing = existing.as_array_mut().unwrap();
172 while !existing.is_empty() {
173 entries.push(existing.remove(0));
174 }
175 } else {
176 entries.push(existing);
177 }
178
179 if let Some(attrs) = attrs.insert_text(&child) {
183 entries.push(attrs);
184 } else {
185 entries.push(child);
186 }
187
188 nodes.insert(name, Value::Array(entries));
189 } else if let Some(attrs) = attrs.insert_text(&child) {
193 nodes.insert(name, attrs);
194 } else {
195 nodes.insert(name, child);
196 }
197 }
198 }
199 Ok(Event::Text(ref e)) => {
200 if let Ok(decoded) = e.unescape() {
201 nodes.insert_text(&decoded);
202 }
203 }
204 Ok(Event::CData(ref e)) => {
205 if let Ok(decoded) = e.clone().escape() {
206 if let Ok(decoded_bt) = decoded.unescape() {
207 nodes.insert_text(&decoded_bt);
208 }
209 }
210 }
211 Ok(Event::End(ref _e)) => break,
212 Ok(Event::Eof) => break,
213 _ => (),
214 }
215 }
216 nodes.get_value()
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs `read` over `xml` with the reader's default configuration.
    fn parse(xml: &str) -> Value {
        read(&mut Reader::from_str(xml))
    }

    /// Runs `read` over `xml` with `expand_empty_elements` switched on.
    fn parse_expanded(xml: &str) -> Value {
        let mut reader = Reader::from_str(xml);
        reader.config_mut().expand_empty_elements = true;
        read(&mut reader)
    }

    #[test]
    fn test_read() {
        // Empty input.
        assert_eq!(parse(r""), Value::Null);

        // A lone self-closing tag.
        let xml = r"<root/>";
        assert_eq!(parse(xml), Value::Null);
        assert_eq!(parse_expanded(xml), json!({"root": null}));

        // Plain text content.
        assert_eq!(parse(r"<key>value</key>"), json!({"key": "value"}));

        // Attributes next to text, plus a nested self-closing tag.
        let xml = r#"<key attr="A">B</key><out>C<in/></out>"#;
        assert_eq!(
            parse(xml),
            json!({"key": {"$text": "B", "@attr": "A"}, "out": "C"})
        );
        assert_eq!(
            parse_expanded(xml),
            json!({"key": {"$text": "B", "@attr": "A"}, "out": {"$text": "C", "in": null}})
        );

        // Repeated tag names are gathered in an array.
        assert_eq!(
            parse(r"<tag><inner>A</inner><inner>B</inner></tag>"),
            json!({"tag": {"inner": ["A", "B"]}})
        );
        assert_eq!(
            parse(r#"<tag><inner attr="A">A</inner><inner attr="B">B</inner></tag>"#),
            json!({"tag": {"inner": [{"$text": "A", "@attr": "A"}, {"$text": "B", "@attr": "B"}]}})
        );

        // Mixed content keeps document order.
        let xml = r#"<tag>A <some attr="B"/> C</tag>"#;
        assert_eq!(parse(xml), json!({"tag": ["A ", " C"]}));
        assert_eq!(
            parse_expanded(xml),
            json!({"tag": ["A ", {"some": {"@attr": "B"}}, " C"]})
        );

        assert_eq!(
            parse(r"<tag>A <some>B</some> C <some>D</some></tag>"),
            json!({"tag": ["A ", {"some": "B"}, " C ", {"some": "D"}]})
        );

        // CDATA sections behave like text.
        assert_eq!(parse(r"<![CDATA[sample]]>"), json!("sample"));
        assert_eq!(
            parse(r"<tag><![CDATA[sample]]></tag>"),
            json!({"tag": "sample"})
        );
        assert_eq!(
            parse(r#"<tag attr="B"><![CDATA[A]]></tag>"#),
            json!({"tag": {"$text": "A", "@attr": "B"}})
        );
        assert_eq!(
            parse(r#"<tag attr="C">A <some><![CDATA[B]]></some></tag>"#),
            json!({"tag": {"$text": "A ", "@attr": "C", "some": "B"}})
        );
    }
}