convert2json/
xml.rs

1#![cfg(any(feature = "xml", feature = "xml2json", feature = "xq"))]
2use quick_xml::events::Event;
3use quick_xml::Reader;
4use serde_json::{to_value, Map, Value};
5use std::io::BufRead;
6use std::mem::take;
7
8pub fn wrap_xml_reader<R: BufRead>(reader: R) -> Value {
9    let mut xml_reader = Reader::from_reader(reader);
10    let config = xml_reader.config_mut();
11    config.expand_empty_elements = true;
12    config.trim_text(true);
13    read(&mut xml_reader)
14}
15
16trait AttrMap {
17    fn insert_text(&mut self, value: &Value) -> Option<Value>;
18    fn insert_text_node(&mut self, value: Value);
19}
20
21impl AttrMap for Map<String, Value> {
22    fn insert_text(&mut self, value: &Value) -> Option<Value> {
23        if !self.is_empty() {
24            if value.is_string() {
25                self.insert_text_node(value.clone());
26            }
27            if let Ok(attrs) = to_value(take(self)) {
28                return Some(attrs);
29            }
30        }
31        None
32    }
33
34    fn insert_text_node(&mut self, value: Value) {
35        self.insert("$text".to_string(), value);
36    }
37}
38
39struct NodeValues {
40    node: Map<String, Value>,
41    nodes: Vec<Map<String, Value>>,
42    nodes_are_map: Vec<bool>,
43    values: Vec<Value>,
44}
45
46impl NodeValues {
47    fn new() -> Self {
48        Self {
49            values: Vec::new(),
50            node: Map::new(),
51            nodes: Vec::new(),
52            nodes_are_map: Vec::new(),
53        }
54    }
55
56    fn insert(&mut self, key: String, value: Value) {
57        self.node.insert(key, value);
58    }
59
60    fn insert_text(&mut self, text: &str) {
61        if !self.node.is_empty() {
62            self.nodes.push(take(&mut self.node));
63            self.nodes_are_map.push(true);
64        }
65
66        self.values.push(Value::String(text.to_string()));
67        self.nodes_are_map.push(false);
68    }
69
70    fn remove_entry(&mut self, key: &String) -> Option<Value> {
71        if self.node.contains_key(key) {
72            if let Some((_, existing)) = self.node.remove_entry(key) {
73                return Some(existing);
74            }
75        }
76        None
77    }
78
79    fn get_value(&mut self) -> Value {
80        if !self.node.is_empty() {
81            self.nodes.push(take(&mut self.node));
82            self.nodes_are_map.push(true);
83        }
84
85        if !self.nodes.is_empty() {
86            // If we had collected some text along the way, that needs to be inserted
87            // so we don't lose it
88
89            if self.nodes.len() == 1 && self.values.len() <= 1 {
90                if self.values.len() == 1 {
91                    self.nodes[0].insert_text_node(self.values.remove(0));
92                }
93                return to_value(&self.nodes[0]).expect("Failed to #to_value() a node!");
94            }
95            for (index, node_is_map) in self.nodes_are_map.iter().enumerate() {
96                if *node_is_map {
97                    self.values
98                        .insert(index, Value::Object(self.nodes.remove(0)));
99                }
100            }
101        }
102
103        match self.values.len() {
104            0 => Value::Null,
105            1 => self.values.pop().unwrap(),
106            _ => Value::Array(take(&mut self.values)),
107        }
108    }
109}
110
111/// This function is part of xmltojson.
112///
113/// xmltojson is free software: you can redistribute it and/or modify
114/// it under the terms of the GNU Lesser General Public License as published by
115/// the Free Software Foundation, either version 3 of the License, or
116/// (at your option) any later version.
117///
118/// xmltojson is distributed in the hope that it will be useful,
119/// but WITHOUT ANY WARRANTY; without even the implied warranty of
120/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
121/// GNU Lesser General Public License for more details.
122///
123/// You should have received a copy of the GNU Lesser General Public License
124/// along with xmltojson.  If not, see <http://www.gnu.org/licenses/>.
125///
126/// See also: <https://github.com/rtyler/xmltojson>/ & <https://crates.io/crates/xmltojson>
127///
128/// Changes over the version of the function found in xmltojson:
129/// - removed debug statements, to reduce required dependencies
130/// - removed depth parameter, only used in debug statements
131/// - handle duplicate nodes with attributes
132/// - treat mixes of text and other nodes as sequences
133fn read<R: BufRead>(reader: &mut Reader<R>) -> Value {
134    let mut buf = Vec::new();
135    let mut nodes = NodeValues::new();
136
137    loop {
138        match reader.read_event_into(&mut buf) {
139            Ok(Event::Start(ref e)) => {
140                if let Ok(name) = String::from_utf8(e.name().into_inner().to_vec()) {
141                    let mut child = read(reader);
142                    let mut attrs = Map::new();
143
144                    let _ = e
145                        .attributes()
146                        .map(|a| {
147                            if let Ok(attr) = a {
148                                let key = String::from_utf8(attr.key.into_inner().to_vec());
149                                let value = String::from_utf8(attr.value.to_vec());
150
151                                // Only bother adding the attribute if both key and value are valid utf8
152                                if let (Ok(key), Ok(value)) = (key, value) {
153                                    let key = format!("@{key}");
154                                    let value = Value::String(value);
155
156                                    // If the child is already an object, that's where the insert should happen
157                                    if child.is_object() {
158                                        child.as_object_mut().unwrap().insert(key, value);
159                                    } else {
160                                        attrs.insert(key, value);
161                                    }
162                                }
163                            }
164                        })
165                        .collect::<Vec<_>>();
166
167                    if let Some(mut existing) = nodes.remove_entry(&name) {
168                        let mut entries: Vec<Value> = vec![];
169
170                        if existing.is_array() {
171                            let existing = existing.as_array_mut().unwrap();
172                            while !existing.is_empty() {
173                                entries.push(existing.remove(0));
174                            }
175                        } else {
176                            entries.push(existing);
177                        }
178
179                        /*
180                         * nodes with attributes need to be handled special
181                         */
182                        if let Some(attrs) = attrs.insert_text(&child) {
183                            entries.push(attrs);
184                        } else {
185                            entries.push(child);
186                        }
187
188                        nodes.insert(name, Value::Array(entries));
189                    /*
190                     * nodes with attributes need to be handled special
191                     */
192                    } else if let Some(attrs) = attrs.insert_text(&child) {
193                        nodes.insert(name, attrs);
194                    } else {
195                        nodes.insert(name, child);
196                    }
197                }
198            }
199            Ok(Event::Text(ref e)) => {
200                if let Ok(decoded) = e.unescape() {
201                    nodes.insert_text(&decoded);
202                }
203            }
204            Ok(Event::CData(ref e)) => {
205                if let Ok(decoded) = e.clone().escape() {
206                    if let Ok(decoded_bt) = decoded.unescape() {
207                        nodes.insert_text(&decoded_bt);
208                    }
209                }
210            }
211            Ok(Event::End(ref _e)) => break,
212            Ok(Event::Eof) => break,
213            _ => (),
214        }
215    }
216    nodes.get_value()
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs `read` over `xml` using the reader's default configuration.
    fn parse(xml: &str) -> Value {
        read(&mut Reader::from_str(xml))
    }

    /// Runs `read` over `xml` with `expand_empty_elements` switched on.
    fn parse_expanded(xml: &str) -> Value {
        let mut reader = Reader::from_str(xml);
        reader.config_mut().expand_empty_elements = true;
        read(&mut reader)
    }

    #[test]
    fn test_read() {
        // empty input
        assert_eq!(parse(r""), Value::Null);

        // without config of expand_empty_elements true, empty node will be removed
        let root = r"<root/>";
        assert_eq!(parse(root), Value::Null);
        assert_eq!(parse_expanded(root), json!({"root": null}));

        // plain text content
        assert_eq!(parse(r"<key>value</key>"), json!({"key": "value"}));

        // without config of expand_empty_elements true, empty node will be removed
        let siblings = r#"<key attr="A">B</key><out>C<in/></out>"#;
        assert_eq!(
            parse(siblings),
            json!({"key": {"$text": "B", "@attr": "A"}, "out": "C"})
        );
        assert_eq!(
            parse_expanded(siblings),
            json!({"key": {"$text": "B", "@attr": "A"}, "out": {"$text": "C", "in": null}})
        );

        // repeated nodes become arrays
        assert_eq!(
            parse(r"<tag><inner>A</inner><inner>B</inner></tag>"),
            json!({"tag": {"inner": ["A", "B"]}})
        );

        // repeated nodes with attributes
        assert_eq!(
            parse(r#"<tag><inner attr="A">A</inner><inner attr="B">B</inner></tag>"#),
            json!({"tag": {"inner": [{"$text": "A", "@attr": "A"}, {"$text": "B", "@attr": "B"}]}})
        );

        // without config of expand_empty_elements true, empty node will be removed
        let mixed = r#"<tag>A <some attr="B"/> C</tag>"#;
        assert_eq!(parse(mixed), json!({"tag": ["A ", " C"]}));
        assert_eq!(
            parse_expanded(mixed),
            json!({"tag": ["A ", {"some": {"@attr": "B"}}, " C"]})
        );

        // text and nodes alternating form a sequence
        assert_eq!(
            parse(r"<tag>A <some>B</some> C <some>D</some></tag>"),
            json!({"tag": ["A ", {"some": "B"}, " C ", {"some": "D"}]})
        );

        // CDATA sections are treated as text
        assert_eq!(parse(r"<![CDATA[sample]]>"), json!("sample"));
        assert_eq!(
            parse(r"<tag><![CDATA[sample]]></tag>"),
            json!({"tag": "sample"})
        );
        assert_eq!(
            parse(r#"<tag attr="B"><![CDATA[A]]></tag>"#),
            json!({"tag": {"$text": "A", "@attr": "B"}})
        );
        assert_eq!(
            parse(r#"<tag attr="C">A <some><![CDATA[B]]></some></tag>"#),
            json!({"tag": {"$text": "A ", "@attr": "C", "some": "B"}})
        );
    }
}