1#[cfg(test)]
7#[macro_use]
8extern crate serde_json;
9
10use log::*;
11use quick_xml::escape::resolve_predefined_entity;
12use quick_xml::events::Event;
13use quick_xml::Reader;
14use serde_json::{to_value, Map, Value};
15use std::io::BufRead;
16use std::mem::take;
17
/// Error type returned by [`to_json`].
///
/// The struct carries no fields, so it only signals that a conversion failed.
/// It implements [`std::fmt::Display`] and [`std::error::Error`] so callers can
/// box it or propagate it with `?` alongside other error types.
#[derive(Debug)]
pub struct Error {}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("failed to convert XML to JSON")
    }
}

impl std::error::Error for Error {}
20
/// Helper operations on the map of attributes collected for one element.
///
/// Implemented in this file for `Map<String, Value>`.
trait AttrMap {
    /// Combines the collected attributes with the element's content `value`.
    ///
    /// In the `Map` implementation this returns `None` when no attributes were
    /// collected; otherwise the map is emptied and returned as a JSON value,
    /// with `value` stored under `#text` first if it is a string.
    fn insert_text(&mut self, value: &Value) -> Option<Value>;
    /// Stores `value` under the `#text` key.
    fn insert_text_node(&mut self, value: Value);
}
25
26impl AttrMap for Map<String, Value> {
27 fn insert_text(&mut self, value: &Value) -> Option<Value> {
28 if !self.is_empty() {
29 if value.is_string() {
30 self.insert_text_node(value.clone());
31 }
32 if let Ok(attrs) = to_value(take(self)) {
33 return Some(attrs);
34 }
35 }
36 None
37 }
38
39 fn insert_text_node(&mut self, value: Value) {
40 self.insert("#text".to_string(), value);
41 }
42}
43
/// Accumulates the content of one XML element while its events are read.
struct NodeValues {
    /// Object currently being filled with child elements and `#cdata`.
    node: Map<String, Value>,
    /// Objects that were completed because text followed them (or because
    /// `get_value` flushed `node`), in document order.
    nodes: Vec<Map<String, Value>>,
    /// One flag per pushed entry, in document order: `true` for an entry of
    /// `nodes`, `false` for an entry of `values`.
    nodes_are_map: Vec<bool>,
    /// Text fragments collected so far; `get_value` merges `nodes` into it.
    values: Vec<Value>,
}
50
51impl NodeValues {
52 fn new() -> Self {
53 Self {
54 values: Vec::new(),
55 node: Map::new(),
56 nodes: Vec::new(),
57 nodes_are_map: Vec::new(),
58 }
59 }
60
61 fn insert(&mut self, key: String, value: Value) {
62 self.node.insert(key, value);
63 }
64
65 fn insert_cdata(&mut self, value: &str) {
66 let key = "#cdata".to_string();
67 let new_value = match self.node.get(&key) {
68 Some(existing) => {
69 let mut old_value = existing.as_str().unwrap().to_string();
70 old_value.push_str(value);
71 old_value
72 }
73 None => value.to_string(),
74 };
75 self.node.insert(key, Value::String(new_value));
76 }
77
78 fn insert_text(&mut self, text: &str) {
79 if self.node.is_empty() {
80 if let Some(value) = self.values.pop() {
82 let mut value_text = value.as_str().unwrap_or_default().to_string();
83 value_text.push_str(text);
84 self.values.push(Value::String(value_text));
85 return;
86 }
87 } else {
88 if text.trim().is_empty() {
90 return;
91 }
92
93 self.nodes.push(take(&mut self.node));
94 self.nodes_are_map.push(true);
95 }
96
97 self.values.push(Value::String(text.to_string()));
98 self.nodes_are_map.push(false);
99 }
100
101 fn remove_entry(&mut self, key: &String) -> Option<Value> {
102 if self.node.contains_key(key) {
103 debug!("Node contains `{}` already, need to convert to array", key);
104 if let Some((_, existing)) = self.node.remove_entry(key) {
105 return Some(existing);
106 }
107 }
108 None
109 }
110
111 fn get_value(&mut self) -> Value {
112 debug!("values to return: {:?}", self.values);
113 if !self.node.is_empty() {
114 self.nodes.push(take(&mut self.node));
115 self.nodes_are_map.push(true);
116 }
117
118 if !self.nodes.is_empty() {
119 if self.nodes.len() == 1 && self.values.len() <= 1 {
123 if self.values.len() == 1 {
124 let value = self.values.remove(0);
125 let text = value.as_str().unwrap_or_default().trim();
126 if !text.is_empty() {
127 self.nodes[0].insert_text_node(Value::String(text.to_string()));
128 }
129 }
130 debug!("returning node instead: {:?}", self.nodes[0]);
131 return to_value(&self.nodes[0]).expect("Failed to #to_value() a node!");
132 }
133 for (index, node_is_map) in self.nodes_are_map.iter().enumerate() {
134 if *node_is_map {
135 self.values
136 .insert(index, Value::Object(self.nodes.remove(0)));
137 }
138 }
139 }
140
141 self.values = self
143 .values
144 .clone()
145 .into_iter()
146 .filter_map(|value| {
147 if value.is_string() {
148 let trimmed = value.as_str().unwrap_or_default().trim();
149 if trimmed.is_empty() {
150 return None;
151 }
152 return Some(Value::String(trimmed.to_string()));
153 }
154 Some(value)
155 })
156 .collect();
157
158 match self.values.len() {
159 0 => Value::Null,
160 1 => self.values.pop().unwrap(),
161 _ => Value::Array(take(&mut self.values)),
162 }
163 }
164}
165
166pub fn read<R: BufRead>(reader: &mut Reader<R>, depth: u64) -> Value {
167 let mut buf = Vec::new();
168 let mut nodes = NodeValues::new();
169 debug!("Parsing at depth: {}", depth);
170
171 loop {
172 match reader.read_event_into(&mut buf) {
173 Ok(Event::Start(ref e)) => {
174 if let Ok(name) = String::from_utf8(e.name().into_inner().to_vec()) {
175 let mut child = read(reader, depth + 1);
176 let mut attrs = Map::new();
177 debug!("{} children: {:?}", name, child);
178
179 let _ = e
180 .attributes()
181 .map(|a| {
182 if let Ok(attr) = a {
183 let key = String::from_utf8(attr.key.into_inner().to_vec());
184 let value = String::from_utf8(attr.value.to_vec());
185
186 if let (Ok(key), Ok(value)) = (key, value) {
188 let key = format!("@{}", key);
189 let value = Value::String(value);
190
191 if child.is_object() {
194 child.as_object_mut().unwrap().insert(key, value);
195 } else {
196 attrs.insert(key, value);
197 }
198 }
199 }
200 })
201 .collect::<Vec<_>>();
202
203 if let Some(mut existing) = nodes.remove_entry(&name) {
204 let mut entries: Vec<Value> = vec![];
205
206 if existing.is_array() {
207 let existing = existing.as_array_mut().unwrap();
208 while !existing.is_empty() {
209 entries.push(existing.remove(0));
210 }
211 } else {
212 entries.push(existing);
213 }
214
215 if let Some(attrs) = attrs.insert_text(&child) {
219 entries.push(attrs);
220 } else {
221 entries.push(child);
222 }
223
224 nodes.insert(name, Value::Array(entries));
225 } else if let Some(attrs) = attrs.insert_text(&child) {
229 nodes.insert(name, attrs);
230 } else {
231 nodes.insert(name, child);
232 }
233 }
234 }
235 Ok(Event::Text(ref e)) => {
236 if let Ok(decoded) = e.decode() {
237 nodes.insert_text(&decoded);
238 }
239 }
240 Ok(Event::CData(ref e)) => {
241 if let Ok(decoded) = e.decode() {
242 nodes.insert_cdata(&decoded);
243 }
244 }
245 Ok(Event::GeneralRef(ref e)) => {
246 if let Ok(Some(ch)) = e.resolve_char_ref() {
247 nodes.insert_text(&ch.to_string());
248 } else if let Ok(decoded) = e.decode() {
249 if let Some(entity) = resolve_predefined_entity(&decoded) {
250 nodes.insert_text(entity);
251 }
252 }
253 }
254 Ok(Event::End(ref _e)) => break,
255 Ok(Event::Eof) => break,
256 _ => (),
257 }
258 }
259 nodes.get_value()
260}
261
262pub fn to_json(xml: &str) -> Result<Value, Error> {
267 let mut reader = Reader::from_str(xml);
268 let config = reader.config_mut();
269 config.expand_empty_elements = true;
270 Ok(read(&mut reader, 0))
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that a conversion succeeded and produced exactly `left`.
    fn json_eq(left: Value, right: Result<Value, Error>) {
        assert_eq!(left, right.expect("conversion to JSON failed"));
    }

    #[test]
    fn single_node() {
        json_eq(json!({ "e": null }), to_json("<e></e>"));
    }

    #[test]
    fn node_with_text() {
        json_eq(json!({"e" : "foo"}), to_json("<e>foo</e>"));
    }

    #[test]
    fn node_with_attr() {
        json_eq(
            json!({"e" : {"@name":"value"}}),
            to_json("<e name=\"value\"></e>"),
        );
    }

    #[test]
    fn node_with_attr_and_text() {
        json_eq(
            json!({"e": {"@name":"value", "#text" : "text"}}),
            to_json(r#"<e name="value">text</e>"#),
        );
    }

    #[test]
    fn node_with_children() {
        json_eq(
            json!(
            {
                "e":{
                    "a":"text1",
                    "b":"text2"
                }
            }),
            to_json(r#"<e> <a>text1</a> <b>text2</b> </e>"#),
        );
    }

    #[test]
    fn node_with_multiple_identical_children() {
        json_eq(
            json!({
                "e":{"a":[
                    "text",
                    "text"
                ]}
            }),
            to_json(r#"<e><a>text</a><a>text</a></e>"#),
        );
    }

    #[test]
    fn node_with_n_identical_children() {
        json_eq(
            json!({
                "e":{"a":[
                    "text1",
                    "text2",
                    "text3"
                ]}
            }),
            to_json(r#"<e><a>text1</a><a>text2</a><a>text3</a></e>"#),
        );
    }

    #[test]
    fn node_with_text_and_child() {
        json_eq(
            json!(
            {
                "e":{
                    "#text":"lol",
                    "a":"text"
                }
            }),
            to_json(r#"<e> lol <a>text</a></e>"#),
        );
    }

    #[test]
    fn node_with_just_text() {
        json_eq(
            json!(
            {
                "a":"hello"
            }),
            to_json(r#"<a>hello</a>"#),
        );
    }

    #[test]
    fn node_with_attrs_and_text() {
        json_eq(
            json!(
            {
                "a":{
                    "@x":"y",
                    "#text":"hello"
                }
            }),
            to_json(r#"<a x="y">hello</a>"#),
        );
    }

    #[test]
    fn nested_nodes_with_attrs() {
        json_eq(
            json!(
            {
                "a":{
                    "@id":"a",
                    "b":{
                        "@id":"b",
                        "#text":"hey!"
                    }
                }
            }),
            to_json(r#"<a id="a"><b id="b">hey!</b></a>"#),
        );
    }

    #[test]
    fn node_with_nested_text() {
        json_eq(
            json!(
            {
                "a":["x",{"c":null},"y"]
            }),
            to_json(r#"<a>x<c/>y</a>"#),
        );
    }

    #[test]
    fn node_with_empty_attrs() {
        json_eq(
            json!(
            {
                "x":{"@u":""}
            }),
            to_json(r#"<x u=""/>"#),
        );
    }

    #[test]
    fn some_basic_html() {
        json_eq(
            json!(
            {
                "html":{
                    "head":{
                        "title":"Xml/Json",
                        "meta":{
                            "@name":"x",
                            "@content":"y"
                        }
                    },
                    "body":null
                }
            }),
            to_json(
                r#"<html><head><title>Xml/Json</title><meta name="x" content="y"/></head><body/></html>"#,
            ),
        );
    }

    #[test]
    fn more_complex_html() {
        json_eq(
            json!(
            {
                "ol":{
                    "@class":"xoxo",
                    "li":[
                        {
                            "#text":"Subject 1",
                            "ol":{"li":[
                                "subpoint a",
                                "subpoint b"
                            ]}
                        },
                        {
                            "span":"Subject 2",
                            "ol":{
                                "@compact":"compact",
                                "li":[
                                    "subpoint c",
                                    "subpoint d"
                                ]
                            }
                        }
                    ]
                }
            }),
            to_json(
                r#"<ol class="xoxo"><li>Subject 1 <ol><li>subpoint a</li><li>subpoint b</li></ol></li><li><span>Subject 2</span><ol compact="compact"><li>subpoint c</li><li>subpoint d</li></ol></li></ol>"#,
            ),
        );
    }

    #[test]
    fn node_with_cdata() {
        json_eq(
            json!(
            {
                "e":{"#cdata":" .. some data .. "}
            }),
            to_json(r#"<e><![CDATA[ .. some data .. ]]></e>"#),
        );
    }

    #[test]
    fn node_with_cdata_and_siblings() {
        json_eq(
            json!(
            {
                "e":{
                    "a":null,
                    "#cdata":" .. some data .. ",
                    "b":null
                }
            }),
            to_json(r#"<e><a/><![CDATA[ .. some data .. ]]><b/></e>"#),
        );
    }

    #[test]
    fn node_with_cdata_inside_text() {
        json_eq(
            json!(
            {
                "e":["some text",{"#cdata":" .. some data .. "}, "more text"]
            }),
            to_json(r#"<e> some text <![CDATA[ .. some data .. ]]> more text</e>"#),
        );
    }

    #[test]
    fn node_with_child_cdata_and_text() {
        json_eq(
            json!(
            {
                "e":{
                    "#text":"some text",
                    "#cdata":" .. some data .. ",
                    "a":null
                }
            }),
            to_json(r#"<e> some text <![CDATA[ .. some data .. ]]><a/></e>"#),
        );
    }

    #[test]
    fn node_with_duplicate_cdata() {
        // Adjacent CDATA sections are concatenated verbatim, so the trailing
        // space of the first and the leading space of the second both remain.
        json_eq(
            json!(
            {
                "e":{
                    "#cdata":" .. some data ..  .. more data .. ",
                }
            }),
            to_json(r#"<e><![CDATA[ .. some data .. ]]><![CDATA[ .. more data .. ]]></e>"#),
        );
    }

    #[test]
    fn node_empty() {
        json_eq(json!(null), to_json(""));
    }

    #[test]
    fn node_with_duplicate_text() {
        json_eq(
            json!({"e": {"a": ["x", "y"]}}),
            to_json("<e><a>x</a><a>y</a></e>"),
        );
    }

    #[test]
    fn node_with_duplicate_attrs_and_text() {
        json_eq(
            json!({"e": {"a": [{"#text": "x", "@u": "x"}, {"#text": "y", "@u": "y"}]}}),
            to_json(r#"<e><a u="x">x</a><a u="y">y</a></e>"#),
        );
    }

    #[test]
    fn node_with_text_and_siblings() {
        json_eq(
            json!({"e":["x", {"a": {"@u": "y"}}, "z"]}),
            to_json(r#"<e>x <a u="y"/> z</e>"#),
        );
    }

    #[test]
    fn node_with_text_and_siblings_mixed() {
        json_eq(
            json!({"e":["a", {"x": "b"}, "c", {"x": "d"}]}),
            to_json(r#"<e>a <x>b</x> c <x>d</x></e>"#),
        );
    }

    #[test]
    fn node_with_cdata_only() {
        json_eq(
            json!(
            {
                "#cdata":" .. some data .. "
            }),
            to_json(r#"<![CDATA[ .. some data .. ]]>"#),
        );
    }

    #[test]
    fn node_with_entities() {
        // The ampersand must be written as an entity reference to be
        // well-formed XML; it is resolved back to `&` in the output.
        json_eq(
            json!({"pets": "A cat & a dog"}),
            to_json(r#"<pets>A cat &amp; a dog</pets>"#),
        );
    }
}