// xmltojson 0.1.0
//
// A simple crate for converting XML to JSON
// Documentation
/*
 * This library helps convert an XML String into a serde_json::Value which can be
 * used to generate JSON
 */

#[cfg(test)]
#[macro_use]
extern crate serde_json;

use log::*;
use quick_xml::events::Event;
use quick_xml::Reader;
use serde_json::{Map, Value};

/**
 * Error type returned by `to_json()`.
 *
 * It carries no detail about what went wrong. It implements `Display` and
 * `std::error::Error` so that callers can print it and propagate it with `?`
 * into `Box<dyn std::error::Error>` or similar error wrappers.
 */
#[derive(Debug)]
pub struct Error {}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("failed to convert XML to JSON")
    }
}

impl std::error::Error for Error {}

fn read(mut reader: &mut Reader<&[u8]>, depth: u64) -> Value {
    let mut buf = Vec::new();
    let mut values = Vec::new();
    let mut node = Map::new();
    debug!("Parsing at depth: {}", depth);

    loop {
        match reader.read_event(&mut buf) {
            Ok(Event::Start(ref e)) => {
                if let Ok(name) = String::from_utf8(e.name().to_vec()) {
                    let mut child = read(&mut reader, depth + 1);
                    let mut attrs = Map::new();
                    debug!("{} children: {:?}", name, child);

                    let _ = e
                        .attributes()
                        .map(|a| {
                            if let Ok(attr) = a {
                                let key = String::from_utf8(attr.key.to_vec());
                                let value = String::from_utf8(attr.value.to_vec());

                                // Only bother adding the attribute if both key and value are valid utf8
                                if let (Ok(key), Ok(value)) = (key, value) {
                                    let key = format!("@{}", key);
                                    let value = Value::String(value);

                                    // If the child is already an object, that's where the insert
                                    // should happen
                                    if child.is_object() {
                                        child.as_object_mut().unwrap().insert(key, value);
                                    } else {
                                        attrs.insert(key, value);
                                    }
                                }
                            }
                        })
                        .collect::<Vec<_>>();

                    /*
                     * nodes with attributes need to be handled special
                     */
                    if !attrs.is_empty() {
                        if child.is_string() {
                            attrs.insert("#text".to_string(), child);
                        }

                        if let Ok(attrs) = serde_json::to_value(attrs) {
                            node.insert(name, attrs);
                        }
                    } else if node.contains_key(&name) {
                        debug!("Node contains `{}` already, need to convert to array", name);
                        let (_, mut existing) = node.remove_entry(&name).unwrap();
                        let mut entries: Vec<Value> = vec![];

                        if existing.is_array() {
                            let existing = existing.as_array_mut().unwrap();
                            while !existing.is_empty() {
                                entries.push(existing.remove(0));
                            }
                        } else {
                            entries.push(existing);
                        }
                        entries.push(child);

                        node.insert(name, Value::Array(entries));
                    } else {
                        node.insert(name, child);
                    }
                }
            }
            Ok(Event::Text(ref e)) => {
                if let Ok(decoded) = e.unescape_and_decode(&reader) {
                    values.push(Value::String(decoded));
                }
            }
            Ok(Event::CData(ref e)) => {
                if let Ok(decoded) = e.unescape_and_decode(&reader) {
                    node.insert("#cdata".to_string(), Value::String(decoded));
                }
            }
            Ok(Event::End(ref _e)) => break,
            Ok(Event::Eof) => break,
            _ => (),
        }
    }

    debug!("values to return: {:?}", values);
    if !node.is_empty() {
        // If we had collected some text along the way, that needs to be inserted
        // so we don't lose it
        let mut index = 0;
        let mut has_text = false;
        for value in values.iter() {
            if value.is_string() {
                has_text = true;
                break;
            }
            index += 1;
        }

        if has_text {
            node.insert("#text".to_string(), values.remove(index));
        }
        debug!("returning node instead: {:?}", node);
        return serde_json::to_value(&node).expect("Failed to #to_value() a node!");
    }

    match values.len() {
        0 => Value::Null,
        1 => values.pop().unwrap(),
        _ => Value::Array(values),
    }
}

/**
 * to_json() parses the given XML string and converts the whole document into
 * a serde_json::Value.
 *
 * The reader is configured to expand empty elements and to trim text before
 * the document is walked starting at depth 0. The conversion itself cannot
 * currently report a failure, so the result is always `Ok`.
 */
pub fn to_json(xml: &str) -> Result<Value, Error> {
    let mut reader = Reader::from_str(xml);
    reader.expand_empty_elements(true).trim_text(true);

    let document = read(&mut reader, 0);
    Ok(document)
}

#[cfg(test)]
mod tests {
    use super::*;

    /**
     * Asserts that a conversion succeeded and produced the expected JSON.
     *
     * `left` is the expected value, `right` the result of `to_json()`. A failed
     * conversion panics with the error attached instead of a bare assertion.
     */
    fn json_eq(left: Value, right: Result<Value, Error>) {
        let actual = right.expect("to_json() returned an error");
        assert_eq!(left, actual);
    }

    #[test]
    fn single_node() {
        json_eq(json!({ "e": null }), to_json("<e></e>"));
    }

    #[test]
    fn node_with_text() {
        json_eq(json!({"e" : "foo"}), to_json("<e>foo</e>"));
    }

    #[test]
    fn node_with_attr() {
        json_eq(
            json!({"e" : {"@name":"value"}}),
            to_json("<e name=\"value\"></e>"),
        );
    }

    #[test]
    fn node_with_attr_and_text() {
        json_eq(
            json!({"e": {"@name":"value", "#text" : "text"}}),
            to_json(r#"<e name="value">text</e>"#),
        );
    }

    #[test]
    fn node_with_children() {
        json_eq(
            json!(
            {
            "e":{
                "a":"text1",
                "b":"text2"
            }
            }),
            to_json(r#"<e> <a>text1</a> <b>text2</b> </e>"#),
        );
    }

    #[test]
    fn node_with_multiple_identical_children() {
        json_eq(
            json!({
            "e":{"a":[
                "text",
                "text"
                ]}
            }),
            to_json(r#"<e><a>text</a><a>text</a></e>"#),
        );
    }

    #[test]
    fn node_with_n_identical_children() {
        json_eq(
            json!({
            "e":{"a":[
                "text1",
                "text2",
                "text3"
                ]}
            }),
            to_json(r#"<e><a>text1</a><a>text2</a><a>text3</a></e>"#),
        );
    }

    #[test]
    fn node_with_text_and_child() {
        json_eq(
            json!(
            {
            "e":{
                "#text":"lol",
                "a":"text"
            }
            }),
            to_json(r#"<e> lol <a>text</a></e>"#),
        );
    }

    #[test]
    fn node_with_just_text() {
        json_eq(
            json!(
            {
            "a":"hello"
            }),
            to_json(r#"<a>hello</a>"#),
        );
    }

    #[test]
    fn node_with_attrs_and_text() {
        json_eq(
            json!(
            {
                "a":{
                    "@x":"y",
                    "#text":"hello"
                }
            }),
            to_json(r#"<a x="y">hello</a>"#),
        );
    }

    #[test]
    fn nested_nodes_with_attrs() {
        json_eq(
            json!(
            {
                "a":{
                    "@id":"a",
                    "b":{
                    "@id":"b",
                    "#text":"hey!"
                    }
                }
            }),
            to_json(r#"<a id="a"><b id="b">hey!</b></a>"#),
        );
    }

    // Placeholder: the expected output for mixed text/element content is still
    // undecided, so the test is ignored rather than reported as passing.
    #[test]
    #[ignore = "expected output for mixed content not decided yet"]
    fn node_with_nested_test() {
        /*
        todo!("this syntax makes no sense to me");
        json_eq(json!(
            {
                "a":"x<c/>y"
            }),
            to_json(r#"<a>x<c/>y</a>"#)
        );
        */
    }

    #[test]
    fn node_with_empty_attrs() {
        json_eq(
            json!(
            {
            "x":{"@u":""}
            }),
            to_json(r#"<x u=""/>"#),
        );
    }

    #[test]
    fn some_basic_html() {
        json_eq(
            json!(
            {
            "html":{
                "head":{
                "title":"Xml/Json",
                "meta":{
                    "@name":"x",
                    "@content":"y"
                }
                },
                "body":null
            }
            }),
            to_json(
                r#"<html><head><title>Xml/Json</title><meta name="x" content="y"/></head><body/></html>"#,
            ),
        );
    }

    #[test]
    fn more_complex_html() {
        json_eq(
            json!(
            {
                "ol":{
                    "@class":"xoxo",
                    "li":[
                    {
                        "#text":"Subject 1",
                        "ol":{"li":[
                            "subpoint a",
                            "subpoint b"
                        ]}
                    },
                    {
                        "span":"Subject 2",
                        "ol":{
                        "@compact":"compact",
                        "li":[
                            "subpoint c",
                            "subpoint d"
                        ]
                        }
                    }
                    ]
                }
            }),
            to_json(
                r#"<ol class="xoxo"><li>Subject 1     <ol><li>subpoint a</li><li>subpoint b</li></ol></li><li><span>Subject 2</span><ol compact="compact"><li>subpoint c</li><li>subpoint d</li></ol></li></ol>"#,
            ),
        );
    }

    #[test]
    fn node_with_cdata() {
        json_eq(
            json!(
            {
            "e":{"#cdata":" .. some data .. "}
            }),
            to_json(r#"<e><![CDATA[ .. some data .. ]]></e>"#),
        );
    }

    #[test]
    fn node_with_cdata_and_siblings() {
        json_eq(
            json!(
            {
            "e":{
                "a":null,
                "#cdata":" .. some data .. ",
                "b":null
            }
            }),
            to_json(r#"<e><a/><![CDATA[ .. some data .. ]]><b/></e>"#),
        );
    }

    // Placeholder: CDATA embedded between text fragments is not covered yet, so
    // the test is ignored rather than reported as passing.
    #[test]
    #[ignore = "CDATA inside text is not covered yet"]
    fn node_with_cdata_inside_text() {
        /*
         * TODO
        json_eq(json!(
            {
            "e":"\n  some text\n  <![CDATA[ .. some data .. ]]>\n  more text\n"
            }),
            to_json(r#"<e>  some text  <![CDATA[ .. some data .. ]]>  more text</e>"#)
        );
        */
    }

    #[test]
    fn node_with_child_cdata_and_text() {
        json_eq(
            json!(
            {
            "e":{
                "#text":"some text",
                "#cdata":" .. some data .. ",
                "a":null
            }
            }),
            to_json(r#"<e>  some text  <![CDATA[ .. some data .. ]]><a/></e>"#),
        );
    }

    // Placeholder: the handling of several CDATA sections in one element is
    // still undecided, so the test is ignored rather than reported as passing.
    #[test]
    #[ignore = "handling of duplicate CDATA sections not decided yet"]
    fn node_with_duplicate_cdata() {
        /*
         * TODO: unsure about this approach to handling cdata
        json_eq(json!(
            {
            "e":"<![CDATA[ .. some data .. ]]><![CDATA[ .. more data .. ]]>"
            }
            ),
            to_json(r#"<e><![CDATA[ .. some data .. ]]><![CDATA[ .. more data .. ]]></e>"#)
        );
        */
    }
}