#![cfg(any(feature = "xml", feature = "xml2json", feature = "xq"))]
use quick_xml::events::Event;
use quick_xml::Reader;
use serde_json::{to_value, Map, Value};
use std::io::BufRead;
use std::mem::take;
pub fn wrap_xml_reader<R: BufRead>(reader: R) -> Value {
let mut xml_reader = Reader::from_reader(reader);
let config = xml_reader.config_mut();
config.expand_empty_elements = true;
config.trim_text(true);
read(&mut xml_reader)
}
trait AttrMap {
fn insert_text(&mut self, value: &Value) -> Option<Value>;
fn insert_text_node(&mut self, value: Value);
}
impl AttrMap for Map<String, Value> {
fn insert_text(&mut self, value: &Value) -> Option<Value> {
if !self.is_empty() {
if value.is_string() {
self.insert_text_node(value.clone());
}
if let Ok(attrs) = to_value(take(self)) {
return Some(attrs);
}
}
None
}
fn insert_text_node(&mut self, value: Value) {
self.insert("$text".to_string(), value);
}
}
struct NodeValues {
node: Map<String, Value>,
nodes: Vec<Map<String, Value>>,
nodes_are_map: Vec<bool>,
values: Vec<Value>,
}
impl NodeValues {
fn new() -> Self {
Self {
values: Vec::new(),
node: Map::new(),
nodes: Vec::new(),
nodes_are_map: Vec::new(),
}
}
fn insert(&mut self, key: String, value: Value) {
self.node.insert(key, value);
}
fn insert_text(&mut self, text: &str) {
if !self.node.is_empty() {
self.nodes.push(take(&mut self.node));
self.nodes_are_map.push(true);
}
self.values.push(Value::String(text.to_string()));
self.nodes_are_map.push(false);
}
fn remove_entry(&mut self, key: &String) -> Option<Value> {
if self.node.contains_key(key) {
if let Some((_, existing)) = self.node.remove_entry(key) {
return Some(existing);
}
}
None
}
fn get_value(&mut self) -> Value {
if !self.node.is_empty() {
self.nodes.push(take(&mut self.node));
self.nodes_are_map.push(true);
}
if !self.nodes.is_empty() {
if self.nodes.len() == 1 && self.values.len() <= 1 {
if self.values.len() == 1 {
self.nodes[0].insert_text_node(self.values.remove(0));
}
return to_value(&self.nodes[0]).expect("Failed to #to_value() a node!");
}
for (index, node_is_map) in self.nodes_are_map.iter().enumerate() {
if *node_is_map {
self.values
.insert(index, Value::Object(self.nodes.remove(0)));
}
}
}
match self.values.len() {
0 => Value::Null,
1 => self.values.pop().unwrap(),
_ => Value::Array(take(&mut self.values)),
}
}
}
fn read<R: BufRead>(reader: &mut Reader<R>) -> Value {
let mut buf = Vec::new();
let mut nodes = NodeValues::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
if let Ok(name) = String::from_utf8(e.name().into_inner().to_vec()) {
let mut child = read(reader);
let mut attrs = Map::new();
let _ = e
.attributes()
.map(|a| {
if let Ok(attr) = a {
let key = String::from_utf8(attr.key.into_inner().to_vec());
let value = String::from_utf8(attr.value.to_vec());
if let (Ok(key), Ok(value)) = (key, value) {
let key = format!("@{key}");
let value = Value::String(value);
if child.is_object() {
child.as_object_mut().unwrap().insert(key, value);
} else {
attrs.insert(key, value);
}
}
}
})
.collect::<Vec<_>>();
if let Some(mut existing) = nodes.remove_entry(&name) {
let mut entries: Vec<Value> = vec![];
if existing.is_array() {
let existing = existing.as_array_mut().unwrap();
while !existing.is_empty() {
entries.push(existing.remove(0));
}
} else {
entries.push(existing);
}
if let Some(attrs) = attrs.insert_text(&child) {
entries.push(attrs);
} else {
entries.push(child);
}
nodes.insert(name, Value::Array(entries));
} else if let Some(attrs) = attrs.insert_text(&child) {
nodes.insert(name, attrs);
} else {
nodes.insert(name, child);
}
}
}
Ok(Event::Text(ref e)) => {
if let Ok(decoded) = e.unescape() {
nodes.insert_text(&decoded);
}
}
Ok(Event::CData(ref e)) => {
if let Ok(decoded) = e.clone().escape() {
if let Ok(decoded_bt) = decoded.unescape() {
nodes.insert_text(&decoded_bt);
}
}
}
Ok(Event::End(ref _e)) => break,
Ok(Event::Eof) => break,
_ => (),
}
}
nodes.get_value()
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn test_read() {
let input = r"";
let result = read(&mut Reader::from_str(input));
assert_eq!(result, Value::Null);
let input = r"<root/>";
let result = read(&mut Reader::from_str(input));
assert_eq!(result, Value::Null);
let mut reader = Reader::from_str(input);
let config = reader.config_mut();
config.expand_empty_elements = true;
let result = read(&mut reader);
assert_eq!(result, json!({"root": null}));
let input = r"<key>value</key>";
let result = read(&mut Reader::from_str(input));
assert_eq!(result, json!({"key": "value"}));
let input = r#"<key attr="A">B</key><out>C<in/></out>"#;
let result = read(&mut Reader::from_str(input));
assert_eq!(
result,
json!({"key": {"$text": "B", "@attr": "A"}, "out": "C"})
);
let mut reader = Reader::from_str(input);
let config = reader.config_mut();
config.expand_empty_elements = true;
let result = read(&mut reader);
assert_eq!(
result,
json!({"key": {"$text": "B", "@attr": "A"}, "out": {"$text": "C", "in": null}})
);
let input = r"<tag><inner>A</inner><inner>B</inner></tag>";
let result = read(&mut Reader::from_str(input));
assert_eq!(result, json!({"tag": {"inner": ["A", "B"]}}));
let input = r#"<tag><inner attr="A">A</inner><inner attr="B">B</inner></tag>"#;
let result = read(&mut Reader::from_str(input));
assert_eq!(
result,
json!({"tag": {"inner": [{"$text": "A", "@attr": "A"}, {"$text": "B", "@attr": "B"}]}})
);
let input = r#"<tag>A <some attr="B"/> C</tag>"#;
let result = read(&mut Reader::from_str(input));
assert_eq!(result, json!({"tag": ["A ", " C"]}));
let mut reader = Reader::from_str(input);
let config = reader.config_mut();
config.expand_empty_elements = true;
let result = read(&mut reader);
assert_eq!(
result,
json!({"tag": ["A ", {"some": {"@attr": "B"}}, " C"]})
);
let input = r"<tag>A <some>B</some> C <some>D</some></tag>";
let result = read(&mut Reader::from_str(input));
assert_eq!(
result,
json!({"tag": ["A ", {"some": "B"}, " C ", {"some": "D"}]})
);
let input = r"<![CDATA[sample]]>";
let result = read(&mut Reader::from_str(input));
assert_eq!(result, json!("sample"));
let input = r"<tag><![CDATA[sample]]></tag>";
let result = read(&mut Reader::from_str(input));
assert_eq!(result, json!({"tag": "sample"}));
let input = r#"<tag attr="B"><![CDATA[A]]></tag>"#;
let result = read(&mut Reader::from_str(input));
assert_eq!(result, json!({"tag": {"$text": "A", "@attr": "B"}}));
let input = r#"<tag attr="C">A <some><![CDATA[B]]></some></tag>"#;
let result = read(&mut Reader::from_str(input));
assert_eq!(
result,
json!({"tag": {"$text": "A ", "@attr": "C", "some": "B"}})
);
}
}