use std::{
fmt,
io::{Cursor, Read},
};
use serde_json::{self, map::Map};
use xml::{
attribute::OwnedAttribute,
name::OwnedName,
namespace::{self, Namespace},
reader::{EventReader, XmlEvent},
};
/// Error returned when input could not be parsed into an XML document.
///
/// Wraps the human-readable message describing what went wrong.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseErr(String);

impl fmt::Display for ParseErr {
    /// Writes the wrapped message behind a fixed explanatory prefix.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        write!(
            formatter,
            "Failed to parse string to XML document: {}",
            self.0
        )
    }
}

// Implementing `Error` lets callers box this type or propagate it with `?`
// into `Box<dyn Error>`-style results; `Debug` and `Display` are already provided.
impl std::error::Error for ParseErr {}
/// A parsed XML document, held as a tree of `XmlNode` values.
#[derive(Debug, Clone)]
pub struct XmlDocument {
/// Top-level nodes of the document, in the order the parser produced them.
pub root: Vec<XmlNode>,
}
impl fmt::Display for XmlDocument {
    /// Renders every root node in order and writes the concatenated text.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        // Append into one buffer instead of rebuilding the accumulated string
        // with `format!` for every node.
        let mut rendered = String::new();
        for xml_node in &self.root {
            rendered.push_str(&xml_node.to_string());
        }
        // Called through its full path so the `Display` trait does not have to
        // be imported; delegating to the string keeps width/padding flags working.
        fmt::Display::fmt(&rendered, formatter)
    }
}
impl XmlDocument {
pub fn from_reader<R>(source: R) -> Result<Self, ParseErr>
where
R: Read,
{
let reader = EventReader::new(source);
let mut xml_events: Vec<XmlEvent> = Vec::new();
for event in reader.into_iter() {
match event {
Ok(event_type) => {
xml_events.push(event_type);
}
Err(error) => {
return Err(ParseErr(format!("{}", error)));
}
}
}
xml_events.reverse();
Self::parse(xml_events, None, Vec::new(), Namespace::empty())
.map(|(root, _)| XmlDocument { root })
.map_err(|error| ParseErr(error))
}
/// Parses an XML document held in a string slice.
///
/// # Errors
/// Delegates to `from_reader` and returns its `ParseErr` unchanged.
pub fn from_string(string: &str) -> Result<XmlDocument, ParseErr> {
    // A cursor over the borrowed bytes already implements `Read`, so the input
    // does not need to be copied into an owned buffer first.
    Self::from_reader(Cursor::new(string.as_bytes()))
}
/// Converts the document into a JSON object keyed by root node name.
///
/// Each root node contributes the pair returned by `XmlNode::to_key_value`;
/// a later node with the same name replaces the earlier entry.
pub fn to_object(&self) -> Map<String, serde_json::Value> {
    let mut object: Map<String, serde_json::Value> = Map::new();
    for xml_node in &self.root {
        let (node_name, node_value) = xml_node.to_key_value();
        object.insert(node_name, node_value);
    }
    object
}
/// Serializes the document to a JSON string.
///
/// The document is first converted with `to_object`; any serializer error is
/// returned as-is.
pub fn to_json(&self) -> serde_json::Result<String> {
    let object = self.to_object();
    serde_json::to_string(&object)
}
/// Flattens reader attributes into `(name, value)` string pairs.
///
/// An attribute that carries a prefix is named `prefix:local_name`; otherwise
/// the local name is used alone. Input order is preserved.
fn parse_xml_attributes(
    xml_attributes: Vec<xml::attribute::OwnedAttribute>,
) -> Vec<(String, String)> {
    let mut parsed_attributes = Vec::with_capacity(xml_attributes.len());
    for xml_attribute in xml_attributes {
        let attribute_name = xml_attribute.name;
        let qualified_name = match attribute_name.prefix {
            Some(prefix) => format!("{}:{}", prefix, attribute_name.local_name),
            None => attribute_name.local_name,
        };
        parsed_attributes.push((qualified_name, xml_attribute.value));
    }
    parsed_attributes
}
fn parse(
mut xml_events: Vec<XmlEvent>,
current_node: Option<XmlNode>,
mut current_node_group: Vec<XmlNode>,
current_namespace: Namespace,
) -> Result<(Vec<XmlNode>, Vec<XmlEvent>), String> {
match xml_events.pop() {
Some(xml_event) => match xml_event {
XmlEvent::StartElement {
name,
attributes,
namespace,
} => {
let formatted_node_name = match name.prefix {
Some(prefix) => format!("{}:{}", prefix, name.local_name),
None => name.local_name,
};
let current_attributes = if namespace == current_namespace {
attributes
} else {
let mut attributes = attributes;
let cloned_namespace = namespace.clone();
let namespace_attributes = cloned_namespace
.into_iter()
.filter(|&(key, value)| {
key != namespace::NS_NO_PREFIX && value != namespace::NS_EMPTY_URI
})
.map(|(key, value)| OwnedAttribute {
name: OwnedName {
local_name: key.to_owned(),
namespace: Some(value.to_owned()),
prefix: Some("xmlns".to_owned()),
},
value: value.to_owned(),
});
attributes.extend(namespace_attributes);
attributes
};
let child_node = XmlNode {
name: formatted_node_name,
attributes: Self::parse_xml_attributes(current_attributes),
content: None,
children: Vec::new(),
};
let (parsed_child_node, remaining_xml_events) =
Self::parse(xml_events, Some(child_node), Vec::new(), namespace.clone())?;
match current_node {
Some(mut current_xml_node) => {
current_xml_node.children.extend(parsed_child_node);
Self::parse(
remaining_xml_events,
Some(current_xml_node),
current_node_group,
namespace,
)
}
None => {
current_node_group.extend(parsed_child_node);
Self::parse(remaining_xml_events, None, current_node_group, namespace)
}
}
}
XmlEvent::Characters(raw_content) | XmlEvent::CData(raw_content) => {
let parsed_content = raw_content.trim().to_owned();
match current_node {
Some(mut current_xml_node) => {
current_xml_node.content = Some(parsed_content);
Self::parse(
xml_events,
Some(current_xml_node),
current_node_group,
current_namespace,
)
}
None => Err("Invalid form of XML doc".to_owned()),
}
}
XmlEvent::EndElement { name } => {
let formatted_node_name = match name.prefix {
Some(prefix) => format!("{}:{}", prefix, name.local_name),
None => name.local_name.clone(),
};
match current_node {
Some(current_xml_node) => {
if current_xml_node.name == formatted_node_name {
current_node_group.push(current_xml_node);
return Ok((current_node_group, xml_events));
}
Err(format!(
"Invalid end tag: expected {}, got {}",
current_xml_node.name, name.local_name
))
}
None => Err(format!("Invalid end tag: {}", name.local_name)),
}
}
_ => Self::parse(
xml_events,
current_node,
current_node_group,
current_namespace,
),
},
None => match current_node {
Some(_) => Err("Invalid end tag".to_owned()),
None => Ok((current_node_group, Vec::new())),
},
}
}
}
/// One XML element together with its attributes, text content and child elements.
#[derive(Debug, Clone)]
pub struct XmlNode {
/// Element name; written as `prefix:local_name` when the parser saw a prefix.
pub name: String,
/// Attribute `(name, value)` pairs.
pub attributes: Vec<(String, String)>,
/// Trimmed text or CDATA content, if the parser recorded any.
pub content: Option<String>,
/// Nested child elements.
pub children: Vec<XmlNode>,
}
impl fmt::Display for XmlNode {
    /// Writes the node rendered by `format` at depth 0.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.format(0);
        formatter.write_str(&rendered)
    }
}
impl XmlNode {
/// Renders this node and its descendants as indented XML-like text.
///
/// `depth` is the nesting level used for indentation. The opening tag (with
/// attributes) is followed by the content on its own line, then the children
/// rendered one level deeper, then the closing tag on its own line.
pub fn format(&self, depth: usize) -> String {
    let indentation = Self::indent(depth);
    let mut rendered = String::new();

    // Opening tag.
    rendered.push_str(&indentation);
    rendered.push('<');
    rendered.push_str(&self.name);
    rendered.push_str(&Self::attributes_to_string(&self.attributes));
    rendered.push('>');

    // Content goes on its own line, one level deeper.
    if let Some(content) = &self.content {
        rendered.push('\n');
        rendered.push_str(&Self::indent(depth + 1));
        rendered.push_str(content);
    }

    // Children start on a new line; each child ends with its own newline.
    if !self.children.is_empty() {
        rendered.push('\n');
        for child_node in &self.children {
            rendered.push_str(&child_node.format(depth + 1));
        }
    }

    // Closing tag.
    rendered.push('\n');
    rendered.push_str(&indentation);
    rendered.push_str("</");
    rendered.push_str(&self.name);
    rendered.push_str(">\n");
    rendered
}
pub fn to_key_value(&self) -> (String, serde_json::Value) {
let mut map: Map<String, serde_json::Value> = Map::new();
if self.content.is_some() {
map.insert(
"_".to_owned(),
serde_json::Value::String(self.content.clone().unwrap()),
);
}
let children_entries = self
.children
.iter()
.map(|child_node| child_node.to_key_value());
for (key, value) in children_entries {
let found = match map.get_mut(&key) {
Some(thing) => match thing {
&mut serde_json::Value::Array(ref mut vector) => {
vector.push(value.clone());
true
}
json_value => {
let vector = vec![json_value.clone(), value.clone()];
*json_value = serde_json::Value::Array(vector);
true
}
},
None => false,
};
if !found {
map.insert(key.clone(), value.clone());
}
}
let mut attributes_map: Map<String, serde_json::Value> = Map::new();
for (key, value) in self.attributes.iter() {
let found = match attributes_map.get_mut(key) {
Some(thing) => match thing {
&mut serde_json::Value::Array(ref mut vector) => {
vector.push(serde_json::Value::String(value.clone()));
true
}
json_value => {
let vector = vec![json_value.clone(), serde_json::Value::String(value.clone())];
*json_value = serde_json::Value::Array(vector);
true
}
},
None => false,
};
if !found {
attributes_map.insert(key.clone(), serde_json::Value::String(value.clone()));
}
}
if !attributes_map.is_empty() {
map.insert("$".to_owned(), serde_json::Value::Object(attributes_map));
}
(self.name.clone(), serde_json::Value::Object(map))
}
/// Returns the indentation string for nesting level `size`: the indent unit
/// repeated `size` times.
fn indent(size: usize) -> String {
    const INDENT: &'static str = " ";
    let mut padding = String::with_capacity(size * INDENT.len());
    for _ in 0..size {
        padding.push_str(INDENT);
    }
    padding
}
/// Renders attributes as ` key="value"` fragments, each preceded by a space,
/// in the order given. Returns an empty string when there are no attributes.
fn attributes_to_string(attributes: &[(String, String)]) -> String {
    let mut rendered = String::new();
    for (key, value) in attributes {
        rendered.push(' ');
        rendered.push_str(key);
        rendered.push_str("=\"");
        rendered.push_str(value);
        rendered.push('"');
    }
    rendered
}
}