use std::{
collections::HashMap,
fs::File,
io::{BufReader, Read},
path::Path,
};
use xml::reader::{EventReader, XmlEvent};
use super::{Element, SelectError};
/// Errors that can occur while building a `Document`.
#[derive(Clone, Debug)]
pub enum DocumentError {
    /// The requested file could not be opened; carries the path that was
    /// passed in.
    UnableToOpenFile(String),
    /// The XML reader reported an error; carries the reader's message.
    ParseError(String),
}

impl std::fmt::Display for DocumentError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DocumentError::UnableToOpenFile(path) => {
                write!(f, "unable to open file: {}", path)
            }
            DocumentError::ParseError(message) => {
                write!(f, "XML parse error: {}", message)
            }
        }
    }
}

// Implementing `Error` lets callers propagate this type with `?` into
// `Box<dyn std::error::Error>` and similar generic error containers.
impl std::error::Error for DocumentError {}
/// An XML document held in memory as a tree of `Element`s.
#[derive(Clone, Debug)]
pub struct Document {
// Synthetic root created by the parser: it has node index 0, the tag name
// "[root]", and the document's top-level element as its only child.
root: Element,
}
impl Document {
pub fn new_from_xml_stream<R: Read>(stream: R) -> Result<Document, DocumentError> {
let event_reader = EventReader::new(stream);
let mut elements: Vec<Element> = Vec::new();
let mut next_node_index = 1;
for event in event_reader {
match event {
Ok(XmlEvent::StartElement {
ref name,
ref attributes,
..
}) => {
let attr_map =
attributes
.iter()
.fold(HashMap::new(), |mut hash_map, attribute| {
hash_map.insert(
attribute.name.local_name.clone(),
attribute.value.clone(),
);
return hash_map;
});
elements.push(Element {
node_index: next_node_index,
children: None,
tag_name: name.local_name.clone(),
attr_map: attr_map,
text: String::new(),
});
next_node_index = next_node_index + 1;
}
Ok(XmlEvent::EndElement { ref name, .. })
if elements.last().unwrap().tag_name() == name.local_name =>
{
let child_node = elements.pop().unwrap();
if let Some(mut parent) = elements.pop() {
if let Some(ref mut children) = parent.children {
children.push(child_node);
} else {
parent.children = Some(vec![child_node]);
}
elements.push(parent);
} else {
return Ok(Document {
root: Element {
node_index: 0,
tag_name: "[root]".to_string(),
children: Some(vec![child_node]),
attr_map: HashMap::new(),
text: String::new(),
},
});
}
}
Ok(XmlEvent::Characters(string)) => {
elements.last_mut().unwrap().text.push_str(&string);
}
Ok(XmlEvent::Whitespace(string)) => {
elements.last_mut().unwrap().text.push_str(&string);
}
Err(error) => {
return Err(DocumentError::ParseError(error.to_string()));
}
Ok(_) => {}
}
}
panic!("Root element was not properly returned!");
}
pub fn new_from_xml_string(string: &str) -> Result<Document, DocumentError> {
Document::new_from_xml_stream(string.as_bytes())
}
pub fn new_from_xml_file(filename: &str) -> Result<Document, DocumentError> {
let path = Path::new(filename);
if let Ok(file) = File::open(path) {
let reader = BufReader::new(file);
Document::new_from_xml_stream(reader)
} else {
Err(DocumentError::UnableToOpenFile(
path.to_str().unwrap().to_string(),
))
}
}
pub fn number_of_elements(&self) -> usize {
self.root.subtree_size() - 1
}
pub fn select_all<'a>(
&'a self,
selector: &str,
) -> Result<Box<dyn Iterator<Item = &'a Element> + 'a>, SelectError> {
self.root.select_all(selector)
}
pub fn select<'a>(&'a self, selector: &str) -> Result<&'a Element, SelectError> {
self.root.select(selector)
}
}
/// Parses a nested sample document and checks that the synthetic root has
/// index 0 and that every element yielded by `children_deep_iter` has a
/// strictly larger node index than the one yielded before it.
#[test]
fn it_assigns_node_indices_in_monotonically_increasing_order() {
    // The fixture mixes text, comments, attributes, and deeply nested
    // elements sharing the same tag name.
    let xml = r#"
<?xml version="1.0" encoding="UTF-8"?>
<sample type="simple">
This is some text
<!-- This is a comment -->
<title>Simple Sample</title>
<note long="false">Some unrecognisable scribbling</note>
<related>
<!-- This is another comment -->
<item id="1">
<title>Another Sample</title>
<ref>http://path.to.somewhere</ref>
</item>
<item id="2">
<title>Other Sample</title>
<ref>http://some.other.path</ref>
</item>
</related>
<!-- div soup goodness -->
<div></div>
<div>
<other>
<div></div>
</other>
<div>
<div></div>
<div>
<div></div>
<div></div>
</div>
</div>
</div>
</sample>
"#;
    let document = Document::new_from_xml_string(xml).unwrap();

    assert_eq!(document.root.node_index, 0);

    // Start from the root's index; each visited element must exceed the
    // index of the element visited just before it.
    let mut previous_index = 0;
    for child in document.root.children_deep_iter() {
        assert!(previous_index < child.node_index);
        previous_index = child.node_index;
    }
}