1use std::{
2 collections::HashMap,
3 fs::File,
4 io::{BufReader, Read},
5 path::Path,
6};
7
8use xml::reader::{EventReader, XmlEvent};
9
10use super::{Element, SelectError};
11
/// Errors that can occur while constructing a [`Document`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DocumentError {
    /// The requested file could not be opened; carries the path that was requested.
    UnableToOpenFile(String),
    /// The XML input could not be parsed; carries a textual description of the failure.
    ParseError(String),
}

impl std::fmt::Display for DocumentError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DocumentError::UnableToOpenFile(path) => write!(f, "unable to open file: {}", path),
            DocumentError::ParseError(message) => write!(f, "XML parse error: {}", message),
        }
    }
}

// Implementing `Error` lets callers propagate this type with `?` into
// `Box<dyn std::error::Error>` and similar generic error containers.
impl std::error::Error for DocumentError {}
18
19#[derive(Clone, Debug)]
21pub struct Document {
22 root: Element,
23}
24
25impl Document {
26 pub fn new_from_xml_stream<R: Read>(stream: R) -> Result<Document, DocumentError> {
28 let event_reader = EventReader::new(stream);
29
30 let mut elements: Vec<Element> = Vec::new();
31 let mut next_node_index = 1;
32
33 for event in event_reader {
34 match event {
35 Ok(XmlEvent::StartElement {
36 ref name,
37 ref attributes,
38 ..
39 }) => {
40 let attr_map =
41 attributes
42 .iter()
43 .fold(HashMap::new(), |mut hash_map, attribute| {
44 hash_map.insert(
45 attribute.name.local_name.clone(),
46 attribute.value.clone(),
47 );
48
49 return hash_map;
50 });
51
52 elements.push(Element {
53 node_index: next_node_index,
54 children: None,
55 tag_name: name.local_name.clone(),
56 attr_map: attr_map,
57 text: String::new(),
58 });
59 next_node_index = next_node_index + 1;
60 }
61
62 Ok(XmlEvent::EndElement { ref name, .. })
63 if elements.last().unwrap().tag_name() == name.local_name =>
64 {
65 let child_node = elements.pop().unwrap();
66
67 if let Some(mut parent) = elements.pop() {
68 if let Some(ref mut children) = parent.children {
69 children.push(child_node);
70 } else {
71 parent.children = Some(vec![child_node]);
72 }
73
74 elements.push(parent);
75 } else {
76 return Ok(Document {
77 root: Element {
78 node_index: 0,
79 tag_name: "[root]".to_string(),
80 children: Some(vec![child_node]),
81 attr_map: HashMap::new(),
82 text: String::new(),
83 },
84 });
85 }
86 }
87
88 Ok(XmlEvent::Characters(string)) => {
89 elements.last_mut().unwrap().text.push_str(&string);
90 }
91
92 Ok(XmlEvent::Whitespace(string)) => {
93 elements.last_mut().unwrap().text.push_str(&string);
94 }
95
96 Err(error) => {
97 return Err(DocumentError::ParseError(error.to_string()));
98 }
99
100 Ok(_) => {}
101 }
102 }
103
104 panic!("Root element was not properly returned!");
105 }
106
107 pub fn new_from_xml_string(string: &str) -> Result<Document, DocumentError> {
109 Document::new_from_xml_stream(string.as_bytes())
110 }
111
112 pub fn new_from_xml_file(filename: &str) -> Result<Document, DocumentError> {
114 let path = Path::new(filename);
115
116 if let Ok(file) = File::open(path) {
117 let reader = BufReader::new(file);
118
119 Document::new_from_xml_stream(reader)
120 } else {
121 Err(DocumentError::UnableToOpenFile(
122 path.to_str().unwrap().to_string(),
123 ))
124 }
125 }
126
127 pub fn number_of_elements(&self) -> usize {
129 self.root.subtree_size() - 1
130 }
131
132 pub fn select_all<'a>(
134 &'a self,
135 selector: &str,
136 ) -> Result<Box<dyn Iterator<Item = &'a Element> + 'a>, SelectError> {
137 self.root.select_all(selector)
138 }
139
140 pub fn select<'a>(&'a self, selector: &str) -> Result<&'a Element, SelectError> {
142 self.root.select(selector)
143 }
144}
145
/// Parses a nested sample document and checks that the root has index 0 and that
/// every element yielded by `children_deep_iter` has a strictly larger `node_index`
/// than the one yielded before it.
#[test]
fn it_assigns_node_indices_in_monotonically_increasing_order() {
    let sample_xml = r#"
<?xml version="1.0" encoding="UTF-8"?>
<sample type="simple">
    This is some text
    <!-- This is a comment -->
    <title>Simple Sample</title>
    <note long="false">Some unrecognisable scribbling</note>

    <related>
        <!-- This is another comment -->
        <item id="1">
            <title>Another Sample</title>
            <ref>http://path.to.somewhere</ref>
        </item>

        <item id="2">
            <title>Other Sample</title>
            <ref>http://some.other.path</ref>
        </item>
    </related>

    <!-- div soup goodness -->
    <div></div>
    <div>
        <other>
            <div></div>
        </other>
        <div>
            <div></div>
            <div>
                <div></div>
                <div></div>
            </div>
        </div>
    </div>
</sample>
"#;

    let document = Document::new_from_xml_string(sample_xml).unwrap();

    // The synthetic root always carries index 0.
    assert_eq!(document.root.node_index, 0);

    // Track the most recently seen index, starting from the root's.
    let mut previous_index = 0;
    for child in document.root.children_deep_iter() {
        assert!(previous_index < child.node_index);
        previous_index = child.node_index;
    }
}