1use std::{
2 collections::BTreeMap,
3 io::Read,
4 marker::PhantomData,
5};
6
7use xmltree::Namespace;
8
9use crate::{
10 parser::Parser,
11 Node,
12};
13
/// Marker type that selects the `xmltree`-backed `Parser` implementation.
///
/// The struct carries no data of its own: the reader type `R` is recorded
/// only through `PhantomData`, so that it can be named by the `Parser` impl
/// written for this type.
#[derive(Debug, Clone)]
pub struct XMLParser<R> {
    /// Zero-sized placeholder that ties the otherwise unused parameter `R`
    /// to the struct.
    _marker: PhantomData<R>,
}
21
22impl<R> Parser for XMLParser<R>
23where
24 R: Read,
25{
26 type Input = R;
27 type Node = XMLNode;
28 type Error = xmltree::ParseError;
29
30 fn parse(reader: R) -> Result<Vec<Self::Node>, Self::Error> {
31 Ok(xmltree::Element::parse_all(reader)?
32 .into_iter()
33 .map(Into::into)
34 .collect())
35 }
36}
37
38#[derive(Debug, Default, Clone, PartialEq, Eq)]
40pub struct XMLElement {
41 pub prefix: Option<String>,
43
44 pub namespace: Option<String>,
46
47 pub namespaces: Option<Namespace>,
49
50 pub name: String,
52
53 pub attributes: BTreeMap<String, String>,
55
56 pub children: Vec<XMLNode>,
58}
59
60impl From<xmltree::Element> for XMLElement {
61 fn from(value: xmltree::Element) -> Self {
62 Self {
63 prefix: value.prefix,
64 namespace: value.namespace,
65 namespaces: value.namespaces,
66 name: value.name,
67 attributes: value.attributes.into_iter().collect(),
68 children: value.children.into_iter().map(Into::into).collect(),
69 }
70 }
71}
72
73#[derive(Debug, Clone, PartialEq, Eq)]
75pub enum XMLNode {
76 Element(XMLElement),
78
79 Comment(String),
81
82 CData(String),
84
85 Text(String),
87
88 ProcessingInstruction(String, Option<String>),
90}
91
92impl From<xmltree::XMLNode> for XMLNode {
93 fn from(value: xmltree::XMLNode) -> Self {
94 match value {
95 xmltree::XMLNode::Element(e) => XMLNode::Element(e.into()),
96 xmltree::XMLNode::Comment(c) => XMLNode::Comment(c),
97 xmltree::XMLNode::CData(d) => XMLNode::CData(d),
98 xmltree::XMLNode::Text(t) => XMLNode::Text(t),
99 xmltree::XMLNode::ProcessingInstruction(a, b) => XMLNode::ProcessingInstruction(a, b),
100 }
101 }
102}
103
104impl Node for XMLNode {
105 type Text = String;
106
107 fn name(&self) -> Option<&String> {
108 match self {
109 XMLNode::Element(e) => Some(&e.name),
110 _ => None,
111 }
112 }
113
114 fn text(&self) -> Option<&String> {
115 match self {
116 XMLNode::Text(t) => Some(t),
117 _ => None,
118 }
119 }
120
121 fn attrs(&self) -> Option<&BTreeMap<String, String>> {
122 match self {
123 XMLNode::Element(e) => Some(&e.attributes),
124 _ => None,
125 }
126 }
127
128 fn children(&self) -> &[Self] {
129 if let XMLNode::Element(e) = &self {
130 e.children.as_slice()
131 } else {
132 &[]
133 }
134 }
135}
136
137impl XMLNode {
138 pub fn iter(&self) -> std::slice::Iter<Self> {
140 self.children().iter()
141 }
142}
143
144impl<'a> IntoIterator for &'a XMLNode {
145 type Item = &'a XMLNode;
146 type IntoIter = std::slice::Iter<'a, XMLNode>;
147
148 fn into_iter(self) -> Self::IntoIter {
149 self.iter()
150 }
151}
152
#[cfg(test)]
mod tests {
    use std::ops::Deref;

    use super::*;
    use crate::*;

    /// XML fixture shared by every test in this module.
    const HELLO: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<root>
    <simple>Here's some text</simple>
    <complex id="hello">
        <nested>Nested text!</nested>
        <example>More text</example>

        <tree depth="1">
            <tree depth="2">
                <tree depth="3">Tree text</tree>
            </tree>
        </tree>
    </complex>

    <b>
        <a>Inner text</a>
    </b>

    <a>Outer text</a>
</root>"#;

    /// Builds the node expected for `<name>text</name>`: an element with no
    /// attributes or namespace data and a single text child.
    fn text_element(name: &str, text: &str) -> XMLNode {
        XMLNode::Element(XMLElement {
            name: name.into(),
            children: vec![XMLNode::Text(text.into())],
            ..Default::default()
        })
    }

    /// Text is reachable both from a single text node and aggregated over a subtree.
    #[test]
    fn test_text() {
        let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");

        let example = soup
            .tag("example")
            .first()
            .expect("Could not find 'example' tag");
        let text_node = example
            .children()
            .first()
            .expect("Could not find 'example' child node");

        assert_eq!(text_node.text(), Some(&"More text".into()));

        let root = soup.tag("root").first().expect("Could not find 'root' tag");
        let expected_text =
            "Here's some text\nNested text!\nMore text\nTree text\nInner text\nOuter text";

        assert_eq!(root.all_text(), expected_text);
    }

    /// `descendants()` starts at the node itself and then visits each child
    /// followed by that child's own content.
    #[test]
    fn test_tree_iter() {
        let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");

        let complex = soup
            .tag("complex")
            .first()
            .expect("Could not find 'complex' tag")
            .deref()
            .clone();

        let mut nodes = complex.descendants();

        // The first item is the starting node.
        let start = nodes.next().unwrap();
        assert_eq!(start.name(), Some(&"complex".into()));

        // The items that follow, in the exact order they must be produced.
        let expected = [
            text_element("nested", "Nested text!"),
            XMLNode::Text("Nested text!".into()),
            text_element("example", "More text"),
            XMLNode::Text("More text".into()),
        ];

        for want in &expected {
            assert_eq!(nodes.next().unwrap(), want);
        }
    }

    /// Iterating a node directly yields only its direct children, in document order.
    #[test]
    fn test_direct_iter() {
        let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");

        let complex = soup
            .tag("complex")
            .first()
            .expect("Could not find 'complex' tag")
            .deref()
            .clone();

        let mut nodes = complex.into_iter();

        let expected = [
            text_element("nested", "Nested text!"),
            text_element("example", "More text"),
        ];

        for want in &expected {
            assert_eq!(nodes.next().unwrap(), want);
        }
    }

    /// The default and `strict()` queries pick different `<a>` elements.
    #[test]
    fn test_iter_order() {
        let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");

        let root_query = soup
            .tag("root")
            .first()
            .expect("Failed to find 'root' tag")
            .query();

        // The default query finds the `<a>` nested inside `<b>` first.
        let default_match = root_query.tag("a").first().map(|t| t.all_text());
        assert_eq!(default_match, Some("Inner text".into()));

        // The strict query finds the `<a>` that sits directly under `<root>`.
        // NOTE(review): presumably `strict()` limits matching to direct children — confirm.
        let strict_match = root_query.strict().tag("a").first().map(|t| t.all_text());
        assert_eq!(strict_match, Some("Outer text".into()));
    }
}