use std::{
collections::BTreeMap,
io::Read,
marker::PhantomData,
};
use xmltree::Namespace;
use crate::{
parser::Parser,
Node,
};
/// Marker type that implements the crate's `Parser` trait for XML input read from `R`.
///
/// The struct holds no reader and no parsing state; its only field is a
/// zero-sized [`PhantomData`] that records the reader type parameter.
#[derive(Clone, Debug)]
pub struct XMLParser<R> {
// Ties the otherwise-unused type parameter `R` to the struct.
_marker: PhantomData<R>,
}
impl<R> Parser for XMLParser<R>
where
R: Read,
{
type Input = R;
type Node = XMLNode;
type Error = xmltree::ParseError;
fn parse(reader: R) -> Result<Vec<Self::Node>, Self::Error> {
Ok(xmltree::Element::parse_all(reader)?
.into_iter()
.map(Into::into)
.collect())
}
}
/// An XML element as stored by this module, converted field-by-field from
/// [`xmltree::Element`].
///
/// The derived `Default` value has an empty `name`, `None` for all namespace
/// data, no attributes and no children; the tests use it with struct-update
/// syntax to build expected elements.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct XMLElement {
/// Copied from `xmltree::Element::prefix`; `None` when the source had none.
pub prefix: Option<String>,
/// Copied from `xmltree::Element::namespace`; `None` when the source had none.
pub namespace: Option<String>,
/// Copied from `xmltree::Element::namespaces`; `None` when the source had none.
pub namespaces: Option<Namespace>,
/// The element's tag name, copied from `xmltree::Element::name`.
pub name: String,
/// Attribute name/value pairs, collected into a `BTreeMap` and therefore
/// iterated in sorted key order.
pub attributes: BTreeMap<String, String>,
/// Child nodes in the order produced by the source element's `children`.
pub children: Vec<XMLNode>,
}
impl From<xmltree::Element> for XMLElement {
fn from(value: xmltree::Element) -> Self {
Self {
prefix: value.prefix,
namespace: value.namespace,
namespaces: value.namespaces,
name: value.name,
attributes: value.attributes.into_iter().collect(),
children: value.children.into_iter().map(Into::into).collect(),
}
}
}
/// A node in a parsed XML tree.
///
/// The variants mirror those of [`xmltree::XMLNode`] one-to-one; only
/// `Element` carries child nodes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XMLNode {
/// An element with a name, attributes and children.
Element(XMLElement),
/// Payload of the source `Comment` variant.
Comment(String),
/// Payload of the source `CData` variant.
CData(String),
/// Payload of the source `Text` variant; the only variant for which
/// `Node::text` returns `Some`.
Text(String),
/// Both fields are carried over unchanged from the source
/// `ProcessingInstruction` variant.
// NOTE(review): presumably (target name, optional data) — confirm against the xmltree docs.
ProcessingInstruction(String, Option<String>),
}
impl From<xmltree::XMLNode> for XMLNode {
fn from(value: xmltree::XMLNode) -> Self {
match value {
xmltree::XMLNode::Element(e) => XMLNode::Element(e.into()),
xmltree::XMLNode::Comment(c) => XMLNode::Comment(c),
xmltree::XMLNode::CData(d) => XMLNode::CData(d),
xmltree::XMLNode::Text(t) => XMLNode::Text(t),
xmltree::XMLNode::ProcessingInstruction(a, b) => XMLNode::ProcessingInstruction(a, b),
}
}
}
/// [`Node`] accessors for [`XMLNode`].
///
/// Name, attributes and children are only available on `Element` nodes;
/// text is only available on `Text` nodes.
impl Node for XMLNode {
    type Text = String;

    /// The tag name for `Element` nodes, `None` for every other variant.
    fn name(&self) -> Option<&String> {
        if let Self::Element(element) = self {
            Some(&element.name)
        } else {
            None
        }
    }

    /// The string payload for `Text` nodes, `None` for every other variant.
    fn text(&self) -> Option<&String> {
        if let Self::Text(content) = self {
            Some(content)
        } else {
            None
        }
    }

    /// The attribute map for `Element` nodes, `None` for every other variant.
    fn attrs(&self) -> Option<&BTreeMap<String, String>> {
        if let Self::Element(element) = self {
            Some(&element.attributes)
        } else {
            None
        }
    }

    /// The direct children of an `Element` node; an empty slice otherwise.
    fn children(&self) -> &[Self] {
        match self {
            Self::Element(element) => element.children.as_slice(),
            _ => &[],
        }
    }
}
impl XMLNode {
    /// Returns an iterator over this node's direct children.
    ///
    /// This delegates to [`Node::children`], so the iterator walks exactly the
    /// slice that method returns and yields the children in slice order.
    ///
    /// The returned iterator borrows from `self`. The `'_` in the return type
    /// makes that borrow visible in the signature instead of hiding the
    /// lifetime parameter of `std::slice::Iter`.
    pub fn iter(&self) -> std::slice::Iter<'_, Self> {
        self.children().iter()
    }
}
/// Lets `&XMLNode` be used directly in `for` loops, iterating over the node's
/// direct children by reference.
impl<'a> IntoIterator for &'a XMLNode {
    type Item = &'a XMLNode;
    type IntoIter = std::slice::Iter<'a, XMLNode>;

    /// Iterates the slice returned by [`Node::children`].
    fn into_iter(self) -> Self::IntoIter {
        let direct_children = self.children();
        direct_children.iter()
    }
}
// Unit tests for the XML backend, driven through the crate's `Soup` query API.
#[cfg(test)]
mod tests {
use std::ops::Deref;
use super::*;
use crate::*;
// Shared fixture. It contains nested `<tree>` elements, an attribute on
// `<complex>`, and two `<a>` elements at different depths (one inside `<b>`,
// one directly under `<root>`) which `test_iter_order` relies on.
const HELLO: &str = r#"<?xml version="1.0" encoding="utf-8"?>
<root>
<simple>Here's some text</simple>
<complex id="hello">
<nested>Nested text!</nested>
<example>More text</example>
<tree depth="1">
<tree depth="2">
<tree depth="3">Tree text</tree>
</tree>
</tree>
</complex>
<b>
<a>Inner text</a>
</b>
<a>Outer text</a>
</root>"#;
// Checks `text()` on a single text child and `all_text()` on the whole tree.
#[test]
fn test_text() {
let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");
let example = soup
.tag("example")
.first()
.expect("Could not find 'example' tag");
// The first child of `<example>` is expected to be its text node.
let child = example
.children()
.first()
.expect("Could not find 'example' child node");
assert_eq!(child.text(), Some(&"More text".into()));
let root = soup.tag("root").first().expect("Could not find 'root' tag");
// `all_text()` is expected to join every text node under `<root>` with
// newlines, in document order.
assert_eq!(
root.all_text(),
"Here's some text\nNested text!\nMore text\nTree text\nInner text\nOuter text"
);
}
// Checks the order in which `descendants()` visits nodes under `<complex>`.
#[test]
fn test_tree_iter() {
let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");
// Dereference the query result and clone it to get an owned node to walk.
let complex = soup
.tag("complex")
.first()
.expect("Could not find 'complex' tag")
.deref()
.clone();
let mut nodes = complex.descendants();
// The walk starts with the node itself ...
assert_eq!(nodes.next().unwrap().name(), Some(&"complex".into()));
// ... then each child element is followed immediately by its own text
// child before the next sibling element appears.
assert_eq!(
nodes.next().unwrap(),
&XMLNode::Element(XMLElement {
name: "nested".into(),
children: vec![XMLNode::Text("Nested text!".into())],
..Default::default()
})
);
assert_eq!(nodes.next().unwrap(), &XMLNode::Text("Nested text!".into()));
assert_eq!(
nodes.next().unwrap(),
&XMLNode::Element(XMLElement {
name: "example".into(),
children: vec![XMLNode::Text("More text".into())],
..Default::default()
})
);
assert_eq!(nodes.next().unwrap(), &XMLNode::Text("More text".into()));
}
// Checks that iterating a node via `into_iter()` yields only its direct
// children, in order, without descending into them.
#[test]
fn test_direct_iter() {
let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");
let complex = soup
.tag("complex")
.first()
.expect("Could not find 'complex' tag")
.deref()
.clone();
let mut nodes = complex.into_iter();
// First direct child: the `<nested>` element.
assert_eq!(
nodes.next().unwrap(),
&XMLNode::Element(XMLElement {
name: "nested".into(),
children: vec![XMLNode::Text("Nested text!".into())],
..Default::default()
})
);
// Second direct child: the `<example>` element (the text inside
// `<nested>` is not visited in between).
assert_eq!(
nodes.next().unwrap(),
&XMLNode::Element(XMLElement {
name: "example".into(),
children: vec![XMLNode::Text("More text".into())],
..Default::default()
})
);
}
// Checks which of the two `<a>` elements is found first in the default and
// in the `strict()` query modes.
#[test]
fn test_iter_order() {
let soup = Soup::xml(HELLO.as_bytes()).expect("Failed to parse XML");
// Re-root the query at `<root>`.
let soup = soup
.tag("root")
.first()
.expect("Failed to find 'root' tag")
.query();
// Default mode: the `<a>` nested inside `<b>` comes first.
assert_eq!(
soup.tag("a").first().map(|t| t.all_text()),
Some("Inner text".into())
);
// Strict mode: the `<a>` directly under `<root>` is found instead.
// NOTE(review): presumably `strict()` limits matching to direct children — confirm against the query API.
assert_eq!(
soup.strict().tag("a").first().map(|t| t.all_text()),
Some("Outer text".into())
);
}
}