use std::collections::BTreeMap;
use crate::node::Node;
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum HTMLNode<S> {
Comment(S),
Doctype(S),
Element {
name: S,
attrs: BTreeMap<S, S>,
children: Vec<HTMLNode<S>>,
},
RawElement {
name: S,
attrs: BTreeMap<S, S>,
content: S,
},
Void {
name: S,
attrs: BTreeMap<S, S>,
},
Text(S),
}
impl<S> Node for HTMLNode<S> {
type Text = S;
fn name(&self) -> Option<&S> {
match self {
Self::Element { name, .. }
| Self::RawElement { name, .. }
| Self::Void { name, .. } => Some(name),
_ => None,
}
}
fn text(&self) -> Option<&S> {
match self {
Self::Text(t) => Some(t),
_ => None,
}
}
fn attrs(&self) -> Option<&BTreeMap<S, S>> {
match self {
Self::Element { attrs, .. }
| Self::RawElement { attrs, .. }
| Self::Void { attrs, .. } => Some(attrs),
_ => None,
}
}
fn children(&self) -> &[Self] {
if let Self::Element { children, .. } = &self {
children.as_slice()
} else {
&[]
}
}
}
impl<S> HTMLNode<S> {
pub fn iter(&self) -> std::slice::Iter<Self> {
self.children().iter()
}
}
impl<'a, S> IntoIterator for &'a HTMLNode<S> {
type Item = &'a HTMLNode<S>;
type IntoIter = std::slice::Iter<'a, HTMLNode<S>>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
#[cfg(test)]
mod tests {
use std::ops::Deref;
use super::*;
use crate::*;
const HELLO: &str = r#"
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<title>Hello!</title>
</head>
<body>
<h1>Hello World!</h1>
<p>This is a simple paragraph.</p>
<div class="parent">
<div class="child">
<div id="item">
<p>Nested item</p>
<a>Broken Link</a>
<a href="https://example.com">Example Link</a>
</div>
</div>
</div>
<a href="https://other.com">Other Link</a>
</body>
<img class="self-closing"/>
<!-- Simple comment -->
</html>"#;
#[test]
fn test_tree_iter() {
let soup = Soup::html_strict(HELLO).expect("Failed to parse HTML");
let body = soup
.tag("body")
.first()
.expect("Could not find body tag")
.deref()
.clone();
let mut nodes = body.descendants();
assert_eq!(nodes.next().unwrap().name(), Some(&"body".into()));
assert_eq!(nodes.next().unwrap(), &HTMLNode::Text("\n ".into()));
assert_eq!(nodes.next().unwrap(), &HTMLNode::Element {
name: "h1".into(),
attrs: [].into(),
children: vec![HTMLNode::Text("Hello World!".into())]
});
assert_eq!(
nodes.next().unwrap(),
&HTMLNode::Text("Hello World!".into())
);
assert_eq!(nodes.next().unwrap(), &HTMLNode::Text("\n ".into()));
assert_eq!(nodes.next().unwrap(), &HTMLNode::Element {
name: "p".into(),
attrs: [].into(),
children: vec![HTMLNode::Text("This is a simple paragraph.".into())]
});
}
#[test]
fn test_direct_iter() {
let soup = Soup::html_strict(HELLO).expect("Failed to parse HTML");
let body = soup
.tag("body")
.first()
.expect("Could not find body tag")
.deref()
.clone();
let mut nodes = body.into_iter();
assert_eq!(nodes.next().unwrap(), &HTMLNode::Text("\n ".into()));
assert_eq!(nodes.next().unwrap(), &HTMLNode::Element {
name: "h1".into(),
attrs: [].into(),
children: vec![HTMLNode::Text("Hello World!".into())]
});
assert_eq!(nodes.next().unwrap(), &HTMLNode::Text("\n ".into()));
assert_eq!(nodes.next().unwrap(), &HTMLNode::Element {
name: "p".into(),
attrs: [].into(),
children: vec![HTMLNode::Text("This is a simple paragraph.".into())]
});
}
#[test]
fn test_iter_order() {
let soup = Soup::html_strict(HELLO).expect("Failed to parse HTML");
let soup = soup
.tag("body")
.first()
.expect("Could not find body tag")
.query();
assert_eq!(
soup.tag("a").first().map(|t| t.all_text()),
Some("Broken Link".into())
);
assert_eq!(
soup.strict().tag("a").first().map(|t| t.all_text()),
Some("Other Link".into())
);
}
}