1use std::borrow::Cow;
4
5use ego_tree::iter::Nodes;
6use ego_tree::Tree;
7use html5ever::serialize::SerializeOpts;
8use html5ever::tree_builder::QuirksMode;
9use html5ever::QualName;
10use html5ever::{driver, serialize};
11use tendril::TendrilSink;
12
13use crate::selector::Selector;
14use crate::{ElementRef, Node};
15
16#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct Html {
24 pub errors: Vec<Cow<'static, str>>,
26
27 pub quirks_mode: QuirksMode,
29
30 pub tree: Tree<Node>,
32}
33
34impl Html {
35 pub fn new_document() -> Self {
37 Html {
38 errors: Vec::new(),
39 quirks_mode: QuirksMode::NoQuirks,
40 tree: Tree::new(Node::Document),
41 }
42 }
43
44 pub fn new_fragment() -> Self {
46 Html {
47 errors: Vec::new(),
48 quirks_mode: QuirksMode::NoQuirks,
49 tree: Tree::new(Node::Fragment),
50 }
51 }
52
53 pub fn parse_document(document: &str) -> Self {
72 let parser = driver::parse_document(Self::new_document(), Default::default());
73 parser.one(document)
74 }
75
76 pub fn parse_fragment(fragment: &str) -> Self {
78 let parser = driver::parse_fragment(
79 Self::new_fragment(),
80 Default::default(),
81 QualName::new(None, ns!(html), local_name!("body")),
82 Vec::new(),
83 );
84 parser.one(fragment)
85 }
86
87 pub fn select<'a, 'b>(&'a self, selector: &'b Selector) -> Select<'a, 'b> {
89 Select {
90 inner: self.tree.nodes(),
91 selector,
92 }
93 }
94
95 pub fn root_element(&self) -> ElementRef {
97 let root_node = self
98 .tree
99 .root()
100 .children()
101 .find(|child| child.value().is_element())
102 .expect("html node missing");
103 ElementRef::wrap(root_node).unwrap()
104 }
105
106 pub fn html(&self) -> String {
108 let opts = SerializeOpts {
109 scripting_enabled: false, traversal_scope: html5ever::serialize::TraversalScope::IncludeNode,
111 create_missing_parent: false,
112 };
113 let mut buf = Vec::new();
114 serialize(&mut buf, self, opts).unwrap();
115 String::from_utf8(buf).unwrap()
116 }
117}
118
119#[derive(Debug)]
121pub struct Select<'a, 'b> {
122 inner: Nodes<'a, Node>,
123 selector: &'b Selector,
124}
125
126impl<'a, 'b> Iterator for Select<'a, 'b> {
127 type Item = ElementRef<'a>;
128
129 fn next(&mut self) -> Option<ElementRef<'a>> {
130 for node in self.inner.by_ref() {
131 if let Some(element) = ElementRef::wrap(node) {
132 if element.parent().is_some() && self.selector.matches(&element) {
133 return Some(element);
134 }
135 }
136 }
137 None
138 }
139}
140
141impl<'a, 'b> DoubleEndedIterator for Select<'a, 'b> {
142 fn next_back(&mut self) -> Option<Self::Item> {
143 for node in self.inner.by_ref().rev() {
144 if let Some(element) = ElementRef::wrap(node) {
145 if element.parent().is_some() && self.selector.matches(&element) {
146 return Some(element);
147 }
148 }
149 }
150 None
151 }
152}
153
154mod serializable;
155mod tree_sink;
156
#[cfg(test)]
mod tests {
    use super::Html;
    use super::Selector;

    /// The anchor of a parsed fragment is reachable from the root element
    /// with its text and `href` attribute intact.
    #[test]
    fn root_element_fragment() {
        let fragment = Html::parse_fragment(r#"<a href="http://github.com">1</a>"#);
        let anchor_selector = Selector::parse("a").unwrap();

        let root = fragment.root_element();
        let anchor = root.select(&anchor_selector).next().unwrap();

        assert_eq!(anchor.inner_html(), "1");
        assert_eq!(anchor.value().attr("href").unwrap(), "http://github.com");
    }

    /// A leading doctype does not prevent `root_element` from finding the
    /// root element of a document.
    #[test]
    fn root_element_document_doctype() {
        let document = Html::parse_document("<!DOCTYPE html>\n<title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// A leading comment does not prevent `root_element` from finding the
    /// root element of a document.
    #[test]
    fn root_element_document_comment() {
        let document = Html::parse_document("<!-- comment --><title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// `Select` can be iterated back to front via `rev()`.
    #[test]
    fn select_is_reversible() {
        let document = Html::parse_document("<p>element1</p><p>element2</p><p>element3</p>");
        let paragraph_selector = Selector::parse("p").unwrap();

        let reversed: Vec<_> = document
            .select(&paragraph_selector)
            .rev()
            .map(|paragraph| paragraph.inner_html())
            .collect();

        assert_eq!(reversed, vec!["element3", "element2", "element1"]);
    }
}