1#[cfg(feature = "errors")]
4use std::borrow::Cow;
5use std::fmt;
6use std::iter::FusedIterator;
7
8use ego_tree::Tree;
9use ego_tree::iter::Nodes;
10use html5ever::serialize::SerializeOpts;
11use html5ever::tree_builder::QuirksMode;
12use html5ever::{QualName, driver, serialize};
13use selectors::matching::SelectorCaches;
14use tendril::TendrilSink;
15
16use crate::selector::Selector;
17use crate::{ElementRef, Node};
18
19pub use tree_sink::HtmlTreeSink;
20
/// A tree of HTML nodes plus the metadata stored alongside it.
///
/// Instances are created empty with [`Html::new_document`] or
/// [`Html::new_fragment`], or built from text with [`Html::parse_document`]
/// or [`Html::parse_fragment`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Html {
    /// Error messages attached to this tree; the constructors start it empty.
    ///
    /// The field only exists when the `errors` feature is enabled.
    // NOTE(review): presumably filled in by the tree sink while parsing — confirm.
    #[cfg(feature = "errors")]
    pub errors: Vec<Cow<'static, str>>,

    /// The quirks mode recorded for the tree; the constructors initialise it
    /// to `QuirksMode::NoQuirks`.
    pub quirks_mode: QuirksMode,

    /// The underlying node tree. Its root value is `Node::Document` or
    /// `Node::Fragment`, depending on which constructor was used.
    pub tree: Tree<Node>,
}
39
40impl Html {
41 pub fn new_document() -> Self {
43 Html {
44 #[cfg(feature = "errors")]
45 errors: Vec::new(),
46 quirks_mode: QuirksMode::NoQuirks,
47 tree: Tree::new(Node::Document),
48 }
49 }
50
51 pub fn new_fragment() -> Self {
53 Html {
54 #[cfg(feature = "errors")]
55 errors: Vec::new(),
56 quirks_mode: QuirksMode::NoQuirks,
57 tree: Tree::new(Node::Fragment),
58 }
59 }
60
61 pub fn parse_document(document: &str) -> Self {
80 let parser =
81 driver::parse_document(HtmlTreeSink::new(Self::new_document()), Default::default());
82 parser.one(document)
83 }
84
85 pub fn parse_fragment(fragment: &str) -> Self {
87 let parser = driver::parse_fragment(
88 HtmlTreeSink::new(Self::new_fragment()),
89 Default::default(),
90 QualName::new(None, ns!(html), local_name!("body")),
91 Vec::new(),
92 false,
93 );
94 parser.one(fragment)
95 }
96
97 pub fn select<'a, 'b>(&'a self, selector: &'b Selector) -> Select<'a, 'b> {
99 Select {
100 inner: self.tree.nodes(),
101 selector,
102 caches: Default::default(),
103 }
104 }
105
106 pub fn root_element(&self) -> ElementRef<'_> {
108 let root_node = self
109 .tree
110 .root()
111 .children()
112 .find(|child| child.value().is_element())
113 .expect("html node missing");
114 ElementRef::wrap(root_node).unwrap()
115 }
116
117 pub fn html(&self) -> String {
119 let opts = SerializeOpts {
120 scripting_enabled: false, traversal_scope: serialize::TraversalScope::IncludeNode,
122 create_missing_parent: false,
123 };
124 let mut buf = Vec::new();
125 serialize(&mut buf, self, opts).unwrap();
126 String::from_utf8(buf).unwrap()
127 }
128}
129
/// Iterator over the elements of an [`Html`] tree that match a selector.
///
/// `'a` is the lifetime of the borrowed tree and `'b` the lifetime of the
/// borrowed [`Selector`]. Values are created by [`Html::select`].
pub struct Select<'a, 'b> {
    /// Iterator over the tree's nodes, from which candidate elements are drawn.
    inner: Nodes<'a, Node>,
    /// The selector every yielded element has to match.
    selector: &'b Selector,
    /// Caches passed to the selector on every match attempt.
    caches: SelectorCaches,
}
136
137impl fmt::Debug for Select<'_, '_> {
138 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
139 fmt.debug_struct("Select")
140 .field("inner", &self.inner)
141 .field("selector", &self.selector)
142 .field("caches", &"..")
143 .finish()
144 }
145}
146
147impl Clone for Select<'_, '_> {
148 fn clone(&self) -> Self {
149 Self {
150 inner: self.inner.clone(),
151 selector: self.selector,
152 caches: Default::default(),
153 }
154 }
155}
156
157impl<'a> Iterator for Select<'a, '_> {
158 type Item = ElementRef<'a>;
159
160 fn next(&mut self) -> Option<ElementRef<'a>> {
161 for node in self.inner.by_ref() {
162 if let Some(element) = ElementRef::wrap(node)
163 && element.parent().is_some()
164 && self
165 .selector
166 .matches_with_scope_and_cache(&element, None, &mut self.caches)
167 {
168 return Some(element);
169 }
170 }
171 None
172 }
173
174 fn size_hint(&self) -> (usize, Option<usize>) {
175 let (_lower, upper) = self.inner.size_hint();
176
177 (0, upper)
178 }
179}
180
181impl DoubleEndedIterator for Select<'_, '_> {
182 fn next_back(&mut self) -> Option<Self::Item> {
183 for node in self.inner.by_ref().rev() {
184 if let Some(element) = ElementRef::wrap(node)
185 && element.parent().is_some()
186 && self
187 .selector
188 .matches_with_scope_and_cache(&element, None, &mut self.caches)
189 {
190 return Some(element);
191 }
192 }
193 None
194 }
195}
196
// Marker impl: declares that once `next` has returned `None`, it keeps returning `None`.
// NOTE(review): this relies on the wrapped `Nodes` iterator being fused itself — confirm.
impl FusedIterator for Select<'_, '_> {}
198
199mod serializable;
200mod tree_sink;
201
#[cfg(test)]
mod tests {
    use super::{Html, Selector};

    /// The root element of a parsed fragment can be searched for the fragment's content.
    #[test]
    fn root_element_fragment() {
        let fragment = Html::parse_fragment(r#"<a href="http://github.com">1</a>"#);
        let anchor_selector = Selector::parse("a").unwrap();

        let root = fragment.root_element();
        let anchor = root.select(&anchor_selector).next().unwrap();

        assert_eq!(anchor.inner_html(), "1");
        assert_eq!(anchor.value().attr("href").unwrap(), "http://github.com");
    }

    /// A leading doctype does not stop `root_element` from finding an element.
    #[test]
    fn root_element_document_doctype() {
        let document = Html::parse_document("<!DOCTYPE html>\n<title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// A leading comment does not stop `root_element` from finding an element.
    #[test]
    fn root_element_document_comment() {
        let document = Html::parse_document("<!-- comment --><title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// Reversing a selection yields the matches in the opposite order.
    #[test]
    fn select_is_reversible() {
        let document = Html::parse_document("<p>element1</p><p>element2</p><p>element3</p>");
        let paragraphs = Selector::parse("p").unwrap();

        let mut reversed = Vec::new();
        for paragraph in document.select(&paragraphs).rev() {
            reversed.push(paragraph.inner_html());
        }

        assert_eq!(reversed, vec!["element3", "element2", "element1"]);
    }

    /// The size hint has a zero lower bound and an upper bound of 10 for this document.
    #[test]
    fn select_has_a_size_hint() {
        let document = Html::parse_document("<p>element1</p><p>element2</p><p>element3</p>");
        let paragraphs = Selector::parse("p").unwrap();

        let hint = document.select(&paragraphs).size_hint();

        assert_eq!(hint.0, 0);
        assert_eq!(hint.1, Some(10));
    }

    /// Compile-time check that `Html` is `Send` when the `atomic` feature is on.
    #[cfg(feature = "atomic")]
    #[test]
    fn html_is_send() {
        fn assert_send<T: Send>() {}
        assert_send::<Html>();
    }
}