1use ego_tree::iter::Nodes;
4use ego_tree::{NodeId, Tree};
5use html5ever::serialize::SerializeOpts;
6use html5ever::tree_builder::QuirksMode;
7use html5ever::QualName;
8use html5ever::{driver, serialize};
9use tendril::TendrilSink;
10
11use crate::element_ref::ElementRef;
12use crate::node::Node;
13use crate::selector::Selector;
14
15use self::tree_sink::HtmlBuilder;
16
17lazy_static! {
18 static ref HTML_SELECTOR: Selector = Selector::parse("html").unwrap();
19}
20
21#[derive(Debug, Clone)]
28pub struct Html {
29 pub quirks_mode: QuirksMode,
31 pub tree: Tree<Node>,
33 pub lang: String,
35}
36
37impl Html {
38 pub fn new_document() -> Self {
40 Html {
41 quirks_mode: QuirksMode::NoQuirks,
42 tree: Tree::new(Node::Document),
43 lang: Default::default(),
44 }
45 }
46
47 pub fn new_fragment() -> Self {
49 Html {
50 quirks_mode: QuirksMode::NoQuirks,
51 tree: Tree::new(Node::Fragment),
52 lang: Default::default(),
53 }
54 }
55
56 pub fn parse_document(document: &str) -> Self {
74 let parser = driver::parse_document(HtmlBuilder::new_document(), Default::default());
75 parser.one(document)
76 }
77
78 pub fn parse_fragment(fragment: &str) -> Self {
80 let parser = driver::parse_fragment(
81 HtmlBuilder::new_fragment(),
82 Default::default(),
83 QualName::new(None, ns!(html), local_name!("body")),
84 Vec::new(),
85 false,
86 );
87 parser.one(fragment)
88 }
89
90 pub fn select<'a, 'b>(&'a self, selector: &'b Selector) -> Select<'a, 'b> {
92 Select {
93 inner: self.tree.nodes(),
94 selector,
95 }
96 }
97
98 pub fn root_element(&self) -> ElementRef {
100 let root_node = self
101 .tree
102 .root()
103 .children()
104 .find(|child| child.value().is_element())
105 .expect("html node missing");
106 ElementRef::wrap(root_node).unwrap()
107 }
108
109 pub fn set_language(&mut self, lang: String) {
111 self.lang = lang;
112 }
113
114 pub fn get_lang(&self) -> &str {
116 if self.lang.is_empty() {
117 if let Some(element) = self.select(&HTML_SELECTOR).next() {
118 if let Some(lang) = element.value().attr("lang") {
119 return lang;
120 }
121 }
122 &self.lang
123 } else {
124 &self.lang
125 }
126 }
127
128 pub fn html(&self) -> String {
130 let opts = SerializeOpts {
131 scripting_enabled: false, traversal_scope: html5ever::serialize::TraversalScope::IncludeNode,
133 create_missing_parent: false,
134 };
135 let mut buf = Vec::new();
136 let _ = serialize(&mut buf, self, opts);
137 auto_encoder::auto_encode_bytes(&buf)
138 }
139
140 pub fn remove_node(&mut self, node_id: NodeId) {
142 if let Some(mut node) = self.tree.get_mut(node_id) {
143 node.detach();
144 }
145 }
146}
147
148#[derive(Debug)]
150pub struct Select<'a, 'b> {
151 inner: Nodes<'a, Node>,
152 selector: &'b Selector,
153}
154
155impl<'a, 'b> Iterator for Select<'a, 'b> {
156 type Item = ElementRef<'a>;
157
158 fn next(&mut self) -> Option<ElementRef<'a>> {
159 for node in self.inner.by_ref() {
160 if let Some(element) = ElementRef::wrap(node) {
161 if element.parent().is_some() && self.selector.matches(&element) {
162 return Some(element);
163 }
164 }
165 }
166 None
167 }
168}
169
170impl<'a, 'b> DoubleEndedIterator for Select<'a, 'b> {
171 fn next_back(&mut self) -> Option<Self::Item> {
172 for node in self.inner.by_ref().rev() {
173 if let Some(element) = ElementRef::wrap(node) {
174 if element.parent().is_some() && self.selector.matches(&element) {
175 return Some(element);
176 }
177 }
178 }
179 None
180 }
181}
182
183mod serializable;
184mod tree_sink;
185
#[cfg(test)]
mod tests {
    use super::Html;
    use super::Selector;

    /// A parsed `Html` value must satisfy the `Send` bound.
    #[test]
    fn parsed_html_is_send() {
        fn assert_send<T: Send>(_: &T) {}

        let parsed = Html::parse_document("<p>hi</p>");
        assert_send(&parsed);
    }

    /// The root element of a fragment gives access to the fragment's content.
    #[test]
    fn root_element_fragment() {
        let fragment = Html::parse_fragment(r#"<a href="http://github.com">1</a>"#);
        let anchor_selector = Selector::parse("a").unwrap();

        let root = fragment.root_element();
        let anchor = root.select(&anchor_selector).next().unwrap();

        assert_eq!(anchor.inner_html(), "1");
        assert_eq!(anchor.value().attr("href").unwrap(), "http://github.com");
    }

    /// A leading doctype must not be mistaken for the root element.
    #[test]
    fn root_element_document_doctype() {
        let document = Html::parse_document("<!DOCTYPE html>\n<title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// A leading comment must not be mistaken for the root element.
    #[test]
    fn root_element_document_comment() {
        let document = Html::parse_document("<!-- comment --><title>abc</title>");
        let title_selector = Selector::parse("title").unwrap();

        let root = document.root_element();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// `Html::select` can be iterated back to front.
    #[test]
    fn select_is_reversible() {
        let document = Html::parse_document("<p>element1</p><p>element2</p><p>element3</p>");
        let paragraph_selector = Selector::parse("p").unwrap();

        let reversed: Vec<_> = document
            .select(&paragraph_selector)
            .rev()
            .map(|paragraph| paragraph.inner_html())
            .collect();

        assert_eq!(reversed, vec!["element3", "element2", "element1"]);
    }
}