1use ego_tree::iter::Nodes;
4use ego_tree::{NodeId, Tree};
5use fast_html5ever::serialize::SerializeOpts;
6use fast_html5ever::tree_builder::QuirksMode;
7use fast_html5ever::QualName;
8use fast_html5ever::{driver, serialize};
9use tendril::TendrilSink;
10
11use crate::element_ref::ElementRef;
12use crate::node::Node;
13use crate::selector::Selector;
14
15lazy_static! {
16 static ref HTML_SELECTOR: Selector = Selector::parse("html").unwrap();
17}
18
19#[derive(Debug, Clone)]
26pub struct Html {
27 pub quirks_mode: QuirksMode,
29 pub tree: Tree<Node>,
31 pub lang: String,
33}
34
35impl Html {
36 pub fn new_document() -> Self {
38 Html {
39 quirks_mode: QuirksMode::NoQuirks,
40 tree: Tree::new(Node::Document),
41 lang: Default::default(),
42 }
43 }
44
45 pub fn new_fragment() -> Self {
47 Html {
48 quirks_mode: QuirksMode::NoQuirks,
49 tree: Tree::new(Node::Fragment),
50 lang: Default::default(),
51 }
52 }
53
54 pub fn parse_document(document: &str) -> Self {
72 let parser = driver::parse_document(Self::new_document(), Default::default());
73 parser.one(document)
74 }
75
76 pub fn parse_fragment(fragment: &str) -> Self {
78 let parser = driver::parse_fragment(
79 Self::new_fragment(),
80 Default::default(),
81 QualName::new(None, ns!(html), local_name!("body")),
82 Vec::new(),
83 );
84 parser.one(fragment)
85 }
86
87 pub fn select<'a, 'b>(&'a self, selector: &'b Selector) -> Select<'a, 'b> {
89 Select {
90 inner: self.tree.nodes(),
91 selector,
92 }
93 }
94
95 pub fn root_element(&self) -> ElementRef {
97 let root_node = self
98 .tree
99 .root()
100 .children()
101 .find(|child| child.value().is_element())
102 .expect("html node missing");
103 ElementRef::wrap(root_node).unwrap()
104 }
105
106 pub fn set_language(&mut self, lang: String) {
108 self.lang = lang;
109 }
110
111 pub fn get_lang(&self) -> &str {
113 if self.lang.is_empty() {
114 if let Some(element) = self.select(&HTML_SELECTOR).next() {
115 if let Some(lang) = element.value().attr("lang") {
116 return lang;
117 }
118 }
119 &self.lang
120 } else {
121 &self.lang
122 }
123 }
124
125 pub fn html(&self) -> String {
127 let opts = SerializeOpts {
128 scripting_enabled: false, traversal_scope: fast_html5ever::serialize::TraversalScope::IncludeNode,
130 create_missing_parent: false,
131 };
132 let mut buf = Vec::new();
133 let _ = serialize(&mut buf, self, opts);
134 auto_encoder::auto_encode_bytes(&buf)
135 }
136
137 pub fn remove_node(&mut self, node_id: NodeId) {
139 if let Some(mut node) = self.tree.get_mut(node_id) {
140 node.detach();
141 }
142 }
143}
144
145#[derive(Debug)]
147pub struct Select<'a, 'b> {
148 inner: Nodes<'a, Node>,
149 selector: &'b Selector,
150}
151
152impl<'a, 'b> Iterator for Select<'a, 'b> {
153 type Item = ElementRef<'a>;
154
155 fn next(&mut self) -> Option<ElementRef<'a>> {
156 for node in self.inner.by_ref() {
157 if let Some(element) = ElementRef::wrap(node) {
158 if element.parent().is_some() && self.selector.matches(&element) {
159 return Some(element);
160 }
161 }
162 }
163 None
164 }
165}
166
167impl<'a, 'b> DoubleEndedIterator for Select<'a, 'b> {
168 fn next_back(&mut self) -> Option<Self::Item> {
169 for node in self.inner.by_ref().rev() {
170 if let Some(element) = ElementRef::wrap(node) {
171 if element.parent().is_some() && self.selector.matches(&element) {
172 return Some(element);
173 }
174 }
175 }
176 None
177 }
178}
179
180mod serializable;
181mod tree_sink;
182
#[cfg(test)]
mod tests {
    use super::{Html, Selector};

    /// The root element of a fragment gives access to the parsed anchor.
    #[test]
    fn root_element_fragment() {
        let fragment = Html::parse_fragment(r#"<a href="http://github.com">1</a>"#);
        let root = fragment.root_element();
        let anchor_selector = Selector::parse("a").unwrap();
        let anchor = root.select(&anchor_selector).next().unwrap();

        assert_eq!(anchor.inner_html(), "1");
        assert_eq!(anchor.value().attr("href").unwrap(), "http://github.com");
    }

    /// A leading doctype is not mistaken for the root element.
    #[test]
    fn root_element_document_doctype() {
        let document = Html::parse_document("<!DOCTYPE html>\n<title>abc</title>");
        let root = document.root_element();
        let title_selector = Selector::parse("title").unwrap();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// A leading comment is not mistaken for the root element.
    #[test]
    fn root_element_document_comment() {
        let document = Html::parse_document("<!-- comment --><title>abc</title>");
        let root = document.root_element();
        let title_selector = Selector::parse("title").unwrap();
        let title = root.select(&title_selector).next().unwrap();

        assert_eq!(title.inner_html(), "abc");
    }

    /// Reversing a selection yields the matches in reverse document order.
    #[test]
    fn select_is_reversible() {
        let document = Html::parse_document("<p>element1</p><p>element2</p><p>element3</p>");
        let paragraph = Selector::parse("p").unwrap();

        let mut reversed = Vec::new();
        for element in document.select(&paragraph).rev() {
            reversed.push(element.inner_html());
        }

        assert_eq!(reversed, vec!["element3", "element2", "element1"]);
    }
}