1use crate::dom_tree::append_to_existing_text;
2use crate::dom_tree::Element;
3use crate::dom_tree::NodeData;
4use crate::dom_tree::NodeId;
5use crate::dom_tree::NodeRef;
6use crate::dom_tree::Tree;
7use html5ever::parse_document;
8use markup5ever::interface::tree_builder;
9use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
10use markup5ever::Attribute;
11use markup5ever::ExpandedName;
12use markup5ever::QualName;
13use std::borrow::Cow;
14use std::collections::HashSet;
15use tendril::StrTendril;
16use tendril::TendrilSink;
17
18pub struct Document {
20 pub(crate) tree: Tree<NodeData>,
22
23 pub errors: Vec<Cow<'static, str>>,
25
26 pub quirks_mode: QuirksMode,
28}
29
30impl Default for Document {
31 fn default() -> Document {
32 Self {
33 tree: Tree::new(NodeData::Document),
34 errors: vec![],
35 quirks_mode: tree_builder::NoQuirks,
36 }
37 }
38}
39
40impl From<&str> for Document {
41 fn from(html: &str) -> Document {
42 parse_document(Document::default(), Default::default()).one(html)
43 }
44}
45
46impl From<StrTendril> for Document {
47 fn from(html: StrTendril) -> Document {
48 parse_document(Document::default(), Default::default()).one(html)
49 }
50}
51
52impl From<&String> for Document {
53 fn from(html: &String) -> Document {
54 Document::from(html.as_str())
55 }
56}
57
58impl Document {
59 pub fn root(&self) -> NodeRef<NodeData> {
61 self.tree.root()
62 }
63}
64
65impl TreeSink for Document {
66 type Output = Self;
68
69 fn finish(self) -> Self {
71 self
72 }
73
74 type Handle = NodeId;
77
78 fn parse_error(&mut self, msg: Cow<'static, str>) {
80 self.errors.push(msg);
81 }
82
83 fn get_document(&mut self) -> NodeId {
85 self.tree.root_id()
86 }
87
88 fn get_template_contents(&mut self, target: &NodeId) -> NodeId {
91 self.tree.query_node(target, |node| match node.data {
92 NodeData::Element(Element {
93 template_contents: Some(ref contents),
94 ..
95 }) => contents.clone(),
96 _ => panic!("not a template element!"),
97 })
98 }
99
100 fn set_quirks_mode(&mut self, mode: QuirksMode) {
102 self.quirks_mode = mode;
103 }
104
105 fn same_node(&self, x: &NodeId, y: &NodeId) -> bool {
107 *x == *y
108 }
109
110 fn elem_name(&self, target: &NodeId) -> ExpandedName {
113 self.tree.query_node(target, |node| match node.data {
114 NodeData::Element(Element { .. }) => self.tree.get_name(target).expanded(),
115 _ => panic!("not an element!"),
116 })
117 }
118
119 fn create_element(
124 &mut self,
125 name: QualName,
126 attrs: Vec<Attribute>,
127 flags: ElementFlags,
128 ) -> NodeId {
129 let template_contents = if flags.template {
130 Some(self.tree.create_node(NodeData::Document))
131 } else {
132 None
133 };
134
135 let id = self.tree.create_node(NodeData::Element(Element::new(
136 name.clone(),
137 attrs,
138 template_contents,
139 flags.mathml_annotation_xml_integration_point,
140 )));
141
142 self.tree.set_name(id, name);
143 id
144 }
145
146 fn create_comment(&mut self, text: StrTendril) -> NodeId {
148 self.tree.create_node(NodeData::Comment { contents: text })
149 }
150
151 fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeId {
153 self.tree.create_node(NodeData::ProcessingInstruction {
154 target: target,
155 contents: data,
156 })
157 }
158
159 fn append(&mut self, parent: &NodeId, child: NodeOrText<NodeId>) {
163 match child {
166 NodeOrText::AppendNode(node_id) => self.tree.append_child_of(parent, &node_id),
167 NodeOrText::AppendText(text) => {
168 let last_child = self.tree.last_child_of(parent);
169 let concated = last_child
170 .map(|child| {
171 self.tree
172 .update_node(&child.id, |node| append_to_existing_text(node, &text))
173 })
174 .unwrap_or(false);
175
176 if concated {
177 return;
178 }
179
180 self.tree
181 .append_child_data_of(parent, NodeData::Text { contents: text })
182 }
183 }
184 }
185
186 fn append_before_sibling(&mut self, sibling: &NodeId, child: NodeOrText<NodeId>) {
191 match child {
192 NodeOrText::AppendText(text) => {
193 let prev_sibling = self.tree.prev_sibling_of(sibling);
194 let concated = prev_sibling
195 .map(|sibling| {
196 self.tree
197 .update_node(&sibling.id, |node| append_to_existing_text(node, &text))
198 })
199 .unwrap_or(false);
200
201 if concated {
202 return;
203 }
204
205 let id = self.tree.create_node(NodeData::Text { contents: text });
206 self.tree.append_prev_sibling_of(sibling, &id);
207 }
208
209 NodeOrText::AppendNode(id) => self.tree.append_prev_sibling_of(sibling, &id),
214 };
215 }
216
217 fn append_based_on_parent_node(
221 &mut self,
222 element: &NodeId,
223 prev_element: &NodeId,
224 child: NodeOrText<NodeId>,
225 ) {
226 let has_parent = self.tree.parent_of(element).is_some();
227
228 if has_parent {
229 self.append_before_sibling(element, child);
230 } else {
231 self.append(prev_element, child);
232 }
233 }
234
235 fn append_doctype_to_document(
237 &mut self,
238 name: StrTendril,
239 public_id: StrTendril,
240 system_id: StrTendril,
241 ) {
242 let root = self.tree.root_id();
243 self.tree.append_child_data_of(
244 &root,
245 NodeData::Doctype {
246 name: name,
247 public_id: public_id,
248 system_id: system_id,
249 },
250 );
251 }
252
253 fn add_attrs_if_missing(&mut self, target: &NodeId, attrs: Vec<Attribute>) {
256 self.tree.update_node(target, |node| {
257 let existing = if let NodeData::Element(Element { ref mut attrs, .. }) = node.data {
258 attrs
259 } else {
260 panic!("not an element")
261 };
262 let existing_names = existing
263 .iter()
264 .map(|e| e.name.clone())
265 .collect::<HashSet<_>>();
266 existing.extend(
267 attrs
268 .into_iter()
269 .filter(|attr| !existing_names.contains(&attr.name)),
270 );
271 })
272 }
273
274 fn remove_from_parent(&mut self, target: &NodeId) {
276 self.tree.remove_from_parent(target);
277 }
278
279 fn reparent_children(&mut self, node: &NodeId, new_parent: &NodeId) {
281 self.tree.reparent_children_of(node, Some(*new_parent));
282 }
283}
284
#[cfg(test)]
mod tests {
    use super::*;
    use html5ever::driver::parse_document;
    use tendril::TendrilSink;

    /// Smoke test: parsing a small HTML page into a `Document` sink must
    /// complete without panicking.
    #[test]
    fn test_parse_html_dom() {
        let html = r#"
            <!DOCTYPE html>
            <meta charset="utf-8">
            <title>Hello, world!</title>
            <h1 class="foo">Hello, <i>world!</i></h1>
        "#;

        let sink = Document::default();
        let _document: Document = parse_document(sink, Default::default()).one(html);
    }
}