nipper_trunk/
document.rs

1use crate::dom_tree::append_to_existing_text;
2use crate::dom_tree::Element;
3use crate::dom_tree::NodeData;
4use crate::dom_tree::NodeId;
5use crate::dom_tree::NodeRef;
6use crate::dom_tree::Tree;
7use html5ever::parse_document;
8use markup5ever::interface::tree_builder;
9use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
10use markup5ever::Attribute;
11use markup5ever::ExpandedName;
12use markup5ever::QualName;
13use std::borrow::Cow;
14use std::collections::HashSet;
15use tendril::StrTendril;
16use tendril::TendrilSink;
17
18/// Document represents an HTML document to be manipulated.
19pub struct Document {
20    /// The document's dom tree.
21    pub(crate) tree: Tree<NodeData>,
22
23    /// Errors that occurred during parsing.
24    pub errors: Vec<Cow<'static, str>>,
25
26    /// The document's quirks mode.
27    pub quirks_mode: QuirksMode,
28}
29
30impl Default for Document {
31    fn default() -> Document {
32        Self {
33            tree: Tree::new(NodeData::Document),
34            errors: vec![],
35            quirks_mode: tree_builder::NoQuirks,
36        }
37    }
38}
39
40impl From<&str> for Document {
41    fn from(html: &str) -> Document {
42        parse_document(Document::default(), Default::default()).one(html)
43    }
44}
45
46impl From<StrTendril> for Document {
47    fn from(html: StrTendril) -> Document {
48        parse_document(Document::default(), Default::default()).one(html)
49    }
50}
51
52impl From<&String> for Document {
53    fn from(html: &String) -> Document {
54        Document::from(html.as_str())
55    }
56}
57
58impl Document {
59    /// Return the underlying root document node.
60    pub fn root(&self) -> NodeRef<NodeData> {
61        self.tree.root()
62    }
63}
64
65impl TreeSink for Document {
66    // The overall result of parsing.
67    type Output = Self;
68
69    // Consume this sink and return the overall result of parsing.
70    fn finish(self) -> Self {
71        self
72    }
73
74    // Handle is a reference to a DOM node. The tree builder requires that a `Handle` implements `Clone` to get
75    // another reference to the same node.
76    type Handle = NodeId;
77
78    // Signal a parse error.
79    fn parse_error(&mut self, msg: Cow<'static, str>) {
80        self.errors.push(msg);
81    }
82
83    // Get a handle to the `Document` node.
84    fn get_document(&mut self) -> NodeId {
85        self.tree.root_id()
86    }
87
88    // Get a handle to a template's template contents. The tree builder promises this will never be called with
89    // something else than a template element.
90    fn get_template_contents(&mut self, target: &NodeId) -> NodeId {
91        self.tree.query_node(target, |node| match node.data {
92            NodeData::Element(Element {
93                template_contents: Some(ref contents),
94                ..
95            }) => contents.clone(),
96            _ => panic!("not a template element!"),
97        })
98    }
99
100    // Set the document's quirks mode.
101    fn set_quirks_mode(&mut self, mode: QuirksMode) {
102        self.quirks_mode = mode;
103    }
104
105    // Do two handles refer to the same node?.
106    fn same_node(&self, x: &NodeId, y: &NodeId) -> bool {
107        *x == *y
108    }
109
110    // What is the name of the element?
111    // Should never be called on a non-element node; Feel free to `panic!`.
112    fn elem_name(&self, target: &NodeId) -> ExpandedName {
113        self.tree.query_node(target, |node| match node.data {
114            NodeData::Element(Element { .. }) => self.tree.get_name(target).expanded(),
115            _ => panic!("not an element!"),
116        })
117    }
118
119    // Create an element.
120    // When creating a template element (`name.ns.expanded() == expanded_name!(html"template")`), an
121    // associated document fragment called the "template contents" should also be created. Later calls to
122    // self.get_template_contents() with that given element return it. See `the template element in the whatwg spec`,
123    fn create_element(
124        &mut self,
125        name: QualName,
126        attrs: Vec<Attribute>,
127        flags: ElementFlags,
128    ) -> NodeId {
129        let template_contents = if flags.template {
130            Some(self.tree.create_node(NodeData::Document))
131        } else {
132            None
133        };
134
135        let id = self.tree.create_node(NodeData::Element(Element::new(
136            name.clone(),
137            attrs,
138            template_contents,
139            flags.mathml_annotation_xml_integration_point,
140        )));
141
142        self.tree.set_name(id, name);
143        id
144    }
145
146    // Create a comment node.
147    fn create_comment(&mut self, text: StrTendril) -> NodeId {
148        self.tree.create_node(NodeData::Comment { contents: text })
149    }
150
151    // Create a Processing Instruction node.
152    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeId {
153        self.tree.create_node(NodeData::ProcessingInstruction {
154            target: target,
155            contents: data,
156        })
157    }
158
159    // Append a node as the last child of the given node. If this would produce adjacent slbling text nodes, it
160    // should concatenate the text instead.
161    // The child node will not already have a parent.
162    fn append(&mut self, parent: &NodeId, child: NodeOrText<NodeId>) {
163        // Append to an existing Text node if we have one.
164
165        match child {
166            NodeOrText::AppendNode(node_id) => self.tree.append_child_of(parent, &node_id),
167            NodeOrText::AppendText(text) => {
168                let last_child = self.tree.last_child_of(parent);
169                let concated = last_child
170                    .map(|child| {
171                        self.tree
172                            .update_node(&child.id, |node| append_to_existing_text(node, &text))
173                    })
174                    .unwrap_or(false);
175
176                if concated {
177                    return;
178                }
179
180                self.tree
181                    .append_child_data_of(parent, NodeData::Text { contents: text })
182            }
183        }
184    }
185
186    // Append a node as the sibling immediately before the given node.
187    // The tree builder promises that `sibling` is not a text node. However its old previous sibling, which would
188    // become the new node's previs sibling, could be a text node. If the new node is also a text node, the two
189    // should be merged, as in the behavior of `append`.
190    fn append_before_sibling(&mut self, sibling: &NodeId, child: NodeOrText<NodeId>) {
191        match child {
192            NodeOrText::AppendText(text) => {
193                let prev_sibling = self.tree.prev_sibling_of(sibling);
194                let concated = prev_sibling
195                    .map(|sibling| {
196                        self.tree
197                            .update_node(&sibling.id, |node| append_to_existing_text(node, &text))
198                    })
199                    .unwrap_or(false);
200
201                if concated {
202                    return;
203                }
204
205                let id = self.tree.create_node(NodeData::Text { contents: text });
206                self.tree.append_prev_sibling_of(sibling, &id);
207            }
208
209            // The tree builder promises we won't have a text node after
210            // the insertion point.
211
212            // Any other kind of node.
213            NodeOrText::AppendNode(id) => self.tree.append_prev_sibling_of(sibling, &id),
214        };
215    }
216
217    // When the insertion point is decided by the existence of a parent node of the element, we consider both
218    // possibilities and send the element which will be used if a parent node exists, along with the element to be
219    // used if there isn't one.
220    fn append_based_on_parent_node(
221        &mut self,
222        element: &NodeId,
223        prev_element: &NodeId,
224        child: NodeOrText<NodeId>,
225    ) {
226        let has_parent = self.tree.parent_of(element).is_some();
227
228        if has_parent {
229            self.append_before_sibling(element, child);
230        } else {
231            self.append(prev_element, child);
232        }
233    }
234
235    // Append a `DOCTYPE` element to the `Document` node.
236    fn append_doctype_to_document(
237        &mut self,
238        name: StrTendril,
239        public_id: StrTendril,
240        system_id: StrTendril,
241    ) {
242        let root = self.tree.root_id();
243        self.tree.append_child_data_of(
244            &root,
245            NodeData::Doctype {
246                name: name,
247                public_id: public_id,
248                system_id: system_id,
249            },
250        );
251    }
252
253    // Add each attribute to the given element, if no attribute with that name already exists. The tree builder
254    // promises this will never be called with something else than an element.
255    fn add_attrs_if_missing(&mut self, target: &NodeId, attrs: Vec<Attribute>) {
256        self.tree.update_node(target, |node| {
257            let existing = if let NodeData::Element(Element { ref mut attrs, .. }) = node.data {
258                attrs
259            } else {
260                panic!("not an element")
261            };
262            let existing_names = existing
263                .iter()
264                .map(|e| e.name.clone())
265                .collect::<HashSet<_>>();
266            existing.extend(
267                attrs
268                    .into_iter()
269                    .filter(|attr| !existing_names.contains(&attr.name)),
270            );
271        })
272    }
273
274    // Detach the given node from its parent.
275    fn remove_from_parent(&mut self, target: &NodeId) {
276        self.tree.remove_from_parent(target);
277    }
278
279    // Remove all the children from node and append them to new_parent.
280    fn reparent_children(&mut self, node: &NodeId, new_parent: &NodeId) {
281        self.tree.reparent_children_of(node, Some(*new_parent));
282    }
283}
284
#[cfg(test)]
mod tests {
    use super::*;
    use html5ever::driver::parse_document;
    use tendril::TendrilSink;

    /// Smoke test: running a small page through the parser with `Document`
    /// as the tree sink must complete without panicking.
    #[test]
    fn test_parse_html_dom() {
        let html = r#"
            <!DOCTYPE html>
            <meta charset="utf-8">
            <title>Hello, world!</title>
            <h1 class="foo">Hello, <i>world!</i></h1>
        "#;

        let sink = Document::default();
        let _document = parse_document(sink, Default::default()).one(html);
    }
}