scraper/html/
tree_sink.rs

1use super::Html;
2use crate::node::{Doctype, Element, Node, ProcessingInstruction, Text};
3use ego_tree::NodeId;
4use fast_html5ever::tendril::StrTendril;
5use fast_html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
6use fast_html5ever::Attribute;
7use fast_html5ever::{ExpandedName, QualName};
8use std::borrow::Cow;
9
10/// Note: does not support the `<template>` element.
11impl TreeSink for Html {
12    type Output = Self;
13    type Handle = NodeId;
14
15    fn finish(self) -> Self {
16        self
17    }
18
19    // Signal a parse error.
20    fn parse_error(&mut self, _: Cow<'static, str>) {}
21
22    // Set the document's quirks mode.
23    fn set_quirks_mode(&mut self, mode: QuirksMode) {
24        self.quirks_mode = mode;
25    }
26
27    // Get a handle to the Document node.
28    fn get_document(&mut self) -> Self::Handle {
29        self.tree.root().id()
30    }
31
32    // Do two handles refer to the same node?
33    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
34        x == y
35    }
36
37    // What is the name of this element?
38    //
39    // Should never be called on a non-element node; feel free to panic!.
40    fn elem_name(&self, target: &Self::Handle) -> ExpandedName {
41        self.tree
42            .get(*target)
43            .unwrap()
44            .value()
45            .as_element()
46            .unwrap()
47            .name
48            .expanded()
49    }
50
51    // Create an element.
52    //
53    // When creating a template element (name.ns.expanded() == expanded_name!(html "template")), an
54    // associated document fragment called the "template contents" should also be created. Later
55    // calls to self.get_template_contents() with that given element return it.
56    fn create_element(
57        &mut self,
58        name: QualName,
59        attrs: Vec<Attribute>,
60        _flags: ElementFlags,
61    ) -> Self::Handle {
62        let mut node = self
63            .tree
64            .orphan(Node::Element(Element::new(name.clone(), attrs)));
65        if name.expanded() == expanded_name!(html "template") {
66            node.append(Node::Fragment);
67        }
68        node.id()
69    }
70
71    // Create a comment node. todo: remove from tree sink
72    fn create_comment(&mut self, _: StrTendril) {}
73
74    // Append a DOCTYPE element to the Document node.
75    fn append_doctype_to_document(
76        &mut self,
77        name: StrTendril,
78        public_id: StrTendril,
79        system_id: StrTendril,
80    ) {
81        let doctype = Doctype {
82            name: name.into_send().into(),
83            public_id: public_id.into_send().into(),
84            system_id: system_id.into_send().into(),
85        };
86        self.tree.root_mut().append(Node::Doctype(doctype));
87    }
88
89    // Append a node as the last child of the given node. If this would produce adjacent sibling
90    // text nodes, it should concatenate the text instead.
91    //
92    // The child node will not already have a parent.
93    fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>) {
94        let mut parent = self.tree.get_mut(*parent).unwrap();
95
96        match child {
97            NodeOrText::AppendNode(id) => {
98                parent.append_id(id);
99            }
100
101            NodeOrText::AppendText(text) => {
102                let can_concat = parent
103                    .last_child()
104                    .map_or(false, |mut n| n.value().is_text());
105
106                let text = text.into_send().into();
107
108                if can_concat {
109                    let mut last_child = parent.last_child().unwrap();
110                    match *last_child.value() {
111                        Node::Text(ref mut t) => t.text.push_tendril(&text),
112                        _ => unreachable!(),
113                    }
114                } else {
115                    parent.append(Node::Text(Text { text }));
116                }
117            }
118        }
119    }
120
121    // Append a node as the sibling immediately before the given node. If that node has no parent,
122    // do nothing and return Err(new_node).
123    //
124    // The tree builder promises that sibling is not a text node. However its old previous sibling,
125    // which would become the new node's previous sibling, could be a text node. If the new node is
126    // also a text node, the two should be merged, as in the behavior of append.
127    //
128    // NB: new_node may have an old parent, from which it should be removed.
129    fn append_before_sibling(
130        &mut self,
131        sibling: &Self::Handle,
132        new_node: NodeOrText<Self::Handle>,
133    ) {
134        if let NodeOrText::AppendNode(id) = new_node {
135            self.tree.get_mut(id).unwrap().detach();
136        }
137
138        let mut sibling = self.tree.get_mut(*sibling).unwrap();
139        if sibling.parent().is_some() {
140            match new_node {
141                NodeOrText::AppendNode(id) => {
142                    sibling.insert_id_before(id);
143                }
144
145                NodeOrText::AppendText(text) => {
146                    let text = text.into_send().into();
147                    let can_concat = sibling
148                        .prev_sibling()
149                        .map_or(false, |mut n| n.value().is_text());
150
151                    if can_concat {
152                        let mut prev_sibling = sibling.prev_sibling().unwrap();
153                        match *prev_sibling.value() {
154                            Node::Text(ref mut t) => t.text.push_tendril(&text),
155                            _ => unreachable!(),
156                        }
157                    } else {
158                        sibling.insert_before(Node::Text(Text { text }));
159                    }
160                }
161            }
162        }
163    }
164
165    // Detach the given node from its parent.
166    fn remove_from_parent(&mut self, target: &Self::Handle) {
167        if let Some(mut p) = self.tree.get_mut(*target) {
168            p.detach();
169        }
170    }
171
172    // Remove all the children from node and append them to new_parent.
173    fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
174        if let Some(mut p) = self.tree.get_mut(*new_parent) {
175            p.reparent_from_id_append(*node);
176        }
177    }
178
179    // Add each attribute to the given element, if no attribute with that name already exists. The
180    // tree builder promises this will never be called with something else than an element.
181    fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>) {
182        let mut node = self.tree.get_mut(*target).unwrap();
183        let element = match *node.value() {
184            Node::Element(ref mut e) => e,
185            _ => unreachable!(),
186        };
187
188        for attr in attrs {
189            element
190                .attrs
191                .entry(attr.name)
192                .or_insert(attr.value.into_send().into());
193        }
194    }
195
196    // Get a handle to a template's template contents.
197    //
198    // The tree builder promises this will never be called with something else than a template
199    // element.
200    fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
201        self.tree.get(*target).unwrap().first_child().unwrap().id()
202    }
203
204    // Mark a HTML <script> element as "already started".
205    fn mark_script_already_started(&mut self, _node: &Self::Handle) {}
206
207    // Create Processing Instruction.
208    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle {
209        self.tree
210            .orphan(Node::ProcessingInstruction(ProcessingInstruction {
211                target: target.into_send().into(),
212                data: data.into_send().into(),
213            }))
214            .id()
215    }
216
217    fn append_based_on_parent_node(
218        &mut self,
219        element: &Self::Handle,
220        prev_element: &Self::Handle,
221        child: NodeOrText<Self::Handle>,
222    ) {
223        if self.tree.get(*element).unwrap().parent().is_some() {
224            self.append_before_sibling(element, child)
225        } else {
226            self.append(prev_element, child)
227        }
228    }
229}