accessibility_scraper/html/
tree_sink.rs

1use super::Html;
2use crate::node::{Doctype, Element, Node, ProcessingInstruction, Text};
3use crate::tendril_util::make as make_tendril;
4use ego_tree::NodeId;
5use fast_html5ever::tendril::StrTendril;
6use fast_html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
7use fast_html5ever::Attribute;
8use fast_html5ever::{ExpandedName, QualName};
9use std::borrow::Cow;
10
11/// Note: does not support the `<template>` element.
12impl TreeSink for Html {
13    type Output = Self;
14    type Handle = NodeId;
15
16    fn finish(self) -> Self {
17        self
18    }
19
20    // Signal a parse error.
21    fn parse_error(&mut self, msg: Cow<'static, str>) {
22        #[cfg(feature = "errors")]
23        self.errors.push(msg);
24        #[cfg(not(feature = "errors"))]
25        let _ = msg;
26    }
27
28    // Set the document's quirks mode.
29    fn set_quirks_mode(&mut self, mode: QuirksMode) {
30        self.quirks_mode = mode;
31    }
32
33    // Get a handle to the Document node.
34    fn get_document(&mut self) -> Self::Handle {
35        self.tree.root().id()
36    }
37
38    // Do two handles refer to the same node?
39    fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
40        x == y
41    }
42
43    // What is the name of this element?
44    //
45    // Should never be called on a non-element node; feel free to panic!.
46    fn elem_name(&self, target: &Self::Handle) -> ExpandedName {
47        self.tree
48            .get(*target)
49            .unwrap()
50            .value()
51            .as_element()
52            .unwrap()
53            .name
54            .expanded()
55    }
56
57    // Create an element.
58    //
59    // When creating a template element (name.ns.expanded() == expanded_name!(html "template")), an
60    // associated document fragment called the "template contents" should also be created. Later
61    // calls to self.get_template_contents() with that given element return it.
62    fn create_element(
63        &mut self,
64        name: QualName,
65        attrs: Vec<Attribute>,
66        _flags: ElementFlags,
67    ) -> Self::Handle {
68        let mut node = self
69            .tree
70            .orphan(Node::Element(Element::new(name.clone(), attrs)));
71        if name.expanded() == expanded_name!(html "template") {
72            node.append(Node::Fragment);
73        }
74        node.id()
75    }
76
77    // Create a comment node.
78    fn create_comment(&mut self, _text: StrTendril) {
79        // self.tree
80        //     .orphan(Node::Comment(Comment {
81        //         comment: make_tendril(text),
82        //     }))
83        //     .id()
84    }
85
86    // Append a DOCTYPE element to the Document node.
87    fn append_doctype_to_document(
88        &mut self,
89        name: StrTendril,
90        public_id: StrTendril,
91        system_id: StrTendril,
92    ) {
93        let name = make_tendril(name);
94        let public_id = make_tendril(public_id);
95        let system_id = make_tendril(system_id);
96        let doctype = Doctype {
97            name,
98            public_id,
99            system_id,
100        };
101        self.tree.root_mut().append(Node::Doctype(doctype));
102    }
103
104    // Append a node as the last child of the given node. If this would produce adjacent sibling
105    // text nodes, it should concatenate the text instead.
106    //
107    // The child node will not already have a parent.
108    fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>) {
109        let mut parent = self.tree.get_mut(*parent).unwrap();
110
111        match child {
112            NodeOrText::AppendNode(id) => {
113                parent.append_id(id);
114            }
115
116            NodeOrText::AppendText(text) => {
117                let text = make_tendril(text);
118                let can_concat = parent
119                    .last_child()
120                    .map_or(false, |mut n| n.value().is_text());
121
122                if can_concat {
123                    let mut last_child = parent.last_child().unwrap();
124                    match *last_child.value() {
125                        Node::Text(ref mut t) => t.text.push_tendril(&text),
126                        _ => unreachable!(),
127                    }
128                } else {
129                    parent.append(Node::Text(Text { text }));
130                }
131            }
132        }
133    }
134
135    // Append a node as the sibling immediately before the given node. If that node has no parent,
136    // do nothing and return Err(new_node).
137    //
138    // The tree builder promises that sibling is not a text node. However its old previous sibling,
139    // which would become the new node's previous sibling, could be a text node. If the new node is
140    // also a text node, the two should be merged, as in the behavior of append.
141    //
142    // NB: new_node may have an old parent, from which it should be removed.
143    fn append_before_sibling(
144        &mut self,
145        sibling: &Self::Handle,
146        new_node: NodeOrText<Self::Handle>,
147    ) {
148        if let NodeOrText::AppendNode(id) = new_node {
149            self.tree.get_mut(id).unwrap().detach();
150        }
151
152        let mut sibling = self.tree.get_mut(*sibling).unwrap();
153        if sibling.parent().is_some() {
154            match new_node {
155                NodeOrText::AppendNode(id) => {
156                    sibling.insert_id_before(id);
157                }
158
159                NodeOrText::AppendText(text) => {
160                    let text = make_tendril(text);
161                    let can_concat = sibling
162                        .prev_sibling()
163                        .map_or(false, |mut n| n.value().is_text());
164
165                    if can_concat {
166                        let mut prev_sibling = sibling.prev_sibling().unwrap();
167                        match *prev_sibling.value() {
168                            Node::Text(ref mut t) => t.text.push_tendril(&text),
169                            _ => unreachable!(),
170                        }
171                    } else {
172                        sibling.insert_before(Node::Text(Text { text }));
173                    }
174                }
175            }
176        }
177    }
178
179    // Detach the given node from its parent.
180    fn remove_from_parent(&mut self, target: &Self::Handle) {
181        self.tree.get_mut(*target).unwrap().detach();
182    }
183
184    // Remove all the children from node and append them to new_parent.
185    fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
186        self.tree
187            .get_mut(*new_parent)
188            .unwrap()
189            .reparent_from_id_append(*node);
190    }
191
192    // Add each attribute to the given element, if no attribute with that name already exists. The
193    // tree builder promises this will never be called with something else than an element.
194    fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>) {
195        let mut node = self.tree.get_mut(*target).unwrap();
196        let element = match *node.value() {
197            Node::Element(ref mut e) => e,
198            _ => unreachable!(),
199        };
200
201        for attr in attrs {
202            element
203                .attrs
204                .entry(attr.name)
205                .or_insert_with(|| make_tendril(attr.value));
206        }
207    }
208
209    // Get a handle to a template's template contents.
210    //
211    // The tree builder promises this will never be called with something else than a template
212    // element.
213    fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
214        self.tree.get(*target).unwrap().first_child().unwrap().id()
215    }
216
217    // Mark a HTML <script> element as "already started".
218    fn mark_script_already_started(&mut self, _node: &Self::Handle) {}
219
220    // Create Processing Instruction.
221    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle {
222        let target = make_tendril(target);
223        let data = make_tendril(data);
224        self.tree
225            .orphan(Node::ProcessingInstruction(ProcessingInstruction {
226                target,
227                data,
228            }))
229            .id()
230    }
231
232    fn append_based_on_parent_node(
233        &mut self,
234        element: &Self::Handle,
235        prev_element: &Self::Handle,
236        child: NodeOrText<Self::Handle>,
237    ) {
238        if self.tree.get(*element).unwrap().parent().is_some() {
239            self.append_before_sibling(element, child)
240        } else {
241            self.append(prev_element, child)
242        }
243    }
244}