generational_arena_dom/
lib.rs

1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10// Based on https://github.com/servo/html5ever/blob/f413b98631f6f2998da48b14ebf34991b45ebcec/rcdom/lib.rs
11// and https://github.com/servo/html5ever/blob/f413b98631f6f2998da48b14ebf34991b45ebcec/html5ever/examples/arena.rs
12// Modified to use generational_indextree
13// The main implementation work here was implementing `TreeSink` for GenerationalArenaDom
14
15use generational_indextree::{Arena as TreeArena, NodeId};
16
17use std::borrow::Cow;
18use std::cell::RefCell;
19use std::collections::HashSet;
20use std::default::Default;
21
22use markup5ever::tendril::StrTendril;
23
24use markup5ever::interface::tree_builder;
25use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
26use markup5ever::Attribute;
27use markup5ever::ExpandedName;
28use markup5ever::QualName;
29
30/// The different kinds of nodes in the DOM.
31#[derive(Debug)]
32pub enum NodeData {
33    /// The `Document` itself - the root node of a HTML document.
34    Document,
35
36    /// A `DOCTYPE` with name, public id, and system id. See
37    /// [document type declaration on wikipedia][dtd wiki].
38    ///
39    /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
40    Doctype {
41        name: StrTendril,
42        public_id: StrTendril,
43        system_id: StrTendril,
44    },
45
46    /// A text node.
47    Text { contents: RefCell<StrTendril> },
48
49    /// A comment.
50    Comment { contents: StrTendril },
51
52    /// An element with attributes.
53    Element {
54        name: QualName,
55        attrs: RefCell<Vec<Attribute>>,
56
57        /// For HTML \<template\> elements, the [template contents].
58        ///
59        /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
60        template_contents: RefCell<Option<Handle>>,
61
62        /// Whether the node is a [HTML integration point].
63        ///
64        /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
65        mathml_annotation_xml_integration_point: bool,
66    },
67
68    /// A Processing instruction.
69    ProcessingInstruction {
70        target: StrTendril,
71        contents: StrTendril,
72    },
73}
74
75/// The Arena holding node data
76pub type Arena = TreeArena<NodeData>;
77
78/// Reference to a DOM node.
79pub type Handle = NodeId;
80
81fn append_to_existing_text(arena: &Arena, prev: Handle, text: &str) -> bool {
82    match arena.get(prev) {
83        Some(prev_node) => match prev_node.get() {
84            NodeData::Text { ref contents } => {
85                contents.borrow_mut().push_slice(text);
86                true
87            }
88            _ => false,
89        },
90        None => panic!("Node doesn't exist??"),
91    }
92}
93
94/// The DOM itself; the result of parsing.
95pub struct GenerationalArenaDom {
96    /// Arena holding the nodes of the Tree
97    pub arena: Arena,
98    /// The `Document` itself.
99    pub document: Handle,
100
101    /// Errors that occurred during parsing.
102    pub errors: Vec<Cow<'static, str>>,
103
104    /// The document's quirks mode.
105    pub quirks_mode: QuirksMode,
106}
107
108impl GenerationalArenaDom {
109    fn get_node(&self, target: &Handle) -> &NodeData {
110        self.arena.get(*target).expect("Invalid node!").get()
111    }
112
113    fn preceding_node(&self, target: &Handle) -> Option<Handle> {
114        self.arena
115            .get(*target)
116            .expect("Invalid node!")
117            .previous_sibling()
118    }
119}
120
121impl TreeSink for GenerationalArenaDom {
122    type Output = Self;
123    fn finish(self) -> Self {
124        self
125    }
126
127    type Handle = Handle;
128
129    fn parse_error(&mut self, msg: Cow<'static, str>) {
130        self.errors.push(msg);
131    }
132
133    fn get_document(&mut self) -> Handle {
134        self.document.clone()
135    }
136
137    fn elem_name(&self, target: &'_ Handle) -> ExpandedName<'_> {
138        return match self.get_node(target) {
139            NodeData::Element { ref name, .. } => name.expanded(),
140            _ => panic!("not an element!"),
141        };
142    }
143
144    fn create_element(
145        &mut self,
146        name: QualName,
147        attrs: Vec<Attribute>,
148        flags: ElementFlags,
149    ) -> Handle {
150        let template_inner = if flags.template {
151            Some(self.arena.new_node(NodeData::Document))
152        } else {
153            None
154        };
155        self.arena.new_node(NodeData::Element {
156            name,
157            attrs: RefCell::new(attrs),
158            template_contents: RefCell::new(template_inner),
159            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
160        })
161    }
162
163    fn create_comment(&mut self, text: StrTendril) -> Handle {
164        self.arena.new_node(NodeData::Comment { contents: text })
165    }
166
167    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle {
168        self.arena.new_node(NodeData::ProcessingInstruction {
169            target,
170            contents: data,
171        })
172    }
173
174    fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
175        let parent_node = self.arena.get(*parent).expect("Invalid node!");
176        // Append to an existing Text node if we have one.
177        match child {
178            NodeOrText::AppendText(ref text) => match parent_node.last_child() {
179                Some(h) => {
180                    if append_to_existing_text(&self.arena, h, text) {
181                        return;
182                    }
183                }
184                _ => (),
185            },
186            _ => (),
187        }
188
189        let new_child = match child {
190            NodeOrText::AppendText(text) => self.arena.new_node(NodeData::Text {
191                contents: RefCell::new(text),
192            }),
193            NodeOrText::AppendNode(node) => node,
194        };
195        parent.append(new_child, &mut self.arena);
196    }
197
198    fn append_based_on_parent_node(
199        &mut self,
200        element: &Self::Handle,
201        prev_element: &Self::Handle,
202        child: NodeOrText<Self::Handle>,
203    ) {
204        let element_node = self.arena.get(*element).expect("Invalid handle!");
205        let parent = element_node.parent();
206        if parent.is_some() {
207            self.append_before_sibling(element, child);
208        } else {
209            self.append(prev_element, child);
210        }
211    }
212
213    fn append_doctype_to_document(
214        &mut self,
215        name: StrTendril,
216        public_id: StrTendril,
217        system_id: StrTendril,
218    ) {
219        let new_node = self.arena.new_node(NodeData::Doctype {
220            name,
221            public_id,
222            system_id,
223        });
224        self.document.append(new_node, &mut self.arena)
225    }
226
227    fn get_template_contents(&mut self, target: &Handle) -> Handle {
228        if let NodeData::Element {
229            ref template_contents,
230            ..
231        } = self.get_node(target)
232        {
233            template_contents
234                .borrow()
235                .as_ref()
236                .expect("not a template element!")
237                .clone()
238        } else {
239            panic!("not a template element!")
240        }
241    }
242
243    fn same_node(&self, x: &Handle, y: &Handle) -> bool {
244        *x == *y
245    }
246
247    fn set_quirks_mode(&mut self, mode: QuirksMode) {
248        self.quirks_mode = mode;
249    }
250
251    fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) {
252        let preceding = self.preceding_node(sibling);
253        let child = match (child, preceding) {
254            // No previous node.
255            (NodeOrText::AppendText(text), None) => self.arena.new_node(NodeData::Text {
256                contents: RefCell::new(text),
257            }),
258
259            // Look for a text node before the insertion point.
260            (NodeOrText::AppendText(text), Some(prev)) => {
261                if append_to_existing_text(&self.arena, prev, &text) {
262                    return;
263                }
264                self.arena.new_node(NodeData::Text {
265                    contents: RefCell::new(text),
266                })
267            }
268
269            // The tree builder promises we won't have a text node after
270            // the insertion point.
271
272            // Any other kind of node.
273            (NodeOrText::AppendNode(node), _) => node,
274        };
275        child.insert_before(*sibling, &mut self.arena);
276    }
277
278    fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
279        let mut existing = if let NodeData::Element { ref attrs, .. } = self.get_node(target) {
280            attrs.borrow_mut()
281        } else {
282            panic!("not an element")
283        };
284
285        let existing_names = existing
286            .iter()
287            .map(|e| e.name.clone())
288            .collect::<HashSet<_>>();
289        existing.extend(
290            attrs
291                .into_iter()
292                .filter(|attr| !existing_names.contains(&attr.name)),
293        );
294    }
295
296    fn remove_from_parent(&mut self, target: &Handle) {
297        target.detach(&mut self.arena);
298    }
299
300    fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
301        let mut next_child = self
302            .arena
303            .get_mut(*node)
304            .and_then(|node| node.first_child());
305        while let Some(child) = next_child {
306            child.detach(&mut self.arena);
307            new_parent.append(child, &mut self.arena);
308            let child_node = self.arena.get_mut(child).unwrap();
309            next_child = child_node.next_sibling();
310        }
311    }
312
313    fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
314        if let NodeData::Element {
315            mathml_annotation_xml_integration_point,
316            ..
317        } = self.get_node(target)
318        {
319            *mathml_annotation_xml_integration_point
320        } else {
321            panic!("not an element!")
322        }
323    }
324}
325
326impl Default for GenerationalArenaDom {
327    fn default() -> GenerationalArenaDom {
328        let mut arena = Arena::new();
329        let document = arena.new_node(NodeData::Document);
330        GenerationalArenaDom {
331            arena,
332            document,
333            errors: vec![],
334            quirks_mode: tree_builder::NoQuirks,
335        }
336    }
337}