markup5ever_rcdom/
lib.rs

1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! A simple reference-counted DOM.
11//!
12//! This is sufficient as a static parse tree, but don't build a
13//! web browser using it. :)
14//!
15//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
16//! format. For example, the following graph
17//!
18//! ```text
19//! div
20//!  +- "text node"
21//!  +- span
22//! ```
23//! in HTML would be serialized as
24//!
25//! ```html
26//! <div>text node<span></span></div>
27//! ```
28//!
29//! See the [document object model article on wikipedia][dom wiki] for more information.
30//!
31//! This implementation stores the information associated with each node once, and then hands out
32//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
33//! a new ref and then a node will outlive the document. Nodes own their children, but only have
34//! weak references to their parents.
35//!
36//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
37//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model
38
39extern crate markup5ever;
40extern crate tendril;
41
42use std::borrow::Cow;
43use std::cell::{Cell, RefCell};
44use std::collections::{HashSet, VecDeque};
45use std::fmt;
46use std::io;
47use std::mem;
48use std::rc::{Rc, Weak};
49
50use tendril::StrTendril;
51
52use markup5ever::interface::tree_builder;
53use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
54use markup5ever::serialize::TraversalScope;
55use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
56use markup5ever::serialize::{Serialize, Serializer};
57use markup5ever::Attribute;
58use markup5ever::ExpandedName;
59use markup5ever::QualName;
60
61/// The different kinds of nodes in the DOM.
62#[derive(Debug)]
63pub enum NodeData {
64    /// The `Document` itself - the root node of a HTML document.
65    Document,
66
67    /// A `DOCTYPE` with name, public id, and system id. See
68    /// [document type declaration on wikipedia][dtd wiki].
69    ///
70    /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
71    Doctype {
72        name: StrTendril,
73        public_id: StrTendril,
74        system_id: StrTendril,
75    },
76
77    /// A text node.
78    Text { contents: RefCell<StrTendril> },
79
80    /// A comment.
81    Comment { contents: StrTendril },
82
83    /// An element with attributes.
84    Element {
85        name: QualName,
86        attrs: RefCell<Vec<Attribute>>,
87
88        /// For HTML \<template\> elements, the [template contents].
89        ///
90        /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
91        template_contents: RefCell<Option<Handle>>,
92
93        /// Whether the node is a [HTML integration point].
94        ///
95        /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
96        mathml_annotation_xml_integration_point: bool,
97    },
98
99    /// A Processing instruction.
100    ProcessingInstruction {
101        target: StrTendril,
102        contents: StrTendril,
103    },
104}
105
106/// A DOM node.
107pub struct Node {
108    /// Parent node.
109    pub parent: Cell<Option<WeakHandle>>,
110    /// Child nodes of this node.
111    pub children: RefCell<Vec<Handle>>,
112    /// Represents this node's data.
113    pub data: NodeData,
114}
115
116impl Node {
117    /// Create a new node from its contents
118    pub fn new(data: NodeData) -> Rc<Self> {
119        Rc::new(Node {
120            data,
121            parent: Cell::new(None),
122            children: RefCell::new(Vec::new()),
123        })
124    }
125}
126
127impl Drop for Node {
128    fn drop(&mut self) {
129        let mut nodes = mem::take(&mut *self.children.borrow_mut());
130        while let Some(node) = nodes.pop() {
131            let children = mem::take(&mut *node.children.borrow_mut());
132            nodes.extend(children.into_iter());
133            if let NodeData::Element {
134                ref template_contents,
135                ..
136            } = node.data
137            {
138                if let Some(template_contents) = template_contents.borrow_mut().take() {
139                    nodes.push(template_contents);
140                }
141            }
142        }
143    }
144}
145
146impl fmt::Debug for Node {
147    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
148        fmt.debug_struct("Node")
149            .field("data", &self.data)
150            .field("children", &self.children)
151            .finish()
152    }
153}
154
155/// Reference to a DOM node.
156pub type Handle = Rc<Node>;
157
158/// Weak reference to a DOM node, used for parent pointers.
159pub type WeakHandle = Weak<Node>;
160
161/// Append a parentless node to another nodes' children
162fn append(new_parent: &Handle, child: Handle) {
163    let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
164    // Invariant: child cannot have existing parent
165    assert!(previous_parent.is_none());
166    new_parent.children.borrow_mut().push(child);
167}
168
169/// If the node has a parent, get it and this node's position in its children
170fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> {
171    if let Some(weak) = target.parent.take() {
172        let parent = weak.upgrade().expect("dangling weak pointer");
173        target.parent.set(Some(weak));
174        let i = match parent
175            .children
176            .borrow()
177            .iter()
178            .enumerate()
179            .find(|&(_, child)| Rc::ptr_eq(child, target))
180        {
181            Some((i, _)) => i,
182            None => panic!("have parent but couldn't find in parent's children!"),
183        };
184        Some((parent, i))
185    } else {
186        None
187    }
188}
189
190fn append_to_existing_text(prev: &Handle, text: &str) -> bool {
191    match prev.data {
192        NodeData::Text { ref contents } => {
193            contents.borrow_mut().push_slice(text);
194            true
195        },
196        _ => false,
197    }
198}
199
200fn remove_from_parent(target: &Handle) {
201    if let Some((parent, i)) = get_parent_and_index(target) {
202        parent.children.borrow_mut().remove(i);
203        target.parent.set(None);
204    }
205}
206
207/// The DOM itself; the result of parsing.
208pub struct RcDom {
209    /// The `Document` itself.
210    pub document: Handle,
211
212    /// Errors that occurred during parsing.
213    pub errors: Vec<Cow<'static, str>>,
214
215    /// The document's quirks mode.
216    pub quirks_mode: QuirksMode,
217}
218
219impl TreeSink for RcDom {
220    type Output = Self;
221    fn finish(self) -> Self {
222        self
223    }
224
225    type Handle = Handle;
226
227    fn parse_error(&mut self, msg: Cow<'static, str>) {
228        self.errors.push(msg);
229    }
230
231    fn get_document(&mut self) -> Handle {
232        self.document.clone()
233    }
234
235    fn get_template_contents(&mut self, target: &Handle) -> Handle {
236        if let NodeData::Element {
237            ref template_contents,
238            ..
239        } = target.data
240        {
241            template_contents
242                .borrow()
243                .as_ref()
244                .expect("not a template element!")
245                .clone()
246        } else {
247            panic!("not a template element!")
248        }
249    }
250
251    fn set_quirks_mode(&mut self, mode: QuirksMode) {
252        self.quirks_mode = mode;
253    }
254
255    fn same_node(&self, x: &Handle, y: &Handle) -> bool {
256        Rc::ptr_eq(x, y)
257    }
258
259    fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> {
260        return match target.data {
261            NodeData::Element { ref name, .. } => name.expanded(),
262            _ => panic!("not an element!"),
263        };
264    }
265
266    fn create_element(
267        &mut self,
268        name: QualName,
269        attrs: Vec<Attribute>,
270        flags: ElementFlags,
271    ) -> Handle {
272        Node::new(NodeData::Element {
273            name,
274            attrs: RefCell::new(attrs),
275            template_contents: RefCell::new(if flags.template {
276                Some(Node::new(NodeData::Document))
277            } else {
278                None
279            }),
280            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
281        })
282    }
283
284    fn create_comment(&mut self, text: StrTendril) -> Handle {
285        Node::new(NodeData::Comment { contents: text })
286    }
287
288    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle {
289        Node::new(NodeData::ProcessingInstruction {
290            target,
291            contents: data,
292        })
293    }
294
295    fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
296        // Append to an existing Text node if we have one.
297        if let NodeOrText::AppendText(text) = &child {
298            if let Some(h) = parent.children.borrow().last() {
299                if append_to_existing_text(h, text) {
300                    return;
301                }
302            }
303        }
304
305        append(
306            parent,
307            match child {
308                NodeOrText::AppendText(text) => Node::new(NodeData::Text {
309                    contents: RefCell::new(text),
310                }),
311                NodeOrText::AppendNode(node) => node,
312            },
313        );
314    }
315
316    fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) {
317        let (parent, i) = get_parent_and_index(sibling)
318            .expect("append_before_sibling called on node without parent");
319
320        let child = match (child, i) {
321            // No previous node.
322            (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text {
323                contents: RefCell::new(text),
324            }),
325
326            // Look for a text node before the insertion point.
327            (NodeOrText::AppendText(text), i) => {
328                let children = parent.children.borrow();
329                let prev = &children[i - 1];
330                if append_to_existing_text(prev, &text) {
331                    return;
332                }
333                Node::new(NodeData::Text {
334                    contents: RefCell::new(text),
335                })
336            },
337
338            // The tree builder promises we won't have a text node after
339            // the insertion point.
340
341            // Any other kind of node.
342            (NodeOrText::AppendNode(node), _) => node,
343        };
344
345        remove_from_parent(&child);
346
347        child.parent.set(Some(Rc::downgrade(&parent)));
348        parent.children.borrow_mut().insert(i, child);
349    }
350
351    fn append_based_on_parent_node(
352        &mut self,
353        element: &Self::Handle,
354        prev_element: &Self::Handle,
355        child: NodeOrText<Self::Handle>,
356    ) {
357        let parent = element.parent.take();
358        let has_parent = parent.is_some();
359        element.parent.set(parent);
360
361        if has_parent {
362            self.append_before_sibling(element, child);
363        } else {
364            self.append(prev_element, child);
365        }
366    }
367
368    fn append_doctype_to_document(
369        &mut self,
370        name: StrTendril,
371        public_id: StrTendril,
372        system_id: StrTendril,
373    ) {
374        append(
375            &self.document,
376            Node::new(NodeData::Doctype {
377                name,
378                public_id,
379                system_id,
380            }),
381        );
382    }
383
384    fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
385        let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
386            attrs.borrow_mut()
387        } else {
388            panic!("not an element")
389        };
390
391        let existing_names = existing
392            .iter()
393            .map(|e| e.name.clone())
394            .collect::<HashSet<_>>();
395        existing.extend(
396            attrs
397                .into_iter()
398                .filter(|attr| !existing_names.contains(&attr.name)),
399        );
400    }
401
402    fn remove_from_parent(&mut self, target: &Handle) {
403        remove_from_parent(target);
404    }
405
406    fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
407        let mut children = node.children.borrow_mut();
408        let mut new_children = new_parent.children.borrow_mut();
409        for child in children.iter() {
410            let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
411            assert!(Rc::ptr_eq(
412                node,
413                &previous_parent.unwrap().upgrade().expect("dangling weak")
414            ))
415        }
416        new_children.extend(mem::take(&mut *children));
417    }
418
419    fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
420        if let NodeData::Element {
421            mathml_annotation_xml_integration_point,
422            ..
423        } = target.data
424        {
425            mathml_annotation_xml_integration_point
426        } else {
427            panic!("not an element!")
428        }
429    }
430}
431
432impl Default for RcDom {
433    fn default() -> RcDom {
434        RcDom {
435            document: Node::new(NodeData::Document),
436            errors: vec![],
437            quirks_mode: tree_builder::NoQuirks,
438        }
439    }
440}
441
442enum SerializeOp {
443    Open(Handle),
444    Close(QualName),
445}
446
447pub struct SerializableHandle(Handle);
448
449impl From<Handle> for SerializableHandle {
450    fn from(h: Handle) -> SerializableHandle {
451        SerializableHandle(h)
452    }
453}
454
455impl Serialize for SerializableHandle {
456    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
457    where
458        S: Serializer,
459    {
460        let mut ops = VecDeque::new();
461        match traversal_scope {
462            IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())),
463            ChildrenOnly(_) => ops.extend(
464                self.0
465                    .children
466                    .borrow()
467                    .iter()
468                    .map(|h| SerializeOp::Open(h.clone())),
469            ),
470        }
471
472        while let Some(op) = ops.pop_front() {
473            match op {
474                SerializeOp::Open(handle) => match handle.data {
475                    NodeData::Element {
476                        ref name,
477                        ref attrs,
478                        ..
479                    } => {
480                        serializer.start_elem(
481                            name.clone(),
482                            attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
483                        )?;
484
485                        ops.reserve(1 + handle.children.borrow().len());
486                        ops.push_front(SerializeOp::Close(name.clone()));
487
488                        for child in handle.children.borrow().iter().rev() {
489                            ops.push_front(SerializeOp::Open(child.clone()));
490                        }
491                    },
492
493                    NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?,
494
495                    NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?,
496
497                    NodeData::Comment { ref contents } => serializer.write_comment(contents)?,
498
499                    NodeData::ProcessingInstruction {
500                        ref target,
501                        ref contents,
502                    } => serializer.write_processing_instruction(target, contents)?,
503
504                    NodeData::Document => panic!("Can't serialize Document node itself"),
505                },
506
507                SerializeOp::Close(name) => {
508                    serializer.end_elem(name)?;
509                },
510            }
511        }
512
513        Ok(())
514    }
515}