kuchiki/
tree.rs

1use html5ever::tree_builder::QuirksMode;
2use html5ever::QualName;
3use std::cell::{Cell, RefCell};
4use std::fmt;
5use std::ops::Deref;
6use std::rc::{Rc, Weak};
7
8use crate::attributes::{Attribute, Attributes, ExpandedName};
9use crate::cell_extras::*;
10use crate::iter::NodeIterator;
11
12/// Node data specific to the node type.
13#[derive(Debug, PartialEq, Clone)]
14pub enum NodeData {
15    /// Element node
16    Element(ElementData),
17
18    /// Text node
19    Text(RefCell<String>),
20
21    /// Comment node
22    Comment(RefCell<String>),
23
24    /// Processing instruction node
25    ProcessingInstruction(RefCell<(String, String)>),
26
27    /// Doctype node
28    Doctype(Doctype),
29
30    /// Document node
31    Document(DocumentData),
32
33    /// Document fragment node
34    DocumentFragment,
35}
36
37/// Data specific to doctype nodes.
38#[derive(Debug, PartialEq, Clone)]
39pub struct Doctype {
40    /// The name of the doctype
41    pub name: String,
42
43    /// The public ID of the doctype
44    pub public_id: String,
45
46    /// The system ID of the doctype
47    pub system_id: String,
48}
49
50/// Data specific to element nodes.
51#[derive(Debug, PartialEq, Clone)]
52pub struct ElementData {
53    /// The namespace and local name of the element, such as `ns!(html)` and `body`.
54    pub name: QualName,
55
56    /// The attributes of the elements.
57    pub attributes: RefCell<Attributes>,
58
59    /// If the element is an HTML `<template>` element,
60    /// the document fragment node that is the root of template contents.
61    pub template_contents: Option<NodeRef>,
62}
63
64/// Data specific to document nodes.
65#[derive(Debug, PartialEq, Clone)]
66pub struct DocumentData {
67    #[doc(hidden)]
68    pub _quirks_mode: Cell<QuirksMode>,
69}
70
71impl DocumentData {
72    /// The quirks mode of the document, as determined by the HTML parser.
73    #[inline]
74    pub fn quirks_mode(&self) -> QuirksMode {
75        self._quirks_mode.get()
76    }
77}
78
79/// A strong reference to a node.
80///
81/// A node is destroyed when the last strong reference to it dropped.
82///
83/// Each node holds a strong reference to its first child and next sibling (if any),
84/// but only a weak reference to its last child, previous sibling, and parent.
85/// This is to avoid strong reference cycles, which would cause memory leaks.
86///
87/// As a result, a single `NodeRef` is sufficient to keep alive a node
88/// and nodes that are after it in tree order
89/// (its descendants, its following siblings, and their descendants)
90/// but not other nodes in a tree.
91///
92/// To avoid detroying nodes prematurely,
93/// programs typically hold a strong reference to the root of a document
94/// until they’re done with that document.
95#[derive(Clone, Debug)]
96pub struct NodeRef(pub Rc<Node>);
97
98impl Deref for NodeRef {
99    type Target = Node;
100    #[inline]
101    fn deref(&self) -> &Node {
102        &*self.0
103    }
104}
105
106impl Eq for NodeRef {}
107impl PartialEq for NodeRef {
108    #[inline]
109    fn eq(&self, other: &NodeRef) -> bool {
110        let a: *const Node = &*self.0;
111        let b: *const Node = &*other.0;
112        a == b
113    }
114}
115
116/// A node inside a DOM-like tree.
117pub struct Node {
118    parent: Cell<Option<Weak<Node>>>,
119    previous_sibling: Cell<Option<Weak<Node>>>,
120    next_sibling: Cell<Option<Rc<Node>>>,
121    first_child: Cell<Option<Rc<Node>>>,
122    last_child: Cell<Option<Weak<Node>>>,
123    data: NodeData,
124}
125
126impl fmt::Debug for Node {
127    #[inline]
128    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
129        write!(f, "{:?} @ {:?}", self.data, self as *const Node)
130    }
131}
132
133/// Prevent implicit recursion when dropping nodes to avoid overflowing the stack.
134///
135/// The implicit drop is correct, but recursive.
136/// In the worst case (where no node has both a next sibling and a child),
137/// a tree of a few tens of thousands of nodes could cause a stack overflow.
138///
139/// This `Drop` implementations makes sure the recursion does not happen.
140/// Instead, it has an explicit `Vec<Rc<Node>>` stack to traverse the subtree,
141/// but only following `Rc<Node>` references that are "unique":
142/// that have a strong reference count of 1.
143/// Those are the nodes that would have been dropped recursively.
144///
145/// The stack holds ancestors of the current node rather than preceding siblings,
146/// on the assumption that large document trees are typically wider than deep.
147impl Drop for Node {
148    fn drop(&mut self) {
149        // `.take_if_unique_strong()` temporarily leaves the tree in an inconsistent state,
150        // as the corresponding `Weak` reference in the other direction is not removed.
151        // It is important that all `Some(_)` strong references it returns
152        // are dropped by the end of this `drop` call,
153        // and that no user code is invoked in-between.
154
155        // Sharing `stack` between these two calls is not necessary,
156        // but it allows re-using memory allocations.
157        let mut stack = Vec::new();
158        if let Some(rc) = self.first_child.take_if_unique_strong() {
159            non_recursive_drop_unique_rc(rc, &mut stack);
160        }
161        if let Some(rc) = self.next_sibling.take_if_unique_strong() {
162            non_recursive_drop_unique_rc(rc, &mut stack);
163        }
164
165        fn non_recursive_drop_unique_rc(mut rc: Rc<Node>, stack: &mut Vec<Rc<Node>>) {
166            loop {
167                if let Some(child) = rc.first_child.take_if_unique_strong() {
168                    stack.push(rc);
169                    rc = child;
170                    continue;
171                }
172                if let Some(sibling) = rc.next_sibling.take_if_unique_strong() {
173                    // The previous value of `rc: Rc<Node>` is dropped here.
174                    // Since it was unique, the corresponding `Node` is dropped as well.
175                    // `<Node as Drop>::drop` does not call `drop_rc`
176                    // as both the first child and next sibling were already taken.
177                    // Weak reference counts decremented here for `Cell`s that are `Some`:
178                    // * `rc.parent`: still has a strong reference in `stack` or elsewhere
179                    // * `rc.last_child`: this is the last weak ref. Deallocated now.
180                    // * `rc.previous_sibling`: this is the last weak ref. Deallocated now.
181                    rc = sibling;
182                    continue;
183                }
184                if let Some(parent) = stack.pop() {
185                    // Same as in the above comment.
186                    rc = parent;
187                    continue;
188                }
189                return;
190            }
191        }
192    }
193}
194
195impl NodeRef {
196    /// Create a new node.
197    #[inline]
198    pub fn new(data: NodeData) -> NodeRef {
199        NodeRef(Rc::new(Node {
200            parent: Cell::new(None),
201            first_child: Cell::new(None),
202            last_child: Cell::new(None),
203            previous_sibling: Cell::new(None),
204            next_sibling: Cell::new(None),
205            data,
206        }))
207    }
208
209    /// Create a new element node.
210    #[inline]
211    pub fn new_element<I>(name: QualName, attributes: I) -> NodeRef
212    where
213        I: IntoIterator<Item = (ExpandedName, Attribute)>,
214    {
215        NodeRef::new(NodeData::Element(ElementData {
216            template_contents: if name.expanded() == expanded_name!(html "template") {
217                Some(NodeRef::new(NodeData::DocumentFragment))
218            } else {
219                None
220            },
221            name,
222            attributes: RefCell::new(Attributes {
223                map: attributes.into_iter().collect(),
224            }),
225        }))
226    }
227
228    /// Create a new text node.
229    #[inline]
230    pub fn new_text<T: Into<String>>(value: T) -> NodeRef {
231        NodeRef::new(NodeData::Text(RefCell::new(value.into())))
232    }
233
234    /// Create a new comment node.
235    #[inline]
236    pub fn new_comment<T: Into<String>>(value: T) -> NodeRef {
237        NodeRef::new(NodeData::Comment(RefCell::new(value.into())))
238    }
239
240    /// Create a new processing instruction node.
241    #[inline]
242    pub fn new_processing_instruction<T1, T2>(target: T1, data: T2) -> NodeRef
243    where
244        T1: Into<String>,
245        T2: Into<String>,
246    {
247        NodeRef::new(NodeData::ProcessingInstruction(RefCell::new((
248            target.into(),
249            data.into(),
250        ))))
251    }
252
253    /// Create a new doctype node.
254    #[inline]
255    pub fn new_doctype<T1, T2, T3>(name: T1, public_id: T2, system_id: T3) -> NodeRef
256    where
257        T1: Into<String>,
258        T2: Into<String>,
259        T3: Into<String>,
260    {
261        NodeRef::new(NodeData::Doctype(Doctype {
262            name: name.into(),
263            public_id: public_id.into(),
264            system_id: system_id.into(),
265        }))
266    }
267
268    /// Create a new document node.
269    #[inline]
270    pub fn new_document() -> NodeRef {
271        NodeRef::new(NodeData::Document(DocumentData {
272            _quirks_mode: Cell::new(QuirksMode::NoQuirks),
273        }))
274    }
275
276    /// Return the concatenation of all text nodes in this subtree.
277    pub fn text_contents(&self) -> String {
278        let mut s = String::new();
279        for text_node in self.inclusive_descendants().text_nodes() {
280            s.push_str(&text_node.borrow());
281        }
282        s
283    }
284}
285
286impl Node {
287    /// Return a reference to this node’s node-type-specific data.
288    #[inline]
289    pub fn data(&self) -> &NodeData {
290        &self.data
291    }
292
293    /// If this node is an element, return a reference to element-specific data.
294    #[inline]
295    pub fn as_element(&self) -> Option<&ElementData> {
296        match self.data {
297            NodeData::Element(ref value) => Some(value),
298            _ => None,
299        }
300    }
301
302    /// If this node is a text node, return a reference to its contents.
303    #[inline]
304    pub fn as_text(&self) -> Option<&RefCell<String>> {
305        match self.data {
306            NodeData::Text(ref value) => Some(value),
307            _ => None,
308        }
309    }
310
311    /// If this node is a comment, return a reference to its contents.
312    #[inline]
313    pub fn as_comment(&self) -> Option<&RefCell<String>> {
314        match self.data {
315            NodeData::Comment(ref value) => Some(value),
316            _ => None,
317        }
318    }
319
320    /// If this node is a document, return a reference to doctype-specific data.
321    #[inline]
322    pub fn as_doctype(&self) -> Option<&Doctype> {
323        match self.data {
324            NodeData::Doctype(ref value) => Some(value),
325            _ => None,
326        }
327    }
328
329    /// If this node is a document, return a reference to document-specific data.
330    #[inline]
331    pub fn as_document(&self) -> Option<&DocumentData> {
332        match self.data {
333            NodeData::Document(ref value) => Some(value),
334            _ => None,
335        }
336    }
337
338    /// Return a reference to the parent node, unless this node is the root of the tree.
339    #[inline]
340    pub fn parent(&self) -> Option<NodeRef> {
341        self.parent.upgrade().map(NodeRef)
342    }
343
344    /// Return a reference to the first child of this node, unless it has no child.
345    #[inline]
346    pub fn first_child(&self) -> Option<NodeRef> {
347        self.first_child.clone_inner().map(NodeRef)
348    }
349
350    /// Return a reference to the last child of this node, unless it has no child.
351    #[inline]
352    pub fn last_child(&self) -> Option<NodeRef> {
353        self.last_child.upgrade().map(NodeRef)
354    }
355
356    /// Return a reference to the previous sibling of this node, unless it is a first child.
357    #[inline]
358    pub fn previous_sibling(&self) -> Option<NodeRef> {
359        self.previous_sibling.upgrade().map(NodeRef)
360    }
361
362    /// Return a reference to the next sibling of this node, unless it is a last child.
363    #[inline]
364    pub fn next_sibling(&self) -> Option<NodeRef> {
365        self.next_sibling.clone_inner().map(NodeRef)
366    }
367
368    /// Detach a node from its parent and siblings. Children are not affected.
369    ///
370    /// To remove a node and its descendants, detach it and drop any strong reference to it.
371    pub fn detach(&self) {
372        let parent_weak = self.parent.take();
373        let previous_sibling_weak = self.previous_sibling.take();
374        let next_sibling_strong = self.next_sibling.take();
375
376        let previous_sibling_opt = previous_sibling_weak
377            .as_ref()
378            .and_then(|weak| weak.upgrade());
379
380        if let Some(next_sibling_ref) = next_sibling_strong.as_ref() {
381            next_sibling_ref
382                .previous_sibling
383                .replace(previous_sibling_weak);
384        } else if let Some(parent_ref) = parent_weak.as_ref() {
385            if let Some(parent_strong) = parent_ref.upgrade() {
386                parent_strong.last_child.replace(previous_sibling_weak);
387            }
388        }
389
390        if let Some(previous_sibling_strong) = previous_sibling_opt {
391            previous_sibling_strong
392                .next_sibling
393                .replace(next_sibling_strong);
394        } else if let Some(parent_ref) = parent_weak.as_ref() {
395            if let Some(parent_strong) = parent_ref.upgrade() {
396                parent_strong.first_child.replace(next_sibling_strong);
397            }
398        }
399    }
400}
401
402impl NodeRef {
403    /// Append a new child to this node, after existing children.
404    ///
405    /// The new child is detached from its previous position.
406    pub fn append(&self, new_child: NodeRef) {
407        new_child.detach();
408        new_child.parent.replace(Some(Rc::downgrade(&self.0)));
409        if let Some(last_child_weak) = self.last_child.replace(Some(Rc::downgrade(&new_child.0))) {
410            if let Some(last_child) = last_child_weak.upgrade() {
411                new_child.previous_sibling.replace(Some(last_child_weak));
412                debug_assert!(last_child.next_sibling.is_none());
413                last_child.next_sibling.replace(Some(new_child.0));
414                return;
415            }
416        }
417        debug_assert!(self.first_child.is_none());
418        self.first_child.replace(Some(new_child.0));
419    }
420
421    /// Prepend a new child to this node, before existing children.
422    ///
423    /// The new child is detached from its previous position.
424    pub fn prepend(&self, new_child: NodeRef) {
425        new_child.detach();
426        new_child.parent.replace(Some(Rc::downgrade(&self.0)));
427        if let Some(first_child) = self.first_child.take() {
428            debug_assert!(first_child.previous_sibling.is_none());
429            first_child
430                .previous_sibling
431                .replace(Some(Rc::downgrade(&new_child.0)));
432            new_child.next_sibling.replace(Some(first_child));
433        } else {
434            debug_assert!(self.first_child.is_none());
435            self.last_child.replace(Some(Rc::downgrade(&new_child.0)));
436        }
437        self.first_child.replace(Some(new_child.0));
438    }
439
440    /// Insert a new sibling after this node.
441    ///
442    /// The new sibling is detached from its previous position.
443    pub fn insert_after(&self, new_sibling: NodeRef) {
444        new_sibling.detach();
445        new_sibling.parent.replace(self.parent.clone_inner());
446        new_sibling
447            .previous_sibling
448            .replace(Some(Rc::downgrade(&self.0)));
449        if let Some(next_sibling) = self.next_sibling.take() {
450            debug_assert!(next_sibling.previous_sibling().unwrap() == *self);
451            next_sibling
452                .previous_sibling
453                .replace(Some(Rc::downgrade(&new_sibling.0)));
454            new_sibling.next_sibling.replace(Some(next_sibling));
455        } else if let Some(parent) = self.parent() {
456            debug_assert!(parent.last_child().unwrap() == *self);
457            parent
458                .last_child
459                .replace(Some(Rc::downgrade(&new_sibling.0)));
460        }
461        self.next_sibling.replace(Some(new_sibling.0));
462    }
463
464    /// Insert a new sibling before this node.
465    ///
466    /// The new sibling is detached from its previous position.
467    pub fn insert_before(&self, new_sibling: NodeRef) {
468        new_sibling.detach();
469        new_sibling.parent.replace(self.parent.clone_inner());
470        new_sibling.next_sibling.replace(Some(self.0.clone()));
471        if let Some(previous_sibling_weak) = self
472            .previous_sibling
473            .replace(Some(Rc::downgrade(&new_sibling.0)))
474        {
475            if let Some(previous_sibling) = previous_sibling_weak.upgrade() {
476                new_sibling
477                    .previous_sibling
478                    .replace(Some(previous_sibling_weak));
479                debug_assert!(previous_sibling.next_sibling().unwrap() == *self);
480                previous_sibling.next_sibling.replace(Some(new_sibling.0));
481                return;
482            }
483        }
484        if let Some(parent) = self.parent() {
485            debug_assert!(parent.first_child().unwrap() == *self);
486            parent.first_child.replace(Some(new_sibling.0));
487        }
488    }
489}