Skip to main content

readable_rs/
node_utils.rs

1use crate::parser::NodeRef;
2use html5ever::{LocalName, QualName};
3use kuchikikiki::{Attributes, ElementData, NodeData};
4use std::cell::RefCell;
5
6/// DOM-navigation and element-manipulation helpers implemented on [`NodeRef`].
7///
8/// This trait is automatically in scope when you import from
9/// [`crate::parser`] or [`crate::shared_utils`].
10pub trait NodeExt {
11    /// Return the local tag name of this node if it is an element (e.g.
12    /// `"div"`, `"p"`), or `None` for text / comment / document nodes.
13    fn element_name(&self) -> Option<&str>;
14
15    /// Look up an attribute by name and return its value, or `None` if the
16    /// attribute is absent or this is not an element node.
17    fn attr_value(&self, name: &str) -> Option<String>;
18
19    /// Collect the direct *element* children (skipping text and comment nodes)
20    /// into a `Vec`.
21    fn element_children(&self) -> Vec<NodeRef>;
22
23    /// Return the first direct child that is an element, or `None`.
24    fn first_element_child(&self) -> Option<NodeRef>;
25
26    /// Walk forward through siblings until an element node is found, or
27    /// return `None` if the end of the sibling list is reached.
28    fn next_element_sibling(&self) -> Option<NodeRef>;
29
30    /// Walk backward through siblings until an element node is found, or
31    /// return `None` if the beginning of the sibling list is reached.
32    fn previous_element_sibling(&self) -> Option<NodeRef>;
33
34    /// Serialise the *children* of this node to an HTML string (the node's
35    /// own open/close tags are **not** included).
36    fn inner_html(&self) -> String;
37
38    /// Create a new element with `tag_name`, copy all attributes and children
39    /// from `self`, splice the new node into the tree in `self`'s position,
40    /// and detach `self`.  Returns the new node.
41    ///
42    /// If `self` is not an element node it is returned unchanged.
43    ///
44    /// When the source tag is one of the legacy size-attribute elements
45    /// (`table`, `th`, `td`, `hr`, `pre`) the `width` and `height`
46    /// attributes are stripped from the copy.
47    fn clone_and_rename_element(self, tag_name: &str) -> NodeRef;
48}
49
50/// Create a new, detached HTML element node with the given tag name and no
51/// attributes or children.
52///
53/// # Examples
54///
55/// ```rust
56/// use readable_rs::{new_html_element, NodeExt};
57///
58/// let div = new_html_element("div");
59/// assert_eq!(div.element_name(), Some("div"));
60/// ```
61pub fn new_html_element(tag_name: &str) -> NodeRef {
62    let name = QualName::new(None, html5ever::ns!(html), LocalName::from(tag_name));
63    let attributes = Attributes {
64        map: Default::default(),
65    };
66    NodeRef::new(NodeData::Element(ElementData {
67        name,
68        attributes: RefCell::new(attributes),
69        template_contents: None,
70    }))
71}
72
73impl NodeExt for NodeRef {
74    fn element_name(&self) -> Option<&str> {
75        self.as_element().map(|e| e.name.local.as_ref())
76    }
77
78    fn attr_value(&self, name: &str) -> Option<String> {
79        self.as_element()
80            .and_then(|e| e.attributes.borrow().get(name).map(|v| v.to_string()))
81    }
82
83    fn element_children(&self) -> Vec<NodeRef> {
84        self.children()
85            .filter(|c| c.as_element().is_some())
86            .collect()
87    }
88
89    fn first_element_child(&self) -> Option<NodeRef> {
90        self.children().find(|c| c.as_element().is_some())
91    }
92
93    fn next_element_sibling(&self) -> Option<NodeRef> {
94        let mut sib = self.next_sibling();
95        while let Some(node) = sib {
96            if node.as_element().is_some() {
97                return Some(node);
98            }
99            sib = node.next_sibling();
100        }
101        None
102    }
103
104    fn previous_element_sibling(&self) -> Option<NodeRef> {
105        let mut sib = self.previous_sibling();
106        while let Some(node) = sib {
107            if node.as_element().is_some() {
108                return Some(node);
109            }
110            sib = node.previous_sibling();
111        }
112        None
113    }
114
115    fn inner_html(&self) -> String {
116        let mut out = String::new();
117        for child in self.children() {
118            out.push_str(&child.to_string());
119        }
120        out
121    }
122
123    fn clone_and_rename_element(self, tag_name: &str) -> NodeRef {
124        if self.as_element().is_none() {
125            return self;
126        }
127        let source_tag = self
128            .element_name()
129            .unwrap_or("")
130            .to_lowercase();
131        let e = self.as_element().unwrap();
132        let name = QualName::new(None, html5ever::ns!(html), LocalName::from(tag_name));
133        let new_node = NodeRef::new(NodeData::Element(ElementData {
134            name,
135            attributes: RefCell::new(Attributes {
136                map: e.attributes.borrow().map.clone(),
137            }),
138            template_contents: e.template_contents.clone(),
139        }));
140        if matches!(source_tag.as_str(), "table" | "th" | "td" | "hr" | "pre") {
141            if let Some(new_e) = new_node.as_element() {
142                let mut attrs = new_e.attributes.borrow_mut();
143                attrs.remove("width");
144                attrs.remove("height");
145            }
146        }
147
148        while self.first_child().is_some() {
149            new_node.append(self.first_child().unwrap());
150        }
151        self.insert_before(new_node.clone());
152        self.detach();
153
154        new_node
155    }
156}
157
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_html;
    use std::panic::{catch_unwind, AssertUnwindSafe};

    /// Renaming a text node must not panic.
    #[test]
    fn clone_and_rename_element_non_element_does_not_panic() {
        let document = parse_html("<div>text</div>");
        let container = document.select_first("div").unwrap();
        // The only child of the `<div>` is its text node.
        let text = container.as_node().first_child().unwrap();

        let attempt = || {
            let _ = text.clone().clone_and_rename_element("span");
        };
        let outcome = catch_unwind(AssertUnwindSafe(attempt));

        assert!(outcome.is_ok());
    }
}