soup_kuchiki/
node_ext.rs

1use kuchiki::{Attribute, Attributes, ElementData, ExpandedName, NodeData, NodeRef};
2use std::collections::BTreeMap;
3
4use crate::Handle;
5
6/// Adds some convenience methods to the `kuchiki::Node` type
7pub trait NodeExt: Sized {
8    /// Retrieves the node that these methods will work on
9    fn get_node(&self) -> &NodeRef;
10
11    /// Returns `true` if node is of type Document
12    fn is_document(&self) -> bool {
13        let node = self.get_node();
14        match node.data() {
15            NodeData::Document {
16                ..
17            } => true,
18            _ => false,
19        }
20    }
21
22    /// Returns `true` if node is of type Doctype
23    fn is_doctype(&self) -> bool {
24        let node = self.get_node();
25        match node.data() {
26            NodeData::Doctype {
27                ..
28            } => true,
29            _ => false,
30        }
31    }
32
33    /// Returns `true` if node is of type Text
34    fn is_text(&self) -> bool {
35        let node = self.get_node();
36        match node.data() {
37            NodeData::Text {
38                ..
39            } => true,
40            _ => false,
41        }
42    }
43
44    /// Returns `true` if node is of type Comment
45    fn is_comment(&self) -> bool {
46        let node = self.get_node();
47        match node.data() {
48            NodeData::Comment {
49                ..
50            } => true,
51            _ => false,
52        }
53    }
54
55    /// Returns `true` if node is of type ProcessingInstruction
56    fn is_processing_instruction(&self) -> bool {
57        let node = self.get_node();
58        match node.data() {
59            NodeData::ProcessingInstruction {
60                ..
61            } => true,
62            _ => false,
63        }
64    }
65
66    /// Returns `true` if node is of type Element
67    fn is_element(&self) -> bool {
68        let node = self.get_node();
69        match node.data() {
70            NodeData::Element {
71                ..
72            } => true,
73            _ => false,
74        }
75    }
76
77    /// Retrieves the name of the node
78    ///
79    /// If this node is an element, the name of that element is returned.
80    /// Otherwise, special names are used:
81    ///
82    /// * Document -> "\[document\]"
83    /// * Doctype -> "\[doctype\]"
84    /// * Text -> "\[text\]"
85    /// * Comment -> "\[comment\]"
86    /// * ProcessingInstruction -> "\[processing-instruction\]"
87    fn name(&self) -> &str {
88        let node = self.get_node();
89        match node.data() {
90            NodeData::Document {
91                ..
92            } => "[document]",
93            NodeData::DocumentFragment => "[document-fragment]",
94            NodeData::Doctype {
95                ..
96            } => "[doctype]",
97            NodeData::Text {
98                ..
99            } => "[text]",
100            NodeData::Comment {
101                ..
102            } => "[comment]",
103            NodeData::ProcessingInstruction {
104                ..
105            } => "[processing-instruction]",
106            NodeData::Element(ElementData {
107                ref name, ..
108            }) => name.local.as_ref(),
109        }
110    }
111
112    /// Looks for an attribute named `attr` and returns it's value as a string
113    ///
114    /// # Example
115    ///
116    /// ```rust
117    /// # extern crate soup;
118    /// # use std::error::Error;
119    /// # use soup::prelude::*;
120    /// # fn main() -> Result<(), Box<Error>> {
121    /// let soup = Soup::new(r#"<div class="foo bar"></div>"#);
122    /// let div = soup.tag("div").find().expect("Couldn't find div");
123    /// assert_eq!(div.get("class"), Some("foo bar".to_string()));
124    /// #   Ok(())
125    /// # }
126    /// ```
127    fn get(&self, attr: &str) -> Option<String> {
128        let node = self.get_node();
129        match node.data() {
130            NodeData::Element(elem_data) => {
131                let attrs = elem_data.attributes.borrow();
132                for it in attrs.iter() {
133                    let name = it.name.local.as_ref();
134                    if name.to_lowercase() == attr.to_lowercase() {
135                        return Some(it.value.to_string());
136                    }
137                }
138                None
139            },
140            _ => None,
141        }
142    }
143
144    /// Returns the node's attributes as a BTreeMap
145    fn attrs(&self) -> BTreeMap<String, String> {
146        let node = self.get_node();
147        match node.data() {
148            NodeData::Element(elem_data) => {
149                let attrs = elem_data.attributes.borrow();
150                attrs
151                    .iter()
152                    .map(|attr| (attr.name.local.to_string(), attr.value.to_string()))
153                    .collect()
154            },
155            _ => BTreeMap::new(),
156        }
157    }
158
159    /// Retrieves the text value of this element, as well as it's child elements
160    fn text(&self) -> String {
161        let node = self.get_node();
162        let mut result = vec![];
163        extract_text(node, &mut result);
164        result.join("")
165    }
166
167    /// Returns the node as an html tag
168    fn display(&self) -> String {
169        let node = self.get_node();
170        match node.data() {
171            NodeData::Element(ElementData {
172                ref name,
173                ref attributes,
174                ..
175            }) => {
176                let c = node
177                    .children()
178                    .map(|child| child.display())
179                    .collect::<Vec<_>>()
180                    .join("");
181                let mut a = attributes
182                    .borrow()
183                    .iter()
184                    .map(|attr| format!(r#"{}="{}""#, attr.name.local, attr.value))
185                    .collect::<Vec<_>>();
186                a.sort();
187                let a = a.join(" ");
188                if a.is_empty() {
189                    format!("<{}>{}</{}>", name.local.as_ref(), c, name.local.as_ref())
190                } else {
191                    format!(
192                        "<{} {}>{}</{}>",
193                        name.local.as_ref(),
194                        a,
195                        c,
196                        name.local.as_ref()
197                    )
198                }
199            },
200            NodeData::Text(ref contents) => contents.borrow().to_string(),
201            NodeData::Comment(ref contents) => format!("<!--{}-->", contents.borrow()),
202            _ => "".to_string(),
203        }
204    }
205
206    /// Navigates to the parent of the node, if there is one
207    ///
208    /// # Example
209    ///
210    /// ```rust
211    /// extern crate soup;
212    ///
213    /// use soup::prelude::*;
214    /// # use std::error::Error;
215    /// # fn main() -> Result<(), Box<Error>> {
216    /// let soup = Soup::new(r#"<div id=""><b>FOO</b></div>"#);
217    /// let b = soup.tag("b").find().expect("Couldn't find tag 'b'");
218    /// let div = b.parent().expect("Couldn't get parent of tag 'b'");
219    /// assert_eq!(div.name(), "div".to_string());
220    /// #   Ok(())
221    /// # }
222    /// ```
223    fn parent(&self) -> Option<Handle> {
224        self.get_node().0.parent()
225    }
226}
227
228fn extract_text(node: &NodeRef, result: &mut Vec<String>) {
229    match node.data() {
230        NodeData::Text(ref contents) => result.push(contents.borrow().to_string()),
231        _ => (),
232    }
233    for child in node.children() {
234        extract_text(&child, result);
235    }
236}
237
238impl NodeExt for Handle {
239    #[inline(always)]
240    fn get_node(&self) -> &NodeRef {
241        &*self
242    }
243}
244
245impl<'node> NodeExt for &'node NodeRef {
246    #[inline(always)]
247    fn get_node(&self) -> &NodeRef {
248        self
249    }
250}
251
/// A borrowed view of a single attribute: its expanded name together with its
/// value. This is the item type produced by [`AttrIter`].
#[derive(Debug, Clone)]
pub struct ExtAttr<'a> {
    /// The attribute's expanded name (the key it is stored under in the
    /// attribute map).
    pub name: &'a ExpandedName,
    /// The attribute's value.
    pub value: &'a String,
}
257
258#[derive(Debug, Clone)]
259pub struct AttrIter<'a>(std::collections::btree_map::Iter<'a, ExpandedName, Attribute>);
260
261impl<'a> Iterator for AttrIter<'a> {
262    type Item = ExtAttr<'a>;
263
264    fn next(&mut self) -> Option<Self::Item> {
265        let (name, attr) = self.0.next()?;
266        Some(ExtAttr {
267            name,
268            value: &attr.value,
269        })
270    }
271}
272
/// Adds iter() to the Attributes struct from Kuchiki
pub trait AttributeExt {
    /// Returns an iterator over all the attributes, yielding each one as an
    /// [`ExtAttr`] (a borrowed name/value pair).
    ///
    /// NOTE(review): the item type is presumably shaped to match what the
    /// RcDom backend exposes for attributes; that cannot be confirmed from
    /// this file.
    fn iter(&self) -> AttrIter<'_>;
}
278
279impl AttributeExt for Attributes {
280    fn iter(&self) -> AttrIter<'_> {
281        AttrIter(self.map.iter())
282    }
283}
284
#[cfg(test)]
mod tests {
    use crate::prelude::*;
    use std::collections::BTreeMap;

    /// `name()` on an element yields its tag name.
    #[test]
    fn name() {
        let soup = Soup::new("<b>some text</b>");
        let bold = soup.tag("b").find().expect("Couldn't find tag 'b'");
        assert_eq!(bold.name(), "b");
    }

    /// `get()` returns the full, unsplit attribute value.
    #[test]
    fn get() {
        let soup = Soup::new(r#"<div class="one two"></div>"#);
        let div = soup.tag("div").find().expect("Couldn't find tag 'div'");
        assert_eq!(div.get("class"), Some("one two".to_string()));
    }

    /// `attrs()` collects every attribute into a map keyed by name.
    #[test]
    fn attrs() {
        let soup = Soup::new(r#"<div class="one two" id="some-id"></div>"#);
        let div = soup.tag("div").find().expect("Couldn't find tag 'div'");
        let expected: BTreeMap<String, String> = vec![("class", "one two"), ("id", "some-id")]
            .into_iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();
        assert_eq!(div.attrs(), expected);
    }

    /// Attribute *values* are matched case-sensitively.
    #[test]
    fn case_sensitive() {
        let soup = Soup::new(r#"<div class="ONE TWO"></div>"#);
        let upper = soup.attr("class", "ONE").find();
        let lower = soup.attr("class", "one").find();
        assert!(upper.is_some());
        assert!(lower.is_none());
    }

    /// `display()` renders elements, nested elements, text and comments.
    #[test]
    fn display() {
        // Element with attributes only.
        let markup = r#"<div class="foo bar" id="baz"></div>"#;
        let soup = Soup::new(markup);
        let div = soup.tag("div").find().expect("Couldn't find tag 'div'");
        assert_eq!(div.display(), markup);

        // Element with a nested child.
        let markup = r#"<div class="foo bar" id="baz"><b>SOME TEXT</b></div>"#;
        let soup = Soup::new(markup);
        let div = soup.tag("div").find().expect("Couldn't find tag 'div'");
        assert_eq!(div.display(), markup);

        // Nested child containing both text and a comment.
        let soup = Soup::new(
            r#"<div class="foo bar" id="baz"><b>SOME TEXT <!-- and a comment --></b></div>"#,
        );
        let div = soup.tag("div").find().expect("Couldn't find tag 'div'");
        let bold = div.tag("b").find().expect("Couldn't find tag 'b'");
        assert_eq!(bold.display(), r#"<b>SOME TEXT <!-- and a comment --></b>"#);
    }
}