accessibility_scraper/element_ref/
mod.rs

1//! Element references.
2
3use std::ops::Deref;
4
5use ego_tree::iter::{Edge, Traverse};
6use ego_tree::NodeRef;
7use fast_html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
8
9use crate::node::Element;
10use crate::{Node, Selector};
11
// SAFETY: NOTE(review): these impls assert that `Node` may be moved to and shared between
// threads. Nothing visible in this file justifies that; soundness depends on every field of
// `Node` (declared elsewhere in the crate) being thread-safe — TODO confirm, or drop these impls.
unsafe impl Send for Node {}
unsafe impl Sync for Node {}
14
/// Wrapper around a reference to an element node.
///
/// This wrapper implements the `Element` trait from the `selectors` crate, which allows it to be
/// matched against CSS selectors.
///
/// It dereferences to the wrapped `NodeRef`, so the `NodeRef` methods can be called on it
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ElementRef<'a> {
    /// The wrapped tree node. `value()` unwraps it as an element, so it is expected to hold a
    /// `Node::Element`.
    node: NodeRef<'a, Node>,
}
23
24impl<'a> ElementRef<'a> {
25    /// Create a new element reference
26    #[inline]
27    pub fn new(node: NodeRef<'a, Node>) -> Self {
28        ElementRef { node }
29    }
30
31    /// Wraps a `NodeRef` only if it references a `Node::Element`.
32    #[inline]
33    pub fn wrap(node: NodeRef<'a, Node>) -> Option<Self> {
34        if node.value().is_element() {
35            Some(ElementRef::new(node))
36        } else {
37            None
38        }
39    }
40
41    /// Returns the `Element` referenced by `self`.
42    #[inline]
43    pub fn value(&self) -> &'a Element {
44        self.node.value().as_element().unwrap()
45    }
46
47    /// Returns an iterator over descendent elements matching a selector.
48    #[inline]
49    pub fn select<'b>(&self, selector: &'b Selector) -> Select<'a, 'b> {
50        let mut inner = self.traverse();
51        inner.next(); // Skip Edge::Open(self).
52
53        Select {
54            scope: *self,
55            inner,
56            selector,
57        }
58    }
59
60    fn serialize(&self, traversal_scope: TraversalScope) -> String {
61        let opts = SerializeOpts {
62            scripting_enabled: false, // It's not clear what this does.
63            traversal_scope,
64            create_missing_parent: false,
65        };
66        let mut buf = Vec::new();
67        serialize(&mut buf, self, opts).unwrap();
68        String::from_utf8(buf).unwrap()
69    }
70
71    /// Returns the HTML of this element.
72    pub fn html(&self) -> String {
73        self.serialize(TraversalScope::IncludeNode)
74    }
75
76    /// Returns the inner HTML of this element.
77    pub fn inner_html(&self) -> String {
78        self.serialize(TraversalScope::ChildrenOnly(None))
79    }
80
81    /// Returns the value of an attribute.
82    #[inline]
83    pub fn attr(&self, attr: &str) -> Option<&str> {
84        self.value().attr(attr)
85    }
86
87    /// Returns an iterator over descendent text nodes.
88    pub fn text(&self) -> Text<'a> {
89        Text {
90            inner: self.traverse(),
91        }
92    }
93
94    /// Returns if the element has the attibute and not empty
95    #[inline]
96    pub fn has_attribute(&self, attr: &str) -> bool {
97        match self.attr(attr) {
98            Some(val) => !val.trim().is_empty(),
99            None => false,
100        }
101    }
102}
103
104impl<'a> Deref for ElementRef<'a> {
105    type Target = NodeRef<'a, Node>;
106    fn deref(&self) -> &NodeRef<'a, Node> {
107        &self.node
108    }
109}
110
/// Iterator over descendant elements matching a selector.
///
/// Created by `ElementRef::select`.
#[derive(Debug, Clone)]
pub struct Select<'a, 'b> {
    /// Element the selection started from; handed to the selector as its scope.
    scope: ElementRef<'a>,
    /// Traversal of the scope's subtree, already advanced past the scope's own opening edge.
    inner: Traverse<'a, Node>,
    /// Selector every visited element is tested against.
    selector: &'b Selector,
}
118
119impl<'a, 'b> Iterator for Select<'a, 'b> {
120    type Item = ElementRef<'a>;
121
122    fn next(&mut self) -> Option<ElementRef<'a>> {
123        for edge in &mut self.inner {
124            if let Edge::Open(node) = edge {
125                if let Some(element) = ElementRef::wrap(node) {
126                    if self.selector.matches_with_scope(&element, Some(self.scope)) {
127                        return Some(element);
128                    }
129                }
130            }
131        }
132        None
133    }
134}
135
/// Iterator over descendant text nodes.
///
/// Created by `ElementRef::text`.
#[derive(Debug, Clone)]
pub struct Text<'a> {
    /// Traversal of the subtree rooted at the element `text()` was called on.
    inner: Traverse<'a, Node>,
}
141
142impl<'a> Iterator for Text<'a> {
143    type Item = &'a str;
144
145    fn next(&mut self) -> Option<&'a str> {
146        for edge in &mut self.inner {
147            if let Edge::Open(node) = edge {
148                if let Node::Text(ref text) = node.value() {
149                    return Some(&**text);
150                }
151            }
152        }
153        None
154    }
155}
156
// Private submodules. NOTE(review): presumably these hold the `selectors::Element` impl and the
// serializer impl that `ElementRef::serialize` relies on — confirm against the submodule sources.
mod element;
mod serializable;
159
#[cfg(test)]
mod tests {
    use crate::html::Html;
    use crate::selector::Selector;

    /// `:scope` in a nested `select` must refer to the element the selection started from.
    #[test]
    fn test_scope() {
        let outer_selector = Selector::parse("div > span").unwrap();
        let scoped_selector = Selector::parse(":scope > b").unwrap();

        let markup = r"
            <div>
                <b>1</b>
                <span>
                    <span><b>2</b></span>
                    <b>3</b>
                </span>
            </div>
        ";
        let document = Html::parse_fragment(markup);

        // The outer <span> is the scope; only its direct <b> child (3) may match,
        // not the <b> nested one level deeper (2) nor the sibling of the span (1).
        let outer_span = document.select(&outer_selector).next().unwrap();
        let direct_bold = outer_span.select(&scoped_selector).next().unwrap();
        assert_eq!(direct_bold.inner_html(), "3");
    }
}