scraper/element_ref/
mod.rs

1//! Element references.
2
3use std::fmt::{self, Debug};
4use std::iter::FusedIterator;
5use std::ops::Deref;
6
7use ego_tree::iter::{Edge, Traverse};
8use ego_tree::NodeRef;
9use html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
10use selectors::matching::SelectorCaches;
11
12use crate::node::Element;
13use crate::{Node, Selector};
14
15/// Wrapper around a reference to an element node.
16///
17/// This wrapper implements the `Element` trait from the `selectors` crate, which allows it to be
18/// matched against CSS selectors.
19#[derive(Clone, Copy, PartialEq, Eq)]
20pub struct ElementRef<'a> {
21    node: NodeRef<'a, Node>,
22}
23
24impl<'a> ElementRef<'a> {
25    fn new(node: NodeRef<'a, Node>) -> Self {
26        ElementRef { node }
27    }
28
29    /// Wraps a `NodeRef` only if it references a `Node::Element`.
30    pub fn wrap(node: NodeRef<'a, Node>) -> Option<Self> {
31        if node.value().is_element() {
32            Some(ElementRef::new(node))
33        } else {
34            None
35        }
36    }
37
38    /// Returns the `Element` referenced by `self`.
39    pub fn value(&self) -> &'a Element {
40        self.node.value().as_element().unwrap()
41    }
42
43    /// Returns an iterator over descendent elements matching a selector.
44    pub fn select<'b>(&self, selector: &'b Selector) -> Select<'a, 'b> {
45        let mut inner = self.traverse();
46        inner.next(); // Skip Edge::Open(self).
47
48        Select {
49            scope: *self,
50            inner,
51            selector,
52            caches: Default::default(),
53        }
54    }
55
56    fn serialize(&self, traversal_scope: TraversalScope) -> String {
57        let opts = SerializeOpts {
58            scripting_enabled: false, // It's not clear what this does.
59            traversal_scope,
60            create_missing_parent: false,
61        };
62        let mut buf = Vec::new();
63        serialize(&mut buf, self, opts).unwrap();
64        String::from_utf8(buf).unwrap()
65    }
66
67    /// Returns the HTML of this element.
68    pub fn html(&self) -> String {
69        self.serialize(TraversalScope::IncludeNode)
70    }
71
72    /// Returns the inner HTML of this element.
73    pub fn inner_html(&self) -> String {
74        self.serialize(TraversalScope::ChildrenOnly(None))
75    }
76
77    /// Returns the value of an attribute.
78    pub fn attr(&self, attr: &str) -> Option<&'a str> {
79        self.value().attr(attr)
80    }
81
82    /// Returns an iterator over descendent text nodes.
83    pub fn text(&self) -> Text<'a> {
84        Text {
85            inner: self.traverse(),
86        }
87    }
88
89    /// Iterate over all child nodes which are elements
90    ///
91    /// # Example
92    ///
93    /// ```
94    /// # use scraper::Html;
95    /// let fragment = Html::parse_fragment("foo<span>bar</span><a>baz</a>qux");
96    ///
97    /// let children = fragment.root_element().child_elements().map(|element| element.value().name()).collect::<Vec<_>>();
98    /// assert_eq!(children, ["span", "a"]);
99    /// ```
100    pub fn child_elements(&self) -> impl Iterator<Item = ElementRef<'a>> {
101        self.children().filter_map(ElementRef::wrap)
102    }
103
104    /// Iterate over all descendent nodes which are elements
105    ///
106    /// # Example
107    ///
108    /// ```
109    /// # use scraper::Html;
110    /// let fragment = Html::parse_fragment("foo<span><b>bar</b></span><a><i>baz</i></a>qux");
111    ///
112    /// let descendants = fragment.root_element().descendent_elements().map(|element| element.value().name()).collect::<Vec<_>>();
113    /// assert_eq!(descendants, ["html", "span", "b", "a", "i"]);
114    /// ```
115    pub fn descendent_elements(&self) -> impl Iterator<Item = ElementRef<'a>> {
116        self.descendants().filter_map(ElementRef::wrap)
117    }
118}
119
120impl Debug for ElementRef<'_> {
121    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
122        Debug::fmt(self.value(), f)
123    }
124}
125
126impl<'a> Deref for ElementRef<'a> {
127    type Target = NodeRef<'a, Node>;
128    fn deref(&self) -> &NodeRef<'a, Node> {
129        &self.node
130    }
131}
132
133/// Iterator over descendent elements matching a selector.
134pub struct Select<'a, 'b> {
135    scope: ElementRef<'a>,
136    inner: Traverse<'a, Node>,
137    selector: &'b Selector,
138    caches: SelectorCaches,
139}
140
141impl Debug for Select<'_, '_> {
142    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
143        fmt.debug_struct("Select")
144            .field("scope", &self.scope)
145            .field("inner", &self.inner)
146            .field("selector", &self.selector)
147            .field("caches", &"..")
148            .finish()
149    }
150}
151
152impl Clone for Select<'_, '_> {
153    fn clone(&self) -> Self {
154        Self {
155            scope: self.scope,
156            inner: self.inner.clone(),
157            selector: self.selector,
158            caches: Default::default(),
159        }
160    }
161}
162
163impl<'a> Iterator for Select<'a, '_> {
164    type Item = ElementRef<'a>;
165
166    fn next(&mut self) -> Option<ElementRef<'a>> {
167        for edge in &mut self.inner {
168            if let Edge::Open(node) = edge {
169                if let Some(element) = ElementRef::wrap(node) {
170                    if self.selector.matches_with_scope_and_cache(
171                        &element,
172                        Some(self.scope),
173                        &mut self.caches,
174                    ) {
175                        return Some(element);
176                    }
177                }
178            }
179        }
180        None
181    }
182}
183
184impl FusedIterator for Select<'_, '_> {}
185
186/// Iterator over descendent text nodes.
187#[derive(Debug, Clone)]
188pub struct Text<'a> {
189    inner: Traverse<'a, Node>,
190}
191
192impl<'a> Iterator for Text<'a> {
193    type Item = &'a str;
194
195    fn next(&mut self) -> Option<&'a str> {
196        for edge in &mut self.inner {
197            if let Edge::Open(node) = edge {
198                if let Node::Text(ref text) = node.value() {
199                    return Some(&**text);
200                }
201            }
202        }
203        None
204    }
205}
206
207impl FusedIterator for Text<'_> {}
208
209mod element;
210mod serializable;
211
#[cfg(test)]
mod tests {
    use crate::html::Html;
    use crate::selector::Selector;

    /// A `:scope` selector passed to `ElementRef::select` is matched relative to that element:
    /// only the `<b>` directly inside the outer `<span>` is found.
    #[test]
    fn test_scope() {
        let markup = r"
            <div>
                <b>1</b>
                <span>
                    <span><b>2</b></span>
                    <b>3</b>
                </span>
            </div>
        ";
        let document = Html::parse_fragment(markup);

        let outer_span_selector = Selector::parse("div > span").unwrap();
        let outer_span = document.select(&outer_span_selector).next().unwrap();

        let direct_bold_selector = Selector::parse(":scope > b").unwrap();
        let direct_bold = outer_span.select(&direct_bold_selector).next().unwrap();

        assert_eq!(direct_bold.inner_html(), "3");
    }
}
236}