sws_scraper/element_ref/
mod.rs

1//! Element references.
2
3mod element;
4mod serializable;
5
6use std::iter::FusedIterator;
7use std::ops::Deref;
8
9use html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
10use sws_tree::iter::{Edge, Traverse};
11use sws_tree::NodeRef;
12
13use crate::node::{Element, Node};
14use crate::selector::Selector;
15
16/// Wrapper around a reference to an element node.
17///
18/// This wrapper implements the `Element` trait from the `selectors` crate, which allows
19/// it to be matched against CSS selectors.
20#[derive(Debug, Clone, PartialEq)]
21pub struct ElementRef {
22    node: NodeRef<Node>,
23}
24
25impl ElementRef {
26    fn new(node: NodeRef<Node>) -> Self {
27        ElementRef { node }
28    }
29
30    /// Wraps a `NodeRef` only if it references a `Node::Element`.
31    pub fn wrap(node: NodeRef<Node>) -> Option<Self> {
32        match node.map_value(|v| v.is_element()) {
33            Some(true) => Some(ElementRef::new(node)),
34            _ => None,
35        }
36    }
37
38    /// Maps a function to the `Element` referenced by `self`.
39    pub fn map_value<F, R>(&self, map_fn: F) -> Option<R>
40    where
41        F: FnOnce(&Element) -> R,
42    {
43        self.node.map_value(|v| map_fn(v.as_element().unwrap()))
44    }
45
46    /// Returns an iterator over descendent elements matching a selector.
47    pub fn select(&self, selector: Selector) -> Select {
48        let mut inner = self.traverse();
49        inner.next(); // Skip Edge::Open(self).
50
51        Select {
52            scope: self.clone(),
53            inner,
54            selector,
55        }
56    }
57
58    fn serialize(&self, traversal_scope: TraversalScope) -> String {
59        let opts = SerializeOpts {
60            scripting_enabled: false, // It's not clear what this does.
61            traversal_scope,
62            create_missing_parent: false,
63        };
64        let mut buf = Vec::new();
65        serialize(&mut buf, self, opts).unwrap();
66        String::from_utf8(buf).unwrap()
67    }
68
69    /// Returns the HTML of this element.
70    pub fn html(&self) -> String {
71        self.serialize(TraversalScope::IncludeNode)
72    }
73
74    /// Returns the inner HTML of this element.
75    pub fn inner_html(&self) -> String {
76        self.serialize(TraversalScope::ChildrenOnly(None))
77    }
78
79    /// Returns an iterator over descendent text nodes.
80    pub fn text(&self) -> Text {
81        Text {
82            inner: self.traverse(),
83        }
84    }
85
86    /// Returns all the descendent text nodes content concatenated.
87    pub fn inner_text(&self) -> String {
88        let mut all_text = String::new();
89        for edge in self.traverse() {
90            if let Edge::Open(node) = edge {
91                node.map_value(|v| {
92                    if let Node::Text(ref text) = v {
93                        all_text.push_str(text);
94                    }
95                });
96            }
97        }
98        all_text
99    }
100}
101
102impl Deref for ElementRef {
103    type Target = NodeRef<Node>;
104
105    fn deref(&self) -> &NodeRef<Node> {
106        &self.node
107    }
108}
109
110/// Iterator over descendent elements matching a selector.
111#[derive(Debug, Clone)]
112pub struct Select {
113    scope: ElementRef,
114    inner: Traverse<Node>,
115    selector: Selector,
116}
117
118impl Iterator for Select {
119    type Item = ElementRef;
120
121    fn next(&mut self) -> Option<ElementRef> {
122        for edge in &mut self.inner {
123            if let Edge::Open(node) = edge {
124                if let Some(element) = ElementRef::wrap(node) {
125                    if self
126                        .selector
127                        .matches_with_scope(&element, Some(self.scope.clone()))
128                    {
129                        return Some(element);
130                    }
131                }
132            }
133        }
134        None
135    }
136}
137
138impl FusedIterator for Select {}
139
140/// Iterator over descendent text nodes.
141#[derive(Debug, Clone)]
142pub struct Text {
143    inner: Traverse<Node>,
144}
145
146impl Iterator for Text {
147    type Item = String;
148
149    fn next(&mut self) -> Option<String> {
150        for edge in &mut self.inner {
151            if let Edge::Open(node) = edge {
152                let text = node
153                    .map_value(|v| {
154                        if let Node::Text(ref text) = v {
155                            Some(text.to_string())
156                        } else {
157                            None
158                        }
159                    })
160                    .unwrap();
161                if text.is_some() {
162                    return text;
163                }
164            }
165        }
166        None
167    }
168}
169
#[cfg(test)]
mod tests {
    use crate::html::Html;
    use crate::selector::Selector;

    /// `:scope` must refer to the element `select` was called on: among the `<b>`
    /// elements, only `<b>3</b>` is a direct child of the outer `<span>`.
    #[test]
    fn test_scope() {
        let markup = r"
            <div>
                <b>1</b>
                <span>
                    <span><b>2</b></span>
                    <b>3</b>
                </span>
            </div>
        ";
        let outer_span = Selector::parse("div > span").unwrap();
        let direct_bold = Selector::parse(":scope > b").unwrap();

        let fragment = Html::parse_fragment(markup);
        let span = fragment.select(outer_span).next().unwrap();
        let bold = span.select(direct_bold).next().unwrap();
        assert_eq!(bold.inner_html(), "3");
    }
}