reqwest_scraper/
css_selector.rs

1//!  Select elements in HTML response using CSS selector
2//!
3use crate::error::Result;
4use itertools::Itertools;
5use scraper::ElementRef;
6
7/// Html Response
8pub struct Html {
9    value: scraper::Html,
10}
11
12impl Html {
13    /// constructor
14    pub fn new(html_str: &str) -> Self {
15        Self {
16            value: scraper::Html::parse_fragment(html_str),
17        }
18    }
19    /// Select elements in HTML using CSS selector
20    pub fn select(&self, selector: &str) -> Result<Selectable<scraper::Html>> {
21        Selectable::wrap(selector, &self.value)
22    }
23}
24
25/// Wrapper object for HTML elements and CSS selectors
26pub struct Selectable<'a, T> {
27    selector: scraper::Selector,
28    node: &'a T,
29}
30
31/// Iterator for selected elements in Html
32pub struct HtmlSelectIterator<'a, 'b> {
33    select: scraper::html::Select<'a, 'b>,
34}
35
36/// Iterator for selected elements in Element
37pub struct ElementSelectIterator<'a, 'b> {
38    select: scraper::element_ref::Select<'a, 'b>,
39}
40
41/// HTML elements selected by CSS selector
42pub struct SelectItem<'a> {
43    element: ElementRef<'a>,
44}
45
46impl<'a, T> Selectable<'a, T> {
47    fn wrap(selector: &str, html: &'a T) -> Result<Selectable<'a, T>> {
48        Ok(Self {
49            selector: scraper::Selector::parse(selector)?,
50            node: html,
51        })
52    }
53}
54
55/// Selectable Html Document
56pub type SelectableHtml<'a> = Selectable<'a, scraper::Html>;
57
58impl<'a> Selectable<'a, scraper::Html> {
59    /// iterator
60    pub fn iter(&self) -> HtmlSelectIterator {
61        HtmlSelectIterator {
62            select: self.node.select(&self.selector),
63        }
64    }
65
66    /// first match item
67    pub fn first(&self) -> Option<SelectItem> {
68        self.iter().next()
69    }
70}
71
72/// Selectable Html Element
73pub type SelectableElement<'a> = Selectable<'a, ElementRef<'a>>;
74
75impl<'a> Selectable<'a, ElementRef<'a>> {
76    /// iterator
77    pub fn iter(&self) -> ElementSelectIterator {
78        ElementSelectIterator {
79            select: self.node.select(&self.selector),
80        }
81    }
82
83    /// first match item
84    pub fn first(&self) -> Option<SelectItem> {
85        self.iter().next()
86    }
87}
88
89impl<'a, 'b> Iterator for HtmlSelectIterator<'a, 'b> {
90    type Item = SelectItem<'a>;
91
92    fn next(&mut self) -> Option<Self::Item> {
93        self.select.next().map(Self::Item::new)
94    }
95}
96
97impl<'a, 'b> Iterator for ElementSelectIterator<'a, 'b> {
98    type Item = SelectItem<'a>;
99
100    fn next(&mut self) -> Option<Self::Item> {
101        self.select.next().map(Self::Item::new)
102    }
103}
104
105/// Case Sensitivity Match
106pub type CaseSensitivity = scraper::CaseSensitivity;
107/// Html element class attribute
108pub type Classes<'a> = scraper::node::Classes<'a>;
109/// Html element attributes
110pub type Attrs<'a> = scraper::node::Attrs<'a>;
111
112impl<'a> SelectItem<'a> {
113    /// constructor
114    pub fn new(element: ElementRef<'a>) -> Self {
115        Self { element }
116    }
117
118    /// Returns the element name.
119    pub fn name(&self) -> &str {
120        self.element.value().name()
121    }
122
123    /// Returns the element ID.
124    pub fn id(&self) -> Option<&str> {
125        self.element.value().id()
126    }
127
128    /// Returns true if element has the class.
129    pub fn has_class(&self, class: &str, case_sensitive: CaseSensitivity) -> bool {
130        self.element.value().has_class(class, case_sensitive)
131    }
132
133    /// Returns an iterator over the element's classes.
134    pub fn classes(&self) -> Classes {
135        self.element.value().classes()
136    }
137
138    /// Returns an iterator over the element's attributes.
139    pub fn attrs(&self) -> Attrs {
140        self.element.value().attrs()
141    }
142
143    /// Returns the value of an attribute.
144    pub fn attr(&self, attr: &str) -> Option<&'a str> {
145        self.element.attr(attr)
146    }
147
148    /// Returns the text of this element.
149    pub fn text(&self) -> String {
150        self.element.text().join("").trim().into()
151    }
152
153    /// Returns the HTML of this element.
154    pub fn html(&self) -> String {
155        self.element.html()
156    }
157
158    /// Returns the inner HTML of this element.
159    pub fn inner_html(&self) -> String {
160        self.element.inner_html()
161    }
162
163    /// Iterate over all child nodes which are elements
164    pub fn children(&self) -> impl Iterator<Item = SelectItem<'a>> {
165        self.element.child_elements().map(SelectItem::new)
166    }
167
168    /// Use CSS selector to find elements based on the current element
169    pub fn select(&self, selector: &str) -> Result<Selectable<'a, ElementRef>> {
170        Selectable::wrap(selector, &self.element)
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Selects an element by id, then selects its paragraphs relative to it
    /// and checks their trimmed text.
    #[test]
    fn test_css_selector() {
        let html_str = r#"
        <html>
            <body>
                <div id="content">
                    <p>Hello, World!</p>
                    <p>This is a test.</p>
                </div>
            </body>
        </html>
        "#;

        let html = Html::new(html_str);
        // The selectable is kept in its own binding so that it outlives the
        // item borrowed from it.
        let by_id = html
            .select("#content")
            .expect("`#content` should be a valid selector");
        let content = by_id
            .first()
            .expect("fixture should contain an element with id `content`");
        assert_eq!(content.attr("id"), Some("content"));

        let expectations = [
            ("p:nth-child(1)", "Hello, World!"),
            ("p:nth-child(2)", "This is a test."),
        ];
        for (selector, expected_text) in expectations {
            let matches = content
                .select(selector)
                .expect("paragraph selector should be valid");
            let paragraph = matches
                .first()
                .expect("fixture should contain the requested paragraph");
            assert_eq!(paragraph.text(), expected_text);
        }
    }
}