Skip to main content

web_scrape/scrape/
scraper.rs

1use crate::scrape::ScrapeError;
2use crate::scrape::ScrapeError::*;
3use scraper::element_ref::Select;
4use scraper::{ElementRef, Selector};
5
6/// Responsible for scraping data from html elements.
7#[derive(Copy, Clone, Debug)]
8pub struct Scraper<'a> {
9    element: ElementRef<'a>,
10}
11
12impl<'a> From<ElementRef<'a>> for Scraper<'a> {
13    fn from(element: ElementRef<'a>) -> Self {
14        Self { element }
15    }
16}
17
18impl<'a> Scraper<'a> {
19    //! Properties
20
21    /// Gets the element.
22    pub fn element(&self) -> ElementRef<'_> {
23        self.element
24    }
25}
26
27impl<'a> Scraper<'a> {
28    //! Utils
29
30    /// Creates the `Selector` for the `selection`.
31    fn selector(selection: &str) -> Result<Selector, ScrapeError> {
32        Selector::parse(selection).map_err(|e| InvalidSelection {
33            selection: selection.to_string(),
34            message: e.to_string(),
35        })
36    }
37}
38
39impl<'a> Scraper<'a> {
40    //! All
41
42    /// Scrapes all the instances of the `selection`.
43    pub fn all<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, ScrapeError>
44    where
45        F: Fn(Scraper) -> Result<T, ScrapeError>,
46    {
47        let selector: Selector = Self::selector(selection)?;
48        let mut result: Vec<T> = Vec::default();
49        for element in self.element.select(&selector) {
50            let scraper: Scraper = Scraper::from(element);
51            let element: T = scrape(scraper)?;
52            result.push(element)
53        }
54        Ok(result)
55    }
56
57    /// Scrapes all the text from the `selection`.
58    pub fn all_text(&self, selection: &str) -> Result<Vec<String>, ScrapeError> {
59        self.all(selection, |s| Ok(s.element().text().collect()))
60    }
61
62    /// Scrapes all the html from the `selection`.
63    pub fn all_html(&self, selection: &str) -> Result<Vec<String>, ScrapeError> {
64        self.all(selection, |s| Ok(s.element().html()))
65    }
66
67    /// Scrapes all the successful instances of the `selection`.
68    pub fn all_flat<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, ScrapeError>
69    where
70        F: Fn(Scraper) -> Result<Option<T>, ScrapeError>,
71    {
72        let selector: Selector = Self::selector(selection)?;
73        let mut result: Vec<T> = Vec::default();
74        for element in self.element.select(&selector) {
75            let scraper: Scraper = Scraper::from(element);
76            if let Some(element) = scrape(scraper)? {
77                result.push(element)
78            }
79        }
80        Ok(result)
81    }
82}
83
84impl<'a> Scraper<'a> {
85    //! Only
86
87    /// Scrapes the only instance of the `selection`.
88    pub fn only<T, F>(&self, selection: &str, scrape: F) -> Result<T, ScrapeError>
89    where
90        F: Fn(Scraper) -> Result<T, ScrapeError>,
91    {
92        let selector: Selector = Self::selector(selection)?;
93        let mut select: Select = self.element.select(&selector);
94        if let Some(first) = select.next() {
95            let first: T = scrape(first.into())?;
96            if select.next().is_some() {
97                Err(ExpectedOneGotMultiple {
98                    selection: selection.to_string(),
99                })
100            } else {
101                Ok(first)
102            }
103        } else {
104            Err(ExpectedOneGotNone {
105                selection: selection.to_string(),
106            })
107        }
108    }
109
110    /// Scrapes the only instance of the `selection` attribute.
111    pub fn only_att(&self, selection: &str, att: &str) -> Result<String, ScrapeError> {
112        self.only(selection, |s| {
113            if let Some(att) = s.element.attr(att) {
114                Ok(att.to_string())
115            } else {
116                Err(ExpectedOneGotNone {
117                    selection: selection.to_string(),
118                })
119            }
120        })
121    }
122
123    /// Scrapes the only instance of the `selection` text.
124    pub fn only_text(&self, selection: &str) -> Result<String, ScrapeError> {
125        self.only(selection, |s| Ok(s.element.text().collect()))
126    }
127
128    /// Scrapes the only instance of the `selection` html.
129    pub fn only_html(&self, selection: &str) -> Result<String, ScrapeError> {
130        self.only(selection, |s| Ok(s.element.html()))
131    }
132}
133
134impl<'a> Scraper<'a> {
135    //! Optional
136
137    /// Scrapes the optional instance of the `selection`.
138    pub fn optional<T, F>(&self, selection: &str, scrape: F) -> Result<Option<T>, ScrapeError>
139    where
140        F: Fn(Scraper) -> Result<T, ScrapeError>,
141    {
142        let selector: Selector = Self::selector(selection)?;
143        let mut select: Select = self.element.select(&selector);
144        if let Some(first) = select.next() {
145            let first: T = scrape(first.into())?;
146            if select.next().is_some() {
147                Err(ExpectedOptionalGotMultiple {
148                    selection: selection.to_string(),
149                })
150            } else {
151                Ok(Some(first))
152            }
153        } else {
154            Ok(None)
155        }
156    }
157
158    /// Scrapes the optional instance of the `selection` text.
159    pub fn optional_text(&self, selection: &str) -> Result<Option<String>, ScrapeError> {
160        self.optional(selection, |s| Ok(s.element().text().collect()))
161    }
162
163    /// Scrapes the optional instance of the `selection` html.
164    pub fn optional_html(&self, selection: &str) -> Result<Option<String>, ScrapeError> {
165        self.optional(selection, |s| Ok(s.element().html()))
166    }
167}