web_scrape/scrape/
scraper.rs

1use scraper::element_ref::Select;
2use scraper::{ElementRef, Selector};
3
4use crate::scrape::Error;
5use crate::scrape::Error::*;
6
7/// Responsible for scraping data from elements.
8#[derive(Copy, Clone, Debug)]
9pub struct Scraper<'a> {
10    element: ElementRef<'a>,
11}
12
13impl<'a> From<ElementRef<'a>> for Scraper<'a> {
14    fn from(element: ElementRef<'a>) -> Self {
15        Self { element }
16    }
17}
18
19impl<'a> Scraper<'a> {
20    //! Properties
21
22    /// Gets the element.
23    pub fn element(&self) -> ElementRef {
24        self.element
25    }
26}
27
28impl<'a> Scraper<'a> {
29    //! Utils
30
31    /// Creates the `Selector` for the `selection`.
32    fn selector(selection: &str) -> Result<Selector, Error> {
33        Selector::parse(selection).map_err(|e| InvalidSelection {
34            selection: selection.to_string(),
35            message: e.to_string(),
36        })
37    }
38}
39
40impl<'a> Scraper<'a> {
41    //! All
42
43    /// Scrapes all the instances of the `selection`.
44    pub fn all<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, Error>
45    where
46        F: Fn(Scraper) -> Result<T, Error>,
47    {
48        let selector: Selector = Self::selector(selection)?;
49        let mut result: Vec<T> = Vec::default();
50        for element in self.element.select(&selector) {
51            let scraper: Scraper = Scraper::from(element);
52            let element: T = scrape(scraper)?;
53            result.push(element)
54        }
55        Ok(result)
56    }
57
58    /// Scrapes all the text from the `selection`.
59    pub fn all_text(&self, selection: &str) -> Result<Vec<String>, Error> {
60        self.all(selection, |s| Ok(s.element().text().collect()))
61    }
62
63    /// Scrapes all the html from the `selection`.
64    pub fn all_html(&self, selection: &str) -> Result<Vec<String>, Error> {
65        self.all(selection, |s| Ok(s.element().html()))
66    }
67
68    /// Scrapes all the successful instances of the `selection`.
69    pub fn all_flat<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, Error>
70    where
71        F: Fn(Scraper) -> Result<Option<T>, Error>,
72    {
73        let selector: Selector = Self::selector(selection)?;
74        let mut result: Vec<T> = Vec::default();
75        for element in self.element.select(&selector) {
76            let scraper: Scraper = Scraper::from(element);
77            if let Some(element) = scrape(scraper)? {
78                result.push(element)
79            }
80        }
81        Ok(result)
82    }
83}
84
85impl<'a> Scraper<'a> {
86    //! Only
87
88    /// Scrapes the only instance of the `selection`.
89    pub fn only<T, F>(&self, selection: &str, scrape: F) -> Result<T, Error>
90    where
91        F: Fn(Scraper) -> Result<T, Error>,
92    {
93        let selector: Selector = Self::selector(selection)?;
94        let mut select: Select = self.element.select(&selector);
95        if let Some(first) = select.next() {
96            let first: T = scrape(first.into())?;
97            if select.next().is_some() {
98                Err(ExpectedOneGotMultiple {
99                    selection: selection.to_string(),
100                })
101            } else {
102                Ok(first)
103            }
104        } else {
105            Err(ExpectedOneGotNone {
106                selection: selection.to_string(),
107            })
108        }
109    }
110
111    /// Scrapes the only instance of the `selection` attribute.
112    pub fn only_att(&self, selection: &str, att: &str) -> Result<String, Error> {
113        self.only(selection, |s| {
114            if let Some(att) = s.element.attr(att) {
115                Ok(att.to_string())
116            } else {
117                Err(ExpectedOneGotNone {
118                    selection: selection.to_string(),
119                })
120            }
121        })
122    }
123
124    /// Scrapes the only instance of the `selection` text.
125    pub fn only_text(&self, selection: &str) -> Result<String, Error> {
126        self.only(selection, |s| Ok(s.element.text().collect()))
127    }
128}
129
130impl<'a> Scraper<'a> {
131    //! Optional
132
133    /// Scrapes the optional instance of the `selection`.
134    pub fn optional<T, F>(&self, selection: &str, scrape: F) -> Result<Option<T>, Error>
135    where
136        F: Fn(Scraper) -> Result<T, Error>,
137    {
138        let selector: Selector = Self::selector(selection)?;
139        let mut select: Select = self.element.select(&selector);
140        if let Some(first) = select.next() {
141            let first: T = scrape(first.into())?;
142            if select.next().is_some() {
143                Err(ExpectedOptionalGotMultiple {
144                    selection: selection.to_string(),
145                })
146            } else {
147                Ok(Some(first))
148            }
149        } else {
150            Ok(None)
151        }
152    }
153
154    /// Scrapes the optional instance of the `selection` text.
155    pub fn optional_text(&self, selection: &str) -> Result<Option<String>, Error> {
156        self.optional(selection, |s| Ok(s.element().text().collect()))
157    }
158}