web_scrape/scrape/
scraper.rs1use crate::scrape::ScrapeError;
2use crate::scrape::ScrapeError::*;
3use scraper::element_ref::Select;
4use scraper::{ElementRef, Selector};
5
6#[derive(Copy, Clone, Debug)]
8pub struct Scraper<'a> {
9 element: ElementRef<'a>,
10}
11
12impl<'a> From<ElementRef<'a>> for Scraper<'a> {
13 fn from(element: ElementRef<'a>) -> Self {
14 Self { element }
15 }
16}
17
18impl<'a> Scraper<'a> {
19 pub fn element(&self) -> ElementRef<'_> {
23 self.element
24 }
25}
26
27impl<'a> Scraper<'a> {
28 fn selector(selection: &str) -> Result<Selector, ScrapeError> {
32 Selector::parse(selection).map_err(|e| InvalidSelection {
33 selection: selection.to_string(),
34 message: e.to_string(),
35 })
36 }
37}
38
39impl<'a> Scraper<'a> {
40 pub fn all<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, ScrapeError>
44 where
45 F: Fn(Scraper) -> Result<T, ScrapeError>,
46 {
47 let selector: Selector = Self::selector(selection)?;
48 let mut result: Vec<T> = Vec::default();
49 for element in self.element.select(&selector) {
50 let scraper: Scraper = Scraper::from(element);
51 let element: T = scrape(scraper)?;
52 result.push(element)
53 }
54 Ok(result)
55 }
56
57 pub fn all_text(&self, selection: &str) -> Result<Vec<String>, ScrapeError> {
59 self.all(selection, |s| Ok(s.element().text().collect()))
60 }
61
62 pub fn all_html(&self, selection: &str) -> Result<Vec<String>, ScrapeError> {
64 self.all(selection, |s| Ok(s.element().html()))
65 }
66
67 pub fn all_flat<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, ScrapeError>
69 where
70 F: Fn(Scraper) -> Result<Option<T>, ScrapeError>,
71 {
72 let selector: Selector = Self::selector(selection)?;
73 let mut result: Vec<T> = Vec::default();
74 for element in self.element.select(&selector) {
75 let scraper: Scraper = Scraper::from(element);
76 if let Some(element) = scrape(scraper)? {
77 result.push(element)
78 }
79 }
80 Ok(result)
81 }
82}
83
84impl<'a> Scraper<'a> {
85 pub fn only<T, F>(&self, selection: &str, scrape: F) -> Result<T, ScrapeError>
89 where
90 F: Fn(Scraper) -> Result<T, ScrapeError>,
91 {
92 let selector: Selector = Self::selector(selection)?;
93 let mut select: Select = self.element.select(&selector);
94 if let Some(first) = select.next() {
95 let first: T = scrape(first.into())?;
96 if select.next().is_some() {
97 Err(ExpectedOneGotMultiple {
98 selection: selection.to_string(),
99 })
100 } else {
101 Ok(first)
102 }
103 } else {
104 Err(ExpectedOneGotNone {
105 selection: selection.to_string(),
106 })
107 }
108 }
109
110 pub fn only_att(&self, selection: &str, att: &str) -> Result<String, ScrapeError> {
112 self.only(selection, |s| {
113 if let Some(att) = s.element.attr(att) {
114 Ok(att.to_string())
115 } else {
116 Err(ExpectedOneGotNone {
117 selection: selection.to_string(),
118 })
119 }
120 })
121 }
122
123 pub fn only_text(&self, selection: &str) -> Result<String, ScrapeError> {
125 self.only(selection, |s| Ok(s.element.text().collect()))
126 }
127
128 pub fn only_html(&self, selection: &str) -> Result<String, ScrapeError> {
130 self.only(selection, |s| Ok(s.element.html()))
131 }
132}
133
134impl<'a> Scraper<'a> {
135 pub fn optional<T, F>(&self, selection: &str, scrape: F) -> Result<Option<T>, ScrapeError>
139 where
140 F: Fn(Scraper) -> Result<T, ScrapeError>,
141 {
142 let selector: Selector = Self::selector(selection)?;
143 let mut select: Select = self.element.select(&selector);
144 if let Some(first) = select.next() {
145 let first: T = scrape(first.into())?;
146 if select.next().is_some() {
147 Err(ExpectedOptionalGotMultiple {
148 selection: selection.to_string(),
149 })
150 } else {
151 Ok(Some(first))
152 }
153 } else {
154 Ok(None)
155 }
156 }
157
158 pub fn optional_text(&self, selection: &str) -> Result<Option<String>, ScrapeError> {
160 self.optional(selection, |s| Ok(s.element().text().collect()))
161 }
162
163 pub fn optional_html(&self, selection: &str) -> Result<Option<String>, ScrapeError> {
165 self.optional(selection, |s| Ok(s.element().html()))
166 }
167}