web_scrape/scrape/
scraper.rs1use scraper::element_ref::Select;
2use scraper::{ElementRef, Selector};
3
4use crate::scrape::ScrapeError;
5use crate::scrape::ScrapeError::*;
6
7#[derive(Copy, Clone, Debug)]
9pub struct Scraper<'a> {
10 element: ElementRef<'a>,
11}
12
13impl<'a> From<ElementRef<'a>> for Scraper<'a> {
14 fn from(element: ElementRef<'a>) -> Self {
15 Self { element }
16 }
17}
18
19impl<'a> Scraper<'a> {
20 pub fn element(&self) -> ElementRef {
24 self.element
25 }
26}
27
28impl<'a> Scraper<'a> {
29 fn selector(selection: &str) -> Result<Selector, ScrapeError> {
33 Selector::parse(selection).map_err(|e| InvalidSelection {
34 selection: selection.to_string(),
35 message: e.to_string(),
36 })
37 }
38}
39
40impl<'a> Scraper<'a> {
41 pub fn all<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, ScrapeError>
45 where
46 F: Fn(Scraper) -> Result<T, ScrapeError>,
47 {
48 let selector: Selector = Self::selector(selection)?;
49 let mut result: Vec<T> = Vec::default();
50 for element in self.element.select(&selector) {
51 let scraper: Scraper = Scraper::from(element);
52 let element: T = scrape(scraper)?;
53 result.push(element)
54 }
55 Ok(result)
56 }
57
58 pub fn all_text(&self, selection: &str) -> Result<Vec<String>, ScrapeError> {
60 self.all(selection, |s| Ok(s.element().text().collect()))
61 }
62
63 pub fn all_html(&self, selection: &str) -> Result<Vec<String>, ScrapeError> {
65 self.all(selection, |s| Ok(s.element().html()))
66 }
67
68 pub fn all_flat<T, F>(&self, selection: &str, scrape: F) -> Result<Vec<T>, ScrapeError>
70 where
71 F: Fn(Scraper) -> Result<Option<T>, ScrapeError>,
72 {
73 let selector: Selector = Self::selector(selection)?;
74 let mut result: Vec<T> = Vec::default();
75 for element in self.element.select(&selector) {
76 let scraper: Scraper = Scraper::from(element);
77 if let Some(element) = scrape(scraper)? {
78 result.push(element)
79 }
80 }
81 Ok(result)
82 }
83}
84
85impl<'a> Scraper<'a> {
86 pub fn only<T, F>(&self, selection: &str, scrape: F) -> Result<T, ScrapeError>
90 where
91 F: Fn(Scraper) -> Result<T, ScrapeError>,
92 {
93 let selector: Selector = Self::selector(selection)?;
94 let mut select: Select = self.element.select(&selector);
95 if let Some(first) = select.next() {
96 let first: T = scrape(first.into())?;
97 if select.next().is_some() {
98 Err(ExpectedOneGotMultiple {
99 selection: selection.to_string(),
100 })
101 } else {
102 Ok(first)
103 }
104 } else {
105 Err(ExpectedOneGotNone {
106 selection: selection.to_string(),
107 })
108 }
109 }
110
111 pub fn only_att(&self, selection: &str, att: &str) -> Result<String, ScrapeError> {
113 self.only(selection, |s| {
114 if let Some(att) = s.element.attr(att) {
115 Ok(att.to_string())
116 } else {
117 Err(ExpectedOneGotNone {
118 selection: selection.to_string(),
119 })
120 }
121 })
122 }
123
124 pub fn only_text(&self, selection: &str) -> Result<String, ScrapeError> {
126 self.only(selection, |s| Ok(s.element.text().collect()))
127 }
128
129 pub fn only_html(&self, selection: &str) -> Result<String, ScrapeError> {
131 self.only(selection, |s| Ok(s.element.html()))
132 }
133}
134
135impl<'a> Scraper<'a> {
136 pub fn optional<T, F>(&self, selection: &str, scrape: F) -> Result<Option<T>, ScrapeError>
140 where
141 F: Fn(Scraper) -> Result<T, ScrapeError>,
142 {
143 let selector: Selector = Self::selector(selection)?;
144 let mut select: Select = self.element.select(&selector);
145 if let Some(first) = select.next() {
146 let first: T = scrape(first.into())?;
147 if select.next().is_some() {
148 Err(ExpectedOptionalGotMultiple {
149 selection: selection.to_string(),
150 })
151 } else {
152 Ok(Some(first))
153 }
154 } else {
155 Ok(None)
156 }
157 }
158
159 pub fn optional_text(&self, selection: &str) -> Result<Option<String>, ScrapeError> {
161 self.optional(selection, |s| Ok(s.element().text().collect()))
162 }
163
164 pub fn optional_html(&self, selection: &str) -> Result<Option<String>, ScrapeError> {
166 self.optional(selection, |s| Ok(s.element().html()))
167 }
168}