pub struct ParseOutput<I> { /* private fields */ }
The output of a spider’s parse method.
Implementations§
Source§impl<I> ParseOutput<I>
impl<I> ParseOutput<I>
Sourcepub fn new() -> Self
pub fn new() -> Self
Creates a new, empty ParseOutput.
Examples found in repository?
examples/quotes_scraper.rs (line 29)
27 async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
28 let html = response.to_html()?;
29 let mut output = ParseOutput::new();
30
31 for quote_element in html.select(&".quote".to_selector()?) {
32 let text = quote_element
33 .select(&".text".to_selector()?)
34 .next()
35 .map(|e| e.text().collect::<String>().trim().to_string())
36 .unwrap_or_default();
37
38 let author = quote_element
39 .select(&".author".to_selector()?)
40 .next()
41 .map(|e| e.text().collect::<String>().trim().to_string())
42 .unwrap_or_default();
43
44 let tags: Vec<String> = quote_element
45 .select(&".tags .tag".to_selector()?)
46 .map(|e| e.text().collect::<String>().trim().to_string())
47 .collect();
48 let tags_str = tags.join(", ");
49
50 let item = QuoteItem {
51 text,
52 author,
53 tags: tags_str,
54 };
55 output.add_item(item);
56 }
57
58 if let Some(next_href) = html
59 .select(&".next > a".to_selector()?)
60 .next()
61 .and_then(|a| a.attr("href"))
62 {
63 let next_url = response.url.join(next_href)?;
64 let next_request = Request::new(next_url);
65 output.add_request(next_request);
66 }
67
68 Ok(output)
69 }
pub fn into_parts(self) -> (Vec<I>, Vec<Request>)
Consumes the ParseOutput and returns its inner items and requests.
Sourcepub fn add_item(&mut self, item: I)
pub fn add_item(&mut self, item: I)
Adds a scraped item to the output.
Examples found in repository?
examples/quotes_scraper.rs (line 55)
27 async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
28 let html = response.to_html()?;
29 let mut output = ParseOutput::new();
30
31 for quote_element in html.select(&".quote".to_selector()?) {
32 let text = quote_element
33 .select(&".text".to_selector()?)
34 .next()
35 .map(|e| e.text().collect::<String>().trim().to_string())
36 .unwrap_or_default();
37
38 let author = quote_element
39 .select(&".author".to_selector()?)
40 .next()
41 .map(|e| e.text().collect::<String>().trim().to_string())
42 .unwrap_or_default();
43
44 let tags: Vec<String> = quote_element
45 .select(&".tags .tag".to_selector()?)
46 .map(|e| e.text().collect::<String>().trim().to_string())
47 .collect();
48 let tags_str = tags.join(", ");
49
50 let item = QuoteItem {
51 text,
52 author,
53 tags: tags_str,
54 };
55 output.add_item(item);
56 }
57
58 if let Some(next_href) = html
59 .select(&".next > a".to_selector()?)
60 .next()
61 .and_then(|a| a.attr("href"))
62 {
63 let next_url = response.url.join(next_href)?;
64 let next_request = Request::new(next_url);
65 output.add_request(next_request);
66 }
67
68 Ok(output)
69 }
pub fn add_request(&mut self, request: Request)
Adds a new request to be crawled.
Examples found in repository?
examples/quotes_scraper.rs (line 65)
27 async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
28 let html = response.to_html()?;
29 let mut output = ParseOutput::new();
30
31 for quote_element in html.select(&".quote".to_selector()?) {
32 let text = quote_element
33 .select(&".text".to_selector()?)
34 .next()
35 .map(|e| e.text().collect::<String>().trim().to_string())
36 .unwrap_or_default();
37
38 let author = quote_element
39 .select(&".author".to_selector()?)
40 .next()
41 .map(|e| e.text().collect::<String>().trim().to_string())
42 .unwrap_or_default();
43
44 let tags: Vec<String> = quote_element
45 .select(&".tags .tag".to_selector()?)
46 .map(|e| e.text().collect::<String>().trim().to_string())
47 .collect();
48 let tags_str = tags.join(", ");
49
50 let item = QuoteItem {
51 text,
52 author,
53 tags: tags_str,
54 };
55 output.add_item(item);
56 }
57
58 if let Some(next_href) = html
59 .select(&".next > a".to_selector()?)
60 .next()
61 .and_then(|a| a.attr("href"))
62 {
63 let next_url = response.url.join(next_href)?;
64 let next_request = Request::new(next_url);
65 output.add_request(next_request);
66 }
67
68 Ok(output)
69 }
pub fn add_items(&mut self, items: impl IntoIterator<Item = I>)
Adds multiple scraped items to the output.
Sourcepub fn add_requests(&mut self, requests: impl IntoIterator<Item = Request>)
pub fn add_requests(&mut self, requests: impl IntoIterator<Item = Request>)
Adds multiple new requests to be crawled.
Trait Implementations§
Source§impl<I: Clone> Clone for ParseOutput<I>
impl<I: Clone> Clone for ParseOutput<I>
Source§fn clone(&self) -> ParseOutput<I>
fn clone(&self) -> ParseOutput<I>
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
impl<I: Debug> Debug for ParseOutput<I>
Auto Trait Implementations§
impl<I> Freeze for ParseOutput<I>
impl<I> !RefUnwindSafe for ParseOutput<I>
impl<I> Send for ParseOutput<I> where
I: Send,
impl<I> Sync for ParseOutput<I> where
I: Sync,
impl<I> Unpin for ParseOutput<I> where
I: Unpin,
impl<I> UnwindSafe for ParseOutput<I> where
I: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more