pub struct ParseOutput<I> { /* private fields */ }
The output of a spider’s parse method.
Implementations§
impl<I> ParseOutput<I>
pub fn new() -> Self
Creates a new, empty ParseOutput.
Examples found in repository?
examples/quotes_scraper.rs (line 39)
37 async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
38 let html = response.to_html()?;
39 let mut output = ParseOutput::new();
40
41 for quote_element in html.select(&".quote".to_selector()?) {
42 let text = quote_element
43 .select(&".text".to_selector()?)
44 .next()
45 .map(|e| e.text().collect::<String>().trim().to_string())
46 .unwrap_or_default();
47
48 let author = quote_element
49 .select(&".author".to_selector()?)
50 .next()
51 .map(|e| e.text().collect::<String>().trim().to_string())
52 .unwrap_or_default();
53
54 let tags: Vec<String> = quote_element
55 .select(&".tags .tag".to_selector()?)
56 .map(|e| e.text().collect::<String>().trim().to_string())
57 .collect();
58 let tags_str = tags.join(", ");
59
60 let item = QuoteItem {
61 text,
62 author,
63 tags: tags_str,
64 };
65 output.add_item(item);
66 }
67
68 if let Some(next_href) = html
69 .select(&".next > a".to_selector()?)
70 .next()
71 .and_then(|a| a.attr("href"))
72 {
73 let next_url = response.url.join(next_href)?;
74 let next_request = Request::new(next_url);
75 output.add_request(next_request);
76 }
77
78 Ok(output)
79 }
pub fn into_parts(self) -> (Vec<I>, Vec<Request>)
Consumes the ParseOutput and returns its inner items and requests.
pub fn add_item(&mut self, item: I)
Adds a scraped item to the output.
Examples found in repository?
examples/quotes_scraper.rs (line 65)
37 async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
38 let html = response.to_html()?;
39 let mut output = ParseOutput::new();
40
41 for quote_element in html.select(&".quote".to_selector()?) {
42 let text = quote_element
43 .select(&".text".to_selector()?)
44 .next()
45 .map(|e| e.text().collect::<String>().trim().to_string())
46 .unwrap_or_default();
47
48 let author = quote_element
49 .select(&".author".to_selector()?)
50 .next()
51 .map(|e| e.text().collect::<String>().trim().to_string())
52 .unwrap_or_default();
53
54 let tags: Vec<String> = quote_element
55 .select(&".tags .tag".to_selector()?)
56 .map(|e| e.text().collect::<String>().trim().to_string())
57 .collect();
58 let tags_str = tags.join(", ");
59
60 let item = QuoteItem {
61 text,
62 author,
63 tags: tags_str,
64 };
65 output.add_item(item);
66 }
67
68 if let Some(next_href) = html
69 .select(&".next > a".to_selector()?)
70 .next()
71 .and_then(|a| a.attr("href"))
72 {
73 let next_url = response.url.join(next_href)?;
74 let next_request = Request::new(next_url);
75 output.add_request(next_request);
76 }
77
78 Ok(output)
79 }
pub fn add_request(&mut self, request: Request)
Adds a new request to be crawled.
Examples found in repository?
examples/quotes_scraper.rs (line 75)
37 async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
38 let html = response.to_html()?;
39 let mut output = ParseOutput::new();
40
41 for quote_element in html.select(&".quote".to_selector()?) {
42 let text = quote_element
43 .select(&".text".to_selector()?)
44 .next()
45 .map(|e| e.text().collect::<String>().trim().to_string())
46 .unwrap_or_default();
47
48 let author = quote_element
49 .select(&".author".to_selector()?)
50 .next()
51 .map(|e| e.text().collect::<String>().trim().to_string())
52 .unwrap_or_default();
53
54 let tags: Vec<String> = quote_element
55 .select(&".tags .tag".to_selector()?)
56 .map(|e| e.text().collect::<String>().trim().to_string())
57 .collect();
58 let tags_str = tags.join(", ");
59
60 let item = QuoteItem {
61 text,
62 author,
63 tags: tags_str,
64 };
65 output.add_item(item);
66 }
67
68 if let Some(next_href) = html
69 .select(&".next > a".to_selector()?)
70 .next()
71 .and_then(|a| a.attr("href"))
72 {
73 let next_url = response.url.join(next_href)?;
74 let next_request = Request::new(next_url);
75 output.add_request(next_request);
76 }
77
78 Ok(output)
79 }
pub fn add_items(&mut self, items: impl IntoIterator<Item = I>)
Adds multiple scraped items to the output.
pub fn add_requests(&mut self, requests: impl IntoIterator<Item = Request>)
Adds multiple new requests to be crawled.
Trait Implementations§
impl<I: Clone> Clone for ParseOutput<I>
fn clone(&self) -> ParseOutput<I>
Returns a duplicate of the value. Read more
1.0.0 · fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
impl<I: Debug> Debug for ParseOutput<I>
Auto Trait Implementations§
impl<I> Freeze for ParseOutput<I>
impl<I> !RefUnwindSafe for ParseOutput<I>
impl<I> Send for ParseOutput<I>
where
    I: Send,
impl<I> Sync for ParseOutput<I>
where
    I: Sync,
impl<I> Unpin for ParseOutput<I>
where
    I: Unpin,
impl<I> UnwindSafe for ParseOutput<I>
where
    I: UnwindSafe,
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more