pub struct Request {
pub url: Url,
pub method: Method,
pub headers: HeaderMap,
pub body: Option<Bytes>,
pub callback_name: String,
pub spider_name: String,
pub meta: HashMap<String, Value>,
}
Fields§
§url: Url
§method: Method
§headers: HeaderMap
§body: Option<Bytes>
§callback_name: String
§spider_name: String
§meta: HashMap<String, Value>
Implementations§
Source§impl Request
impl Request
Source pub fn new(url: Url, spider_name: &str, callback_name: &str) -> Self
pub fn new(url: Url, spider_name: &str, callback_name: &str) -> Self
Examples found in repository?
examples/quotes.rs (line 95)
51 async fn parse(&self, response: Response) -> Result<CrawlOutput<Self::Item>, SpiderError> {
52 debug!("Parsing response from {}", response.url);
53 let html = Html::parse_document(std::str::from_utf8(&response.body).unwrap());
54 let quote_selector = Selector::parse("div.quote")
55 .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
56 let text_selector = Selector::parse("span.text")
57 .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
58 let author_selector = Selector::parse("small.author")
59 .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
60 let tags_selector = Selector::parse("div.tags a.tag")
61 .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
62
63 let mut output = CrawlOutput::new();
64
65 for quote_element in html.select(&quote_selector) {
66 let text = quote_element
67 .select(&text_selector)
68 .next()
69 .map(|e| e.inner_html());
70 let author = quote_element
71 .select(&author_selector)
72 .next()
73 .map(|e| e.inner_html());
74 let tags: Vec<String> = quote_element
75 .select(&tags_selector)
76 .map(|e| e.inner_html())
77 .collect();
78
79 if let (Some(text), Some(author)) = (text, author) {
80 output.add_item(QuoteItem {
81 text,
82 author,
83 tags,
84 });
85 }
86 }
87
88 let next_page_selector = Selector::parse("li.next a")
89 .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
90 if let Some(next_page_element) = html.select(&next_page_selector).next()
91 && let Some(href) = next_page_element.value().attr("href")
92 {
93 let mut next_url = response.url.join(href)?;
94 next_url.set_fragment(None);
95 output.add_request(Request::new(next_url.clone(), self.name(), "parse"));
96 debug!("Following next page: {}", next_url);
97 }
98
99 Ok(output)
100 }
More examples
examples/books.rs (line 109)
53 async fn parse(&self, response: Response) -> Result<CrawlOutput<Self::Item>, SpiderError> {
54 let html = Html::parse_document(std::str::from_utf8(&response.body).unwrap());
55 let mut output = CrawlOutput::new();
56
57 if response.url.path().ends_with("index.html") && response.url.path().contains("catalogue")
58 {
59 // Book page
60 let title = html
61 .select(&Selector::parse("h1").unwrap())
62 .next()
63 .unwrap()
64 .inner_html();
65 let price = html
66 .select(&Selector::parse("p.price_color").unwrap())
67 .next()
68 .unwrap()
69 .inner_html();
70 let availability = html
71 .select(&Selector::parse("p.instock.availability").unwrap())
72 .next()
73 .unwrap()
74 .text()
75 .collect::<String>()
76 .trim()
77 .to_string();
78 let rating = html
79 .select(&Selector::parse("p.star-rating").unwrap())
80 .next()
81 .unwrap()
82 .value()
83 .attr("class")
84 .unwrap()
85 .split_whitespace()
86 .last()
87 .unwrap()
88 .to_string();
89
90 output.add_item(BookItem {
91 title,
92 price,
93 availability,
94 rating,
95 url: response.url.to_string(),
96 });
97 } else {
98 // Book list page
99 let book_selector = Selector::parse("article.product_pod h3 a").unwrap();
100 let next_page_selector = Selector::parse("li.next a").unwrap();
101
102 let book_links = html.select(&book_selector);
103 let next_page_link = html.select(&next_page_selector);
104
105 for link in book_links.chain(next_page_link) {
106 if let Some(href) = link.value().attr("href") {
107 let mut url = response.url.join(href)?;
108 url.set_fragment(None);
109 output.add_request(Request::new(url.clone(), self.name(), "parse"));
110 }
111 }
112 }
113
114 Ok(output)
115 }
pub fn with_method(self, method: Method) -> Self
pub fn with_header(self, name: &str, value: &str) -> Result<Self, SpiderError>
pub fn with_body(self, body: Bytes) -> Self
pub fn with_meta(self, key: String, value: Value) -> Self
Trait Implementations§
Auto Trait Implementations§
impl !Freeze for Request
impl RefUnwindSafe for Request
impl Send for Request
impl Sync for Request
impl Unpin for Request
impl UnwindSafe for Request
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more