Request

Struct Request 

Source
/// A request value with all-public fields: target URL, method, headers,
/// optional body, callback and spider names, and a keyed metadata map.
///
/// The examples on this page create one with
/// `Request::new(url, spider_name, callback_name)` and hand it to
/// `CrawlOutput::add_request`.
///
/// NOTE(review): the field descriptions below are derived from the field
/// types, the `new`/`with_*` signatures and the example call sites only;
/// confirm the details against the implementation.
pub struct Request {
    /// URL this request points at. In the examples it is produced by
    /// `response.url.join(href)` with the fragment cleared.
    pub url: Url,
    /// Request method — presumably the HTTP verb; presumably what
    /// `with_method` overrides (TODO confirm default value).
    pub method: Method,
    /// Header map attached to the request — presumably what `with_header`
    /// adds to; that builder is fallible and returns `SpiderError` on failure.
    pub headers: HeaderMap,
    /// Optional body bytes; `None` means no body is attached. Presumably set
    /// by `with_body`.
    pub body: Option<Bytes>,
    /// Name of the callback associated with this request; the examples pass
    /// `"parse"`, matching the spider method shown there. How the name is
    /// resolved is not visible here.
    pub callback_name: String,
    /// Name of the spider that issued the request; the examples pass
    /// `self.name()`.
    pub spider_name: String,
    /// String-keyed map of arbitrary `Value`s carried with the request —
    /// presumably what `with_meta(key, value)` inserts into.
    pub meta: HashMap<String, Value>,
}

Fields§

§url: Url
§method: Method
§headers: HeaderMap
§body: Option<Bytes>
§callback_name: String
§spider_name: String
§meta: HashMap<String, Value>

Implementations§

Source§

impl Request

Source

pub fn new(url: Url, spider_name: &str, callback_name: &str) -> Self

Examples found in repository?
examples/quotes.rs (line 95)
51    async fn parse(&self, response: Response) -> Result<CrawlOutput<Self::Item>, SpiderError> {
52        debug!("Parsing response from {}", response.url);
53        let html = Html::parse_document(std::str::from_utf8(&response.body).unwrap());
54        let quote_selector = Selector::parse("div.quote")
55            .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
56        let text_selector = Selector::parse("span.text")
57            .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
58        let author_selector = Selector::parse("small.author")
59            .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
60        let tags_selector = Selector::parse("div.tags a.tag")
61            .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
62
63        let mut output = CrawlOutput::new();
64
65        for quote_element in html.select(&quote_selector) {
66            let text = quote_element
67                .select(&text_selector)
68                .next()
69                .map(|e| e.inner_html());
70            let author = quote_element
71                .select(&author_selector)
72                .next()
73                .map(|e| e.inner_html());
74            let tags: Vec<String> = quote_element
75                .select(&tags_selector)
76                .map(|e| e.inner_html())
77                .collect();
78
79            if let (Some(text), Some(author)) = (text, author) {
80                output.add_item(QuoteItem {
81                    text,
82                    author,
83                    tags,
84                });
85            }
86        }
87
88        let next_page_selector = Selector::parse("li.next a")
89            .map_err(|e| SpiderError::HtmlParseError(format!("{:?}", e)))?;
90        if let Some(next_page_element) = html.select(&next_page_selector).next()
91            && let Some(href) = next_page_element.value().attr("href")
92        {
93            let mut next_url = response.url.join(href)?;
94            next_url.set_fragment(None);
95            output.add_request(Request::new(next_url.clone(), self.name(), "parse"));
96            debug!("Following next page: {}", next_url);
97        }
98
99        Ok(output)
100    }
More examples
Hide additional examples
examples/books.rs (line 109)
53    async fn parse(&self, response: Response) -> Result<CrawlOutput<Self::Item>, SpiderError> {
54        let html = Html::parse_document(std::str::from_utf8(&response.body).unwrap());
55        let mut output = CrawlOutput::new();
56
57        if response.url.path().ends_with("index.html") && response.url.path().contains("catalogue")
58        {
59            // Book page
60            let title = html
61                .select(&Selector::parse("h1").unwrap())
62                .next()
63                .unwrap()
64                .inner_html();
65            let price = html
66                .select(&Selector::parse("p.price_color").unwrap())
67                .next()
68                .unwrap()
69                .inner_html();
70            let availability = html
71                .select(&Selector::parse("p.instock.availability").unwrap())
72                .next()
73                .unwrap()
74                .text()
75                .collect::<String>()
76                .trim()
77                .to_string();
78            let rating = html
79                .select(&Selector::parse("p.star-rating").unwrap())
80                .next()
81                .unwrap()
82                .value()
83                .attr("class")
84                .unwrap()
85                .split_whitespace()
86                .last()
87                .unwrap()
88                .to_string();
89
90            output.add_item(BookItem {
91                title,
92                price,
93                availability,
94                rating,
95                url: response.url.to_string(),
96            });
97        } else {
98            // Book list page
99            let book_selector = Selector::parse("article.product_pod h3 a").unwrap();
100            let next_page_selector = Selector::parse("li.next a").unwrap();
101
102            let book_links = html.select(&book_selector);
103            let next_page_link = html.select(&next_page_selector);
104
105            for link in book_links.chain(next_page_link) {
106                if let Some(href) = link.value().attr("href") {
107                    let mut url = response.url.join(href)?;
108                    url.set_fragment(None);
109                    output.add_request(Request::new(url.clone(), self.name(), "parse"));
110                }
111            }
112        }
113
114        Ok(output)
115    }
Source

pub fn with_method(self, method: Method) -> Self

Source

pub fn with_header(self, name: &str, value: &str) -> Result<Self, SpiderError>

Source

pub fn with_body(self, body: Bytes) -> Self

Source

pub fn with_meta(self, key: String, value: Value) -> Self

Trait Implementations§

Source§

impl Clone for Request

Source§

fn clone(&self) -> Request

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for Request

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more