robotparser_fork/http/reqwest/
async_reqwest.rs

1use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
2use crate::model::FetchedRobotsTxt;
3use crate::model::{Error, ErrorKind};
4use crate::parser::{parse_fetched_robots_txt, ParseResult};
5use futures::future::ok as future_ok;
6use futures::future::TryFutureExt;
7use futures::task::{Context, Poll};
8use futures::Future;
9use reqwest::header::HeaderValue;
10use reqwest::header::USER_AGENT;
11use reqwest::Error as ReqwestError;
12use reqwest::Method;
13use reqwest::{Client, Request};
14use std::pin::Pin;
15use url::{Origin, Url};
16
17type FetchFuture = Box<dyn Future<Output = Result<(ResponseInfo, String), ReqwestError>>>;
18type BoxFuture = Pin<FetchFuture>;
19
20impl RobotsTxtClient for Client {
21    type Result = Result<RobotsTxtResponse, Error>;
22    fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
23        let url = format!("{}/robots.txt", origin.unicode_serialization());
24        let url = Url::parse(&url).map_err(|err| Error {
25            kind: ErrorKind::Url(err),
26        })?;
27        let mut request = Request::new(Method::GET, url);
28        let _ = request
29            .headers_mut()
30            .insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
31        let response = self.execute(request).and_then(|response| {
32            let response_info = ResponseInfo {
33                status_code: response.status().as_u16(),
34            };
35            response
36                .text()
37                .and_then(|response_text| future_ok((response_info, response_text)))
38        });
39        let response: BoxFuture = Box::pin(response);
40        Ok(RobotsTxtResponse { origin, response })
41    }
42}
43
/// Metadata taken from the HTTP response before its body is read.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so the value can be logged and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ResponseInfo {
    /// Numeric HTTP status code of the response.
    status_code: u16,
}
47
48/// Future for fetching robots.txt result.
49pub struct RobotsTxtResponse {
50    origin: Origin,
51    response: Pin<FetchFuture>,
52}
53
54impl RobotsTxtResponse {
55    /// Returns origin of robots.txt
56    pub fn get_origin(&self) -> &Origin {
57        &self.origin
58    }
59}
60
61impl Future for RobotsTxtResponse {
62    type Output = Result<ParseResult<FetchedRobotsTxt>, ReqwestError>;
63
64    fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
65        let self_mut = self.get_mut();
66        let response_pin = self_mut.response.as_mut();
67        match response_pin.poll(cx) {
68            Poll::Ready(Ok((response_info, text))) => {
69                let robots_txt = parse_fetched_robots_txt(self_mut.origin.clone(), response_info.status_code, &text);
70                Poll::Ready(Ok(robots_txt))
71            }
72            Poll::Ready(Err(error)) => Poll::Ready(Err(error)),
73            Poll::Pending => Poll::Pending,
74        }
75    }
76}