robotparser_fork/http/reqwest/
sync_reqwest.rs

1use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
2use crate::model::FetchedRobotsTxt;
3use crate::model::{Error, ErrorKind};
4use crate::parser::{parse_fetched_robots_txt, ParseResult};
5use reqwest::blocking::{Client, Request};
6use reqwest::header::HeaderValue;
7use reqwest::header::USER_AGENT;
8use reqwest::Method;
9use url::{Origin, Url};
10
11impl RobotsTxtClient for Client {
12    type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
13    fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
14        let url = format!("{}/robots.txt", origin.unicode_serialization());
15        let url = Url::parse(&url).map_err(|err| Error {
16            kind: ErrorKind::Url(err),
17        })?;
18        let mut request = Request::new(Method::GET, url);
19        let _ = request
20            .headers_mut()
21            .insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
22        let response = self.execute(request).map_err(|err| Error {
23            kind: ErrorKind::Http(err),
24        })?;
25        let status_code = response.status().as_u16();
26        let text = response.text().map_err(|err| Error {
27            kind: ErrorKind::Http(err),
28        })?;
29        let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
30        Ok(robots_txt)
31    }
32}