robotparser_fork/http/reqwest/
sync_reqwest.rs1use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
2use crate::model::FetchedRobotsTxt;
3use crate::model::{Error, ErrorKind};
4use crate::parser::{parse_fetched_robots_txt, ParseResult};
5use reqwest::blocking::{Client, Request};
6use reqwest::header::HeaderValue;
7use reqwest::header::USER_AGENT;
8use reqwest::Method;
9use url::{Origin, Url};
10
11impl RobotsTxtClient for Client {
12 type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
13 fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
14 let url = format!("{}/robots.txt", origin.unicode_serialization());
15 let url = Url::parse(&url).map_err(|err| Error {
16 kind: ErrorKind::Url(err),
17 })?;
18 let mut request = Request::new(Method::GET, url);
19 let _ = request
20 .headers_mut()
21 .insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
22 let response = self.execute(request).map_err(|err| Error {
23 kind: ErrorKind::Http(err),
24 })?;
25 let status_code = response.status().as_u16();
26 let text = response.text().map_err(|err| Error {
27 kind: ErrorKind::Http(err),
28 })?;
29 let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
30 Ok(robots_txt)
31 }
32}