robotparser_fork/service.rs
mod fetched_robots_txt;
mod robots_txt;
use crate::model::RequestRate;
use std::time::Duration;
use url::Url;

/// Trait implemented by robots.txt services; provides queries against a parsed robots.txt file.
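///
/// # Example
///
/// A minimal sketch of how a crawler loop might consult this trait. The
/// `service` value is assumed to be provided by some implementor of
/// `RobotsTxtService`; the `plan_fetch` helper itself is purely illustrative,
/// so the example is marked `ignore`.
///
/// ```ignore
/// use std::time::Duration;
/// use url::Url;
///
/// fn plan_fetch(
///     service: &impl RobotsTxtService,
///     user_agent: &str,
///     url: &mut Url,
/// ) -> Option<Duration> {
///     if !service.can_fetch(user_agent, url) {
///         // Fetching this URL is disallowed for the given user agent.
///         return None;
///     }
///     // Strip query parameters named in `Clean-param` before deduplicating URLs.
///     service.normalize_url(url);
///     // Honour the declared crawl delay; fall back to no delay.
///     Some(service.get_crawl_delay(user_agent).unwrap_or(Duration::from_secs(0)))
/// }
/// ```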
pub trait RobotsTxtService {
    /// Decides, based on the parsed robots.txt, whether the given user agent is allowed to fetch the given URL.
    fn can_fetch(&self, user_agent: &str, url: &Url) -> bool;

    /// Returns the crawl delay for this user agent as a `Duration`, or `None` if no crawl delay is defined.
    fn get_crawl_delay(&self, user_agent: &str) -> Option<Duration>;

    /// Removes the query parameters listed in the `Clean-param` directive from the URL.
    /// This method CHECKS that the origin of the passed URL matches the origin of the robots.txt file.
    /// Returns `true` if the operation was applied to the passed URL, and `false` otherwise.
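    ///
    /// For example, assuming the usual `Clean-param` semantics and a hypothetical
    /// `Clean-param: ref` directive, `https://example.com/page?ref=abc&id=1` would
    /// be rewritten to `https://example.com/page?id=1`.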
    fn normalize_url(&self, url: &mut Url) -> bool;

    /// Removes the query parameters listed in the `Clean-param` directive from the URL.
    /// This method DOES NOT CHECK that the origin of the passed URL matches the origin of the robots.txt file.
    fn normalize_url_ignore_origin(&self, url: &mut Url);

    /// Returns the sitemap URLs listed in the robots.txt file.
    fn get_sitemaps(&self) -> &[Url];

    /// Returns information about the request-rate restrictions for sending HTTP requests to the server,
    /// or `None` if no such restrictions are defined.
    fn get_req_rate(&self, user_agent: &str) -> Option<RequestRate>;
}