//! robotxt/parse/access.rs

/// The result of the `robots.txt` retrieval attempt.
///
/// See [`Robots::from_access`].
/// Also see 2.3.1. Access Results in the specification.
///
/// [`Robots::from_access`]: crate::Robots::from_access
// All variants hold at most a shared slice, so the enum is trivially
// copyable and comparable; derive the full set of cheap traits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessResult<'a> {
    /// 2.3.1.1.  Successful Access
    ///
    /// If the crawler successfully downloads the robots.txt file, the
    /// crawler MUST follow the parseable rules.
    Successful(&'a [u8]),
    /// 2.3.1.2.  Redirects
    ///
    /// It's possible that a server responds to a robots.txt fetch request
    /// with a redirect, such as HTTP 301 or HTTP 302 in the case of HTTP.
    /// The crawlers SHOULD follow at least five consecutive redirects, even
    /// across authorities (for example, hosts in the case of HTTP).
    ///
    /// If a robots.txt file is reached within five consecutive redirects,
    /// the robots.txt file MUST be fetched, parsed, and its rules followed
    /// in the context of the initial authority.
    ///
    /// If there are more than five consecutive redirects, crawlers MAY
    /// assume that the robots.txt file is unavailable.
    Redirect,
    /// 2.3.1.3.  "Unavailable" Status
    ///
    /// "Unavailable" means the crawler tries to fetch the robots.txt file
    /// and the server responds with status codes indicating that the
    /// resource in question is unavailable.  For example, in the context of
    /// HTTP, such status codes are in the 400-499 range.
    ///
    /// If a server status code indicates that the robots.txt file is
    /// unavailable to the crawler, then the crawler MAY access any resources
    /// on the server.
    Unavailable,
    /// 2.3.1.4.  "Unreachable" Status
    ///
    /// If the robots.txt file is unreachable due to server or network
    /// errors, this means the robots.txt file is undefined and the crawler
    /// MUST assume complete disallow.  For example, in the context of HTTP,
    /// server errors are identified by status codes in the 500-599 range.
    ///
    /// If the robots.txt file is undefined for a reasonably long period of
    /// time (for example, 30 days), crawlers MAY assume that the robots.txt
    /// file is unavailable as defined in Section 2.3.1.3 or continue to use
    /// cached copy.
    Unreachable,
}

impl AccessResult<'_> {
    /// Returns the textual representation of a status.
    ///
    /// The string matches the variant name and is what [`Display`] emits.
    ///
    /// [`Display`]: std::fmt::Display
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Successful(_) => "Successful",
            Self::Redirect => "Redirect",
            Self::Unavailable => "Unavailable",
            Self::Unreachable => "Unreachable",
        }
    }
}

impl std::fmt::Display for AccessResult<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The name is a static str, so write it directly instead of going
        // through the `write!` formatting machinery.
        f.write_str(self.as_str())
    }
}