// robotxt/parse/access.rs
/// The result of the `robots.txt` retrieval attempt.
///
/// See [`Robots::from_access`].
/// Also see 2.3.1. Access Results in the specification.
///
/// [`Robots::from_access`]: crate::Robots::from_access
// All payloads are shared slices, so the enum is trivially `Copy`;
// deriving comparison/hashing traits lets callers store and compare
// access results without matching manually.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AccessResult<'a> {
    /// 2.3.1.1. Successful Access
    ///
    /// If the crawler successfully downloads the robots.txt file, the
    /// crawler MUST follow the parseable rules.
    Successful(&'a [u8]),
    /// 2.3.1.2. Redirects
    ///
    /// It's possible that a server responds to a robots.txt fetch request
    /// with a redirect, such as HTTP 301 or HTTP 302 in the case of HTTP.
    /// The crawlers SHOULD follow at least five consecutive redirects, even
    /// across authorities (for example, hosts in the case of HTTP).
    ///
    /// If a robots.txt file is reached within five consecutive redirects,
    /// the robots.txt file MUST be fetched, parsed, and its rules followed
    /// in the context of the initial authority.
    ///
    /// If there are more than five consecutive redirects, crawlers MAY
    /// assume that the robots.txt file is unavailable.
    Redirect,
    /// 2.3.1.3. "Unavailable" Status
    ///
    /// "Unavailable" means the crawler tries to fetch the robots.txt file
    /// and the server responds with status codes indicating that the
    /// resource in question is unavailable. For example, in the context of
    /// HTTP, such status codes are in the 400-499 range.
    ///
    /// If a server status code indicates that the robots.txt file is
    /// unavailable to the crawler, then the crawler MAY access any resources
    /// on the server.
    Unavailable,
    /// 2.3.1.4. "Unreachable" Status
    ///
    /// If the robots.txt file is unreachable due to server or network
    /// errors, this means the robots.txt file is undefined and the crawler
    /// MUST assume complete disallow. For example, in the context of HTTP,
    /// server errors are identified by status codes in the 500-599 range.
    ///
    /// If the robots.txt file is undefined for a reasonably long period of
    /// time (for example, 30 days), crawlers MAY assume that the robots.txt
    /// file is unavailable as defined in Section 2.3.1.3 or continue to use
    /// cached copy.
    Unreachable,
}
52
53impl AccessResult<'_> {
54 /// Returns the textual representation of a status.
55 pub fn as_str(&self) -> &'static str {
56 match self {
57 AccessResult::Successful(_) => "Successful",
58 AccessResult::Redirect => "Redirect",
59 AccessResult::Unavailable => "Unavailable",
60 AccessResult::Unreachable => "Unreachable",
61 }
62 }
63}
64
65impl std::fmt::Display for AccessResult<'_> {
66 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67 write!(f, "{}", self.as_str())
68 }
69}