simbld_http/responses/
crawler.rs

1use crate::generate_responses_functions;
2use crate::responses::CustomResponse;
3use crate::traits::get_code_trait::GetCode;
4use strum_macros::EnumIter;
5
6generate_responses_functions! {
7"Crawler responses",
8    ResponsesCrawlerCodes,
9    ParsingErrorUnfinishedHeader => (400, "Bad Request", "Parsing error: unfinished header.", 700, "Parsing Error Unfinished Header"),
10    ParsingErrorHeader => (400, "Bad Request", "Parsing error in the header.", 710, "Parsing Error: Header"),
11    ParsingErrorMissingHTTPCode => (400, "Bad Request", "Parsing error: missing HTTP code.", 720, "Parsing Error: Missing HTTP Code"),
12    ParsingErrorBody => (400, "Bad Request", "Parsing error in the body.", 730, "Parsing Error: Body"),
13    ExcludedByRobotsTxtFile => (403, "Forbidden", "Excluded by robots.txt file", 740, "Excluded by Robots.txt file"),
14    RobotsTemporarilyUnavailable => (503, "Service Unavailable", "Robots temporarily unavailable.", 741, "Robots Temporarily Unavailable"),
15    ExcludedByDefinitionOfExplorationSpace => (403, "Forbidden", "Excluded by definition of exploration space.", 760, "Excluded by Definition of Exploration Space"),
16    NotAllowedByLocalExplorationSpace => (403, "Forbidden", "Not allowed by local exploration space.", 761, "Not Allowed by Local Exploration Space"),
17    IncorrectProtocolOrNonStandardSystemPort => (400, "Bad Request", "Incorrect protocol or non-standard port used.", 770, "Incorrect Protocol or Non-Standard System Port"),
18    ExcludedByFileTypeExclusions => (403, "Forbidden", "Excluded by file type exclusions.", 780, "Excluded by File Type Exclusions"),
19    InvalidCard => (400, "Bad Request", "Invalid card - Not a physical card?", 781, "Invalid Card"),
20    CannotDisablePhysicalCard => (400, "Bad Request", "Cannot disable physical card or already requested print.", 782, "Cannot Disable Physical Card"),
21    InvalidURL => (400, "Bad Request", "Invalid URL encountered by crawler.", 786, "Invalid URL"),
22    NoIndexMetaTag => (400, "Bad Request", "No index meta tag found (non-standard).", 2004, "No Index Meta Tag"),
23    ProgrammableRedirection => (302, "Found", "Programmable redirection used (non-standard).", 3020, "Programmable Redirection"),
24    RedirectedToAnotherURL => (302, "Found", "Redirected to another URL (crawler-based).", 3021, "Redirected to Another URL"),
25}
26
/// Unit tests for the crawler response family (`ResponsesCrawlerCodes`).
#[cfg(test)]
mod tests {
    use crate::helpers::unified_tuple_helper::UnifiedTuple;
    use crate::responses::ResponsesCrawlerCodes;
    use crate::traits::tuple_traits::IntoTwoFieldsTuple;
    use serde_json::{json, to_value};

    /// `get_code` reports the standard HTTP status of a variant.
    #[test]
    fn test_crawler_codes_get_code() {
        assert_eq!(ResponsesCrawlerCodes::ParsingErrorUnfinishedHeader.get_code(), 400);
        assert_eq!(ResponsesCrawlerCodes::ParsingErrorHeader.get_code(), 400);
        assert_eq!(ResponsesCrawlerCodes::InvalidURL.get_code(), 400);
        assert_eq!(ResponsesCrawlerCodes::ProgrammableRedirection.get_code(), 302);
    }

    /// `from_u16` resolves a variant from its internal code and yields `None`
    /// for a code that no variant declares.
    #[test]
    fn test_crawler_codes_from_u16() {
        assert_eq!(
            ResponsesCrawlerCodes::from_u16(700),
            Some(ResponsesCrawlerCodes::ParsingErrorUnfinishedHeader)
        );
        assert_eq!(
            ResponsesCrawlerCodes::from_u16(710),
            Some(ResponsesCrawlerCodes::ParsingErrorHeader)
        );
        assert_eq!(ResponsesCrawlerCodes::from_u16(786), Some(ResponsesCrawlerCodes::InvalidURL));
        assert_eq!(ResponsesCrawlerCodes::from_u16(9999), None);
    }

    /// `as_tuple` exposes all five declared fields of a variant.
    #[test]
    fn test_parsing_error_missing_as_tuple() {
        let expected = UnifiedTuple {
            standard_code: 400,
            standard_name: "Bad Request",
            unified_description: "Parsing error: missing HTTP code.",
            internal_code: Some(720),
            internal_name: Some("Parsing Error: Missing HTTP Code"),
        };
        let actual = ResponsesCrawlerCodes::ParsingErrorMissingHTTPCode.as_tuple();
        assert_eq!(actual, expected);
    }

    /// `as_json` nests the standard and internal codes under "details" and
    /// reports the family label under "type".
    #[test]
    fn test_robots_temporarily_unavailable_as_json() {
        let actual = ResponsesCrawlerCodes::RobotsTemporarilyUnavailable.as_json();
        let expected = json!({
            "type": "Crawler responses",
            "details": {
                "standard http code": {
                    "code": 503,
                    "name": "Service Unavailable"
                },
                "description": "Robots temporarily unavailable.",
                "internal http code": {
                    "code": 741,
                    "name": "Robots Temporarily Unavailable"
                }
            }
        });
        assert_eq!(actual, expected);
    }

    /// `into_two_fields_tuple` serializes to just the standard code and name.
    #[test]
    fn test_found_into_two_fields_tuple() {
        let two_fields = ResponsesCrawlerCodes::RedirectedToAnotherURL.into_two_fields_tuple();
        let actual = to_value(&two_fields).unwrap();

        let expected = json!({
            "code": 302,
            "name": "Found"
        });

        assert_eq!(actual, expected);
    }

    /// Variants that share a standard HTTP code stay distinguishable through
    /// their internal codes.
    #[test]
    fn test_bad_request_duplicate_standard_codes() {
        // Premise: both variants report the same standard HTTP code (400).
        assert_eq!(
            ResponsesCrawlerCodes::ParsingErrorUnfinishedHeader.get_code(),
            ResponsesCrawlerCodes::ParsingErrorHeader.get_code()
        );

        // Each internal code still resolves to its own variant.
        assert_eq!(
            ResponsesCrawlerCodes::from_u16(700),
            Some(ResponsesCrawlerCodes::ParsingErrorUnfinishedHeader)
        );
        assert_eq!(
            ResponsesCrawlerCodes::from_u16(710),
            Some(ResponsesCrawlerCodes::ParsingErrorHeader)
        );
    }
}