use serde::{Serialize,Deserialize};
use criterium::number::AsInteger;
use std::str::FromStr;
/// Exit code of a crawler job, stored as a plain `i16`.
///
/// Well-known codes are available as associated constants on this type;
/// any other `i16` value can still be wrapped (see the `From<i16>` impl).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExitCode(i16);
impl ExitCode {
pub const DATABASE_ERROR: ExitCode = ExitCode(-3);
pub const CANCELLED: ExitCode = ExitCode(-2);
pub const SOMEONE_STOLE_MY_WORK: ExitCode = ExitCode(-1);
pub const FILE_INGESTED: ExitCode = ExitCode(20);
pub const FILE_OF_UNKNOWN_TYPE: ExitCode = ExitCode(29);
pub const PERMANENT_REDIRECT: ExitCode = ExitCode(31);
pub const REDIRECT: ExitCode = ExitCode(32);
pub const FILE_DID_NOT_CHANGE: ExitCode = ExitCode(34);
pub const SERVER_BLAMED_CLIENT: ExitCode = ExitCode(40);
pub const FILE_GONE: ExitCode = ExitCode(41);
pub const DID_NOT_UNDERSTAND_ANSWER: ExitCode = ExitCode(42);
pub const FILE_NOT_FOUND: ExitCode = ExitCode(44);
pub const RATE_LIMITED: ExitCode = ExitCode(49);
pub const SERVER_INTERNAL_ERROR: ExitCode = ExitCode(50);
pub const CONNECTION_FAILED: ExitCode = ExitCode(100);
pub const REQUEST_TIMEOUT: ExitCode = ExitCode(101);
pub const ERROR_READING_RESPONSE: ExitCode = ExitCode(102);
pub const BLOCKED_BY_ROBOTS_TXT: ExitCode = ExitCode(170);
pub const BLOCKED_AT_REQUEST_OF_REMOTE: ExitCode = ExitCode(171);
pub const BLOCKED_ORIGIN_BY_LOCAL_POLICY: ExitCode = ExitCode(172);
pub const BLOCKED_URL_BY_LOCAL_POLICY: ExitCode = ExitCode(173);
pub const BLOCKED_BY_CHALLENGE: ExitCode = ExitCode(174);
pub const NOT_CANONICAL: ExitCode = ExitCode(180);
pub const DUPLICATE: ExitCode = ExitCode(181);
pub const UNKNOWN_ERROR: ExitCode = ExitCode(-999);
pub fn is_redirect(&self) -> bool {
matches!(self,
&Self::REDIRECT |
&Self::PERMANENT_REDIRECT
)
}
pub fn is_contentful(&self) -> bool {
matches!(*self,
Self::FILE_INGESTED |
Self::FILE_OF_UNKNOWN_TYPE |
Self::PERMANENT_REDIRECT |
Self::REDIRECT |
Self::FILE_NOT_FOUND |
Self::FILE_GONE |
Self::NOT_CANONICAL |
Self::DUPLICATE
)
}
pub fn is_blocked(&self) -> bool {
matches!(*self,
Self::BLOCKED_BY_ROBOTS_TXT |
Self::BLOCKED_AT_REQUEST_OF_REMOTE |
Self::BLOCKED_ORIGIN_BY_LOCAL_POLICY |
Self::BLOCKED_URL_BY_LOCAL_POLICY |
Self::BLOCKED_BY_CHALLENGE
)
}
pub fn could_be_a_fluke(&self) -> bool {
matches!(*self,
Self::UNKNOWN_ERROR |
Self::CONNECTION_FAILED |
Self::REQUEST_TIMEOUT |
Self::ERROR_READING_RESPONSE
)
}
}
/// Renders an exit code as text.
///
/// Named codes are written as their `lower_snake_case` name; every other
/// value is written as its plain decimal number.
///
/// `Display` is implemented instead of `ToString` so the type works with
/// `{}` in `format!`/`write!`; `.to_string()` is still available through the
/// standard blanket impl and produces the same text as before.
impl std::fmt::Display for ExitCode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Self::DATABASE_ERROR => "database_error",
            Self::CANCELLED => "cancelled",
            Self::SOMEONE_STOLE_MY_WORK => "someone_stole_my_work",
            Self::FILE_INGESTED => "file_ingested",
            Self::FILE_OF_UNKNOWN_TYPE => "file_of_unknown_type",
            Self::PERMANENT_REDIRECT => "permanent_redirect",
            Self::REDIRECT => "redirect",
            Self::FILE_DID_NOT_CHANGE => "file_did_not_change",
            Self::SERVER_BLAMED_CLIENT => "server_blamed_client",
            Self::FILE_GONE => "file_gone",
            Self::DID_NOT_UNDERSTAND_ANSWER => "did_not_understand_answer",
            Self::FILE_NOT_FOUND => "file_not_found",
            Self::RATE_LIMITED => "rate_limited",
            Self::SERVER_INTERNAL_ERROR => "server_internal_error",
            Self::CONNECTION_FAILED => "connection_failed",
            Self::REQUEST_TIMEOUT => "request_timeout",
            Self::ERROR_READING_RESPONSE => "error_reading_response",
            Self::BLOCKED_BY_ROBOTS_TXT => "blocked_by_robots_txt",
            Self::BLOCKED_AT_REQUEST_OF_REMOTE => "blocked_at_request_of_remote",
            Self::BLOCKED_ORIGIN_BY_LOCAL_POLICY => "blocked_origin_by_local_policy",
            Self::BLOCKED_URL_BY_LOCAL_POLICY => "blocked_url_by_local_policy",
            Self::BLOCKED_BY_CHALLENGE => "blocked_by_challenge",
            Self::NOT_CANONICAL => "not_canonical",
            Self::DUPLICATE => "duplicate",
            Self::UNKNOWN_ERROR => "unknown_error",
            // Codes without a name fall back to the raw number, formatted by
            // the integer's own `Display` so width/fill flags are honoured.
            _ => return std::fmt::Display::fmt(&self.0, f),
        };
        // `pad` applies any width/alignment from the format spec and is a
        // plain write when none is given (as with `.to_string()`).
        f.pad(name)
    }
}
/// Parses an exit code from its `lower_snake_case` name or from a decimal
/// number in the `i16` range.
///
/// # Errors
///
/// Returns a static message when `s` is neither one of the known names nor
/// parseable as an `i16`.
impl FromStr for ExitCode {
    type Err = &'static str;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let named = match s {
            "database_error" => Self::DATABASE_ERROR,
            "cancelled" => Self::CANCELLED,
            "someone_stole_my_work" => Self::SOMEONE_STOLE_MY_WORK,
            "file_ingested" => Self::FILE_INGESTED,
            "file_of_unknown_type" => Self::FILE_OF_UNKNOWN_TYPE,
            "permanent_redirect" => Self::PERMANENT_REDIRECT,
            "redirect" => Self::REDIRECT,
            "file_did_not_change" => Self::FILE_DID_NOT_CHANGE,
            "server_blamed_client" => Self::SERVER_BLAMED_CLIENT,
            "file_gone" => Self::FILE_GONE,
            "did_not_understand_answer" => Self::DID_NOT_UNDERSTAND_ANSWER,
            "file_not_found" => Self::FILE_NOT_FOUND,
            "rate_limited" => Self::RATE_LIMITED,
            "server_internal_error" => Self::SERVER_INTERNAL_ERROR,
            "connection_failed" => Self::CONNECTION_FAILED,
            "request_timeout" => Self::REQUEST_TIMEOUT,
            "error_reading_response" => Self::ERROR_READING_RESPONSE,
            "blocked_by_robots_txt" => Self::BLOCKED_BY_ROBOTS_TXT,
            "blocked_at_request_of_remote" => Self::BLOCKED_AT_REQUEST_OF_REMOTE,
            "blocked_origin_by_local_policy" => Self::BLOCKED_ORIGIN_BY_LOCAL_POLICY,
            "blocked_url_by_local_policy" => Self::BLOCKED_URL_BY_LOCAL_POLICY,
            "blocked_by_challenge" => Self::BLOCKED_BY_CHALLENGE,
            "not_canonical" => Self::NOT_CANONICAL,
            "duplicate" => Self::DUPLICATE,
            "unknown_error" => Self::UNKNOWN_ERROR,
            // Not a known name: accept a bare number, otherwise report failure.
            numeric => {
                return numeric
                    .parse::<i16>()
                    .map(Self)
                    .map_err(|_| "Not a recognized crawler exit code! Make sure it is in lower_snake_case or number in the i16 range.");
            }
        };
        Ok(named)
    }
}
impl From<ExitCode> for i16 {
fn from(exit_code: ExitCode) -> i16 {
exit_code.0
}
}
impl From<i16> for ExitCode {
fn from(n: i16) -> Self {
Self(n)
}
}
/// Convenience wrappers around the `From` conversions between
/// [`ExitCode`] and `i16`.
impl ExitCode {
    /// Builds an exit code from its numeric value.
    pub fn from_number(n: i16) -> Self {
        Self::from(n)
    }

    /// Like [`Self::from_number`], but passes `None` through unchanged.
    pub fn from_number_opt(n: Option<i16>) -> Option<Self> {
        n.map(Self::from)
    }

    /// Returns the numeric value of this exit code.
    pub fn to_number(self) -> i16 {
        i16::from(self)
    }
}
/// Exposes the exit code to `criterium` as a 64-bit integer.
impl AsInteger for ExitCode {
    /// Returns the stored `i16` widened to `i64` (lossless).
    fn as_criterium_i64(&self) -> i64 {
        i64::from(self.0)
    }
}