use thiserror::Error;
/// Categorised failure produced while crawling.
///
/// Every variant wraps a free-form `String` detail. The `Display` output
/// (generated by `thiserror`) is a short machine-friendly category tag,
/// a colon, and then that detail, e.g. `not_found: <detail>`.
///
/// NOTE(review): the HTTP-flavoured variants (`NotFound`, `Unauthorized`,
/// `Forbidden`, `RateLimited`, `ServerError`, `BadGateway`, `Gone`) presumably
/// correspond to response status codes; their construction sites are not in
/// this part of the file, so confirm the mapping there.
#[derive(Debug, Error)]
pub enum CrawlError {
/// Rendered as `not_found: <detail>`.
#[error("not_found: {0}")]
NotFound(String),
/// Rendered as `unauthorized: <detail>`.
#[error("unauthorized: {0}")]
Unauthorized(String),
/// Rendered as `forbidden: <detail>`.
#[error("forbidden: {0}")]
Forbidden(String),
/// Rendered as `forbidden: waf/blocked: <detail>`. The text deliberately
/// starts with the same `forbidden:` tag as [`CrawlError::Forbidden`], so
/// prefix-based consumers see both as "forbidden".
#[error("forbidden: waf/blocked: {0}")]
WafBlocked(String),
/// Rendered as `timeout: <detail>`. Produced by `classify_reqwest_error`
/// for timed-out requests.
#[error("timeout: {0}")]
Timeout(String),
/// Rendered as `rate_limited: <detail>`.
#[error("rate_limited: {0}")]
RateLimited(String),
/// Rendered as `server_error: <detail>`.
#[error("server_error: {0}")]
ServerError(String),
/// Rendered as `bad_gateway: <detail>`.
#[error("bad_gateway: {0}")]
BadGateway(String),
/// Rendered as `gone: <detail>`.
#[error("gone: {0}")]
Gone(String),
/// Rendered as `connection: <detail>`. Produced by `classify_reqwest_error`
/// for connection-level failures.
#[error("connection: {0}")]
Connection(String),
/// Rendered as `dns: <detail>`. Produced by `classify_reqwest_error` when
/// the error chain mentions DNS / resolution / lookup.
#[error("dns: {0}")]
Dns(String),
/// Rendered as `ssl: <detail>`. Produced by `classify_reqwest_error` when
/// the error chain mentions SSL/TLS, certificates or handshakes.
#[error("ssl: {0}")]
Ssl(String),
/// Rendered as `data_loss: <detail>`. Produced by `classify_reqwest_error`
/// for truncated, incomplete or undecodable response bodies.
#[error("data_loss: {0}")]
DataLoss(String),
/// Rendered as `browser: <detail>`.
#[error("browser: {0}")]
BrowserError(String),
/// Rendered as `browser_timeout: <detail>`.
#[error("browser_timeout: {0}")]
BrowserTimeout(String),
/// Rendered as `invalid_config: <detail>`.
#[error("invalid_config: {0}")]
InvalidConfig(String),
/// Rendered as `other: <detail>`. Fallback used by `classify_reqwest_error`
/// when no more specific category matches.
#[error("other: {0}")]
Other(String),
}
/// Flattens `e` and every error reachable through `source()` into one
/// lowercased string, with the individual messages separated by `" | "`.
///
/// The outermost error comes first, followed by each successive cause.
pub(crate) fn error_chain_string(e: &reqwest::Error) -> String {
    let outermost: &dyn std::error::Error = e;
    // `|&cause|` copies the inner reference out so that `source()` borrows from
    // the error itself rather than from the iterator's temporary.
    let messages: Vec<String> = std::iter::successors(Some(outermost), |&cause| cause.source())
        .map(|cause| cause.to_string())
        .collect();
    messages.join(" | ").to_lowercase()
}
/// Maps a `reqwest::Error` onto the closest [`CrawlError`] variant (non-wasm targets).
///
/// Classification combines reqwest's own predicates (`is_timeout`, `is_connect`,
/// `is_body`) with keyword matching on the lowercased error chain returned by
/// `error_chain_string`. The checks run in a fixed priority order — timeout,
/// DNS, TLS, connection, data loss — and the first match wins; anything that
/// matches nothing becomes [`CrawlError::Other`].
///
/// The request URL is removed from the chain text before keyword matching:
/// reqwest appends `for url (...)` to its message, so otherwise a URL such as
/// `https://example.com/tls-guide` would be reported as a TLS failure no matter
/// what actually went wrong. The URL is still present in the returned payload,
/// which is built from the untouched `{e}`.
///
/// NOTE(review): each payload repeats the tag that the variant's `#[error]`
/// attribute already adds, so `Display` renders e.g. `timeout: timeout: ...`.
/// Left unchanged here because callers may rely on the exact text — confirm.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) fn classify_reqwest_error(e: &reqwest::Error) -> CrawlError {
    const TIMEOUT_HINTS: [&str; 2] = ["timed out", "timeout"];
    const DNS_HINTS: [&str; 3] = ["dns", "resolve", "lookup"];
    const TLS_HINTS: [&str; 8] = [
        "ssl",
        "tls",
        "certificate",
        "record overflow",
        "handshake",
        "corrupt message",
        "alertdescription",
        "invalidcontenttype",
    ];
    const CONNECTION_HINTS: [&str; 2] = ["connection", "connect"];
    const DATA_LOSS_HINTS: [&str; 5] = [
        "content-length",
        "truncat",
        "incomplete",
        "decoding response body",
        "error decoding",
    ];

    let mut chain = error_chain_string(e);
    if let Some(url) = e.url() {
        // The chain is already lowercased, so lowercase the URL the same way
        // before cutting it out.
        let url_text = url.as_str().to_lowercase();
        chain = chain.replace(url_text.as_str(), "");
    }
    let mentions = |hints: &[&str]| hints.iter().any(|&hint| chain.contains(hint));

    if e.is_timeout() || mentions(&TIMEOUT_HINTS) {
        CrawlError::Timeout(format!("timeout: {e}"))
    } else if mentions(&DNS_HINTS) {
        CrawlError::Dns(format!("dns: {e}"))
    } else if mentions(&TLS_HINTS) {
        CrawlError::Ssl(format!("ssl: {e}"))
    } else if e.is_connect() || mentions(&CONNECTION_HINTS) {
        CrawlError::Connection(format!("connection: {e}"))
    } else if e.is_body() || mentions(&DATA_LOSS_HINTS) {
        CrawlError::DataLoss(format!("data_loss: {e}"))
    } else {
        CrawlError::Other(format!("other: {e}"))
    }
}
/// Maps a `reqwest::Error` onto the closest [`CrawlError`] variant (wasm32 targets).
///
/// This variant classifies purely by keyword matching on the lowercased error
/// chain returned by `error_chain_string`; it does not call any of reqwest's
/// `is_*` predicates. The checks run in a fixed priority order — timeout, DNS,
/// TLS, connection, data loss — and the first match wins; anything that matches
/// nothing becomes [`CrawlError::Other`].
///
/// The request URL is removed from the chain text before keyword matching:
/// reqwest appends `for url (...)` to its message, so otherwise a URL such as
/// `https://example.com/tls-guide` would be reported as a TLS failure no matter
/// what actually went wrong. The URL is still present in the returned payload,
/// which is built from the untouched `{e}`.
///
/// NOTE(review): each payload repeats the tag that the variant's `#[error]`
/// attribute already adds, so `Display` renders e.g. `timeout: timeout: ...`.
/// Left unchanged here because callers may rely on the exact text — confirm.
#[cfg(target_arch = "wasm32")]
pub(crate) fn classify_reqwest_error(e: &reqwest::Error) -> CrawlError {
    const TIMEOUT_HINTS: [&str; 2] = ["timed out", "timeout"];
    const DNS_HINTS: [&str; 3] = ["dns", "resolve", "lookup"];
    const TLS_HINTS: [&str; 4] = ["ssl", "tls", "certificate", "handshake"];
    const CONNECTION_HINTS: [&str; 2] = ["connection", "connect"];
    const DATA_LOSS_HINTS: [&str; 3] = ["content-length", "truncat", "incomplete"];

    let mut chain = error_chain_string(e);
    if let Some(url) = e.url() {
        // The chain is already lowercased, so lowercase the URL the same way
        // before cutting it out.
        let url_text = url.as_str().to_lowercase();
        chain = chain.replace(url_text.as_str(), "");
    }
    let mentions = |hints: &[&str]| hints.iter().any(|&hint| chain.contains(hint));

    if mentions(&TIMEOUT_HINTS) {
        CrawlError::Timeout(format!("timeout: {e}"))
    } else if mentions(&DNS_HINTS) {
        CrawlError::Dns(format!("dns: {e}"))
    } else if mentions(&TLS_HINTS) {
        CrawlError::Ssl(format!("ssl: {e}"))
    } else if mentions(&CONNECTION_HINTS) {
        CrawlError::Connection(format!("connection: {e}"))
    } else if mentions(&DATA_LOSS_HINTS) {
        CrawlError::DataLoss(format!("data_loss: {e}"))
    } else {
        CrawlError::Other(format!("other: {e}"))
    }
}