use std::error::Error as StdError;
use std::time::Duration;
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Boxed, type-erased error that is `Send + Sync + 'static`.
///
/// Used as the `source` payload of the [`Error::Engine`],
/// [`Error::JavaScript`] and [`Error::Screenshot`] variants.
pub(crate) type BoxError = Box<dyn StdError + Send + Sync + 'static>;
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum Error {
#[error("invalid URL '{url}': {reason}")]
InvalidUrl {
url: String,
reason: String,
},
#[error("page load timed out after {}s at {url}", timeout.as_secs())]
Timeout {
url: String,
timeout: Duration,
},
#[error("address not allowed: {host}")]
AddressNotAllowed {
host: String,
},
#[error("engine error: {source}")]
Engine {
url: Option<String>,
#[source]
source: BoxError,
},
#[error("JavaScript evaluation failed: {source}")]
JavaScript {
url: Option<String>,
#[source]
source: BoxError,
},
#[error("screenshot capture failed: {source}")]
Screenshot {
url: Option<String>,
#[source]
source: BoxError,
},
#[error(transparent)]
Extract(#[from] crate::extract::ExtractError),
#[error(transparent)]
Schema(#[from] crate::schema::SchemaError),
#[error(transparent)]
Io(#[from] std::io::Error),
#[error(transparent)]
InvalidGlob(#[from] globset::Error),
}
impl Error {
    /// Builds an [`Error::Engine`] from any error convertible into a boxed
    /// source, attaching `url` when one is given.
    pub(crate) fn engine(source: impl Into<BoxError>, url: Option<String>) -> Self {
        let source: BoxError = source.into();
        Self::Engine { url, source }
    }

    /// Builds an [`Error::Screenshot`] from any error convertible into a boxed
    /// source, attaching `url` when one is given.
    pub(crate) fn screenshot(source: impl Into<BoxError>, url: Option<String>) -> Self {
        let source: BoxError = source.into();
        Self::Screenshot { url, source }
    }

    /// Builds an [`Error::JavaScript`] from any error convertible into a boxed
    /// source, attaching `url` when one is given.
    pub(crate) fn javascript(source: impl Into<BoxError>, url: Option<String>) -> Self {
        let source: BoxError = source.into();
        Self::JavaScript { url, source }
    }

    /// Returns `true` only for [`Error::Timeout`].
    #[must_use]
    pub fn is_timeout(&self) -> bool {
        matches!(self, Self::Timeout { .. })
    }

    /// Returns `true` for [`Error::Timeout`] and [`Error::AddressNotAllowed`].
    #[must_use]
    pub fn is_network(&self) -> bool {
        self.is_timeout() || matches!(self, Self::AddressNotAllowed { .. })
    }

    /// Returns the URL carried by this error, if any.
    ///
    /// [`Error::InvalidUrl`] and [`Error::Timeout`] always carry one. The
    /// engine, JavaScript and screenshot variants carry one only when it was
    /// supplied at construction. Every other variant yields `None`.
    #[must_use]
    pub fn url(&self) -> Option<&str> {
        match self {
            Self::InvalidUrl { url, .. } | Self::Timeout { url, .. } => Some(url.as_str()),
            Self::Engine { url: attached, .. }
            | Self::JavaScript { url: attached, .. }
            | Self::Screenshot { url: attached, .. } => attached.as_deref(),
            // Listed explicitly so that adding a variant forces a decision here.
            Self::AddressNotAllowed { .. }
            | Self::Extract(_)
            | Self::Schema(_)
            | Self::Io(_)
            | Self::InvalidGlob(_) => None,
        }
    }

    /// Returns the refused host for [`Error::AddressNotAllowed`], and `None`
    /// for every other variant.
    #[must_use]
    pub fn host(&self) -> Option<&str> {
        let Self::AddressNotAllowed { host } = self else {
            return None;
        };
        Some(host.as_str())
    }
}
/// Crate-internal URL failure that `map_url_error` converts into the public
/// [`Error`].
#[derive(Debug)]
pub(crate) enum UrlError {
/// The URL is invalid; the payload becomes the `reason` of
/// [`Error::InvalidUrl`].
Invalid(String),
/// The payload becomes the `host` of [`Error::AddressNotAllowed`].
PrivateAddress(String),
}
pub(crate) fn map_url_error(url: &str, e: UrlError) -> Error {
match e {
UrlError::PrivateAddress(host) => Error::AddressNotAllowed { host },
UrlError::Invalid(reason) => Error::InvalidUrl {
url: url.into(),
reason,
},
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time check that `Error` is both `Send` and `Sync`.
    #[test]
    fn assert_send_sync() {
        fn require_send_sync<T: Send + Sync>() {}
        require_send_sync::<Error>();
    }

    /// A timeout counts as both a timeout and a network error, and exposes
    /// its URL but no host.
    #[test]
    fn timeout_predicates() {
        let timed_out = Error::Timeout {
            url: String::from("https://example.com"),
            timeout: Duration::from_secs(30),
        };
        assert!(timed_out.is_timeout());
        assert!(timed_out.is_network());
        assert_eq!(timed_out.url(), Some("https://example.com"));
        assert_eq!(timed_out.host(), None);
    }

    /// A refused address is a network error but not a timeout, and exposes
    /// its host but no URL.
    #[test]
    fn address_not_allowed_predicates() {
        let refused = Error::AddressNotAllowed {
            host: String::from("127.0.0.1"),
        };
        assert!(!refused.is_timeout());
        assert!(refused.is_network());
        assert_eq!(refused.url(), None);
        assert_eq!(refused.host(), Some("127.0.0.1"));
    }

    /// An invalid URL is not a network error and reports the rejected URL.
    #[test]
    fn invalid_url_carries_url() {
        let rejected = Error::InvalidUrl {
            url: String::from("bad://url"),
            reason: String::from("scheme not allowed"),
        };
        assert!(!rejected.is_network());
        assert_eq!(rejected.url(), Some("bad://url"));
        assert_eq!(rejected.host(), None);
    }

    /// The `engine` helper keeps the URL, the source chain and the message.
    #[test]
    fn engine_helper_preserves_source_chain() {
        let cause = std::io::Error::other("disk full");
        let wrapped = Error::engine(cause, Some(String::from("https://example.com")));
        assert_eq!(wrapped.url(), Some("https://example.com"));
        assert!(wrapped.source().is_some());
        assert_eq!(wrapped.to_string(), "engine error: disk full");
    }

    /// Without a URL, the `engine` helper yields an error with no URL.
    #[test]
    fn engine_without_url_returns_none() {
        let wrapped = Error::engine(std::io::Error::other("crash"), None);
        assert!(wrapped.url().is_none());
    }
}