Skip to main content

hanzo_extract/
error.rs

1//! Error types for content extraction
2
3use thiserror::Error;
4
/// Result type for extraction operations.
///
/// Alias for `std::result::Result` with the error type fixed to
/// [`ExtractError`], so signatures can be written as `Result<T>`.
pub type Result<T> = std::result::Result<T, ExtractError>;
7
8/// Errors that can occur during content extraction
9#[derive(Error, Debug)]
10pub enum ExtractError {
11    /// Network error during fetch
12    #[error("Network error: {0}")]
13    Network(String),
14
15    /// HTTP error response
16    #[error("HTTP error {status}: {message}")]
17    Http { status: u16, message: String },
18
19    /// Invalid URL
20    #[error("Invalid URL: {0}")]
21    InvalidUrl(String),
22
23    /// Parse error
24    #[error("Parse error: {0}")]
25    Parse(String),
26
27    /// PDF extraction error
28    #[error("PDF error: {0}")]
29    Pdf(String),
30
31    /// Content too large
32    #[error("Content too large: {size} bytes exceeds max {max} bytes")]
33    ContentTooLarge { size: usize, max: usize },
34
35    /// Timeout error
36    #[error("Request timeout after {0} seconds")]
37    Timeout(u64),
38
39    /// Sanitization blocked content
40    #[cfg(feature = "sanitize")]
41    #[error("Content blocked by sanitization: {0}")]
42    Blocked(String),
43
44    /// IO error
45    #[error("IO error: {0}")]
46    Io(#[from] std::io::Error),
47
48    /// Other error
49    #[error("{0}")]
50    Other(String),
51}
52
/// Classifies a `reqwest::Error` as a timeout, an HTTP error, or a generic
/// network error (checked in that order).
///
/// Only compiled when the `web` feature is enabled.
#[cfg(feature = "web")]
impl From<reqwest::Error> for ExtractError {
    fn from(source: reqwest::Error) -> Self {
        // The reported duration is a fixed value; it is not read from the error.
        const REPORTED_TIMEOUT_SECS: u64 = 30;

        // Timeouts take priority over any status the error may also carry.
        if source.is_timeout() {
            return Self::Timeout(REPORTED_TIMEOUT_SECS);
        }

        match source.status() {
            // A status code is attached: surface it as an HTTP error.
            Some(code) => Self::Http {
                status: code.as_u16(),
                message: source.to_string(),
            },
            // Anything else is reported as a plain network failure.
            None => Self::Network(source.to_string()),
        }
    }
}
68
69impl From<url::ParseError> for ExtractError {
70    fn from(err: url::ParseError) -> Self {
71        ExtractError::InvalidUrl(err.to_string())
72    }
73}
74
/// Turns an `lopdf` failure into [`ExtractError::Pdf`], keeping its message
/// as the payload.
///
/// Only compiled when the `pdf` feature is enabled.
#[cfg(feature = "pdf")]
impl From<lopdf::Error> for ExtractError {
    fn from(pdf_failure: lopdf::Error) -> Self {
        let reason = pdf_failure.to_string();
        Self::Pdf(reason)
    }
}
81
82impl From<serde_json::Error> for ExtractError {
83    fn from(err: serde_json::Error) -> Self {
84        ExtractError::Parse(err.to_string())
85    }
86}