Skip to main content

servo_fetch/
error.rs

1//! Error types.
2
3use std::error::Error as StdError;
4use std::time::Duration;
5
6/// A specialized `Result` type for servo-fetch.
7pub type Result<T> = std::result::Result<T, Error>;
8
/// Type-erased, thread-safe error stored in the `source` field of variants that wrap
/// arbitrary underlying failures.
///
/// The trait object is implicitly `'static`, which is the default bound for `Box<dyn Trait>`.
pub(crate) type BoxError = Box<dyn StdError + Send + Sync>;
11
12/// Errors from servo-fetch operations.
13#[derive(Debug, thiserror::Error)]
14#[non_exhaustive]
15pub enum Error {
16    /// The URL is malformed or uses a disallowed scheme.
17    #[error("invalid URL '{url}': {reason}")]
18    InvalidUrl {
19        /// The URL that failed validation.
20        url: String,
21        /// Why the URL is invalid.
22        reason: String,
23    },
24
25    /// The page did not finish loading within the configured timeout.
26    #[error("page load timed out after {}s at {url}", timeout.as_secs())]
27    Timeout {
28        /// The URL that timed out.
29        url: String,
30        /// The timeout that was exceeded.
31        timeout: Duration,
32    },
33
34    /// The URL resolves to a private or reserved address (SSRF protection).
35    #[error("address not allowed: {host}")]
36    AddressNotAllowed {
37        /// The blocked host.
38        host: String,
39    },
40
41    /// The Servo engine is unavailable or crashed.
42    #[error("engine error: {source}")]
43    Engine {
44        /// URL being processed, if known.
45        url: Option<String>,
46        /// Source error.
47        #[source]
48        source: BoxError,
49    },
50
51    /// JavaScript evaluation failed.
52    #[error("JavaScript evaluation failed: {source}")]
53    JavaScript {
54        /// URL being processed, if known.
55        url: Option<String>,
56        /// Source error.
57        #[source]
58        source: BoxError,
59    },
60
61    /// Screenshot capture failed.
62    #[error("screenshot capture failed: {source}")]
63    Screenshot {
64        /// URL being captured, if known.
65        url: Option<String>,
66        /// Source error.
67        #[source]
68        source: BoxError,
69    },
70
71    /// Content extraction failed.
72    #[error(transparent)]
73    Extract(#[from] crate::extract::ExtractError),
74
75    /// Failed to load or parse a cookies file.
76    #[error("failed to load cookies from {path}: {reason}")]
77    Cookies {
78        /// The cookies file path.
79        path: String,
80        /// Why loading failed.
81        reason: String,
82    },
83
84    /// Schema-based structured extraction failed.
85    #[error(transparent)]
86    Schema(#[from] crate::schema::SchemaError),
87
88    /// An I/O error occurred.
89    #[error(transparent)]
90    Io(#[from] std::io::Error),
91
92    /// A glob pattern is invalid.
93    #[error(transparent)]
94    InvalidGlob(#[from] globset::Error),
95}
96
97impl Error {
98    /// Construct an [`Error::Engine`] from any error type, preserving source chain.
99    pub(crate) fn engine(source: impl Into<BoxError>, url: Option<String>) -> Self {
100        Self::Engine {
101            url,
102            source: source.into(),
103        }
104    }
105
106    /// Construct an [`Error::Screenshot`] from any error type, preserving source chain.
107    pub(crate) fn screenshot(source: impl Into<BoxError>, url: Option<String>) -> Self {
108        Self::Screenshot {
109            url,
110            source: source.into(),
111        }
112    }
113
114    /// Construct an [`Error::JavaScript`] from any error type, preserving source chain.
115    pub(crate) fn javascript(source: impl Into<BoxError>, url: Option<String>) -> Self {
116        Self::JavaScript {
117            url,
118            source: source.into(),
119        }
120    }
121
122    /// Returns `true` if this is a timeout error.
123    #[must_use]
124    pub fn is_timeout(&self) -> bool {
125        matches!(self, Self::Timeout { .. })
126    }
127
128    /// Returns `true` if this is a network-related error (timeout or address-policy rejection).
129    #[must_use]
130    pub fn is_network(&self) -> bool {
131        matches!(self, Self::Timeout { .. } | Self::AddressNotAllowed { .. })
132    }
133
134    /// Returns the URL associated with this error, if any.
135    #[must_use]
136    pub fn url(&self) -> Option<&str> {
137        match self {
138            Self::InvalidUrl { url, .. } | Self::Timeout { url, .. } => Some(url),
139            Self::Engine { url, .. } | Self::JavaScript { url, .. } | Self::Screenshot { url, .. } => url.as_deref(),
140            _ => None,
141        }
142    }
143
144    /// Returns the host that was rejected, if this is [`Error::AddressNotAllowed`].
145    #[must_use]
146    pub fn host(&self) -> Option<&str> {
147        match self {
148            Self::AddressNotAllowed { host } => Some(host),
149            _ => None,
150        }
151    }
152}
153
/// Crate-internal URL validation failure, converted to the public [`Error`] by
/// [`map_url_error`].
#[derive(Debug)]
pub(crate) enum UrlError {
    /// The URL was rejected; the payload is the reason, which ends up in
    /// [`Error::InvalidUrl`].
    Invalid(String),
    /// The URL points at a disallowed address; the payload is the host, which ends up
    /// in [`Error::AddressNotAllowed`].
    PrivateAddress(String),
}
159
160pub(crate) fn map_url_error(url: &str, e: UrlError) -> Error {
161    match e {
162        UrlError::PrivateAddress(host) => Error::AddressNotAllowed { host },
163        UrlError::Invalid(reason) => Error::InvalidUrl {
164            url: url.into(),
165            reason,
166        },
167    }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time check that `Error` is both `Send` and `Sync`.
    #[test]
    fn assert_send_sync() {
        fn require_send_sync<T>()
        where
            T: Send + Sync,
        {
        }
        require_send_sync::<Error>();
    }

    /// A timeout counts as both a timeout and a network error, and exposes its URL.
    #[test]
    fn timeout_predicates() {
        let url = String::from("https://example.com");
        let timeout = Duration::from_secs(30);
        let err = Error::Timeout { url, timeout };

        assert!(err.is_timeout());
        assert!(err.is_network());
        assert_eq!(err.url(), Some("https://example.com"));
        assert_eq!(err.host(), None);
    }

    /// An address rejection is a network error with a host but no URL.
    #[test]
    fn address_not_allowed_predicates() {
        let host = String::from("127.0.0.1");
        let err = Error::AddressNotAllowed { host };

        assert!(!err.is_timeout());
        assert!(err.is_network());
        assert_eq!(err.url(), None);
        assert_eq!(err.host(), Some("127.0.0.1"));
    }

    /// An invalid-URL error is not network-related but still reports the URL.
    #[test]
    fn invalid_url_carries_url() {
        let url = String::from("bad://url");
        let reason = String::from("scheme not allowed");
        let err = Error::InvalidUrl { url, reason };

        assert!(!err.is_network());
        assert_eq!(err.url(), Some("bad://url"));
        assert_eq!(err.host(), None);
    }

    /// `Error::engine` keeps the wrapped error as `source()` and shows it in `Display`.
    #[test]
    fn engine_helper_preserves_source_chain() {
        let url = Some(String::from("https://example.com"));
        let err = Error::engine(std::io::Error::other("disk full"), url);

        assert_eq!(err.url(), Some("https://example.com"));
        assert!(err.source().is_some());
        assert_eq!(err.to_string(), "engine error: disk full");
    }

    /// Without a recorded URL, `url()` yields `None`.
    #[test]
    fn engine_without_url_returns_none() {
        let cause = std::io::Error::other("crash");
        let err = Error::engine(cause, None);

        assert!(err.url().is_none());
    }
}