Skip to main content

servo_fetch/
error.rs

1//! Error types.
2
3use std::fmt;
4use std::time::Duration;
5
6/// A specialized `Result` type for servo-fetch.
7pub type Result<T> = std::result::Result<T, Error>;
8
9/// Errors from servo-fetch operations.
10#[derive(Debug, thiserror::Error)]
11#[non_exhaustive]
12pub enum Error {
13    /// The URL is malformed or uses a disallowed scheme.
14    #[error("{reason}")]
15    InvalidUrl {
16        /// The URL that failed validation.
17        url: String,
18        /// Why the URL is invalid.
19        reason: String,
20    },
21
22    /// The page did not finish loading within the configured timeout.
23    #[error("page load timed out after {}s", timeout.as_secs())]
24    Timeout {
25        /// The URL that timed out.
26        url: String,
27        /// The timeout that was exceeded.
28        timeout: Duration,
29    },
30
31    /// The URL resolves to a private or reserved address (SSRF protection).
32    #[error("address not allowed: {0}")]
33    AddressNotAllowed(String),
34
35    /// The Servo engine is unavailable or crashed.
36    #[error("engine error: {0}")]
37    Engine(String),
38
39    /// JavaScript evaluation failed.
40    #[error("JavaScript evaluation failed: {0}")]
41    JavaScript(String),
42
43    /// Screenshot capture failed.
44    #[error("screenshot capture failed: {0}")]
45    Screenshot(String),
46
47    /// Content extraction failed.
48    #[error(transparent)]
49    Extract(#[from] crate::extract::ExtractError),
50
51    /// Schema-based structured extraction failed.
52    #[error(transparent)]
53    Schema(#[from] crate::schema::SchemaError),
54
55    /// An I/O error occurred.
56    #[error(transparent)]
57    Io(#[from] std::io::Error),
58
59    /// A glob pattern is invalid.
60    #[error("invalid glob pattern: {0}")]
61    InvalidGlob(#[from] globset::Error),
62}
63
64impl Error {
65    /// Returns `true` if this is a timeout error.
66    #[must_use]
67    pub fn is_timeout(&self) -> bool {
68        matches!(self, Self::Timeout { .. })
69    }
70
71    /// Returns `true` if this is a network-related error.
72    #[must_use]
73    pub fn is_network(&self) -> bool {
74        matches!(self, Self::Timeout { .. } | Self::AddressNotAllowed(_))
75    }
76
77    /// Returns the URL associated with this error, if any.
78    #[must_use]
79    pub fn url(&self) -> Option<&str> {
80        match self {
81            Self::InvalidUrl { url, .. } | Self::Timeout { url, .. } | Self::AddressNotAllowed(url) => Some(url),
82            _ => None,
83        }
84    }
85}
86
87#[allow(clippy::used_underscore_items)]
88const _: () = {
89    fn _assert<T: Send + Sync>() {}
90    fn _check() {
91        _assert::<Error>();
92    }
93};
94
/// Why a URL was rejected by [`validate_url`].
#[derive(Debug)]
pub(crate) enum UrlError {
    /// The URL is malformed or its scheme is not allowed; holds the reason text.
    Invalid(String),
    /// The host resolves to a private or reserved address; holds the host.
    PrivateAddress(String),
}

impl fmt::Display for UrlError {
    /// Writes the bare reason for `Invalid`, or a fixed message followed by
    /// the host for `PrivateAddress`.
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::PrivateAddress(host) => out.write_fmt(format_args!(
                "access to private/local addresses is not allowed: {host}"
            )),
            Self::Invalid(reason) => out.write_str(reason.as_str()),
        }
    }
}
114
115pub(crate) fn map_url_error(url: &str, e: UrlError) -> Error {
116    match e {
117        UrlError::PrivateAddress(host) => Error::AddressNotAllowed(host),
118        UrlError::Invalid(reason) => Error::InvalidUrl {
119            url: url.into(),
120            reason,
121        },
122    }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// A timeout is both a timeout and a network error, and keeps its URL.
    #[test]
    fn timeout_is_timeout() {
        let timed_out = Error::Timeout {
            url: String::from("https://example.com"),
            timeout: Duration::from_secs(30),
        };
        assert_eq!(timed_out.url(), Some("https://example.com"));
        assert!(timed_out.is_network());
        assert!(timed_out.is_timeout());
    }

    /// A blocked address is a network error but not a timeout.
    #[test]
    fn address_not_allowed_is_network() {
        let blocked = Error::AddressNotAllowed(String::from("127.0.0.1"));
        assert_eq!(blocked.url(), Some("127.0.0.1"));
        assert!(blocked.is_network());
        assert!(!blocked.is_timeout());
    }

    /// An invalid URL exposes the URL but is neither a timeout nor a network error.
    #[test]
    fn invalid_url_has_url() {
        let rejected = Error::InvalidUrl {
            url: String::from("bad://url"),
            reason: String::from("scheme not allowed"),
        };
        assert_eq!(rejected.url(), Some("bad://url"));
        assert!(!rejected.is_network());
        assert!(!rejected.is_timeout());
    }

    /// An engine failure carries no URL and is not a timeout.
    #[test]
    fn engine_error_has_no_url() {
        let crashed = Error::Engine(String::from("crashed"));
        assert_eq!(crashed.url(), None);
        assert!(!crashed.is_timeout());
    }
}