1use std::error::Error as StdError;
4use std::time::Duration;
5
6pub type Result<T> = std::result::Result<T, Error>;
8
/// Type-erased, thread-safe error used as the `source` payload of the
/// boxed [`Error`] variants.
///
/// A boxed trait object in a type alias defaults to the `'static` object
/// lifetime bound, so it does not need to be spelled out here.
pub(crate) type BoxError = Box<dyn StdError + Send + Sync>;
11
12#[derive(Debug, thiserror::Error)]
14#[non_exhaustive]
15pub enum Error {
16 #[error("invalid URL '{url}': {reason}")]
18 InvalidUrl {
19 url: String,
21 reason: String,
23 },
24
25 #[error("page load timed out after {}s at {url}", timeout.as_secs())]
27 Timeout {
28 url: String,
30 timeout: Duration,
32 },
33
34 #[error("address not allowed: {host}")]
36 AddressNotAllowed {
37 host: String,
39 },
40
41 #[error("engine error: {source}")]
43 Engine {
44 url: Option<String>,
46 #[source]
48 source: BoxError,
49 },
50
51 #[error("JavaScript evaluation failed: {source}")]
53 JavaScript {
54 url: Option<String>,
56 #[source]
58 source: BoxError,
59 },
60
61 #[error("screenshot capture failed: {source}")]
63 Screenshot {
64 url: Option<String>,
66 #[source]
68 source: BoxError,
69 },
70
71 #[error(transparent)]
73 Extract(#[from] crate::extract::ExtractError),
74
75 #[error(transparent)]
77 Schema(#[from] crate::schema::SchemaError),
78
79 #[error(transparent)]
81 Io(#[from] std::io::Error),
82
83 #[error(transparent)]
85 InvalidGlob(#[from] globset::Error),
86}
87
88impl Error {
89 pub(crate) fn engine(source: impl Into<BoxError>, url: Option<String>) -> Self {
91 Self::Engine {
92 url,
93 source: source.into(),
94 }
95 }
96
97 #[must_use]
99 pub fn is_timeout(&self) -> bool {
100 matches!(self, Self::Timeout { .. })
101 }
102
103 #[must_use]
105 pub fn is_network(&self) -> bool {
106 matches!(self, Self::Timeout { .. } | Self::AddressNotAllowed { .. })
107 }
108
109 #[must_use]
111 pub fn url(&self) -> Option<&str> {
112 match self {
113 Self::InvalidUrl { url, .. } | Self::Timeout { url, .. } => Some(url),
114 Self::Engine { url, .. } | Self::JavaScript { url, .. } | Self::Screenshot { url, .. } => url.as_deref(),
115 _ => None,
116 }
117 }
118
119 #[must_use]
121 pub fn host(&self) -> Option<&str> {
122 match self {
123 Self::AddressNotAllowed { host } => Some(host),
124 _ => None,
125 }
126 }
127}
128
/// Crate-internal URL failure, turned into a public [`Error`] by
/// [`map_url_error`].
#[derive(Debug)]
pub(crate) enum UrlError {
    /// The URL is invalid; the payload becomes the `reason` of
    /// `Error::InvalidUrl`.
    Invalid(String),
    /// The URL targets a refused address; the payload becomes the `host` of
    /// `Error::AddressNotAllowed`.
    PrivateAddress(String),
}
134
135pub(crate) fn map_url_error(url: &str, e: UrlError) -> Error {
136 match e {
137 UrlError::PrivateAddress(host) => Error::AddressNotAllowed { host },
138 UrlError::Invalid(reason) => Error::InvalidUrl {
139 url: url.into(),
140 reason,
141 },
142 }
143}
144
#[cfg(test)]
mod tests {
    use super::*;

    const EXAMPLE_URL: &str = "https://example.com";

    /// Compile-time check that `Error` is both `Send` and `Sync`.
    #[test]
    fn assert_send_sync() {
        fn requires_send_sync<T>()
        where
            T: Send + Sync,
        {
        }
        requires_send_sync::<Error>();
    }

    /// A timeout counts as both a timeout and a network error and exposes its URL.
    #[test]
    fn timeout_predicates() {
        let timed_out = Error::Timeout {
            url: String::from(EXAMPLE_URL),
            timeout: Duration::from_secs(30),
        };
        assert!(timed_out.is_timeout());
        assert!(timed_out.is_network());
        assert_eq!(timed_out.url(), Some(EXAMPLE_URL));
        assert!(timed_out.host().is_none());
    }

    /// A refused address is a network error, exposes its host, and has no URL.
    #[test]
    fn address_not_allowed_predicates() {
        let refused = Error::AddressNotAllowed {
            host: String::from("127.0.0.1"),
        };
        assert!(!refused.is_timeout());
        assert!(refused.is_network());
        assert!(refused.url().is_none());
        assert_eq!(refused.host(), Some("127.0.0.1"));
    }

    /// An invalid URL is not a network error but still reports the offending URL.
    #[test]
    fn invalid_url_carries_url() {
        let invalid = Error::InvalidUrl {
            url: String::from("bad://url"),
            reason: String::from("scheme not allowed"),
        };
        assert!(!invalid.is_network());
        assert_eq!(invalid.url(), Some("bad://url"));
        assert!(invalid.host().is_none());
    }

    /// `Error::engine` keeps the URL, the source chain, and the source's message.
    #[test]
    fn engine_helper_preserves_source_chain() {
        let cause = std::io::Error::other("disk full");
        let wrapped = Error::engine(cause, Some(String::from(EXAMPLE_URL)));
        assert_eq!(wrapped.url(), Some(EXAMPLE_URL));
        assert!(wrapped.source().is_some());
        assert_eq!(wrapped.to_string(), "engine error: disk full");
    }

    /// Without a URL, `url()` on an engine error yields `None`.
    #[test]
    fn engine_without_url_returns_none() {
        let wrapped = Error::engine(std::io::Error::other("crash"), None);
        assert_eq!(wrapped.url(), None);
    }
}