1use std::error::Error as StdError;
4use std::time::Duration;
5
6pub type Result<T> = std::result::Result<T, Error>;
8
/// Type-erased, thread-safe error used as the `source` payload of the
/// wrapping [`Error`] variants.
///
/// `Box<dyn Trait>` defaults its object lifetime to `'static`, so the bound
/// does not need to be spelled out.
pub(crate) type BoxError = Box<dyn StdError + Send + Sync>;
11
12#[derive(Debug, thiserror::Error)]
14#[non_exhaustive]
15pub enum Error {
16 #[error("invalid URL '{url}': {reason}")]
18 InvalidUrl {
19 url: String,
21 reason: String,
23 },
24
25 #[error("page load timed out after {}s at {url}", timeout.as_secs())]
27 Timeout {
28 url: String,
30 timeout: Duration,
32 },
33
34 #[error("address not allowed: {host}")]
36 AddressNotAllowed {
37 host: String,
39 },
40
41 #[error("engine error: {source}")]
43 Engine {
44 url: Option<String>,
46 #[source]
48 source: BoxError,
49 },
50
51 #[error("JavaScript evaluation failed: {source}")]
53 JavaScript {
54 url: Option<String>,
56 #[source]
58 source: BoxError,
59 },
60
61 #[error("screenshot capture failed: {source}")]
63 Screenshot {
64 url: Option<String>,
66 #[source]
68 source: BoxError,
69 },
70
71 #[error(transparent)]
73 Extract(#[from] crate::extract::ExtractError),
74
75 #[error("failed to load cookies from {path}: {reason}")]
77 Cookies {
78 path: String,
80 reason: String,
82 },
83
84 #[error(transparent)]
86 Schema(#[from] crate::schema::SchemaError),
87
88 #[error(transparent)]
90 Io(#[from] std::io::Error),
91
92 #[error(transparent)]
94 InvalidGlob(#[from] globset::Error),
95}
96
97impl Error {
98 pub(crate) fn engine(source: impl Into<BoxError>, url: Option<String>) -> Self {
100 Self::Engine {
101 url,
102 source: source.into(),
103 }
104 }
105
106 pub(crate) fn screenshot(source: impl Into<BoxError>, url: Option<String>) -> Self {
108 Self::Screenshot {
109 url,
110 source: source.into(),
111 }
112 }
113
114 pub(crate) fn javascript(source: impl Into<BoxError>, url: Option<String>) -> Self {
116 Self::JavaScript {
117 url,
118 source: source.into(),
119 }
120 }
121
122 #[must_use]
124 pub fn is_timeout(&self) -> bool {
125 matches!(self, Self::Timeout { .. })
126 }
127
128 #[must_use]
130 pub fn is_network(&self) -> bool {
131 matches!(self, Self::Timeout { .. } | Self::AddressNotAllowed { .. })
132 }
133
134 #[must_use]
136 pub fn url(&self) -> Option<&str> {
137 match self {
138 Self::InvalidUrl { url, .. } | Self::Timeout { url, .. } => Some(url),
139 Self::Engine { url, .. } | Self::JavaScript { url, .. } | Self::Screenshot { url, .. } => url.as_deref(),
140 _ => None,
141 }
142 }
143
144 #[must_use]
146 pub fn host(&self) -> Option<&str> {
147 match self {
148 Self::AddressNotAllowed { host } => Some(host),
149 _ => None,
150 }
151 }
152}
153
/// Crate-internal URL failure that `map_url_error` converts into an [`Error`].
#[derive(Debug)]
pub(crate) enum UrlError {
    /// The URL is invalid; the payload becomes the `reason` of `Error::InvalidUrl`.
    Invalid(String),
    /// The payload becomes the `host` of `Error::AddressNotAllowed`.
    PrivateAddress(String),
}
159
160pub(crate) fn map_url_error(url: &str, e: UrlError) -> Error {
161 match e {
162 UrlError::PrivateAddress(host) => Error::AddressNotAllowed { host },
163 UrlError::Invalid(reason) => Error::InvalidUrl {
164 url: url.into(),
165 reason,
166 },
167 }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// URL shared by the tests that need a well-formed address.
    const EXAMPLE_URL: &str = "https://example.com";

    /// `Error` must be usable across threads.
    #[test]
    fn assert_send_sync() {
        fn requires_send_sync<T: Send + Sync>() {}
        requires_send_sync::<Error>();
    }

    /// A timeout counts as both a timeout and a network error and exposes its URL.
    #[test]
    fn timeout_predicates() {
        let timed_out = Error::Timeout {
            url: String::from(EXAMPLE_URL),
            timeout: Duration::from_secs(30),
        };

        assert!(timed_out.is_timeout());
        assert!(timed_out.is_network());
        assert_eq!(timed_out.url(), Some(EXAMPLE_URL));
        assert_eq!(timed_out.host(), None);
    }

    /// A refused address is a network error with a host but no URL.
    #[test]
    fn address_not_allowed_predicates() {
        let refused = Error::AddressNotAllowed {
            host: String::from("127.0.0.1"),
        };

        assert!(!refused.is_timeout());
        assert!(refused.is_network());
        assert_eq!(refused.url(), None);
        assert_eq!(refused.host(), Some("127.0.0.1"));
    }

    /// An invalid URL keeps the offending URL and is not a network error.
    #[test]
    fn invalid_url_carries_url() {
        let rejected = Error::InvalidUrl {
            url: String::from("bad://url"),
            reason: String::from("scheme not allowed"),
        };

        assert!(!rejected.is_network());
        assert_eq!(rejected.url(), Some("bad://url"));
        assert_eq!(rejected.host(), None);
    }

    /// The `engine` helper keeps the URL, the source chain and the message.
    #[test]
    fn engine_helper_preserves_source_chain() {
        let cause = std::io::Error::other("disk full");
        let wrapped = Error::engine(cause, Some(String::from(EXAMPLE_URL)));

        assert_eq!(wrapped.url(), Some(EXAMPLE_URL));
        assert!(StdError::source(&wrapped).is_some());
        assert_eq!(wrapped.to_string(), "engine error: disk full");
    }

    /// Without a URL at construction, `url()` reports none.
    #[test]
    fn engine_without_url_returns_none() {
        let wrapped = Error::engine(std::io::Error::other("crash"), None);
        assert!(wrapped.url().is_none());
    }
}