use serde::Deserialize;
use thiserror::Error;

/// Structured details for an error reported by the Scrapfly API.
#[derive(Debug, Clone, Default)]
pub struct ApiError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Machine-readable error code; the second `::`-delimited segment names
    /// the affected resource.
    pub code: String,
    /// HTTP status code of the API response.
    pub http_status: u16,
    /// Link to the relevant documentation page, when provided.
    pub documentation_url: String,
    /// Actionable hint for resolving the error, when available.
    pub hint: String,
    /// Suggested delay before retrying, in milliseconds (0 when not applicable).
    pub retry_after_ms: u64,
}

impl std::fmt::Display for ApiError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "API Error: {} (code: {}, status: {}, docs: {})",
            self.message, self.code, self.http_status, self.documentation_url
        )?;
        if self.retry_after_ms > 0 {
            write!(f, ", retry_after_ms: {}", self.retry_after_ms)?;
        }
        Ok(())
    }
}

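/// Unified error type for the client: transport, serialization, configuration,
/// and typed API failures.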
#[derive(Debug, Error)]
pub enum ScrapflyError {
    #[error("transport: {0}")]
    Transport(#[from] reqwest::Error),
    #[error("json: {0}")]
    Json(#[from] serde_json::Error),
    #[error("config: {0}")]
    Config(String),
    #[error("invalid key, must be a non-empty string")]
    BadApiKey,
    #[error("api error [{}] {}", .0.code, .0.message)]
    Api(ApiError),
    #[error("API http client error: {0}")]
    ApiClient(ApiError),
    #[error("API http server error: {0}")]
    ApiServer(ApiError),
    #[error("upstream http client error: {0}")]
    UpstreamClient(ApiError),
    #[error("upstream http server error: {0}")]
    UpstreamServer(ApiError),
    #[error("too many requests: {0}")]
    TooManyRequests(ApiError),
    #[error("quota limit reached: {0}")]
    QuotaLimitReached(ApiError),
    #[error("scrape failed: {0}")]
    ScrapeFailed(ApiError),
    #[error("proxy error: {0}")]
    ProxyFailed(ApiError),
    #[error("ASP bypass error: {0}")]
    AspBypassFailed(ApiError),
    #[error("schedule error: {0}")]
    ScheduleFailed(ApiError),
    #[error("webhook error: {0}")]
    WebhookFailed(ApiError),
    #[error("session error: {0}")]
    SessionFailed(ApiError),
    #[error("screenshot API error: {0}")]
    ScreenshotApiFailed(ApiError),
    #[error("extraction API error: {0}")]
    ExtractionApiFailed(ApiError),
    #[error("crawler error: {0}")]
    CrawlerFailed(ApiError),
    #[error("unhandled API error response: {0}")]
    UnhandledApiResponse(ApiError),
    #[error("crawler not started, call start() first")]
    CrawlerNotStarted,
    #[error("crawler already started")]
    CrawlerAlreadyStarted,
    #[error("crawler was cancelled")]
    CrawlerCancelled,
    #[error("crawler wait timed out")]
    CrawlerTimeout,
    #[error("unexpected response format: {0}")]
    UnexpectedResponseFormat(String),
    #[error("invalid content type for this operation: {0}")]
    ContentType(String),
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
}

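/// Lenient shape of the API's JSON error body; every field defaults so that
/// partial or malformed bodies still deserialize.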
#[derive(Debug, Deserialize, Default)]
struct ErrorEnvelope {
    #[serde(default)]
    message: String,
    #[serde(default, alias = "error")]
    code: String,
    #[serde(default)]
    #[allow(dead_code)]
    error_id: String,
    #[serde(default)]
    #[allow(dead_code)]
    http_code: u16,
}

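/// Map an HTTP error response from the API to a typed [`ScrapflyError`],
/// routing on the HTTP status and the error code in the response body.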
pub fn from_response(
    status: u16,
    body: &[u8],
    retry_after_ms: u64,
    is_crawler: bool,
) -> ScrapflyError {
    // Tolerate non-JSON bodies: fall back to an empty envelope and a generic message.
    let envelope: ErrorEnvelope = serde_json::from_slice(body).unwrap_or_default();
    let msg = if envelope.message.is_empty() {
        format!("API returned status {}", status)
    } else {
        envelope.message.clone()
    };
    let mut err = ApiError {
        message: msg,
        code: envelope.code.clone(),
        http_status: status,
        documentation_url: String::new(),
        hint: String::new(),
        retry_after_ms,
    };

    // Schedule errors get their dedicated variant regardless of HTTP status.
    if envelope.code.contains("::SCHEDULE::") {
        return ScrapflyError::ScheduleFailed(err);
    }

    // Attach status-specific hints; 429 and the 422 sub-cases short-circuit,
    // while 401 falls through to the generic client-error mapping below.
    match status {
        401 => err.hint = "Provide a valid API key via ?key=... or Bearer token.".into(),
        429 => {
            err.hint =
                "Back off and retry after the indicated delay, or reduce concurrency/scope.".into();
            return ScrapflyError::TooManyRequests(err);
        }
        422 => {
            let body_str = String::from_utf8_lossy(body);
            if body_str.contains("SCREENSHOT") {
                err.hint =
                    "Check screenshot parameters (format/capture/resolution) and upstream site readiness."
                        .into();
                return ScrapflyError::ScreenshotApiFailed(err);
            }
            if body_str.contains("EXTRACTION") {
                err.hint =
                    "Check content_type, body encoding, and template/prompt validity.".into();
                return ScrapflyError::ExtractionApiFailed(err);
            }
        }
        _ => {}
    }

    if is_crawler && envelope.code.contains("::CRAWLER::") {
        return ScrapflyError::CrawlerFailed(err);
    }

    // Error codes carry the affected resource as the second "::"-delimited
    // segment; route on it when it is recognized.
    if let Some(resource) = envelope.code.split("::").nth(1) {
        match resource {
            "SCRAPE" => return ScrapflyError::ScrapeFailed(err),
            "PROXY" => return ScrapflyError::ProxyFailed(err),
            "ASP" => return ScrapflyError::AspBypassFailed(err),
            "SCHEDULE" => return ScrapflyError::ScheduleFailed(err),
            "WEBHOOK" => return ScrapflyError::WebhookFailed(err),
            "SESSION" => return ScrapflyError::SessionFailed(err),
            "THROTTLE" => return ScrapflyError::TooManyRequests(err),
            "QUOTA" => return ScrapflyError::QuotaLimitReached(err),
            "CRAWLER" => return ScrapflyError::CrawlerFailed(err),
            _ => {}
        }
    }

    // Nothing matched: fall back to a coarse client/server split by status class.
    match status {
        400..=499 => ScrapflyError::ApiClient(err),
        500..=599 => ScrapflyError::ApiServer(err),
        _ => ScrapflyError::UnhandledApiResponse(err),
    }
}

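/// Parse a retry-after value given in whole seconds into milliseconds,
/// returning 0 when the value is absent or not a valid integer.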
pub(crate) fn parse_retry_after(value: Option<&str>) -> u64 {
    match value {
        Some(v) => v
            .trim()
            .parse::<u64>()
            .map(|secs| secs.saturating_mul(1000))
            .unwrap_or(0),
        None => 0,
    }
}
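
// A minimal sanity-check sketch of the mapping above. The JSON bodies and the
// `ERR::PROXY::...` error code are hypothetical examples of the envelope shape
// this module parses, not captured API responses.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn status_429_maps_to_too_many_requests() {
        let err = from_response(429, b"{}", 1_000, false);
        assert!(matches!(err, ScrapflyError::TooManyRequests(_)));
    }

    #[test]
    fn resource_segment_selects_variant() {
        // Hypothetical code; only the second "::" segment drives routing.
        let body = br#"{"message":"denied","code":"ERR::PROXY::POOL_UNAVAILABLE"}"#;
        assert!(matches!(
            from_response(403, body, 0, false),
            ScrapflyError::ProxyFailed(_)
        ));
    }

    #[test]
    fn unrecognized_4xx_falls_back_to_api_client() {
        // A non-JSON body yields the default (empty) envelope, so routing
        // falls through to the status-class fallback.
        assert!(matches!(
            from_response(404, b"not json", 0, false),
            ScrapflyError::ApiClient(_)
        ));
    }

    #[test]
    fn retry_after_converts_seconds_to_millis() {
        assert_eq!(parse_retry_after(Some("2")), 2_000);
        assert_eq!(parse_retry_after(Some("junk")), 0);
        assert_eq!(parse_retry_after(None), 0);
    }
}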