//! Error types for the Scrapfly client: the structured [`ApiError`] payload
//! and the [`ScrapflyError`] enum that classifies transport, API, and crawler
//! failures.

use serde::Deserialize;
use thiserror::Error;

/// Structured error payload extracted from a Scrapfly API error response.
#[derive(Debug, Clone, Default)]
pub struct ApiError {
    /// Human-readable error description.
    pub message: String,
    /// Machine-readable error code, e.g. `ERR::SCRAPE::...`.
    pub code: String,
    /// HTTP status code of the error response.
    pub http_status: u16,
    /// Link to the relevant documentation page, if any.
    pub documentation_url: String,
    /// Actionable hint on how to resolve the error, if any.
    pub hint: String,
    /// Suggested delay before retrying, in milliseconds (0 if none).
    pub retry_after_ms: u64,
}

impl std::fmt::Display for ApiError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "API Error: {} (code: {}, status: {})",
            self.message, self.code, self.http_status
        )?;
        // Only render the optional fields that are actually populated.
        if !self.documentation_url.is_empty() {
            write!(f, ", docs: {}", self.documentation_url)?;
        }
        if !self.hint.is_empty() {
            write!(f, ", hint: {}", self.hint)?;
        }
        if self.retry_after_ms > 0 {
            write!(f, ", retry_after_ms: {}", self.retry_after_ms)?;
        }
        Ok(())
    }
}

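// Rendering sketch (the code string and values below are illustrative, not a
// real API fixture): an ApiError with message "failed", code
// "ERR::THROTTLE::LIMIT", http_status 429, and retry_after_ms 2_000, with the
// optional fields empty, displays as:
//   API Error: failed (code: ERR::THROTTLE::LIMIT, status: 429), retry_after_ms: 2000
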
/// All errors surfaced by this crate, from transport failures to
/// resource-specific API error classes.
#[derive(Debug, Error)]
pub enum ScrapflyError {
    #[error("transport: {0}")]
    Transport(#[from] reqwest::Error),
    #[error("json: {0}")]
    Json(#[from] serde_json::Error),
    #[error("config: {0}")]
    Config(String),
    #[error("invalid key, must be a non-empty string")]
    BadApiKey,
    #[error("api error [{}] {}", .0.code, .0.message)]
    Api(ApiError),
    #[error("API HTTP client error: {0}")]
    ApiClient(ApiError),
    #[error("API HTTP server error: {0}")]
    ApiServer(ApiError),
    #[error("upstream HTTP client error: {0}")]
    UpstreamClient(ApiError),
    #[error("upstream HTTP server error: {0}")]
    UpstreamServer(ApiError),
    #[error("too many requests: {0}")]
    TooManyRequests(ApiError),
    #[error("quota limit reached: {0}")]
    QuotaLimitReached(ApiError),
    #[error("scrape failed: {0}")]
    ScrapeFailed(ApiError),
    #[error("proxy error: {0}")]
    ProxyFailed(ApiError),
    #[error("ASP bypass error: {0}")]
    AspBypassFailed(ApiError),
    #[error("schedule error: {0}")]
    ScheduleFailed(ApiError),
    #[error("webhook error: {0}")]
    WebhookFailed(ApiError),
    #[error("session error: {0}")]
    SessionFailed(ApiError),
    #[error("screenshot API error: {0}")]
    ScreenshotApiFailed(ApiError),
    #[error("extraction API error: {0}")]
    ExtractionApiFailed(ApiError),
    #[error("crawler error: {0}")]
    CrawlerFailed(ApiError),
    #[error("unhandled API error response: {0}")]
    UnhandledApiResponse(ApiError),
    #[error("crawler not started, call start() first")]
    CrawlerNotStarted,
    #[error("crawler already started")]
    CrawlerAlreadyStarted,
    #[error("crawler was cancelled")]
    CrawlerCancelled,
    #[error("crawler wait timed out")]
    CrawlerTimeout,
    #[error("unexpected response format: {0}")]
    UnexpectedResponseFormat(String),
    #[error("invalid content type for this operation: {0}")]
    ContentType(String),
    #[error("io: {0}")]
    Io(#[from] std::io::Error),
}

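// Illustrative only: a minimal sketch of how callers might decide whether an
// error is worth retrying. The set of retryable variants chosen here is an
// assumption for demonstration, not part of a documented contract.
impl ScrapflyError {
    /// Whether the failure is plausibly transient (network faults, server-side
    /// errors, throttling) and may succeed on a later attempt.
    pub fn is_retryable(&self) -> bool {
        matches!(
            self,
            ScrapflyError::Transport(_)
                | ScrapflyError::ApiServer(_)
                | ScrapflyError::UpstreamServer(_)
                | ScrapflyError::TooManyRequests(_)
        )
    }
}
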
/// Minimal shape of the JSON error body returned by the API; missing or
/// unknown fields fall back to their defaults.
#[derive(Debug, Deserialize, Default)]
struct ErrorEnvelope {
    #[serde(default)]
    message: String,
    #[serde(default, alias = "error")]
    code: String,
    #[serde(default)]
    #[allow(dead_code)]
    error_id: String,
    #[serde(default)]
    #[allow(dead_code)]
    http_code: u16,
}

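// For reference, a body shaped like the following (values are illustrative,
// not captured from a real response) deserializes into ErrorEnvelope above;
// note that `error` is accepted as an alias for `code`:
//   {"message": "scrape failed", "error": "ERR::SCRAPE::BAD_UPSTREAM",
//    "error_id": "abc-123", "http_code": 422}
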
/// Map a raw error response to the most specific [`ScrapflyError`] variant.
///
/// Classification happens in order: schedule codes, well-known statuses
/// (401/429/422), crawler codes, the resource segment of the error code
/// (`ERR::<RESOURCE>::...`), and finally the generic 4xx/5xx buckets.
pub fn from_response(
    status: u16,
    body: &[u8],
    retry_after_ms: u64,
    is_crawler: bool,
) -> ScrapflyError {
    // The body may be empty or non-JSON; fall back to an empty envelope.
    let envelope: ErrorEnvelope = serde_json::from_slice(body).unwrap_or_default();
    let msg = if envelope.message.is_empty() {
        format!("API returned status {}", status)
    } else {
        envelope.message.clone()
    };
    let mut err = ApiError {
        message: msg,
        code: envelope.code.clone(),
        http_status: status,
        documentation_url: String::new(),
        hint: String::new(),
        retry_after_ms,
    };
    // Schedule errors take precedence over status-based routing.
    if envelope.code.contains("::SCHEDULE::") {
        return ScrapflyError::ScheduleFailed(err);
    }
    match status {
        401 => err.hint = "Provide a valid API key via ?key=... or Bearer token.".into(),
        429 => {
            err.hint =
                "Back off and retry after the indicated delay, or reduce concurrency/scope.".into();
            return ScrapflyError::TooManyRequests(err);
        }
        422 => {
            let body_str = String::from_utf8_lossy(body);
            if body_str.contains("SCREENSHOT") {
                err.hint =
                    "Check screenshot parameters (format/capture/resolution) and upstream site readiness."
                        .into();
                return ScrapflyError::ScreenshotApiFailed(err);
            }
            if body_str.contains("EXTRACTION") {
                err.hint =
                    "Check content_type, body encoding, and template/prompt validity.".into();
                return ScrapflyError::ExtractionApiFailed(err);
            }
        }
        _ => {}
    }
    if is_crawler && envelope.code.contains("::CRAWLER::") {
        return ScrapflyError::CrawlerFailed(err);
    }
    // Route on the resource segment of codes shaped like `ERR::<RESOURCE>::<DETAIL>`.
    if let Some(resource) = envelope.code.split("::").nth(1) {
        match resource {
            "SCRAPE" => return ScrapflyError::ScrapeFailed(err),
            "PROXY" => return ScrapflyError::ProxyFailed(err),
            "ASP" => return ScrapflyError::AspBypassFailed(err),
            "SCHEDULE" => return ScrapflyError::ScheduleFailed(err),
            "WEBHOOK" => return ScrapflyError::WebhookFailed(err),
            "SESSION" => return ScrapflyError::SessionFailed(err),
            "THROTTLE" => return ScrapflyError::TooManyRequests(err),
            "QUOTA" => return ScrapflyError::QuotaLimitReached(err),
            "CRAWLER" => return ScrapflyError::CrawlerFailed(err),
            _ => {}
        }
    }
    // Fall back to the generic HTTP class of the status code.
    match status {
        400..=499 => ScrapflyError::ApiClient(err),
        500..=599 => ScrapflyError::ApiServer(err),
        _ => ScrapflyError::UnhandledApiResponse(err),
    }
}

/// Parse a `Retry-After` header value given in delta-seconds into
/// milliseconds. HTTP-date values and malformed input yield 0.
pub(crate) fn parse_retry_after(value: Option<&str>) -> u64 {
    match value {
        Some(v) => v
            .trim()
            .parse::<u64>()
            .map(|secs| secs.saturating_mul(1000))
            .unwrap_or(0),
        None => 0,
    }
}

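// A minimal test sketch of the classification and header-parsing logic; the
// JSON bodies and error codes below are illustrative assumptions, not
// captured API fixtures.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn retry_after_parses_delta_seconds_to_ms() {
        assert_eq!(parse_retry_after(Some("2")), 2_000);
        assert_eq!(parse_retry_after(Some("not-a-number")), 0);
        assert_eq!(parse_retry_after(None), 0);
    }

    #[test]
    fn resource_segment_routes_to_specific_variant() {
        let body = br#"{"message":"failed","error":"ERR::SCRAPE::BAD_UPSTREAM"}"#;
        assert!(matches!(
            from_response(422, body, 0, false),
            ScrapflyError::ScrapeFailed(_)
        ));
    }

    #[test]
    fn status_429_maps_to_too_many_requests() {
        assert!(matches!(
            from_response(429, b"{}", 1_000, false),
            ScrapflyError::TooManyRequests(_)
        ));
    }

    #[test]
    fn unknown_4xx_falls_back_to_api_client() {
        assert!(matches!(
            from_response(418, b"not json", 0, false),
            ScrapflyError::ApiClient(_)
        ));
    }
}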