Skip to main content

essence/
error.rs

1use axum::{
2    http::StatusCode,
3    response::{IntoResponse, Response},
4    Json,
5};
6use serde_json::json;
7use thiserror::Error;
8
9#[derive(Debug, Error)]
10pub enum ScrapeError {
11    #[error("HTTP request failed: {0}")]
12    RequestFailed(#[from] reqwest::Error),
13
14    #[error("Invalid URL: {0}")]
15    InvalidUrl(String),
16
17    #[error("Timeout occurred")]
18    Timeout,
19
20    #[error("Failed to parse HTML: {0}")]
21    ParseError(String),
22
23    #[error("Robots.txt disallows scraping")]
24    RobotsDisallowed,
25
26    #[error("Unsupported format: {0}")]
27    UnsupportedFormat(String),
28
29    #[error("Internal error: {0}")]
30    Internal(String),
31
32    #[error("Browser error: {0}")]
33    BrowserError(String),
34
35    #[error("Browser launch failed: {0}")]
36    BrowserLaunchFailed(String),
37
38    #[error("Navigation failed: {0}")]
39    NavigationFailed(String),
40
41    #[error("Element not found: {0}")]
42    ElementNotFound(String),
43
44    #[error("Validation failed")]
45    ValidationFailed(Vec<String>),
46
47    #[error("Browser not found: {0}")]
48    BrowserNotFound(String),
49
50    #[error("Invalid request: {0}")]
51    InvalidRequest(String),
52
53    #[error("Resource limit exceeded: {0}")]
54    ResourceLimit(String),
55
56    #[error("Unauthorized")]
57    Unauthorized,
58
59    #[error("SSRF attempt detected: {0}")]
60    SsrfAttempt(String),
61
62    #[error("Empty content: {0}")]
63    EmptyContent(String),
64
65    #[error("Low quality content: {0}")]
66    LowQuality(String),
67
68    #[error("Error page detected: {0}")]
69    ErrorPage(String),
70
71    #[error("Configuration error: {0}")]
72    Configuration(String),
73}
74
75impl IntoResponse for ScrapeError {
76    fn into_response(self) -> Response {
77        let (status, error_message) = match self {
78            ScrapeError::RequestFailed(ref e) => {
79                if e.is_timeout() {
80                    (StatusCode::REQUEST_TIMEOUT, "Request timeout".to_string())
81                } else if e.is_connect() {
82                    (
83                        StatusCode::BAD_GATEWAY,
84                        "Failed to connect to target".to_string(),
85                    )
86                } else {
87                    (StatusCode::BAD_GATEWAY, format!("Request failed: {}", e))
88                }
89            }
90            ScrapeError::InvalidUrl(_) => (StatusCode::BAD_REQUEST, self.to_string()),
91            ScrapeError::Timeout => (StatusCode::REQUEST_TIMEOUT, self.to_string()),
92            ScrapeError::ParseError(_) => (StatusCode::UNPROCESSABLE_ENTITY, self.to_string()),
93            ScrapeError::RobotsDisallowed => (StatusCode::FORBIDDEN, self.to_string()),
94            ScrapeError::UnsupportedFormat(_) => (StatusCode::BAD_REQUEST, self.to_string()),
95            ScrapeError::Internal(_) => (StatusCode::INTERNAL_SERVER_ERROR, self.to_string()),
96            ScrapeError::BrowserError(_) => (StatusCode::INTERNAL_SERVER_ERROR, self.to_string()),
97            ScrapeError::BrowserLaunchFailed(_) => {
98                (StatusCode::SERVICE_UNAVAILABLE, self.to_string())
99            }
100            ScrapeError::NavigationFailed(_) => (StatusCode::BAD_GATEWAY, self.to_string()),
101            ScrapeError::ElementNotFound(_) => (StatusCode::UNPROCESSABLE_ENTITY, self.to_string()),
102            ScrapeError::ValidationFailed(ref errors) => (
103                StatusCode::BAD_REQUEST,
104                format!("Validation failed: {}", errors.join(", ")),
105            ),
106            ScrapeError::BrowserNotFound(_) => {
107                (StatusCode::SERVICE_UNAVAILABLE, self.to_string())
108            }
109            ScrapeError::InvalidRequest(_) => (StatusCode::BAD_REQUEST, self.to_string()),
110            ScrapeError::ResourceLimit(_) => (StatusCode::PAYLOAD_TOO_LARGE, self.to_string()),
111            ScrapeError::Unauthorized => (StatusCode::UNAUTHORIZED, "Unauthorized".to_string()),
112            ScrapeError::SsrfAttempt(_) => (StatusCode::FORBIDDEN, self.to_string()),
113            ScrapeError::EmptyContent(_) => (StatusCode::UNPROCESSABLE_ENTITY, self.to_string()),
114            ScrapeError::LowQuality(_) => (StatusCode::UNPROCESSABLE_ENTITY, self.to_string()),
115            ScrapeError::ErrorPage(_) => (StatusCode::BAD_GATEWAY, self.to_string()),
116            ScrapeError::Configuration(_) => (StatusCode::INTERNAL_SERVER_ERROR, self.to_string()),
117        };
118
119        let body = Json(json!({
120            "success": false,
121            "error": error_message,
122        }));
123
124        (status, body).into_response()
125    }
126}
127
128pub type Result<T> = std::result::Result<T, ScrapeError>;