Skip to main content

spider_lib/
error.rs

1//! Custom error types for the `spider-lib` framework.
2//!
3//! This module defines a comprehensive set of custom error types, `SpiderError`
4//! and `PipelineError`, used throughout the `spider-lib` crate. These errors
5//! encapsulate various issues that can occur during crawling, such as network
6//! failures, URL parsing problems, I/O errors, configuration issues, and
7//! problems within item processing pipelines.
8//!
9//! By centralizing error definitions, the module provides a consistent and
10//! semantic way to report and handle errors, improving the robustness and
11//! maintainability of the web scraping application.
12
13use thiserror::Error;
14
15#[derive(Debug, Clone, Error)]
16#[error("Reqwest error: {message}")]
17pub struct ReqwestErrorDetails {
18    pub message: String,
19    pub is_connect: bool,
20    pub is_timeout: bool,
21    // Add other relevant flags if necessary
22}
23
24impl From<reqwest::Error> for ReqwestErrorDetails {
25    fn from(err: reqwest::Error) -> Self {
26        ReqwestErrorDetails {
27            is_connect: err.is_connect(),
28            is_timeout: err.is_timeout(),
29            message: err.to_string(),
30        }
31    }
32}
33
34/// The main error type for the spider framework.
35#[derive(Debug, Clone, Error)] // Added Clone
36pub enum SpiderError {
37    #[error("Reqwest error: {0}")]
38    ReqwestError(#[from] ReqwestErrorDetails), // Changed to wrap ReqwestErrorDetails
39    #[error("Url parsing error: {0}")]
40    UrlParseError(#[from] url::ParseError),
41    #[error("Json parsing error: {0}")]
42    JsonError(String), // Changed to String for cloning
43    #[error("Io error: {0}")]
44    IoError(String), // Changed to String for cloning
45    #[error("Configuration error: {0}")]
46    ConfigurationError(String),
47    #[error("General error: {0}")]
48    GeneralError(String),
49    #[error("Failed to convert item to string: {0}")]
50    ItemToStringError(String),
51    #[error("Error during item serialization: {0}")]
52    ItemSerializationError(String),
53    #[error("Unknown error")]
54    Unknown,
55    #[error("Header value error: {0}")]
56    HeaderValueError(String),
57    #[error("HTML parsing error: {0}")]
58    HtmlParseError(String),
59    #[error("UTF-8 parsing error: {0}")]
60    Utf8Error(#[from] std::str::Utf8Error),
61    #[error("Pipeline error: {0}")]
62    PipelineError(#[from] PipelineError),
63    #[error("Request blocked by robots.txt")]
64    BlockedByRobotsTxt,
65}
66
/// Folds a bincode failure into [`SpiderError::GeneralError`].
///
/// Compiled only when the `middleware-http-cache` or `checkpoint` feature is
/// enabled.
#[cfg(any(feature = "middleware-http-cache", feature = "checkpoint"))]
impl From<bincode::Error> for SpiderError {
    fn from(cause: bincode::Error) -> Self {
        let description = format!("Bincode error: {}", cause);
        Self::GeneralError(description)
    }
}
73
74impl From<reqwest::Error> for SpiderError {
75    fn from(err: reqwest::Error) -> Self {
76        SpiderError::ReqwestError(err.into())
77    }
78}
79
80impl From<std::io::Error> for SpiderError {
81    fn from(err: std::io::Error) -> Self {
82        SpiderError::IoError(err.to_string())
83    }
84}
85
86impl From<serde_json::Error> for SpiderError {
87    fn from(err: serde_json::Error) -> Self {
88        SpiderError::JsonError(err.to_string())
89    }
90}
91
92/// The error type for item processing pipelines.
93#[derive(Error, Debug, Clone)] // Added Clone
94pub enum PipelineError {
95    #[error("I/O error: {0}")]
96    IoError(String), // Changed to String for cloning
97    #[error("Item processing error: {0}")]
98    ItemError(String),
99    #[cfg(feature = "pipeline-sqlite")]
100    #[error("Database error: {0}")]
101    DatabaseError(String), // Changed to String for cloning
102    #[error("Serialization error: {0}")]
103    SerializationError(String), // Changed to String for cloning
104    #[error("CSV error: {0}")]
105    CsvError(String), // Changed to String for cloning
106    #[error("Other pipeline error: {0}")]
107    Other(String),
108}
109
/// Converts a `csv::Error` into [`PipelineError::CsvError`], keeping only its
/// message text.
///
/// Compiled only when the `pipeline-csv` feature is enabled.
#[cfg(feature = "pipeline-csv")]
impl From<csv::Error> for PipelineError {
    fn from(cause: csv::Error) -> Self {
        let message = cause.to_string();
        Self::CsvError(message)
    }
}
116
117impl From<std::io::Error> for PipelineError {
118    fn from(err: std::io::Error) -> Self {
119        PipelineError::IoError(err.to_string())
120    }
121}
122
123impl From<serde_json::Error> for PipelineError {
124    fn from(err: serde_json::Error) -> Self {
125        PipelineError::SerializationError(err.to_string())
126    }
127}
128
/// Converts a `rusqlite::Error` into [`PipelineError::DatabaseError`], keeping
/// only its message text.
///
/// Compiled only when the `pipeline-sqlite` feature is enabled.
#[cfg(feature = "pipeline-sqlite")]
impl From<rusqlite::Error> for PipelineError {
    fn from(cause: rusqlite::Error) -> Self {
        let message = cause.to_string();
        Self::DatabaseError(message)
    }
}
/// Converts a `rusqlite::Error` into [`SpiderError::PipelineError`] wrapping a
/// [`PipelineError::DatabaseError`] that holds the error's message text.
///
/// Compiled only when the `pipeline-sqlite` feature is enabled.
#[cfg(feature = "pipeline-sqlite")]
impl From<rusqlite::Error> for SpiderError {
    fn from(cause: rusqlite::Error) -> Self {
        let database_error = PipelineError::DatabaseError(cause.to_string());
        Self::PipelineError(database_error)
    }
}