Skip to main content

spider_lib/
error.rs

//! Custom error types for the `spider-lib` framework.
//!
//! This module defines the two custom error types, `SpiderError` and
//! `PipelineError`, used throughout the `spider-lib` crate. These errors
//! encapsulate the issues that can occur during crawling, such as network
//! failures, URL parsing problems, I/O errors, configuration issues, and
//! problems within item processing pipelines.
//!
//! By centralizing error definitions, the module provides a consistent and
//! semantic way to report and handle errors, improving the robustness and
//! maintainability of the web scraping application.
12
13use thiserror::Error;
14
15#[derive(Debug, Clone, Error)]
16#[error("Reqwest error: {message}")]
17pub struct ReqwestErrorDetails {
18    pub message: String,
19    pub is_connect: bool,
20    pub is_timeout: bool,
21    // Add other relevant flags if necessary
22}
23
24impl From<reqwest::Error> for ReqwestErrorDetails {
25    fn from(err: reqwest::Error) -> Self {
26        ReqwestErrorDetails {
27            is_connect: err.is_connect(),
28            is_timeout: err.is_timeout(),
29            message: err.to_string(),
30        }
31    }
32}
33
34/// The main error type for the spider framework.
35#[derive(Debug, Clone, Error)] // Added Clone
36pub enum SpiderError {
37    #[error("Reqwest error: {0}")]
38    ReqwestError(#[from] ReqwestErrorDetails), // Changed to wrap ReqwestErrorDetails
39    #[error("Url parsing error: {0}")]
40    UrlParseError(#[from] url::ParseError),
41    #[error("Json parsing error: {0}")]
42    JsonError(String), // Changed to String for cloning
43    #[error("Io error: {0}")]
44    IoError(String), // Changed to String for cloning
45    #[error("Configuration error: {0}")]
46    ConfigurationError(String),
47    #[error("General error: {0}")]
48    GeneralError(String),
49    #[error("Failed to convert item to string: {0}")]
50    ItemToStringError(String),
51    #[error("Error during item serialization: {0}")]
52    ItemSerializationError(String),
53    #[error("Unknown error")]
54    Unknown,
55    #[error("Header value error: {0}")]
56    HeaderValueError(String),
57    #[error("HTML parsing error: {0}")]
58    HtmlParseError(String),
59    #[error("UTF-8 parsing error: {0}")]
60    Utf8Error(#[from] std::str::Utf8Error),
61    #[error("Pipeline error: {0}")]
62    PipelineError(#[from] PipelineError),
63    #[error("Request blocked by robots.txt")]
64    BlockedByRobotsTxt,
65}
66
67impl From<bincode::Error> for SpiderError {
68    fn from(err: bincode::Error) -> Self {
69        SpiderError::GeneralError(format!("Bincode error: {}", err))
70    }
71}
72
73impl From<reqwest::Error> for SpiderError {
74    fn from(err: reqwest::Error) -> Self {
75        SpiderError::ReqwestError(err.into())
76    }
77}
78
79impl From<std::io::Error> for SpiderError {
80    fn from(err: std::io::Error) -> Self {
81        SpiderError::IoError(err.to_string())
82    }
83}
84
85impl From<serde_json::Error> for SpiderError {
86    fn from(err: serde_json::Error) -> Self {
87        SpiderError::JsonError(err.to_string())
88    }
89}
90
91/// The error type for item processing pipelines.
92#[derive(Error, Debug, Clone)] // Added Clone
93pub enum PipelineError {
94    #[error("I/O error: {0}")]
95    IoError(String), // Changed to String for cloning
96    #[error("Item processing error: {0}")]
97    ItemError(String),
98    #[cfg(feature = "pipeline-sqlite")]
99    #[error("Database error: {0}")]
100    DatabaseError(String), // Changed to String for cloning
101    #[error("Serialization error: {0}")]
102    SerializationError(String), // Changed to String for cloning
103    #[error("CSV error: {0}")]
104    CsvError(String), // Changed to String for cloning
105    #[error("Other pipeline error: {0}")]
106    Other(String),
107}
108
109impl From<csv::Error> for PipelineError {
110    fn from(err: csv::Error) -> Self {
111        PipelineError::CsvError(err.to_string())
112    }
113}
114
115impl From<std::io::Error> for PipelineError {
116    fn from(err: std::io::Error) -> Self {
117        PipelineError::IoError(err.to_string())
118    }
119}
120
121impl From<serde_json::Error> for PipelineError {
122    fn from(err: serde_json::Error) -> Self {
123        PipelineError::SerializationError(err.to_string())
124    }
125}
126
#[cfg(feature = "pipeline-sqlite")]
impl From<rusqlite::Error> for PipelineError {
    /// Stores the SQLite error's message text in `DatabaseError`.
    fn from(err: rusqlite::Error) -> Self {
        let message = err.to_string();
        Self::DatabaseError(message)
    }
}
#[cfg(feature = "pipeline-sqlite")]
impl From<rusqlite::Error> for SpiderError {
    /// Records the SQLite error as a pipeline `DatabaseError` nested inside `PipelineError`.
    fn from(err: rusqlite::Error) -> Self {
        let database_error = PipelineError::DatabaseError(err.to_string());
        Self::PipelineError(database_error)
    }
}