Skip to main content

spider_util/
error.rs

//! Custom error types for the `spider-core` framework.
//!
//! This module defines a comprehensive set of custom error types, `SpiderError`
//! and `PipelineError`, used throughout the `spider-core` crate. These errors
//! encapsulate various issues that can occur during crawling, such as network
//! failures, URL parsing problems, I/O errors, configuration issues, and
//! problems within item processing pipelines.
//!
//! By centralizing error definitions, the module provides a consistent and
//! semantic way to report and handle errors, improving the robustness and
//! maintainability of the web scraping application.
12
13use http;
14use serde_json::Error as SerdeJsonError;
15use std::str::Utf8Error;
16use thiserror::Error;
17
18#[derive(Debug, Clone, Error)]
19#[error("Reqwest error: {message}")]
20pub struct ReqwestErrorDetails {
21    pub message: String,
22    pub is_connect: bool,
23    pub is_timeout: bool,
24    // Add other relevant flags if necessary
25}
26
27impl From<reqwest::Error> for ReqwestErrorDetails {
28    fn from(err: reqwest::Error) -> Self {
29        ReqwestErrorDetails {
30            is_connect: err.is_connect(),
31            is_timeout: err.is_timeout(),
32            message: err.to_string(),
33        }
34    }
35}
36
37/// The main error type for the spider framework.
38#[derive(Debug, Clone, Error)]
39pub enum SpiderError {
40    #[error("Reqwest error: {0}")]
41    ReqwestError(#[from] ReqwestErrorDetails),
42    #[error("Url parsing error: {0}")]
43    UrlParseError(#[from] url::ParseError),
44    #[error("Json parsing error: {0}")]
45    JsonError(String),
46    #[error("Io error: {0}")]
47    IoError(String),
48    #[error("Configuration error: {0}")]
49    ConfigurationError(String),
50    #[error("General error: {0}")]
51    GeneralError(String),
52    #[error("Failed to convert item to string: {0}")]
53    ItemToStringError(String),
54    #[error("Error during item serialization: {0}")]
55    ItemSerializationError(String),
56    #[error("Unknown error")]
57    Unknown,
58    #[error("Invalid HTTP header value: {0}")]
59    InvalidHeaderValue(String),
60    #[error("Header value error: {0}")]
61    HeaderValueError(String),
62    #[error("HTML parsing error: {0}")]
63    HtmlParseError(String),
64    #[error("UTF-8 parsing error: {0}")]
65    Utf8Error(#[from] Utf8Error),
66    #[error("Pipeline error: {0}")]
67    PipelineError(#[from] PipelineError),
68    #[error("Request blocked by robots.txt")]
69    BlockedByRobotsTxt,
70}
71
72impl From<http::header::InvalidHeaderValue> for SpiderError {
73    fn from(err: http::header::InvalidHeaderValue) -> Self {
74        SpiderError::InvalidHeaderValue(err.to_string())
75    }
76}
77
78impl From<bincode::Error> for SpiderError {
79    fn from(err: bincode::Error) -> Self {
80        SpiderError::GeneralError(format!("Bincode error: {}", err))
81    }
82}
83
84impl From<reqwest::Error> for SpiderError {
85    fn from(err: reqwest::Error) -> Self {
86        SpiderError::ReqwestError(err.into())
87    }
88}
89
90impl From<std::io::Error> for SpiderError {
91    fn from(err: std::io::Error) -> Self {
92        SpiderError::IoError(err.to_string())
93    }
94}
95
96impl From<SerdeJsonError> for SpiderError {
97    fn from(err: SerdeJsonError) -> Self {
98        SpiderError::JsonError(err.to_string())
99    }
100}
101
102/// The error type for item processing pipelines.
103#[derive(Error, Debug, Clone)]
104pub enum PipelineError {
105    #[error("I/O error: {0}")]
106    IoError(String),
107    #[error("Item processing error: {0}")]
108    ItemError(String),
109    #[error("Database error: {0}")]
110    DatabaseError(String),
111    #[error("Serialization error: {0}")]
112    SerializationError(String),
113    #[error("CSV error: {0}")]
114    CsvError(String),
115    #[error("Other pipeline error: {0}")]
116    Other(String),
117}
118
119impl From<csv::Error> for PipelineError {
120    fn from(err: csv::Error) -> Self {
121        PipelineError::CsvError(err.to_string())
122    }
123}
124
125impl From<std::io::Error> for PipelineError {
126    fn from(err: std::io::Error) -> Self {
127        PipelineError::IoError(err.to_string())
128    }
129}
130
131impl From<SerdeJsonError> for PipelineError {
132    fn from(err: SerdeJsonError) -> Self {
133        PipelineError::SerializationError(err.to_string())
134    }
135}
136
137impl From<rusqlite::Error> for PipelineError {
138    fn from(err: rusqlite::Error) -> Self {
139        PipelineError::DatabaseError(err.to_string())
140    }
141}
142
143impl From<rusqlite::Error> for SpiderError {
144    fn from(err: rusqlite::Error) -> Self {
145        SpiderError::PipelineError(PipelineError::DatabaseError(err.to_string()))
146    }
147}