Skip to main content

spider_util/
error.rs

//! Custom error types for the `spider-core` framework.
//!
//! This module defines a comprehensive set of custom error types, `SpiderError`
//! and `PipelineError`, used throughout the `spider-core` crate. These errors
//! encapsulate various issues that can occur during crawling, such as network
//! failures, URL parsing problems, I/O errors, configuration issues, and
//! problems within item processing pipelines.
//!
//! By centralizing error definitions, the module provides a consistent and
//! semantic way to report and handle errors, improving the robustness and
//! maintainability of the web scraping application.
12
13use http;
14use serde_json::Error as SerdeJsonError;
15use std::str::Utf8Error;
16use thiserror::Error;
17
18#[derive(Debug, Clone, Error)]
19#[error("Reqwest error: {message}")]
20pub struct ReqwestError {
21    pub message: String,
22    pub is_connect: bool,
23    pub is_timeout: bool,
24}
25
26impl From<reqwest::Error> for ReqwestError {
27    fn from(err: reqwest::Error) -> Self {
28        ReqwestError {
29            is_connect: err.is_connect(),
30            is_timeout: err.is_timeout(),
31            message: err.to_string(),
32        }
33    }
34}
35
36/// The main error type for the spider framework.
37#[derive(Debug, Clone, Error)]
38pub enum SpiderError {
39    #[error("Reqwest error: {0}")]
40    ReqwestError(#[from] ReqwestError),
41    #[error("Url parsing error: {0}")]
42    UrlParseError(#[from] url::ParseError),
43    #[error("Json parsing error: {0}")]
44    JsonError(String),
45    #[error("Io error: {0}")]
46    IoError(String),
47    #[error("Configuration error: {0}")]
48    ConfigurationError(String),
49    #[error("General error: {0}")]
50    GeneralError(String),
51    #[error("Failed to convert item to string: {0}")]
52    ItemToStringError(String),
53    #[error("Error during item serialization: {0}")]
54    ItemSerializationError(String),
55    #[error("Unknown error")]
56    Unknown,
57    #[error("Invalid HTTP header value: {0}")]
58    InvalidHeaderValue(String),
59    #[error("Header value error: {0}")]
60    HeaderValueError(String),
61    #[error("HTML parsing error: {0}")]
62    HtmlParseError(String),
63    #[error("UTF-8 parsing error: {0}")]
64    Utf8Error(#[from] Utf8Error),
65    #[error("Pipeline error: {0}")]
66    PipelineError(#[from] PipelineError),
67    #[error("Request blocked by robots.txt")]
68    BlockedByRobotsTxt,
69}
70
71impl From<http::header::InvalidHeaderValue> for SpiderError {
72    fn from(err: http::header::InvalidHeaderValue) -> Self {
73        SpiderError::InvalidHeaderValue(err.to_string())
74    }
75}
76
77impl From<bincode::Error> for SpiderError {
78    fn from(err: bincode::Error) -> Self {
79        SpiderError::GeneralError(format!("Bincode error: {}", err))
80    }
81}
82
83impl From<reqwest::Error> for SpiderError {
84    fn from(err: reqwest::Error) -> Self {
85        SpiderError::ReqwestError(err.into())
86    }
87}
88
89impl From<std::io::Error> for SpiderError {
90    fn from(err: std::io::Error) -> Self {
91        SpiderError::IoError(err.to_string())
92    }
93}
94
95impl From<SerdeJsonError> for SpiderError {
96    fn from(err: SerdeJsonError) -> Self {
97        SpiderError::JsonError(err.to_string())
98    }
99}
100
101/// The error type for item processing pipelines.
102#[derive(Error, Debug, Clone)]
103pub enum PipelineError {
104    #[error("I/O error: {0}")]
105    IoError(String),
106    #[error("Item processing error: {0}")]
107    ItemError(String),
108    #[error("Database error: {0}")]
109    DatabaseError(String),
110    #[error("Serialization error: {0}")]
111    SerializationError(String),
112    #[error("CSV error: {0}")]
113    CsvError(String),
114    #[error("Other pipeline error: {0}")]
115    Other(String),
116}
117
118impl From<csv::Error> for PipelineError {
119    fn from(err: csv::Error) -> Self {
120        PipelineError::CsvError(err.to_string())
121    }
122}
123
124impl From<std::io::Error> for PipelineError {
125    fn from(err: std::io::Error) -> Self {
126        PipelineError::IoError(err.to_string())
127    }
128}
129
130impl From<SerdeJsonError> for PipelineError {
131    fn from(err: SerdeJsonError) -> Self {
132        PipelineError::SerializationError(err.to_string())
133    }
134}
135
136impl From<rusqlite::Error> for PipelineError {
137    fn from(err: rusqlite::Error) -> Self {
138        PipelineError::DatabaseError(err.to_string())
139    }
140}
141
142impl From<rusqlite::Error> for SpiderError {
143    fn from(err: rusqlite::Error) -> Self {
144        SpiderError::PipelineError(PipelineError::DatabaseError(err.to_string()))
145    }
146}