1use http;
14use serde_json::Error as SerdeJsonError;
15use std::str::Utf8Error;
16use thiserror::Error;
17
18#[derive(Debug, Clone, Error)]
19#[error("Reqwest error: {message}")]
20pub struct ReqwestError {
21 pub message: String,
22 pub is_connect: bool,
23 pub is_timeout: bool,
24}
25
26impl From<reqwest::Error> for ReqwestError {
27 fn from(err: reqwest::Error) -> Self {
28 ReqwestError {
29 is_connect: err.is_connect(),
30 is_timeout: err.is_timeout(),
31 message: err.to_string(),
32 }
33 }
34}
35
36#[derive(Debug, Clone, Error)]
38pub enum SpiderError {
39 #[error("Reqwest error: {0}")]
40 ReqwestError(#[from] ReqwestError),
41 #[error("Url parsing error: {0}")]
42 UrlParseError(#[from] url::ParseError),
43 #[error("Json parsing error: {0}")]
44 JsonError(String),
45 #[error("Io error: {0}")]
46 IoError(String),
47 #[error("Configuration error: {0}")]
48 ConfigurationError(String),
49 #[error("General error: {0}")]
50 GeneralError(String),
51 #[error("Failed to convert item to string: {0}")]
52 ItemToStringError(String),
53 #[error("Error during item serialization: {0}")]
54 ItemSerializationError(String),
55 #[error("Unknown error")]
56 Unknown,
57 #[error("Invalid HTTP header value: {0}")]
58 InvalidHeaderValue(String),
59 #[error("Header value error: {0}")]
60 HeaderValueError(String),
61 #[error("HTML parsing error: {0}")]
62 HtmlParseError(String),
63 #[error("UTF-8 parsing error: {0}")]
64 Utf8Error(#[from] Utf8Error),
65 #[error("Pipeline error: {0}")]
66 PipelineError(#[from] PipelineError),
67 #[error("Request blocked by robots.txt")]
68 BlockedByRobotsTxt,
69}
70
71impl From<http::header::InvalidHeaderValue> for SpiderError {
72 fn from(err: http::header::InvalidHeaderValue) -> Self {
73 SpiderError::InvalidHeaderValue(err.to_string())
74 }
75}
76
77impl From<bincode::Error> for SpiderError {
78 fn from(err: bincode::Error) -> Self {
79 SpiderError::GeneralError(format!("Bincode error: {}", err))
80 }
81}
82
83impl From<reqwest::Error> for SpiderError {
84 fn from(err: reqwest::Error) -> Self {
85 SpiderError::ReqwestError(err.into())
86 }
87}
88
89impl From<std::io::Error> for SpiderError {
90 fn from(err: std::io::Error) -> Self {
91 SpiderError::IoError(err.to_string())
92 }
93}
94
95impl From<SerdeJsonError> for SpiderError {
96 fn from(err: SerdeJsonError) -> Self {
97 SpiderError::JsonError(err.to_string())
98 }
99}
100
101#[derive(Error, Debug, Clone)]
103pub enum PipelineError {
104 #[error("I/O error: {0}")]
105 IoError(String),
106 #[error("Item processing error: {0}")]
107 ItemError(String),
108 #[error("Database error: {0}")]
109 DatabaseError(String),
110 #[error("Serialization error: {0}")]
111 SerializationError(String),
112 #[error("CSV error: {0}")]
113 CsvError(String),
114 #[error("Other pipeline error: {0}")]
115 Other(String),
116}
117
118impl From<csv::Error> for PipelineError {
119 fn from(err: csv::Error) -> Self {
120 PipelineError::CsvError(err.to_string())
121 }
122}
123
124impl From<std::io::Error> for PipelineError {
125 fn from(err: std::io::Error) -> Self {
126 PipelineError::IoError(err.to_string())
127 }
128}
129
130impl From<SerdeJsonError> for PipelineError {
131 fn from(err: SerdeJsonError) -> Self {
132 PipelineError::SerializationError(err.to_string())
133 }
134}
135
136impl From<rusqlite::Error> for PipelineError {
137 fn from(err: rusqlite::Error) -> Self {
138 PipelineError::DatabaseError(err.to_string())
139 }
140}
141
142impl From<rusqlite::Error> for SpiderError {
143 fn from(err: rusqlite::Error) -> Self {
144 SpiderError::PipelineError(PipelineError::DatabaseError(err.to_string()))
145 }
146}