1use http;
14use serde_json::Error as SerdeJsonError;
15use std::str::Utf8Error;
16use thiserror::Error;
17
18#[derive(Debug, Clone, Error)]
19#[error("Reqwest error: {message}")]
20pub struct ReqwestErrorDetails {
21 pub message: String,
22 pub is_connect: bool,
23 pub is_timeout: bool,
24 }
26
27impl From<reqwest::Error> for ReqwestErrorDetails {
28 fn from(err: reqwest::Error) -> Self {
29 ReqwestErrorDetails {
30 is_connect: err.is_connect(),
31 is_timeout: err.is_timeout(),
32 message: err.to_string(),
33 }
34 }
35}
36
37#[derive(Debug, Clone, Error)]
39pub enum SpiderError {
40 #[error("Reqwest error: {0}")]
41 ReqwestError(#[from] ReqwestErrorDetails),
42 #[error("Url parsing error: {0}")]
43 UrlParseError(#[from] url::ParseError),
44 #[error("Json parsing error: {0}")]
45 JsonError(String),
46 #[error("Io error: {0}")]
47 IoError(String),
48 #[error("Configuration error: {0}")]
49 ConfigurationError(String),
50 #[error("General error: {0}")]
51 GeneralError(String),
52 #[error("Failed to convert item to string: {0}")]
53 ItemToStringError(String),
54 #[error("Error during item serialization: {0}")]
55 ItemSerializationError(String),
56 #[error("Unknown error")]
57 Unknown,
58 #[error("Invalid HTTP header value: {0}")]
59 InvalidHeaderValue(String),
60 #[error("Header value error: {0}")]
61 HeaderValueError(String),
62 #[error("HTML parsing error: {0}")]
63 HtmlParseError(String),
64 #[error("UTF-8 parsing error: {0}")]
65 Utf8Error(#[from] Utf8Error),
66 #[error("Pipeline error: {0}")]
67 PipelineError(#[from] PipelineError),
68 #[error("Request blocked by robots.txt")]
69 BlockedByRobotsTxt,
70}
71
72impl From<http::header::InvalidHeaderValue> for SpiderError {
73 fn from(err: http::header::InvalidHeaderValue) -> Self {
74 SpiderError::InvalidHeaderValue(err.to_string())
75 }
76}
77
78impl From<bincode::Error> for SpiderError {
79 fn from(err: bincode::Error) -> Self {
80 SpiderError::GeneralError(format!("Bincode error: {}", err))
81 }
82}
83
84impl From<reqwest::Error> for SpiderError {
85 fn from(err: reqwest::Error) -> Self {
86 SpiderError::ReqwestError(err.into())
87 }
88}
89
90impl From<std::io::Error> for SpiderError {
91 fn from(err: std::io::Error) -> Self {
92 SpiderError::IoError(err.to_string())
93 }
94}
95
96impl From<SerdeJsonError> for SpiderError {
97 fn from(err: SerdeJsonError) -> Self {
98 SpiderError::JsonError(err.to_string())
99 }
100}
101
102#[derive(Error, Debug, Clone)]
104pub enum PipelineError {
105 #[error("I/O error: {0}")]
106 IoError(String),
107 #[error("Item processing error: {0}")]
108 ItemError(String),
109 #[error("Database error: {0}")]
110 DatabaseError(String),
111 #[error("Serialization error: {0}")]
112 SerializationError(String),
113 #[error("CSV error: {0}")]
114 CsvError(String),
115 #[error("Other pipeline error: {0}")]
116 Other(String),
117}
118
119impl From<csv::Error> for PipelineError {
120 fn from(err: csv::Error) -> Self {
121 PipelineError::CsvError(err.to_string())
122 }
123}
124
125impl From<std::io::Error> for PipelineError {
126 fn from(err: std::io::Error) -> Self {
127 PipelineError::IoError(err.to_string())
128 }
129}
130
131impl From<SerdeJsonError> for PipelineError {
132 fn from(err: SerdeJsonError) -> Self {
133 PipelineError::SerializationError(err.to_string())
134 }
135}
136
137impl From<rusqlite::Error> for PipelineError {
138 fn from(err: rusqlite::Error) -> Self {
139 PipelineError::DatabaseError(err.to_string())
140 }
141}
142
143impl From<rusqlite::Error> for SpiderError {
144 fn from(err: rusqlite::Error) -> Self {
145 SpiderError::PipelineError(PipelineError::DatabaseError(err.to_string()))
146 }
147}