spider_middleware/
retry.rs1use async_trait::async_trait;
7use log::{info, trace, warn};
8use std::time::Duration;
9
10use crate::middleware::{Middleware, MiddlewareAction};
11use spider_util::error::SpiderError;
12use spider_util::request::Request;
13use spider_util::response::Response;
14
/// Configuration for retrying failed requests with exponential back-off.
///
/// All fields are public, so the struct can be built literally or through the
/// chained setter methods of the same names.
#[derive(Clone, Debug)]
pub struct RetryMiddleware {
    /// Upper bound on the retry-attempt counter: a request is retried only
    /// while its recorded attempt count is below this value.
    pub max_retries: u32,
    /// HTTP status codes (as plain `u16`s) that trigger a retry.
    pub retry_http_codes: Vec<u16>,
    /// Multiplier, in seconds, applied to `2^attempts` when computing the
    /// delay before the next attempt.
    pub backoff_factor: f64,
    /// Ceiling for the computed delay.
    pub max_delay: Duration,
}
27
28impl Default for RetryMiddleware {
29 fn default() -> Self {
30 let middleware = RetryMiddleware {
31 max_retries: 3,
32 retry_http_codes: vec![500, 502, 503, 504, 408, 429],
33 backoff_factor: 1.0,
34 max_delay: Duration::from_secs(180),
35 };
36 info!("Initializing RetryMiddleware with config: {:?}", middleware);
37 middleware
38 }
39}
40
41impl RetryMiddleware {
42 pub fn new() -> Self {
44 Self::default()
45 }
46
47 pub fn max_retries(mut self, max_retries: u32) -> Self {
49 self.max_retries = max_retries;
50 self
51 }
52
53 pub fn retry_http_codes<I>(mut self, retry_http_codes: I) -> Self
55 where
56 I: IntoIterator<Item = u16>,
57 {
58 self.retry_http_codes = retry_http_codes.into_iter().collect();
59 self
60 }
61
62 pub fn backoff_factor(mut self, backoff_factor: f64) -> Self {
64 self.backoff_factor = backoff_factor;
65 self
66 }
67
68 pub fn max_delay(mut self, max_delay: Duration) -> Self {
70 self.max_delay = max_delay;
71 self
72 }
73}
74
75#[async_trait]
76impl<C: Send + Sync> Middleware<C> for RetryMiddleware {
77 fn name(&self) -> &str {
78 "RetryMiddleware"
79 }
80
81 async fn process_request(
82 &self,
83 _client: &C,
84 request: Request,
85 ) -> Result<MiddlewareAction<Request>, SpiderError> {
86 Ok(MiddlewareAction::Continue(request))
87 }
88
89 async fn process_response(
90 &self,
91 response: Response,
92 ) -> Result<MiddlewareAction<Response>, SpiderError> {
93 trace!(
94 "Processing response for URL: {} with status: {}",
95 response.url, response.status
96 );
97
98 if self.retry_http_codes.contains(&response.status.as_u16()) {
99 let mut request = response.request_from_response();
100 let current_attempts = request.get_retry_attempts();
101
102 if current_attempts < self.max_retries {
103 request.increment_retry_attempts();
104 let delay = self.calculate_delay(current_attempts);
105 info!(
106 "Retrying {} (status: {}, attempt {}/{}) after {:?}",
107 request.url,
108 response.status,
109 current_attempts + 1,
110 self.max_retries,
111 delay
112 );
113 return Ok(MiddlewareAction::Retry(Box::new(request), delay));
114 } else {
115 warn!(
116 "Max retries ({}) reached for {} (status: {}). Dropping response.",
117 self.max_retries, request.url, response.status
118 );
119 return Ok(MiddlewareAction::Drop);
120 }
121 } else {
122 trace!(
123 "Response status {} is not in retry codes, continuing",
124 response.status
125 );
126 }
127
128 Ok(MiddlewareAction::Continue(response))
129 }
130
131 async fn handle_error(
132 &self,
133 request: &Request,
134 error: &SpiderError,
135 ) -> Result<MiddlewareAction<Request>, SpiderError> {
136 trace!("Handling error for request {}: {:?}", request.url, error);
137
138 if let SpiderError::ReqwestError(err_details) = error
139 && (err_details.is_connect || err_details.is_timeout)
140 {
141 let mut new_request = request.clone();
142 let current_attempts = new_request.get_retry_attempts();
143
144 if current_attempts < self.max_retries {
145 new_request.increment_retry_attempts();
146 let delay = self.calculate_delay(current_attempts);
147 info!(
148 "Retrying {} (error: {}, attempt {}/{}) after {:?}",
149 new_request.url,
150 err_details.message,
151 current_attempts + 1,
152 self.max_retries,
153 delay
154 );
155 return Ok(MiddlewareAction::Retry(Box::new(new_request), delay));
156 } else {
157 warn!(
158 "Max retries ({}) reached for {} (error: {}). Dropping request.",
159 self.max_retries, new_request.url, err_details.message
160 );
161 return Ok(MiddlewareAction::Drop);
162 }
163 } else {
164 trace!("Error is not a retryable error, returning original error");
165 }
166
167 Err(error.clone())
168 }
169}
170
171impl RetryMiddleware {
172 fn calculate_delay(&self, retries: u32) -> Duration {
173 let delay_secs = self.backoff_factor * (2.0f64.powi(retries as i32));
174 let delay = Duration::from_secs_f64(delay_secs);
175 delay.min(self.max_delay)
176 }
177}