spider_lib/middleware.rs
1//! Core Middleware trait and related types for the `spider-lib` framework.
2//!
3//! This module defines the `Middleware` trait, which is a fundamental abstraction
4//! for injecting custom logic into the web crawling pipeline. Middlewares can
5//! intercept and modify `Request`s before they are sent, `Response`s after they
6//! are received, and handle errors that occur during the process.
7//!
8//! The `MiddlewareAction` enum provides a flexible way for middlewares to control
9//! the subsequent flow of execution, allowing actions such as continuing processing,
10//! retrying a request, dropping an item, or directly returning a response.
11
12use async_trait::async_trait;
13use std::any::Any;
14use std::time::Duration;
15
16use crate::error::SpiderError;
17use crate::request::Request;
18use crate::response::Response;
19
20#[allow(clippy::large_enum_variant)]
21/// Enum returned by middleware methods to control further processing.
22pub enum MiddlewareAction<T> {
23 /// Continue processing with the provided item.
24 Continue(T),
25 /// Retry the Request after the specified duration. (Only valid for Response processing)
26 Retry(Box<Request>, Duration),
27 /// Drop the item, stopping further processing.
28 Drop,
29 /// Return a Response directly, bypassing the downloader. (Only valid for Request processing)
30 ReturnResponse(Response),
31}
32
33/// A trait for processing requests and responses.
34#[async_trait]
35pub trait Middleware<C: Send + Sync>: Any + Send + Sync + 'static {
36 fn name(&self) -> &str;
37
38 async fn process_request(
39 &mut self,
40 _client: &C,
41 request: Request,
42 ) -> Result<MiddlewareAction<Request>, SpiderError> {
43 Ok(MiddlewareAction::Continue(request))
44 }
45 async fn process_response(
46 &mut self,
47 response: Response,
48 ) -> Result<MiddlewareAction<Response>, SpiderError> {
49 Ok(MiddlewareAction::Continue(response))
50 }
51
52 async fn handle_error(
53 &mut self,
54 _request: &Request,
55 error: &SpiderError,
56 ) -> Result<MiddlewareAction<Request>, SpiderError> {
57 // The default implementation is to just pass the error through by cloning it.
58 Err(error.clone())
59 }
60}