Skip to main content

spider_lib/
middleware.rs

//! Core Middleware trait and related types for the `spider-lib` framework.
//!
//! This module defines the `Middleware` trait, which is a fundamental abstraction
//! for injecting custom logic into the web crawling pipeline. Middlewares can
//! intercept and modify `Request`s before they are sent, `Response`s after they
//! are received, and handle errors that occur during the process.
//!
//! The `MiddlewareAction` enum provides a flexible way for middlewares to control
//! the subsequent flow of execution, allowing actions such as continuing processing,
//! retrying a request, dropping an item, or directly returning a response.
11
12use async_trait::async_trait;
13use std::any::Any;
14use std::time::Duration;
15
16use crate::error::SpiderError;
17use crate::request::Request;
18use crate::response::Response;
19
20#[allow(clippy::large_enum_variant)]
21/// Enum returned by middleware methods to control further processing.
22pub enum MiddlewareAction<T> {
23    /// Continue processing with the provided item.
24    Continue(T),
25    /// Retry the Request after the specified duration. (Only valid for Response processing)
26    Retry(Box<Request>, Duration),
27    /// Drop the item, stopping further processing.
28    Drop,
29    /// Return a Response directly, bypassing the downloader. (Only valid for Request processing)
30    ReturnResponse(Response),
31}
32
33/// A trait for processing requests and responses.
34#[async_trait]
35pub trait Middleware<C: Send + Sync>: Any + Send + Sync + 'static {
36    fn name(&self) -> &str;
37
38    async fn process_request(
39        &mut self,
40        _client: &C,
41        request: Request,
42    ) -> Result<MiddlewareAction<Request>, SpiderError> {
43        Ok(MiddlewareAction::Continue(request))
44    }
45    async fn process_response(
46        &mut self,
47        response: Response,
48    ) -> Result<MiddlewareAction<Response>, SpiderError> {
49        Ok(MiddlewareAction::Continue(response))
50    }
51
52    async fn handle_error(
53        &mut self,
54        _request: &Request,
55        error: &SpiderError,
56    ) -> Result<MiddlewareAction<Request>, SpiderError> {
57        // The default implementation is to just pass the error through by cloning it.
58        Err(error.clone())
59    }
60}