// spider_middleware/src/lib.rs
1//! # spider-middleware
2//!
3//! Built-in middleware for the crawler runtime.
4//!
5//! This crate contains the request/response hooks that sit between scheduling,
6//! downloading, and parsing. It is the right layer for retry policy, rate
7//! limiting, cookies, proxies, user agents, `robots.txt`, and caching.
8//!
9//! ## Example
10//!
11//! ```rust,ignore
12//! use spider_middleware::{rate_limit::RateLimitMiddleware, retry::RetryMiddleware};
13//!
14//! let crawler = CrawlerBuilder::new(MySpider)
15//!     .add_middleware(RateLimitMiddleware::default())
16//!     .add_middleware(RetryMiddleware::new())
17//!     .build()
18//!     .await?;
19//! ```
20
21pub mod middleware;
22pub mod rate_limit;
23pub mod referer;
24pub mod retry;
25
26pub use spider_util::request::{Body, Request};
27pub use spider_util::response::Response;
28
29pub mod prelude;
30
31#[cfg(feature = "middleware-autothrottle")]
32pub mod autothrottle;
33
34#[cfg(feature = "middleware-user-agent")]
35pub mod user_agent;
36
37#[cfg(feature = "middleware-cookies")]
38pub mod cookies;
39
40#[cfg(feature = "middleware-cache")]
41pub mod http_cache;
42
43#[cfg(feature = "middleware-proxy")]
44pub mod proxy;
45
46#[cfg(feature = "middleware-robots")]
47pub mod robots;