Skip to main content

spider_core/
lib.rs

1//! # spider-core
2//!
3//! `spider-core` is the runtime crate behind the rest of the workspace.
4//! It owns the crawler loop, scheduling, shared runtime state, statistics, and
5//! the [`Spider`] trait used to describe crawl behavior.
6//!
7//! If you are building an application, `spider-lib` is usually the easier
8//! starting point. Depend on `spider-core` directly when you want the runtime
9//! API without the facade crate.
10//!
11//! ## Example
12//!
13//! ```rust,ignore
14//! use spider_core::{async_trait, CrawlerBuilder, Spider};
15//! use spider_util::{response::Response, error::SpiderError, item::ParseOutput};
16//!
17//! #[spider_macro::scraped_item]
18//! struct Item {
19//!     title: String,
20//! }
21//!
22//! struct MySpider;
23//!
24//! #[async_trait]
25//! impl Spider for MySpider {
26//!     type Item = Item;
27//!     type State = ();
28//!
29//!     fn start_requests(&self) -> Result<spider_core::StartRequests<'_>, SpiderError> {
30//!         Ok(spider_core::StartRequests::Urls(vec!["https://example.com"]))
31//!     }
32//!
33//!     async fn parse(
34//!         &self,
35//!         _response: Response,
36//!         _state: &Self::State,
37//!     ) -> Result<ParseOutput<Self::Item>, SpiderError> {
38//!         Ok(ParseOutput::new())
39//!     }
40//! }
41//!
42//! async fn run() -> Result<(), SpiderError> {
43//!     let crawler = CrawlerBuilder::new(MySpider).build().await?;
44//!     crawler.start_crawl().await
45//! }
46//! ```
47// Modules that make up this crate; `checkpoint` is compiled only when the `checkpoint` feature is enabled.
48pub mod builder;
49#[cfg(feature = "checkpoint")]
50pub mod checkpoint;
51pub mod config;
52pub mod engine;
53pub mod prelude;
54pub mod scheduler;
55pub mod spider;
56pub mod state;
57pub mod stats;
58
59// Re-export `Checkpoint` and `SchedulerCheckpoint` at the crate root (only when the `checkpoint` feature is enabled).
60#[cfg(feature = "checkpoint")]
61pub use checkpoint::{Checkpoint, SchedulerCheckpoint};
62// Re-export the downloader items from the `spider_downloader` crate.
63pub use spider_downloader::{Downloader, HttpClient, ReqwestClientDownloader};
64
65// Re-export `CookieStore` from the `cookie_store` crate (only when the `cookie-store` feature is enabled).
66#[cfg(feature = "cookie-store")]
67pub use cookie_store::CookieStore;
68// Crate-root paths for items from this crate's own modules, plus the `scraped_item` attribute from `spider_macro`.
69pub use builder::CrawlerBuilder;
70pub use engine::Crawler;
71pub use scheduler::Scheduler;
72pub use spider_macro::scraped_item;
73// Re-exports of external items (`async_trait`, `DashMap`, `tokio`) alongside this crate's spider and state types.
74pub use async_trait::async_trait;
75pub use dashmap::DashMap;
76pub use spider::{Spider, StartRequestIter, StartRequests};
77pub use state::{
78    ConcurrentMap, ConcurrentVec, Counter, Counter64, Flag, StateAccessMetrics, VisitedUrls,
79};
80pub use tokio;