spider_core/lib.rs
1//! # spider-core
2//!
3//! Core engine of the `spider-lib` web scraping framework.
4//!
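5//! Provides the main components — the `Crawler` engine, its `CrawlerBuilder`, the `Scheduler`, and the `Spider` trait — together with supporting infrastructure (the concurrency, state, and stats modules).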
6//!
7//! ## Example
8//!
9//! ```rust,ignore
10//! use spider_core::{Crawler, CrawlerBuilder, Spider};
11//! use spider_util::{response::Response, error::SpiderError, item::ParseOutput};
12//!
13//! #[spider_macro::scraped_item]
14//! struct MyItem {
15//!     title: String,
16//!     url: String,
17//! }
18//!
19//! struct MySpider;
20//!
21//! #[async_trait::async_trait]
22//! impl Spider for MySpider {
23//!     type Item = MyItem;
24//!     fn start_urls(&self) -> Vec<&'static str> { vec!["https://example.com"] }
25//!     async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
26//!         todo!()
27//!     }
28//! }
29//!
30//! async fn run_crawler() -> Result<(), SpiderError> {
31//!     let crawler = CrawlerBuilder::new(MySpider).build().await?;
32//!     crawler.start_crawl().await
33//! }
34//! ```
35
// Crawler construction API; home of `CrawlerBuilder`, which is re-exported at the crate root below.
36pub mod builder;
// Compiled only when the `checkpoint` Cargo feature is enabled; home of `Checkpoint` and
// `SchedulerCheckpoint`, which are re-exported below under the same feature gate.
37#[cfg(feature = "checkpoint")]
38pub mod checkpoint;
// NOTE(review): presumably concurrency helpers used by the engine (inferred from the name only) — confirm.
39pub mod concurrency;
// Home of `Crawler`, which is re-exported at the crate root below.
40pub mod engine;
// NOTE(review): presumably a convenience bundle of re-exports intended for glob import — confirm.
41pub mod prelude;
// Home of `Scheduler`, which is re-exported at the crate root below.
42pub mod scheduler;
// Home of the `Spider` trait that user spiders implement (see the crate-level example).
43pub mod spider;
// NOTE(review): presumably crawl state tracking (inferred from the name only) — confirm.
44pub mod state;
// NOTE(review): presumably crawl statistics collection (inferred from the name only) — confirm.
45pub mod stats;
46
47// Re-export `Checkpoint` and `SchedulerCheckpoint` (only when the `checkpoint` feature is enabled)
48#[cfg(feature = "checkpoint")]
49pub use checkpoint::{Checkpoint, SchedulerCheckpoint};
50
51pub use spider_downloader::{Downloader, HttpClient, ReqwestClientDownloader};
52
53// Re-export `CookieStore` (only when the `cookie-store` feature is enabled)
54#[cfg(feature = "cookie-store")]
55pub use cookie_store::CookieStore;
56
57pub use builder::CrawlerBuilder;
58pub use engine::Crawler;
59pub use scheduler::Scheduler;
60pub use spider_macro::scraped_item;
61
62pub use async_trait::async_trait;
63pub use dashmap::DashMap;
64pub use spider::Spider;
65pub use tokio;