Skip to main content

spider_core/
lib.rs

1//! # spider-core
2//!
3//! Core engine of the `spider-lib` web scraping framework.
4//!
5//! Provides the main components — the `Crawler`, the `Scheduler`, and the `Spider` trait — together with their supporting infrastructure.
6//!
7//! ## Example
8//!
9//! ```rust,ignore
10//! use spider_core::{Crawler, CrawlerBuilder, Spider};
11//! use spider_util::{response::Response, error::SpiderError, item::ParseOutput};
12//!
13//! #[spider_macro::scraped_item]
14//! struct MyItem {
15//!     title: String,
16//!     url: String,
17//! }
18//!
19//! struct MySpider;
20//!
21//! #[async_trait::async_trait]
22//! impl Spider for MySpider {
23//!     type Item = MyItem;
24//!     fn start_urls(&self) -> Vec<&'static str> { vec!["https://example.com"] }
25//!     async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
26//!         todo!()
27//!     }
28//! }
29//!
30//! async fn run_crawler() -> Result<(), SpiderError> {
31//!     let crawler = CrawlerBuilder::new(MySpider).build().await?;
32//!     crawler.start_crawl().await
33//! }
34//! ```
35
36pub mod builder; // source of the `CrawlerBuilder` re-export further down
37#[cfg(feature = "checkpoint")]
38pub mod checkpoint; // compiled only with the `checkpoint` feature; source of `Checkpoint` / `SchedulerCheckpoint`
39pub mod crawler; // source of the `Crawler` re-export further down
40pub mod prelude; // NOTE(review): presumably a convenience import set — contents not visible here, confirm
41pub mod scheduler; // source of the `Scheduler` re-export further down
42pub mod spider; // source of the `Spider` trait re-export further down
43pub mod state;
44pub mod stats;
45
46// Re-export `Checkpoint` and `SchedulerCheckpoint` (only when the `checkpoint` feature is enabled)
47#[cfg(feature = "checkpoint")]
48pub use checkpoint::{Checkpoint, SchedulerCheckpoint};
49
50pub use spider_downloader::{Downloader, ReqwestClientDownloader, SimpleHttpClient};
51
52// Re-export `CookieStore` (only when the `cookie-store` feature is enabled)
53#[cfg(feature = "cookie-store")]
54pub use cookie_store::CookieStore;
55
56pub use builder::CrawlerBuilder;
57pub use crawler::Crawler;
58pub use scheduler::Scheduler;
59pub use spider_macro::scraped_item;
60
61pub use async_trait::async_trait;
62pub use dashmap::DashMap;
63pub use spider::Spider;
64pub use tokio;