// spider_lib/lib.rs
//! # spider-lib
//!
//! A Rust-based web scraping framework inspired by Scrapy.
//!
//! `spider-lib` is an asynchronous, concurrent web scraping library for Rust.
//! It's designed to be a lightweight yet powerful tool for building and running
//! scrapers for projects of any size. If you're familiar with Scrapy's architecture
//! of Spiders, Middlewares, and Pipelines, you'll feel right at home.
//!
//! This is the main entry point for the spider framework, integrating the core engine
//! (spider-core), macro utilities (spider-macro), middleware components (spider-middlewares),
//! and pipeline components (spider-pipelines) into a unified, easy-to-use library.
//!
//! ## Getting Started
//!
//! To use `spider-lib`, add it to your project's `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! spider-lib = "0.5"
//! ```
//!
//! ## Quick Example
//!
//! Here's a minimal example of a spider that scrapes quotes from `quotes.toscrape.com`:
//!
//! ```rust,ignore
//! use spider_lib::prelude::*;
//! use spider_core::utils::ToSelector;
//!
//! #[spider_lib::scraped_item]
//! #[derive(Default)]
//! struct Quote {
//!     text: String,
//!     author: String,
//! }
//!
//! struct QuotesSpider;
//!
//! #[async_trait]
//! impl Spider for QuotesSpider {
//!     type Item = Quote;
//!
//!     fn start_urls(&self) -> Vec<&'static str> {
//!         vec!["http://quotes.toscrape.com/"]
//!     }
//!
//!     async fn parse(&mut self, response: Response) -> Result<ParseOutput<Self::Item>, SpiderError> {
//!         let html = response.to_html()?;
//!         let mut output = ParseOutput::new();
//!
//!         for quote in html.select(&".quote".to_selector()?) {
//!             let text = quote.select(&".text".to_selector()?).next().map(|e| e.text().collect::<String>()).unwrap_or_default();
//!             let author = quote.select(&".author".to_selector()?).next().map(|e| e.text().collect::<String>()).unwrap_or_default();
//!             output.add_item(Quote { text, author });
//!         }
//!
//!         if let Some(next_href) = html.select(&".next > a[href]".to_selector()?).next().and_then(|a| a.attr("href")) {
//!             let next_url = response.url.join(next_href)?;
//!             output.add_request(Request::new(next_url));
//!         }
//!
//!         Ok(output)
//!     }
//! }
//!
//! #[tokio::main]
//! async fn main() -> Result<(), SpiderError> {
//!     let crawler = CrawlerBuilder::new(QuotesSpider)
//!         .build()
//!         .await?;
//!
//!     crawler.start_crawl().await?;
//!     Ok(())
//! }
//! ```

78pub mod prelude;
79pub use prelude::*;