sws_crawler/lib.rs
//! Web crawler with pluggable scraping logic.
//!
//! The main function [`crawl_site`](crawl_site) crawls and scrapes web pages. It is
//! configured through a [`CrawlerConfig`](CrawlerConfig) and a [`Scrapable`](Scrapable)
//! implementation. The latter defines the [`Seed`](Seed) used for crawling, as well as
//! the scraping logic. Note that [robots.txt][robots-txt] seeds are supported and
//! exposed through [`texting_robots::Robot`][robots] in the
//! [`CrawlingContext`](CrawlingContext) and [`ScrapingContext`](ScrapingContext).
//!
//! [robots-txt]: https://en.wikipedia.org/wiki/Robots.txt
//! [robots]: https://docs.rs/texting_robots/latest/texting_robots/struct.Robot.html
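//!
//! # Example
//!
//! A minimal sketch of plugging a custom scraper into [`crawl_site`](crawl_site).
//! The scraper type, method bodies, and call signatures below are illustrative
//! and abridged; refer to the [`Scrapable`](Scrapable) trait for the exact
//! interface and to [`CrawlerConfig`](CrawlerConfig) for the available settings.
//!
//! ```ignore
//! use sws_crawler::{crawl_site, CrawlerConfig, Scrapable, ScrapingContext, Seed};
//!
//! // Hypothetical scraper; the name is illustrative only.
//! struct TitleScraper;
//!
//! impl Scrapable for TitleScraper {
//!     // Abridged: only the seed and the scraping hook are sketched here;
//!     // the remaining trait items are elided.
//!     fn seed(&self) -> Seed {
//!         // A robots.txt seed, as mentioned in the module docs above.
//!         Seed::RobotsTxt("https://example.com/robots.txt".into())
//!     }
//!
//!     fn scrap(&mut self, page: String, ctx: ScrapingContext) -> anyhow::Result<()> {
//!         // Parse `page` here and extract whatever the scraper is after.
//!         Ok(())
//!     }
//! }
//!
//! async fn run() -> anyhow::Result<()> {
//!     // Assuming `Default` impls for the configs; tune them as needed.
//!     crawl_site::<TitleScraper>(CrawlerConfig::default(), Default::default()).await
//! }
//! ```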

mod config;
mod crawler;
mod limiter;
mod scrapable;

pub use config::{CrawlerConfig, OnError, Throttle};
pub use crawler::crawl_site;
pub use scrapable::{
    CountedTx, CrawlingContext, PageLocation, Scrapable, ScrapingContext, Seed, Sitemap,
};

pub use anyhow;
pub use texting_robots;