// halldyll_robots/lib.rs
1//! # halldyll-robots
2//!
3//! RFC 9309 compliant robots.txt parser and checker.
4//!
5//! ## Features
6//!
7//! - **RFC 9309 Compliance**: Full support for the robots.txt standard
8//! - **Unavailable vs Unreachable**: Proper handling per RFC (4xx = allow, 5xx = deny)
9//! - **Safe Mode**: Optional stricter handling of 401/403 as deny
10//! - **Conditional GET**: ETag/Last-Modified support for bandwidth savings
11//! - **Request-rate**: Non-standard but common directive support
12//! - **Caching**: In-memory cache with optional file persistence
13//! - **Pattern Matching**: Wildcards (*), end anchors ($), percent-encoding
14//! - **UTF-8 BOM**: Automatic stripping of BOM prefix
15//! - **Observability**: Detailed logging and statistics with min/max/avg metrics
16//!
17//! ## Example
18//!
19//! ```rust,no_run
20//! use halldyll_robots::{RobotsChecker, RobotsConfig};
21//! use url::Url;
22//!
23//! #[tokio::main]
24//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let config = RobotsConfig::default();
//!     let checker = RobotsChecker::new(config);
//!
//!     let url = Url::parse("https://example.com/some/path")?;
//!     let decision = checker.is_allowed(&url).await;
//!
//!     if decision.allowed {
//!         println!("URL is allowed");
//!     } else {
//!         println!("URL is blocked: {:?}", decision.reason);
//!     }
//!
//!     Ok(())
//! }
38//! }
39//! ```
40
41#![warn(missing_docs)]
42#![warn(clippy::all)]
43
44// ============================================================================
45// Modules
46// ============================================================================
47
// Core modules; the primary items of each are re-exported at the crate
// root below so callers get a flat public API.
pub mod cache; // In-memory robots.txt cache (with optional file persistence, per crate docs)
pub mod checker; // High-level allow/deny checker — the main entry point (`RobotsChecker`)
pub mod fetcher; // robots.txt retrieval and fetch statistics (conditional GET per crate docs)
pub mod matcher; // Path/pattern matching: wildcards (*), end anchors ($), percent-encoding
pub mod parser; // robots.txt text parsing into groups and rules
pub mod types; // Shared types: config, decisions, rules, policies, cache keys
54
55// ============================================================================
56// Re-exports
57// ============================================================================
58
// Flatten the public API: re-export the commonly used items so callers can
// write `halldyll_robots::RobotsChecker` instead of reaching into submodules.
pub use cache::{CacheStats, CacheStatsSnapshot, RobotsCache};
pub use checker::{RobotsChecker, RobotsDiagnostics};
pub use fetcher::{FetchStats, FetchStatsSnapshot, RobotsFetcher};
pub use matcher::RobotsMatcher;
pub use parser::RobotsParser;
// Core data types shared across the cache/checker/fetcher/matcher/parser modules.
pub use types::{
    Decision, DecisionReason, EffectiveRules, FetchStatus, Group, RequestRate,
    RobotsCacheKey, RobotsConfig, RobotsPolicy, Rule, RuleKind,
};
67};