robotstxt_rs/
lib.rs

//! # robotstxt-rs
//!
//! An intuitive Rust library for fetching, parsing, and querying robots.txt files.
//!
//! ## Features
//!
//! - Parse robots.txt from strings or URLs
//! - Check if paths are allowed for specific user-agents
//! - Extract sitemaps and comments
//! - Support for wildcards and path patterns
//! - Async URL fetching with reqwest
//!
//! ## Examples
//!
//! ### Basic parsing and querying
//!
//! ```rust
//! use robotstxt_rs::RobotsTxt;
//!
//! # fn main() {
//! let content = r#"
//! User-agent: *
//! Disallow: /admin/
//! Allow: /public/
//!
//! User-agent: Googlebot
//! Disallow: /private/
//!
//! Sitemap: https://example.com/sitemap.xml
//! # This is a comment
//! "#;
//!
//! let robots = RobotsTxt::parse(content);
//!
//! // Check if paths are allowed
//! assert!(robots.can_fetch("Mozilla", "/public/page.html"));
//! assert!(!robots.can_fetch("Mozilla", "/admin/panel"));
//! assert!(!robots.can_fetch("Googlebot", "/private/data"));
//!
//! // Access sitemaps
//! for sitemap in robots.get_sitemaps() {
//!     println!("Sitemap: {}", sitemap);
//! }
//!
//! // Access comments
//! for comment in robots.get_comments() {
//!     println!("Comment: {}", comment);
//! }
//!
//! // Get all rules
//! for (user_agent, rule) in robots.get_rules() {
//!     println!("User-agent: {}", user_agent);
//!     println!("  Allowed: {:?}", rule.allowed);
//!     println!("  Disallowed: {:?}", rule.disallowed);
//! }
//!
//! // Get specific rule
//! if let Some(rule) = robots.get_rule("Googlebot") {
//!     println!("Googlebot disallowed: {:?}", rule.disallowed);
//! }
//! # }
//! ```
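//!
//! ### Wildcards and path patterns
//!
//! A minimal sketch of querying rules that use `*` and `$` in their paths. The
//! pattern strings below are ordinary robots.txt syntax; results are printed
//! rather than asserted, since the exact matches depend on the parser's pattern
//! handling.
//!
//! ```rust
//! use robotstxt_rs::RobotsTxt;
//!
//! # fn main() {
//! let content = r#"
//! User-agent: *
//! Disallow: /*.json$
//! Disallow: /tmp/*
//! "#;
//!
//! let robots = RobotsTxt::parse(content);
//!
//! // Print whether a few sample paths may be fetched under the wildcard rules.
//! for path in ["/api/data.json", "/tmp/cache/file", "/docs/index.html"] {
//!     println!("{}: {}", path, robots.can_fetch("MyBot", path));
//! }
//! # }
//! ```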
//!
//! ### Fetch from URL (async)
//!
//! ```no_run
//! use robotstxt_rs::RobotsTxt;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let robots = RobotsTxt::from_url("https://example.com/robots.txt").await?;
//!
//!     if let Some(domain) = robots.get_domain() {
//!         println!("Domain: {}", domain);
//!     }
//!
//!     if robots.can_fetch("MyBot", "/") {
//!         println!("Allowed to crawl!");
//!     }
//!
//!     Ok(())
//! }
//! ```
//!
//! ### Parse with custom domain
//!
//! ```rust
//! use robotstxt_rs::RobotsTxt;
//!
//! # fn main() {
//! let content = "User-agent: *\nDisallow: /admin/";
//! let robots = RobotsTxt::parse_with_domain(content, Some("example.com".to_string()));
//!
//! if let Some(domain) = robots.get_domain() {
//!     println!("Domain: {}", domain);
//! }
//! # }
//! ```

mod parser;

pub use parser::{RobotsTxt, RobotRule};