robotxt 0.4.1

An implementation of the robots.txt (or URL exclusion) protocol with support for the crawl-delay, sitemap, and universal match extensions.
#![forbid(unsafe_code)]

//! An implementation of the robots.txt (or URL exclusion) protocol with
//! support for the `crawl-delay`, `sitemap`, and universal `*` match
//! extensions (per the RFC 9309 specification).
//!
//! **Also check out other `xwde` projects [here](https://github.com/xwde).**
//!
//! ## Features
//!
//! - `builder` to enable `robotxt::{RobotsBuilder, GroupBuilder}`. **Enabled by default**.
//! - `parser` to enable `robotxt::Robots`. **Enabled by default**.
//!
//! ## Examples
//!
//! - parse the most specific `user-agent` in the provided `robots.txt` file
//!   (see [Robots]; a group-selection sketch follows the example):
//!
//! ```rust
//! use robotxt::Robots;
//!
//! let txt = r#"
//!     User-Agent: foobot
//!     Disallow: *
//!     Allow: /example/
//!     Disallow: /example/nope.txt
//! "#.as_bytes();
//!
//! let r = Robots::from_bytes(txt, "foobot");
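//! // `Allow: /example/` permits paths under `/example/`, the longer
//! // `Disallow: /example/nope.txt` still blocks that exact file, and
//! // `Disallow: *` blocks everything else: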
//! assert!(r.is_allowed("/example/yeah.txt"));
//! assert!(!r.is_allowed("/example/nope.txt"));
//! assert!(!r.is_allowed("/invalid/path.txt"));
//! ```
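//!
//! As a hedged sketch of the group selection above (the agents, rules, and
//! path are illustrative): when both a universal `*` group and a `foobot`
//! group are present, the more specific `foobot` group is the one applied:
//!
//! ```rust
//! use robotxt::Robots;
//!
//! let txt = r#"
//!     User-Agent: *
//!     Disallow: /
//!
//!     User-Agent: foobot
//!     Allow: /
//! "#.as_bytes();
//!
//! // The specific `foobot` group takes precedence over the universal `*` group.
//! let r = Robots::from_bytes(txt, "foobot");
//! assert!(r.is_allowed("/some/page.html"));
//! ```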
//!
//! - build a new `robots.txt` file in a declarative manner (see
//!   [RobotsBuilder]; a round-trip sketch follows the example):
//!
//! ```rust
//! use robotxt::RobotsBuilder;
//!
//! let txt = RobotsBuilder::default()
//!     .header("Robots.txt: Start")
//!     .group(["foobot"], |u| {
//!         u.crawl_delay(5)
//!             .header("Rules for Foobot: Start")
//!             .allow("/example/yeah.txt")
//!             .disallow("/example/nope.txt")
//!             .footer("Rules for Foobot: End")
//!     })
//!     .group(["barbot", "nombot"], |u| {
//!         u.crawl_delay(2)
//!             .disallow("/example/yeah.txt")
//!             .disallow("/example/nope.txt")
//!     })
//!     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap())
//!     .sitemap("https://example.com/sitemap_2.xml".try_into().unwrap())
//!     .footer("Robots.txt: End");
//!
//! println!("{}", txt.to_string());
//! ```
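//!
//! As a sketch combining the two examples above (the agent, rule, and path
//! are illustrative), the builder's output can be fed straight back into the
//! parser:
//!
//! ```rust
//! use robotxt::{Robots, RobotsBuilder};
//!
//! let txt = RobotsBuilder::default()
//!     .group(["foobot"], |u| u.disallow("/private/"))
//!     .to_string();
//!
//! // The generated file is valid input for the parser.
//! let r = Robots::from_bytes(txt.as_bytes(), "foobot");
//! assert!(!r.is_allowed("/private/page.html"));
//! ```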
//!
//! ## Links
//!
//! - [Request for Comments: 9309](https://www.rfc-editor.org/rfc/rfc9309.txt) on
//!   rfc-editor.org
//! - [Introduction to Robots.txt](https://developers.google.com/search/docs/crawling-indexing/robots/intro)
//!   on Google.com
//! - [How Google interprets Robots.txt](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt)
//!   on Google.com
//! - [What is a robots.txt file](https://moz.com/learn/seo/robotstxt) on Moz.com
//!
//! ## Notes
//!
//! - The parser is based on
//!   [Smerity/texting_robots](https://github.com/Smerity/texting_robots).
//! - The `Host` directive is not supported.
//!

#[cfg(any(feature = "builder", feature = "parser"))]
mod paths;
#[cfg(any(feature = "builder", feature = "parser"))]
pub use paths::*;

#[cfg(feature = "builder")]
mod build;
#[cfg(feature = "builder")]
pub use build::*;

#[cfg(feature = "parser")]
mod parse;
#[cfg(feature = "parser")]
pub use parse::*;

// Re-exports
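/// Re-export of the `url` crate, so dependents can name its types through
/// this crate. A minimal sketch (the address below is illustrative):
///
/// ```rust
/// use robotxt::url::Url;
///
/// // Parse a sitemap address with the re-exported `Url` type.
/// let sitemap = Url::parse("https://example.com/sitemap.xml").unwrap();
/// assert_eq!(sitemap.host_str(), Some("example.com"));
/// ```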
pub use url;