// robotparser_fork/parser.rs
//! # Supported features and directives
//!
//! * Removes the Unicode BOM (byte order mark)
//! * Directive `User-Agent`
//! * Directive `Allow`
//! * Directive `Disallow`
//! * Directive `Crawl-Delay`
//! * Directive `Request-Rate`
//! * Directive `Sitemap`
//! * Directive `Clean-Param`
//!
//! # Example
//! ```rust
//! use robotparser::parser::parse_robots_txt;
//! use robotparser::service::RobotsTxtService;
//! use url::Url;
//!
//! let robots_txt_url = Url::parse("https://google.com/robots.txt").unwrap();
//! let robots_txt = "User-agent: *\nDisallow: /search";
//! let robots_txt = parse_robots_txt(robots_txt_url.origin(), robots_txt);
//! assert_eq!(robots_txt.get_warnings().len(), 0);
//! let robots_txt = robots_txt.get_result();
//! let good_url = Url::parse("https://google.com/test").unwrap();
//! let bad_url = Url::parse("https://google.com/search/vvv").unwrap();
//! assert_eq!(robots_txt.can_fetch("*", &bad_url), false);
//! assert_eq!(robots_txt.can_fetch("*", &good_url), true);
//! ```
28mod robots_txt_parser;
29pub use self::robots_txt_parser::parse as parse_robots_txt;
30mod warning_reason;
31pub use self::warning_reason::WarningReason;
32mod warning;
33pub use self::warning::ParseWarning;
34mod parse_result;
35pub use self::parse_result::ParseResult;
36mod fetched_robots_txt_parser;
37pub use self::fetched_robots_txt_parser::parse as parse_fetched_robots_txt;
38mod line;