//! An implementation of the robots.txt protocol (also known as the Robots
//! Exclusion Protocol) with support for the `crawl-delay`, `sitemap`, and
//! universal `*` match extensions (as described in RFC 9309).
//!
//! **Also check out other `xwde` projects [here](https://github.com/xwde).**
//!
//! ## Examples
//!
//! - parse the provided `robots.txt` file for the specified `user-agent` (see [Robots]):
//!
//! ```rust
//! use robotxt::Robots;
//!
//! let txt = r#"
//! User-Agent: foobot
//! Disallow: *
//! Allow: /example/
//! Disallow: /example/nope.txt
//! "#.as_bytes();
//!
//! let r = Robots::from_bytes(txt, "foobot");
//! assert!(r.is_allowed("/example/yeah.txt"));
//! assert!(!r.is_allowed("/example/nope.txt"));
//! assert!(!r.is_allowed("/invalid/path.txt"));
//! ```
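//!
//! - read the `crawl-delay` and `sitemap` extensions discovered while parsing.
//!   This is a minimal sketch: the accessor names `crawl_delay` and `sitemaps`
//!   are assumptions about the parser's API and may differ, so the block is
//!   marked `ignore`:
//!
//! ```rust,ignore
//! use robotxt::Robots;
//!
//! let txt = r#"
//! User-Agent: foobot
//! Crawl-Delay: 5
//! Sitemap: https://example.com/sitemap_1.xml
//! "#.as_bytes();
//!
//! let r = Robots::from_bytes(txt, "foobot");
//! // Assumed accessors: the parsed delay (if any) and the discovered sitemaps.
//! assert!(r.crawl_delay().is_some());
//! assert_eq!(r.sitemaps().len(), 1);
//! ```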
//!
//! - build a new `robots.txt` file from the provided directives (see [Factory]):
//!
//! ```rust
//! use url::Url;
//! use robotxt::Factory;
//!
//! let txt = Factory::default()
//!     .header("Robots.txt Header")
//!     .group(["foobot"], |u| {
//!         u.crawl_delay(5)
//!             .header("Rules for Foobot: Start")
//!             .allow("/example/yeah.txt")
//!             .disallow("/example/nope.txt")
//!             .footer("Rules for Foobot: End")
//!     })
//!     .group(["barbot", "nombot"], |u| {
//!         u.crawl_delay(2)
//!             .disallow("/example/yeah.txt")
//!             .disallow("/example/nope.txt")
//!     })
//!     // `sitemap` is assumed to take a `url::Url`, hence the conversions below.
//!     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap())
//!     .sitemap("https://example.com/sitemap_2.xml".try_into().unwrap())
//!     .footer("Robots.txt Footer");
//!
//! println!("{}", txt.to_string());
//! ```
//!
//! ## Links
//!
//! - [Request for Comments: 9309](https://www.rfc-editor.org/rfc/rfc9309.txt)
//!   on rfc-editor.org
//! - [Introduction to Robots.txt](https://developers.google.com/search/docs/crawling-indexing/robots/intro)
//!   on Google.com
//! - [How Google interprets Robots.txt](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt)
//!   on Google.com
//! - [What is a Robots.txt file](https://moz.com/learn/seo/robotstxt) on Moz.com
//!
//! ## Notes
//!
//! - The parser is based on
//! [Smerity/texting_robots](https://github.com/Smerity/texting_robots)
//! - The `Host` directive is not supported
//!
// NOTE: the re-export paths were missing here; `parse` and `build` are assumed
// module names for the parser and the builder, respectively.
pub use build::*;
pub use parse::*;