// robotxt/build/mod.rs

1use std::collections::HashSet;
2use std::fmt;
3
4use url::Url;
5
6pub use crate::build::group::GroupBuilder;
7use crate::build::split::format_comment;
8
9mod group;
10mod split;
11
/// The set of formatted `user-agent` groups that can be written
/// in the `robots.txt` compliant format.
#[derive(Debug, Default, Clone)]
pub struct RobotsBuilder {
    // User-agent groups in insertion order; each renders as its own section.
    groups: Vec<GroupBuilder>,
    // Unique `Sitemap` URLs; the `HashSet` silently drops duplicates.
    sitemaps: HashSet<Url>,
    // Optional global comment emitted before all groups.
    header: Option<String>,
    // Optional global comment emitted after all groups.
    footer: Option<String>,
}
21
22impl RobotsBuilder {
23    /// Creates a new [`RobotsBuilder`] with default settings.
24    pub fn new() -> Self {
25        Self::default()
26    }
27
28    /// Adds a global header, usually used for permissions or legal notices.
29    ///
30    /// ```
31    /// use robotxt::RobotsBuilder;
32    ///
33    /// let txt = RobotsBuilder::default()
34    ///     .header("Note: Stop right there!")
35    ///     .group(["*"], |u| u.disallow("/"))
36    ///     .group(["foobot"], |u| u.allow("/"));
37    /// ```
38    pub fn header(mut self, header: &str) -> Self {
39        self.header = Some(header.to_string());
40        self
41    }
42
43    /// Adds a new `user-agent` group from the provided list of user-agents.
44    ///
45    /// ```
46    /// use robotxt::RobotsBuilder;
47    ///
48    /// let txt = RobotsBuilder::default()
49    ///     .group(["*"], |u| u.disallow("/"))
50    ///     .group(["foobot"], |u| u.allow("/"));
51    /// ```
52    pub fn group<'a>(
53        mut self,
54        group: impl IntoIterator<Item = &'a str>,
55        factory: impl FnOnce(GroupBuilder) -> GroupBuilder,
56    ) -> Self {
57        let section = GroupBuilder::from_iter(group);
58        self.groups.push(factory(section));
59        self
60    }
61
62    /// Adds the `Sitemap` directive from the URL address.
63    ///
64    /// ```
65    /// use url::Url;
66    /// use robotxt::RobotsBuilder;
67    ///
68    /// let txt = RobotsBuilder::default()
69    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap())
70    ///     .sitemap("https://example.com/sitemap_1.xml".try_into().unwrap());
71    /// ```
72    pub fn sitemap(mut self, sitemap: Url) -> Self {
73        self.sitemaps.insert(sitemap);
74        self
75    }
76
77    /// Adds a global footer, usually used for notices.
78    ///
79    /// ```
80    /// use robotxt::RobotsBuilder;
81    ///
82    /// let txt = RobotsBuilder::default()
83    ///     .group(["*"], |u| u.disallow("/"))
84    ///     .group(["foobot"], |u| u.allow("/"))
85    ///     .footer("Note: Have a nice day!");
86    /// ```
87    pub fn footer(mut self, footer: &str) -> Self {
88        self.footer = Some(footer.to_string());
89        self
90    }
91
92    /// Parses the constructed output.
93    /// See [`Robots::from_bytes`].
94    ///
95    /// [`Robots::from_bytes`]: crate::Robots::from_bytes
96    #[cfg(feature = "parser")]
97    #[cfg_attr(docsrs, doc(cfg(feature = "parser")))]
98    pub fn parse(&self, user_agent: &str) -> crate::Robots {
99        let txt = self.to_string();
100        crate::Robots::from_bytes(txt.as_bytes(), user_agent)
101    }
102}
103
104impl fmt::Display for RobotsBuilder {
105    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106        let header = self.header.as_ref().map(|h| format_comment(h));
107        let footer = self.footer.as_ref().map(|f| format_comment(f));
108
109        let groups = self.groups.iter().map(|u| u.to_string());
110        let groups = groups.collect::<Vec<_>>().join("\n\n");
111
112        let result = [header, Some(groups), footer];
113        let result = result.iter().filter_map(|u| u.clone());
114        let result = result.collect::<Vec<_>>().join("\n\n");
115        write!(f, "{}", result.as_str())
116    }
117}
118
#[cfg(test)]
mod builder {
    use crate::{Result, RobotsBuilder};

    /// Smoke-tests the README example: builds a complete `robots.txt`
    /// (header, two groups, two sitemaps, footer) and prints the output.
    #[test]
    fn readme() -> Result<()> {
        let txt = RobotsBuilder::default()
            .header("Robots.txt: Start")
            .group(["foobot"], |u| {
                u.crawl_delay(5)
                    .header("Rules for Foobot: Start")
                    .allow("/example/yeah.txt")
                    .disallow("/example/nope.txt")
                    .footer("Rules for Foobot: End")
            })
            .group(["barbot", "nombot"], |u| {
                u.crawl_delay(2)
                    .disallow("/example/yeah.txt")
                    .disallow("/example/nope.txt")
            })
            .sitemap("https://example.com/sitemap_1.xml".try_into()?)
            .sitemap("https://example.com/sitemap_2.xml".try_into()?)
            .footer("Robots.txt: End");

        // `txt` implements `Display`; the previous `txt.to_string()`
        // allocated an intermediate `String` only to format it again.
        println!("{}", txt);
        Ok(())
    }
}
146}