blog_tools/sitemap/
sitemap.rs

1use std::{io::Cursor, path::Path};
2
3use xml::{reader::XmlEvent as ReaderXmlEvent, writer::XmlEvent, EmitterConfig, EventReader};
4
5use crate::{
6    common::{parse_blogs, BlogError},
7    low::LowBlogEntry,
8    types::Blog,
9};
10
11use super::types::SitemapOptions;
12
/// `chrono` format string used for every `<lastmod>` value in the sitemap.
///
/// The sitemap protocol requires `<lastmod>` to be a W3C Datetime; its
/// date-only form is `YYYY-MM-DD`, so the year must come first.
const DATE_FORMAT: &str = "%Y-%m-%d";
14
15/// Use this function in `low` mode to generate a sitemap
16///
17/// Parameters
18///
19/// - `blog_root`: Path to the root of the blog e.g. `files/blog`
20/// - `url_base`: URL of the website e.g. `www.example.com`
21/// - `options`: `SitemapOptions` for configuration
22pub fn create_sitemap<T: AsRef<Path>>(
23    blog_root: T,
24    url_base: &String,
25    options: &SitemapOptions,
26) -> Result<String, BlogError> {
27    let (entries, tags): (Vec<LowBlogEntry>, Vec<String>) = parse_blogs(blog_root, None, None)?;
28
29    return create_sitemap_inner(&entries, Some(&tags), url_base, options);
30}
31
32pub fn create_sitemap_inner<T: Blog>(
33    entries: &Vec<T>,
34    maybe_tags: Option<&Vec<String>>,
35    url_base: &String,
36    options: &SitemapOptions,
37) -> Result<String, BlogError> {
38    let mut buffer = Cursor::new(Vec::new());
39    let mut writer = EmitterConfig::new()
40        .perform_indent(true)
41        .create_writer(&mut buffer);
42
43    writer
44        .write(
45            XmlEvent::start_element("urlset")
46                .attr("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9"),
47        )
48        .unwrap();
49
50    let default_priority = options.default_priority;
51
52    // Blog pages
53    for blog in entries {
54        writer.write(XmlEvent::start_element("url")).unwrap();
55
56        // Location
57        writer.write(XmlEvent::start_element("loc")).unwrap();
58
59        let loc = format!(
60            "{}/{}/{}",
61            url_base,
62            options.blog_root_slug,
63            blog.get_full_slug()
64        );
65
66        writer.write(XmlEvent::characters(&loc)).unwrap();
67        writer.write(XmlEvent::end_element()).unwrap();
68
69        // Last Modified
70        writer.write(XmlEvent::start_element("lastmod")).unwrap();
71
72        let lastmod = match blog.get_last_modified() {
73            Some(x) => x.format(DATE_FORMAT).to_string(),
74            None => blog.get_date_listed().format(DATE_FORMAT).to_string(),
75        };
76
77        writer.write(XmlEvent::characters(&lastmod)).unwrap();
78        writer.write(XmlEvent::end_element()).unwrap();
79
80        // Priority
81
82        writer.write(XmlEvent::start_element("priority")).unwrap();
83
84        let priority = match blog.get_priority() {
85            None => default_priority,
86            Some(x) => {
87                if x > 1.0 || x < 0.0 {
88                    panic!("Priority must be between 0.0 and 1.0, got `{}`", x);
89                }
90
91                x
92            }
93        };
94        writer
95            .write(XmlEvent::characters(&format!("{}", priority)))
96            .unwrap();
97        writer.write(XmlEvent::end_element()).unwrap();
98
99        writer.write(XmlEvent::end_element()).unwrap(); // Finish <url>
100    }
101
102    // Tag pages
103    if options.include_tags && maybe_tags.is_some() {
104        let tags = maybe_tags.unwrap();
105        let current_time = chrono::offset::Utc::now();
106        let lastmod = current_time.date_naive().format(&DATE_FORMAT).to_string();
107
108        for tag in tags {
109            writer.write(XmlEvent::start_element("url")).unwrap();
110
111            // Location
112            writer.write(XmlEvent::start_element("loc")).unwrap();
113
114            let loc = format!("{}/{}/{}", url_base, options.tag_root_slug, tag);
115
116            writer.write(XmlEvent::characters(&loc)).unwrap();
117            writer.write(XmlEvent::end_element()).unwrap();
118
119            // Last Modified
120            writer.write(XmlEvent::start_element("lastmod")).unwrap();
121
122            writer.write(XmlEvent::characters(&lastmod)).unwrap();
123            writer.write(XmlEvent::end_element()).unwrap();
124
125            // Priority
126
127            writer.write(XmlEvent::start_element("priority")).unwrap();
128
129            writer
130                .write(XmlEvent::characters(&format!("{}", default_priority)))
131                .unwrap();
132            writer.write(XmlEvent::end_element()).unwrap();
133
134            writer.write(XmlEvent::end_element()).unwrap(); // Finish <url>
135        }
136    }
137
138    if let Some(sitemap_base) = &options.sitemap_base {
139        let parser = EventReader::from_str(&sitemap_base);
140
141        for e in parser {
142            match e {
143                Ok(ReaderXmlEvent::StartElement { name, .. }) => {
144                    let this_name = name.to_string();
145                    if this_name == "urlset" {
146                        continue;
147                    }
148                    writer
149                        .write(XmlEvent::start_element(this_name.as_str()))
150                        .unwrap();
151                }
152                Ok(ReaderXmlEvent::Characters(x)) => {
153                    writer.write(XmlEvent::characters(&x)).unwrap();
154                }
155                Ok(ReaderXmlEvent::EndElement { name }) => {
156                    if name.to_string() == "urlset" {
157                        continue;
158                    }
159                    writer.write(XmlEvent::end_element()).unwrap();
160                }
161                Err(e) => {
162                    eprintln!("Error: {e}");
163                    break;
164                }
165                // There's more: https://docs.rs/xml-rs/latest/xml/reader/enum.XmlEvent.html
166                _ => {}
167            }
168        }
169    }
170
171    writer.write(XmlEvent::end_element()).unwrap(); // End <urlset>
172
173    return Ok(String::from_utf8(buffer.into_inner()).unwrap());
174}