Skip to main content

typstify_generator/
sitemap.rs

1//! Sitemap generation.
2//!
3//! Generates XML sitemaps for search engine optimization.
4
5use std::io::Write;
6
7use chrono::{DateTime, Utc};
8use thiserror::Error;
9use tracing::debug;
10use typstify_core::{Config, Page};
11
12/// Sitemap generation errors.
13#[derive(Debug, Error)]
14pub enum SitemapError {
15    /// IO error.
16    #[error("IO error: {0}")]
17    Io(#[from] std::io::Error),
18
19    /// XML encoding error.
20    #[error("XML encoding error: {0}")]
21    Xml(String),
22}
23
24/// Result type for sitemap operations.
25pub type Result<T> = std::result::Result<T, SitemapError>;
26
/// Value of the `<changefreq>` element of a sitemap entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeFreq {
    /// Serialized as `always`.
    Always,
    /// Serialized as `hourly`.
    Hourly,
    /// Serialized as `daily`.
    Daily,
    /// Serialized as `weekly`.
    Weekly,
    /// Serialized as `monthly`.
    Monthly,
    /// Serialized as `yearly`.
    Yearly,
    /// Serialized as `never`.
    Never,
}

impl ChangeFreq {
    /// Returns the lowercase token written inside `<changefreq>`.
    fn as_str(&self) -> &'static str {
        use ChangeFreq::{Always, Daily, Hourly, Monthly, Never, Weekly, Yearly};

        match *self {
            Always => "always",
            Hourly => "hourly",
            Daily => "daily",
            Weekly => "weekly",
            Monthly => "monthly",
            Yearly => "yearly",
            Never => "never",
        }
    }
}
52
53/// A sitemap URL entry.
54#[derive(Debug, Clone)]
55pub struct SitemapUrl {
56    /// URL location.
57    pub loc: String,
58
59    /// Last modification date.
60    pub lastmod: Option<DateTime<Utc>>,
61
62    /// Change frequency.
63    pub changefreq: Option<ChangeFreq>,
64
65    /// Priority (0.0 to 1.0).
66    pub priority: Option<f32>,
67
68    /// Alternate language versions.
69    pub alternates: Vec<AlternateLink>,
70}
71
/// Alternate language link for a URL.
///
/// Rendered as an `<xhtml:link rel="alternate" …/>` child of the `<url>`
/// entry it belongs to. Equality compares both fields, which makes links
/// directly usable in `assert_eq!` and de-duplication.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AlternateLink {
    /// Language code (e.g., "en", "zh").
    pub hreflang: String,

    /// URL for this language version.
    pub href: String,
}
81
82/// Sitemap generator.
83#[derive(Debug)]
84pub struct SitemapGenerator {
85    config: Config,
86}
87
88impl SitemapGenerator {
89    /// Create a new sitemap generator.
90    #[must_use]
91    pub fn new(config: Config) -> Self {
92        Self { config }
93    }
94
95    /// Generate sitemap XML from pages.
96    pub fn generate(&self, pages: &[&Page]) -> Result<String> {
97        debug!(count = pages.len(), "generating sitemap");
98
99        let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
100        xml.push('\n');
101        xml.push_str(r#"<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9""#);
102
103        // Add xhtml namespace if we have multiple languages
104        if self.config.site.languages.len() > 1 {
105            xml.push_str(r#" xmlns:xhtml="http://www.w3.org/1999/xhtml""#);
106        }
107        xml.push_str(">\n");
108
109        for page in pages {
110            let url = self.page_to_url(page);
111            xml.push_str(&self.url_to_xml(&url));
112        }
113
114        xml.push_str("</urlset>\n");
115
116        Ok(xml)
117    }
118
119    /// Convert a page to a sitemap URL entry.
120    fn page_to_url(&self, page: &Page) -> SitemapUrl {
121        let loc = format!("{}{}", self.config.site.base_url, page.url);
122
123        // Determine lastmod from page date or updated date
124        let lastmod = page.updated.or(page.date);
125
126        // Determine change frequency and priority based on content type
127        let (changefreq, priority) = if page.url == "/" || page.url.is_empty() {
128            // Home page
129            (Some(ChangeFreq::Daily), Some(1.0))
130        } else if page.date.is_some() {
131            // Blog posts
132            (Some(ChangeFreq::Monthly), Some(0.8))
133        } else {
134            // Static pages
135            (Some(ChangeFreq::Yearly), Some(0.5))
136        };
137
138        // Build alternate links for multi-language sites
139        let slug = page.url.trim_start_matches('/');
140        let alternates = if self.config.site.languages.len() > 1 {
141            self.config
142                .site
143                .languages
144                .iter()
145                .map(|lang| {
146                    let href = if lang == &self.config.site.default_language {
147                        format!("{}/{}", self.config.site.base_url, slug)
148                    } else {
149                        format!("{}/{}/{}", self.config.site.base_url, lang, slug)
150                    };
151                    AlternateLink {
152                        hreflang: lang.clone(),
153                        href,
154                    }
155                })
156                .collect()
157        } else {
158            Vec::new()
159        };
160
161        SitemapUrl {
162            loc,
163            lastmod,
164            changefreq,
165            priority,
166            alternates,
167        }
168    }
169
170    /// Convert a URL entry to XML.
171    fn url_to_xml(&self, url: &SitemapUrl) -> String {
172        let mut xml = String::from("  <url>\n");
173
174        xml.push_str(&format!("    <loc>{}</loc>\n", escape_xml(&url.loc)));
175
176        if let Some(lastmod) = &url.lastmod {
177            xml.push_str(&format!(
178                "    <lastmod>{}</lastmod>\n",
179                lastmod.format("%Y-%m-%d")
180            ));
181        }
182
183        if let Some(changefreq) = &url.changefreq {
184            xml.push_str(&format!(
185                "    <changefreq>{}</changefreq>\n",
186                changefreq.as_str()
187            ));
188        }
189
190        if let Some(priority) = &url.priority {
191            xml.push_str(&format!("    <priority>{priority:.1}</priority>\n"));
192        }
193
194        // Add alternate language links
195        for alt in &url.alternates {
196            xml.push_str(&format!(
197                r#"    <xhtml:link rel="alternate" hreflang="{}" href="{}" />"#,
198                alt.hreflang,
199                escape_xml(&alt.href)
200            ));
201            xml.push('\n');
202        }
203
204        xml.push_str("  </url>\n");
205        xml
206    }
207
208    /// Write sitemap to a writer.
209    pub fn write_to<W: Write>(&self, pages: &[&Page], writer: &mut W) -> Result<()> {
210        let xml = self.generate(pages)?;
211        writer.write_all(xml.as_bytes())?;
212        Ok(())
213    }
214
215    /// Generate sitemap index for multiple sitemaps.
216    pub fn generate_index(&self, sitemaps: &[&str]) -> String {
217        let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
218        xml.push('\n');
219        xml.push_str(r#"<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">"#);
220        xml.push('\n');
221
222        let now = Utc::now().format("%Y-%m-%d").to_string();
223
224        for sitemap in sitemaps {
225            xml.push_str("  <sitemap>\n");
226            xml.push_str(&format!(
227                "    <loc>{}/{}</loc>\n",
228                self.config.site.base_url, sitemap
229            ));
230            xml.push_str(&format!("    <lastmod>{now}</lastmod>\n"));
231            xml.push_str("  </sitemap>\n");
232        }
233
234        xml.push_str("</sitemapindex>\n");
235        xml
236    }
237}
238
/// Escape special XML characters.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with their predefined XML entities so
/// the result is safe in both element text and attribute values. All other
/// characters are copied through unchanged.
///
/// The input is scanned once into a single pre-sized buffer instead of
/// allocating a new string for each of the five replacements.
fn escape_xml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
247
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Single-language (`en`) site rooted at `https://example.com`.
    fn test_config() -> Config {
        Config {
            site: typstify_core::config::SiteConfig {
                title: "Test Site".to_string(),
                base_url: "https://example.com".to_string(),
                default_language: "en".to_string(),
                languages: vec!["en".to_string()],
                description: None,
                author: None,
            },
            build: typstify_core::config::BuildConfig::default(),
            search: typstify_core::config::SearchConfig::default(),
            rss: typstify_core::config::RssConfig::default(),
            taxonomies: typstify_core::config::TaxonomyConfig::default(),
        }
    }

    /// Minimal page whose URL is `/{slug}` and whose publish date is `date`.
    fn test_page(slug: &str, date: Option<DateTime<Utc>>) -> Page {
        Page {
            url: format!("/{slug}"),
            title: slug.to_string(),
            description: None,
            date,
            updated: None,
            draft: false,
            lang: None,
            tags: vec![],
            categories: vec![],
            content: String::new(),
            summary: None,
            reading_time: None,
            word_count: None,
            toc: vec![],
            custom_js: vec![],
            custom_css: vec![],
            aliases: vec![],
            template: None,
            weight: 0,
            source_path: Some(PathBuf::from("test.md")),
        }
    }

    /// Fixed timestamp so the rendered `<lastmod>` can be asserted exactly.
    fn fixed_date() -> DateTime<Utc> {
        "2024-01-15T10:30:00Z"
            .parse()
            .expect("valid RFC 3339 timestamp")
    }

    #[test]
    fn test_generate_sitemap() {
        let generator = SitemapGenerator::new(test_config());
        let page1 = test_page("about", None);
        let page2 = test_page("blog/post-1", Some(fixed_date()));
        let pages: Vec<&Page> = vec![&page1, &page2];

        let xml = generator.generate(&pages).unwrap();

        assert!(xml.contains(r#"<?xml version="1.0""#));
        assert!(xml.contains("<urlset"));
        assert!(xml.ends_with("</urlset>\n"));
        assert!(xml.contains("<loc>https://example.com/about</loc>"));
        assert!(xml.contains("<loc>https://example.com/blog/post-1</loc>"));

        // Undated page is treated as static content.
        assert!(xml.contains("<changefreq>yearly</changefreq>"));
        assert!(xml.contains("<priority>0.5</priority>"));

        // Dated page is treated as a post and carries its date.
        assert!(xml.contains("<lastmod>2024-01-15</lastmod>"));
        assert!(xml.contains("<changefreq>monthly</changefreq>"));
        assert!(xml.contains("<priority>0.8</priority>"));

        // A single-language site must not emit any xhtml markup.
        assert!(!xml.contains("xhtml"));
    }

    #[test]
    fn test_escape_xml() {
        assert_eq!(escape_xml("a & b"), "a &amp; b");
        assert_eq!(escape_xml("<tag>"), "&lt;tag&gt;");
        assert_eq!(escape_xml("\"quoted\""), "&quot;quoted&quot;");
        assert_eq!(escape_xml("it's"), "it&apos;s");
        assert_eq!(escape_xml("/search?q=a&lang=en"), "/search?q=a&amp;lang=en");
        assert_eq!(escape_xml(""), "");
    }

    #[test]
    fn test_home_page_priority() {
        let generator = SitemapGenerator::new(test_config());
        let mut home = test_page("", None);
        home.url = "/".to_string();

        let url = generator.page_to_url(&home);

        assert_eq!(url.loc, "https://example.com/");
        assert_eq!(url.priority, Some(1.0));
        assert_eq!(url.changefreq, Some(ChangeFreq::Daily));
        assert!(url.alternates.is_empty());
    }

    #[test]
    fn test_generate_index() {
        let generator = SitemapGenerator::new(test_config());
        let sitemaps = vec!["sitemap-posts.xml", "sitemap-pages.xml"];

        let xml = generator.generate_index(&sitemaps);

        assert!(xml.contains("<sitemapindex"));
        assert!(xml.ends_with("</sitemapindex>\n"));
        assert!(xml.contains("<loc>https://example.com/sitemap-posts.xml</loc>"));
        assert!(xml.contains("<loc>https://example.com/sitemap-pages.xml</loc>"));
        assert_eq!(xml.matches("<sitemap>").count(), 2);
    }

    #[test]
    fn test_multilang_sitemap() {
        let mut config = test_config();
        config.site.languages = vec!["en".to_string(), "zh".to_string()];
        let generator = SitemapGenerator::new(config);

        let page = test_page("about", None);
        let pages: Vec<&Page> = vec![&page];

        let xml = generator.generate(&pages).unwrap();

        assert!(xml.contains("xmlns:xhtml"));
        assert!(xml.contains(r#"hreflang="en" href="https://example.com/about""#));
        assert!(xml.contains(r#"hreflang="zh" href="https://example.com/zh/about""#));
    }
}