typstify_generator/
sitemap.rs

1//! Sitemap generation.
2//!
3//! Generates XML sitemaps for search engine optimization.
4
5use std::io::Write;
6
7use chrono::{DateTime, Utc};
8use thiserror::Error;
9use tracing::debug;
10use typstify_core::{Config, Page};
11
/// Sitemap generation errors.
///
/// This is the error half of the module-level [`Result`] alias.
#[derive(Debug, Error)]
pub enum SitemapError {
    /// IO error.
    ///
    /// Thanks to `#[from]`, a [`std::io::Error`] is converted into this
    /// variant automatically when propagated with `?`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// XML encoding error, carrying a human-readable description.
    ///
    /// NOTE(review): nothing in the visible part of this file constructs
    /// this variant — confirm whether it is still needed.
    #[error("XML encoding error: {0}")]
    Xml(String),
}
23
/// Result type for sitemap operations.
///
/// Shorthand for [`std::result::Result`] with [`SitemapError`] as the error.
pub type Result<T> = std::result::Result<T, SitemapError>;
26
/// How often a page is expected to change, as advertised in `<changefreq>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeFreq {
    /// Serialized as `always`.
    Always,
    /// Serialized as `hourly`.
    Hourly,
    /// Serialized as `daily`.
    Daily,
    /// Serialized as `weekly`.
    Weekly,
    /// Serialized as `monthly`.
    Monthly,
    /// Serialized as `yearly`.
    Yearly,
    /// Serialized as `never`.
    Never,
}

impl ChangeFreq {
    /// Returns the lowercase keyword written into the sitemap XML for this
    /// frequency.
    fn as_str(&self) -> &'static str {
        // Exhaustive on purpose: adding a variant must fail to compile here
        // until its keyword is chosen.
        match *self {
            ChangeFreq::Always => "always",
            ChangeFreq::Hourly => "hourly",
            ChangeFreq::Daily => "daily",
            ChangeFreq::Weekly => "weekly",
            ChangeFreq::Monthly => "monthly",
            ChangeFreq::Yearly => "yearly",
            ChangeFreq::Never => "never",
        }
    }
}
52
/// A sitemap URL entry, i.e. the data behind one `<url>` element.
#[derive(Debug, Clone)]
pub struct SitemapUrl {
    /// URL location; written to `<loc>` after XML escaping.
    pub loc: String,

    /// Last modification date; written to `<lastmod>` as `YYYY-MM-DD` when
    /// present, omitted otherwise.
    pub lastmod: Option<DateTime<Utc>>,

    /// Change frequency; written to `<changefreq>` when present.
    pub changefreq: Option<ChangeFreq>,

    /// Priority (0.0 to 1.0); written to `<priority>` with one decimal place
    /// when present. The range is not enforced by this type.
    pub priority: Option<f32>,

    /// Alternate language versions, each rendered as an
    /// `<xhtml:link rel="alternate">` child element.
    pub alternates: Vec<AlternateLink>,
}
71
/// Alternate language link for a URL.
///
/// Rendered as `<xhtml:link rel="alternate" hreflang="…" href="…" />`.
#[derive(Debug, Clone)]
pub struct AlternateLink {
    /// Language code (e.g., "en", "zh"); becomes the `hreflang` attribute.
    pub hreflang: String,

    /// URL for this language version; becomes the `href` attribute.
    pub href: String,
}
81
/// Sitemap generator.
///
/// Renders `<urlset>` sitemaps and `<sitemapindex>` documents as strings,
/// using the site configuration it owns.
#[derive(Debug)]
pub struct SitemapGenerator {
    /// Site configuration; read for the base URL, the default language and
    /// the list of all languages.
    config: Config,
}
87
88impl SitemapGenerator {
89    /// Create a new sitemap generator.
90    #[must_use]
91    pub fn new(config: Config) -> Self {
92        Self { config }
93    }
94
95    /// Generate sitemap XML from pages.
96    pub fn generate(&self, pages: &[&Page]) -> Result<String> {
97        debug!(count = pages.len(), "generating sitemap");
98
99        let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
100        xml.push('\n');
101        // Add XSLT stylesheet reference for browser rendering
102        xml.push_str(r#"<?xml-stylesheet type="text/xsl" href="/sitemap-style.xsl"?>"#);
103        xml.push('\n');
104        xml.push_str(r#"<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9""#);
105
106        // Add xhtml namespace if we have multiple languages
107        let all_languages = self.config.all_languages();
108        if all_languages.len() > 1 {
109            xml.push_str(r#" xmlns:xhtml="http://www.w3.org/1999/xhtml""#);
110        }
111        xml.push_str(">\n");
112
113        for page in pages {
114            let url = self.page_to_url(page);
115            xml.push_str(&self.url_to_xml(&url));
116        }
117
118        xml.push_str("</urlset>\n");
119
120        Ok(xml)
121    }
122
123    /// Convert a page to a sitemap URL entry.
124    fn page_to_url(&self, page: &Page) -> SitemapUrl {
125        let loc = format!("{}{}", self.config.site.base_url, page.url);
126
127        // Determine lastmod from page date or updated date
128        let lastmod = page.updated.or(page.date);
129
130        // Determine change frequency and priority based on content type
131        let (changefreq, priority) = if page.url == "/" || page.url.is_empty() {
132            // Home page
133            (Some(ChangeFreq::Daily), Some(1.0))
134        } else if page.date.is_some() {
135            // Blog posts
136            (Some(ChangeFreq::Monthly), Some(0.8))
137        } else {
138            // Static pages
139            (Some(ChangeFreq::Yearly), Some(0.5))
140        };
141
142        // Build alternate links for multi-language sites
143        let slug = page.url.trim_start_matches('/');
144        let all_languages = self.config.all_languages();
145        let alternates = if all_languages.len() > 1 {
146            all_languages
147                .iter()
148                .map(|lang| {
149                    let href = if *lang == self.config.site.default_language {
150                        format!("{}/{}", self.config.site.base_url, slug)
151                    } else {
152                        format!("{}/{}/{}", self.config.site.base_url, lang, slug)
153                    };
154                    AlternateLink {
155                        hreflang: lang.to_string(),
156                        href,
157                    }
158                })
159                .collect()
160        } else {
161            Vec::new()
162        };
163
164        SitemapUrl {
165            loc,
166            lastmod,
167            changefreq,
168            priority,
169            alternates,
170        }
171    }
172
173    /// Convert a URL entry to XML.
174    fn url_to_xml(&self, url: &SitemapUrl) -> String {
175        let mut xml = String::from("  <url>\n");
176
177        xml.push_str(&format!("    <loc>{}</loc>\n", escape_xml(&url.loc)));
178
179        if let Some(lastmod) = &url.lastmod {
180            xml.push_str(&format!(
181                "    <lastmod>{}</lastmod>\n",
182                lastmod.format("%Y-%m-%d")
183            ));
184        }
185
186        if let Some(changefreq) = &url.changefreq {
187            xml.push_str(&format!(
188                "    <changefreq>{}</changefreq>\n",
189                changefreq.as_str()
190            ));
191        }
192
193        if let Some(priority) = &url.priority {
194            xml.push_str(&format!("    <priority>{priority:.1}</priority>\n"));
195        }
196
197        // Add alternate language links
198        for alt in &url.alternates {
199            xml.push_str(&format!(
200                r#"    <xhtml:link rel="alternate" hreflang="{}" href="{}" />"#,
201                alt.hreflang,
202                escape_xml(&alt.href)
203            ));
204            xml.push('\n');
205        }
206
207        xml.push_str("  </url>\n");
208        xml
209    }
210
211    /// Write sitemap to a writer.
212    pub fn write_to<W: Write>(&self, pages: &[&Page], writer: &mut W) -> Result<()> {
213        let xml = self.generate(pages)?;
214        writer.write_all(xml.as_bytes())?;
215        Ok(())
216    }
217
218    /// Generate sitemap index for multiple sitemaps.
219    pub fn generate_index(&self, sitemaps: &[&str]) -> String {
220        let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
221        xml.push('\n');
222        xml.push_str(r#"<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">"#);
223        xml.push('\n');
224
225        let now = Utc::now().format("%Y-%m-%d").to_string();
226
227        for sitemap in sitemaps {
228            xml.push_str("  <sitemap>\n");
229            xml.push_str(&format!(
230                "    <loc>{}/{}</loc>\n",
231                self.config.site.base_url, sitemap
232            ));
233            xml.push_str(&format!("    <lastmod>{now}</lastmod>\n"));
234            xml.push_str("  </sitemap>\n");
235        }
236
237        xml.push_str("</sitemapindex>\n");
238        xml
239    }
240}
241
/// Escapes the five characters that are special in XML text and attribute
/// values: `&`, `<`, `>`, `"` and `'`.
///
/// Every other character is copied through unchanged. Input that already
/// contains entities is escaped again (`&lt;` becomes `&amp;lt;`).
fn escape_xml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
250
/// Generate XSLT stylesheet for sitemap rendering in browsers.
///
/// This creates a modern, clean stylesheet with light/dark mode support
/// that renders the sitemap as an HTML table with one row per `<url>`:
/// location, priority, change frequency and last-modified date. Rows are
/// ordered by priority, highest first.
///
/// The stylesheet declares XSLT `version="1.0"` because it only uses 1.0
/// constructs and is meant to be applied by a browser. The priority sort
/// uses `data-type="number"` so that values are compared numerically rather
/// than as text.
#[must_use]
pub fn generate_sitemap_xsl() -> String {
    r#"<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
    xmlns:xhtml="http://www.w3.org/1999/xhtml">

<xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>

<xsl:template match="/">
<html lang="en">
<head>
    <meta charset="UTF-8"/>
    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
    <title>Sitemap</title>
    <style>
        :root {
            --bg-primary: #ffffff;
            --bg-secondary: #f8fafc;
            --bg-tertiary: #f1f5f9;
            --text-primary: #0f172a;
            --text-secondary: #475569;
            --text-muted: #94a3b8;
            --border-color: #e2e8f0;
            --accent-color: #3b82f6;
            --accent-hover: #2563eb;
            --priority-high: #22c55e;
            --priority-medium: #eab308;
            --priority-low: #94a3b8;
        }

        @media (prefers-color-scheme: dark) {
            :root {
                --bg-primary: #0f172a;
                --bg-secondary: #1e293b;
                --bg-tertiary: #334155;
                --text-primary: #f1f5f9;
                --text-secondary: #cbd5e1;
                --text-muted: #64748b;
                --border-color: #334155;
                --accent-color: #60a5fa;
                --accent-hover: #93c5fd;
            }
        }

        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            background-color: var(--bg-primary);
            color: var(--text-primary);
            line-height: 1.6;
            padding: 2rem;
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
        }

        header {
            margin-bottom: 2rem;
            padding-bottom: 1rem;
            border-bottom: 1px solid var(--border-color);
        }

        h1 {
            font-size: 1.875rem;
            font-weight: 700;
            margin-bottom: 0.5rem;
        }

        .subtitle {
            color: var(--text-secondary);
            font-size: 0.875rem;
        }

        .stats {
            display: flex;
            gap: 2rem;
            margin-top: 1rem;
            flex-wrap: wrap;
        }

        .stat {
            background: var(--bg-secondary);
            padding: 0.75rem 1.25rem;
            border-radius: 0.5rem;
            border: 1px solid var(--border-color);
        }

        .stat-label {
            font-size: 0.75rem;
            text-transform: uppercase;
            letter-spacing: 0.05em;
            color: var(--text-muted);
        }

        .stat-value {
            font-size: 1.25rem;
            font-weight: 600;
            color: var(--accent-color);
        }

        table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 1.5rem;
            background: var(--bg-secondary);
            border-radius: 0.5rem;
            overflow: hidden;
            border: 1px solid var(--border-color);
        }

        thead {
            background: var(--bg-tertiary);
        }

        th {
            padding: 0.875rem 1rem;
            text-align: left;
            font-weight: 600;
            font-size: 0.75rem;
            text-transform: uppercase;
            letter-spacing: 0.05em;
            color: var(--text-secondary);
            border-bottom: 1px solid var(--border-color);
        }

        td {
            padding: 0.875rem 1rem;
            border-bottom: 1px solid var(--border-color);
            font-size: 0.875rem;
        }

        tbody tr:hover {
            background: var(--bg-tertiary);
        }

        tbody tr:last-child td {
            border-bottom: none;
        }

        a {
            color: var(--accent-color);
            text-decoration: none;
            word-break: break-all;
        }

        a:hover {
            color: var(--accent-hover);
            text-decoration: underline;
        }

        .priority {
            display: inline-flex;
            align-items: center;
            gap: 0.375rem;
        }

        .priority-dot {
            width: 0.5rem;
            height: 0.5rem;
            border-radius: 50%;
        }

        .priority-high .priority-dot {
            background: var(--priority-high);
        }

        .priority-medium .priority-dot {
            background: var(--priority-medium);
        }

        .priority-low .priority-dot {
            background: var(--priority-low);
        }

        .changefreq {
            display: inline-block;
            padding: 0.25rem 0.5rem;
            background: var(--bg-tertiary);
            border-radius: 0.25rem;
            font-size: 0.75rem;
            color: var(--text-secondary);
        }

        .date {
            color: var(--text-muted);
            font-size: 0.8125rem;
        }

        footer {
            margin-top: 2rem;
            padding-top: 1rem;
            border-top: 1px solid var(--border-color);
            text-align: center;
            color: var(--text-muted);
            font-size: 0.75rem;
        }

        @media (max-width: 768px) {
            body {
                padding: 1rem;
            }

            .stats {
                gap: 1rem;
            }

            th, td {
                padding: 0.625rem 0.5rem;
            }

            .hide-mobile {
                display: none;
            }
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>🗺️ Sitemap</h1>
            <p class="subtitle">This sitemap contains all pages available on this website.</p>
            <div class="stats">
                <div class="stat">
                    <div class="stat-label">Total URLs</div>
                    <div class="stat-value"><xsl:value-of select="count(sitemap:urlset/sitemap:url)"/></div>
                </div>
            </div>
        </header>

        <table>
            <thead>
                <tr>
                    <th>URL</th>
                    <th class="hide-mobile">Priority</th>
                    <th class="hide-mobile">Change Frequency</th>
                    <th class="hide-mobile">Last Modified</th>
                </tr>
            </thead>
            <tbody>
                <xsl:for-each select="sitemap:urlset/sitemap:url">
                    <xsl:sort select="sitemap:priority" data-type="number" order="descending"/>
                    <tr>
                        <td>
                            <a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc"/></a>
                        </td>
                        <td class="hide-mobile">
                            <xsl:choose>
                                <xsl:when test="sitemap:priority &gt;= 0.8">
                                    <span class="priority priority-high">
                                        <span class="priority-dot"></span>
                                        <xsl:value-of select="sitemap:priority"/>
                                    </span>
                                </xsl:when>
                                <xsl:when test="sitemap:priority &gt;= 0.5">
                                    <span class="priority priority-medium">
                                        <span class="priority-dot"></span>
                                        <xsl:value-of select="sitemap:priority"/>
                                    </span>
                                </xsl:when>
                                <xsl:otherwise>
                                    <span class="priority priority-low">
                                        <span class="priority-dot"></span>
                                        <xsl:value-of select="sitemap:priority"/>
                                    </span>
                                </xsl:otherwise>
                            </xsl:choose>
                        </td>
                        <td class="hide-mobile">
                            <xsl:if test="sitemap:changefreq">
                                <span class="changefreq"><xsl:value-of select="sitemap:changefreq"/></span>
                            </xsl:if>
                        </td>
                        <td class="hide-mobile">
                            <xsl:if test="sitemap:lastmod">
                                <span class="date"><xsl:value-of select="sitemap:lastmod"/></span>
                            </xsl:if>
                        </td>
                    </tr>
                </xsl:for-each>
            </tbody>
        </table>

        <footer>
            <p>Generated by Typstify • XML Sitemap Protocol</p>
        </footer>
    </div>
</body>
</html>
</xsl:template>

</xsl:stylesheet>"#.to_string()
}
556
557#[cfg(test)]
558mod tests {
559    use std::{collections::HashMap, path::PathBuf};
560
561    use typstify_core::config::LanguageConfig;
562
563    use super::*;
564
565    fn test_config() -> Config {
566        Config {
567            site: typstify_core::config::SiteConfig {
568                title: "Test Site".to_string(),
569                base_url: "https://example.com".to_string(),
570                default_language: "en".to_string(),
571                description: None,
572                author: None,
573            },
574            languages: HashMap::new(),
575            build: typstify_core::config::BuildConfig::default(),
576            search: typstify_core::config::SearchConfig::default(),
577            rss: typstify_core::config::RssConfig::default(),
578            robots: typstify_core::config::RobotsConfig::default(),
579            taxonomies: typstify_core::config::TaxonomyConfig::default(),
580        }
581    }
582
583    fn test_page(slug: &str, date: Option<DateTime<Utc>>) -> Page {
584        Page {
585            url: format!("/{}", slug),
586            title: slug.to_string(),
587            description: None,
588            date,
589            updated: None,
590            draft: false,
591            lang: "en".to_string(),
592            is_default_lang: true,
593            canonical_id: slug.to_string(),
594            tags: vec![],
595            categories: vec![],
596            content: String::new(),
597            summary: None,
598            reading_time: None,
599            word_count: None,
600            toc: vec![],
601            custom_js: vec![],
602            custom_css: vec![],
603            aliases: vec![],
604            template: None,
605            weight: 0,
606            source_path: Some(PathBuf::from("test.md")),
607        }
608    }
609
610    #[test]
611    fn test_generate_sitemap() {
612        let generator = SitemapGenerator::new(test_config());
613        let page1 = test_page("about", None);
614        let page2 = test_page("blog/post-1", Some(Utc::now()));
615        let pages: Vec<&Page> = vec![&page1, &page2];
616
617        let xml = generator.generate(&pages).unwrap();
618
619        assert!(xml.contains(r#"<?xml version="1.0""#));
620        assert!(xml.contains("<urlset"));
621        assert!(xml.contains("<loc>https://example.com/about</loc>"));
622        assert!(xml.contains("<loc>https://example.com/blog/post-1</loc>"));
623        assert!(xml.contains("<changefreq>"));
624        assert!(xml.contains("<priority>"));
625    }
626
627    #[test]
628    fn test_escape_xml() {
629        assert_eq!(escape_xml("a & b"), "a &amp; b");
630        assert_eq!(escape_xml("<tag>"), "&lt;tag&gt;");
631        assert_eq!(escape_xml("\"quoted\""), "&quot;quoted&quot;");
632    }
633
634    #[test]
635    fn test_home_page_priority() {
636        let generator = SitemapGenerator::new(test_config());
637        let mut home = test_page("", None);
638        home.url = "/".to_string();
639
640        let url = generator.page_to_url(&home);
641
642        assert_eq!(url.priority, Some(1.0));
643        assert_eq!(url.changefreq, Some(ChangeFreq::Daily));
644    }
645
646    #[test]
647    fn test_generate_index() {
648        let generator = SitemapGenerator::new(test_config());
649        let sitemaps = vec!["sitemap-posts.xml", "sitemap-pages.xml"];
650
651        let xml = generator.generate_index(&sitemaps);
652
653        assert!(xml.contains("<sitemapindex"));
654        assert!(xml.contains("sitemap-posts.xml"));
655        assert!(xml.contains("sitemap-pages.xml"));
656    }
657
658    #[test]
659    fn test_multilang_sitemap() {
660        let mut config = test_config();
661        config.languages.insert(
662            "en".to_string(),
663            LanguageConfig {
664                name: Some("English".to_string()),
665                title: None,
666                description: None,
667            },
668        );
669        config.languages.insert(
670            "zh".to_string(),
671            LanguageConfig {
672                name: Some("中文".to_string()),
673                title: None,
674                description: None,
675            },
676        );
677        let generator = SitemapGenerator::new(config);
678
679        let page = test_page("about", None);
680        let pages: Vec<&Page> = vec![&page];
681
682        let xml = generator.generate(&pages).unwrap();
683
684        assert!(xml.contains("xmlns:xhtml"));
685        assert!(xml.contains(r#"hreflang="en""#));
686        assert!(xml.contains(r#"hreflang="zh""#));
687    }
688}