1use std::io::Write;
6
7use chrono::{DateTime, Utc};
8use thiserror::Error;
9use tracing::debug;
10use typstify_core::{Config, Page};
11
/// Errors produced by this sitemap module.
#[derive(Debug, Error)]
pub enum SitemapError {
    /// An I/O failure; converted automatically from [`std::io::Error`] via `?`.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// An XML encoding failure described by a free-form message.
    ///
    /// NOTE(review): no code visible in this file constructs this variant.
    #[error("XML encoding error: {0}")]
    Xml(String),
}
23
/// Result alias whose error type is fixed to [`SitemapError`].
pub type Result<T> = std::result::Result<T, SitemapError>;
26
/// Values allowed in a sitemap `<changefreq>` element.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeFreq {
    /// Rendered as `always`.
    Always,
    /// Rendered as `hourly`.
    Hourly,
    /// Rendered as `daily`.
    Daily,
    /// Rendered as `weekly`.
    Weekly,
    /// Rendered as `monthly`.
    Monthly,
    /// Rendered as `yearly`.
    Yearly,
    /// Rendered as `never`.
    Never,
}

impl ChangeFreq {
    /// Returns the lowercase keyword written into the `<changefreq>` element.
    fn as_str(&self) -> &'static str {
        match *self {
            ChangeFreq::Always => "always",
            ChangeFreq::Hourly => "hourly",
            ChangeFreq::Daily => "daily",
            ChangeFreq::Weekly => "weekly",
            ChangeFreq::Monthly => "monthly",
            ChangeFreq::Yearly => "yearly",
            ChangeFreq::Never => "never",
        }
    }
}
52
/// One `<url>` entry of a sitemap, as rendered by `SitemapGenerator::url_to_xml`.
#[derive(Debug, Clone)]
pub struct SitemapUrl {
    /// Page location, written (XML-escaped) inside `<loc>`.
    pub loc: String,

    /// Last-modification timestamp; written as `%Y-%m-%d` inside `<lastmod>` when present.
    pub lastmod: Option<DateTime<Utc>>,

    /// Change-frequency hint; written inside `<changefreq>` when present.
    pub changefreq: Option<ChangeFreq>,

    /// Priority hint; written with one decimal place inside `<priority>` when present.
    pub priority: Option<f32>,

    /// Alternate-language links; each one is written as an `<xhtml:link>` element.
    pub alternates: Vec<AlternateLink>,
}
71
/// An alternate-language link attached to a [`SitemapUrl`].
#[derive(Debug, Clone)]
pub struct AlternateLink {
    /// Value of the `hreflang` attribute (a configured site language).
    pub hreflang: String,

    /// Value of the `href` attribute: the URL of the page in that language.
    pub href: String,
}
81
/// Builds sitemap and sitemap-index XML documents from a site configuration.
#[derive(Debug)]
pub struct SitemapGenerator {
    /// Site configuration; `site.base_url`, `site.languages` and
    /// `site.default_language` are the fields read by this generator.
    config: Config,
}
87
88impl SitemapGenerator {
89 #[must_use]
91 pub fn new(config: Config) -> Self {
92 Self { config }
93 }
94
95 pub fn generate(&self, pages: &[&Page]) -> Result<String> {
97 debug!(count = pages.len(), "generating sitemap");
98
99 let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
100 xml.push('\n');
101 xml.push_str(r#"<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9""#);
102
103 if self.config.site.languages.len() > 1 {
105 xml.push_str(r#" xmlns:xhtml="http://www.w3.org/1999/xhtml""#);
106 }
107 xml.push_str(">\n");
108
109 for page in pages {
110 let url = self.page_to_url(page);
111 xml.push_str(&self.url_to_xml(&url));
112 }
113
114 xml.push_str("</urlset>\n");
115
116 Ok(xml)
117 }
118
119 fn page_to_url(&self, page: &Page) -> SitemapUrl {
121 let loc = format!("{}{}", self.config.site.base_url, page.url);
122
123 let lastmod = page.updated.or(page.date);
125
126 let (changefreq, priority) = if page.url == "/" || page.url.is_empty() {
128 (Some(ChangeFreq::Daily), Some(1.0))
130 } else if page.date.is_some() {
131 (Some(ChangeFreq::Monthly), Some(0.8))
133 } else {
134 (Some(ChangeFreq::Yearly), Some(0.5))
136 };
137
138 let slug = page.url.trim_start_matches('/');
140 let alternates = if self.config.site.languages.len() > 1 {
141 self.config
142 .site
143 .languages
144 .iter()
145 .map(|lang| {
146 let href = if lang == &self.config.site.default_language {
147 format!("{}/{}", self.config.site.base_url, slug)
148 } else {
149 format!("{}/{}/{}", self.config.site.base_url, lang, slug)
150 };
151 AlternateLink {
152 hreflang: lang.clone(),
153 href,
154 }
155 })
156 .collect()
157 } else {
158 Vec::new()
159 };
160
161 SitemapUrl {
162 loc,
163 lastmod,
164 changefreq,
165 priority,
166 alternates,
167 }
168 }
169
170 fn url_to_xml(&self, url: &SitemapUrl) -> String {
172 let mut xml = String::from(" <url>\n");
173
174 xml.push_str(&format!(" <loc>{}</loc>\n", escape_xml(&url.loc)));
175
176 if let Some(lastmod) = &url.lastmod {
177 xml.push_str(&format!(
178 " <lastmod>{}</lastmod>\n",
179 lastmod.format("%Y-%m-%d")
180 ));
181 }
182
183 if let Some(changefreq) = &url.changefreq {
184 xml.push_str(&format!(
185 " <changefreq>{}</changefreq>\n",
186 changefreq.as_str()
187 ));
188 }
189
190 if let Some(priority) = &url.priority {
191 xml.push_str(&format!(" <priority>{priority:.1}</priority>\n"));
192 }
193
194 for alt in &url.alternates {
196 xml.push_str(&format!(
197 r#" <xhtml:link rel="alternate" hreflang="{}" href="{}" />"#,
198 alt.hreflang,
199 escape_xml(&alt.href)
200 ));
201 xml.push('\n');
202 }
203
204 xml.push_str(" </url>\n");
205 xml
206 }
207
208 pub fn write_to<W: Write>(&self, pages: &[&Page], writer: &mut W) -> Result<()> {
210 let xml = self.generate(pages)?;
211 writer.write_all(xml.as_bytes())?;
212 Ok(())
213 }
214
215 pub fn generate_index(&self, sitemaps: &[&str]) -> String {
217 let mut xml = String::from(r#"<?xml version="1.0" encoding="UTF-8"?>"#);
218 xml.push('\n');
219 xml.push_str(r#"<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">"#);
220 xml.push('\n');
221
222 let now = Utc::now().format("%Y-%m-%d").to_string();
223
224 for sitemap in sitemaps {
225 xml.push_str(" <sitemap>\n");
226 xml.push_str(&format!(
227 " <loc>{}/{}</loc>\n",
228 self.config.site.base_url, sitemap
229 ));
230 xml.push_str(&format!(" <lastmod>{now}</lastmod>\n"));
231 xml.push_str(" </sitemap>\n");
232 }
233
234 xml.push_str("</sitemapindex>\n");
235 xml
236 }
237}
238
/// Escapes the five characters that have predefined XML entities.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&apos;`; every other character is copied unchanged. The input
/// is treated as raw text, so an `&` that already starts an entity is escaped
/// again.
fn escape_xml(s: &str) -> String {
    // Single pass with one allocation; escaped output is never shorter than
    // the input, so the input length is a safe lower bound for the capacity.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
247
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Single-language configuration rooted at `https://example.com`.
    fn test_config() -> Config {
        Config {
            site: typstify_core::config::SiteConfig {
                title: "Test Site".to_string(),
                base_url: "https://example.com".to_string(),
                default_language: "en".to_string(),
                languages: vec!["en".to_string()],
                description: None,
                author: None,
            },
            build: typstify_core::config::BuildConfig::default(),
            search: typstify_core::config::SearchConfig::default(),
            rss: typstify_core::config::RssConfig::default(),
            taxonomies: typstify_core::config::TaxonomyConfig::default(),
        }
    }

    /// Minimal page whose URL is `/<slug>` and whose title is the slug.
    fn test_page(slug: &str, date: Option<DateTime<Utc>>) -> Page {
        Page {
            url: format!("/{}", slug),
            title: slug.to_string(),
            description: None,
            date,
            updated: None,
            draft: false,
            lang: None,
            tags: vec![],
            categories: vec![],
            content: String::new(),
            summary: None,
            reading_time: None,
            word_count: None,
            toc: vec![],
            custom_js: vec![],
            custom_css: vec![],
            aliases: vec![],
            template: None,
            weight: 0,
            source_path: Some(PathBuf::from("test.md")),
        }
    }

    #[test]
    fn test_generate_sitemap() {
        let generator = SitemapGenerator::new(test_config());
        let page1 = test_page("about", None);
        let page2 = test_page("blog/post-1", Some(Utc::now()));
        let pages: Vec<&Page> = vec![&page1, &page2];

        let xml = generator.generate(&pages).unwrap();

        assert!(xml.contains(r#"<?xml version="1.0""#));
        assert!(xml.contains("<urlset"));
        assert!(xml.contains("<loc>https://example.com/about</loc>"));
        assert!(xml.contains("<loc>https://example.com/blog/post-1</loc>"));
        assert!(xml.contains("<changefreq>"));
        assert!(xml.contains("<priority>"));
    }

    #[test]
    fn test_escape_xml() {
        // Expected values must be the escaped entity forms, not the raw characters.
        assert_eq!(escape_xml("a & b"), "a &amp; b");
        assert_eq!(escape_xml("<tag>"), "&lt;tag&gt;");
        assert_eq!(escape_xml("\"quoted\""), "&quot;quoted&quot;");
    }

    #[test]
    fn test_home_page_priority() {
        let generator = SitemapGenerator::new(test_config());
        let mut home = test_page("", None);
        home.url = "/".to_string();

        let url = generator.page_to_url(&home);

        assert_eq!(url.priority, Some(1.0));
        assert_eq!(url.changefreq, Some(ChangeFreq::Daily));
    }

    #[test]
    fn test_generate_index() {
        let generator = SitemapGenerator::new(test_config());
        let sitemaps = vec!["sitemap-posts.xml", "sitemap-pages.xml"];

        let xml = generator.generate_index(&sitemaps);

        assert!(xml.contains("<sitemapindex"));
        assert!(xml.contains("sitemap-posts.xml"));
        assert!(xml.contains("sitemap-pages.xml"));
    }

    #[test]
    fn test_multilang_sitemap() {
        let mut config = test_config();
        config.site.languages = vec!["en".to_string(), "zh".to_string()];
        let generator = SitemapGenerator::new(config);

        let page = test_page("about", None);
        let pages: Vec<&Page> = vec![&page];

        let xml = generator.generate(&pages).unwrap();

        assert!(xml.contains("xmlns:xhtml"));
        assert!(xml.contains(r#"hreflang="en""#));
        assert!(xml.contains(r#"hreflang="zh""#));
    }
}