halldyll_core/sitemap/
index.rs1use quick_xml::events::Event;
4use quick_xml::Reader;
5use url::Url;
6
7#[derive(Debug, Clone)]
9pub struct SitemapIndexEntry {
10 pub loc: Url,
12 pub lastmod: Option<String>,
14}
15
/// Stateless parser for sitemap index documents.
///
/// The type carries no data; it only groups the parsing and URL helper
/// functions defined in its `impl` block.
pub struct SitemapIndex;
18
19impl Default for SitemapIndex {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl SitemapIndex {
26 pub fn new() -> Self {
28 Self
29 }
30
31 pub fn parse(&self, xml: &str) -> Vec<SitemapIndexEntry> {
33 let mut entries = Vec::new();
34 let mut reader = Reader::from_str(xml);
35 reader.trim_text(true);
36
37 let mut current_entry: Option<PartialIndexEntry> = None;
38 let mut current_tag = String::new();
39 let mut in_sitemap = false;
40
41 loop {
42 match reader.read_event() {
43 Ok(Event::Start(ref e)) => {
44 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
45 current_tag = name.clone();
46
47 if name == "sitemap" {
48 in_sitemap = true;
49 current_entry = Some(PartialIndexEntry::default());
50 }
51 }
52 Ok(Event::Text(e)) => {
53 if in_sitemap {
54 if let Some(ref mut entry) = current_entry {
55 let text = e.unescape().unwrap_or_default().to_string();
56 match current_tag.as_str() {
57 "loc" => entry.loc = Some(text),
58 "lastmod" => entry.lastmod = Some(text),
59 _ => {}
60 }
61 }
62 }
63 }
64 Ok(Event::End(ref e)) => {
65 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
66 if name == "sitemap" {
67 in_sitemap = false;
68 if let Some(entry) = current_entry.take() {
69 if let Some(loc_str) = entry.loc {
70 if let Ok(loc) = Url::parse(&loc_str) {
71 entries.push(SitemapIndexEntry {
72 loc,
73 lastmod: entry.lastmod,
74 });
75 }
76 }
77 }
78 }
79 }
80 Ok(Event::Eof) => break,
81 Err(_) => break,
82 _ => {}
83 }
84 }
85
86 entries
87 }
88
89 pub fn default_sitemap_url(base_url: &Url) -> Option<Url> {
91 let sitemap_url = format!("{}://{}/sitemap.xml", base_url.scheme(), base_url.host_str()?);
92 Url::parse(&sitemap_url).ok()
93 }
94
95 pub fn common_sitemap_paths() -> Vec<&'static str> {
97 vec![
98 "/sitemap.xml",
99 "/sitemap_index.xml",
100 "/sitemap-index.xml",
101 "/sitemaps.xml",
102 "/sitemap/sitemap.xml",
103 "/wp-sitemap.xml",
104 "/post-sitemap.xml",
105 "/page-sitemap.xml",
106 ]
107 }
108}
109
/// Accumulator for the children of a `<sitemap>` element while it is being
/// parsed; both fields start out as `None`.
#[derive(Default)]
struct PartialIndexEntry {
    /// Text collected from `<loc>`, not yet validated as a URL.
    loc: Option<String>,
    /// Text collected from `<lastmod>`.
    lastmod: Option<String>,
}