halldyll_core/sitemap/
parser.rs1use quick_xml::events::Event;
4use quick_xml::Reader;
5use url::Url;
6
7#[derive(Debug, Clone)]
9pub struct SitemapEntry {
10 pub loc: Url,
12 pub lastmod: Option<String>,
14 pub changefreq: Option<ChangeFreq>,
16 pub priority: Option<f32>,
18 pub images: Vec<SitemapImage>,
20 pub videos: Vec<SitemapVideo>,
22 pub news: Option<SitemapNews>,
24}
25
/// Change-frequency hint carried by a sitemap record's `<changefreq>` element.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChangeFreq {
    /// Keyword `always`.
    Always,
    /// Keyword `hourly`.
    Hourly,
    /// Keyword `daily`.
    Daily,
    /// Keyword `weekly`.
    Weekly,
    /// Keyword `monthly`.
    Monthly,
    /// Keyword `yearly`.
    Yearly,
    /// Keyword `never`.
    Never,
}

impl ChangeFreq {
    /// Maps a `<changefreq>` keyword to its variant.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive, so `"Daily"` and `" DAILY\n"` both yield
    /// `Some(ChangeFreq::Daily)`. Any other text (including the empty string)
    /// yields `None`.
    fn from_str(s: &str) -> Option<Self> {
        // Keyword table; looked up linearly, which is fine for seven entries
        // and avoids allocating a lowercased copy of the input.
        let keywords = [
            ("always", ChangeFreq::Always),
            ("hourly", ChangeFreq::Hourly),
            ("daily", ChangeFreq::Daily),
            ("weekly", ChangeFreq::Weekly),
            ("monthly", ChangeFreq::Monthly),
            ("yearly", ChangeFreq::Yearly),
            ("never", ChangeFreq::Never),
        ];

        let wanted = s.trim();
        keywords
            .iter()
            .find(|(keyword, _)| keyword.eq_ignore_ascii_case(wanted))
            .map(|(_, freq)| freq.clone())
    }
}
59
60#[derive(Debug, Clone)]
62pub struct SitemapImage {
63 pub loc: Url,
65 pub title: Option<String>,
67 pub caption: Option<String>,
69}
70
71#[derive(Debug, Clone)]
73pub struct SitemapVideo {
74 pub content_loc: Option<Url>,
76 pub player_loc: Option<Url>,
78 pub thumbnail_loc: Option<Url>,
80 pub title: Option<String>,
82 pub description: Option<String>,
84 pub duration: Option<u32>,
86}
87
/// News metadata attached to a [`SitemapEntry`] through its `news` field.
///
/// Every field is optional free-form text; no validation is performed by
/// this type.
///
/// NOTE(review): nothing visible in this file constructs this type; the field
/// names suggest the `<news:news>` sitemap extension — confirm with the
/// code that fills it.
#[derive(Debug, Clone)]
pub struct SitemapNews {
    /// Optional name of the publication.
    pub publication_name: Option<String>,

    /// Optional language of the publication, kept as raw text.
    pub publication_language: Option<String>,

    /// Optional publication date, kept as raw text (not parsed).
    pub publication_date: Option<String>,

    /// Optional article title.
    pub title: Option<String>,
}
100
/// Parser for sitemap XML documents.
///
/// This is a unit struct: it carries no configuration or state.
pub struct SitemapParser;
103
104impl Default for SitemapParser {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110impl SitemapParser {
111 pub fn new() -> Self {
113 Self
114 }
115
116 pub fn parse(&self, xml: &str) -> Vec<SitemapEntry> {
118 let mut entries = Vec::new();
119 let mut reader = Reader::from_str(xml);
120 reader.trim_text(true);
121
122 let mut current_entry: Option<PartialEntry> = None;
123 let mut current_tag = String::new();
124 let mut in_url = false;
125
126 loop {
127 match reader.read_event() {
128 Ok(Event::Start(ref e)) => {
129 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
130 current_tag = name.clone();
131
132 if name == "url" {
133 in_url = true;
134 current_entry = Some(PartialEntry::default());
135 }
136 }
137 Ok(Event::Text(e)) => {
138 if in_url {
139 if let Some(ref mut entry) = current_entry {
140 let text = e.unescape().unwrap_or_default().to_string();
141 match current_tag.as_str() {
142 "loc" => entry.loc = Some(text),
143 "lastmod" => entry.lastmod = Some(text),
144 "changefreq" => entry.changefreq = Some(text),
145 "priority" => entry.priority = text.parse().ok(),
146 _ => {}
147 }
148 }
149 }
150 }
151 Ok(Event::End(ref e)) => {
152 let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
153 if name == "url" {
154 in_url = false;
155 if let Some(entry) = current_entry.take() {
156 if let Some(loc_str) = entry.loc {
157 if let Ok(loc) = Url::parse(&loc_str) {
158 entries.push(SitemapEntry {
159 loc,
160 lastmod: entry.lastmod,
161 changefreq: entry.changefreq.and_then(|s| ChangeFreq::from_str(&s)),
162 priority: entry.priority,
163 images: Vec::new(),
164 videos: Vec::new(),
165 news: None,
166 });
167 }
168 }
169 }
170 }
171 }
172 Ok(Event::Eof) => break,
173 Err(_) => break,
174 _ => {}
175 }
176 }
177
178 entries
179 }
180
181 pub fn is_sitemap_index(xml: &str) -> bool {
183 xml.contains("<sitemapindex") || xml.contains("<sitemap>")
184 }
185}
186
/// Scratch record used while a `<url>` element is being read.
///
/// Every field starts as `None` (via `Default`) and is filled in as the
/// matching child element's text is encountered. `loc` and `changefreq` are
/// kept as raw text here and only converted when the element is complete.
#[derive(Default)]
struct PartialEntry {
    /// Raw text of `<loc>`, not yet parsed as a URL.
    loc: Option<String>,

    /// Raw text of `<lastmod>`.
    lastmod: Option<String>,

    /// Raw text of `<changefreq>`, not yet mapped to a `ChangeFreq`.
    changefreq: Option<String>,

    /// `<priority>` already parsed as a float; `None` if absent or unparsable.
    priority: Option<f32>,
}