1use log::{error, warn};
2use std::fmt::Debug;
3use std::str::FromStr;
4
/// Error returned when a string cannot be parsed into a [`Frequency`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum FrequencyParseError {
    /// The input did not match any of the accepted frequency names.
    InvalidFrequency,
}

impl std::fmt::Display for FrequencyParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidFrequency => f.write_str(
                "invalid change frequency; expected one of: \
                 always, hourly, daily, weekly, monthly, yearly, never",
            ),
        }
    }
}

// Implementing the standard error trait lets callers box this error or
// propagate it with `?` into generic error types.
impl std::error::Error for FrequencyParseError {}
/// Change frequency attached to a URL entry.
///
/// Values are obtained by parsing the text of a `<changefreq>` element via the
/// [`FromStr`] implementation, which matches names ASCII-case-insensitively.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Frequency {
    /// Parsed from `always`.
    Always,
    /// Parsed from `hourly`.
    Hourly,
    /// Parsed from `daily`.
    Daily,
    /// Parsed from `weekly`.
    Weekly,
    /// Parsed from `monthly`.
    Monthly,
    /// Parsed from `yearly`.
    Yearly,
    /// Parsed from `never`.
    Never,
}
20impl FromStr for Frequency {
21 type Err = FrequencyParseError;
22 fn from_str(s: &str) -> Result<Self, Self::Err> {
23 Ok(if s.eq_ignore_ascii_case("always") {
24 Self::Always
25 } else if s.eq_ignore_ascii_case("hourly") {
26 Self::Hourly
27 } else if s.eq_ignore_ascii_case("daily") {
28 Self::Daily
29 } else if s.eq_ignore_ascii_case("weekly") {
30 Self::Weekly
31 } else if s.eq_ignore_ascii_case("monthly") {
32 Self::Monthly
33 } else if s.eq_ignore_ascii_case("yearly") {
34 Self::Yearly
35 } else if s.eq_ignore_ascii_case("never") {
36 Self::Never
37 } else {
38 return Err(FrequencyParseError::InvalidFrequency);
39 })
40 }
41}
/// One entry yielded by [`Document::iterate`].
///
/// The string fields are slices borrowed for `'a`; nothing is copied.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct UrlEntry<'a> {
    /// Text of the entry's `<loc>` element. Always present: entries without a
    /// `<loc>` (or with more than one) are not yielded.
    pub location: &'a str,
    /// Text of the entry's `<lastmod>` element, passed through unparsed.
    /// `None` when the element is absent or has no text.
    pub last_modified: Option<&'a str>,
    /// Parsed `<changefreq>` value. `None` when the element is absent or its
    /// text is not a recognised [`Frequency`] name.
    pub change_frequency: Option<Frequency>,
    /// Parsed `<priority>` value. Only set when the text parses as a number
    /// within `0.0..=1.0`; otherwise `None`.
    pub priority: Option<f32>,
}
70#[derive(Debug, PartialEq, Eq, Clone)]
71pub enum Error {
72 UrlsetMissing,
76 Parse(roxmltree::Error),
77}
/// A parsed XML document from which [`UrlEntry`] values can be iterated.
///
/// Created with [`Document::parse`]; borrows the source text for `'a`.
pub struct Document<'a> {
    /// The underlying parsed XML tree.
    doc: roxmltree::Document<'a>,
}
81impl<'a> Document<'a> {
82 pub fn parse(xml_document: &'a str) -> Result<Self, Error> {
84 roxmltree::Document::parse(xml_document)
85 .map_err(Error::Parse)
86 .map(|doc| Self { doc })
87 }
88 pub fn iterate(
92 &'a self,
93 ) -> Result<impl Iterator<Item = UrlEntry<'a>> + DoubleEndedIterator + Clone + Debug + 'a, Error>
94 {
95 self.doc
96 .root()
97 .children()
98 .find(|c| c.is_element())
99 .and_then(|node| {
100 if node.tag_name().name() == "urlset" {
101 Some(node)
102 } else {
103 error!("Expected <urlset> but got {:?}", node);
104 None
105 }
106 })
107 .map(|node| {
108 node.children().filter_map(|c| {
109 let children = c.children().filter(|c| c.is_element());
110 let mut loc = None;
111 let mut lastmod = None;
112 let mut changefreq = None;
113 let mut priority = None;
114 for child in children {
115 if let Some(text) = node_text_expected_name(&child, "loc") {
116 if loc.is_none() {
117 loc = Some(text);
118 } else {
119 error!("Multiple <loc> in entry.");
120 return None;
121 }
122 } else if let Some(text) = node_text_expected_name(&child, "lastmod") {
123 if lastmod.is_some() {
124 warn!("Multiple <lastmod> in entry.");
125 }
126 lastmod = Some(text);
127 } else if let Some(text) = node_text_expected_name(&child, "changefreq") {
128 if changefreq.is_some() {
129 warn!("Multiple <changefreq> in entry.");
130 }
131 if let Ok(frequency) = text.parse() {
132 changefreq = Some(frequency);
133 } else {
134 warn!("<changefreq> has invalid format: {text:?}");
135 }
136 } else if let Some(text) = node_text_expected_name(&child, "priority") {
137 if priority.is_some() {
138 warn!("Multiple <priority> in entry.");
139 }
140 if let Ok(num) = text.parse() {
141 if (0.0..=1.0).contains(&num) {
142 priority = Some(num)
143 } else {
144 warn!("<priority> {num} is out of range",)
145 }
146 }else {
147 warn!("<priority> has invalid format: {text:?}. Expected floating-point number.");
148 }
149 }
150 }
151 if let Some(loc) = loc {
152 Some(UrlEntry::<'a> {
153 location: loc,
154 last_modified: lastmod,
155 change_frequency: changefreq,
156 priority,
157 })
158 } else {
159 error!("Expected <loc>, but found none.");
160 None
161 }
162 })
163 })
164 .ok_or(Error::UrlsetMissing)
165 }
166}
167fn node_text_expected_name<'a>(
168 node: &roxmltree::Node<'a, 'a>,
169 expected_tag: &str,
170) -> Option<&'a str> {
171 if node.tag_name().name() == expected_tag {
172 if let Some(text) = node.text() {
173 return Some(text);
174 }
175 }
176 None
177}