use chrono::{DateTime, SecondsFormat, Utc};
use derive_builder::Builder;
use url::Url;
pub use url;
use quick_xml::{
events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event},
Writer,
};
use std::fmt::Display;
use std::io::Cursor;
/// How often a page is expected to change.
///
/// Each variant is rendered by the `Display` impl as the lowercase keyword
/// that is written into the `<changefreq>` element of a sitemap entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ChangeFreq {
    /// Rendered as `always`.
    Always,
    /// Rendered as `hourly`.
    Hourly,
    /// Rendered as `daily`.
    Daily,
    /// Rendered as `weekly`.
    Weekly,
    /// Rendered as `monthly`.
    Monthly,
    /// Rendered as `yearly`.
    Yearly,
    /// Rendered as `never`.
    Never,
}
impl Display for ChangeFreq {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let what = match self {
ChangeFreq::Always => "always",
ChangeFreq::Hourly => "hourly",
ChangeFreq::Daily => "daily",
ChangeFreq::Weekly => "weekly",
ChangeFreq::Monthly => "monthly",
ChangeFreq::Yearly => "yearly",
ChangeFreq::Never => "never",
};
f.write_str(what)
}
}
/// A single page entry of a sitemap.
///
/// Can be created with a struct literal, with `UrlEntry::new`, or through the
/// `UrlEntryBuilder` generated by the `Builder` derive. Because of
/// `setter(strip_option)`, the builder setters for the optional fields take
/// the inner value directly (e.g. `.priority(0.2)`).
#[derive(Debug, Clone, Builder)]
#[builder(setter(strip_option))]
pub struct UrlEntry {
/// Address of the page; written to the `<loc>` element. This is the only
/// field without a builder default.
pub loc: Url,
/// Last modification time; when present it is written to `<lastmod>` as an
/// RFC 3339 timestamp.
#[builder(default)]
pub lastmod: Option<DateTime<Utc>>,
/// Expected change frequency; when present it is written to `<changefreq>`.
#[builder(default)]
pub changefreq: Option<ChangeFreq>,
/// Priority of the page; when present it is written to `<priority>` with one
/// decimal place. No range check is applied to the value.
#[builder(default)]
pub priority: Option<f32>,
}
impl UrlEntry {
pub fn new(
loc: Url,
lastmod: Option<DateTime<Utc>>,
changefreq: Option<ChangeFreq>,
priority: Option<f32>,
) -> Self {
Self {
loc,
lastmod,
changefreq,
priority,
}
}
}
/// Entry point for sitemap generation.
///
/// The type holds no data; it only groups the associated functions
/// `generate`, `into_bytes` and `into_str`.
#[derive(Debug)]
pub struct Sitemap;
/// Writes a complete `<tag>text</tag>` element to `writer`.
///
/// `text` is handed to quick_xml as plain text through
/// `BytesText::from_plain_str`.
///
/// # Panics
///
/// Panics if writing the start tag, the text or the end tag fails. The panic
/// message names the step that failed and the tag involved.
fn write_tag<T>(writer: &mut Writer<T>, tag: &str, text: &str)
where
    T: std::io::Write,
{
    // The messages are built inside closures so that nothing is allocated on
    // the success path; the `msg: {:?}` shape mirrors what `expect` prints.
    writer
        .write_event(Event::Start(BytesStart::borrowed_name(tag.as_bytes())))
        .unwrap_or_else(|err| panic!("error opening {}: {:?}", tag, err));
    writer
        .write_event(Event::Text(BytesText::from_plain_str(text)))
        .unwrap_or_else(|err| panic!("error writing text to {}: {:?}", tag, err));
    writer
        .write_event(Event::End(BytesEnd::borrowed(tag.as_bytes())))
        .unwrap_or_else(|err| panic!("error closing {}: {:?}", tag, err));
}
impl Sitemap {
    /// Serializes `urls` as a sitemap XML document into `inner_writer`.
    ///
    /// The document consists of an XML declaration (version 1.0, UTF-8), a
    /// `<urlset>` root carrying the sitemap 0.9 namespace and one `<url>`
    /// element per entry, indented with four spaces. Optional fields that are
    /// `None` are left out. The children of `<url>` are written in the order
    /// the sitemap schema declares them: `loc`, `lastmod`, `changefreq`,
    /// `priority`.
    ///
    /// Returns the underlying writer once the document has been written.
    ///
    /// # Panics
    ///
    /// Panics if any write to the underlying writer fails.
    pub fn generate<T>(inner_writer: T, urls: &[UrlEntry]) -> T
    where
        T: std::io::Write,
    {
        let mut writer = Writer::new_with_indent(inner_writer, b' ', 4);
        writer
            .write_event(Event::Decl(BytesDecl::new(b"1.0", Some(b"UTF-8"), None)))
            .expect("error creating xml decl");

        let urlset_name = b"urlset";
        let mut urlset = BytesStart::borrowed_name(urlset_name);
        urlset.push_attribute(("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9"));
        writer
            .write_event(Event::Start(urlset))
            .expect("error opening urlset");

        for entry in urls {
            writer
                .write_event(Event::Start(BytesStart::borrowed_name(b"url")))
                .expect("error opening url");

            write_tag(&mut writer, "loc", entry.loc.as_str());
            if let Some(lastmod) = &entry.lastmod {
                // Whole-second precision, UTC written with a `Z` suffix.
                write_tag(
                    &mut writer,
                    "lastmod",
                    &lastmod.to_rfc3339_opts(SecondsFormat::Secs, true),
                );
            }
            // `changefreq` has to precede `priority`: the schema defines the
            // children of `<url>` as an ordered sequence.
            if let Some(changefreq) = &entry.changefreq {
                write_tag(&mut writer, "changefreq", &changefreq.to_string());
            }
            if let Some(priority) = &entry.priority {
                write_tag(&mut writer, "priority", &format!("{:.1}", priority));
            }

            writer
                .write_event(Event::End(BytesEnd::borrowed(b"url")))
                .expect("error closing url");
        }

        writer
            .write_event(Event::End(BytesEnd::borrowed(urlset_name)))
            .expect("error closing urlset");
        writer.into_inner()
    }

    /// Generates the sitemap for `urls` into a fresh in-memory buffer and
    /// returns the raw bytes.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`Sitemap::generate`].
    pub fn into_bytes(urls: &[UrlEntry]) -> Vec<u8> {
        Sitemap::generate(Cursor::new(Vec::new()), urls).into_inner()
    }

    /// Generates the sitemap for `urls` and returns it as a `String`.
    ///
    /// # Errors
    ///
    /// Returns the `Utf8Error` if the generated bytes are not valid UTF-8.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`Sitemap::generate`].
    pub fn into_str(urls: &[UrlEntry]) -> Result<String, std::str::Utf8Error> {
        // `String::from_utf8` takes ownership of the buffer, so the document
        // is validated in place instead of being copied a second time.
        String::from_utf8(Sitemap::into_bytes(urls)).map_err(|err| err.utf8_error())
    }
}
#[cfg(test)]
mod tests {
use crate::*;
#[test]
fn it_works() {
use chrono::Utc;
let urls = vec![
UrlEntryBuilder::default()
.loc("https://domain.com".parse().unwrap())
.priority(0.2)
.build()
.unwrap(),
UrlEntry::new(
"https://domain.com/some_url".parse().unwrap(),
None,
None,
None,
),
UrlEntry {
loc: "https://domain.com/another".parse().unwrap(),
priority: None,
changefreq: Some(ChangeFreq::Always),
lastmod: None,
},
UrlEntry {
loc: "https://domain.com/url".parse().unwrap(),
changefreq: Some(ChangeFreq::Daily),
priority: Some(0.8),
lastmod: Some(Utc::now()),
},
UrlEntry {
loc: "https://domain.com/aa".parse().unwrap(),
changefreq: Some(ChangeFreq::Monthly),
priority: None,
lastmod: None,
},
UrlEntry {
loc: "https://domain.com/bb".parse().unwrap(),
changefreq: None,
priority: None,
lastmod: None,
},
UrlEntry {
loc: "https://domain.com/bb&id='<test>'".parse().unwrap(),
changefreq: None,
priority: Some(0.4),
lastmod: None,
},
];
Sitemap::into_str(&urls).unwrap();
}
#[test]
fn changefreq_is_valid() {
assert_eq!(format!("{}", ChangeFreq::Always), "always");
assert_eq!(format!("{}", ChangeFreq::Hourly), "hourly");
assert_eq!(format!("{}", ChangeFreq::Daily), "daily");
assert_eq!(format!("{}", ChangeFreq::Weekly), "weekly");
assert_eq!(format!("{}", ChangeFreq::Monthly), "monthly");
assert_eq!(format!("{}", ChangeFreq::Yearly), "yearly");
assert_eq!(format!("{}", ChangeFreq::Never), "never");
}
}