use std::fmt;
/// A slug string wrapped in a newtype.
///
/// Values built by [`Slug::from_segment`] are never empty and contain only
/// ASCII lowercase letters, ASCII digits and `-`. The wrapped `String` is
/// private; read it through [`Slug::as_str`] or the `Display` impl.
///
/// Equality and ordering are derived, so they compare the wrapped string.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Slug(String);
/// Reasons a path segment cannot be turned into a [`Slug`].
///
/// Both variants carry the original, unmodified input segment so callers can
/// report which input failed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SlugError {
    /// Conversion produced no characters at all.
    Empty {
        /// The input segment as it was passed in.
        segment: String,
    },
    /// The segment contains a character that has no slug representation.
    Rejected {
        /// The input segment as it was passed in.
        segment: String,
        /// The character that could not be converted.
        offending: char,
    },
}

impl fmt::Display for SlugError {
    /// Writes a one-line, human-readable description of the failure.
    ///
    /// The segment and the offending character are rendered with their
    /// `Debug` formatting, i.e. quoted and escaped.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SlugError::Rejected { segment, offending } => f.write_fmt(format_args!(
                "character {offending:?} in {segment:?} cannot be used in a slug",
            )),
            SlugError::Empty { segment } => {
                f.write_fmt(format_args!("slug derived from {segment:?} is empty"))
            }
        }
    }
}

impl std::error::Error for SlugError {}
impl Slug {
    /// Builds a slug from a single path segment such as `"Foo Bar.md"`.
    ///
    /// One trailing `.md` is removed if present. The remaining characters are
    /// then converted one by one, in order:
    ///
    /// * ASCII lowercase letters, ASCII digits and `-` are kept as they are;
    /// * ASCII uppercase letters are lowercased;
    /// * a space or `_` becomes `-`;
    /// * any other character is passed to `fold_latin`, and whatever
    ///   replacement text it returns is appended.
    ///
    /// # Errors
    ///
    /// * [`SlugError::Rejected`] for the first character that `fold_latin`
    ///   has no replacement for.
    /// * [`SlugError::Empty`] when nothing is left to convert (for example
    ///   `""` or `".md"`).
    ///
    /// Both errors carry the original `segment`, including any `.md` suffix.
    pub fn from_segment(segment: &str) -> Result<Self, SlugError> {
        let stem = match segment.strip_suffix(".md") {
            Some(without_extension) => without_extension,
            None => segment,
        };

        let mut slug = String::with_capacity(stem.len());
        for ch in stem.chars() {
            if ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '-' {
                slug.push(ch);
            } else if ch.is_ascii_uppercase() {
                slug.push(ch.to_ascii_lowercase());
            } else if ch == ' ' || ch == '_' {
                slug.push('-');
            } else if let Some(replacement) = fold_latin(ch) {
                slug.push_str(replacement);
            } else {
                // Report the full input, not the stripped stem.
                return Err(SlugError::Rejected {
                    segment: segment.to_owned(),
                    offending: ch,
                });
            }
        }

        if slug.is_empty() {
            Err(SlugError::Empty {
                segment: segment.to_owned(),
            })
        } else {
            Ok(Self(slug))
        }
    }

    /// Borrows the slug text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
impl fmt::Display for Slug {
    /// Writes the slug text by delegating to the wrapped `String`'s
    /// `Display` implementation, passing the formatter through unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully qualified on purpose: this file imports only the `fmt`
        // module, so no formatting trait is in scope for `self.0.fmt(f)`,
        // and naming the trait also avoids any `Display`/`Debug` ambiguity.
        fmt::Display::fmt(&self.0, f)
    }
}
/// Maps an accented Latin letter or ligature to its lowercase ASCII
/// replacement text.
///
/// Both the uppercase and lowercase form of each letter fold to the same
/// lowercase replacement; ligatures and `ß` expand to two letters
/// (`"ae"`, `"oe"`, `"ss"`).
///
/// Returns `None` for every character that is not in the table, including
/// plain ASCII letters.
fn fold_latin(c: char) -> Option<&'static str> {
    let replacement = match c {
        'À' | 'Á' | 'Â' | 'Ã' | 'Ä' | 'Å' | 'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' => "a",
        'Æ' | 'æ' => "ae",
        'Ç' | 'ç' => "c",
        'È' | 'É' | 'Ê' | 'Ë' | 'è' | 'é' | 'ê' | 'ë' => "e",
        'Ì' | 'Í' | 'Î' | 'Ï' | 'ì' | 'í' | 'î' | 'ï' => "i",
        'Ñ' | 'ñ' => "n",
        'Ò' | 'Ó' | 'Ô' | 'Õ' | 'Ö' | 'Ø' | 'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ø' => "o",
        'Œ' | 'œ' => "oe",
        'ß' => "ss",
        'Ù' | 'Ú' | 'Û' | 'Ü' | 'ù' | 'ú' | 'û' | 'ü' => "u",
        // 'Ÿ' (U+0178) is the uppercase of 'ÿ'; it is listed so that both
        // cases of every mapped letter fold the same way.
        'Ý' | 'ý' | 'Ÿ' | 'ÿ' => "y",
        _ => return None,
    };
    Some(replacement)
}
/// Proptest strategies for slug inputs; compiled only for test builds.
#[cfg(test)]
pub mod strategy {
    use super::*;
    use proptest::prelude::*;

    /// Generates strings matching `[a-z0-9][a-z0-9-]{0,40}`: a lowercase
    /// letter or digit followed by up to 40 lowercase letters, digits or
    /// dashes.
    pub fn arb_canonical_segment() -> impl Strategy<Value = String> {
        let generator = proptest::string::string_regex("[a-z0-9][a-z0-9-]{0,40}");
        generator.unwrap()
    }

    /// Generates [`Slug`] values by converting canonical segments.
    pub fn arb_slug() -> impl Strategy<Value = Slug> {
        let segments = arb_canonical_segment();
        segments.prop_map(|segment| Slug::from_segment(&segment).unwrap())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `segment` and returns the slug text; panics if conversion fails.
    fn slug_text(segment: &str) -> String {
        Slug::from_segment(segment).unwrap().as_str().to_owned()
    }

    /// Returns the offending character when `segment` is rejected, and
    /// `None` for any other outcome.
    fn offending_char(segment: &str) -> Option<char> {
        match Slug::from_segment(segment) {
            Err(SlugError::Rejected { offending, .. }) => Some(offending),
            _ => None,
        }
    }

    #[test]
    fn strips_md_extension() {
        assert_eq!(slug_text("getting-started.md"), "getting-started");
    }

    #[test]
    fn lowercases_ascii() {
        assert_eq!(slug_text("Foo.md"), "foo");
    }

    #[test]
    fn spaces_become_dashes() {
        assert_eq!(slug_text("Foo Bar.md"), "foo-bar");
    }

    #[test]
    fn underscores_become_dashes() {
        assert_eq!(slug_text("foo_bar.md"), "foo-bar");
    }

    #[test]
    fn keeps_numeric_prefix() {
        assert_eq!(slug_text("01-intro.md"), "01-intro");
    }

    #[test]
    fn folds_common_accents() {
        let cases = [
            ("café.md", "cafe"),
            ("naïve.md", "naive"),
            ("œuvre.md", "oeuvre"),
            ("straße.md", "strasse"),
        ];
        for (segment, expected) in cases {
            assert_eq!(slug_text(segment), expected);
        }
    }

    #[test]
    fn rejects_question_mark() {
        assert_eq!(offending_char("a?b.md"), Some('?'));
    }

    #[test]
    fn rejects_hash() {
        assert_eq!(offending_char("a#b.md"), Some('#'));
    }

    #[test]
    fn rejects_ampersand() {
        assert_eq!(offending_char("a&b.md"), Some('&'));
    }

    #[test]
    fn rejects_inner_slash() {
        assert_eq!(offending_char("a/b.md"), Some('/'));
    }

    #[test]
    fn rejects_dot_inside_basename() {
        assert_eq!(offending_char("v1.2.md"), Some('.'));
    }

    #[test]
    fn rejects_unmapped_unicode() {
        assert!(offending_char("漢字.md").is_some());
    }

    #[test]
    fn empty_basename_is_rejected() {
        for segment in [".md", ""] {
            assert!(matches!(
                Slug::from_segment(segment),
                Err(SlugError::Empty { .. })
            ));
        }
    }

    #[test]
    fn segment_without_md_is_accepted() {
        assert_eq!(slug_text("foo"), "foo");
    }

    proptest::proptest! {
        // A canonical segment converts to itself.
        #[test]
        fn prop_canonical_segments_are_accepted(segment in strategy::arb_canonical_segment()) {
            assert_eq!(slug_text(&segment), segment);
        }

        // Converting an already converted slug changes nothing.
        #[test]
        fn prop_double_application_is_idempotent(segment in strategy::arb_canonical_segment()) {
            let first = Slug::from_segment(&segment).unwrap();
            let second = Slug::from_segment(first.as_str()).unwrap();
            assert_eq!(first, second);
        }

        // A `?` between two lowercase runs is always the rejected character.
        #[test]
        fn prop_question_mark_always_rejected(
            prefix in "[a-z]{1,5}", suffix in "[a-z]{1,5}",
        ) {
            let segment = format!("{prefix}?{suffix}.md");
            assert_eq!(offending_char(&segment), Some('?'));
        }
    }
}