/// Byte budget applied by `slugify_title`, and substituted by
/// `slugify_title_with` when it is called with a budget of `0`.
pub const DEFAULT_MAX_SLUG_BYTES: usize = 80;
/// Device names reserved by Windows. `slugify_title_with` compares against
/// these ignoring ASCII case and prefixes a matching slug with `_`.
const WINDOWS_RESERVED: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8",
"COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
];
/// Slugifies `title` under the default byte budget.
///
/// Convenience wrapper: forwards to [`slugify_title_with`] using
/// [`DEFAULT_MAX_SLUG_BYTES`] as the limit.
pub fn slugify_title(title: &str) -> String {
    let default_budget = DEFAULT_MAX_SLUG_BYTES;
    slugify_title_with(title, default_budget)
}
/// Converts `title` into a slug usable as a single file-name component that is
/// at most `max_bytes` bytes of UTF-8 long.
///
/// A `max_bytes` of `0` selects [`DEFAULT_MAX_SLUG_BYTES`].
///
/// Rules, applied in order:
///
/// 1. Every run of `-`, whitespace, control characters and the characters
///    `\ / : * ? " < > |` collapses into a single `-`.
/// 2. Leading `-` and any trailing mix of `-` and `.` are removed (Windows
///    silently drops trailing dots, so they must never survive).
/// 3. An empty result becomes `"untitled"`.
/// 4. If the stem (the text before the first `.`) matches an entry of
///    `WINDOWS_RESERVED`, ignoring ASCII case, the slug is prefixed with `_`.
///    This covers `CON` as well as `CON.txt`.
/// 5. A slug longer than the budget is cut at the nearest character boundary
///    at or below the budget, after which rules 2-4 are applied again so that
///    truncation cannot re-introduce a trailing separator or a reserved name.
///
/// The returned slug is never empty and never longer than the budget; when the
/// budget is too small to hold even one character of the title, a prefix of
/// `"untitled"` is returned instead.
pub fn slugify_title_with(title: &str, max_bytes: usize) -> String {
    const FALLBACK: &str = "untitled";

    // Windows resolves device names from the stem alone, so `NUL.tar.gz` is
    // just as reserved as `NUL`.
    fn is_windows_reserved(name: &str) -> bool {
        let stem = name.split('.').next().unwrap_or(name);
        WINDOWS_RESERVED
            .iter()
            .any(|reserved| reserved.eq_ignore_ascii_case(stem))
    }

    // Removes every trailing `-` and `.` in one pass, in place.
    fn strip_trailing_separators(slug: &mut String) {
        let kept = slug
            .trim_end_matches(|c: char| c == '-' || c == '.')
            .len();
        slug.truncate(kept);
    }

    // Shortens `slug` to at most `max` bytes without splitting a code point.
    fn truncate_at_char_boundary(slug: &mut String, max: usize) {
        if slug.len() <= max {
            return;
        }
        let mut end = max;
        while end > 0 && !slug.is_char_boundary(end) {
            end -= 1;
        }
        slug.truncate(end);
    }

    let budget = if max_bytes == 0 {
        DEFAULT_MAX_SLUG_BYTES
    } else {
        max_bytes
    };

    let mut slug = String::with_capacity(title.len());
    for ch in title.chars() {
        let forbidden = matches!(
            ch,
            '-' | '\\' | '/' | ':' | '*' | '?' | '"' | '<' | '>' | '|'
        ) || ch.is_control()
            || ch.is_whitespace();
        if !forbidden {
            slug.push(ch);
        } else if !slug.is_empty() && !slug.ends_with('-') {
            // Never emit a leading dash and never two dashes in a row.
            slug.push('-');
        }
    }

    strip_trailing_separators(&mut slug);
    if slug.is_empty() {
        slug.push_str(FALLBACK);
    }
    if is_windows_reserved(&slug) {
        slug.insert(0, '_');
    }

    if slug.len() > budget {
        truncate_at_char_boundary(&mut slug, budget);
        // The cut may have exposed a separator or produced a device name
        // (e.g. "CONTRACT" cut to "CON"), so re-establish both invariants.
        strip_trailing_separators(&mut slug);
        if slug.is_empty() {
            slug.push_str(FALLBACK);
        } else if is_windows_reserved(&slug) {
            slug.insert(0, '_');
        }
        // The fallback or the `_` prefix may have pushed the slug back over
        // the budget; both are ASCII, so this cut is always clean.
        truncate_at_char_boundary(&mut slug, budget);
    }
    slug
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `slugify_title(input) == expected` for every `(input, expected)`
    /// pair and names the offending input on failure.
    fn assert_slugs(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(slugify_title(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn plain_titles_pass_through() {
        assert_slugs(&[
            ("Patient A", "Patient-A"),
            ("Chapter 1", "Chapter-1"),
            ("Already-Safe_Name.v2", "Already-Safe_Name.v2"),
        ]);
    }

    #[test]
    fn forbidden_and_path_chars_become_dash_collapsed() {
        assert_slugs(&[
            (r#"Patient B / X-Ray"#, "Patient-B-X-Ray"),
            (r#"a:*?"<>|\\/b"#, "a-b"),
            ("a\u{0007}\u{0000}b", "a-b"),
            ("a\t\n b", "a-b"),
            ("a---b c", "a-b-c"),
        ]);
    }

    #[test]
    fn empty_and_whitespace_become_untitled() {
        assert_slugs(&[
            ("", "untitled"),
            (" ", "untitled"),
            ("\t\n", "untitled"),
            ("///", "untitled"),
            ("- - -", "untitled"),
        ]);
    }

    #[test]
    fn trailing_dot_and_space_stripped() {
        assert_slugs(&[
            ("report.", "report"),
            ("report . ", "report"),
            ("name...", "name"),
        ]);
    }

    #[test]
    fn windows_reserved_names_are_prefixed_case_insensitive() {
        assert_slugs(&[
            ("CON", "_CON"),
            ("con", "_con"),
            ("CoM1", "_CoM1"),
            ("LPT9", "_LPT9"),
            // Near misses must be left alone.
            ("COM10", "COM10"),
            ("CONTRACT", "CONTRACT"),
        ]);
    }

    #[test]
    fn truncates_at_byte_budget_without_splitting_codepoints() {
        let ascii = slugify_title(&"x".repeat(200));
        assert_eq!(ascii.len(), DEFAULT_MAX_SLUG_BYTES);

        // Each 'é' is two bytes, so a five-byte budget holds exactly two.
        assert_eq!(slugify_title_with("ééééé", 5), "éé");

        let greek = "αβγδε"; // five two-byte letters, ten bytes in total
        for budget in 1..12 {
            let slug = slugify_title_with(greek, budget);
            assert!(!slug.is_empty(), "budget: {}", budget);
            if budget >= 2 {
                // Must be the longest whole-letter prefix that fits.
                assert!(greek.starts_with(slug.as_str()), "budget: {}", budget);
                assert_eq!(slug.len(), budget.min(greek.len()) / 2 * 2);
            }
        }
    }

    #[test]
    fn zero_budget_falls_back_to_default() {
        let slug = slugify_title_with(&"y".repeat(300), 0);
        assert_eq!(slug.len(), DEFAULT_MAX_SLUG_BYTES);
    }

    #[test]
    fn pure_and_deterministic() {
        let title = r#"Combined / Record: "Final"."#;
        let first = slugify_title(title);
        let second = slugify_title(title);
        assert_eq!(first, second);

        // Repeated calls on the same input keep returning the same slug.
        for _ in 0..2 {
            assert_eq!(slugify_title("Record"), "Record");
        }
    }
}