use std::borrow::Cow;
/// Splits `filename` into `(base, extension)`, where the extension keeps its
/// leading dot and its original casing.
///
/// Rules, in order:
/// * A plain dotfile (leading dot and no other dot, e.g. `.gitignore`) is
///   returned as `("", filename)`.
/// * A known compound archive suffix (`.tar.gz`, `.tar.bz2`, `.tar.xz`,
///   `.tar.zst`) is matched ASCII-case-insensitively and kept as one extension.
/// * Otherwise the split happens at the last dot, unless that dot is the first
///   character; with no usable dot the result is `(filename, "")`.
///
/// Both returned slices borrow from `filename`; nothing is allocated.
#[must_use]
pub fn split_extension(filename: &str) -> (&str, &str) {
    const COMPOUND: &[&str] = &[".tar.gz", ".tar.bz2", ".tar.xz", ".tar.zst"];

    if let Some(rest) = filename.strip_prefix('.') {
        if !rest.contains('.') {
            return ("", filename);
        }
    }

    let bytes = filename.as_bytes();
    for ext in COMPOUND {
        // Compare the trailing bytes of `filename` itself instead of a lowercased
        // copy: no allocation, and the offset always refers to the string we slice.
        if let Some(base_end) = bytes.len().checked_sub(ext.len()) {
            if bytes[base_end..].eq_ignore_ascii_case(ext.as_bytes()) {
                // The matched suffix is pure ASCII, so `base_end` is a char boundary.
                return filename.split_at(base_end);
            }
        }
    }

    match filename.rfind('.') {
        Some(pos) if pos > 0 => filename.split_at(pos),
        _ => (filename, ""),
    }
}
/// Word-joining style applied to the slugified name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Style {
/// Lowercase words joined with `-`; this is the default style.
#[default]
Kebab,
/// Lowercase words joined with `_`.
Snake,
/// Words concatenated without a separator; every word after the first has
/// its first character uppercased.
Camel,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SlugifyOptions {
pub style: Style,
pub keep_unicode: bool,
}
impl Default for SlugifyOptions {
fn default() -> Self {
Self {
style: Style::Kebab,
keep_unicode: false,
}
}
}
/// Placeholder that stands in for a dot inside a version number (`1.2.3`)
/// while the text is split into words, so that the dot is not treated as a
/// word separator.
const VERSION_DOT: char = '\x01';

/// Replaces every dot that sits between two runs of ASCII digits with
/// [`VERSION_DOT`], leaving all other dots untouched.
///
/// A run such as `1.2.3` becomes `1<VERSION_DOT>2<VERSION_DOT>3`; a dot that is
/// not directly followed by a digit (`7.txt`, `1.`) or not directly preceded by
/// one (`v.1`) is copied through unchanged.
///
/// A literal [`VERSION_DOT`] already present in `input` is replaced with a
/// space. Otherwise it would be indistinguishable from a real placeholder and
/// would later be turned into a `.` that the caller never wrote (for example
/// `"\x01\x01"` would end up as `".."`).
fn preserve_version_dots(input: &str) -> String {
    let bytes = input.as_bytes();
    let len = bytes.len();
    // Index just past the run of ASCII digits that starts at `from`.
    let digits_end = |from: usize| {
        from + bytes[from..]
            .iter()
            .take_while(|b| b.is_ascii_digit())
            .count()
    };

    let mut result = String::with_capacity(len);
    let mut i = 0;
    while i < len {
        if !bytes[i].is_ascii_digit() {
            // Copy everything up to the next digit. Digits are ASCII, so both
            // ends of this slice are guaranteed to be char boundaries.
            let run_end = bytes[i..]
                .iter()
                .position(u8::is_ascii_digit)
                .map_or(len, |offset| i + offset);
            result.extend(
                input[i..run_end]
                    .chars()
                    .map(|ch| if ch == VERSION_DOT { ' ' } else { ch }),
            );
            i = run_end;
            continue;
        }

        let start = i;
        i = digits_end(i);
        let mut is_version = false;
        // Each ".<digits>" group extends the version; a dot that is not
        // followed by a digit ends it and is left for the next iteration.
        while i + 1 < len && bytes[i] == b'.' && bytes[i + 1].is_ascii_digit() {
            i = digits_end(i + 1);
            is_version = true;
        }

        let run = &input[start..i];
        if is_version {
            result.extend(
                run.chars()
                    .map(|ch| if ch == '.' { VERSION_DOT } else { ch }),
            );
        } else {
            result.push_str(run);
        }
    }
    result
}
/// Turns every `VERSION_DOT` placeholder in `input` back into a literal `.`,
/// copying all other characters through unchanged.
fn restore_version_dots(input: &str) -> String {
    input
        .chars()
        .map(|ch| if ch == VERSION_DOT { '.' } else { ch })
        .collect()
}
/// Upper bound, in bytes, passed to `truncate_base` for the combined length of
/// a slugified base name and its extension.
const MAX_FILENAME_BYTES: usize = 255;

/// Shortens `base` so that `base` followed by `ext` fits in `max_bytes` bytes.
///
/// * If it already fits, `base` is returned unchanged.
/// * The cut never splits a multi-byte character.
/// * When the cut lands in the middle of a word, the partial word and the
///   separator (`-` or `_`) before it are dropped.
/// * When the cut lands exactly at the end of a word (the next character is a
///   separator), that complete word is kept rather than discarded.
/// * If there is no separator to fall back on, the hard cut is returned.
///
/// If `ext` alone is at least `max_bytes` long, the result is empty; the
/// extension itself is never shortened here.
fn truncate_base(base: &str, ext: &str, max_bytes: usize) -> String {
    const SEPARATORS: [char; 2] = ['-', '_'];

    let budget = max_bytes.saturating_sub(ext.len());
    if base.len() <= budget {
        return base.to_string();
    }

    // Back up to the nearest char boundary at or below the budget.
    let cut = (0..=budget)
        .rev()
        .find(|&i| base.is_char_boundary(i))
        .unwrap_or(0);
    let (head, tail) = base.split_at(cut);

    // The cut fell exactly between a word and the separator that follows it,
    // so `head` already ends on a whole word and nothing more needs to go.
    if tail.starts_with(SEPARATORS) && !head.is_empty() && !head.ends_with(SEPARATORS) {
        return head.to_string();
    }

    match head.rfind(SEPARATORS) {
        Some(pos) if pos > 0 => head[..pos].to_string(),
        _ => head.to_string(),
    }
}
#[must_use]
pub fn slugify<'a>(filename: &'a str, options: &SlugifyOptions) -> Cow<'a, str> {
if filename.is_empty() {
return Cow::Borrowed("");
}
let (base, ext) = split_extension(filename);
if base.is_empty() {
return Cow::Borrowed(filename);
}
let is_dotfile = base.starts_with('.');
let base = if options.keep_unicode {
base.to_string()
} else {
any_ascii::any_ascii(base)
};
let base = base.replace(['(', ')', '[', ']', '{', '}'], " ");
let base = preserve_version_dots(&base);
let words: Vec<String> = if options.keep_unicode {
base.split(|c: char| !c.is_alphanumeric() && c != VERSION_DOT)
.filter(|s| !s.is_empty())
.map(str::to_lowercase)
.collect()
} else {
base.split(|c: char| !c.is_ascii_alphanumeric() && c != VERSION_DOT)
.filter(|s| !s.is_empty())
.map(str::to_lowercase)
.collect()
};
if words.is_empty() {
return Cow::Owned(ext.to_string());
}
let slugified = match options.style {
Style::Kebab => words.join("-"),
Style::Snake => words.join("_"),
Style::Camel => {
let mut result = String::new();
for (i, word) in words.iter().enumerate() {
if i == 0 {
result.push_str(word);
} else {
let mut chars = word.chars();
if let Some(first) = chars.next() {
result.extend(first.to_uppercase());
result.push_str(chars.as_str());
}
}
}
result
}
};
let slugified = restore_version_dots(&slugified);
let slugified = if is_dotfile {
format!(".{slugified}")
} else {
slugified
};
let slugified = truncate_base(&slugified, ext, MAX_FILENAME_BYTES);
Cow::Owned(format!("{slugified}{ext}"))
}
#[must_use]
pub fn slugify_string<'a>(input: &'a str, options: &SlugifyOptions) -> Cow<'a, str> {
if input.is_empty() {
return Cow::Borrowed("");
}
let text = if options.keep_unicode {
input.to_string()
} else {
any_ascii::any_ascii(input)
};
let text = text.replace(['(', ')', '[', ']', '{', '}'], " ");
let text = preserve_version_dots(&text);
let words: Vec<String> = if options.keep_unicode {
text.split(|c: char| !c.is_alphanumeric() && c != VERSION_DOT)
.filter(|s| !s.is_empty())
.map(str::to_lowercase)
.collect()
} else {
text.split(|c: char| !c.is_ascii_alphanumeric() && c != VERSION_DOT)
.filter(|s| !s.is_empty())
.map(str::to_lowercase)
.collect()
};
if words.is_empty() {
return Cow::Owned(String::new());
}
let slugified = match options.style {
Style::Kebab => words.join("-"),
Style::Snake => words.join("_"),
Style::Camel => {
let mut result = String::new();
for (i, word) in words.iter().enumerate() {
if i == 0 {
result.push_str(word);
} else {
let mut chars = word.chars();
if let Some(first) = chars.next() {
result.extend(first.to_uppercase());
result.push_str(chars.as_str());
}
}
}
result
}
};
let slugified = restore_version_dots(&slugified);
let slugified = truncate_base(&slugified, "", MAX_FILENAME_BYTES);
Cow::Owned(slugified)
}
// Unit tests for extension splitting, slugification, version-dot handling and
// length truncation.
#[cfg(test)]
mod tests {
use super::*;
// ---------------------------------------------------------------------------
// split_extension
// ---------------------------------------------------------------------------
#[test]
fn test_split_simple_extension() {
assert_eq!(split_extension("hello.txt"), ("hello", ".txt"));
}
#[test]
fn test_split_compound_extension() {
assert_eq!(split_extension("archive.tar.gz"), ("archive", ".tar.gz"));
assert_eq!(split_extension("backup.tar.bz2"), ("backup", ".tar.bz2"));
assert_eq!(split_extension("data.tar.xz"), ("data", ".tar.xz"));
assert_eq!(split_extension("logs.tar.zst"), ("logs", ".tar.zst"));
}
// A plain dotfile has an empty base; the whole name counts as the extension.
#[test]
fn test_split_dotfile() {
assert_eq!(split_extension(".gitignore"), ("", ".gitignore"));
assert_eq!(split_extension(".env"), ("", ".env"));
}
#[test]
fn test_split_no_extension() {
assert_eq!(split_extension("Makefile"), ("Makefile", ""));
assert_eq!(split_extension("README"), ("README", ""));
}
// Only the last dot separates the extension.
#[test]
fn test_split_multiple_dots() {
assert_eq!(split_extension("my.cool.file.txt"), ("my.cool.file", ".txt"));
}
// NOTE(review): despite its name this checks `.bashrc`, which has no second
// dot, so it exercises the same path as `test_split_dotfile`. The two-dot case
// is covered by `test_split_dotfile_with_second_extension`.
#[test]
fn test_split_dotfile_with_extension() {
assert_eq!(split_extension(".bashrc"), ("", ".bashrc"));
}
// ---------------------------------------------------------------------------
// slugify: styles and basic behavior
// ---------------------------------------------------------------------------
#[test]
fn test_slugify_basic_kebab() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("My Cool File.txt", &opts), "my-cool-file.txt");
}
#[test]
fn test_slugify_snake() {
let opts = SlugifyOptions { style: Style::Snake, ..Default::default() };
assert_eq!(slugify("My Cool File.txt", &opts), "my_cool_file.txt");
}
#[test]
fn test_slugify_camel() {
let opts = SlugifyOptions { style: Style::Camel, ..Default::default() };
assert_eq!(slugify("my cool file.txt", &opts), "myCoolFile.txt");
}
// With default options accented letters are transliterated to ASCII.
#[test]
fn test_slugify_unicode_transliterate() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("Café Résumé.txt", &opts), "cafe-resume.txt");
}
// With `keep_unicode` the accented letters survive (lowercased).
#[test]
fn test_slugify_keep_unicode() {
let opts = SlugifyOptions { keep_unicode: true, ..Default::default() };
assert_eq!(slugify("Café Résumé.txt", &opts), "café-résumé.txt");
}
// Brackets are removed but the text inside them is kept as words.
#[test]
fn test_slugify_brackets_stripped_contents_kept() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("Report (Final) [2024].txt", &opts), "report-final-2024.txt");
}
#[test]
fn test_slugify_compound_extension() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("My Archive File.tar.gz", &opts), "my-archive-file.tar.gz");
}
#[test]
fn test_slugify_dotfile_untouched() {
let opts = SlugifyOptions::default();
assert_eq!(slugify(".gitignore", &opts), ".gitignore");
}
// An already-clean name must come back unchanged.
#[test]
fn test_slugify_already_clean() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("already-clean.txt", &opts), "already-clean.txt");
}
#[test]
fn test_slugify_special_chars() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("file@name#with$symbols.txt", &opts), "file-name-with-symbols.txt");
}
#[test]
fn test_slugify_collapses_separators() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("too many spaces.txt", &opts), "too-many-spaces.txt");
}
#[test]
fn test_slugify_trims_separators() {
let opts = SlugifyOptions::default();
assert_eq!(slugify(" leading and trailing .txt", &opts), "leading-and-trailing.txt");
}
#[test]
fn test_slugify_no_extension() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("My Makefile", &opts), "my-makefile");
}
// Transliteration, bracket stripping and a compound extension combined.
#[test]
fn test_slugify_full_pipeline() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("Café Résumé (Final Copy) [2024].tar.gz", &opts), "cafe-resume-final-copy-2024.tar.gz");
}
#[test]
fn test_slugify_curly_braces() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("file {draft}.txt", &opts), "file-draft.txt");
}
#[test]
fn test_slugify_camel_multiple_words() {
let opts = SlugifyOptions { style: Style::Camel, ..Default::default() };
assert_eq!(slugify("Hello World Foo Bar.txt", &opts), "helloWorldFooBar.txt");
}
// ---------------------------------------------------------------------------
// slugify: degenerate inputs and hidden files
// ---------------------------------------------------------------------------
#[test]
fn test_slugify_empty_string() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("", &opts), "");
}
// When the base contains no word characters only the extension remains.
#[test]
fn test_slugify_only_special_chars() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("@#$.txt", &opts), ".txt");
}
#[test]
fn test_slugify_numbers_only() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("12345.txt", &opts), "12345.txt");
}
// The leading dot of a hidden file is kept on the slugified base.
#[test]
fn test_slugify_dotfile_with_extension() {
let opts = SlugifyOptions::default();
assert_eq!(slugify(".env.local", &opts), ".env.local");
}
#[test]
fn test_slugify_hidden_config_multipart() {
let opts = SlugifyOptions::default();
assert_eq!(slugify(".config.backup.old", &opts), ".config-backup.old");
}
#[test]
fn test_split_dotfile_with_second_extension() {
assert_eq!(split_extension(".env.local"), (".env", ".local"));
}
#[test]
fn test_slugify_only_special_no_ext() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("@@@", &opts), "");
}
// ---------------------------------------------------------------------------
// slugify: shell metacharacters and control characters in the base name
// ---------------------------------------------------------------------------
#[test]
fn test_slugify_shell_injection() {
let opts = SlugifyOptions::default();
let result = slugify("$(echo pwned).txt", &opts);
assert!(!result.contains('$'));
assert!(!result.contains('('));
assert!(!result.contains(')'));
assert_eq!(result, "echo-pwned.txt");
}
#[test]
fn test_slugify_backticks() {
let opts = SlugifyOptions::default();
let result = slugify("`rm -rf /`.txt", &opts);
assert!(!result.contains('`'));
assert_eq!(result, "rm-rf.txt");
}
#[test]
fn test_slugify_pipe_redirect() {
let opts = SlugifyOptions::default();
let result = slugify("file|name>output.txt", &opts);
assert!(!result.contains('|'));
assert!(!result.contains('>'));
assert_eq!(result, "file-name-output.txt");
}
// A newline acts as a word separator.
#[test]
fn test_slugify_newline_in_name() {
let opts = SlugifyOptions::default();
let result = slugify("file\nname.txt", &opts);
assert!(!result.contains('\n'));
assert_eq!(result, "file-name.txt");
}
// ---------------------------------------------------------------------------
// slugify: non-Latin scripts and unusual Unicode
// ---------------------------------------------------------------------------
// Only checks that the result is ASCII and keeps the extension; the exact
// transliteration of the emoji is not pinned.
#[test]
fn test_slugify_emoji() {
let opts = SlugifyOptions::default();
let result = slugify("🎉.txt", &opts);
assert!(result.ends_with(".txt"));
assert!(result.is_ascii());
}
#[test]
fn test_slugify_cjk() {
let opts = SlugifyOptions::default();
let result = slugify("你好世界.txt", &opts);
assert!(result.ends_with(".txt"));
assert!(result.is_ascii());
assert!(result.len() > ".txt".len(), "CJK should transliterate to something");
}
#[test]
fn test_slugify_rtl_arabic() {
let opts = SlugifyOptions::default();
let result = slugify("مرحبا.txt", &opts);
assert!(result.ends_with(".txt"));
assert!(result.is_ascii());
assert!(result.len() > ".txt".len(), "Arabic should transliterate to something");
}
// `e` + combining acute (decomposed) must slugify the same as precomposed `é`.
#[test]
fn test_slugify_combining_char() {
let opts = SlugifyOptions::default();
let decomposed = slugify("caf\u{0065}\u{0301}.txt", &opts);
let precomposed = slugify("caf\u{00e9}.txt", &opts);
assert_eq!(decomposed, precomposed);
}
// Expects the zero-width space to vanish without splitting the word.
#[test]
fn test_slugify_zero_width_chars() {
let opts = SlugifyOptions::default();
let result = slugify("hello\u{200B}world.txt", &opts);
assert!(!result.contains('\u{200B}'));
assert_eq!(result, "helloworld.txt");
}
#[test]
fn test_slugify_very_long_name() {
let opts = SlugifyOptions::default();
let long_name = "a".repeat(255) + ".txt";
let result = slugify(&long_name, &opts);
assert!(result.len() <= 255, "result is {} bytes", result.len());
assert!(result.ends_with(".txt"));
assert!(!result.is_empty());
}
#[test]
fn test_slugify_already_numbered() {
let opts = SlugifyOptions::default();
assert_eq!(slugify("file-2.txt", &opts), "file-2.txt");
}
// The compound extension is recognized regardless of case and its original
// casing is kept in the output.
#[test]
fn test_slugify_compound_ext_mixed_case() {
let opts = SlugifyOptions::default();
let result = slugify("Archive.TAR.GZ", &opts);
assert_eq!(result, "archive.TAR.GZ");
}
// ---------------------------------------------------------------------------
// preserve_version_dots / restore_version_dots
// (`\x01` in the expected strings is the VERSION_DOT placeholder)
// ---------------------------------------------------------------------------
#[test]
fn test_preserve_version_dots_simple() {
assert_eq!(preserve_version_dots("foo-0.8.34-bar"), "foo-0\x018\x0134-bar");
}
#[test]
fn test_preserve_version_dots_semver() {
assert_eq!(preserve_version_dots("app-1.2.3"), "app-1\x012\x013");
}
#[test]
fn test_preserve_version_dots_two_part() {
assert_eq!(preserve_version_dots("app-7.20"), "app-7\x0120");
}
#[test]
fn test_preserve_version_dots_no_version() {
assert_eq!(preserve_version_dots("hello-world"), "hello-world");
}
#[test]
fn test_preserve_version_dots_letters_not_matched() {
assert_eq!(preserve_version_dots("a.b.c"), "a.b.c");
}
// A dot after a digit is only protected when another digit follows it.
#[test]
fn test_preserve_version_dots_not_followed_by_digit() {
assert_eq!(preserve_version_dots("7.txt"), "7.txt");
}
#[test]
fn test_preserve_version_dots_adjacent_to_letters() {
assert_eq!(preserve_version_dots("istatmenus7.20"), "istatmenus7\x0120");
}
#[test]
fn test_preserve_version_dots_multiple_versions() {
assert_eq!(preserve_version_dots("2.10-2.12.26"), "2\x0110-2\x0112\x0126");
}
#[test]
fn test_restore_version_dots() {
assert_eq!(restore_version_dots("app-1\x012\x013"), "app-1.2.3");
}
#[test]
fn test_restore_version_dots_no_placeholder() {
assert_eq!(restore_version_dots("hello-world"), "hello-world");
}
// ---------------------------------------------------------------------------
// slugify: truncation to 255 bytes
// ---------------------------------------------------------------------------
#[test]
fn test_slugify_truncates_long_name() {
let opts = SlugifyOptions::default();
let long_name = "a".repeat(300) + ".txt";
let result = slugify(&long_name, &opts);
assert!(result.len() <= 255, "result is {} bytes", result.len());
assert!(result.ends_with(".txt"));
}
// Truncation must not leave a dangling separator at the end of the base.
#[test]
fn test_slugify_truncates_at_separator_boundary() {
let opts = SlugifyOptions::default();
let words: Vec<&str> = std::iter::repeat_n("abcdefgh", 30).collect();
let long_name = words.join(" ") + ".txt";
let result = slugify(&long_name, &opts);
assert!(result.len() <= 255, "result is {} bytes", result.len());
assert!(result.ends_with(".txt"));
let (base, _ext) = split_extension(&result);
assert!(!base.ends_with('-'), "should not have trailing separator: {result}");
}
// The limit applies to the transliterated output, not to the input length.
#[test]
fn test_slugify_truncates_cjk_expansion() {
let opts = SlugifyOptions::default();
let cjk = "你".repeat(200) + ".txt";
let result = slugify(&cjk, &opts);
assert!(result.len() <= 255, "result is {} bytes", result.len());
assert!(result.ends_with(".txt"));
}
// 250 base bytes + 4 extension bytes = 254, which is under the limit, so
// nothing is cut.
#[test]
fn test_slugify_no_truncation_under_limit() {
let opts = SlugifyOptions::default();
let name = "a".repeat(250) + ".txt";
let result = slugify(&name, &opts);
assert_eq!(result.len(), 254);
}
#[test]
fn test_slugify_truncation_preserves_long_extension() {
let opts = SlugifyOptions::default();
let long_name = "a".repeat(300) + ".tar.gz";
let result = slugify(&long_name, &opts);
assert!(result.len() <= 255, "result is {} bytes", result.len());
assert!(result.ends_with(".tar.gz"));
}
// `é` is two bytes in UTF-8; truncation must not panic by cutting inside a
// character.
#[test]
fn test_slugify_truncation_keep_unicode_multibyte() {
let opts = SlugifyOptions { keep_unicode: true, ..Default::default() };
let long_name = "é".repeat(200) + ".txt";
let result = slugify(&long_name, &opts);
assert!(result.len() <= 255, "result is {} bytes", result.len());
assert!(result.ends_with(".txt"));
assert!(!result.is_empty());
}
// ---------------------------------------------------------------------------
// slugify_string
// ---------------------------------------------------------------------------
#[test]
fn test_slugify_string_basic() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string("My Blog Post Title!", &opts), "my-blog-post-title");
}
// No extension is split off: every dot is an ordinary separator.
#[test]
fn test_slugify_string_no_extension_handling() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string("my.blog.post", &opts), "my-blog-post");
}
#[test]
fn test_slugify_string_snake() {
let opts = SlugifyOptions { style: Style::Snake, ..Default::default() };
assert_eq!(slugify_string("My Blog Post", &opts), "my_blog_post");
}
#[test]
fn test_slugify_string_camel() {
let opts = SlugifyOptions { style: Style::Camel, ..Default::default() };
assert_eq!(slugify_string("my blog post", &opts), "myBlogPost");
}
#[test]
fn test_slugify_string_unicode() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string("Café Résumé", &opts), "cafe-resume");
}
#[test]
fn test_slugify_string_keep_unicode() {
let opts = SlugifyOptions { keep_unicode: true, ..Default::default() };
assert_eq!(slugify_string("Café Résumé", &opts), "café-résumé");
}
#[test]
fn test_slugify_string_empty() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string("", &opts), "");
}
#[test]
fn test_slugify_string_only_special() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string("@#$!", &opts), "");
}
// Dots between digits are still preserved in plain-string mode.
#[test]
fn test_slugify_string_preserves_version_dots() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string("app version 1.2.3", &opts), "app-version-1.2.3");
}
#[test]
fn test_slugify_string_brackets_stripped() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string("Hello (World) [2024]", &opts), "hello-world-2024");
}
#[test]
fn test_slugify_string_truncates_long_input() {
let opts = SlugifyOptions::default();
let long_input = "a ".repeat(200); let result = slugify_string(&long_input, &opts);
assert!(result.len() <= 255, "result is {} bytes", result.len());
assert!(!result.is_empty());
}
// Unlike `slugify`, a leading dot gets no special treatment here.
#[test]
fn test_slugify_string_dotfile_not_preserved() {
let opts = SlugifyOptions::default();
assert_eq!(slugify_string(".gitignore", &opts), "gitignore");
assert_eq!(slugify_string(".env.local", &opts), "env-local");
}
}