use unicode_normalization::UnicodeNormalization;
use crate::transliterate;
/// Device names that `is_windows_reserved` refuses as a file stem.
///
/// Entries are stored in upper case because the lookup upper-cases the
/// candidate before comparing it against this list.
const WINDOWS_RESERVED: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
"CLOCK$", "KEYBD$", "SCREEN$",
];
/// Characters removed from names when the platform is `"universal"` or `"windows"`.
const UNIVERSAL_ILLEGAL: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];
/// Characters removed from names when the platform is `"posix"`.
const POSIX_ILLEGAL: &[char] = &['/', '\0'];
use crate::utils::floor_char_boundary;
/// Reports whether `stem` is one of the `WINDOWS_RESERVED` names, ignoring case.
///
/// `stem` is upper-cased with `str::to_uppercase` and must then match a table
/// entry exactly; prefixes, suffixes and extensions are not stripped here.
fn is_windows_reserved(stem: &str) -> bool {
    let folded = stem.to_uppercase();
    WINDOWS_RESERVED.contains(&folded.as_str())
}
/// Shortens `name` in place so that it is at most `max_length` bytes long.
///
/// * `name` - the full file name; callers pass it with `ext` already appended.
/// * `ext` - the extension (including its leading dot) that ends `name`, if any.
/// * `max_length` - byte budget; `0` disables the limit.
/// * `preserve_ext` - when `true` and `ext` is strictly shorter than
///   `max_length`, the front of `name` is cut down and `ext` is re-appended so
///   the extension survives. In every other case `name` is cut at `max_length`.
///
/// Cut positions are passed through `floor_char_boundary` before slicing.
fn apply_max_length(name: &mut String, ext: Option<&str>, max_length: usize, preserve_ext: bool) {
    let unlimited = max_length == 0;
    if unlimited || name.len() <= max_length {
        return;
    }
    // The extension is only worth keeping when it leaves at least one byte of
    // budget for the part in front of it.
    let kept_ext = ext.filter(|suffix| preserve_ext && suffix.len() < max_length);
    match kept_ext {
        Some(suffix) => {
            let cut = floor_char_boundary(name, max_length - suffix.len());
            name.truncate(cut);
            name.push_str(suffix);
        }
        None => {
            let cut = floor_char_boundary(name, max_length);
            name.truncate(cut);
        }
    }
}
/// Squeezes every run of consecutive `.` characters in `text` into a single `.`.
///
/// All other characters are copied through unchanged and in their original
/// order, so a lone dot (as in `file.txt`) is left alone while `..` and `...`
/// both become `.`.
fn collapse_dot_sequences(text: &str) -> String {
    // Fast path: without a ".." pair there is nothing to squeeze.
    if text.find("..").is_none() {
        return String::from(text);
    }
    let mut squeezed = String::with_capacity(text.len());
    let mut after_dot = false;
    for ch in text.chars() {
        let is_dot = ch == '.';
        // Only the first dot of a run is emitted; its followers are skipped.
        if !(is_dot && after_dot) {
            squeezed.push(ch);
        }
        after_dot = is_dot;
    }
    squeezed
}
pub(crate) fn sanitize_filename(
text: &str,
separator: &str,
max_length: usize,
platform: &str,
lang: Option<&str>,
preserve_extension: bool,
) -> Result<String, crate::ErrorRepr> {
crate::transliterate::validate_lang(lang)?;
if text.is_empty() {
return Ok(String::new());
}
let illegal_chars: &[char] = match platform {
"universal" | "windows" => UNIVERSAL_ILLEGAL,
"posix" => POSIX_ILLEGAL,
_ => {
return Err(crate::ErrorRepr::InvalidPlatform {
got: platform.to_owned(),
})
}
};
let nfc_text: String = text.nfc().collect();
let safe_text = collapse_dot_sequences(&nfc_text);
let transliterated = transliterate::transliterate_impl(
&safe_text,
lang,
crate::ErrorMode::Ignore,
"",
false,
false,
false,
)
.into_owned();
let transliterated = collapse_dot_sequences(&transliterated);
let (stem, ext) = if preserve_extension {
match transliterated.rfind('.') {
Some(pos) if pos > 0 => (&transliterated[..pos], Some(&transliterated[pos..])),
_ => (transliterated.as_str(), None),
}
} else {
(transliterated.as_str(), None)
};
let mut result = String::with_capacity(stem.len());
let mut prev_was_sep = true;
for ch in stem.chars() {
if illegal_chars.contains(&ch) || ch.is_control() || ch.is_whitespace() {
if !prev_was_sep && !separator.is_empty() {
result.push_str(separator);
prev_was_sep = true;
}
} else {
result.push(ch);
prev_was_sep = false;
}
}
while result.ends_with(separator) && !separator.is_empty() {
result.truncate(result.len() - separator.len());
}
{
let trim_start = result
.chars()
.take_while(|c| *c == '.' || *c == ' ')
.map(char::len_utf8)
.sum::<usize>();
if trim_start > 0 {
result.drain(..trim_start);
}
}
{
let trim_end = result
.chars()
.rev()
.take_while(|c| *c == '.' || *c == ' ')
.map(char::len_utf8)
.sum::<usize>();
if trim_end > 0 {
result.truncate(result.len() - trim_end);
}
}
let sanitized_ext = ext.map(|e| {
let mut clean = String::with_capacity(e.len());
clean.push('.'); for ch in e[1..].chars() {
if !illegal_chars.contains(&ch) && !ch.is_control() && !ch.is_whitespace() {
clean.push(ch);
}
}
clean
});
if matches!(platform, "universal" | "windows") && is_windows_reserved(&result) {
let mut final_name = format!("_{result}");
if let Some(ref ext) = sanitized_ext {
final_name.push_str(ext);
}
apply_max_length(
&mut final_name,
sanitized_ext.as_deref(),
max_length,
preserve_extension,
);
return Ok(final_name);
}
let mut final_name = result;
if let Some(ref ext) = sanitized_ext {
final_name.push_str(ext);
}
apply_max_length(
&mut final_name,
sanitized_ext.as_deref(),
max_length,
preserve_extension,
);
if matches!(platform, "universal" | "windows") {
let check_stem = match final_name.find('.') {
Some(pos) => &final_name[..pos],
None => &final_name,
};
if is_windows_reserved(check_stem) {
final_name.insert(0, '_');
apply_max_length(
&mut final_name,
sanitized_ext.as_deref(),
max_length,
preserve_extension,
);
}
}
if final_name.is_empty() {
final_name = String::from("_");
}
Ok(final_name)
}
// Unit, regression and property tests for dot collapsing and `sanitize_filename`.
#[cfg(test)]
#[allow(clippy::case_sensitive_file_extension_comparisons)]
mod tests {
use super::*;
// ---- collapse_dot_sequences: example-based checks ----
#[test]
fn test_collapse_dot_sequences_double() {
assert_eq!(collapse_dot_sequences(".."), ".");
assert_eq!(collapse_dot_sequences("foo..bar"), "foo.bar");
assert_eq!(collapse_dot_sequences("../../etc"), "././etc");
}
#[test]
fn test_collapse_dot_sequences_single_preserved() {
assert_eq!(collapse_dot_sequences("file.txt"), "file.txt");
assert_eq!(collapse_dot_sequences("a.b.c"), "a.b.c");
}
#[test]
fn test_collapse_dot_sequences_triple() {
assert_eq!(collapse_dot_sequences("..."), ".");
assert_eq!(collapse_dot_sequences("foo...bar"), "foo.bar");
}
#[test]
fn test_collapse_empty() {
assert_eq!(collapse_dot_sequences(""), "");
}
#[test]
fn test_collapse_no_dots() {
assert_eq!(collapse_dot_sequences("hello world"), "hello world");
}
#[test]
fn test_collapse_trailing_dots() {
assert_eq!(collapse_dot_sequences("foo.."), "foo.");
}
// ---- sanitize_filename: reserved names combined with max_length ----
// Cutting "NULtra.txt" to 3 bytes would leave "NUL"; the result must not be a
// reserved name.
#[test]
fn test_truncation_creates_reserved_name() {
let result = sanitize_filename("NULtra.txt", "_", 3, "universal", None, false).unwrap();
let upper = result.to_uppercase();
assert!(
!WINDOWS_RESERVED.iter().any(|r| upper == *r),
"truncation produced reserved name: {result}"
);
}
#[test]
fn test_reserved_name_prefixed() {
let result = sanitize_filename("CON", "_", 255, "universal", None, false).unwrap();
assert!(result.starts_with('_'));
}
// With max_length 7, "NUL.txt" plus a one-byte prefix no longer fits; the
// extension and the length limit must both hold and the stem must not be reserved.
#[test]
fn test_reserved_name_preserve_extension() {
let result = sanitize_filename("NUL.txt", "_", 7, "universal", None, true).unwrap();
assert!(result.ends_with(".txt"), "extension lost: {result}");
assert!(result.len() <= 7, "exceeds max_length: {result}");
let stem = result.split('.').next().unwrap().to_uppercase();
assert!(
!WINDOWS_RESERVED.iter().any(|r| stem == *r),
"stem is reserved: {result}"
);
}
#[test]
fn test_truncation_creates_reserved_preserve_extension() {
let result = sanitize_filename("NULtra.txt", "_", 7, "universal", None, true).unwrap();
assert!(result.ends_with(".txt"), "extension lost: {result}");
assert!(result.len() <= 7, "exceeds max_length: {result}");
}
// ---- regression cases: input is a reserved name with an extension ----
#[test]
fn regress_direct_reserved_nul_preserve_ext_tight() {
let r = sanitize_filename("NUL.txt", "_", 7, "universal", None, true).unwrap();
assert!(r.ends_with(".txt"), "extension lost: {r}");
assert!(r.len() <= 7, "exceeds max_length: {r}");
}
#[test]
fn regress_direct_reserved_con_preserve_ext_tight() {
let r = sanitize_filename("CON.dat", "_", 8, "universal", None, true).unwrap();
assert!(r.ends_with(".dat"), "extension lost: {r}");
assert!(r.len() <= 8, "exceeds max_length: {r}");
assert!(r.starts_with('_'), "missing underscore prefix: {r}");
}
// "_AUX.py" is exactly 7 bytes, so nothing may be cut.
#[test]
fn regress_direct_reserved_aux_preserve_ext_exact_fit() {
let r = sanitize_filename("AUX.py", "_", 7, "universal", None, true).unwrap();
assert_eq!(r, "_AUX.py");
}
// Only one byte of budget remains in front of the 4-byte extension.
#[test]
fn regress_direct_reserved_prn_preserve_ext_very_tight() {
let r = sanitize_filename("PRN.txt", "_", 5, "universal", None, true).unwrap();
assert!(r.ends_with(".txt"), "extension lost: {r}");
assert!(r.len() <= 5, "exceeds max_length: {r}");
}
// ---- regression cases: stem starts with a reserved name and is truncated ----
#[test]
fn regress_post_truncation_reserved_preserve_ext() {
let r = sanitize_filename("NULtra.txt", "_", 7, "universal", None, true).unwrap();
assert!(r.ends_with(".txt"), "extension lost: {r}");
assert!(r.len() <= 7, "exceeds max_length: {r}");
}
#[test]
fn regress_post_truncation_con_preserve_ext() {
let r = sanitize_filename("CONtest.pdf", "_", 8, "universal", None, true).unwrap();
assert!(r.ends_with(".pdf"), "extension lost: {r}");
assert!(r.len() <= 8, "exceeds max_length: {r}");
}
// preserve_extension = true but the input has no dot at all.
#[test]
fn regress_reserved_no_extension_preserve_true() {
let r = sanitize_filename("CON", "_", 4, "universal", None, true).unwrap();
assert!(r.len() <= 4, "exceeds max_length: {r}");
assert!(r.starts_with('_'), "missing underscore prefix: {r}");
}
#[test]
fn regress_reserved_preserve_false_still_works() {
let r = sanitize_filename("NUL.txt", "_", 5, "universal", None, false).unwrap();
assert!(r.len() <= 5, "exceeds max_length: {r}");
}
// Every table entry, with ample length budget, keeps ".txt" and gains the prefix.
#[test]
fn regress_all_reserved_names_preserve_ext() {
for name in WINDOWS_RESERVED {
let input = format!("{name}.txt");
let r = sanitize_filename(&input, "_", 255, "universal", None, true).unwrap();
assert!(
r.ends_with(".txt"),
"extension lost for reserved name '{name}': got '{r}'"
);
assert!(
r.starts_with('_'),
"missing underscore prefix for '{name}': got '{r}'"
);
}
}
// The reserved-name prefix is specific to "universal"/"windows"; "posix" must not add it.
#[test]
fn regress_posix_reserved_names_no_prefix() {
let r = sanitize_filename("NUL.txt", "_", 7, "posix", None, true).unwrap();
assert!(r.ends_with(".txt"), "extension lost on posix: {r}");
assert!(!r.starts_with('_'), "unexpected prefix on posix: {r}");
}
// Reserved stem with a non-ASCII extension; only the byte length limit is asserted.
#[test]
fn regress_multibyte_extension_reserved_name() {
let r = sanitize_filename("CON.ñ", "_", 6, "universal", None, true).unwrap();
assert!(r.len() <= 6, "exceeds max_length: {r}");
}
// Property-based tests (proptest).
mod proptest_properties {
use super::*;
use proptest::prelude::*;
// ---- collapse_dot_sequences properties, 1000 cases each ----
proptest! {
#![proptest_config(ProptestConfig::with_cases(1000))]
// Output never contains two adjacent dots.
#[test]
fn collapse_dots_no_double_dots(s in "\\PC*") {
let result = collapse_dot_sequences(&s);
prop_assert!(
!result.contains(".."),
"double dots in: {result:?}"
);
}
// Applying the function twice gives the same result as applying it once.
#[test]
fn collapse_dots_idempotent(s in "\\PC*") {
let once = collapse_dot_sequences(&s);
let twice = collapse_dot_sequences(&once);
prop_assert_eq!(&once, &twice);
}
// Inputs made of letter groups joined by single dots come back unchanged.
#[test]
fn collapse_dots_preserves_singles(s in "[a-z]{1,5}(\\.[a-z]{1,5}){0,5}") {
let result = collapse_dot_sequences(&s);
prop_assert_eq!(&result, &s);
}
// Inputs without any dot come back unchanged.
#[test]
fn collapse_dots_preserves_non_dots(s in "[^.]{0,50}") {
let result = collapse_dot_sequences(&s);
prop_assert_eq!(&result, &s);
}
}
// Strategy: pick one entry of WINDOWS_RESERVED as an owned String.
fn reserved_name_strategy() -> impl Strategy<Value = String> {
prop::sample::select(WINDOWS_RESERVED).prop_map(str::to_string)
}
// Strategy: a dot followed by 1 to 6 lowercase ASCII letters.
fn extension_strategy() -> impl Strategy<Value = String> {
prop::string::string_regex("[a-z]{1,6}")
.unwrap()
.prop_map(|e| format!(".{e}"))
}
// ---- sanitize_filename properties, 500 cases each ----
proptest! {
#![proptest_config(ProptestConfig::with_cases(500))]
// Length limit holds, and the extension survives whenever it is shorter than the limit.
#[test]
fn preserve_ext_keeps_extension(
stem in "[a-zA-Z0-9]{1,20}",
ext in "[a-z]{1,4}",
max_length in 5usize..50,
) {
let input = format!("{stem}.{ext}");
let expected_ext = format!(".{ext}");
let result = sanitize_filename(&input, "_", max_length, "universal", None, true).unwrap();
prop_assert!(result.len() <= max_length, "exceeds max_length {max_length}: {result}");
if expected_ext.len() < max_length {
prop_assert!(
result.ends_with(&expected_ext),
"extension '{expected_ext}' lost from input '{input}': got '{result}'"
);
}
}
#[test]
fn no_preserve_ext_respects_max_length(
stem in "[a-zA-Z0-9]{1,30}",
ext in "[a-z]{1,4}",
max_length in 1usize..50,
) {
let input = format!("{stem}.{ext}");
let result = sanitize_filename(&input, "_", max_length, "universal", None, false).unwrap();
prop_assert!(result.len() <= max_length, "exceeds max_length {max_length}: {result}");
}
// Reserved stem plus extension: length limit, extension (when it fits) and "_" prefix all hold.
#[test]
fn reserved_name_preserve_ext(
name in reserved_name_strategy(),
ext in extension_strategy(),
max_length in 6usize..50,
) {
let input = format!("{name}{ext}");
let result = sanitize_filename(&input, "_", max_length, "universal", None, true).unwrap();
prop_assert!(result.len() <= max_length, "exceeds max_length {max_length}: {result}");
if ext.len() < max_length {
prop_assert!(
result.ends_with(&ext),
"extension '{ext}' lost for reserved name '{name}': got '{result}'"
);
}
prop_assert!(
result.starts_with('_'),
"missing underscore prefix for reserved '{name}': got '{result}'"
);
}
// Whatever the length limit, the part before the first dot is never a reserved name.
#[test]
fn never_produces_bare_reserved_stem(
input in "[A-Za-z]{1,10}\\.[a-z]{1,4}",
max_length in 1usize..30,
preserve_ext in proptest::bool::ANY,
) {
let result = sanitize_filename(&input, "_", max_length, "universal", None, preserve_ext).unwrap();
if !result.is_empty() {
let stem = match result.find('.') {
Some(pos) => &result[..pos],
None => &result,
};
let upper = stem.to_uppercase();
prop_assert!(
!WINDOWS_RESERVED.iter().any(|r| upper == *r),
"produced bare reserved stem from '{input}' (max_length={max_length}, preserve_ext={preserve_ext}): '{result}'"
);
}
}
// Arbitrary non-control input: any Ok result fits within max_length bytes.
#[test]
fn max_length_always_respected(
input in "\\PC{1,30}",
max_length in 1usize..50,
preserve_ext in proptest::bool::ANY,
) {
if let Ok(result) = sanitize_filename(&input, "_", max_length, "universal", None, preserve_ext) {
prop_assert!(
result.len() <= max_length,
"exceeds max_length {max_length} for input '{input}': got '{result}' (len={})",
result.len()
);
}
}
}
}
}