use alloc::borrow::Cow;
use crate::ErrorKind;
use crate::error::ResultKind;
use crate::unicode::{case_fold, is_above, is_starter, is_whitespace, nfc, nfd};
use crate::utils::cow;
/// Reports whether `c` counts as whitespace for the purpose of trimming names.
///
/// A character qualifies when any of the following holds:
/// - the crate's `is_whitespace` predicate accepts it;
/// - it lies in `U+2409..=U+240D`, i.e. the code points that `map_control_chars`
///   produces for the C0 controls `0x09..=0x0D`;
/// - it is `U+FEFF` (byte order mark).
#[must_use]
pub fn is_whitespace_like(c: char) -> bool {
    is_whitespace(c) || matches!(c, '\u{2409}'..='\u{240D}' | '\u{FEFF}')
}
/// Replaces fullwidth ASCII variants with their ordinary ASCII counterparts.
///
/// Every character in `U+FF01..=U+FF5E` is shifted down by `0xFEE0`, which lands it in
/// `'!'..='~'`; every other character passes through untouched.
///
/// The mapped character stream is handed to `cow` together with the original `s`, and
/// `cow` decides whether the result can borrow or must allocate (the tests in this file
/// expect a borrow when no character was changed).
#[must_use]
pub fn map_fullwidth(s: &str) -> Cow<'_, str> {
    // Distance between a fullwidth form and its ASCII counterpart.
    const FULLWIDTH_OFFSET: u32 = 0xFEE0;

    let narrow = |c: char| {
        if ('\u{FF01}'..='\u{FF5E}').contains(&c) {
            // The subtraction cannot underflow inside this range; fall back to the
            // input character should the conversion ever fail.
            char::from_u32(u32::from(c) - FULLWIDTH_OFFSET).unwrap_or(c)
        } else {
            c
        }
    };

    cow(s.chars().map(narrow), s)
}
/// Strips every leading and trailing whitespace-like character from `s`.
///
/// "Whitespace-like" is whatever [`is_whitespace_like`] accepts. Only the two ends are
/// trimmed; occurrences in the interior of the string are kept. The returned slice
/// always borrows from `s`, so no allocation takes place.
#[must_use]
pub fn trim_whitespace_like(s: &str) -> &str {
    s.trim_matches(is_whitespace_like)
}
/// Post-processes text so that dotted and dotless forms of `i`/`j` collapse together.
///
/// The string is scanned left to right with a single flag, "a dot above may be dropped":
/// - `U+0130` is emitted as `'I'` and `U+0131` as `'i'`; both set the flag.
/// - `'I'`, `'i'`, `'J'` and `'j'` are emitted unchanged and set the flag.
/// - `U+0307` (combining dot above) is dropped while the flag is set; the flag stays
///   set, so several consecutive dots are all removed.
/// - Any other character is emitted unchanged, and clears the flag when `is_starter`
///   or `is_above` accepts it. Characters accepted by neither leave the flag alone.
///   NOTE(review): `is_above` presumably means canonical combining class "Above" —
///   confirm against `crate::unicode`.
///
/// The resulting character stream is passed to `cow` along with the original `s`,
/// which decides whether the result borrows or allocates.
#[must_use]
pub fn fixup_case_fold(s: &str) -> Cow<'_, str> {
    const DOTTED_CAPITAL_I: char = '\u{0130}';
    const DOTLESS_SMALL_I: char = '\u{0131}';
    const COMBINING_DOT_ABOVE: char = '\u{0307}';

    let fixed = s
        .chars()
        .scan(false, |dot_droppable, c| {
            // Each arm yields (character to emit, if any; next value of the flag).
            let (emit, next_state) = match c {
                DOTTED_CAPITAL_I => (Some('I'), true),
                DOTLESS_SMALL_I => (Some('i'), true),
                'I' | 'i' | 'J' | 'j' => (Some(c), true),
                // The guard guarantees the flag is already set; keep it that way.
                COMBINING_DOT_ABOVE if *dot_droppable => (None, true),
                _ => {
                    let keep_flag = if is_starter(c) || is_above(c) {
                        false
                    } else {
                        *dot_droppable
                    };
                    (Some(c), keep_flag)
                }
            };
            *dot_droppable = next_state;
            // Always `Some(..)`: the scan never ends early; dropped characters are
            // represented by the inner `None` and removed by `flatten`.
            Some(emit)
        })
        .flatten();

    cow(fixed, s)
}
/// Replaces C0 control characters and DEL with code points from the `U+24xx` block.
///
/// - `0x01..=0x1F` map to `U+2401..=U+241F` (the input value plus `0x2400`).
/// - `0x7F` maps to `U+2421`.
/// - Everything else, including NUL (`0x00`), is passed through unchanged.
///
/// The mapped character stream is handed to `cow` together with the original `s`,
/// which decides whether the result borrows or allocates.
#[must_use]
pub fn map_control_chars(s: &str) -> Cow<'_, str> {
    // Offset added to a C0 control to obtain its replacement code point.
    const PICTURE_BASE: u32 = 0x2400;

    let to_picture = |c: char| {
        if c == '\x7F' {
            '\u{2421}'
        } else if ('\x01'..='\x1F').contains(&c) {
            // Fall back to the input character should the conversion ever fail.
            char::from_u32(u32::from(c) + PICTURE_BASE).unwrap_or(c)
        } else {
            c
        }
    };

    cow(s.chars().map(to_picture), s)
}
/// Normalizes a single path element while preserving letter case.
///
/// The pipeline, in order:
/// 1. `nfd` — decompose the input.
/// 2. [`trim_whitespace_like`] — drop whitespace-like characters at both ends.
/// 3. [`map_fullwidth`] — turn fullwidth ASCII variants into plain ASCII.
/// 4. [`map_control_chars`] — replace C0 controls and DEL.
/// 5. [`validate_path_element`] — reject names that are not acceptable.
/// 6. `nfc` — recompose.
///
/// The final text is passed to `cow` together with the original `name`, so the result
/// is tied to the lifetime of `name`; `cow` decides whether it borrows or allocates.
///
/// # Errors
///
/// Returns whatever error [`validate_path_element`] reports for the text produced by
/// steps 1–4 (empty name, `.`, `..`, embedded NUL, or embedded `/`).
pub fn normalize_cs(name: &str) -> ResultKind<Cow<'_, str>> {
    let decomposed = nfd(name);
    let trimmed = trim_whitespace_like(&decomposed);
    let narrowed = map_fullwidth(trimmed);
    let pictured = map_control_chars(&narrowed);

    // Validation happens after the mappings so that, for example, a fullwidth '/' or
    // '.' is judged in its ASCII form.
    validate_path_element(&pictured)?;

    let composed = nfc(&pictured);
    // Recomposition is expected never to turn a valid name into an invalid one.
    debug_assert!(validate_path_element(&composed).is_ok());

    Ok(cow(composed.chars(), name))
}
/// Derives the case-insensitive form of a name from its case-sensitive normalized form.
///
/// The pipeline, in order: `nfd` (decompose), `case_fold`, [`fixup_case_fold`]
/// (collapse dotted/dotless `i`/`j` variants), then `nfc` (recompose). The final text
/// is passed to `cow` together with `cs_normalized`, which decides whether the result
/// borrows or allocates.
///
/// The argument is expected to be the output of [`normalize_cs`], as the function
/// name says; no validation error is surfaced here. In debug builds the result is
/// asserted to still pass [`validate_path_element`].
#[must_use]
pub fn normalize_ci_from_normalized_cs(cs_normalized: &str) -> Cow<'_, str> {
    let decomposed = nfd(cs_normalized);
    let folded = case_fold(&decomposed);
    let fixed = fixup_case_fold(&folded);
    let composed = nfc(&fixed);

    debug_assert!(validate_path_element(&composed).is_ok());

    cow(composed.chars(), cs_normalized)
}
/// Checks that `name` is acceptable as a single path element.
///
/// # Errors
///
/// The checks run in this order and the first failure wins:
/// - [`ErrorKind::Empty`] when `name` is the empty string;
/// - [`ErrorKind::CurrentDirectoryMarker`] when `name` is exactly `.`;
/// - [`ErrorKind::ParentDirectoryMarker`] when `name` is exactly `..`;
/// - [`ErrorKind::ContainsNullByte`] when `name` contains `'\0'`;
/// - [`ErrorKind::ContainsForwardSlash`] when `name` contains `'/'`.
pub fn validate_path_element(name: &str) -> ResultKind<()> {
    if name.is_empty() {
        return Err(ErrorKind::Empty);
    }
    if name == "." {
        return Err(ErrorKind::CurrentDirectoryMarker);
    }
    if name == ".." {
        return Err(ErrorKind::ParentDirectoryMarker);
    }
    // NUL is checked before '/' so a name containing both reports the NUL byte.
    if name.contains('\0') {
        return Err(ErrorKind::ContainsNullByte);
    }
    if name.contains('/') {
        return Err(ErrorKind::ContainsForwardSlash);
    }
    Ok(())
}
// Unit tests for the normalization helpers. Many of them assert not only the resulting
// text but also whether the returned `Cow` borrows from the input.
#[cfg(test)]
mod tests {
    use alloc::borrow::Cow;
    use alloc::string::String;

    // On bare wasm32 targets the `#[test]` attribute is replaced by wasm-bindgen's runner.
    #[cfg(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "none")))]
    use wasm_bindgen_test::wasm_bindgen_test as test;

    use super::{
        fixup_case_fold, map_control_chars, map_fullwidth, normalize_ci_from_normalized_cs,
        normalize_cs, trim_whitespace_like, validate_path_element,
    };
    use crate::ErrorKind;

    // ---- trim_whitespace_like ----

    #[test]
    fn trim_whitespace_like_removes_trailing() {
        assert_eq!(trim_whitespace_like("hello "), "hello");
    }

    #[test]
    fn trim_whitespace_like_removes_leading() {
        assert_eq!(trim_whitespace_like(" hello"), "hello");
    }

    #[test]
    fn trim_whitespace_like_removes_both() {
        assert_eq!(trim_whitespace_like(" hello "), "hello");
    }

    #[test]
    fn trim_whitespace_like_no_whitespace() {
        assert_eq!(trim_whitespace_like("hello"), "hello");
    }

    #[test]
    fn trim_whitespace_like_middle_preserved() {
        assert_eq!(trim_whitespace_like("he llo"), "he llo");
    }

    #[test]
    fn trim_whitespace_like_empty() {
        assert_eq!(trim_whitespace_like(""), "");
    }

    #[test]
    fn trim_whitespace_like_only_spaces() {
        assert_eq!(trim_whitespace_like(" "), "");
    }

    #[test]
    fn trim_whitespace_like_tabs() {
        assert_eq!(trim_whitespace_like("\thello\t"), "hello");
    }

    #[test]
    fn trim_whitespace_like_ideographic_space() {
        assert_eq!(trim_whitespace_like("\u{3000}hello\u{3000}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_mixed() {
        assert_eq!(trim_whitespace_like("\t\u{3000} hello \t\u{3000}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_tab() {
        assert_eq!(trim_whitespace_like("\u{2409}hello\u{2409}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_lf() {
        assert_eq!(trim_whitespace_like("\u{240A}hello\u{240A}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_cr() {
        assert_eq!(trim_whitespace_like("\u{240D}hello\u{240D}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_middle_preserved() {
        assert_eq!(trim_whitespace_like("he\u{2409}llo"), "he\u{2409}llo");
    }

    #[test]
    fn trim_whitespace_like_bom_leading() {
        assert_eq!(trim_whitespace_like("\u{FEFF}hello"), "hello");
    }

    #[test]
    fn trim_whitespace_like_bom_trailing() {
        assert_eq!(trim_whitespace_like("hello\u{FEFF}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_bom_both() {
        assert_eq!(trim_whitespace_like("\u{FEFF}hello\u{FEFF}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_bom_middle_preserved() {
        assert_eq!(trim_whitespace_like("he\u{FEFF}llo"), "he\u{FEFF}llo");
    }

    #[test]
    fn trim_whitespace_like_only_bom() {
        assert_eq!(trim_whitespace_like("\u{FEFF}"), "");
    }

    #[test]
    fn trim_whitespace_like_multiple_leading_bom() {
        assert_eq!(
            trim_whitespace_like("\u{FEFF}\u{FEFF}\u{FEFF}hello"),
            "hello"
        );
    }

    // ---- map_fullwidth ----

    #[test]
    fn map_fullwidth_letters() {
        assert_eq!(map_fullwidth("\u{FF21}\u{FF41}"), "Aa");
    }

    #[test]
    fn map_fullwidth_digits() {
        assert_eq!(map_fullwidth("\u{FF10}\u{FF19}"), "09");
    }

    #[test]
    fn map_fullwidth_symbols() {
        assert_eq!(map_fullwidth("\u{FF01}"), "!");
    }

    #[test]
    fn map_fullwidth_mixed() {
        assert_eq!(map_fullwidth("abc\u{FF21}def"), "abcAdef");
    }

    #[test]
    fn map_fullwidth_pure_ascii() {
        let result = map_fullwidth("hello");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
    }

    #[test]
    fn map_fullwidth_all_characters() {
        let fullwidth: String = ('\u{FF01}'..='\u{FF5E}').collect();
        let ascii: String = ('!'..='~').collect();
        assert_eq!(map_fullwidth(&fullwidth), ascii);
    }

    // ---- map_control_chars ----

    #[test]
    fn map_control_del() {
        assert_eq!(map_control_chars("\x7F"), "\u{2421}");
    }

    #[test]
    fn map_control_normal_unchanged() {
        let result = map_control_chars("hello");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
    }

    #[test]
    fn map_control_mixed() {
        assert_eq!(map_control_chars("a\x01b\x7Fc"), "a\u{2401}b\u{2421}c");
    }

    #[test]
    fn map_control_null_byte_unchanged() {
        assert_eq!(map_control_chars("\x00"), "\x00");
    }

    #[test]
    fn map_control_all_c0_characters() {
        let controls: String = ('\x01'..='\x1F').collect();
        let pictures: String = ('\u{2401}'..='\u{241F}').collect();
        assert_eq!(map_control_chars(&controls), pictures);
    }

    // ---- fixup_case_fold ----

    #[test]
    fn fixup_case_fold_dotted_capital() {
        assert_eq!(fixup_case_fold("\u{0130}"), "I");
    }

    #[test]
    fn fixup_case_fold_dotless_lowercase() {
        assert_eq!(fixup_case_fold("\u{0131}"), "i");
    }

    #[test]
    fn fixup_case_fold_dotless_lowercase_with_dot() {
        assert_eq!(fixup_case_fold("\u{0131}\u{0307}"), "i");
    }

    #[test]
    fn fixup_case_fold_mixed() {
        assert_eq!(fixup_case_fold("a\u{0130}b\u{0131}c"), "aIbic");
    }

    #[test]
    fn fixup_case_fold_ascii_unchanged() {
        let result = fixup_case_fold("Hello");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "Hello");
    }

    #[test]
    fn fixup_case_fold_nfd_decomposed() {
        let result = fixup_case_fold("I\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I");
    }

    #[test]
    fn fixup_case_fold_nfd_decomposed_lowercase() {
        let result = fixup_case_fold("i\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "i");
    }

    #[test]
    fn fixup_case_fold_intervening_combiner() {
        let result = fixup_case_fold("I\u{0327}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I\u{0327}");
    }

    #[test]
    fn fixup_case_fold_intervening_combiner_lowercase() {
        let result = fixup_case_fold("i\u{0327}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "i\u{0327}");
    }

    #[test]
    fn fixup_case_fold_multiple_dots() {
        let result = fixup_case_fold("I\u{0307}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I");
    }

    #[test]
    fn fixup_case_fold_dot_on_other_base() {
        let result = fixup_case_fold("e\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "e\u{0307}");
    }

    #[test]
    fn fixup_case_fold_dot_after_starter_resets() {
        let result = fixup_case_fold("Ia\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "Ia\u{0307}");
    }

    #[test]
    fn fixup_case_fold_multiple_combiners_then_dot() {
        let result = fixup_case_fold("i\u{0325}\u{0327}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "i\u{0325}\u{0327}");
    }

    #[test]
    fn fixup_case_fold_above_combiner_blocks_strip() {
        let result = fixup_case_fold("I\u{0301}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I\u{0301}\u{0307}");
    }

    #[test]
    fn fixup_case_fold_below_combiner_allows_strip() {
        assert_eq!(fixup_case_fold("I\u{0327}\u{0307}"), "I\u{0327}");
    }

    #[test]
    fn fixup_case_fold_j_dot_above_stripped() {
        assert_eq!(fixup_case_fold("j\u{0307}"), "j");
    }

    #[test]
    fn fixup_case_fold_j_uppercase_dot_above_stripped() {
        assert_eq!(fixup_case_fold("J\u{0307}"), "J");
    }

    #[test]
    fn fixup_case_fold_j_dot_with_circumflex() {
        assert_eq!(
            fixup_case_fold("j\u{0307}\u{0302}\u{0345}"),
            "j\u{0302}\u{0345}"
        );
    }

    #[test]
    fn fixup_case_fold_j_no_dot_unchanged() {
        let result = fixup_case_fold("j\u{0302}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "j\u{0302}");
    }

    // ---- normalize_cs ----

    #[test]
    fn normalize_trims_leading_bom() {
        let input = "\u{FEFF}hello.txt";
        let with_bom = normalize_cs(input).unwrap();
        let without_bom = normalize_cs("hello.txt").unwrap();
        assert_eq!(with_bom, without_bom);
        assert!(matches!(with_bom, Cow::Borrowed(_)));
        assert!(matches!(without_bom, Cow::Borrowed(_)));
        // The borrowed result must point into `input`, just past the BOM.
        assert!(core::ptr::eq(
            with_bom.as_ptr(),
            input["\u{FEFF}".len()..].as_ptr()
        ));
    }

    #[test]
    fn normalize_preserves_interior_bom() {
        let result = normalize_cs("he\u{FEFF}llo").unwrap();
        assert!(result.contains('\u{FEFF}'));
    }

    #[test]
    fn normalize_maps_fullwidth() {
        let fullwidth = normalize_cs("\u{FF21}bc.txt").unwrap();
        let ascii = normalize_cs("Abc.txt").unwrap();
        assert_eq!(fullwidth, ascii);
    }

    #[test]
    fn normalize_strips_whitespace() {
        let with_whitespace = normalize_cs("\t\u{3000} hello \t\u{3000}").unwrap();
        let without_whitespace = normalize_cs("hello").unwrap();
        assert_eq!(with_whitespace, without_whitespace);
        assert!(matches!(with_whitespace, Cow::Borrowed(_)));
        assert!(matches!(without_whitespace, Cow::Borrowed(_)));
    }

    #[test]
    fn normalize_trailing_whitespace_borrows_prefix() {
        let input = "hello ";
        let result = normalize_cs(input).unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
        assert!(core::ptr::eq(result.as_ptr(), input.as_ptr()));
    }

    #[test]
    fn normalize_leading_whitespace_borrows_suffix() {
        let input = " hello";
        let result = normalize_cs(input).unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
        // Derive the suffix offset from the lengths instead of hard-coding it, so the
        // pointer check stays correct however many leading spaces the literal carries.
        let offset = input.len() - result.len();
        assert!(core::ptr::eq(result.as_ptr(), input[offset..].as_ptr()));
    }

    #[test]
    fn normalize_normalizes_unicode() {
        let nfd_input = normalize_cs("e\u{0301}.txt").unwrap();
        let composed = normalize_cs("\u{00E9}.txt").unwrap();
        assert_eq!(nfd_input, composed);
    }

    #[test]
    fn normalize_maps_control_chars() {
        let with_control = normalize_cs("a\x01b").unwrap();
        let with_picture = normalize_cs("a\u{2401}b").unwrap();
        assert_eq!(with_control, with_picture);
    }

    #[test]
    fn normalize_strips_whitespace_control_pictures() {
        let with_tab = normalize_cs("\thello").unwrap();
        let with_picture = normalize_cs("\u{2409}hello").unwrap();
        let plain = normalize_cs("hello").unwrap();
        assert_eq!(with_tab, plain);
        assert_eq!(with_picture, plain);
    }

    #[test]
    fn normalize_turkish_i_sensitive() {
        let ascii_upper = normalize_cs("I").unwrap();
        let ascii_lower = normalize_cs("i").unwrap();
        let dotted = normalize_cs("\u{0130}").unwrap();
        let dotless = normalize_cs("\u{0131}").unwrap();
        assert_eq!(ascii_upper, "I");
        assert_eq!(ascii_lower, "i");
        assert_eq!(dotted, "\u{0130}");
        assert_eq!(dotless, "\u{0131}");
        assert_ne!(ascii_upper, ascii_lower);
        assert_ne!(ascii_upper, dotted);
        assert_ne!(ascii_upper, dotless);
        assert_ne!(ascii_lower, dotted);
        assert_ne!(ascii_lower, dotless);
        assert_ne!(dotted, dotless);
    }

    #[test]
    fn normalize_ypogegrammeni_sensitive_preserved() {
        let cs = normalize_cs("\u{0345}").unwrap();
        assert_eq!(cs, "\u{0345}");
    }

    #[test]
    fn normalize_ypogegrammeni_with_overline_sensitive() {
        let a = normalize_cs("\u{0345}\u{0305}").unwrap();
        let b = normalize_cs("\u{0305}\u{0345}").unwrap();
        assert_eq!(a, b);
        assert_eq!(a, "\u{0305}\u{0345}");
    }

    #[test]
    fn normalize_ligature_ffl_sensitive() {
        let cs = normalize_cs("\u{FB04}").unwrap();
        assert_eq!(cs, "\u{FB04}");
    }

    #[test]
    fn normalize_deseret_sensitive() {
        let upper = normalize_cs("\u{10400}").unwrap();
        let lower = normalize_cs("\u{10428}").unwrap();
        assert_ne!(upper, lower);
    }

    #[test]
    fn normalize_greek_sigma_sensitive() {
        let small = normalize_cs("\u{03C3}").unwrap();
        let final_sigma = normalize_cs("\u{03C2}").unwrap();
        assert_ne!(small, final_sigma);
    }

    #[test]
    fn normalize_ohm_sign_equals_omega() {
        let ohm = normalize_cs("\u{2126}").unwrap();
        let omega = normalize_cs("\u{03A9}").unwrap();
        assert_eq!(ohm, omega);
    }

    #[test]
    fn normalize_angstrom_equals_a_ring() {
        let angstrom = normalize_cs("\u{212B}").unwrap();
        let a_ring = normalize_cs("\u{00C5}").unwrap();
        assert_eq!(angstrom, a_ring);
    }

    #[test]
    fn normalize_dz_digraph_sensitive() {
        let upper = normalize_cs("\u{01F1}").unwrap();
        let title = normalize_cs("\u{01F2}").unwrap();
        let lower = normalize_cs("\u{01F3}").unwrap();
        assert_ne!(upper, title);
        assert_ne!(upper, lower);
        assert_ne!(title, lower);
    }

    #[test]
    fn normalize_ascii_i_sensitive() {
        let upper = normalize_cs("I").unwrap();
        let lower = normalize_cs("i").unwrap();
        assert_ne!(upper, lower);
        assert_eq!(upper, "I");
        assert_eq!(lower, "i");
    }

    #[test]
    fn normalize_empty_rejected() {
        assert!(normalize_cs("").is_err());
    }

    #[test]
    fn normalize_dot_rejected() {
        assert!(normalize_cs(".").is_err());
    }

    #[test]
    fn normalize_dotdot_rejected() {
        assert!(normalize_cs("..").is_err());
    }

    #[test]
    fn normalize_slash_rejected() {
        assert!(normalize_cs("a/b").is_err());
    }

    #[test]
    fn normalize_bom_only_rejected() {
        assert!(normalize_cs("\u{FEFF}").is_err());
    }

    #[test]
    fn normalize_bom_dot_rejected() {
        assert!(normalize_cs("\u{FEFF}.").is_err());
    }

    // ---- validate_path_element ----

    #[test]
    fn validate_empty_rejected() {
        assert!(validate_path_element("").is_err());
    }

    #[test]
    fn validate_dot_rejected() {
        assert!(validate_path_element(".").is_err());
    }

    #[test]
    fn validate_dotdot_rejected() {
        assert!(validate_path_element("..").is_err());
    }

    #[test]
    fn validate_slash_rejected() {
        assert!(validate_path_element("a/b").is_err());
    }

    #[test]
    fn validate_valid_path_element() {
        assert!(validate_path_element("hello.txt").is_ok());
    }

    #[test]
    fn validate_dotfile() {
        assert!(validate_path_element(".gitignore").is_ok());
    }

    #[test]
    fn validate_triple_dot() {
        assert!(validate_path_element("...").is_ok());
    }

    #[test]
    fn validate_unicode() {
        assert!(validate_path_element("日本語.txt").is_ok());
    }

    #[test]
    fn validate_null_byte_rejected() {
        assert!(matches!(
            validate_path_element("\0"),
            Err(ErrorKind::ContainsNullByte)
        ));
        assert!(matches!(
            validate_path_element("a\0b"),
            Err(ErrorKind::ContainsNullByte)
        ));
    }

    #[test]
    fn normalize_cs_null_byte_rejected() {
        assert!(matches!(
            normalize_cs("a\0b"),
            Err(ErrorKind::ContainsNullByte)
        ));
    }

    #[test]
    fn normalize_sensitive_preserves_case() {
        let upper = normalize_cs("Hello.txt").unwrap();
        let lower = normalize_cs("hello.txt").unwrap();
        assert_ne!(upper, lower);
        assert!(matches!(upper, Cow::Borrowed(_)));
        assert!(matches!(lower, Cow::Borrowed(_)));
    }

    // ---- normalize_ci_from_normalized_cs ----

    #[test]
    fn ci_from_cs_turkish_i() {
        assert_eq!(normalize_ci_from_normalized_cs("I"), "i");
        assert_eq!(normalize_ci_from_normalized_cs("i"), "i");
        assert_eq!(normalize_ci_from_normalized_cs("\u{0130}"), "i");
        assert_eq!(normalize_ci_from_normalized_cs("\u{0131}"), "i");
    }

    #[test]
    fn ci_from_cs_i_combining_dot() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{0130}"), "i");
        assert_eq!(normalize_ci_from_normalized_cs("\u{0131}\u{0307}"), "i");
    }

    #[test]
    fn ci_from_cs_ypogegrammeni() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{0345}"), "\u{03B9}");
        assert_eq!(
            normalize_ci_from_normalized_cs("\u{0305}\u{0345}"),
            "\u{0305}\u{03B9}"
        );
    }

    #[test]
    fn ci_from_cs_composed_ypogegrammeni() {
        assert_eq!(
            normalize_ci_from_normalized_cs("\u{1FC3}"),
            "\u{03B7}\u{03B9}"
        );
    }

    #[test]
    fn ci_from_cs_ligature_ffl() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{FB04}"), "ffl");
        assert_eq!(normalize_ci_from_normalized_cs("ffl"), "ffl");
        assert_eq!(normalize_ci_from_normalized_cs("FFL"), "ffl");
        assert_eq!(normalize_ci_from_normalized_cs("Ffl"), "ffl");
    }

    #[test]
    fn ci_from_cs_deseret() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{10400}"), "\u{10428}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{10428}"), "\u{10428}");
    }

    #[test]
    fn ci_from_cs_ohm_omega() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{03A9}"), "\u{03C9}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{03C9}"), "\u{03C9}");
    }

    #[test]
    fn ci_from_cs_angstrom() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{00C5}"), "\u{00E5}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{00E5}"), "\u{00E5}");
    }

    #[test]
    fn ci_from_cs_micro_sign() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{00B5}"), "\u{03BC}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{03BC}"), "\u{03BC}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{039C}"), "\u{03BC}");
    }

    #[test]
    fn ci_from_cs_dz_digraph() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{01F1}"), "\u{01F3}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{01F2}"), "\u{01F3}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{01F3}"), "\u{01F3}");
    }

    #[test]
    fn ci_from_cs_sharp_s_variants() {
        assert_eq!(normalize_ci_from_normalized_cs("ss"), "ss");
        assert_eq!(normalize_ci_from_normalized_cs("SS"), "ss");
        assert_eq!(normalize_ci_from_normalized_cs("sS"), "ss");
        assert_eq!(normalize_ci_from_normalized_cs("Ss"), "ss");
        // U+00DF: Latin small letter sharp s.
        assert_eq!(normalize_ci_from_normalized_cs("\u{00DF}"), "ss");
        // U+1E9E: Latin capital letter sharp s.
        assert_eq!(normalize_ci_from_normalized_cs("\u{1E9E}"), "ss");
    }

    #[test]
    fn ci_from_cs_greek_sigma_variants() {
        // Capital sigma, small sigma and final sigma all fold to small sigma.
        assert_eq!(normalize_ci_from_normalized_cs("\u{03A3}"), "\u{03C3}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{03C3}"), "\u{03C3}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{03C2}"), "\u{03C3}");
        // The lunate sigma pair folds to the small lunate sigma, not to U+03C3.
        assert_eq!(normalize_ci_from_normalized_cs("\u{03F2}"), "\u{03F2}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{03F9}"), "\u{03F2}");
    }

    #[test]
    fn ci_from_cs_hello() {
        assert_eq!(normalize_ci_from_normalized_cs("Hello.txt"), "hello.txt");
        assert_eq!(normalize_ci_from_normalized_cs("hello.txt"), "hello.txt");
        assert_eq!(normalize_ci_from_normalized_cs("HELLO.TXT"), "hello.txt");
    }

    #[test]
    fn ci_from_cs_nfc_nfd_equivalent() {
        assert_eq!(
            normalize_ci_from_normalized_cs("\u{00C9}.txt"),
            "\u{00E9}.txt"
        );
    }

    #[test]
    fn ci_from_cs_japanese_unchanged() {
        let result = normalize_ci_from_normalized_cs("日本語.txt");
        assert_eq!(result.as_ref(), "日本語.txt");
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    #[test]
    fn ci_from_cs_idempotent() {
        let first = normalize_ci_from_normalized_cs("Hello.txt");
        let second = normalize_ci_from_normalized_cs(&first);
        assert_eq!(first, second);
        assert!(matches!(second, Cow::Borrowed(_)));
    }

    #[test]
    fn ci_from_cs_already_folded_borrows() {
        let result = normalize_ci_from_normalized_cs("hello.txt");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), "hello.txt");
    }

    // ---- characters that must stay distinct or be preserved ----

    #[test]
    fn normalize_space_vs_nbsp_distinct() {
        let space = normalize_cs("a b").unwrap();
        let nbsp = normalize_cs("a\u{00A0}b").unwrap();
        assert_ne!(space, nbsp);
    }

    #[test]
    fn normalize_cs_preserves_zwj() {
        let result = normalize_cs("a\u{200D}b").unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200D}b");
    }

    #[test]
    fn normalize_cs_preserves_zwnj() {
        let result = normalize_cs("a\u{200C}b").unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200C}b");
    }

    #[test]
    fn ci_from_cs_preserves_zwj() {
        let result = normalize_ci_from_normalized_cs("a\u{200D}b");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200D}b");
    }

    #[test]
    fn ci_from_cs_preserves_zwnj() {
        let result = normalize_ci_from_normalized_cs("a\u{200C}b");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200C}b");
    }

    #[test]
    fn ci_from_cs_zwj_between_i_and_dot() {
        assert_eq!(
            normalize_ci_from_normalized_cs("i\u{200D}\u{0307}"),
            "i\u{200D}\u{0307}"
        );
    }

    #[test]
    fn ci_from_cs_zwnj_between_i_and_dot() {
        assert_eq!(
            normalize_ci_from_normalized_cs("i\u{200C}\u{0307}"),
            "i\u{200C}\u{0307}"
        );
    }
}