normalized-path 0.0.4

use alloc::borrow::Cow;

use crate::ErrorKind;
use crate::error::ResultKind;
use crate::unicode::{case_fold, is_above, is_starter, is_whitespace, nfc, nfd};
use crate::utils::cow;

/// Decide whether `c` counts as trimmable, whitespace-like content.
///
/// A character qualifies when any of these holds:
/// - the `White_Space` property check (`is_whitespace`) accepts it;
/// - it is one of the Control Pictures U+2409–U+240D, the visible stand-ins
///   for the whitespace controls HT, LF, VT, FF and CR;
/// - it is U+FEFF, the byte order mark.
#[must_use]
pub fn is_whitespace_like(c: char) -> bool {
    is_whitespace(c) || matches!(c, '\u{2409}'..='\u{240D}' | '\u{FEFF}')
}

/// Replace Fullwidth forms (U+FF01..=U+FF5E) with the ASCII characters they mirror.
///
/// Every other character is passed through untouched. The result is built by
/// the `cow` helper from the mapped character stream and the original string.
#[must_use]
pub fn map_fullwidth(s: &str) -> Cow<'_, str> {
    // Fixed distance between a Fullwidth form and its ASCII counterpart
    // (U+FF01 - 0xFEE0 = U+0021 '!', U+FF5E - 0xFEE0 = U+007E '~').
    const FULLWIDTH_OFFSET: u32 = 0xFEE0;

    let to_ascii = |ch: char| {
        if ('\u{FF01}'..='\u{FF5E}').contains(&ch) {
            // Fall back to the input character should the conversion ever fail.
            char::from_u32(u32::from(ch) - FULLWIDTH_OFFSET).unwrap_or(ch)
        } else {
            ch
        }
    };

    cow(s.chars().map(to_ascii), s)
}

/// Strip whitespace-like characters from both ends of `s`.
///
/// A character is removed when [`is_whitespace_like`] accepts it, i.e. `White_Space`
/// characters, the Control Pictures U+2409–U+240D, and the BOM (U+FEFF). Characters in
/// the interior of the string are never touched.
///
/// The returned slice borrows from `s`; nothing is allocated.
#[must_use]
pub fn trim_whitespace_like(s: &str) -> &str {
    s.trim_matches(is_whitespace_like)
}

/// Smooth over locale-specific casing quirks that plain case folding leaves behind.
/// Intended to run after `toCasefold()` when normalizing case-insensitively.
///
/// Two adjustments are made:
///
/// - Turkish İ (U+0130) becomes ASCII `I` and ı (U+0131) becomes ASCII `i`.
///   `toCasefold()` keeps ı distinct from i, yet `toUppercase(ı)` = I even
///   without locale tailoring, so folding alone misses that collision.
/// - U+0307 COMBINING DOT ABOVE is dropped when it follows I/i/J/j. Other
///   combining marks may sit in between as long as they are neither starters
///   nor CCC=230 (Above), mirroring the Unicode `After_I` condition. This covers
///   the NFD form of İ (I + U+0307) and the Lithuanian rules that insert U+0307
///   after lowercase i and j to keep the dot visible under other diacritics.
///
/// See <https://www.unicode.org/Public/17.0.0/ucd/SpecialCasing.txt>.
#[must_use]
pub fn fixup_case_fold(s: &str) -> Cow<'_, str> {
    /// Process one character: returns the character to emit, or `None` when it
    /// is dropped. `dot_pending` records whether a following U+0307 must go.
    fn step(dot_pending: &mut bool, ch: char) -> Option<char> {
        // Characters that open a dot-stripping window, with their replacement:
        // İ → I, ı → i, and I/i/J/j map to themselves.
        let opener = match ch {
            '\u{0130}' => Some('I'),
            '\u{0131}' => Some('i'),
            'I' | 'i' | 'J' | 'j' => Some(ch),
            _ => None,
        };
        if let Some(base) = opener {
            *dot_pending = true;
            return Some(base);
        }

        // A combining dot above inside an open window is removed; the window
        // stays open so that repeated dots are removed as well.
        if ch == '\u{0307}' && *dot_pending {
            return None;
        }

        // Starters (CCC=0) and Above marks (CCC=230) close the window, matching
        // the `After_Soft_Dotted`, `More_Above`, `Before_Dot`, and `After_I`
        // conditions in SpecialCasing.txt.
        if is_starter(ch) || is_above(ch) {
            *dot_pending = false;
        }
        Some(ch)
    }

    let fixed = s
        .chars()
        .scan(false, |dot_pending, ch| Some(step(dot_pending, ch)))
        .flatten();
    cow(fixed, s)
}

/// Swap control characters for their visible Unicode Control Pictures.
///
/// - 0x01–0x1F become U+2401–U+241F.
/// - 0x7F (DEL) becomes U+2421.
/// - 0x00 is deliberately left alone: null bytes are rejected by validation
///   rather than being rewritten here.
#[must_use]
pub fn map_control_chars(s: &str) -> Cow<'_, str> {
    // Start of the Control Pictures block; picture = base + control code.
    const CONTROL_PICTURES_BASE: u32 = 0x2400;
    // U+2421 SYMBOL FOR DELETE.
    const DELETE_PICTURE: char = '\u{2421}';

    let to_picture = |ch: char| match u32::from(ch) {
        // Fall back to the input character should the conversion ever fail.
        code @ 0x01..=0x1F => char::from_u32(code + CONTROL_PICTURES_BASE).unwrap_or(ch),
        0x7F => DELETE_PICTURE,
        _ => ch,
    };

    cow(s.chars().map(to_picture), s)
}

/// Produce the case-sensitive normalized form of a plaintext path element name.
///
/// Steps, in order: NFD → trim whitespace-like characters → map Fullwidth
/// forms to ASCII → map control characters to Control Pictures → validate → NFC.
///
/// # Errors
/// Returns the error reported by [`validate_path_element()`] when the mapped
/// name is not a valid path element.
pub fn normalize_cs(name: &str) -> ResultKind<Cow<'_, str>> {
    let decomposed = nfd(name);
    let trimmed = trim_whitespace_like(&decomposed);
    let ascii_mapped = map_fullwidth(trimmed);
    let pictured = map_control_chars(&ascii_mapped);

    // Validation runs on the fully mapped name, before recomposition.
    validate_path_element(&pictured)?;

    let composed = nfc(&pictured);
    // Recomposition is expected to keep a valid name valid.
    debug_assert!(validate_path_element(&composed).is_ok());

    // Hand the final characters to `cow` together with the original input.
    Ok(cow(composed.chars(), name))
}

/// Compute the case-insensitive normalized form of a name that has already been
/// normalized case-sensitively.
///
/// Runs NFD, case folding, the post-fold fixup ([`fixup_case_fold()`]), and NFC.
/// Trimming, Fullwidth mapping and control character mapping are not repeated,
/// since CS normalization has already applied them.
#[must_use]
pub fn normalize_ci_from_normalized_cs(cs_normalized: &str) -> Cow<'_, str> {
    let decomposed = nfd(cs_normalized);
    let folded = case_fold(&decomposed);
    let fixed = fixup_case_fold(&folded);
    let composed = nfc(&fixed);

    // The folded form of a valid name is expected to remain a valid path element.
    debug_assert!(validate_path_element(&composed).is_ok());

    // Hand the final characters to `cow` together with the original input.
    cow(composed.chars(), cs_normalized)
}

/// Check that a normalized name is usable as a single path element.
///
/// The checks run in this order, and the first failure is reported:
/// empty string, `.`, `..`, any `\0`, any `/`.
///
/// # Errors
/// Returns the [`ErrorKind`] describing the first rule the name violates.
pub fn validate_path_element(name: &str) -> ResultKind<()> {
    if name.is_empty() {
        return Err(ErrorKind::Empty);
    }
    if name == "." {
        return Err(ErrorKind::CurrentDirectoryMarker);
    }
    if name == ".." {
        return Err(ErrorKind::ParentDirectoryMarker);
    }
    // The null byte is checked before the slash, so a name containing both
    // reports `ContainsNullByte`.
    if name.contains('\0') {
        return Err(ErrorKind::ContainsNullByte);
    }
    if name.contains('/') {
        return Err(ErrorKind::ContainsForwardSlash);
    }
    Ok(())
}

// Unit tests for the normalization helpers in this module. Besides checking
// values, many tests also assert `Cow::Borrowed` to pin down when a result is
// expected to reuse the input instead of allocating.
#[cfg(test)]
mod tests {
    use alloc::borrow::Cow;
    use alloc::string::String;

    // On bare wasm32 targets, bring wasm-bindgen-test's attribute into scope under
    // the name `test`, so the `#[test]` functions below use it.
    #[cfg(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "none")))]
    use wasm_bindgen_test::wasm_bindgen_test as test;

    use super::{
        fixup_case_fold, map_control_chars, map_fullwidth, normalize_ci_from_normalized_cs,
        normalize_cs, trim_whitespace_like, validate_path_element,
    };
    use crate::ErrorKind;
    // --- trim_whitespace_like ---

    #[test]
    fn trim_whitespace_like_removes_trailing() {
        assert_eq!(trim_whitespace_like("hello   "), "hello");
    }

    #[test]
    fn trim_whitespace_like_removes_leading() {
        assert_eq!(trim_whitespace_like("   hello"), "hello");
    }

    #[test]
    fn trim_whitespace_like_removes_both() {
        assert_eq!(trim_whitespace_like("  hello  "), "hello");
    }

    #[test]
    fn trim_whitespace_like_no_whitespace() {
        assert_eq!(trim_whitespace_like("hello"), "hello");
    }

    #[test]
    fn trim_whitespace_like_middle_preserved() {
        assert_eq!(trim_whitespace_like("he llo"), "he llo");
    }

    #[test]
    fn trim_whitespace_like_empty() {
        assert_eq!(trim_whitespace_like(""), "");
    }

    #[test]
    fn trim_whitespace_like_only_spaces() {
        assert_eq!(trim_whitespace_like("   "), "");
    }

    #[test]
    fn trim_whitespace_like_tabs() {
        assert_eq!(trim_whitespace_like("\thello\t"), "hello");
    }

    #[test]
    fn trim_whitespace_like_ideographic_space() {
        // U+3000 IDEOGRAPHIC SPACE.
        assert_eq!(trim_whitespace_like("\u{3000}hello\u{3000}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_mixed() {
        assert_eq!(trim_whitespace_like("\t\u{3000} hello \t\u{3000}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_tab() {
        // U+2409 is the Control Picture for HT.
        assert_eq!(trim_whitespace_like("\u{2409}hello\u{2409}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_lf() {
        // U+240A is the Control Picture for LF.
        assert_eq!(trim_whitespace_like("\u{240A}hello\u{240A}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_cr() {
        // U+240D is the Control Picture for CR.
        assert_eq!(trim_whitespace_like("\u{240D}hello\u{240D}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_control_picture_middle_preserved() {
        assert_eq!(trim_whitespace_like("he\u{2409}llo"), "he\u{2409}llo");
    }

    #[test]
    fn trim_whitespace_like_bom_leading() {
        assert_eq!(trim_whitespace_like("\u{FEFF}hello"), "hello");
    }

    #[test]
    fn trim_whitespace_like_bom_trailing() {
        assert_eq!(trim_whitespace_like("hello\u{FEFF}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_bom_both() {
        assert_eq!(trim_whitespace_like("\u{FEFF}hello\u{FEFF}"), "hello");
    }

    #[test]
    fn trim_whitespace_like_bom_middle_preserved() {
        assert_eq!(trim_whitespace_like("he\u{FEFF}llo"), "he\u{FEFF}llo");
    }

    #[test]
    fn trim_whitespace_like_only_bom() {
        assert_eq!(trim_whitespace_like("\u{FEFF}"), "");
    }

    #[test]
    fn trim_whitespace_like_multiple_leading_bom() {
        assert_eq!(
            trim_whitespace_like("\u{FEFF}\u{FEFF}\u{FEFF}hello"),
            "hello"
        );
    }

    // --- map_fullwidth ---

    #[test]
    fn map_fullwidth_letters() {
        assert_eq!(map_fullwidth("\u{FF21}\u{FF41}"), "Aa");
    }

    #[test]
    fn map_fullwidth_digits() {
        assert_eq!(map_fullwidth("\u{FF10}\u{FF19}"), "09");
    }

    #[test]
    fn map_fullwidth_symbols() {
        assert_eq!(map_fullwidth("\u{FF01}"), "!");
    }

    #[test]
    fn map_fullwidth_mixed() {
        assert_eq!(map_fullwidth("abc\u{FF21}def"), "abcAdef");
    }

    #[test]
    fn map_fullwidth_pure_ascii() {
        // Nothing to map, so the input is expected to be borrowed as-is.
        let result = map_fullwidth("hello");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
    }

    #[test]
    fn map_fullwidth_all_characters() {
        // The whole mapped range must line up one-to-one with ASCII '!'..='~'.
        let fullwidth: String = ('\u{FF01}'..='\u{FF5E}').collect();
        let ascii: String = ('!'..='~').collect();
        assert_eq!(map_fullwidth(&fullwidth), ascii);
    }

    // --- map_control_chars ---

    #[test]
    fn map_control_del() {
        assert_eq!(map_control_chars("\x7F"), "\u{2421}");
    }

    #[test]
    fn map_control_normal_unchanged() {
        // Nothing to map, so the input is expected to be borrowed as-is.
        let result = map_control_chars("hello");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
    }

    #[test]
    fn map_control_mixed() {
        assert_eq!(map_control_chars("a\x01b\x7Fc"), "a\u{2401}b\u{2421}c");
    }

    #[test]
    fn map_control_null_byte_unchanged() {
        // NUL is not mapped here; rejecting it is the job of validation.
        assert_eq!(map_control_chars("\x00"), "\x00");
    }

    #[test]
    fn map_control_all_c0_characters() {
        // Every control in 0x01..=0x1F must map to the picture at the same offset.
        let controls: String = ('\x01'..='\x1F').collect();
        let pictures: String = ('\u{2401}'..='\u{241F}').collect();
        assert_eq!(map_control_chars(&controls), pictures);
    }

    // --- fixup_case_fold ---

    #[test]
    fn fixup_case_fold_dotted_capital() {
        assert_eq!(fixup_case_fold("\u{0130}"), "I");
    }

    #[test]
    fn fixup_case_fold_dotless_lowercase() {
        assert_eq!(fixup_case_fold("\u{0131}"), "i");
    }

    #[test]
    fn fixup_case_fold_dotless_lowercase_with_dot() {
        // ı followed by combining dot above: ı→i and strip the dot.
        // This handles Turkic fold output: fold_turkic("I\u{0307}") = "ı\u{0307}".
        assert_eq!(fixup_case_fold("\u{0131}\u{0307}"), "i");
    }

    #[test]
    fn fixup_case_fold_mixed() {
        assert_eq!(fixup_case_fold("a\u{0130}b\u{0131}c"), "aIbic");
    }

    #[test]
    fn fixup_case_fold_ascii_unchanged() {
        let result = fixup_case_fold("Hello");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "Hello");
    }

    #[test]
    fn fixup_case_fold_nfd_decomposed() {
        // Dropping the trailing dot leaves a prefix of the input, which is
        // expected to be returned borrowed.
        let result = fixup_case_fold("I\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I");
    }

    #[test]
    fn fixup_case_fold_nfd_decomposed_lowercase() {
        let result = fixup_case_fold("i\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "i");
    }

    #[test]
    fn fixup_case_fold_intervening_combiner() {
        // A cedilla between the base letter and the dot does not stop the strip.
        let result = fixup_case_fold("I\u{0327}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I\u{0327}");
    }

    #[test]
    fn fixup_case_fold_intervening_combiner_lowercase() {
        let result = fixup_case_fold("i\u{0327}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "i\u{0327}");
    }

    #[test]
    fn fixup_case_fold_multiple_dots() {
        // Consecutive dots after the base letter are all removed.
        let result = fixup_case_fold("I\u{0307}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I");
    }

    #[test]
    fn fixup_case_fold_dot_on_other_base() {
        // The dot is only stripped after I/i/J/j, not after other letters.
        let result = fixup_case_fold("e\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "e\u{0307}");
    }

    #[test]
    fn fixup_case_fold_dot_after_starter_resets() {
        // The intervening "a" is a new base, so the dot belongs to it and stays.
        let result = fixup_case_fold("Ia\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "Ia\u{0307}");
    }

    #[test]
    fn fixup_case_fold_multiple_combiners_then_dot() {
        let result = fixup_case_fold("i\u{0325}\u{0327}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "i\u{0325}\u{0327}");
    }

    #[test]
    fn fixup_case_fold_above_combiner_blocks_strip() {
        // U+0301 COMBINING ACUTE ACCENT has CCC=230 (Above), which blocks dot stripping.
        let result = fixup_case_fold("I\u{0301}\u{0307}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "I\u{0301}\u{0307}");
    }

    #[test]
    fn fixup_case_fold_below_combiner_allows_strip() {
        // U+0327 COMBINING CEDILLA has CCC=202 (not Above), so dot stripping proceeds.
        assert_eq!(fixup_case_fold("I\u{0327}\u{0307}"), "I\u{0327}");
    }

    // --- fixup_case_fold: Lithuanian J dot stripping ---

    #[test]
    fn fixup_case_fold_j_dot_above_stripped() {
        // Lithuanian lowercase adds U+0307 after j.
        assert_eq!(fixup_case_fold("j\u{0307}"), "j");
    }

    #[test]
    fn fixup_case_fold_j_uppercase_dot_above_stripped() {
        assert_eq!(fixup_case_fold("J\u{0307}"), "J");
    }

    #[test]
    fn fixup_case_fold_j_dot_with_circumflex() {
        // Lithuanian lowercase of Ĵ with ypogegrammeni: j + dot + circumflex + ypogegrammeni
        assert_eq!(
            fixup_case_fold("j\u{0307}\u{0302}\u{0345}"),
            "j\u{0302}\u{0345}"
        );
    }

    #[test]
    fn fixup_case_fold_j_no_dot_unchanged() {
        let result = fixup_case_fold("j\u{0302}");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "j\u{0302}");
    }

    // --- normalize ---

    #[test]
    fn normalize_trims_leading_bom() {
        let input = "\u{FEFF}hello.txt";
        let with_bom = normalize_cs(input).unwrap();
        let without_bom = normalize_cs("hello.txt").unwrap();
        assert_eq!(with_bom, without_bom);
        assert!(matches!(with_bom, Cow::Borrowed(_)));
        assert!(matches!(without_bom, Cow::Borrowed(_)));
        // The borrowed result must point into `input`, right after the BOM.
        assert!(core::ptr::eq(
            with_bom.as_ptr(),
            input["\u{FEFF}".len()..].as_ptr()
        ));
    }

    #[test]
    fn normalize_preserves_interior_bom() {
        let result = normalize_cs("he\u{FEFF}llo").unwrap();
        assert!(result.contains('\u{FEFF}'));
    }

    #[test]
    fn normalize_maps_fullwidth() {
        let fullwidth = normalize_cs("\u{FF21}bc.txt").unwrap();
        let ascii = normalize_cs("Abc.txt").unwrap();
        assert_eq!(fullwidth, ascii);
    }

    #[test]
    fn normalize_strips_whitespace() {
        let with_whitespace = normalize_cs("\t\u{3000} hello \t\u{3000}").unwrap();
        let without_whitespace = normalize_cs("hello").unwrap();
        assert_eq!(with_whitespace, without_whitespace);
        assert!(matches!(with_whitespace, Cow::Borrowed(_)));
        assert!(matches!(without_whitespace, Cow::Borrowed(_)));
    }

    #[test]
    fn normalize_trailing_whitespace_borrows_prefix() {
        let input = "hello   ";
        let result = normalize_cs(input).unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
        // The borrowed result must start at the beginning of `input`.
        assert!(core::ptr::eq(result.as_ptr(), input.as_ptr()));
    }

    #[test]
    fn normalize_leading_whitespace_borrows_suffix() {
        let input = "   hello";
        let result = normalize_cs(input).unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "hello");
        // The borrowed result must start right after the three leading spaces.
        assert!(core::ptr::eq(result.as_ptr(), input[3..].as_ptr()));
    }

    #[test]
    fn normalize_normalizes_unicode() {
        // Decomposed e + U+0301 and precomposed é (U+00E9) must normalize identically.
        let nfd_input = normalize_cs("e\u{0301}.txt").unwrap();
        let composed = normalize_cs("\u{00E9}.txt").unwrap();
        assert_eq!(nfd_input, composed);
    }

    #[test]
    fn normalize_maps_control_chars() {
        let with_control = normalize_cs("a\x01b").unwrap();
        let with_picture = normalize_cs("a\u{2401}b").unwrap();
        assert_eq!(with_control, with_picture);
    }

    #[test]
    fn normalize_strips_whitespace_control_pictures() {
        // A leading tab and its Control Picture (U+2409) are both trimmed.
        let with_tab = normalize_cs("\thello").unwrap();
        let with_picture = normalize_cs("\u{2409}hello").unwrap();
        let plain = normalize_cs("hello").unwrap();
        assert_eq!(with_tab, plain);
        assert_eq!(with_picture, plain);
    }

    #[test]
    fn normalize_turkish_i_sensitive() {
        // Case-sensitive normalization keeps I, i, İ and ı pairwise distinct.
        let ascii_upper = normalize_cs("I").unwrap();
        let ascii_lower = normalize_cs("i").unwrap();
        let dotted = normalize_cs("\u{0130}").unwrap();
        let dotless = normalize_cs("\u{0131}").unwrap();
        assert_eq!(ascii_upper, "I");
        assert_eq!(ascii_lower, "i");
        assert_eq!(dotted, "\u{0130}");
        assert_eq!(dotless, "\u{0131}");
        assert_ne!(ascii_upper, ascii_lower);
        assert_ne!(ascii_upper, dotted);
        assert_ne!(ascii_upper, dotless);
        assert_ne!(ascii_lower, dotted);
        assert_ne!(ascii_lower, dotless);
        assert_ne!(dotted, dotless);
    }

    // --- D145: U+0345 COMBINING GREEK YPOGEGRAMMENI ---

    #[test]
    fn normalize_ypogegrammeni_sensitive_preserved() {
        let cs = normalize_cs("\u{0345}").unwrap();
        assert_eq!(cs, "\u{0345}");
    }

    #[test]
    fn normalize_ypogegrammeni_with_overline_sensitive() {
        // Both input orders must end up in the same canonical order.
        let a = normalize_cs("\u{0345}\u{0305}").unwrap();
        let b = normalize_cs("\u{0305}\u{0345}").unwrap();
        assert_eq!(a, b);
        assert_eq!(a, "\u{0305}\u{0345}");
    }

    // --- U+FB04 LATIN SMALL LIGATURE FFL ---

    #[test]
    fn normalize_ligature_ffl_sensitive() {
        let cs = normalize_cs("\u{FB04}").unwrap();
        assert_eq!(cs, "\u{FB04}");
    }

    // --- Supplementary plane case folding (Deseret) ---

    #[test]
    fn normalize_deseret_sensitive() {
        let upper = normalize_cs("\u{10400}").unwrap();
        let lower = normalize_cs("\u{10428}").unwrap();
        assert_ne!(upper, lower);
    }

    // --- Greek sigma ---

    #[test]
    fn normalize_greek_sigma_sensitive() {
        let small = normalize_cs("\u{03C3}").unwrap();
        let final_sigma = normalize_cs("\u{03C2}").unwrap();
        assert_ne!(small, final_sigma);
    }

    // --- Canonical equivalence ---

    #[test]
    fn normalize_ohm_sign_equals_omega() {
        let ohm = normalize_cs("\u{2126}").unwrap();
        let omega = normalize_cs("\u{03A9}").unwrap();
        assert_eq!(ohm, omega);
    }

    #[test]
    fn normalize_angstrom_equals_a_ring() {
        let angstrom = normalize_cs("\u{212B}").unwrap();
        let a_ring = normalize_cs("\u{00C5}").unwrap();
        assert_eq!(angstrom, a_ring);
    }

    // --- DZ digraph ---

    #[test]
    fn normalize_dz_digraph_sensitive() {
        let upper = normalize_cs("\u{01F1}").unwrap();
        let title = normalize_cs("\u{01F2}").unwrap();
        let lower = normalize_cs("\u{01F3}").unwrap();
        assert_ne!(upper, title);
        assert_ne!(upper, lower);
        assert_ne!(title, lower);
    }

    #[test]
    fn normalize_ascii_i_sensitive() {
        let upper = normalize_cs("I").unwrap();
        let lower = normalize_cs("i").unwrap();
        assert_ne!(upper, lower);
        assert_eq!(upper, "I");
        assert_eq!(lower, "i");
    }

    #[test]
    fn normalize_empty_rejected() {
        assert!(normalize_cs("").is_err());
    }

    #[test]
    fn normalize_dot_rejected() {
        assert!(normalize_cs(".").is_err());
    }

    #[test]
    fn normalize_dotdot_rejected() {
        assert!(normalize_cs("..").is_err());
    }

    #[test]
    fn normalize_slash_rejected() {
        assert!(normalize_cs("a/b").is_err());
    }

    #[test]
    fn normalize_bom_only_rejected() {
        // Trimming the BOM leaves an empty name, which validation rejects.
        assert!(normalize_cs("\u{FEFF}").is_err());
    }

    #[test]
    fn normalize_bom_dot_rejected() {
        // Trimming the BOM leaves ".", which validation rejects.
        assert!(normalize_cs("\u{FEFF}.").is_err());
    }

    // --- validate_path_element ---

    #[test]
    fn validate_empty_rejected() {
        assert!(validate_path_element("").is_err());
    }

    #[test]
    fn validate_dot_rejected() {
        assert!(validate_path_element(".").is_err());
    }

    #[test]
    fn validate_dotdot_rejected() {
        assert!(validate_path_element("..").is_err());
    }

    #[test]
    fn validate_slash_rejected() {
        assert!(validate_path_element("a/b").is_err());
    }

    #[test]
    fn validate_valid_path_element() {
        assert!(validate_path_element("hello.txt").is_ok());
    }

    #[test]
    fn validate_dotfile() {
        assert!(validate_path_element(".gitignore").is_ok());
    }

    #[test]
    fn validate_triple_dot() {
        // Only exactly "." and ".." are reserved; "..." is an ordinary name.
        assert!(validate_path_element("...").is_ok());
    }

    #[test]
    fn validate_unicode() {
        assert!(validate_path_element("日本語.txt").is_ok());
    }

    #[test]
    fn validate_null_byte_rejected() {
        assert!(matches!(
            validate_path_element("\0"),
            Err(ErrorKind::ContainsNullByte)
        ));
        assert!(matches!(
            validate_path_element("a\0b"),
            Err(ErrorKind::ContainsNullByte)
        ));
    }

    // --- normalize_cs ---

    #[test]
    fn normalize_cs_null_byte_rejected() {
        assert!(matches!(
            normalize_cs("a\0b"),
            Err(ErrorKind::ContainsNullByte)
        ));
    }

    #[test]
    fn normalize_sensitive_preserves_case() {
        let upper = normalize_cs("Hello.txt").unwrap();
        let lower = normalize_cs("hello.txt").unwrap();
        assert_ne!(upper, lower);
        assert!(matches!(upper, Cow::Borrowed(_)));
        assert!(matches!(lower, Cow::Borrowed(_)));
    }

    // --- normalize_ci_from_normalized_cs ---

    #[test]
    fn ci_from_cs_turkish_i() {
        assert_eq!(normalize_ci_from_normalized_cs("I"), "i");
        assert_eq!(normalize_ci_from_normalized_cs("i"), "i");
        // İ (U+0130) is already NFC
        assert_eq!(normalize_ci_from_normalized_cs("\u{0130}"), "i");
        // ı (U+0131) is already NFC
        assert_eq!(normalize_ci_from_normalized_cs("\u{0131}"), "i");
    }

    #[test]
    fn ci_from_cs_i_combining_dot() {
        // "I\u{0307}" NFC-composes to İ (U+0130). CI must map to "i".
        assert_eq!(normalize_ci_from_normalized_cs("\u{0130}"), "i");

        // "ı\u{0307}" stays as-is in NFC. CI must map to "i".
        assert_eq!(normalize_ci_from_normalized_cs("\u{0131}\u{0307}"), "i");
    }

    #[test]
    fn ci_from_cs_ypogegrammeni() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{0345}"), "\u{03B9}");

        // Both orderings CS-normalize to "\u{0305}\u{0345}" (overline CCC=230 < ypogegrammeni CCC=240).
        // Ypogegrammeni case-folds to ι (U+03B9).
        assert_eq!(
            normalize_ci_from_normalized_cs("\u{0305}\u{0345}"),
            "\u{0305}\u{03B9}"
        );
    }

    #[test]
    fn ci_from_cs_composed_ypogegrammeni() {
        // U+1FC3 (ῃ) = η + ypogegrammeni → η + ι after case fold.
        assert_eq!(
            normalize_ci_from_normalized_cs("\u{1FC3}"),
            "\u{03B7}\u{03B9}"
        );
    }

    #[test]
    fn ci_from_cs_ligature_ffl() {
        // Ligature U+FB04 is preserved by CS normalization, then case-folded to "ffl".
        assert_eq!(normalize_ci_from_normalized_cs("\u{FB04}"), "ffl");
        assert_eq!(normalize_ci_from_normalized_cs("ffl"), "ffl");
        assert_eq!(normalize_ci_from_normalized_cs("FFL"), "ffl");
        assert_eq!(normalize_ci_from_normalized_cs("Ffl"), "ffl");
    }

    #[test]
    fn ci_from_cs_deseret() {
        assert_eq!(normalize_ci_from_normalized_cs("\u{10400}"), "\u{10428}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{10428}"), "\u{10428}");
    }

    #[test]
    fn ci_from_cs_ohm_omega() {
        // Ohm sign (U+2126) and Omega (U+03A9) both CS-normalize to Ω (U+03A9).
        assert_eq!(normalize_ci_from_normalized_cs("\u{03A9}"), "\u{03C9}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{03C9}"), "\u{03C9}");
    }

    #[test]
    fn ci_from_cs_angstrom() {
        // Angstrom (U+212B) and Å (U+00C5) both CS-normalize to Å (U+00C5).
        assert_eq!(normalize_ci_from_normalized_cs("\u{00C5}"), "\u{00E5}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{00E5}"), "\u{00E5}");
    }

    #[test]
    fn ci_from_cs_micro_sign() {
        // Micro sign (U+00B5), μ (U+03BC) and Μ (U+039C) must all end up as μ (U+03BC).
        assert_eq!(normalize_ci_from_normalized_cs("\u{00B5}"), "\u{03BC}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{03BC}"), "\u{03BC}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{039C}"), "\u{03BC}");
    }

    #[test]
    fn ci_from_cs_dz_digraph() {
        // U+01F1 DZ, U+01F2 Dz, U+01F3 dz (ligatures) all fold to U+01F3.
        // The ASCII pairs "DZ"/"dz" fold to "dz" instead — they are distinct.
        assert_eq!(normalize_ci_from_normalized_cs("\u{01F1}"), "\u{01F3}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{01F2}"), "\u{01F3}");
        assert_eq!(normalize_ci_from_normalized_cs("\u{01F3}"), "\u{01F3}");
    }

    #[test]
    fn ci_from_cs_sharp_s_variants() {
        // All sharp s and "ss" variants normalize to "ss".
        assert_eq!(normalize_ci_from_normalized_cs("ss"), "ss");
        assert_eq!(normalize_ci_from_normalized_cs("SS"), "ss");
        assert_eq!(normalize_ci_from_normalized_cs("sS"), "ss");
        assert_eq!(normalize_ci_from_normalized_cs("Ss"), "ss");
        assert_eq!(normalize_ci_from_normalized_cs("\u{00DF}"), "ss"); // ß
        assert_eq!(normalize_ci_from_normalized_cs("\u{1E9E}"), "ss"); // ẞ
    }

    #[test]
    fn ci_from_cs_greek_sigma_variants() {
        // All sigma variants normalize to σ (U+03C3).
        assert_eq!(normalize_ci_from_normalized_cs("\u{03A3}"), "\u{03C3}"); // Σ
        assert_eq!(normalize_ci_from_normalized_cs("\u{03C3}"), "\u{03C3}"); // σ
        assert_eq!(normalize_ci_from_normalized_cs("\u{03C2}"), "\u{03C3}"); // ς
        // Lunate sigma ϲ/Ϲ fold to ϲ, not σ.
        assert_eq!(normalize_ci_from_normalized_cs("\u{03F2}"), "\u{03F2}"); // ϲ
        assert_eq!(normalize_ci_from_normalized_cs("\u{03F9}"), "\u{03F2}"); // Ϲ
    }

    #[test]
    fn ci_from_cs_hello() {
        assert_eq!(normalize_ci_from_normalized_cs("Hello.txt"), "hello.txt");
        assert_eq!(normalize_ci_from_normalized_cs("hello.txt"), "hello.txt");
        assert_eq!(normalize_ci_from_normalized_cs("HELLO.TXT"), "hello.txt");
    }

    #[test]
    fn ci_from_cs_nfc_nfd_equivalent() {
        // Both É (U+00C9) and E+\u{0301} CS-normalize to É (U+00C9).
        assert_eq!(
            normalize_ci_from_normalized_cs("\u{00C9}.txt"),
            "\u{00E9}.txt"
        );
    }

    #[test]
    fn ci_from_cs_japanese_unchanged() {
        let result = normalize_ci_from_normalized_cs("日本語.txt");
        assert_eq!(result.as_ref(), "日本語.txt");
        assert!(matches!(result, Cow::Borrowed(_)));
    }

    #[test]
    fn ci_from_cs_idempotent() {
        // Folding an already folded name must change nothing and must not allocate.
        let first = normalize_ci_from_normalized_cs("Hello.txt");
        let second = normalize_ci_from_normalized_cs(&first);
        assert_eq!(first, second);
        assert!(matches!(second, Cow::Borrowed(_)));
    }

    #[test]
    fn ci_from_cs_already_folded_borrows() {
        let result = normalize_ci_from_normalized_cs("hello.txt");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result.as_ref(), "hello.txt");
    }

    #[test]
    fn normalize_space_vs_nbsp_distinct() {
        // Regular space and non-breaking space produce different normalized forms.
        let space = normalize_cs("a b").unwrap();
        let nbsp = normalize_cs("a\u{00A0}b").unwrap();
        assert_ne!(space, nbsp);
    }

    // --- Zero Width Joiner / Non-Joiner ---

    #[test]
    fn normalize_cs_preserves_zwj() {
        let result = normalize_cs("a\u{200D}b").unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200D}b");
    }

    #[test]
    fn normalize_cs_preserves_zwnj() {
        let result = normalize_cs("a\u{200C}b").unwrap();
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200C}b");
    }

    #[test]
    fn ci_from_cs_preserves_zwj() {
        let result = normalize_ci_from_normalized_cs("a\u{200D}b");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200D}b");
    }

    #[test]
    fn ci_from_cs_preserves_zwnj() {
        let result = normalize_ci_from_normalized_cs("a\u{200C}b");
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(result, "a\u{200C}b");
    }

    #[test]
    fn ci_from_cs_zwj_between_i_and_dot() {
        // ZWJ is a starter (CCC=0), so it blocks dot stripping.
        assert_eq!(
            normalize_ci_from_normalized_cs("i\u{200D}\u{0307}"),
            "i\u{200D}\u{0307}"
        );
    }

    #[test]
    fn ci_from_cs_zwnj_between_i_and_dot() {
        // ZWNJ is a starter (CCC=0), so it blocks dot stripping.
        assert_eq!(
            normalize_ci_from_normalized_cs("i\u{200C}\u{0307}"),
            "i\u{200C}\u{0307}"
        );
    }
}