use simd_normalizer::{CaseFoldMode, casefold, casefold_char};
use std::borrow::Cow;
/// Each ASCII capital `A`..=`Z` must fold, in Standard mode, to the character
/// 32 code points above it (its ASCII lowercase form).
#[test]
fn ascii_uppercase_folds_to_lowercase() {
    ('A'..='Z').for_each(|upper| {
        // ASCII lowercase letters sit exactly 32 positions after the capitals.
        let expected = char::from(upper as u8 + 32);
        let folded = casefold_char(upper, CaseFoldMode::Standard);
        assert_eq!(folded, expected, "Expected {:?} -> {:?}", upper, expected);
    });
}
/// ASCII lowercase letters are expected to come back unchanged from
/// Standard-mode folding.
#[test]
fn ascii_lowercase_unchanged() {
    for byte in b'a'..=b'z' {
        let letter = char::from(byte);
        let folded = casefold_char(letter, CaseFoldMode::Standard);
        assert_eq!(folded, letter);
    }
}
/// ASCII digits and a sample of ASCII punctuation must be returned unchanged
/// by Standard-mode folding.
#[test]
fn digits_and_symbols_unchanged() {
    let symbols = ['!', '@', '#', '$', '%', '^', '&', '*', '(', ')'];
    // Digits are checked first, then the symbols, in the listed order.
    ('0'..='9').chain(symbols).for_each(|ch| {
        assert_eq!(casefold_char(ch, CaseFoldMode::Standard), ch);
    });
}
/// Checks Standard-mode folding for a sample of accented Latin-1 capitals.
#[test]
fn latin_extended_folding() {
    // (capital, expected fold) pairs.
    const PAIRS: [(char, char); 5] = [
        ('\u{00C0}', '\u{00E0}'), // À -> à
        ('\u{00D6}', '\u{00F6}'), // Ö -> ö
        ('\u{00DC}', '\u{00FC}'), // Ü -> ü
        ('\u{00C9}', '\u{00E9}'), // É -> é
        ('\u{00D1}', '\u{00F1}'), // Ñ -> ñ
    ];
    for &(capital, small) in &PAIRS {
        let folded = casefold_char(capital, CaseFoldMode::Standard);
        assert_eq!(
            folded,
            small,
            "U+{:04X} should fold to U+{:04X}",
            u32::from(capital),
            u32::from(small),
        );
    }
}
/// Checks Standard-mode folding for a sample of Greek capital letters.
#[test]
fn greek_folding() {
    // (capital, expected fold) pairs.
    let pairs = [
        ('\u{0391}', '\u{03B1}'), // Alpha
        ('\u{0392}', '\u{03B2}'), // Beta
        ('\u{03A3}', '\u{03C3}'), // Sigma
        ('\u{03A9}', '\u{03C9}'), // Omega
    ];
    pairs.iter().for_each(|&(capital, small)| {
        let folded = casefold_char(capital, CaseFoldMode::Standard);
        assert_eq!(
            folded,
            small,
            "Greek U+{:04X} should fold to U+{:04X}",
            u32::from(capital),
            u32::from(small),
        );
    });
}
/// Checks Standard-mode folding for a sample of Cyrillic capital letters.
#[test]
fn cyrillic_folding() {
    // Parallel arrays: `capitals[i]` is expected to fold to `smalls[i]`.
    let capitals = ['\u{0410}', '\u{0411}', '\u{042F}']; // А, Б, Я
    let smalls = ['\u{0430}', '\u{0431}', '\u{044F}']; // а, б, я
    for (capital, small) in capitals.iter().copied().zip(smalls.iter().copied()) {
        let folded = casefold_char(capital, CaseFoldMode::Standard);
        assert_eq!(
            folded,
            small,
            "Cyrillic U+{:04X} should fold to U+{:04X}",
            u32::from(capital),
            u32::from(small),
        );
    }
}
/// Two foldings whose target is not the plain "lowercase twin" of the source:
/// U+00B5 is expected to fold to U+03BC, and U+1E9E to U+00DF.
#[test]
fn special_case_foldings() {
    // MICRO SIGN -> GREEK SMALL LETTER MU.
    let micro_sign = '\u{00B5}';
    let greek_small_mu = '\u{03BC}';
    assert_eq!(
        casefold_char(micro_sign, CaseFoldMode::Standard),
        greek_small_mu
    );

    // LATIN CAPITAL LETTER SHARP S -> LATIN SMALL LETTER SHARP S.
    let capital_sharp_s = '\u{1E9E}';
    let small_sharp_s = '\u{00DF}';
    assert_eq!(
        casefold_char(capital_sharp_s, CaseFoldMode::Standard),
        small_sharp_s
    );
}
/// Capital `I` must fold to `i` in Standard mode but to dotless `ı` (U+0131)
/// in Turkish mode.
#[test]
fn turkish_capital_i_to_dotless() {
    let capital_i = 'I';
    let dotless_i = '\u{0131}';

    let standard = casefold_char(capital_i, CaseFoldMode::Standard);
    assert_eq!(standard, 'i');

    let turkish = casefold_char(capital_i, CaseFoldMode::Turkish);
    assert_eq!(turkish, dotless_i);
}
/// In Turkish mode, dotted capital `İ` (U+0130) must fold to plain ASCII `i`.
#[test]
fn turkish_dotted_capital_i_to_i() {
    let dotted_capital_i = '\u{0130}';
    let folded = casefold_char(dotted_capital_i, CaseFoldMode::Turkish);
    assert_eq!(folded, 'i');
}
/// For every ASCII capital other than `I`, Turkish mode must give the same
/// result as Standard mode.
#[test]
fn turkish_other_chars_same_as_standard() {
    // 'A'..='H' followed by 'J'..='Z': the whole alphabet minus 'I'.
    let capitals_except_i = ('A'..='H').chain('J'..='Z');
    for letter in capitals_except_i {
        let turkish = casefold_char(letter, CaseFoldMode::Turkish);
        let standard = casefold_char(letter, CaseFoldMode::Standard);
        assert_eq!(turkish, standard);
    }
}
/// Folding a string that is already all-lowercase ASCII must yield the
/// `Cow::Borrowed` variant rather than an owned copy.
#[test]
fn string_already_folded_returns_borrowed() {
    let already_folded = "hello world";
    let result = casefold(already_folded, CaseFoldMode::Standard);
    assert!(matches!(result, Cow::Borrowed(_)));
}
/// Folding the empty string must yield the `Cow::Borrowed` variant.
#[test]
fn string_empty_returns_borrowed() {
    let empty = "";
    let result = casefold(empty, CaseFoldMode::Standard);
    assert!(matches!(result, Cow::Borrowed(_)));
}
/// Mixed-case and all-caps ASCII strings must fold to their lowercase form in
/// Standard mode.
#[test]
fn string_mixed_case() {
    // (input, expected fold) pairs, checked in order.
    let cases = [("Hello World", "hello world"), ("HELLO", "hello")];
    for (input, expected) in cases {
        let folded = casefold(input, CaseFoldMode::Standard);
        assert_eq!(&*folded, expected);
    }
}
/// Strings mixing ASCII and accented Latin letters must fold correctly in
/// Standard mode, whether the accented letter is already lowercase (`ö`) or
/// is a capital (`É`).
#[test]
fn string_unicode_mixed() {
    let stroeme = casefold("Ströme", CaseFoldMode::Standard);
    assert_eq!(&*stroeme, "ströme");

    let cafe = casefold("CAFÉ", CaseFoldMode::Standard);
    assert_eq!(&*cafe, "café");
}
/// In Turkish mode the leading `I` of "Istanbul" must become dotless `ı`
/// (U+0131); the rest of the word is already lowercase.
#[test]
fn string_turkish_mode() {
    let expected = "\u{0131}stanbul";
    let folded = casefold("Istanbul", CaseFoldMode::Turkish);
    assert_eq!(&*folded, expected);
}
/// Smoke test: folds every valid `char` in U+0000..=U+FFFF in both modes.
/// Results are discarded; the test only fails if a call panics.
#[test]
fn bmp_scan_no_panics() {
    (0u32..=0xFFFF)
        // Values that are not valid `char`s (`from_u32` returns `None`) are skipped.
        .filter_map(char::from_u32)
        .for_each(|c| {
            let _ = casefold_char(c, CaseFoldMode::Standard);
            let _ = casefold_char(c, CaseFoldMode::Turkish);
        });
}
/// Smoke test: folds a handful of code points above U+FFFF in both modes.
/// Results are discarded; the test only fails if a call panics.
///
/// Both `Standard` and `Turkish` are exercised so this test covers the same
/// modes as the BMP scan.
#[test]
fn supplementary_sample_no_panics() {
    // Written as `char` literals so an invalid code point is a compile error
    // instead of a sample that is silently skipped at run time.
    let samples = [
        '\u{10000}',  // first supplementary code point
        '\u{10400}',  // Deseret capital letter
        '\u{10428}',  // Deseret small letter
        '\u{1D400}',  // mathematical bold capital A
        '\u{1F600}',  // emoji
        '\u{10FFFF}', // last Unicode code point
    ];
    for c in samples {
        let _ = casefold_char(c, CaseFoldMode::Standard);
        let _ = casefold_char(c, CaseFoldMode::Turkish);
    }
}
/// Folding an already-folded string a second time must not change it
/// (Standard mode), checked over a small multi-script sample.
#[test]
fn casefold_idempotent() {
    const SAMPLES: [&str; 6] = [
        "Hello World",
        "CAFÉ",
        "Ströme",
        "Istanbul",
        "\u{0391}\u{0392}\u{0393}", // Greek capitals
        "\u{0410}\u{0411}\u{0412}", // Cyrillic capitals
    ];
    for text in SAMPLES {
        let first_pass = casefold(text, CaseFoldMode::Standard);
        // Feed the first result back in; it must come out unchanged.
        let second_pass = casefold(&first_pass, CaseFoldMode::Standard);
        assert_eq!(
            &*first_pass,
            &*second_pass,
            "casefold not idempotent for {:?}",
            text
        );
    }
}