use crate::tables::case_folding_data;
/// Returns a newly allocated, case-folded copy of `text`.
///
/// Convenience wrapper around [`fold_case_into`]: it creates an empty
/// `String`, lets `fold_case_into` fill it, and hands the buffer back.
pub(crate) fn fold_case_impl(text: &str) -> String {
    let mut folded = String::new();
    fold_case_into(text, &mut folded);
    folded
}
/// Case-folds `text`, allocating only when folding would change something.
///
/// The input is scanned once: a character counts as "needing folding" when it
/// is an ASCII uppercase letter, or when it is non-ASCII and
/// `case_folding_data::lookup` has a mapping for it. If no such character is
/// found, `text` is returned borrowed; otherwise the result of
/// [`fold_case_impl`] is returned as an owned string.
pub(crate) fn fold_case_cow(text: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    // The table is consulted only for non-ASCII characters, mirroring the
    // per-character handling in `fold_case_into`.
    let needs_folding = |ch: char| {
        if ch.is_ascii() {
            ch.is_ascii_uppercase()
        } else {
            case_folding_data::lookup(ch).is_some()
        }
    };

    if text.chars().any(needs_folding) {
        Cow::Owned(fold_case_impl(text))
    } else {
        Cow::Borrowed(text)
    }
}
/// Writes the case-folded form of `text` into `result`, replacing whatever
/// `result` held before.
///
/// * Pure-ASCII input is copied verbatim and lowercased in place.
/// * Otherwise each character is handled individually: ASCII characters are
///   lowercased, non-ASCII characters are replaced by their mapping from
///   `case_folding_data::lookup` when one exists, and copied through unchanged
///   when it does not.
pub(crate) fn fold_case_into(text: &str, result: &mut String) {
    // Discard previous contents so the buffer can be reused across calls.
    result.clear();

    if text.is_ascii() {
        // Fast path: no table lookups, one bulk copy plus an in-place lowercase.
        result.push_str(text);
        result.make_ascii_lowercase();
    } else {
        // A mapping may be longer than the character it replaces, so reserve
        // roughly 10% more than the input length up front.
        let headroom = text.len() / 10;
        result.reserve(text.len() + headroom);

        for ch in text.chars() {
            if ch.is_ascii() {
                result.push(ch.to_ascii_lowercase());
                continue;
            }
            match case_folding_data::lookup(ch) {
                Some(mapping) => result.push_str(mapping),
                None => result.push(ch),
            }
        }
    }
}
/// Unit and property tests for the case-folding helpers in this file.
///
/// Most tests go through `fold_case_impl`; `fold_case_cow` and
/// `fold_case_into` get dedicated tests for their borrowing and
/// buffer-reset behaviour respectively.
#[cfg(test)]
mod tests {
    use super::*;
    use std::borrow::Cow;

    #[test]
    fn test_fold_case_basic() {
        assert_eq!(fold_case_impl("Hello"), "hello");
        assert_eq!(fold_case_impl("Straße"), "strasse");
    }

    #[test]
    fn test_fold_case_ascii_fast_path() {
        assert_eq!(fold_case_impl("HELLO WORLD"), "hello world");
        assert_eq!(fold_case_impl("already lowercase"), "already lowercase");
        assert_eq!(fold_case_impl("MiXeD CaSe 123!"), "mixed case 123!");
    }

    #[test]
    fn test_fold_case_pure_ascii_digits_and_punctuation() {
        assert_eq!(fold_case_impl("12345!@#$%"), "12345!@#$%");
        assert_eq!(fold_case_impl("foo_bar-baz.qux"), "foo_bar-baz.qux");
    }

    #[test]
    fn test_fold_case_empty_string() {
        assert_eq!(fold_case_impl(""), "");
    }

    #[test]
    fn test_fold_case_single_ascii_char() {
        assert_eq!(fold_case_impl("A"), "a");
        assert_eq!(fold_case_impl("z"), "z");
        assert_eq!(fold_case_impl("7"), "7");
    }

    #[test]
    fn test_fold_case_ligatures() {
        // The ligatures are written as escapes (U+FB01 fi, U+FB02 fl, U+FB00 ff,
        // U+FB03 ffi, U+FB04 ffl) so the input cannot silently degrade to plain
        // ASCII, which would bypass the table via the ASCII fast path.
        assert_eq!(
            fold_case_impl("\u{FB01}nd \u{FB02}at \u{FB00} \u{FB03} \u{FB04}"),
            "find flat ff ffi ffl"
        );
        assert_eq!(fold_case_impl("ſtop stop"), "stop stop");
    }

    #[test]
    fn test_fold_case_micro_sign_to_greek_mu() {
        assert_eq!(fold_case_impl("\u{00B5}"), "\u{03BC}");
    }

    #[test]
    fn test_fold_case_long_s_to_s() {
        assert_eq!(fold_case_impl("\u{017F}"), "s");
    }

    #[test]
    fn test_fold_case_eszett() {
        assert_eq!(fold_case_impl("ß"), "ss");
        assert_eq!(fold_case_impl("ẞ"), "ss");
    }

    #[test]
    fn test_fold_case_dotted_i() {
        assert_eq!(fold_case_impl("\u{0130}"), "i\u{0307}");
    }

    #[test]
    fn test_fold_case_greek_uppercase() {
        assert_eq!(fold_case_impl("ΑΒΓΔ"), "αβγδ");
        assert_eq!(fold_case_impl("ΩΨΧΦ"), "ωψχφ");
    }

    #[test]
    fn test_fold_case_greek_final_sigma() {
        assert_eq!(fold_case_impl("\u{03C2}"), "\u{03C3}");
    }

    #[test]
    fn test_fold_case_greek_variant_forms() {
        assert_eq!(fold_case_impl("\u{03D0}"), "\u{03B2}");
        assert_eq!(fold_case_impl("\u{03D1}"), "\u{03B8}");
        assert_eq!(fold_case_impl("\u{03D5}"), "\u{03C6}");
        assert_eq!(fold_case_impl("\u{03D6}"), "\u{03C0}");
        assert_eq!(fold_case_impl("\u{03F0}"), "\u{03BA}");
        assert_eq!(fold_case_impl("\u{03F1}"), "\u{03C1}");
    }

    #[test]
    fn test_fold_case_greek_with_tonos() {
        assert_eq!(fold_case_impl("\u{0390}"), "\u{03B9}\u{0308}\u{0301}");
    }

    #[test]
    fn test_fold_case_cyrillic_uppercase() {
        assert_eq!(fold_case_impl("АБВГД"), "абвгд");
        assert_eq!(fold_case_impl("ЭЮЯЪ"), "эюяъ");
    }

    #[test]
    fn test_fold_case_cyrillic_mixed() {
        assert_eq!(fold_case_impl("Москва"), "москва");
        assert_eq!(fold_case_impl("КИЇВ"), "київ");
    }

    #[test]
    fn test_fold_case_armenian() {
        assert_eq!(fold_case_impl("\u{0531}"), "\u{0561}");
        assert_eq!(fold_case_impl("\u{0587}"), "\u{0565}\u{0582}");
    }

    #[test]
    fn test_fold_case_georgian_mtavruli() {
        assert_eq!(fold_case_impl("\u{1C90}"), "\u{10D0}");
    }

    #[test]
    fn test_fold_case_cherokee() {
        // U+13A0 is expected to stay as-is, while U+AB70 is expected to fold to it.
        assert_eq!(fold_case_impl("\u{13A0}"), "\u{13A0}");
        assert_eq!(fold_case_impl("\u{AB70}"), "\u{13A0}");
        assert_eq!(fold_case_impl("\u{AB71}"), "\u{13A1}");
    }

    #[test]
    fn test_fold_case_adlam() {
        assert_eq!(fold_case_impl("\u{1E900}"), "\u{1E922}");
        assert_eq!(fold_case_impl("\u{1E901}"), "\u{1E923}");
    }

    #[test]
    fn test_fold_case_fullwidth_latin() {
        assert_eq!(fold_case_impl("\u{FF21}"), "\u{FF41}");
        assert_eq!(fold_case_impl("\u{FF3A}"), "\u{FF5A}");
    }

    #[test]
    fn test_fold_case_mixed_scripts() {
        assert_eq!(fold_case_impl("Café ΣΟΦΙΑ"), "café σοφια");
    }

    #[test]
    fn test_fold_case_mixed_ascii_and_non_ascii() {
        assert_eq!(fold_case_impl("ABC Straße ÄÖÜ"), "abc strasse äöü");
    }

    #[test]
    fn test_fold_case_mixed_cjk_and_latin() {
        assert_eq!(fold_case_impl("Hello 你好 WORLD"), "hello 你好 world");
    }

    #[test]
    fn test_fold_case_identity_cjk() {
        assert_eq!(fold_case_impl("你好世界"), "你好世界");
    }

    #[test]
    fn test_fold_case_identity_emoji() {
        assert_eq!(fold_case_impl("🎉🚀💡"), "🎉🚀💡");
    }

    #[test]
    fn test_fold_case_identity_already_folded() {
        assert_eq!(fold_case_impl("café résumé naïve"), "café résumé naïve");
    }

    #[test]
    fn test_fold_case_string_length_grows() {
        assert_eq!(fold_case_impl("ßßß"), "ssssss");
        assert_eq!(fold_case_impl("ßßß").len(), 6);
    }

    #[test]
    fn test_fold_case_combining_characters_preserved() {
        let input = "e\u{0301}";
        assert_eq!(fold_case_impl(input), input);
    }

    #[test]
    fn test_fold_case_null_byte() {
        assert_eq!(fold_case_impl("A\0B"), "a\0b");
    }

    #[test]
    fn test_fold_case_surrogate_boundary() {
        assert_eq!(fold_case_impl("\u{FFFF}"), "\u{FFFF}");
        assert_eq!(fold_case_impl("\u{10000}"), "\u{10000}");
    }

    #[test]
    fn test_fold_case_deseret() {
        assert_eq!(fold_case_impl("\u{10400}"), "\u{10428}");
    }

    #[test]
    fn test_fold_case_osage() {
        assert_eq!(fold_case_impl("\u{104B0}"), "\u{104D8}");
    }

    #[test]
    fn test_fold_case_warang_citi() {
        assert_eq!(fold_case_impl("\u{118A0}"), "\u{118C0}");
    }

    #[test]
    fn test_fold_case_agrees_with_casefolding_txt() {
        // (input character, expected folded string) spot checks.
        let cases: &[(char, &str)] = &[
            ('A', "a"),
            ('Z', "z"),
            ('À', "à"),
            ('Ð', "ð"),
            ('Ø', "ø"),
            ('Ʃ', "ʃ"),
            ('Ω', "ω"),
            ('Ж', "ж"),
            ('\u{0587}', "\u{0565}\u{0582}"),
        ];
        for &(input, expected) in cases {
            let got = fold_case_impl(&input.to_string());
            assert_eq!(
                got, expected,
                "fold_case(U+{:04X} {:?}) = {:?}, expected {:?}",
                input as u32, input, got, expected
            );
        }
    }

    #[test]
    fn test_fold_case_cow_borrows_when_unchanged() {
        // Nothing to fold: the input slice must be handed back without allocating.
        for input in ["", "already lowercase 123", "你好世界"] {
            let folded = fold_case_cow(input);
            assert!(
                matches!(folded, Cow::Borrowed(_)),
                "expected borrowed result for {input:?}"
            );
            assert_eq!(folded, input);
        }
    }

    #[test]
    fn test_fold_case_cow_allocates_when_changed() {
        // An ASCII uppercase letter triggers folding.
        let ascii = fold_case_cow("Hello");
        assert!(matches!(ascii, Cow::Owned(_)));
        assert_eq!(ascii, "hello");

        // So does a non-ASCII character that has a table mapping.
        let non_ascii = fold_case_cow("straße");
        assert!(matches!(non_ascii, Cow::Owned(_)));
        assert_eq!(non_ascii, "strasse");
    }

    #[test]
    fn test_fold_case_into_overwrites_buffer() {
        // The output buffer is cleared on every call, on both the ASCII fast
        // path and the per-character path.
        let mut buf = String::from("stale contents");
        fold_case_into("ABC", &mut buf);
        assert_eq!(buf, "abc");
        fold_case_into("Straße", &mut buf);
        assert_eq!(buf, "strasse");
        fold_case_into("", &mut buf);
        assert_eq!(buf, "");
    }

    mod proptest_properties {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            #![proptest_config(ProptestConfig::with_cases(1000))]

            // Folding an already folded string must be a no-op.
            #[test]
            fn fold_case_idempotent(s in "\\PC*") {
                let once = fold_case_impl(&s);
                let twice = fold_case_impl(&once);
                prop_assert_eq!(&once, &twice);
            }

            // No ASCII uppercase letter may survive folding.
            #[test]
            fn fold_case_no_ascii_uppercase(s in "\\PC*") {
                let result = fold_case_impl(&s);
                for ch in result.chars() {
                    if ch.is_ascii() {
                        prop_assert!(
                            !ch.is_ascii_uppercase(),
                            "uppercase {ch:?} in fold output: {result:?}"
                        );
                    }
                }
            }

            // Each input character yields at least one output character.
            #[test]
            fn fold_case_never_drops_chars(s in "\\PC*") {
                let result = fold_case_impl(&s);
                prop_assert!(
                    result.chars().count() >= s.chars().count(),
                    "fold_case dropped chars: {} → {}",
                    s.chars().count(),
                    result.chars().count()
                );
            }

            // ASCII input never produces non-ASCII output.
            #[test]
            fn fold_case_ascii_stays_ascii(s in "[\\x00-\\x7f]*") {
                let result = fold_case_impl(&s);
                prop_assert!(
                    result.is_ascii(),
                    "non-ASCII in fold of ASCII input: {result:?}"
                );
            }

            // The borrowing variant must agree with the allocating one.
            #[test]
            fn fold_case_cow_matches_impl(s in "\\PC*") {
                let expected = fold_case_impl(&s);
                let got = fold_case_cow(&s);
                prop_assert_eq!(got.as_ref(), expected.as_str());
            }
        }
    }
}