include!("case_fold_data.rs");
/// Looks up the case-folded form of `c` in `CASE_FOLD_DATA`.
///
/// The table is searched by its first tuple element using a binary search,
/// so it must be sorted by that element. When `c` has no entry, it is
/// returned unchanged.
fn case_fold_data(c: char) -> char {
    CASE_FOLD_DATA
        .binary_search_by_key(&c, |&(key, _)| key)
        .map_or(c, |index| CASE_FOLD_DATA[index].1)
}
/// Returns the case-folded form of `c`.
///
/// ASCII characters are lowercased directly without consulting the folding
/// table; every other character is resolved through `case_fold_data`.
pub fn case_fold(c: char) -> char {
    if !c.is_ascii() {
        return case_fold_data(c);
    }
    // Fast path: ASCII folding is plain lowercasing.
    c.to_ascii_lowercase()
}
pub fn case_folding_djb_hash(s: &str) -> u32 {
let mut hash: u32 = 5381;
for c in s.chars() {
if c.is_ascii() {
let byte = (c as u8).to_ascii_lowercase();
hash = djb_hash_byte(hash, byte);
} else {
let c = case_fold_data(c);
let mut bytes = [0; 4];
for byte in c.encode_utf8(&mut bytes).as_bytes() {
hash = djb_hash_byte(hash, *byte);
}
}
}
hash
}
/// Mixes one byte into a running DJB hash: `hash * 33 + byte`, with both
/// operations wrapping on `u32` overflow.
#[inline]
fn djb_hash_byte(hash: u32, byte: u8) -> u32 {
    const MULTIPLIER: u32 = 33;
    let scaled = hash.wrapping_mul(MULTIPLIER);
    scaled.wrapping_add(byte.into())
}
// Unit tests for the case-folding helpers and the case-folding DJB hash.
#[cfg(test)]
mod tests {
use super::*;
// Checks `case_fold` against a table of (input, expected fold) pairs.
#[test]
fn test_case_fold() {
for (c, fold) in [
('A', 'a'), ('1', '1'), ('Σ', 'σ'), ('K', 'k'), ('I', 'i'), ('İ', 'i'),
('ı', 'i'),
] {
// The input must fold to the expected character...
assert_eq!(case_fold(c), fold);
// ...and folding the expected character again must leave it unchanged.
assert_eq!(case_fold(fold), fold);
}
}
// Checks the hash of the empty string and of a fixed mixed-script string.
#[test]
fn test_case_folding_djb_hash() {
// With no characters to mix in, the result is the initial seed.
assert_eq!(case_folding_djb_hash(""), 5381);
// The expected value depends on the exact code points in this literal,
// so do not retype or Unicode-normalize it.
let s = "İıÀàĀāĹĺЕеẦầKkⰝⱍMm𐲒𐳒";
assert_eq!(case_folding_djb_hash(s), 1145571043);
}
}