include!("full_case_folding_data.rs");
/// Returns the `FULL_UPPER_FOLDING` table (presumably defined by the
/// `include!`d `full_case_folding_data.rs`).
///
/// Each row is `(key, slot0, slot1, slot2, len)`: lookups in this module
/// binary-search on `key` and report the remaining fields as
/// `(len, [slot0, slot1, slot2])`.
pub fn full_upper_folding() -> &'static [(u32, u32, u32, u32, u8)] {
FULL_UPPER_FOLDING
}
/// Returns the `FULL_LOWER_FOLDING` table (presumably defined by the
/// `include!`d `full_case_folding_data.rs`).
///
/// Each row is `(key, slot0, slot1, slot2, len)`: lookups in this module
/// binary-search on `key` and report the remaining fields as
/// `(len, [slot0, slot1, slot2])`.
pub fn full_lower_folding() -> &'static [(u32, u32, u32, u32, u8)] {
FULL_LOWER_FOLDING
}
/// Returns the `TURKISH_UPPER_FOLDING` table (presumably defined by the
/// `include!`d `full_case_folding_data.rs`).
///
/// Rows share the `(key, slot0, slot1, slot2, len)` layout used by the other
/// folding tables in this module.
// NOTE(review): presumably the tr/az-specific overrides of the default upper
// table — confirm against the data generator.
pub fn turkish_upper_folding() -> &'static [(u32, u32, u32, u32, u8)] {
TURKISH_UPPER_FOLDING
}
/// Returns the `TURKISH_LOWER_FOLDING` table (presumably defined by the
/// `include!`d `full_case_folding_data.rs`).
///
/// Rows share the `(key, slot0, slot1, slot2, len)` layout used by the other
/// folding tables in this module.
// NOTE(review): presumably the tr/az-specific overrides of the default lower
// table — confirm against the data generator.
pub fn turkish_lower_folding() -> &'static [(u32, u32, u32, u32, u8)] {
TURKISH_LOWER_FOLDING
}
/// Returns the `CASED_RANGES` table of `(u32, u32)` code-point pairs.
///
/// Membership is tested in this module through `super::cp_in_ranges`; how the
/// pair bounds are interpreted (inclusive/exclusive) is defined by that helper.
pub fn cased_ranges() -> &'static [(u32, u32)] {
CASED_RANGES
}
/// Returns the `CASE_IGNORABLE_RANGES` table of `(u32, u32)` code-point pairs.
///
/// Membership is tested in this module through `super::cp_in_ranges`; how the
/// pair bounds are interpreted (inclusive/exclusive) is defined by that helper.
pub fn case_ignorable_ranges() -> &'static [(u32, u32)] {
CASE_IGNORABLE_RANGES
}
#[inline]
pub fn is_cased(cp: u32) -> bool {
range_contains(CASED_RANGES, cp)
}
#[inline]
pub fn is_case_ignorable(cp: u32) -> bool {
range_contains(CASE_IGNORABLE_RANGES, cp)
}
pub fn is_final_sigma_context(cps: &[u32], anchor: usize) -> bool {
let mut has_cased_before = false;
for &cp in cps[..anchor].iter().rev() {
if is_cased(cp) {
has_cased_before = true;
break;
}
if !is_case_ignorable(cp) {
break;
}
}
if !has_cased_before {
return false;
}
for &cp in cps[anchor + 1..].iter() {
if is_cased(cp) {
return false;
}
if !is_case_ignorable(cp) {
return true;
}
}
true
}
/// Reports whether `locale` begins with the language subtag `tr` or `az`.
///
/// The two-letter prefix is compared ASCII-case-insensitively and must be
/// either the whole string or be followed immediately by `-` or `_`
/// (so `"tr"`, `"TR"`, `"tr-TR"` and `"az_AZ"` match while `"tron"` does not).
/// Strings shorter than two bytes never match.
#[inline]
pub fn is_turkish_locale(locale: &str) -> bool {
    match locale.as_bytes() {
        [first, second, rest @ ..] => {
            let lang = [first.to_ascii_lowercase(), second.to_ascii_lowercase()];
            // The subtag must end here: either nothing follows or a separator does.
            let at_boundary = rest.first().map_or(true, |&sep| sep == b'-' || sep == b'_');
            at_boundary && (lang == *b"tr" || lang == *b"az")
        }
        _ => false,
    }
}
/// Tests whether `cp` is covered by `ranges`.
///
/// Thin adapter that swaps the argument order and forwards to
/// `super::cp_in_ranges`; the search strategy and the interpretation of each
/// pair's bounds are defined by that parent-module helper.
fn range_contains(ranges: &[(u32, u32)], cp: u32) -> bool {
super::cp_in_ranges(cp, ranges)
}
#[inline]
pub fn full_upper_entry(cp: u32) -> Option<(u8, [u32; 3])> {
entry_for(FULL_UPPER_FOLDING, cp)
}
#[inline]
pub fn full_lower_entry(cp: u32) -> Option<(u8, [u32; 3])> {
entry_for(FULL_LOWER_FOLDING, cp)
}
#[inline]
pub fn turkish_upper_entry(cp: u32) -> Option<(u8, [u32; 3])> {
entry_for(TURKISH_UPPER_FOLDING, cp)
}
#[inline]
pub fn turkish_lower_entry(cp: u32) -> Option<(u8, [u32; 3])> {
entry_for(TURKISH_LOWER_FOLDING, cp)
}
/// Binary-searches `table` for the row whose first field equals `cp`.
///
/// `table` must be sorted ascending by its first field for the search to be
/// meaningful. On a hit the row `(key, a, b, c, n)` is reshaped into
/// `(n, [a, b, c])`; on a miss the result is `None`.
fn entry_for(table: &'static [(u32, u32, u32, u32, u8)], cp: u32) -> Option<(u8, [u32; 3])> {
    table
        .binary_search_by(|row| row.0.cmp(&cp))
        .ok()
        .map(|pos| {
            let row = &table[pos];
            (row.4, [row.1, row.2, row.3])
        })
}
/// Serializes a folding table into a little-endian byte buffer.
///
/// Layout: a `u32` row count, then for every row five `u32` words in order
/// `key, a, b, c, n` (the `u8` length field is widened to `u32`). The result
/// is therefore `4 + 20 * table.len()` bytes long.
pub fn encode_full_table_bytes(table: &[(u32, u32, u32, u32, u8)]) -> Vec<u8> {
    let row_count = table.len() as u32;
    let mut out = Vec::with_capacity(4 + table.len() * 20);
    out.extend_from_slice(&row_count.to_le_bytes());
    for &(key, first, second, third, used) in table {
        // Every field is emitted as a full 32-bit word, including the length.
        let words = [key, first, second, third, u32::from(used)];
        for word in words.iter() {
            out.extend_from_slice(&word.to_le_bytes());
        }
    }
    out
}
/// Computes the byte length of the full-table encoding for `table`:
/// a 4-byte row count followed by 20 bytes (five `u32` words) per row.
pub fn encoded_full_table_size(table: &[(u32, u32, u32, u32, u8)]) -> usize {
    const HEADER_BYTES: usize = 4;
    const ROW_BYTES: usize = 20;
    HEADER_BYTES + table.len() * ROW_BYTES
}
/// Serializes only the key and first mapping slot of each row.
///
/// Layout (little-endian): a `u32` row count, then per row the `u32` key
/// followed by the `u32` first slot. The second and third slots and the
/// length field are dropped, giving `4 + 8 * table.len()` bytes.
pub fn encode_simple_view_bytes(table: &[(u32, u32, u32, u32, u8)]) -> Vec<u8> {
    let row_count = table.len() as u32;
    let mut out = Vec::with_capacity(4 + table.len() * 8);
    out.extend_from_slice(&row_count.to_le_bytes());
    for row in table {
        out.extend_from_slice(&row.0.to_le_bytes());
        out.extend_from_slice(&row.1.to_le_bytes());
    }
    out
}
/// Serializes a table of `(u32, u32)` range pairs.
///
/// Pure delegation to `super::encode_u32_pair_table`; the resulting byte
/// layout is whatever that parent-module helper produces.
pub fn encode_ranges_bytes(table: &[(u32, u32)]) -> Vec<u8> {
super::encode_u32_pair_table(table)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Expands `text` into the `u32` code points the context scanner consumes.
    fn code_points(text: &str) -> Vec<u32> {
        text.chars().map(u32::from).collect()
    }

    // ---- table lookups -------------------------------------------------

    #[test]
    fn sharp_s_upper_is_ss() {
        let (count, mapped) = full_upper_entry(0x00DF).expect("ß entry");
        assert_eq!(count, 2);
        assert_eq!(&mapped[..2], &[0x0053, 0x0053]);
    }

    #[test]
    fn fi_ligature_upper_is_fi() {
        let (count, mapped) = full_upper_entry(0xFB01).expect("fi entry");
        assert_eq!(count, 2);
        assert_eq!(&mapped[..2], &[0x0046, 0x0049]);
    }

    #[test]
    fn capital_i_with_dot_lower_is_i_plus_dot() {
        let (count, mapped) = full_lower_entry(0x0130).expect("İ entry");
        assert_eq!(count, 2);
        assert_eq!(&mapped[..2], &[0x0069, 0x0307]);
    }

    #[test]
    fn turkish_capital_i_lower_is_dotless() {
        let (count, mapped) = turkish_lower_entry(0x0049).expect("tr I entry");
        assert_eq!(count, 1);
        assert_eq!(mapped[0], 0x0131);
    }

    // ---- property predicates -------------------------------------------

    #[test]
    fn cased_basic_letters() {
        assert!(is_cased(0x0041));
        assert!(is_cased(0x03A3));
        assert!(!is_cased(0x0020));
        assert!(!is_cased(0x0030));
    }

    #[test]
    fn case_ignorable_basic_marks() {
        assert!(is_case_ignorable(0x0027));
        assert!(is_case_ignorable(0x0301));
        assert!(!is_case_ignorable(0x0041));
    }

    // ---- final-sigma context -------------------------------------------

    #[test]
    fn final_sigma_context_at_word_end() {
        let word = code_points("OΣ");
        assert!(is_final_sigma_context(&word, 1));
    }

    #[test]
    fn non_final_sigma_in_middle_of_word() {
        let word = code_points("ΣΑ");
        assert!(!is_final_sigma_context(&word, 0));
    }

    #[test]
    fn final_sigma_through_case_ignorable() {
        let word = code_points("OΣ'");
        assert!(is_final_sigma_context(&word, 1));
    }

    // ---- locale detection ----------------------------------------------

    #[test]
    fn locale_match_tr_and_az() {
        assert!(is_turkish_locale("tr"));
        assert!(is_turkish_locale("TR"));
        assert!(is_turkish_locale("tr-TR"));
        assert!(is_turkish_locale("az_AZ"));
        assert!(!is_turkish_locale("en"));
        assert!(!is_turkish_locale("de-DE"));
        assert!(!is_turkish_locale(""));
        assert!(!is_turkish_locale("tron"));
    }

    // ---- serialization -------------------------------------------------

    #[test]
    fn encode_full_table_layout() {
        let toy: &[(u32, u32, u32, u32, u8)] = &[(0x00DF, 0x0053, 0x0053, 0x0000, 2)];
        let encoded = encode_full_table_bytes(toy);
        assert_eq!(encoded.len(), 4 + 20);
        assert_eq!(&encoded[0..4], &1u32.to_le_bytes());
        assert_eq!(&encoded[4..8], &0x00DFu32.to_le_bytes());
        assert_eq!(&encoded[8..12], &0x0053u32.to_le_bytes());
        assert_eq!(&encoded[20..24], &2u32.to_le_bytes());
    }

    // ---- generated-data invariants -------------------------------------

    #[test]
    fn upper_table_sorted_and_non_empty() {
        let rows = FULL_UPPER_FOLDING;
        assert!(!rows.is_empty());
        for pair in rows.windows(2) {
            assert!(pair[0].0 < pair[1].0, "FULL upper table must be sorted asc");
        }
    }

    #[test]
    fn lower_table_sorted() {
        let rows = FULL_LOWER_FOLDING;
        for pair in rows.windows(2) {
            assert!(pair[0].0 < pair[1].0, "FULL lower table must be sorted asc");
        }
    }
}