/// Builds an upper-case folding table for every 16-bit code unit and returns
/// it serialized as bytes.
///
/// The result holds `0x10000` entries of two bytes each (128 KiB in total).
/// Entry `c` is stored little-endian at byte offset `c * 2` and contains the
/// upper-case form of code unit `c`. Code units without a mapping below fold
/// to themselves.
///
/// Only a subset of scripts is covered: ASCII, Latin-1 Supplement, part of
/// Latin Extended-A (U+0100..=U+0177), basic Greek, Cyrillic, Armenian and
/// the fullwidth Latin letters.
pub fn build_upcase_blob() -> Vec<u8> {
    // Contiguous runs of lower-case letters, as `(first, last, delta)`: every
    // code unit `c` in `first..=last` folds to `c - delta`.
    const OFFSET_RUNS: [(u16, u16, u16); 8] = [
        (0x0061, 0x007A, 0x20), // ASCII a-z
        (0x00E0, 0x00F6, 0x20), // Latin-1, up to the division sign at 0xF7
        (0x00F8, 0x00FE, 0x20), // Latin-1, after the division sign
        (0x03B1, 0x03C9, 0x20), // Greek small alpha..omega
        (0x0430, 0x044F, 0x20), // Cyrillic small a..ya
        (0x0451, 0x045F, 0x50), // Cyrillic small io..dzhe
        (0x0561, 0x0586, 0x30), // Armenian small letters
        (0xFF41, 0xFF5A, 0x20), // Fullwidth a-z
    ];

    // Runs where capital and small letters alternate, as
    // `(first_capital, last_capital)`: each capital `u` (stepping by two) is
    // immediately followed by its small letter `u + 1`.
    const ALTERNATING_RUNS: [(u16, u16); 4] = [
        (0x0100, 0x012E),
        (0x0132, 0x0136),
        // Capitals sit on ODD code points in this run (U+0139 is the capital,
        // U+013A the small letter). The small letter must fold to the capital,
        // never the other way round.
        (0x0139, 0x0147),
        (0x014A, 0x0176),
    ];

    // Start from the identity mapping. The working table lives on the heap so
    // the function does not need a 128 KiB stack frame.
    let mut table: Vec<u16> = (0..=u16::MAX).collect();

    for &(first, last, delta) in &OFFSET_RUNS {
        for c in first..=last {
            table[usize::from(c)] = c - delta;
        }
    }

    for &(first_capital, last_capital) in &ALTERNATING_RUNS {
        for capital in (first_capital..=last_capital).step_by(2) {
            table[usize::from(capital) + 1] = capital;
        }
    }

    // One-off mappings that do not follow the regular offset of their block.
    table[0x00FF] = 0x0178; // y with diaeresis: its capital lives in Latin Extended-A
    table[0x03C2] = 0x03A3; // final sigma shares the capital of regular sigma

    let mut out = Vec::with_capacity(table.len() * 2);
    for entry in &table {
        out.extend_from_slice(&entry.to_le_bytes());
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes the little-endian `u16` entry stored for code unit `c`.
    fn entry_at(blob: &[u8], c: u16) -> u16 {
        let offset = usize::from(c) * 2;
        u16::from_le_bytes([blob[offset], blob[offset + 1]])
    }

    /// ASCII small letters fold to capitals; capitals and digits are unchanged.
    #[test]
    fn ascii_fold() {
        let blob = build_upcase_blob();
        let cases = [(b'a', b'A'), (b'z', b'Z'), (b'A', b'A'), (b'0', b'0')];
        for &(input, expected) in &cases {
            assert_eq!(entry_at(&blob, input as u16), expected as u16);
        }
    }

    /// The blob holds one two-byte entry per 16-bit code unit.
    #[test]
    fn length_is_128_kib() {
        let blob = build_upcase_blob();
        assert_eq!(blob.len(), 128 * 1024);
    }

    /// Checks both ends of the first Latin-1 run and the 0xFF special case.
    #[test]
    fn latin1_supplement_fold() {
        let blob = build_upcase_blob();
        let cases: [(u16, u16); 3] = [(0xE0, 0xC0), (0xF6, 0xD6), (0xFF, 0x178)];
        for &(input, expected) in &cases {
            assert_eq!(entry_at(&blob, input), expected);
        }
    }
}