/// Number of low bits of a BMP code point that select a slot inside a data block;
/// the remaining high bits select the block.
const BMP_SHIFT: u32 = 5;

/// Mask that keeps only the in-block slot (the low `BMP_SHIFT` bits) of a BMP code point.
const BMP_MASK: u32 = (1u32 << BMP_SHIFT) - 1;

/// Number of values in one BMP data block (`1 << BMP_SHIFT`).
// Not referenced in the visible part of this file, hence the lint opt-out.
#[allow(dead_code)]
pub(crate) const BMP_BLOCK_SIZE: usize = 1usize << BMP_SHIFT;

/// Right shift applied to `cp - 0x10000` to obtain the first-level supplementary index.
const SUPP_SHIFT_1: u32 = 11;

/// Right shift applied to `cp - 0x10000` before the second-level offset is masked out.
const SUPP_SHIFT_2: u32 = 5;

/// Mask for the second-level offset: the `SUPP_SHIFT_1 - SUPP_SHIFT_2` bits that sit
/// between the two shifts.
const SUPP_MASK_2: u32 = (1u32 << (SUPP_SHIFT_1 - SUPP_SHIFT_2)) - 1;

/// Mask for the position inside a supplementary data block (the low `SUPP_SHIFT_2` bits).
const SUPP_MASK_DATA: u32 = (1u32 << SUPP_SHIFT_2) - 1;
/// Multi-level lookup table that maps a code point to a `u32` value.
///
/// Code points below `0x10000` are resolved through `bmp_index` and `data`;
/// code points in `0x10000..=0x10FFFF` go through `supp_index1`, `supp_index2`
/// and then `data`. The struct only borrows `'static` tables, so copying it is
/// cheap.
#[derive(Copy, Clone)]
pub(crate) struct CodePointTrie {
    /// One entry per BMP block, indexed by `cp >> 5`; each entry is the offset
    /// in `data` at which that block's values start.
    pub(crate) bmp_index: &'static [u16],
    /// Value storage shared by the BMP and supplementary lookups.
    pub(crate) data: &'static [u32],
    /// First supplementary level, indexed by `(cp - 0x10000) >> 11`; each entry
    /// is a base offset into `supp_index2`.
    pub(crate) supp_index1: &'static [u16],
    /// Second supplementary level, indexed by the first-level base plus
    /// `((cp - 0x10000) >> 5) & 63`; each entry is a base offset into `data`.
    pub(crate) supp_index2: &'static [u16],
    /// Value returned for inputs above `0x10FFFF` and whenever a checked
    /// supplementary lookup falls outside one of the tables.
    pub(crate) default_value: u32,
}
impl CodePointTrie {
    /// Returns the value stored for `cp`.
    ///
    /// * `cp < 0x10000` takes the unchecked BMP fast path.
    /// * `0x10000..=0x10FFFF` takes the bounds-checked supplementary path.
    /// * Anything larger yields `default_value`.
    #[inline]
    pub(crate) fn get(&self, cp: u32) -> u32 {
        match cp {
            // SAFETY: this arm only matches `cp < 0x10000`. In-bounds access
            // additionally relies on `bmp_index` having an entry at
            // `cp >> BMP_SHIFT` and on every block start plus `BMP_MASK`
            // lying inside `data`.
            // NOTE(review): nothing in this file validates those table
            // invariants — confirm that whoever builds the tables guarantees
            // them.
            0..=0xFFFF => unsafe { self.get_bmp_unchecked(cp) },
            0x1_0000..=0x10_FFFF => self.get_supplementary(cp),
            _ => self.default_value,
        }
    }

    /// Bounds-checked BMP lookup.
    ///
    /// # Panics
    ///
    /// Panics if either table access is out of bounds; in debug builds it
    /// also panics when `cp >= 0x10000`.
    #[allow(dead_code)]
    #[inline(always)]
    fn get_bmp(&self, cp: u32) -> u32 {
        debug_assert!(cp < 0x10000);
        let block = (cp >> BMP_SHIFT) as usize;
        let slot = (cp & BMP_MASK) as usize;
        let block_start = usize::from(self.bmp_index[block]);
        self.data[block_start + slot]
    }

    /// BMP lookup without bounds checks.
    ///
    /// # Safety
    ///
    /// `bmp_index` must have an entry at `cp >> BMP_SHIFT`, and that entry
    /// plus `cp & BMP_MASK` must be a valid index into `data`. Callers are
    /// expected to pass `cp < 0x10000` (only asserted in debug builds).
    #[inline(always)]
    pub(crate) unsafe fn get_bmp_unchecked(&self, cp: u32) -> u32 {
        debug_assert!(cp < 0x10000);
        let (block, slot) = ((cp >> BMP_SHIFT) as usize, (cp & BMP_MASK) as usize);
        // SAFETY: both indices are in bounds per this function's contract.
        unsafe {
            let block_start = usize::from(*self.bmp_index.get_unchecked(block));
            *self.data.get_unchecked(block_start + slot)
        }
    }

    /// Looks up two BMP code points at once, without bounds checks.
    ///
    /// Both index-table reads are written before either data read —
    /// presumably so the two independent lookups can overlap; the result is
    /// the same as two separate `get_bmp_unchecked` calls, in argument order.
    ///
    /// # Safety
    ///
    /// The requirements of `get_bmp_unchecked` must hold for both `cp0` and
    /// `cp1`.
    #[allow(dead_code)]
    #[inline(always)]
    pub(crate) unsafe fn get_two_bmp_pipelined_unchecked(&self, cp0: u32, cp1: u32) -> [u32; 2] {
        debug_assert!(cp0 < 0x10000 && cp1 < 0x10000);
        let (block_a, block_b) = ((cp0 >> BMP_SHIFT) as usize, (cp1 >> BMP_SHIFT) as usize);
        let (slot_a, slot_b) = ((cp0 & BMP_MASK) as usize, (cp1 & BMP_MASK) as usize);
        // SAFETY: all four indices are in bounds per this function's contract.
        unsafe {
            let start_a = usize::from(*self.bmp_index.get_unchecked(block_a));
            let start_b = usize::from(*self.bmp_index.get_unchecked(block_b));
            [
                *self.data.get_unchecked(start_a + slot_a),
                *self.data.get_unchecked(start_b + slot_b),
            ]
        }
    }

    /// Bounds-checked three-level lookup for `0x10000..=0x10FFFF`.
    ///
    /// Any table access that falls out of bounds makes the whole lookup
    /// return `default_value` instead of panicking.
    #[cold]
    #[inline(never)]
    fn get_supplementary(&self, cp: u32) -> u32 {
        debug_assert!((0x10000..=0x10FFFF).contains(&cp));
        let rel = cp - 0x10000;
        // Walk the three levels; `?` bails out with `None` on the first miss.
        let walk = || -> Option<u32> {
            let top = (rel >> SUPP_SHIFT_1) as usize;
            let mid_base = usize::from(*self.supp_index1.get(top)?);
            let mid = ((rel >> SUPP_SHIFT_2) & SUPP_MASK_2) as usize;
            let leaf_base = usize::from(*self.supp_index2.get(mid_base + mid)?);
            let leaf = (rel & SUPP_MASK_DATA) as usize;
            self.data.get(leaf_base + leaf).copied()
        };
        walk().unwrap_or(self.default_value)
    }

    /// Three-level supplementary lookup without bounds checks.
    ///
    /// # Safety
    ///
    /// `cp` must be at least `0x10000` (the range `0x10000..=0x10FFFF` is
    /// asserted in debug builds only), and each of the three table reads —
    /// `supp_index1`, then `supp_index2`, then `data` — must be in bounds for
    /// the indices derived from `cp`.
    #[inline(always)]
    pub(crate) unsafe fn get_supplementary_unchecked(&self, cp: u32) -> u32 {
        debug_assert!((0x10000..=0x10FFFF).contains(&cp));
        let rel = cp - 0x10000;
        let top = (rel >> SUPP_SHIFT_1) as usize;
        let mid = ((rel >> SUPP_SHIFT_2) & SUPP_MASK_2) as usize;
        let leaf = (rel & SUPP_MASK_DATA) as usize;
        // SAFETY: every index is in bounds per this function's contract.
        unsafe {
            let mid_base = usize::from(*self.supp_index1.get_unchecked(top));
            let leaf_base = usize::from(*self.supp_index2.get_unchecked(mid_base + mid));
            *self.data.get_unchecked(leaf_base + leaf)
        }
    }
}
#[cfg(test)]
mod tests {
    // Unit tests driven by a small hand-built trie.
    //
    // Layout of `TEST_DATA`:
    //   [0, 128)   identity values for U+0000..=U+007F (BMP blocks 0..=3)
    //   [128, 160) an all-zero block shared by every otherwise unmapped block
    //   [160, 192) 0xC000 + j for U+4E00..=U+4E1F (BMP block 0x270)
    //   [192, 224) 0xE000 + k for U+10000..=U+1001F
    //
    // Every index reachable through these tables is in bounds, which is what
    // makes the `unsafe` calls in the tests below sound.
    use super::*;

    // One entry per 32-code-point BMP block (0x10000 >> 5 == 2048 blocks).
    static TEST_BMP_INDEX: [u16; 2048] = {
        let mut arr = [128u16; 2048];
        arr[0] = 0;
        arr[1] = 32;
        arr[2] = 64;
        arr[3] = 96;
        arr[0x270] = 160;
        arr
    };

    static TEST_DATA: [u32; 224] = {
        let mut arr = [0u32; 224];
        let mut i = 0u32;
        while i < 128 {
            arr[i as usize] = i;
            i += 1;
        }
        let mut j = 0u32;
        while j < 32 {
            arr[160 + j as usize] = 0xC000 + j;
            j += 1;
        }
        let mut k = 0u32;
        while k < 32 {
            arr[192 + k as usize] = 0xE000 + k;
            k += 1;
        }
        arr
    };

    // First-level entries are bases into `TEST_SUPP_INDEX2`. Valid code points
    // only reach indices 0..=511 (0xFFFFF >> 11 == 511).
    static TEST_SUPP_INDEX1: [u16; 528] = {
        let mut arr = [64u16; 528];
        arr[0] = 0;
        arr
    };

    // Second-level entries are bases into `TEST_DATA`: slot 0 maps to the
    // 0xE000 block, everything else to the all-zero block.
    static TEST_SUPP_INDEX2: [u16; 128] = {
        let mut arr = [128u16; 128];
        arr[0] = 192;
        arr
    };

    fn test_trie() -> CodePointTrie {
        CodePointTrie {
            bmp_index: &TEST_BMP_INDEX,
            data: &TEST_DATA,
            supp_index1: &TEST_SUPP_INDEX1,
            supp_index2: &TEST_SUPP_INDEX2,
            default_value: 0,
        }
    }

    #[test]
    fn test_ascii_lookup() {
        let trie = test_trie();
        assert_eq!(trie.get(0x00), 0x00);
        assert_eq!(trie.get(0x41), 0x41);
        assert_eq!(trie.get(0x61), 0x61);
        assert_eq!(trie.get(0x7F), 0x7F);
    }

    #[test]
    fn test_bmp_cjk_lookup() {
        let trie = test_trie();
        assert_eq!(trie.get(0x4E00), 0xC000);
        assert_eq!(trie.get(0x4E01), 0xC001);
        assert_eq!(trie.get(0x4E1F), 0xC01F);
    }

    #[test]
    fn test_supplementary_lookup() {
        let trie = test_trie();
        assert_eq!(trie.get(0x10000), 0xE000);
        assert_eq!(trie.get(0x10001), 0xE001);
        assert_eq!(trie.get(0x1001F), 0xE01F);
    }

    #[test]
    fn test_unmapped_returns_default() {
        let trie = test_trie();
        assert_eq!(trie.get(0x0100), 0);
        assert_eq!(trie.get(0x100000), 0);
        assert_eq!(trie.get(0x110000), 0);
        assert_eq!(trie.get(0xFFFFFFFF), 0);
    }

    #[test]
    fn test_bmp_boundary() {
        let trie = test_trie();
        assert_eq!(trie.get(0xFFFF), 0);
        assert_eq!(trie.get(0x10000), 0xE000);
    }

    #[test]
    fn test_all_ascii_round_trip() {
        let trie = test_trie();
        for cp in 0u32..=0x7F {
            assert_eq!(trie.get(cp), cp, "mismatch at U+{cp:04X}");
        }
    }

    #[test]
    fn test_supplementary_end_of_range() {
        let trie = test_trie();
        assert_eq!(trie.get(0x10FFFF), 0);
    }

    #[test]
    fn test_get_is_consistent_with_get_bmp() {
        let trie = test_trie();
        for cp in (0u32..0x10000).step_by(997) {
            assert_eq!(trie.get(cp), trie.get_bmp(cp), "mismatch at U+{cp:04X}");
        }
    }

    // With `default_value == 0` the fixture cannot tell "stored zero" apart
    // from "fell through to the default", so use a distinctive default here.
    #[test]
    fn test_out_of_range_uses_default_value_not_stored_zero() {
        let trie = CodePointTrie {
            default_value: 0xDEAD,
            ..test_trie()
        };
        // In range but unmapped: the zero comes from the data table.
        assert_eq!(trie.get(0x0100), 0);
        assert_eq!(trie.get(0x100000), 0);
        assert_eq!(trie.get(0x10FFFF), 0);
        // Beyond U+10FFFF: the default is returned.
        assert_eq!(trie.get(0x110000), 0xDEAD);
        assert_eq!(trie.get(0xFFFFFFFF), 0xDEAD);
    }

    // The checked supplementary path must degrade to `default_value` at each
    // of its three table reads instead of panicking.
    #[test]
    fn test_truncated_supplementary_tables_fall_back_to_default() {
        let base = CodePointTrie {
            default_value: 0xDEAD,
            ..test_trie()
        };

        let no_index1 = CodePointTrie {
            supp_index1: &[],
            ..base
        };
        assert_eq!(no_index1.get(0x10000), 0xDEAD);

        let no_index2 = CodePointTrie {
            supp_index2: &[],
            ..base
        };
        assert_eq!(no_index2.get(0x10000), 0xDEAD);

        // Drop the 0xE000 block so the final data read misses.
        let short_data = CodePointTrie {
            data: &TEST_DATA[..192],
            ..base
        };
        assert_eq!(short_data.get(0x10000), 0xDEAD);
        // A lookup that lands in the still-present zero block is unaffected.
        assert_eq!(short_data.get(0x10020), 0);
    }

    #[test]
    fn test_get_bmp_unchecked_matches_checked() {
        let trie = test_trie();
        for cp in (0u32..0x10000).step_by(251) {
            // SAFETY: `TEST_BMP_INDEX` has 2048 entries, each at most 160, and
            // `TEST_DATA` has 224 entries, so base + offset (<= 191) is valid.
            let fast = unsafe { trie.get_bmp_unchecked(cp) };
            assert_eq!(fast, trie.get_bmp(cp), "mismatch at U+{cp:04X}");
        }
    }

    #[test]
    fn test_two_bmp_pipelined_matches_single_lookups() {
        let trie = test_trie();
        let pairs = [
            (0x00u32, 0x7Fu32),
            (0x41, 0x4E00),
            (0x4E1F, 0x0100),
            (0xFFFF, 0x61),
        ];
        for &(a, b) in pairs.iter() {
            // SAFETY: both code points are below 0x10000 and the fixture
            // tables keep every BMP access in bounds (see above).
            let both = unsafe { trie.get_two_bmp_pipelined_unchecked(a, b) };
            assert_eq!(both, [trie.get_bmp(a), trie.get_bmp(b)]);
        }
    }

    #[test]
    fn test_supplementary_unchecked_matches_get() {
        let trie = test_trie();
        for cp in (0x10000u32..=0x10FFFF).step_by(4093) {
            // SAFETY: level-1 indices are at most 511 (< 528), level-2 indices
            // at most 64 + 63 (< 128) and data indices at most 192 + 31 (< 224).
            let fast = unsafe { trie.get_supplementary_unchecked(cp) };
            assert_eq!(fast, trie.get(cp), "mismatch at U+{cp:04X}");
        }
        // SAFETY: same bounds argument as in the loop above.
        unsafe {
            assert_eq!(trie.get_supplementary_unchecked(0x10000), 0xE000);
            assert_eq!(trie.get_supplementary_unchecked(0x1001F), 0xE01F);
            assert_eq!(trie.get_supplementary_unchecked(0x10020), 0);
            assert_eq!(trie.get_supplementary_unchecked(0x10FFFF), 0);
        }
    }
}