// feruca 0.12.0
//
// An implementation of the Unicode Collation Algorithm.
// Documentation
/// Paged lookup data for collation entries, keyed by code point.
///
/// [`CollationTable::entry`] reads `page_index` and `entries`. The contraction lookups
/// ([`CollationTable::get2`], [`CollationTable::get3`]) additionally consult
/// `contraction_meta` and `edges`, and every row handed back to callers is a slice of
/// `weights`.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub struct CollationTable {
    /// Page number for each block of 256 code points, indexed by `code_point >> 8`.
    pub page_index: Vec<u16>,
    /// Packed `u64` entries; the entry for a code point is read from
    /// `(page << 8) + (code_point & 0xFF)`.
    pub entries: Vec<u64>,
    /// Metadata records for contraction entries, indexed by `entry >> 50`.
    pub contraction_meta: Vec<ContractionMeta>,
    /// Contraction edges, addressed as `first_edge..first_edge + edge_len` ranges.
    pub edges: Vec<ContractionEdge>,
    /// Flat `u32` storage that rows are sliced from using a `(start, len)` pair.
    pub weights: Vec<u32>,
}

/// Metadata attached to a contraction entry of a [`CollationTable`].
///
/// `first_edge` and `edge_len` together name the range of `CollationTable::edges` that is
/// searched for the code point following the entry's own code point.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub struct ContractionMeta {
    /// Index in `CollationTable::edges` of the first edge in this record's range.
    pub first_edge: u32,
    /// Number of edges in the range starting at `first_edge`.
    pub edge_len: u16,
    /// Value returned by [`CollationTable::max_len`] for this contraction.
    /// NOTE(review): presumably the longest match length in code points — confirm with the
    /// table generator.
    pub max_len: u8,
}

/// One edge in the contraction lookup structure of a [`CollationTable`].
///
/// An edge matches a single code point. It may carry a row of weights
/// (`weight_start`/`weight_len`) and may point at a further range of edges
/// (`next_first_edge`/`next_edge_len`) that is searched for the next code point.
#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub struct ContractionEdge {
    /// Code point this edge is compared against during lookup.
    pub code_point: u32,
    /// Index in `CollationTable::edges` of the first edge of the follow-up range.
    pub next_first_edge: u32,
    /// Start index in `CollationTable::weights` of this edge's row.
    pub weight_start: u32,
    /// Number of edges in the follow-up range starting at `next_first_edge`.
    pub next_edge_len: u16,
    /// Length of this edge's row; `0` means the edge has no row and lookups yield `None`.
    pub weight_len: u16,
}

/// Paged bit set over code points, queried with [`VariableTable::contains`].
#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub struct VariableTable {
    /// Page number for each block of 256 code points, indexed by `code_point >> 8`.
    /// The value `u16::MAX` (`EMPTY_PAGE`) marks a block with no members.
    pub page_index: Vec<u16>,
    /// Bit-set words: each page occupies four consecutive `u64` words (256 bits), one bit
    /// per code point in the block.
    pub pages: Vec<u64>,
}

/// Paged table mapping code points to a `u16` value, queried with [`FcdTable::get`].
#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub struct FcdTable {
    /// Page number for each block of 256 code points, indexed by `code_point >> 8`.
    /// The value `u16::MAX` (`EMPTY_PAGE`) marks a block with no data.
    pub page_index: Vec<u16>,
    /// Stored values, 256 per page; a stored `0` is reported as "no value".
    pub pages: Vec<u16>,
}

/// Paged table mapping code points to a slice of `u32` values, queried with
/// [`DecompTable::get`].
#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub struct DecompTable {
    /// Page number for each block of 256 code points, indexed by `code_point >> 8`.
    /// The value `u16::MAX` (`EMPTY_PAGE`) marks a block with no data.
    pub page_index: Vec<u16>,
    /// Packed entries, 256 per page: the low 16 bits hold a length and the remaining high
    /// bits hold a start index into `values`. A length of `0` means "no value".
    pub entries: Vec<u64>,
    /// Flat storage that the slices returned by `get` point into.
    /// NOTE(review): presumably decomposed code points — confirm with the table generator.
    pub values: Vec<u32>,
}

impl DecompTable {
    /// Returns the slice of `values` recorded for `code_point`, if any.
    ///
    /// Yields `None` when the code point's page is `EMPTY_PAGE` or when the packed entry
    /// records a length of zero.
    ///
    /// # Panics
    ///
    /// Panics if `code_point >> 8` is outside `page_index`, or if the table data points
    /// outside `entries` or `values`.
    pub fn get(&self, code_point: u32) -> Option<&[u32]> {
        let high = (code_point >> 8) as usize;
        let low = (code_point & 0xFF) as usize;

        let page = self.page_index[high];
        if page == EMPTY_PAGE {
            return None;
        }

        // Each page holds 256 packed entries.
        let packed = self.entries[(usize::from(page) << 8) + low];

        match usize::from(decomp_len(packed)) {
            0 => None,
            count => {
                let first = decomp_start(packed);
                Some(&self.values[first..first + count])
            }
        }
    }
}

impl FcdTable {
    /// Returns the non-zero `u16` stored for `code_point`, if any.
    ///
    /// Yields `None` when the code point's page is `EMPTY_PAGE` or when the stored value is
    /// zero.
    ///
    /// # Panics
    ///
    /// Panics if `code_point >> 8` is outside `page_index`, or if the computed slot is
    /// outside `pages`.
    pub fn get(&self, code_point: u32) -> Option<u16> {
        let slot = self.page_index[(code_point >> 8) as usize];
        if slot == EMPTY_PAGE {
            return None;
        }

        // Each page holds 256 values.
        let base = usize::from(slot) * 256;
        let low = (code_point & 0xFF) as usize;

        match self.pages[base + low] {
            0 => None,
            stored => Some(stored),
        }
    }
}

impl VariableTable {
    /// Reports whether the bit for `code_point` is set in the table.
    ///
    /// Returns `false` straight away when the code point's page is `EMPTY_PAGE`.
    ///
    /// # Panics
    ///
    /// Panics if `code_point >> 8` is outside `page_index`, or if the computed word index
    /// is outside `pages`.
    pub fn contains(&self, code_point: u32) -> bool {
        let page = self.page_index[(code_point >> 8) as usize];
        if page == EMPTY_PAGE {
            return false;
        }

        // A page is 256 bits, stored as four consecutive 64-bit words.
        let low = code_point & 0xFF;
        let word_index = usize::from(page) * 4 + (low / 64) as usize;
        let bit = low % 64;

        ((self.pages[word_index] >> bit) & 1) == 1
    }
}

impl CollationTable {
    /// Returns the packed entry stored for `code_point`.
    ///
    /// The page number read from `page_index` is used as-is; no empty-page check is made.
    ///
    /// # Panics
    ///
    /// Panics if `code_point >> 8` is outside `page_index`, or if the computed slot is
    /// outside `entries`.
    pub fn entry(&self, code_point: u32) -> u64 {
        let high = (code_point >> 8) as usize;
        let low = (code_point & 0xFF) as usize;
        let page = usize::from(self.page_index[high]);
        self.entries[(page << 8) + low]
    }

    /// Returns `true` when the entry's tag equals `ENTRY_MISSING`.
    pub const fn is_missing(entry: u64) -> bool {
        let tag = entry_tag(entry);
        tag == ENTRY_MISSING
    }

    /// Returns `true` when the entry's tag equals `ENTRY_CONTRACTION`.
    pub const fn is_contraction(entry: u64) -> bool {
        let tag = entry_tag(entry);
        tag == ENTRY_CONTRACTION
    }

    /// Returns the `max_len` recorded in the entry's contraction metadata, or `1` when the
    /// entry is not a contraction.
    pub fn max_len(&self, entry: u64) -> usize {
        if !Self::is_contraction(entry) {
            return 1;
        }

        usize::from(self.contraction_meta(entry).max_len)
    }

    /// Returns the slice of `weights` described by the start and length packed in `entry`.
    ///
    /// # Panics
    ///
    /// Panics if the described range is outside `weights`.
    pub fn simple_row(&self, entry: u64) -> &[u32] {
        let start = entry_start(entry);
        let len = entry_len(entry);
        self.weights_slice(start, len)
    }

    /// Looks up the row for a two-code-point sequence whose first code point produced
    /// `entry` and whose second code point is `b`.
    ///
    /// Returns `None` when `entry` is not a contraction, when no edge matches `b`, or when
    /// the matching edge carries no row.
    pub fn get2(&self, entry: u64, b: u32) -> Option<&[u32]> {
        if Self::is_contraction(entry) {
            let root = self.contraction_meta(entry);
            self.find_edge(root.first_edge, root.edge_len, b)
                .and_then(|edge| self.edge_row(edge))
        } else {
            None
        }
    }

    /// Looks up the row for a three-code-point sequence whose first code point produced
    /// `entry`, followed by `b` and then `c`.
    ///
    /// Returns `None` when `entry` is not a contraction, when either step finds no matching
    /// edge, or when the final edge carries no row.
    pub fn get3(&self, entry: u64, b: u32, c: u32) -> Option<&[u32]> {
        if !Self::is_contraction(entry) {
            return None;
        }

        let root = self.contraction_meta(entry);
        let second = self.find_edge(root.first_edge, root.edge_len, b)?;
        self.find_edge(second.next_first_edge, second.next_edge_len, c)
            .and_then(|third| self.edge_row(third))
    }

    /// Fetches the metadata record whose index is packed into `entry`.
    fn contraction_meta(&self, entry: u64) -> &ContractionMeta {
        let index = entry_meta_index(entry);
        &self.contraction_meta[index]
    }

    /// Searches the `edge_len` edges starting at `first_edge` for one matching `cp`.
    ///
    /// Ranges of at most four edges are scanned linearly; longer ranges are binary-searched
    /// on `code_point`, which only gives correct results if the range is sorted by that key.
    fn find_edge(&self, first_edge: u32, edge_len: u16, cp: u32) -> Option<&ContractionEdge> {
        let begin = first_edge as usize;
        let end = begin + usize::from(edge_len);
        let candidates = &self.edges[begin..end];

        if edge_len > 4 {
            match candidates.binary_search_by_key(&cp, |edge| edge.code_point) {
                Ok(found) => Some(&candidates[found]),
                Err(_) => None,
            }
        } else {
            candidates.iter().find(|edge| edge.code_point == cp)
        }
    }

    /// Returns the row attached to `edge`, or `None` when its `weight_len` is zero.
    fn edge_row(&self, edge: &ContractionEdge) -> Option<&[u32]> {
        match edge.weight_len {
            0 => None,
            len => Some(self.weights_slice(edge.weight_start, len)),
        }
    }

    /// Borrows `len` values of `weights` beginning at `start`.
    fn weights_slice(&self, start: u32, len: u16) -> &[u32] {
        let begin = start as usize;
        let end = begin + usize::from(len);
        &self.weights[begin..end]
    }
}

/// Tag value (low two bits of a collation entry) that `CollationTable::is_missing` tests for.
const ENTRY_MISSING: u8 = 0;
/// Tag value (low two bits of a collation entry) that `CollationTable::is_contraction`
/// tests for.
const ENTRY_CONTRACTION: u8 = 2;
/// `page_index` value that `DecompTable`, `FcdTable` and `VariableTable` treat as "this
/// block of 256 code points has no page".
const EMPTY_PAGE: u16 = u16::MAX;

/// Extracts the tag stored in the two lowest bits of a packed collation entry.
const fn entry_tag(entry: u64) -> u8 {
    const TAG_MASK: u64 = 0b11;
    let tag_bits = entry & TAG_MASK;
    tag_bits as u8
}

/// Extracts the 16-bit length stored in bits 2..18 of a packed collation entry.
const fn entry_len(entry: u64) -> u16 {
    const LEN_SHIFT: u32 = 2;
    const LEN_MASK: u64 = 0xFFFF;
    let len_bits = (entry >> LEN_SHIFT) & LEN_MASK;
    len_bits as u16
}

/// Extracts the 32-bit start index stored in bits 18..50 of a packed collation entry.
const fn entry_start(entry: u64) -> u32 {
    const START_SHIFT: u32 = 18;
    const START_MASK: u64 = 0xFFFF_FFFF;
    let start_bits = (entry >> START_SHIFT) & START_MASK;
    start_bits as u32
}

/// Extracts the contraction-metadata index stored in bits 50..64 of a packed collation
/// entry.
const fn entry_meta_index(entry: u64) -> usize {
    const META_SHIFT: u32 = 50;
    let index_bits = entry >> META_SHIFT;
    index_bits as usize
}

/// Extracts the length stored in the low 16 bits of a packed decomposition entry.
const fn decomp_len(entry: u64) -> u16 {
    const LEN_MASK: u64 = 0xFFFF;
    let len_bits = entry & LEN_MASK;
    len_bits as u16
}

/// Extracts the start index stored above the low 16 bits of a packed decomposition entry.
const fn decomp_start(entry: u64) -> usize {
    const START_SHIFT: u32 = 16;
    let start_bits = entry >> START_SHIFT;
    start_bits as usize
}