lindera_dictionary/dictionary/
character_definition.rs

1use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
2use serde::{Deserialize, Serialize};
3
4use crate::LinderaResult;
5use crate::error::LinderaErrorKind;
6
7#[derive(Serialize, Deserialize, Debug, Copy, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
8
9pub struct CategoryData {
10    pub invoke: bool,
11    pub group: bool,
12    pub length: u32,
13}
14
15#[derive(
16    Serialize,
17    Deserialize,
18    Clone,
19    Debug,
20    Hash,
21    Copy,
22    PartialOrd,
23    Ord,
24    Eq,
25    PartialEq,
26    Archive,
27    RkyvSerialize,
28    RkyvDeserialize,
29)]
30
31pub struct CategoryId(pub usize);
32
33#[derive(Serialize, Deserialize, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
34
35pub struct LookupTable<T: Copy + Clone> {
36    boundaries: Vec<u32>,
37    values: Vec<Vec<T>>,
38}
39
40impl<T: Copy + Clone> LookupTable<T> {
41    pub fn from_fn(mut boundaries: Vec<u32>, funct: &dyn Fn(u32, &mut Vec<T>)) -> LookupTable<T> {
42        if !boundaries.contains(&0) {
43            boundaries.push(0);
44        }
45        boundaries.sort_unstable();
46        let mut values = Vec::new();
47        for &boundary in &boundaries {
48            let mut output = Vec::default();
49            funct(boundary, &mut output);
50            values.push(output);
51        }
52        LookupTable { boundaries, values }
53    }
54
55    pub fn eval(&self, target: u32) -> &[T] {
56        let idx = self
57            .boundaries
58            .binary_search(&target)
59            .unwrap_or_else(|val| val - 1);
60        &self.values[idx][..]
61    }
62}
63
64impl<T: Copy + Clone + Archive> ArchivedLookupTable<T> {
65    pub fn eval(&self, target: u32) -> &[T::Archived] {
66        let target_le = rkyv::rend::u32_le::from_native(target);
67        let idx = self
68            .boundaries
69            .binary_search(&target_le)
70            .unwrap_or_else(|val| val - 1);
71        self.values[idx].as_slice()
72    }
73}
74
75#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
76
77pub struct CharacterDefinition {
78    pub category_definitions: Vec<CategoryData>,
79    pub category_names: Vec<String>,
80    pub mapping: LookupTable<CategoryId>,
81}
82
83impl CharacterDefinition {
84    pub fn categories(&self) -> &[String] {
85        &self.category_names[..]
86    }
87
88    pub fn load(char_def_data: &[u8]) -> LinderaResult<CharacterDefinition> {
89        let mut aligned = rkyv::util::AlignedVec::<16>::new();
90        aligned.extend_from_slice(char_def_data);
91        rkyv::from_bytes::<CharacterDefinition, rkyv::rancor::Error>(&aligned).map_err(|err| {
92            LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err.to_string()))
93        })
94    }
95
96    pub fn lookup_definition(&self, category_id: CategoryId) -> &CategoryData {
97        &self.category_definitions[category_id.0]
98    }
99
100    pub fn category_name(&self, category_id: CategoryId) -> &str {
101        &self.category_names[category_id.0]
102    }
103
104    pub fn category_id_by_name(&self, name: &str) -> Option<CategoryId> {
105        self.category_names
106            .iter()
107            .position(|n| n == name)
108            .map(CategoryId)
109    }
110
111    pub fn lookup_categories(&self, c: char) -> &[CategoryId] {
112        self.mapping.eval(c as u32)
113    }
114}
115
116impl ArchivedCharacterDefinition {
117    pub fn categories(&self) -> &[rkyv::string::ArchivedString] {
118        &self.category_names[..]
119    }
120
121    pub fn lookup_definition(&self, category_id: usize) -> &ArchivedCategoryData {
122        &self.category_definitions[category_id]
123    }
124
125    pub fn category_name(&self, category_id: usize) -> &str {
126        self.category_names[category_id].as_str()
127    }
128
129    pub fn lookup_categories(&self, c: char) -> &[ArchivedCategoryId] {
130        self.mapping.eval(c as u32)
131    }
132}
133
#[cfg(test)]
mod tests {
    use crate::dictionary::character_definition::LookupTable;

    /// A table built from boundaries `{0, 10}` must reproduce the generating
    /// function for every key in `0..100`.
    #[test]
    fn test_lookup_table() {
        // Step function: 0 below 10, 1 from 10 upwards.
        let funct = |c: u32, output: &mut Vec<u32>| {
            let step = if c >= 10u32 { 1u32 } else { 0u32 };
            output.push(step);
        };
        let lookup_table = LookupTable::from_fn(vec![0u32, 10u32], &funct);

        for codepoint in 0..100 {
            let mut expected = Vec::new();
            funct(codepoint, &mut expected);
            assert_eq!(lookup_table.eval(codepoint), expected.as_slice());
        }
    }
}
154}