lindera_dictionary/dictionary/
character_definition.rs1use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
2use serde::{Deserialize, Serialize};
3
4use crate::LinderaResult;
5use crate::error::LinderaErrorKind;
6
7#[derive(Serialize, Deserialize, Debug, Copy, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
8
9pub struct CategoryData {
10 pub invoke: bool,
11 pub group: bool,
12 pub length: u32,
13}
14
15#[derive(
16 Serialize,
17 Deserialize,
18 Clone,
19 Debug,
20 Hash,
21 Copy,
22 PartialOrd,
23 Ord,
24 Eq,
25 PartialEq,
26 Archive,
27 RkyvSerialize,
28 RkyvDeserialize,
29)]
30
31pub struct CategoryId(pub usize);
32
33#[derive(Serialize, Deserialize, Clone, Archive, RkyvSerialize, RkyvDeserialize)]
34
35pub struct LookupTable<T: Copy + Clone> {
36 boundaries: Vec<u32>,
37 values: Vec<Vec<T>>,
38}
39
40impl<T: Copy + Clone> LookupTable<T> {
41 pub fn from_fn(mut boundaries: Vec<u32>, funct: &dyn Fn(u32, &mut Vec<T>)) -> LookupTable<T> {
42 if !boundaries.contains(&0) {
43 boundaries.push(0);
44 }
45 boundaries.sort_unstable();
46 let mut values = Vec::new();
47 for &boundary in &boundaries {
48 let mut output = Vec::default();
49 funct(boundary, &mut output);
50 values.push(output);
51 }
52 LookupTable { boundaries, values }
53 }
54
55 pub fn eval(&self, target: u32) -> &[T] {
56 let idx = self
57 .boundaries
58 .binary_search(&target)
59 .unwrap_or_else(|val| val - 1);
60 &self.values[idx][..]
61 }
62}
63
64impl<T: Copy + Clone + Archive> ArchivedLookupTable<T> {
65 pub fn eval(&self, target: u32) -> &[T::Archived] {
66 let target_le = rkyv::rend::u32_le::from_native(target);
67 let idx = self
68 .boundaries
69 .binary_search(&target_le)
70 .unwrap_or_else(|val| val - 1);
71 self.values[idx].as_slice()
72 }
73}
74
75#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
76
77pub struct CharacterDefinition {
78 pub category_definitions: Vec<CategoryData>,
79 pub category_names: Vec<String>,
80 pub mapping: LookupTable<CategoryId>,
81}
82
83impl CharacterDefinition {
84 pub fn categories(&self) -> &[String] {
85 &self.category_names[..]
86 }
87
88 pub fn load(char_def_data: &[u8]) -> LinderaResult<CharacterDefinition> {
89 let mut aligned = rkyv::util::AlignedVec::<16>::new();
90 aligned.extend_from_slice(char_def_data);
91 rkyv::from_bytes::<CharacterDefinition, rkyv::rancor::Error>(&aligned).map_err(|err| {
92 LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err.to_string()))
93 })
94 }
95
96 pub fn lookup_definition(&self, category_id: CategoryId) -> &CategoryData {
97 &self.category_definitions[category_id.0]
98 }
99
100 pub fn category_name(&self, category_id: CategoryId) -> &str {
101 &self.category_names[category_id.0]
102 }
103
104 pub fn category_id_by_name(&self, name: &str) -> Option<CategoryId> {
105 self.category_names
106 .iter()
107 .position(|n| n == name)
108 .map(CategoryId)
109 }
110
111 pub fn lookup_categories(&self, c: char) -> &[CategoryId] {
112 self.mapping.eval(c as u32)
113 }
114}
115
116impl ArchivedCharacterDefinition {
117 pub fn categories(&self) -> &[rkyv::string::ArchivedString] {
118 &self.category_names[..]
119 }
120
121 pub fn lookup_definition(&self, category_id: usize) -> &ArchivedCategoryData {
122 &self.category_definitions[category_id]
123 }
124
125 pub fn category_name(&self, category_id: usize) -> &str {
126 self.category_names[category_id].as_str()
127 }
128
129 pub fn lookup_categories(&self, c: char) -> &[ArchivedCategoryId] {
130 self.mapping.eval(c as u32)
131 }
132}
133
#[cfg(test)]
mod tests {
    use crate::dictionary::character_definition::LookupTable;

    /// A table built from boundaries `{0, 10}` must reproduce the generator
    /// function for every input in `0..100`.
    #[test]
    fn test_lookup_table() {
        // Step function: 0 below 10, 1 from 10 upwards.
        let classify = |code: u32, sink: &mut Vec<u32>| {
            let label = if code >= 10u32 { 1u32 } else { 0u32 };
            sink.push(label);
        };

        let table = LookupTable::from_fn(vec![0u32, 10u32], &classify);

        for code in 0..100 {
            let mut expected = Vec::default();
            classify(code, &mut expected);
            assert_eq!(table.eval(code), expected.as_slice());
        }
    }
}
154}