kiri_engine/dictionary/
lexicon.rs1use std::sync::Arc;
5
6use crate::dictionary::trie::DoubleArrayTrie;
7use crate::dictionary::word_id_table::WordIdTable;
8use crate::dictionary::word_info::WordInfoList;
9use crate::dictionary::word_params::WordParameterList;
10use crate::types::{WordInfo, WordParameter};
11
/// One dictionary hit produced by [`Lexicon::lookup`].
///
/// Each value corresponds to a single trie match whose word-id list was
/// non-empty.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WordMatch {
    /// Word ids that the word-id table returned for the trie match.
    pub word_ids: Vec<i32>,
    /// The `length` reported by the trie for this match.
    // NOTE(review): presumably the matched prefix length in bytes, since the
    // lookup key is a byte slice — confirm against the trie implementation.
    pub length: usize,
}
18
19pub struct Lexicon {
20 pub trie: Arc<DoubleArrayTrie>,
21 pub word_id_table: WordIdTable,
22 pub word_params: WordParameterList,
23 pub word_infos: WordInfoList,
24}
25
26impl Lexicon {
27 pub fn from_bytes(data: &[u8], offset: usize, has_synonym_gid: bool) -> (Self, usize) {
30 let start_offset = offset;
31 let mut pos = offset;
32
33 let (trie, trie_bytes) = DoubleArrayTrie::from_bytes(data, pos);
35 pos += trie_bytes;
36
37 let word_id_table = WordIdTable::new(data, pos);
39 pos += word_id_table.storage_size();
40
41 let word_params = WordParameterList::new(data, pos);
43 pos += word_params.storage_size();
44
45 let word_infos = WordInfoList::new(pos, word_params.size(), has_synonym_gid);
47
48 let bytes_read = pos - start_offset;
49 (
50 Self {
51 trie: Arc::new(trie),
52 word_id_table,
53 word_params,
54 word_infos,
55 },
56 bytes_read,
57 )
58 }
59
60 pub fn from_shared(
63 data: &[u8],
64 offset: usize,
65 trie: Arc<DoubleArrayTrie>,
66 trie_bytes: usize,
67 has_synonym_gid: bool,
68 ) -> (Self, usize) {
69 let start_offset = offset;
70 let mut pos = offset + trie_bytes;
71
72 let word_id_table = WordIdTable::new(data, pos);
73 pos += word_id_table.storage_size();
74
75 let word_params = WordParameterList::new(data, pos);
76 pos += word_params.storage_size();
77
78 let word_infos = WordInfoList::new(pos, word_params.size(), has_synonym_gid);
79
80 let bytes_read = pos - start_offset;
81 (
82 Self {
83 trie,
84 word_id_table,
85 word_params,
86 word_infos,
87 },
88 bytes_read,
89 )
90 }
91
92 pub fn lookup(&self, data: &[u8], key: &[u8], offset: usize, limit: usize) -> Vec<WordMatch> {
94 let trie_matches = self.trie.common_prefix_search(key, offset, limit);
95 let mut results = Vec::new();
96
97 for m in &trie_matches {
98 let word_ids = self.word_id_table.get_word_ids(data, m.value, 0);
99 if !word_ids.is_empty() {
100 results.push(WordMatch {
101 word_ids,
102 length: m.length,
103 });
104 }
105 }
106
107 results
108 }
109
110 #[inline]
112 pub fn get_left_id(&self, data: &[u8], word_index: u32) -> i16 {
113 self.word_params.get_left_id(data, word_index)
114 }
115
116 #[inline]
118 pub fn get_right_id(&self, data: &[u8], word_index: u32) -> i16 {
119 self.word_params.get_right_id(data, word_index)
120 }
121
122 #[inline]
124 pub fn get_cost(&self, data: &[u8], word_index: u32) -> i16 {
125 self.word_params.get_cost(data, word_index)
126 }
127
128 pub fn get_parameters(&self, data: &[u8], word_index: u32) -> WordParameter {
130 self.word_params.get_parameters(data, word_index)
131 }
132
133 pub fn get_word_info(&self, data: &[u8], word_index: u32) -> WordInfo {
135 self.word_infos.get_word_info(data, word_index)
136 }
137
138 pub fn size(&self) -> u32 {
140 self.word_params.size()
141 }
142}