/// Magic numbers that identify the dictionary format versions known to this module.
///
/// Every value is spelled as a 64-bit hex pattern and reinterpreted as `i64`. The user
/// dictionary patterns have the top bit set, so they do not fit a positive `i64` literal
/// and come out negative after the cast.
pub mod dictionary_version {
    /// System dictionary, version 1.
    pub const SYSTEM_DICT_VERSION_1: i64 = 0x7366_d3f1_8cdc_7f5d_u64 as i64;
    /// System dictionary, version 2 (reported by `has_synonym_group_ids`).
    pub const SYSTEM_DICT_VERSION_2: i64 = 0x7366_d3f1_8cdc_7f5e_u64 as i64;
    /// User dictionary, version 1.
    pub const USER_DICT_VERSION_1: i64 = 0xa50f_3118_8cdc_7f5d_u64 as i64;
    /// User dictionary, version 2 (reported by `has_synonym_group_ids`).
    pub const USER_DICT_VERSION_2: i64 = 0xa50f_3118_8cdc_7f5e_u64 as i64;
    /// User dictionary, version 3 (reported by `has_synonym_group_ids`).
    pub const USER_DICT_VERSION_3: i64 = 0xa50f_3118_8cdc_7f5f_u64 as i64;
}
11
/// Size of the serialized dictionary header.
///
/// NOTE(review): presumably a byte count covering the on-disk form of
/// [`DictionaryHeader`]; confirm against the code that reads/writes the header.
pub const DICTIONARY_HEADER_SIZE: usize = 272;

/// Metadata carried by a dictionary header.
///
/// Implements `PartialEq`/`Eq` so headers can be compared directly (for example in
/// round-trip tests of the header reader and writer).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DictionaryHeader {
    /// Format version; the values recognised by this module are listed in
    /// `dictionary_version`.
    pub version: i64,
    /// Creation timestamp. NOTE(review): unit/epoch is not visible here — confirm.
    pub create_time: i64,
    /// Free-form description text.
    pub description: String,
}
22
/// A part-of-speech entry made of exactly six tag strings.
///
/// Implements `PartialEq`, `Eq` and `Hash` so entries can be compared, de-duplicated
/// and used as map/set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct POS {
    /// The six tags, in order. NOTE(review): the meaning of each position is defined
    /// by the dictionary data, not by this type — confirm against the dictionary spec.
    pub tags: [String; 6],
}
28
/// Connection ids and cost attached to a word.
///
/// A small `Copy` value type; `Default` yields all zeros. Implements `PartialEq`, `Eq`
/// and `Hash` so parameters can be compared and used as keys.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct WordParameter {
    /// Left connection id.
    pub left_id: i16,
    /// Right connection id.
    pub right_id: i16,
    /// Cost of the word itself.
    pub cost: i16,
}
36
/// Dimensions and location of a connection-cost table inside a byte buffer.
#[derive(Debug)]
pub struct ConnectionMatrix {
    /// Number of left ids; this is the row stride used when indexing the table.
    pub left_size: usize,
    /// Number of right ids.
    pub right_size: usize,
    /// Offset into the backing buffer at which the first cost entry starts.
    pub data_offset: usize,
}

/// Borrowed, copyable view used to look up connection costs.
///
/// Costs are stored as consecutive little-endian `i16` values starting at
/// `data_offset` within `data`.
#[derive(Debug, Clone, Copy)]
pub struct ConnectionCosts<'a> {
    /// Number of left ids (row stride of the table).
    pub left_size: usize,
    /// Number of right ids. Not consulted by [`ConnectionCosts::get_cost`].
    pub right_size: usize,
    /// Offset into `data` at which the first cost entry starts.
    pub data_offset: usize,
    /// Buffer that contains the cost table.
    pub data: &'a [u8],
}

impl<'a> ConnectionCosts<'a> {
    /// Builds a view by copying the dimensions of `matrix` and borrowing `data`.
    #[inline]
    pub fn new(matrix: &ConnectionMatrix, data: &'a [u8]) -> Self {
        Self {
            left_size: matrix.left_size,
            right_size: matrix.right_size,
            data_offset: matrix.data_offset,
            data,
        }
    }

    /// Returns the connection cost stored for `(left_id, right_id)`.
    ///
    /// The entry index is `right_id * left_size + left_id`; each entry occupies two
    /// bytes. Both ids are reinterpreted as `u16`, so a negative id turns into a large
    /// index rather than a negative one.
    ///
    /// Returns `i16::MAX` (`0x7fff`) when the entry does not lie completely inside
    /// `data`. This includes the case where computing its position would overflow
    /// `usize`, so the lookup never panics or wraps around to an unrelated offset.
    #[inline]
    pub fn get_cost(&self, left_id: i16, right_id: i16) -> i16 {
        let left = usize::from(left_id as u16);
        let right = usize::from(right_id as u16);

        // Every step is checked: `left_size` and `data_offset` are public fields, so a
        // corrupt or hand-built view must fall through to the fallback cost below.
        let entry = right
            .checked_mul(self.left_size)
            .and_then(|row_start| row_start.checked_add(left))
            .and_then(|index| index.checked_mul(2))
            .and_then(|relative| relative.checked_add(self.data_offset))
            .and_then(|start| {
                let end = start.checked_add(2)?;
                self.data.get(start..end)
            });

        match entry {
            Some(&[lo, hi]) => i16::from_le_bytes([lo, hi]),
            _ => i16::MAX,
        }
    }
}
80
81#[derive(Debug)]
83pub struct Grammar {
84 pub pos_list: Vec<POS>,
85 pub connection: ConnectionMatrix,
86 pub bos_parameter: WordParameter,
87 pub eos_parameter: WordParameter,
88}
89
/// Detailed information stored for one dictionary word.
///
/// Implements `PartialEq`/`Eq` so entries can be compared, and `Default` (empty
/// strings, zero numbers, empty lists) so callers can build partially filled values
/// with struct-update syntax.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WordInfo {
    /// Surface form of the word.
    pub surface: String,
    /// Length of the headword. NOTE(review): unit (bytes vs. characters) is not
    /// visible here — confirm against the dictionary reader.
    pub headword_length: u16,
    /// Part-of-speech id. NOTE(review): presumably an index into `Grammar::pos_list`.
    pub pos_id: i16,
    /// Normalized form of the word.
    pub normalized_form: String,
    /// Word id of the dictionary-form entry.
    pub dictionary_form_word_id: i32,
    /// Dictionary form of the word.
    pub dictionary_form: String,
    /// Reading form of the word.
    pub reading_form: String,
    /// Word ids making up the A-unit split.
    pub a_unit_split: Vec<i32>,
    /// Word ids making up the B-unit split.
    pub b_unit_split: Vec<i32>,
    /// Word ids describing the word structure.
    pub word_structure: Vec<i32>,
    /// Synonym group ids attached to the word.
    pub synonym_gids: Vec<i32>,
}
105
/// Number of low bits of a packed word id that hold the word index.
const DICT_ID_SHIFT: u32 = 28;
/// Mask selecting the word-index bits (the low 28 bits) of a packed word id.
const WORD_ID_MASK: i32 = (1 << DICT_ID_SHIFT) - 1;
/// Mask selecting the four dictionary-id bits once they are shifted down.
const DICT_ID_MASK: i32 = 0xf;

/// Extracts the dictionary id (the top four bits) from a packed word id.
#[inline]
pub fn get_dictionary_id(word_id: i32) -> i32 {
    // The arithmetic shift copies the sign bit into the upper bits, but the mask keeps
    // only the four bits that originally sat at the top of `word_id`.
    (word_id >> DICT_ID_SHIFT) & DICT_ID_MASK
}

/// Extracts the word index (the low 28 bits) from a packed word id.
#[inline]
pub fn get_word_index(word_id: i32) -> i32 {
    WORD_ID_MASK & word_id
}

/// Packs a dictionary id and a word index into a single word id.
///
/// Only the low 4 bits of `dict_id` and the low 28 bits of `word_index` are kept;
/// anything above is silently discarded.
#[inline]
pub fn pack_word_id(dict_id: i32, word_index: i32) -> i32 {
    // Assemble in `u32` so a dictionary id of 8 or more can occupy the top bit.
    let dict_bits = ((dict_id & DICT_ID_MASK) as u32) << DICT_ID_SHIFT;
    let index_bits = (word_index & WORD_ID_MASK) as u32;
    (dict_bits | index_bits) as i32
}
128
129pub fn is_system_dictionary(version: i64) -> bool {
131 version == dictionary_version::SYSTEM_DICT_VERSION_1
132 || version == dictionary_version::SYSTEM_DICT_VERSION_2
133}
134
135pub fn is_user_dictionary(version: i64) -> bool {
137 version == dictionary_version::USER_DICT_VERSION_1
138 || version == dictionary_version::USER_DICT_VERSION_2
139 || version == dictionary_version::USER_DICT_VERSION_3
140}
141
142pub fn has_synonym_group_ids(version: i64) -> bool {
144 version == dictionary_version::SYSTEM_DICT_VERSION_2
145 || version == dictionary_version::USER_DICT_VERSION_2
146 || version == dictionary_version::USER_DICT_VERSION_3
147}
148
#[cfg(test)]
mod tests {
    use super::dictionary_version::{
        SYSTEM_DICT_VERSION_1, SYSTEM_DICT_VERSION_2, USER_DICT_VERSION_1, USER_DICT_VERSION_2,
        USER_DICT_VERSION_3,
    };
    use super::*;

    /// Packs `(dict_id, word_index)` and checks that both parts are recovered unchanged.
    fn assert_round_trip(dict_id: i32, word_index: i32) {
        let packed = pack_word_id(dict_id, word_index);
        assert_eq!(get_dictionary_id(packed), dict_id);
        assert_eq!(get_word_index(packed), word_index);
    }

    #[test]
    fn test_pack_unpack_word_id() {
        assert_round_trip(3, 12345);
    }

    #[test]
    fn test_pack_word_id_zero() {
        assert_round_trip(0, 0);
    }

    #[test]
    fn test_pack_word_id_max() {
        // Largest dictionary id together with the largest word index.
        assert_round_trip(15, WORD_ID_MASK);
    }

    #[test]
    fn test_version_checks() {
        for version in [SYSTEM_DICT_VERSION_1, SYSTEM_DICT_VERSION_2] {
            assert!(is_system_dictionary(version));
        }
        assert!(!is_system_dictionary(USER_DICT_VERSION_1));

        for version in [USER_DICT_VERSION_1, USER_DICT_VERSION_2, USER_DICT_VERSION_3] {
            assert!(is_user_dictionary(version));
        }
        assert!(!is_user_dictionary(SYSTEM_DICT_VERSION_1));

        assert!(has_synonym_group_ids(SYSTEM_DICT_VERSION_2));
        assert!(!has_synonym_group_ids(SYSTEM_DICT_VERSION_1));
    }
}