lindera_dictionary/dictionary/
metadata.rs1use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
2use serde::{Deserialize, Serialize};
3
4use crate::decompress::Algorithm;
5use crate::dictionary::schema::Schema;
6
7const DEFAULT_COMPRESS_ALGORITHM: Algorithm = Algorithm::Deflate;
8const DEFAULT_WORD_COST: i16 = -10000;
9const DEFAULT_LEFT_CONTEXT_ID: u16 = 1288;
10const DEFAULT_RIGHT_CONTEXT_ID: u16 = 1288;
11const DEFAULT_FIELD_VALUE: &str = "*";
12
13#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
14
15pub struct ModelInfo {
16 pub feature_count: usize,
17 pub label_count: usize,
18 pub max_left_context_id: usize,
19 pub max_right_context_id: usize,
20 pub connection_matrix_size: String,
21 pub version: String,
22 pub training_iterations: u64,
23 pub regularization: f64,
24 pub updated_at: u64,
25}
26
27#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
28
29pub struct Metadata {
30 pub name: String, pub encoding: String, pub compress_algorithm: Algorithm, pub default_word_cost: i16, pub default_left_context_id: u16, pub default_right_context_id: u16, pub default_field_value: String, pub flexible_csv: bool, pub skip_invalid_cost_or_id: bool, pub normalize_details: bool, pub dictionary_schema: Schema, pub user_dictionary_schema: Schema, #[serde(skip_serializing_if = "Option::is_none")]
43 pub model_info: Option<ModelInfo>, }
45
46impl Default for Metadata {
47 fn default() -> Self {
48 Metadata::new(
50 "default".to_string(),
51 "UTF-8".to_string(),
52 DEFAULT_COMPRESS_ALGORITHM,
53 DEFAULT_WORD_COST,
54 DEFAULT_LEFT_CONTEXT_ID,
55 DEFAULT_RIGHT_CONTEXT_ID,
56 DEFAULT_FIELD_VALUE.to_string(),
57 false,
58 false,
59 false,
60 Schema::default(),
61 Schema::new(vec![
62 "surface".to_string(),
63 "reading".to_string(),
64 "pronunciation".to_string(),
65 ]),
66 )
67 }
68}
69
70impl Metadata {
71 #[allow(clippy::too_many_arguments)]
72 pub fn new(
73 name: String,
74 encoding: String,
75 compress_algorithm: Algorithm,
76 simple_word_cost: i16,
77 default_left_context_id: u16,
78 default_right_context_id: u16,
79 default_field_value: String,
80 flexible_csv: bool,
81 skip_invalid_cost_or_id: bool,
82 normalize_details: bool,
83 schema: Schema,
84 userdic_schema: Schema,
85 ) -> Self {
86 Self {
87 encoding,
88 compress_algorithm,
89 default_word_cost: simple_word_cost,
90 default_left_context_id,
91 default_right_context_id,
92 default_field_value,
93 dictionary_schema: schema,
94 name,
95 flexible_csv,
96 skip_invalid_cost_or_id,
97 normalize_details,
98 user_dictionary_schema: userdic_schema,
99 model_info: None,
100 }
101 }
102
103 pub fn load(data: &[u8]) -> crate::LinderaResult<Self> {
106 if data.is_empty() {
108 return Err(crate::error::LinderaErrorKind::Io
109 .with_error(anyhow::anyhow!("Empty metadata data")));
110 }
111
112 if let Ok(metadata) = serde_json::from_slice(data) {
114 return Ok(metadata);
115 }
116
117 #[cfg(feature = "compress")]
119 {
120 use crate::decompress::{CompressedData, decompress};
121
122 if let Ok(compressed_data) =
123 rkyv::from_bytes::<CompressedData, rkyv::rancor::Error>(data)
124 {
125 if let Ok(decompressed) = decompress(compressed_data) {
126 if let Ok(metadata) = serde_json::from_slice(&decompressed) {
128 return Ok(metadata);
129 }
130 }
131 }
132 }
133
134 #[cfg(not(feature = "compress"))]
135 {
136 return serde_json::from_slice(data).map_err(|err| {
138 crate::error::LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err))
139 });
140 }
141
142 Err(
144 crate::error::LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(
145 "Failed to deserialize metadata from any supported format"
146 )),
147 )
148 }
149
150 pub fn load_or_default(data: &[u8], default_fn: fn() -> Self) -> Self {
153 if data.is_empty() {
154 default_fn()
155 } else {
156 match Self::load(data) {
157 Ok(metadata) => metadata,
158 Err(_) => default_fn(),
159 }
160 }
161 }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// `Metadata::default()` must reflect the module-level defaults.
    #[test]
    fn test_metadata_default() {
        let metadata = Metadata::default();

        assert_eq!(metadata.name, "default");
        assert_eq!(metadata.encoding, "UTF-8");
        assert_eq!(metadata.default_word_cost, DEFAULT_WORD_COST);
        assert_eq!(metadata.default_left_context_id, DEFAULT_LEFT_CONTEXT_ID);
        assert_eq!(metadata.default_right_context_id, DEFAULT_RIGHT_CONTEXT_ID);
        assert_eq!(metadata.default_field_value, DEFAULT_FIELD_VALUE);
        assert!(!metadata.flexible_csv);
        assert!(!metadata.skip_invalid_cost_or_id);
        assert!(!metadata.normalize_details);
        assert!(metadata.model_info.is_none());
    }

    /// `Metadata::new` must store every argument in its matching field.
    #[test]
    fn test_metadata_new() {
        let metadata = Metadata::new(
            "TestDict".to_string(),
            "UTF-8".to_string(),
            Algorithm::Deflate,
            -10000,
            0,
            0,
            "*".to_string(),
            false,
            false,
            false,
            Schema::default(),
            Schema::new(vec!["surface".to_string(), "reading".to_string()]),
        );

        assert_eq!(metadata.name, "TestDict");
        assert_eq!(metadata.encoding, "UTF-8");
        assert_eq!(metadata.default_word_cost, -10000);
        assert_eq!(metadata.default_left_context_id, 0);
        assert_eq!(metadata.default_right_context_id, 0);
        assert_eq!(metadata.default_field_value, "*");
        assert!(metadata.model_info.is_none());
    }

    /// A JSON round trip must preserve the metadata.
    #[test]
    fn test_metadata_serialization() {
        let metadata = Metadata::default();

        let serialized = serde_json::to_string(&metadata).unwrap();
        assert!(serialized.contains("default"));
        assert!(serialized.contains("schema"));
        assert!(serialized.contains("name"));

        let deserialized: Metadata = serde_json::from_str(&serialized).unwrap();
        assert_eq!(deserialized.name, "default");
        assert_eq!(deserialized.encoding, "UTF-8");
        assert_eq!(deserialized.default_word_cost, DEFAULT_WORD_COST);
    }

    /// `load` must accept the plain JSON form produced by serde.
    #[test]
    fn test_metadata_load_json() {
        let bytes = serde_json::to_vec(&Metadata::default()).unwrap();

        let loaded_name = Metadata::load(&bytes).ok().map(|metadata| metadata.name);
        assert_eq!(loaded_name, Some("default".to_string()));
    }

    /// Empty input is an error for `load`.
    #[test]
    fn test_metadata_load_empty() {
        assert!(Metadata::load(&[]).is_err());
    }

    /// `load_or_default` must use the supplied fallback for empty input and
    /// the loaded value otherwise.
    #[test]
    fn test_metadata_load_or_default() {
        fn fallback() -> Metadata {
            let mut metadata = Metadata::default();
            metadata.name = "fallback".to_string();
            metadata
        }

        let from_empty = Metadata::load_or_default(&[], fallback);
        assert_eq!(from_empty.name, "fallback");

        let bytes = serde_json::to_vec(&Metadata::default()).unwrap();
        let from_json = Metadata::load_or_default(&bytes, fallback);
        assert_eq!(from_json.name, "default");
    }
}