lindera_dictionary/dictionary/
metadata.rs1use serde::{Deserialize, Serialize};
2
3use crate::decompress::Algorithm;
4use crate::dictionary::schema::Schema;
5
6const DEFAULT_COMPRESS_ALGORITHM: Algorithm = Algorithm::Deflate;
7const DEFAULT_WORD_COST: i16 = -10000;
8const DEFAULT_LEFT_CONTEXT_ID: u16 = 1288;
9const DEFAULT_RIGHT_CONTEXT_ID: u16 = 1288;
10const DEFAULT_FIELD_VALUE: &str = "*";
11
12#[derive(Clone, Serialize, Deserialize)]
13pub struct ModelInfo {
14 pub feature_count: usize,
15 pub label_count: usize,
16 pub max_left_context_id: usize,
17 pub max_right_context_id: usize,
18 pub connection_matrix_size: String,
19 pub version: String,
20 pub training_iterations: u64,
21 pub regularization: f64,
22 pub updated_at: u64,
23}
24
25#[derive(Clone, Serialize, Deserialize)]
26pub struct Metadata {
27 pub name: String, pub encoding: String, pub compress_algorithm: Algorithm, pub default_word_cost: i16, pub default_left_context_id: u16, pub default_right_context_id: u16, pub default_field_value: String, pub flexible_csv: bool, pub skip_invalid_cost_or_id: bool, pub normalize_details: bool, pub dictionary_schema: Schema, pub user_dictionary_schema: Schema, #[serde(skip_serializing_if = "Option::is_none")]
40 pub model_info: Option<ModelInfo>, }
42
43impl Default for Metadata {
44 fn default() -> Self {
45 Metadata::new(
47 "default".to_string(),
48 "UTF-8".to_string(),
49 DEFAULT_COMPRESS_ALGORITHM,
50 DEFAULT_WORD_COST,
51 DEFAULT_LEFT_CONTEXT_ID,
52 DEFAULT_RIGHT_CONTEXT_ID,
53 DEFAULT_FIELD_VALUE.to_string(),
54 false,
55 false,
56 false,
57 Schema::default(),
58 Schema::new(vec![
59 "surface".to_string(),
60 "reading".to_string(),
61 "pronunciation".to_string(),
62 ]),
63 )
64 }
65}
66
67impl Metadata {
68 #[allow(clippy::too_many_arguments)]
69 pub fn new(
70 name: String,
71 encoding: String,
72 compress_algorithm: Algorithm,
73 simple_word_cost: i16,
74 default_left_context_id: u16,
75 default_right_context_id: u16,
76 default_field_value: String,
77 flexible_csv: bool,
78 skip_invalid_cost_or_id: bool,
79 normalize_details: bool,
80 schema: Schema,
81 userdic_schema: Schema,
82 ) -> Self {
83 Self {
84 encoding,
85 compress_algorithm,
86 default_word_cost: simple_word_cost,
87 default_left_context_id,
88 default_right_context_id,
89 default_field_value,
90 dictionary_schema: schema,
91 name,
92 flexible_csv,
93 skip_invalid_cost_or_id,
94 normalize_details,
95 user_dictionary_schema: userdic_schema,
96 model_info: None,
97 }
98 }
99
100 pub fn load(data: &[u8]) -> crate::LinderaResult<Self> {
103 if data.is_empty() {
105 return Err(crate::error::LinderaErrorKind::Io
106 .with_error(anyhow::anyhow!("Empty metadata data")));
107 }
108
109 if let Ok(metadata) = serde_json::from_slice(data) {
111 return Ok(metadata);
112 }
113
114 #[cfg(feature = "compress")]
116 {
117 use crate::decompress::{CompressedData, decompress};
118
119 if let Ok((compressed_data, _)) = bincode::serde::decode_from_slice::<CompressedData, _>(
120 data,
121 bincode::config::legacy(),
122 ) {
123 if let Ok(decompressed) = decompress(compressed_data) {
124 if let Ok(metadata) = serde_json::from_slice(&decompressed) {
126 return Ok(metadata);
127 }
128 }
129 }
130 }
131
132 #[cfg(not(feature = "compress"))]
133 {
134 return serde_json::from_slice(data).map_err(|err| {
136 crate::error::LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err))
137 });
138 }
139
140 Err(
142 crate::error::LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(
143 "Failed to deserialize metadata from any supported format"
144 )),
145 )
146 }
147
148 pub fn load_or_default(data: &[u8], default_fn: fn() -> Self) -> Self {
151 if data.is_empty() {
152 default_fn()
153 } else {
154 match Self::load(data) {
155 Ok(metadata) => metadata,
156 Err(_) => default_fn(),
157 }
158 }
159 }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// The default metadata is named "default".
    #[test]
    fn test_metadata_default() {
        let name = Metadata::default().name;
        assert_eq!(name, "default");
    }

    /// `Metadata::new` stores the given name.
    #[test]
    fn test_metadata_new() {
        let user_schema = Schema::new(vec!["surface".to_string(), "reading".to_string()]);

        let metadata = Metadata::new(
            String::from("TestDict"),
            String::from("UTF-8"),
            Algorithm::Deflate,
            -10000,
            0,
            0,
            String::from("*"),
            false,
            false,
            false,
            Schema::default(),
            user_schema,
        );

        assert_eq!(metadata.name, "TestDict");
    }

    /// Default metadata survives a JSON round trip and the JSON text
    /// mentions the expected keys and values.
    #[test]
    fn test_metadata_serialization() {
        let json = serde_json::to_string(&Metadata::default()).unwrap();

        for needle in ["default", "schema", "name"] {
            assert!(json.contains(needle));
        }

        let restored: Metadata = serde_json::from_str(&json).unwrap();
        assert_eq!(restored.name, "default");
    }
}