lindera_dictionary/dictionary/
metadata.rs1use serde::{Deserialize, Serialize};
2
3use crate::decompress::Algorithm;
4use crate::dictionary::schema::Schema;
5
6const DEFAULT_COMPRESS_ALGORITHM: Algorithm = Algorithm::Deflate;
7const DEFAULT_WORD_COST: i16 = -10000;
8const DEFAULT_LEFT_CONTEXT_ID: u16 = 1288;
9const DEFAULT_RIGHT_CONTEXT_ID: u16 = 1288;
10const DEFAULT_FIELD_VALUE: &str = "*";
11
12#[derive(Clone, Serialize, Deserialize)]
13pub struct Metadata {
14 pub name: String, pub encoding: String, pub compress_algorithm: Algorithm, pub default_word_cost: i16, pub default_left_context_id: u16, pub default_right_context_id: u16, pub default_field_value: String, pub flexible_csv: bool, pub skip_invalid_cost_or_id: bool, pub normalize_details: bool, pub dictionary_schema: Schema, pub user_dictionary_schema: Schema, }
27
28impl Default for Metadata {
29 fn default() -> Self {
30 Metadata::new(
32 "default".to_string(),
33 "UTF-8".to_string(),
34 DEFAULT_COMPRESS_ALGORITHM,
35 DEFAULT_WORD_COST,
36 DEFAULT_LEFT_CONTEXT_ID,
37 DEFAULT_RIGHT_CONTEXT_ID,
38 DEFAULT_FIELD_VALUE.to_string(),
39 false,
40 false,
41 false,
42 Schema::default(),
43 Schema::new(vec![
44 "surface".to_string(),
45 "reading".to_string(),
46 "pronunciation".to_string(),
47 ]),
48 )
49 }
50}
51
52impl Metadata {
53 #[allow(clippy::too_many_arguments)]
54 pub fn new(
55 name: String,
56 encoding: String,
57 compress_algorithm: Algorithm,
58 simple_word_cost: i16,
59 default_left_context_id: u16,
60 default_right_context_id: u16,
61 default_field_value: String,
62 flexible_csv: bool,
63 skip_invalid_cost_or_id: bool,
64 normalize_details: bool,
65 schema: Schema,
66 userdic_schema: Schema,
67 ) -> Self {
68 Self {
69 encoding,
70 compress_algorithm,
71 default_word_cost: simple_word_cost,
72 default_left_context_id,
73 default_right_context_id,
74 default_field_value,
75 dictionary_schema: schema,
76 name,
77 flexible_csv,
78 skip_invalid_cost_or_id,
79 normalize_details,
80 user_dictionary_schema: userdic_schema,
81 }
82 }
83
84 pub fn load(data: &[u8]) -> crate::LinderaResult<Self> {
87 if data.is_empty() {
89 return Err(crate::error::LinderaErrorKind::Io
90 .with_error(anyhow::anyhow!("Empty metadata data")));
91 }
92
93 if let Ok(metadata) = serde_json::from_slice(data) {
95 return Ok(metadata);
96 }
97
98 #[cfg(feature = "compress")]
100 {
101 use crate::decompress::{CompressedData, decompress};
102
103 if let Ok((compressed_data, _)) = bincode::serde::decode_from_slice::<CompressedData, _>(
104 data,
105 bincode::config::legacy(),
106 ) {
107 if let Ok(decompressed) = decompress(compressed_data) {
108 if let Ok(metadata) = serde_json::from_slice(&decompressed) {
110 return Ok(metadata);
111 }
112 }
113 }
114 }
115
116 #[cfg(not(feature = "compress"))]
117 {
118 return serde_json::from_slice(data).map_err(|err| {
120 crate::error::LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(err))
121 });
122 }
123
124 Err(
126 crate::error::LinderaErrorKind::Deserialize.with_error(anyhow::anyhow!(
127 "Failed to deserialize metadata from any supported format"
128 )),
129 )
130 }
131
132 pub fn load_or_default(data: &[u8], default_fn: fn() -> Self) -> Self {
135 if data.is_empty() {
136 default_fn()
137 } else {
138 match Self::load(data) {
139 Ok(metadata) => metadata,
140 Err(_) => default_fn(),
141 }
142 }
143 }
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Fallback constructor whose result is distinguishable from
    /// `Metadata::default()` by its name.
    fn fallback() -> Metadata {
        Metadata {
            name: "fallback".to_string(),
            ..Metadata::default()
        }
    }

    #[test]
    fn test_metadata_default() {
        let metadata = Metadata::default();
        assert_eq!(metadata.name, "default");
        assert_eq!(metadata.encoding, "UTF-8");
        assert!(matches!(metadata.compress_algorithm, Algorithm::Deflate));
        assert_eq!(metadata.default_word_cost, -10000);
        assert_eq!(metadata.default_left_context_id, 1288);
        assert_eq!(metadata.default_right_context_id, 1288);
        assert_eq!(metadata.default_field_value, "*");
        assert!(!metadata.flexible_csv);
        assert!(!metadata.skip_invalid_cost_or_id);
        assert!(!metadata.normalize_details);
    }

    #[test]
    fn test_metadata_new() {
        let metadata = Metadata::new(
            "TestDict".to_string(),
            "UTF-8".to_string(),
            Algorithm::Deflate,
            -10000,
            0,
            0,
            "*".to_string(),
            false,
            false,
            false,
            Schema::default(),
            Schema::new(vec!["surface".to_string(), "reading".to_string()]),
        );
        assert_eq!(metadata.name, "TestDict");
        assert_eq!(metadata.encoding, "UTF-8");
        // `simple_word_cost` must land in `default_word_cost`.
        assert_eq!(metadata.default_word_cost, -10000);
        assert_eq!(metadata.default_left_context_id, 0);
        assert_eq!(metadata.default_right_context_id, 0);
        assert_eq!(metadata.default_field_value, "*");
    }

    #[test]
    fn test_metadata_serialization() {
        let metadata = Metadata::default();

        let serialized = serde_json::to_string(&metadata).unwrap();
        assert!(serialized.contains("default"));
        assert!(serialized.contains("schema"));
        assert!(serialized.contains("name"));

        let deserialized: Metadata = serde_json::from_str(&serialized).unwrap();
        assert_eq!(deserialized.name, "default");
    }

    #[test]
    fn test_metadata_load_rejects_empty_input() {
        assert!(Metadata::load(&[]).is_err());
    }

    #[test]
    fn test_metadata_load_reads_json() {
        let serialized = serde_json::to_vec(&Metadata::default()).unwrap();

        // `Metadata` is not `Debug`, so compare through the name instead of
        // unwrapping the result inside an assertion.
        let loaded_name = Metadata::load(&serialized).ok().map(|m| m.name);
        assert_eq!(loaded_name.as_deref(), Some("default"));
    }

    #[test]
    fn test_metadata_load_or_default() {
        // Empty input falls back to the supplied constructor.
        assert_eq!(Metadata::load_or_default(&[], fallback).name, "fallback");

        // Valid JSON wins over the fallback.
        let serialized = serde_json::to_vec(&Metadata::default()).unwrap();
        assert_eq!(
            Metadata::load_or_default(&serialized, fallback).name,
            "default"
        );
    }
}