lindera_dictionary/dictionary/
metadata.rs1use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
2use serde::{Deserialize, Serialize};
3
4use crate::dictionary::schema::Schema;
5
/// Word cost that `Metadata::default()` stores in `default_word_cost`.
const DEFAULT_WORD_COST: i16 = -10000;
/// Left context ID that `Metadata::default()` stores in `default_left_context_id`.
const DEFAULT_LEFT_CONTEXT_ID: u16 = 1288;
/// Right context ID that `Metadata::default()` stores in `default_right_context_id`.
const DEFAULT_RIGHT_CONTEXT_ID: u16 = 1288;
/// Field value that `Metadata::default()` stores in `default_field_value`.
const DEFAULT_FIELD_VALUE: &str = "*";
10
/// Optional model summary carried by `Metadata` in its `model_info` field.
///
/// The type can be cloned and is (de)serializable through both serde and rkyv.
/// Nothing in this file fills in or reads the individual fields, so the
/// per-field notes below are inferred from the names only.
/// NOTE(review): confirm the field semantics against the code that produces
/// this value.
#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct ModelInfo {
    /// Presumably the number of features in the model — TODO confirm.
    pub feature_count: usize,
    /// Presumably the number of labels in the model — TODO confirm.
    pub label_count: usize,
    /// Presumably the largest left context ID the model uses — TODO confirm.
    pub max_left_context_id: usize,
    /// Presumably the largest right context ID the model uses — TODO confirm.
    pub max_right_context_id: usize,
    /// Connection matrix size, kept as text rather than as a number; the
    /// expected format is not visible in this file.
    pub connection_matrix_size: String,
    /// Version string; what it versions (model, tool, format) is not visible here.
    pub version: String,
    /// Presumably the number of training iterations that were run — TODO confirm.
    pub training_iterations: u64,
    /// Presumably the regularization value used for training — TODO confirm.
    pub regularization: f64,
    /// Presumably a timestamp of the last update; unit and epoch are not
    /// visible in this file — TODO confirm.
    pub updated_at: u64,
}
24
25#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
26
27pub struct Metadata {
28 pub name: String, pub encoding: String, pub default_word_cost: i16, pub default_left_context_id: u16, pub default_right_context_id: u16, pub default_field_value: String, pub flexible_csv: bool, pub skip_invalid_cost_or_id: bool, pub normalize_details: bool, pub dictionary_schema: Schema, pub user_dictionary_schema: Schema, #[serde(skip_serializing_if = "Option::is_none")]
40 pub model_info: Option<ModelInfo>, }
42
43impl Default for Metadata {
44 fn default() -> Self {
45 Metadata::new(
47 "default".to_string(),
48 "UTF-8".to_string(),
49 DEFAULT_WORD_COST,
50 DEFAULT_LEFT_CONTEXT_ID,
51 DEFAULT_RIGHT_CONTEXT_ID,
52 DEFAULT_FIELD_VALUE.to_string(),
53 false,
54 false,
55 false,
56 Schema::default(),
57 Schema::new(vec![
58 "surface".to_string(),
59 "reading".to_string(),
60 "pronunciation".to_string(),
61 ]),
62 )
63 }
64}
65
66impl Metadata {
67 #[allow(clippy::too_many_arguments)]
68 pub fn new(
69 name: String,
70 encoding: String,
71 simple_word_cost: i16,
72 default_left_context_id: u16,
73 default_right_context_id: u16,
74 default_field_value: String,
75 flexible_csv: bool,
76 skip_invalid_cost_or_id: bool,
77 normalize_details: bool,
78 schema: Schema,
79 userdic_schema: Schema,
80 ) -> Self {
81 Self {
82 encoding,
83 default_word_cost: simple_word_cost,
84 default_left_context_id,
85 default_right_context_id,
86 default_field_value,
87 dictionary_schema: schema,
88 name,
89 flexible_csv,
90 skip_invalid_cost_or_id,
91 normalize_details,
92 user_dictionary_schema: userdic_schema,
93 model_info: None,
94 }
95 }
96
97 pub fn load(data: &[u8]) -> crate::LinderaResult<Self> {
100 if data.is_empty() {
102 return Err(crate::error::LinderaErrorKind::Io
103 .with_error(anyhow::anyhow!("Empty metadata data")));
104 }
105
106 serde_json::from_slice(data).map_err(|err| {
108 crate::error::LinderaErrorKind::Deserialize
109 .with_error(anyhow::anyhow!(err))
110 .add_context("Failed to deserialize metadata from JSON")
111 })
112 }
113
114 pub fn load_or_default(data: &[u8], default_fn: fn() -> Self) -> Self {
117 if data.is_empty() {
118 default_fn()
119 } else {
120 match Self::load(data) {
121 Ok(metadata) => metadata,
122 Err(_) => default_fn(),
123 }
124 }
125 }
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// `Metadata::default()` must expose the module-level default values.
    #[test]
    fn test_metadata_default() {
        let metadata = Metadata::default();

        assert_eq!(metadata.name, "default");
        assert_eq!(metadata.encoding, "UTF-8");
        assert_eq!(metadata.default_word_cost, DEFAULT_WORD_COST);
        assert_eq!(metadata.default_left_context_id, DEFAULT_LEFT_CONTEXT_ID);
        assert_eq!(metadata.default_right_context_id, DEFAULT_RIGHT_CONTEXT_ID);
        assert_eq!(metadata.default_field_value, DEFAULT_FIELD_VALUE);
        assert!(!metadata.flexible_csv);
        assert!(!metadata.skip_invalid_cost_or_id);
        assert!(!metadata.normalize_details);
        assert!(metadata.model_info.is_none());
    }

    /// Every argument of `Metadata::new` must land in the matching field.
    ///
    /// The values are deliberately distinct from each other so that two
    /// arguments of the same type being swapped makes the test fail.
    #[test]
    fn test_metadata_new() {
        let metadata = Metadata::new(
            "TestDict".to_string(),
            "EUC-JP".to_string(),
            -5000,
            1,
            2,
            "-".to_string(),
            true,
            false,
            true,
            Schema::default(),
            Schema::new(vec!["surface".to_string(), "reading".to_string()]),
        );

        assert_eq!(metadata.name, "TestDict");
        assert_eq!(metadata.encoding, "EUC-JP");
        assert_eq!(metadata.default_word_cost, -5000);
        assert_eq!(metadata.default_left_context_id, 1);
        assert_eq!(metadata.default_right_context_id, 2);
        assert_eq!(metadata.default_field_value, "-");
        assert!(metadata.flexible_csv);
        assert!(!metadata.skip_invalid_cost_or_id);
        assert!(metadata.normalize_details);
        assert!(metadata.model_info.is_none());
    }

    /// Default metadata must survive a JSON round trip, and an unset
    /// `model_info` must not appear in the serialized object.
    #[test]
    fn test_metadata_serialization() {
        let metadata = Metadata::default();

        let serialized = serde_json::to_string(&metadata).unwrap();
        assert!(serialized.contains("default"));
        assert!(serialized.contains("schema"));
        assert!(serialized.contains("name"));

        // Inspect top-level keys rather than substrings of the whole document.
        let json: serde_json::Value = serde_json::from_str(&serialized).unwrap();
        assert!(json.get("dictionary_schema").is_some());
        assert!(json.get("user_dictionary_schema").is_some());
        assert!(json.get("model_info").is_none());

        let deserialized: Metadata = serde_json::from_str(&serialized).unwrap();
        assert_eq!(deserialized.name, "default");
        assert!(deserialized.model_info.is_none());
    }

    /// `load` must reject empty input and malformed JSON.
    #[test]
    fn test_metadata_load_rejects_invalid_input() {
        assert!(Metadata::load(b"").is_err());
        assert!(Metadata::load(b"not json").is_err());
    }

    /// `load` must read back what serde_json wrote.
    #[test]
    fn test_metadata_load_round_trip() {
        let serialized = serde_json::to_vec(&Metadata::default()).unwrap();

        let loaded = match Metadata::load(&serialized) {
            Ok(metadata) => metadata,
            Err(_) => panic!("serialized default metadata should load"),
        };
        assert_eq!(loaded.name, "default");
        assert_eq!(loaded.default_word_cost, DEFAULT_WORD_COST);
    }

    /// `load_or_default` must fall back on empty or malformed input and must
    /// use the parsed value when the input is valid.
    #[test]
    fn test_metadata_load_or_default() {
        let from_empty = Metadata::load_or_default(b"", Metadata::default);
        assert_eq!(from_empty.name, "default");

        let from_garbage = Metadata::load_or_default(b"not json", Metadata::default);
        assert_eq!(from_garbage.name, "default");

        let custom = Metadata::new(
            "TestDict".to_string(),
            "UTF-8".to_string(),
            -10000,
            0,
            0,
            "*".to_string(),
            false,
            false,
            false,
            Schema::default(),
            Schema::new(vec!["surface".to_string(), "reading".to_string()]),
        );
        let serialized = serde_json::to_vec(&custom).unwrap();
        let from_valid = Metadata::load_or_default(&serialized, Metadata::default);
        assert_eq!(from_valid.name, "TestDict");
    }
}