1use std::collections::HashMap;
7
8use magnus::prelude::*;
9use magnus::{Error, Ruby, function, method};
10
11use lindera::dictionary::Metadata;
12
13use crate::schema::RbSchema;
14
15#[magnus::wrap(class = "Lindera::Metadata", free_immediately, size)]
19#[derive(Debug, Clone)]
20pub struct RbMetadata {
21 name: String,
23 encoding: String,
25 default_word_cost: i16,
27 default_left_context_id: u16,
29 default_right_context_id: u16,
31 default_field_value: String,
33 flexible_csv: bool,
35 skip_invalid_cost_or_id: bool,
37 normalize_details: bool,
39 dictionary_schema: RbSchema,
41 user_dictionary_schema: RbSchema,
43}
44
45impl RbMetadata {
46 #[allow(clippy::too_many_arguments)]
56 fn new(
57 name: Option<String>,
58 encoding: Option<String>,
59 default_word_cost: Option<i16>,
60 default_left_context_id: Option<u16>,
61 default_right_context_id: Option<u16>,
62 default_field_value: Option<String>,
63 flexible_csv: Option<bool>,
64 skip_invalid_cost_or_id: Option<bool>,
65 normalize_details: Option<bool>,
66 ) -> Self {
67 RbMetadata {
68 name: name.unwrap_or_else(|| "default".to_string()),
69 encoding: encoding.unwrap_or_else(|| "UTF-8".to_string()),
70 default_word_cost: default_word_cost.unwrap_or(-10000),
71 default_left_context_id: default_left_context_id.unwrap_or(1288),
72 default_right_context_id: default_right_context_id.unwrap_or(1288),
73 default_field_value: default_field_value.unwrap_or_else(|| "*".to_string()),
74 flexible_csv: flexible_csv.unwrap_or(false),
75 skip_invalid_cost_or_id: skip_invalid_cost_or_id.unwrap_or(false),
76 normalize_details: normalize_details.unwrap_or(false),
77 dictionary_schema: RbSchema::create_default_internal(),
78 user_dictionary_schema: RbSchema::new_internal(vec![
79 "surface".to_string(),
80 "reading".to_string(),
81 "pronunciation".to_string(),
82 ]),
83 }
84 }
85
86 fn create_default() -> Self {
92 RbMetadata::new(None, None, None, None, None, None, None, None, None)
93 }
94
95 fn from_json_file(path: String) -> Result<Self, Error> {
105 let ruby = Ruby::get().expect("Ruby runtime not initialized");
106
107 let json_str = std::fs::read_to_string(&path).map_err(|e| {
108 Error::new(
109 ruby.exception_io_error(),
110 format!("Failed to read file: {e}"),
111 )
112 })?;
113
114 let metadata: Metadata = serde_json::from_str(&json_str).map_err(|e| {
115 Error::new(
116 ruby.exception_arg_error(),
117 format!("Failed to parse JSON: {e}"),
118 )
119 })?;
120
121 Ok(metadata.into())
122 }
123
124 fn name(&self) -> String {
126 self.name.clone()
127 }
128
129 fn encoding(&self) -> String {
131 self.encoding.clone()
132 }
133
134 fn default_word_cost(&self) -> i16 {
136 self.default_word_cost
137 }
138
139 fn default_left_context_id(&self) -> u16 {
141 self.default_left_context_id
142 }
143
144 fn default_right_context_id(&self) -> u16 {
146 self.default_right_context_id
147 }
148
149 fn default_field_value(&self) -> String {
151 self.default_field_value.clone()
152 }
153
154 fn flexible_csv(&self) -> bool {
156 self.flexible_csv
157 }
158
159 fn skip_invalid_cost_or_id(&self) -> bool {
161 self.skip_invalid_cost_or_id
162 }
163
164 fn normalize_details(&self) -> bool {
166 self.normalize_details
167 }
168
169 fn to_hash(&self) -> HashMap<String, String> {
175 let mut dict = HashMap::new();
176 dict.insert("name".to_string(), self.name.clone());
177 dict.insert("encoding".to_string(), self.encoding.clone());
178 dict.insert(
179 "default_word_cost".to_string(),
180 self.default_word_cost.to_string(),
181 );
182 dict.insert(
183 "default_left_context_id".to_string(),
184 self.default_left_context_id.to_string(),
185 );
186 dict.insert(
187 "default_right_context_id".to_string(),
188 self.default_right_context_id.to_string(),
189 );
190 dict.insert(
191 "default_field_value".to_string(),
192 self.default_field_value.clone(),
193 );
194 dict.insert("flexible_csv".to_string(), self.flexible_csv.to_string());
195 dict.insert(
196 "skip_invalid_cost_or_id".to_string(),
197 self.skip_invalid_cost_or_id.to_string(),
198 );
199 dict.insert(
200 "normalize_details".to_string(),
201 self.normalize_details.to_string(),
202 );
203 dict.insert(
204 "dictionary_schema_fields".to_string(),
205 self.dictionary_schema.fields.join(","),
206 );
207 dict.insert(
208 "user_dictionary_schema_fields".to_string(),
209 self.user_dictionary_schema.fields.join(","),
210 );
211 dict
212 }
213
214 fn to_s(&self) -> String {
216 format!(
217 "Metadata(name='{}', encoding='{}')",
218 self.name, self.encoding,
219 )
220 }
221
222 fn inspect(&self) -> String {
224 format!(
225 "#<Lindera::Metadata: name='{}', encoding='{}', schema_fields={}>",
226 self.name,
227 self.encoding,
228 self.dictionary_schema.fields.len()
229 )
230 }
231}
232
233impl From<RbMetadata> for Metadata {
234 fn from(metadata: RbMetadata) -> Self {
235 Metadata::new(
236 metadata.name,
237 metadata.encoding,
238 metadata.default_word_cost,
239 metadata.default_left_context_id,
240 metadata.default_right_context_id,
241 metadata.default_field_value,
242 metadata.flexible_csv,
243 metadata.skip_invalid_cost_or_id,
244 metadata.normalize_details,
245 metadata.dictionary_schema.into(),
246 metadata.user_dictionary_schema.into(),
247 )
248 }
249}
250
251impl From<Metadata> for RbMetadata {
252 fn from(metadata: Metadata) -> Self {
253 RbMetadata {
254 name: metadata.name,
255 encoding: metadata.encoding,
256 default_word_cost: metadata.default_word_cost,
257 default_left_context_id: metadata.default_left_context_id,
258 default_right_context_id: metadata.default_right_context_id,
259 default_field_value: metadata.default_field_value,
260 flexible_csv: metadata.flexible_csv,
261 skip_invalid_cost_or_id: metadata.skip_invalid_cost_or_id,
262 normalize_details: metadata.normalize_details,
263 dictionary_schema: metadata.dictionary_schema.into(),
264 user_dictionary_schema: metadata.user_dictionary_schema.into(),
265 }
266 }
267}
268
269pub fn define(ruby: &Ruby, module: &magnus::RModule) -> Result<(), Error> {
280 let metadata_class = module.define_class("Metadata", ruby.class_object())?;
281 metadata_class.define_singleton_method("new", function!(RbMetadata::new, 9))?;
282 metadata_class
283 .define_singleton_method("create_default", function!(RbMetadata::create_default, 0))?;
284 metadata_class
285 .define_singleton_method("from_json_file", function!(RbMetadata::from_json_file, 1))?;
286 metadata_class.define_method("name", method!(RbMetadata::name, 0))?;
287 metadata_class.define_method("encoding", method!(RbMetadata::encoding, 0))?;
288 metadata_class.define_method(
289 "default_word_cost",
290 method!(RbMetadata::default_word_cost, 0),
291 )?;
292 metadata_class.define_method(
293 "default_left_context_id",
294 method!(RbMetadata::default_left_context_id, 0),
295 )?;
296 metadata_class.define_method(
297 "default_right_context_id",
298 method!(RbMetadata::default_right_context_id, 0),
299 )?;
300 metadata_class.define_method(
301 "default_field_value",
302 method!(RbMetadata::default_field_value, 0),
303 )?;
304 metadata_class.define_method("flexible_csv", method!(RbMetadata::flexible_csv, 0))?;
305 metadata_class.define_method(
306 "skip_invalid_cost_or_id",
307 method!(RbMetadata::skip_invalid_cost_or_id, 0),
308 )?;
309 metadata_class.define_method(
310 "normalize_details",
311 method!(RbMetadata::normalize_details, 0),
312 )?;
313 metadata_class.define_method("to_hash", method!(RbMetadata::to_hash, 0))?;
314 metadata_class.define_method("to_h", method!(RbMetadata::to_hash, 0))?;
315 metadata_class.define_method("to_s", method!(RbMetadata::to_s, 0))?;
316 metadata_class.define_method("inspect", method!(RbMetadata::inspect, 0))?;
317
318 Ok(())
319}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns a list of string literals into the owned field list the schema types take.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// Converting the wrapper into lindera's type must carry every field across.
    #[test]
    fn test_rb_metadata_to_lindera_metadata() {
        let source = RbMetadata {
            name: String::from("test_dict"),
            encoding: String::from("EUC-JP"),
            default_word_cost: -5000,
            default_left_context_id: 100,
            default_right_context_id: 200,
            default_field_value: String::from("N/A"),
            flexible_csv: true,
            skip_invalid_cost_or_id: true,
            normalize_details: true,
            dictionary_schema: RbSchema::new_internal(owned(&["surface", "cost"])),
            user_dictionary_schema: RbSchema::new_internal(owned(&["surface"])),
        };

        let converted = Metadata::from(source);

        assert_eq!(converted.name, "test_dict");
        assert_eq!(converted.encoding, "EUC-JP");
        assert_eq!(converted.default_word_cost, -5000);
        assert_eq!(converted.default_left_context_id, 100);
        assert_eq!(converted.default_right_context_id, 200);
        assert_eq!(converted.default_field_value, "N/A");
        assert!(converted.flexible_csv);
        assert!(converted.skip_invalid_cost_or_id);
        assert!(converted.normalize_details);

        let dictionary_field_count = converted.dictionary_schema.get_all_fields().len();
        assert_eq!(dictionary_field_count, 2);
        let user_field_count = converted.user_dictionary_schema.get_all_fields().len();
        assert_eq!(user_field_count, 1);
    }

    /// Converting lindera's type into the wrapper must carry every field across.
    #[test]
    fn test_lindera_metadata_to_rb_metadata() {
        let source = Metadata::new(
            String::from("my_dict"),
            String::from("UTF-8"),
            -8000,
            500,
            600,
            String::from("?"),
            false,
            true,
            false,
            lindera::dictionary::Schema::new(owned(&["surface", "cost"])),
            lindera::dictionary::Schema::new(owned(&["surface", "reading"])),
        );

        let converted = RbMetadata::from(source);

        assert_eq!(converted.name, "my_dict");
        assert_eq!(converted.encoding, "UTF-8");
        assert_eq!(converted.default_word_cost, -8000);
        assert_eq!(converted.default_left_context_id, 500);
        assert_eq!(converted.default_right_context_id, 600);
        assert_eq!(converted.default_field_value, "?");
        assert!(!converted.flexible_csv);
        assert!(converted.skip_invalid_cost_or_id);
        assert!(!converted.normalize_details);
        assert_eq!(converted.dictionary_schema.fields.len(), 2);
        assert_eq!(converted.user_dictionary_schema.fields.len(), 2);
    }

    /// Wrapper -> lindera -> wrapper must leave the default-valued metadata intact.
    #[test]
    fn test_rb_metadata_roundtrip() {
        let original = RbMetadata {
            name: String::from("roundtrip"),
            encoding: String::from("UTF-8"),
            default_word_cost: -10000,
            default_left_context_id: 1288,
            default_right_context_id: 1288,
            default_field_value: String::from("*"),
            flexible_csv: false,
            skip_invalid_cost_or_id: false,
            normalize_details: false,
            dictionary_schema: RbSchema::create_default_internal(),
            user_dictionary_schema: RbSchema::new_internal(owned(&[
                "surface",
                "reading",
                "pronunciation",
            ])),
        };

        let back = RbMetadata::from(Metadata::from(original));

        assert_eq!(back.name, "roundtrip");
        assert_eq!(back.encoding, "UTF-8");
        assert_eq!(back.default_word_cost, -10000);
        assert_eq!(back.default_left_context_id, 1288);
        assert_eq!(back.default_right_context_id, 1288);
        assert_eq!(back.default_field_value, "*");
        assert!(!back.flexible_csv);
        assert!(!back.skip_invalid_cost_or_id);
        assert!(!back.normalize_details);
        assert_eq!(back.dictionary_schema.fields.len(), 13);
        assert_eq!(back.user_dictionary_schema.fields.len(), 3);
    }
}