Skip to main content

lindera_nodejs/
metadata.rs

1//! Dictionary metadata configuration.
2//!
3//! This module provides structures for configuring dictionary metadata, including
4//! character encodings and schema definitions.
5
6use std::collections::HashMap;
7
8use lindera::dictionary::Metadata;
9
10use crate::schema::JsSchema;
11
/// Options for creating a Metadata instance.
///
/// All fields are optional. When omitted, default values are used.
///
/// The numeric options are declared as 32-bit integers here, but `JsMetadata`
/// stores them in 16-bit fields, so only values that fit in 16 bits can be
/// represented exactly.
#[napi(object)]
pub struct MetadataOptions {
    /// Dictionary name (default: "default").
    pub name: Option<String>,
    /// Character encoding (default: "UTF-8").
    pub encoding: Option<String>,
    /// Default cost for unknown words (default: -10000).
    ///
    /// Stored as `i16` by `JsMetadata`.
    pub default_word_cost: Option<i32>,
    /// Default left context ID (default: 1288).
    ///
    /// Stored as `u16` by `JsMetadata`.
    pub default_left_context_id: Option<u32>,
    /// Default right context ID (default: 1288).
    ///
    /// Stored as `u16` by `JsMetadata`.
    pub default_right_context_id: Option<u32>,
    /// Default value for missing fields (default: "*").
    pub default_field_value: Option<String>,
    /// Allow flexible CSV parsing (default: false).
    pub flexible_csv: Option<bool>,
    /// Skip entries with invalid cost or ID (default: false).
    pub skip_invalid_cost_or_id: Option<bool>,
    /// Normalize morphological details (default: false).
    pub normalize_details: Option<bool>,
}
36
/// Dictionary metadata configuration.
///
/// Contains all configuration parameters for building and using dictionaries.
/// Exposed to JavaScript under the class name `Metadata`; converts to and from
/// `lindera::dictionary::Metadata` through the `From` impls in this file.
#[napi(js_name = "Metadata")]
pub struct JsMetadata {
    /// Dictionary name.
    name: String,
    /// Character encoding name (for example "UTF-8").
    encoding: String,
    /// Default word cost, kept in the 16-bit width that `Metadata::new` is given.
    default_word_cost: i16,
    /// Default left context ID, kept in the 16-bit width that `Metadata::new` is given.
    default_left_context_id: u16,
    /// Default right context ID, kept in the 16-bit width that `Metadata::new` is given.
    default_right_context_id: u16,
    /// Value substituted for missing fields.
    default_field_value: String,
    /// Whether flexible CSV parsing is enabled.
    flexible_csv: bool,
    /// Whether entries with an invalid cost or ID are skipped.
    skip_invalid_cost_or_id: bool,
    /// Whether morphological details are normalized.
    normalize_details: bool,
    /// Schema of the main dictionary; the constructor fills it with
    /// `JsSchema::create_default()`.
    dictionary_schema: JsSchema,
    /// Schema of the user dictionary; the constructor fills it with the fields
    /// `surface`, `reading` and `pronunciation`.
    user_dictionary_schema: JsSchema,
}
54
55#[napi]
56impl JsMetadata {
57    /// Creates a new Metadata with optional configuration.
58    ///
59    /// # Arguments
60    ///
61    /// * `options` - Optional configuration object. When omitted, all defaults are used.
62    #[napi(constructor)]
63    pub fn new(options: Option<MetadataOptions>) -> Self {
64        let opts = options.unwrap_or(MetadataOptions {
65            name: None,
66            encoding: None,
67            default_word_cost: None,
68            default_left_context_id: None,
69            default_right_context_id: None,
70            default_field_value: None,
71            flexible_csv: None,
72            skip_invalid_cost_or_id: None,
73            normalize_details: None,
74        });
75
76        JsMetadata {
77            name: opts.name.unwrap_or_else(|| "default".to_string()),
78            encoding: opts.encoding.unwrap_or_else(|| "UTF-8".to_string()),
79            default_word_cost: opts.default_word_cost.unwrap_or(-10000) as i16,
80            default_left_context_id: opts.default_left_context_id.unwrap_or(1288) as u16,
81            default_right_context_id: opts.default_right_context_id.unwrap_or(1288) as u16,
82            default_field_value: opts.default_field_value.unwrap_or_else(|| "*".to_string()),
83            flexible_csv: opts.flexible_csv.unwrap_or(false),
84            skip_invalid_cost_or_id: opts.skip_invalid_cost_or_id.unwrap_or(false),
85            normalize_details: opts.normalize_details.unwrap_or(false),
86            dictionary_schema: JsSchema::create_default(),
87            user_dictionary_schema: JsSchema::new(vec![
88                "surface".to_string(),
89                "reading".to_string(),
90                "pronunciation".to_string(),
91            ]),
92        }
93    }
94
95    /// Creates a Metadata with all default values.
96    ///
97    /// # Returns
98    ///
99    /// A Metadata instance with default configuration.
100    #[napi(factory)]
101    pub fn create_default() -> Self {
102        JsMetadata::new(None)
103    }
104
105    /// Loads metadata from a JSON file.
106    ///
107    /// # Arguments
108    ///
109    /// * `path` - Path to the JSON metadata file.
110    ///
111    /// # Returns
112    ///
113    /// A Metadata instance loaded from the file.
114    #[napi(factory)]
115    pub fn from_json_file(path: String) -> napi::Result<Self> {
116        let json_str = std::fs::read_to_string(&path).map_err(|e| {
117            napi::Error::new(
118                napi::Status::GenericFailure,
119                format!("Failed to read file: {e}"),
120            )
121        })?;
122
123        let metadata: Metadata = serde_json::from_str(&json_str).map_err(|e| {
124            napi::Error::new(
125                napi::Status::GenericFailure,
126                format!("Failed to parse JSON: {e}"),
127            )
128        })?;
129
130        Ok(metadata.into())
131    }
132
133    /// Dictionary name.
134    #[napi(getter)]
135    pub fn name(&self) -> String {
136        self.name.clone()
137    }
138
139    /// Sets the dictionary name.
140    #[napi(setter)]
141    pub fn set_name(&mut self, name: String) {
142        self.name = name;
143    }
144
145    /// Character encoding.
146    #[napi(getter)]
147    pub fn encoding(&self) -> String {
148        self.encoding.clone()
149    }
150
151    /// Sets the character encoding.
152    #[napi(setter)]
153    pub fn set_encoding(&mut self, encoding: String) {
154        self.encoding = encoding;
155    }
156
157    /// Default word cost.
158    #[napi(getter)]
159    pub fn default_word_cost(&self) -> i32 {
160        self.default_word_cost as i32
161    }
162
163    /// Sets the default word cost.
164    #[napi(setter)]
165    pub fn set_default_word_cost(&mut self, cost: i32) {
166        self.default_word_cost = cost as i16;
167    }
168
169    /// Default left context ID.
170    #[napi(getter)]
171    pub fn default_left_context_id(&self) -> u32 {
172        self.default_left_context_id as u32
173    }
174
175    /// Sets the default left context ID.
176    #[napi(setter)]
177    pub fn set_default_left_context_id(&mut self, id: u32) {
178        self.default_left_context_id = id as u16;
179    }
180
181    /// Default right context ID.
182    #[napi(getter)]
183    pub fn default_right_context_id(&self) -> u32 {
184        self.default_right_context_id as u32
185    }
186
187    /// Sets the default right context ID.
188    #[napi(setter)]
189    pub fn set_default_right_context_id(&mut self, id: u32) {
190        self.default_right_context_id = id as u16;
191    }
192
193    /// Default field value for missing fields.
194    #[napi(getter)]
195    pub fn default_field_value(&self) -> String {
196        self.default_field_value.clone()
197    }
198
199    /// Sets the default field value.
200    #[napi(setter)]
201    pub fn set_default_field_value(&mut self, value: String) {
202        self.default_field_value = value;
203    }
204
205    /// Whether flexible CSV parsing is enabled.
206    #[napi(getter)]
207    pub fn flexible_csv(&self) -> bool {
208        self.flexible_csv
209    }
210
211    /// Sets flexible CSV parsing.
212    #[napi(setter)]
213    pub fn set_flexible_csv(&mut self, value: bool) {
214        self.flexible_csv = value;
215    }
216
217    /// Whether to skip entries with invalid cost or ID.
218    #[napi(getter)]
219    pub fn skip_invalid_cost_or_id(&self) -> bool {
220        self.skip_invalid_cost_or_id
221    }
222
223    /// Sets whether to skip invalid entries.
224    #[napi(setter)]
225    pub fn set_skip_invalid_cost_or_id(&mut self, value: bool) {
226        self.skip_invalid_cost_or_id = value;
227    }
228
229    /// Whether to normalize morphological details.
230    #[napi(getter)]
231    pub fn normalize_details(&self) -> bool {
232        self.normalize_details
233    }
234
235    /// Sets whether to normalize details.
236    #[napi(setter)]
237    pub fn set_normalize_details(&mut self, value: bool) {
238        self.normalize_details = value;
239    }
240
241    /// Returns a plain object representation of the metadata.
242    ///
243    /// # Returns
244    ///
245    /// A HashMap containing all metadata properties as strings.
246    #[napi]
247    pub fn to_object(&self) -> HashMap<String, String> {
248        let mut dict = HashMap::new();
249        dict.insert("name".to_string(), self.name.clone());
250        dict.insert("encoding".to_string(), self.encoding.clone());
251        dict.insert(
252            "defaultWordCost".to_string(),
253            self.default_word_cost.to_string(),
254        );
255        dict.insert(
256            "defaultLeftContextId".to_string(),
257            self.default_left_context_id.to_string(),
258        );
259        dict.insert(
260            "defaultRightContextId".to_string(),
261            self.default_right_context_id.to_string(),
262        );
263        dict.insert(
264            "defaultFieldValue".to_string(),
265            self.default_field_value.clone(),
266        );
267        dict.insert("flexibleCsv".to_string(), self.flexible_csv.to_string());
268        dict.insert(
269            "skipInvalidCostOrId".to_string(),
270            self.skip_invalid_cost_or_id.to_string(),
271        );
272        dict.insert(
273            "normalizeDetails".to_string(),
274            self.normalize_details.to_string(),
275        );
276        dict
277    }
278}
279
280impl JsMetadata {
281    /// Converts a reference to JsMetadata into a lindera Metadata.
282    ///
283    /// # Arguments
284    ///
285    /// * `metadata` - Reference to JsMetadata.
286    ///
287    /// # Returns
288    ///
289    /// A lindera Metadata instance.
290    pub fn to_lindera_metadata(metadata: &JsMetadata) -> Metadata {
291        Metadata::new(
292            metadata.name.clone(),
293            metadata.encoding.clone(),
294            metadata.default_word_cost,
295            metadata.default_left_context_id,
296            metadata.default_right_context_id,
297            metadata.default_field_value.clone(),
298            metadata.flexible_csv,
299            metadata.skip_invalid_cost_or_id,
300            metadata.normalize_details,
301            JsSchema::new(metadata.dictionary_schema.fields()).into(),
302            JsSchema::new(metadata.user_dictionary_schema.fields()).into(),
303        )
304    }
305}
306
307impl From<JsMetadata> for Metadata {
308    fn from(metadata: JsMetadata) -> Self {
309        Metadata::new(
310            metadata.name,
311            metadata.encoding,
312            metadata.default_word_cost,
313            metadata.default_left_context_id,
314            metadata.default_right_context_id,
315            metadata.default_field_value,
316            metadata.flexible_csv,
317            metadata.skip_invalid_cost_or_id,
318            metadata.normalize_details,
319            metadata.dictionary_schema.into(),
320            metadata.user_dictionary_schema.into(),
321        )
322    }
323}
324
325impl From<Metadata> for JsMetadata {
326    fn from(metadata: Metadata) -> Self {
327        JsMetadata {
328            name: metadata.name,
329            encoding: metadata.encoding,
330            default_word_cost: metadata.default_word_cost,
331            default_left_context_id: metadata.default_left_context_id,
332            default_right_context_id: metadata.default_right_context_id,
333            default_field_value: metadata.default_field_value,
334            flexible_csv: metadata.flexible_csv,
335            skip_invalid_cost_or_id: metadata.skip_invalid_cost_or_id,
336            normalize_details: metadata.normalize_details,
337            dictionary_schema: metadata.dictionary_schema.into(),
338            user_dictionary_schema: metadata.user_dictionary_schema.into(),
339        }
340    }
341}
342
#[cfg(test)]
mod tests {
    use super::*;

    /// A default-constructed wrapper converts into lindera metadata carrying
    /// the documented defaults.
    #[test]
    fn test_js_metadata_to_lindera_metadata() {
        let converted = Metadata::from(JsMetadata::new(None));

        assert_eq!(converted.name, "default");
        assert_eq!(converted.encoding, "UTF-8");
        assert_eq!(converted.default_word_cost, -10000);
        assert_eq!(converted.default_left_context_id, 1288);
        assert_eq!(converted.default_right_context_id, 1288);
        assert_eq!(converted.default_field_value, "*");
        assert!(!converted.flexible_csv);
        assert!(!converted.skip_invalid_cost_or_id);
        assert!(!converted.normalize_details);
    }

    /// Non-default lindera metadata survives conversion into the wrapper and
    /// is visible through its getters.
    #[test]
    fn test_lindera_metadata_to_js_metadata() {
        let source = Metadata::new(
            "test".to_string(),
            "EUC-JP".to_string(),
            -5000,
            100,
            200,
            "-".to_string(),
            true,
            true,
            true,
            lindera::dictionary::Schema::default(),
            lindera::dictionary::Schema::default(),
        );

        let wrapped = JsMetadata::from(source);

        assert_eq!(wrapped.name(), "test");
        assert_eq!(wrapped.encoding(), "EUC-JP");
        assert_eq!(wrapped.default_word_cost(), -5000);
        assert_eq!(wrapped.default_left_context_id(), 100);
        assert_eq!(wrapped.default_right_context_id(), 200);
        assert_eq!(wrapped.default_field_value(), "-");
        assert!(wrapped.flexible_csv());
        assert!(wrapped.skip_invalid_cost_or_id());
        assert!(wrapped.normalize_details());
    }

    /// Every option supplied to the constructor overrides its default.
    #[test]
    fn test_js_metadata_with_custom_options() {
        let custom = JsMetadata::new(Some(MetadataOptions {
            name: Some("custom".to_string()),
            encoding: Some("Shift_JIS".to_string()),
            default_word_cost: Some(-5000),
            default_left_context_id: Some(100),
            default_right_context_id: Some(200),
            default_field_value: Some("-".to_string()),
            flexible_csv: Some(true),
            skip_invalid_cost_or_id: Some(true),
            normalize_details: Some(true),
        }));

        assert_eq!(custom.name(), "custom");
        assert_eq!(custom.encoding(), "Shift_JIS");
        assert_eq!(custom.default_word_cost(), -5000);
        assert_eq!(custom.default_left_context_id(), 100);
        assert_eq!(custom.default_right_context_id(), 200);
        assert_eq!(custom.default_field_value(), "-");
        assert!(custom.flexible_csv());
        assert!(custom.skip_invalid_cost_or_id());
        assert!(custom.normalize_details());
    }

    /// Wrapper -> lindera -> wrapper keeps the default values intact.
    #[test]
    fn test_js_metadata_roundtrip() {
        let as_lindera = Metadata::from(JsMetadata::new(None));
        let back_again = JsMetadata::from(as_lindera);

        assert_eq!(back_again.name(), "default");
        assert_eq!(back_again.encoding(), "UTF-8");
        assert_eq!(back_again.default_word_cost(), -10000);
        assert_eq!(back_again.default_left_context_id(), 1288);
        assert_eq!(back_again.default_right_context_id(), 1288);
    }
}