Skip to main content

lindera_nodejs/
schema.rs

1//! Dictionary schema definitions.
2//!
3//! This module provides schema structures that define the format and fields
4//! of dictionary entries.
5
6use std::collections::HashMap;
7
8use lindera::dictionary::{FieldDefinition, FieldType, Schema};
9
10/// Field type in dictionary schema.
11///
12/// Defines the type of a field in the dictionary entry.
13#[napi(string_enum)]
14pub enum JsFieldType {
15    /// Surface form (word text)
16    Surface,
17    /// Left context ID for morphological analysis
18    LeftContextId,
19    /// Right context ID for morphological analysis
20    RightContextId,
21    /// Word cost (used in path selection)
22    Cost,
23    /// Custom field (morphological features)
24    Custom,
25}
26
27impl From<FieldType> for JsFieldType {
28    fn from(field_type: FieldType) -> Self {
29        match field_type {
30            FieldType::Surface => JsFieldType::Surface,
31            FieldType::LeftContextId => JsFieldType::LeftContextId,
32            FieldType::RightContextId => JsFieldType::RightContextId,
33            FieldType::Cost => JsFieldType::Cost,
34            FieldType::Custom => JsFieldType::Custom,
35        }
36    }
37}
38
39impl From<JsFieldType> for FieldType {
40    fn from(field_type: JsFieldType) -> Self {
41        match field_type {
42            JsFieldType::Surface => FieldType::Surface,
43            JsFieldType::LeftContextId => FieldType::LeftContextId,
44            JsFieldType::RightContextId => FieldType::RightContextId,
45            JsFieldType::Cost => FieldType::Cost,
46            JsFieldType::Custom => FieldType::Custom,
47        }
48    }
49}
50
51/// Field definition in dictionary schema.
52///
53/// Describes a single field in the dictionary entry format.
54#[napi(object)]
55pub struct JsFieldDefinition {
56    /// Field index in the record.
57    pub index: u32,
58    /// Field name.
59    pub name: String,
60    /// Field type.
61    pub field_type: JsFieldType,
62    /// Optional description of the field.
63    pub description: Option<String>,
64}
65
66impl From<FieldDefinition> for JsFieldDefinition {
67    fn from(field_def: FieldDefinition) -> Self {
68        JsFieldDefinition {
69            index: field_def.index as u32,
70            name: field_def.name,
71            field_type: field_def.field_type.into(),
72            description: field_def.description,
73        }
74    }
75}
76
77impl From<JsFieldDefinition> for FieldDefinition {
78    fn from(field_def: JsFieldDefinition) -> Self {
79        FieldDefinition {
80            index: field_def.index as usize,
81            name: field_def.name,
82            field_type: field_def.field_type.into(),
83            description: field_def.description,
84        }
85    }
86}
87
88/// Dictionary schema definition.
89///
90/// Defines the structure and fields of dictionary entries.
91#[napi(js_name = "Schema")]
92pub struct JsSchema {
93    /// Field names in the schema.
94    fields: Vec<String>,
95    /// Index map for fast field lookup.
96    field_index_map: HashMap<String, usize>,
97}
98
99#[napi]
100impl JsSchema {
101    /// Creates a new schema with the specified field names.
102    ///
103    /// # Arguments
104    ///
105    /// * `fields` - Array of field name strings.
106    #[napi(constructor)]
107    pub fn new(fields: Vec<String>) -> Self {
108        let field_index_map = fields
109            .iter()
110            .enumerate()
111            .map(|(i, f)| (f.clone(), i))
112            .collect();
113        Self {
114            fields,
115            field_index_map,
116        }
117    }
118
119    /// Creates a default schema matching the IPADIC format (13 fields).
120    ///
121    /// # Returns
122    ///
123    /// A schema with the standard IPADIC field definitions.
124    #[napi(factory)]
125    pub fn create_default() -> Self {
126        Self::new(vec![
127            "surface".to_string(),
128            "left_context_id".to_string(),
129            "right_context_id".to_string(),
130            "cost".to_string(),
131            "major_pos".to_string(),
132            "middle_pos".to_string(),
133            "small_pos".to_string(),
134            "fine_pos".to_string(),
135            "conjugation_type".to_string(),
136            "conjugation_form".to_string(),
137            "base_form".to_string(),
138            "reading".to_string(),
139            "pronunciation".to_string(),
140        ])
141    }
142
143    /// Returns the field names in the schema.
144    #[napi(getter)]
145    pub fn fields(&self) -> Vec<String> {
146        self.fields.clone()
147    }
148
149    /// Returns the index of the specified field name.
150    ///
151    /// # Arguments
152    ///
153    /// * `field_name` - Name of the field to look up.
154    ///
155    /// # Returns
156    ///
157    /// The zero-based index of the field, or `undefined` if not found.
158    #[napi]
159    pub fn get_field_index(&self, field_name: String) -> Option<u32> {
160        self.field_index_map.get(&field_name).map(|&i| i as u32)
161    }
162
163    /// Returns the total number of fields in the schema.
164    #[napi]
165    pub fn field_count(&self) -> u32 {
166        self.fields.len() as u32
167    }
168
169    /// Returns the field name at the specified index.
170    ///
171    /// # Arguments
172    ///
173    /// * `index` - Zero-based index.
174    ///
175    /// # Returns
176    ///
177    /// The field name, or `undefined` if the index is out of range.
178    #[napi]
179    pub fn get_field_name(&self, index: u32) -> Option<String> {
180        self.fields.get(index as usize).cloned()
181    }
182
183    /// Returns the custom fields (index 4 and above).
184    ///
185    /// # Returns
186    ///
187    /// An array of custom field names.
188    #[napi]
189    pub fn get_custom_fields(&self) -> Vec<String> {
190        if self.fields.len() > 4 {
191            self.fields[4..].to_vec()
192        } else {
193            Vec::new()
194        }
195    }
196
197    /// Returns all field names in the schema.
198    ///
199    /// # Returns
200    ///
201    /// An array of all field names.
202    #[napi]
203    pub fn get_all_fields(&self) -> Vec<String> {
204        self.fields.clone()
205    }
206
207    /// Returns the field definition for the specified field name.
208    ///
209    /// # Arguments
210    ///
211    /// * `name` - Name of the field to look up.
212    ///
213    /// # Returns
214    ///
215    /// The field definition, or `undefined` if not found.
216    #[napi]
217    pub fn get_field_by_name(&self, name: String) -> Option<JsFieldDefinition> {
218        self.field_index_map.get(&name).map(|&index| {
219            let field_type = match index {
220                0 => JsFieldType::Surface,
221                1 => JsFieldType::LeftContextId,
222                2 => JsFieldType::RightContextId,
223                3 => JsFieldType::Cost,
224                _ => JsFieldType::Custom,
225            };
226
227            JsFieldDefinition {
228                index: index as u32,
229                name,
230                field_type,
231                description: None,
232            }
233        })
234    }
235
236    /// Validates that a CSV record matches the schema.
237    ///
238    /// # Arguments
239    ///
240    /// * `record` - Array of field values to validate.
241    #[napi]
242    pub fn validate_record(&self, record: Vec<String>) -> napi::Result<()> {
243        if record.len() < self.fields.len() {
244            return Err(napi::Error::new(
245                napi::Status::InvalidArg,
246                format!(
247                    "CSV row has {} fields but schema requires {} fields",
248                    record.len(),
249                    self.fields.len()
250                ),
251            ));
252        }
253
254        for (index, field_name) in self.fields.iter().enumerate() {
255            if index < record.len() && record[index].trim().is_empty() {
256                return Err(napi::Error::new(
257                    napi::Status::InvalidArg,
258                    format!("Field {field_name} is missing or empty"),
259                ));
260            }
261        }
262
263        Ok(())
264    }
265}
266
267impl From<JsSchema> for Schema {
268    fn from(schema: JsSchema) -> Self {
269        Schema::new(schema.fields)
270    }
271}
272
273impl From<Schema> for JsSchema {
274    fn from(schema: Schema) -> Self {
275        JsSchema::new(schema.get_all_fields().to_vec())
276    }
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned field list `JsSchema::new` expects.
    fn names(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Looks up `name` in `schema` without spelling out the `String` conversion.
    fn index_of(schema: &JsSchema, name: &str) -> Option<u32> {
        schema.get_field_index(name.to_owned())
    }

    #[test]
    fn test_js_field_type_to_field_type_all_variants() {
        let surface: FieldType = JsFieldType::Surface.into();
        assert!(matches!(surface, FieldType::Surface));

        let left: FieldType = JsFieldType::LeftContextId.into();
        assert!(matches!(left, FieldType::LeftContextId));

        let right: FieldType = JsFieldType::RightContextId.into();
        assert!(matches!(right, FieldType::RightContextId));

        let cost: FieldType = JsFieldType::Cost.into();
        assert!(matches!(cost, FieldType::Cost));

        let custom: FieldType = JsFieldType::Custom.into();
        assert!(matches!(custom, FieldType::Custom));
    }

    #[test]
    fn test_field_type_to_js_field_type_all_variants() {
        let surface: JsFieldType = FieldType::Surface.into();
        assert!(matches!(surface, JsFieldType::Surface));

        let left: JsFieldType = FieldType::LeftContextId.into();
        assert!(matches!(left, JsFieldType::LeftContextId));

        let right: JsFieldType = FieldType::RightContextId.into();
        assert!(matches!(right, JsFieldType::RightContextId));

        let cost: JsFieldType = FieldType::Cost.into();
        assert!(matches!(cost, JsFieldType::Cost));

        let custom: JsFieldType = FieldType::Custom.into();
        assert!(matches!(custom, JsFieldType::Custom));
    }

    #[test]
    fn test_js_schema_new_builds_index_map() {
        let schema = JsSchema::new(names(&["a", "b", "c"]));
        assert_eq!(index_of(&schema, "a"), Some(0));
        assert_eq!(index_of(&schema, "b"), Some(1));
        assert_eq!(index_of(&schema, "c"), Some(2));
    }

    #[test]
    fn test_js_schema_get_field_index_not_found() {
        let schema = JsSchema::new(names(&["x"]));
        assert_eq!(index_of(&schema, "y"), None);
    }

    #[test]
    fn test_js_schema_field_count() {
        let schema = JsSchema::new(names(&["a", "b", "c"]));
        assert_eq!(schema.field_count(), 3);
    }

    #[test]
    fn test_js_schema_field_count_empty() {
        let schema = JsSchema::new(Vec::new());
        assert_eq!(schema.field_count(), 0);
    }

    #[test]
    fn test_js_schema_get_custom_fields() {
        let schema = JsSchema::new(names(&[
            "surface",
            "left_context_id",
            "right_context_id",
            "cost",
            "pos1",
            "pos2",
        ]));
        assert_eq!(schema.get_custom_fields(), names(&["pos1", "pos2"]));
    }

    #[test]
    fn test_js_schema_get_custom_fields_no_custom() {
        let schema = JsSchema::new(names(&[
            "surface",
            "left_context_id",
            "right_context_id",
            "cost",
        ]));
        assert!(schema.get_custom_fields().is_empty());
    }

    #[test]
    fn test_js_schema_get_custom_fields_fewer_than_4() {
        let schema = JsSchema::new(names(&["surface"]));
        assert!(schema.get_custom_fields().is_empty());
    }

    #[test]
    fn test_js_schema_create_default_has_13_fields() {
        assert_eq!(JsSchema::create_default().field_count(), 13);
    }

    #[test]
    fn test_js_schema_create_default_field_names() {
        let schema = JsSchema::create_default();
        assert_eq!(index_of(&schema, "surface"), Some(0));
        assert_eq!(index_of(&schema, "pronunciation"), Some(12));
    }

    #[test]
    fn test_js_schema_to_lindera_schema_roundtrip() {
        let js_schema = JsSchema::new(names(&[
            "surface",
            "left_context_id",
            "right_context_id",
            "cost",
            "pos",
        ]));
        let lindera_schema = Schema::from(js_schema);
        let roundtripped = JsSchema::from(lindera_schema);
        assert_eq!(roundtripped.field_count(), 5);
        assert_eq!(index_of(&roundtripped, "pos"), Some(4));
    }

    #[test]
    fn test_lindera_schema_to_js_schema() {
        let lindera_schema = Schema::new(names(&["a", "b"]));
        let js_schema = JsSchema::from(lindera_schema);
        assert_eq!(js_schema.field_count(), 2);
        assert_eq!(index_of(&js_schema, "a"), Some(0));
        assert_eq!(index_of(&js_schema, "b"), Some(1));
    }
}