// lindera_nodejs/mode.rs

//! Tokenization modes and penalty configurations.
//!
//! This module defines the different tokenization modes available and their
//! penalty configurations for controlling segmentation behavior.

use lindera::mode::{Mode as LinderaMode, Penalty as LinderaPenalty};

8/// Tokenization mode.
9///
10/// Determines how text is segmented into tokens.
11///
12/// - `Normal`: Standard tokenization based on dictionary cost.
13/// - `Decompose`: Decomposes compound words using penalty-based segmentation.
14#[napi(string_enum)]
15pub enum JsMode {
16    /// Standard tokenization based on dictionary cost
17    Normal,
18    /// Decompose compound words using penalty-based segmentation
19    Decompose,
20}
21
22impl From<JsMode> for LinderaMode {
23    fn from(mode: JsMode) -> Self {
24        match mode {
25            JsMode::Normal => LinderaMode::Normal,
26            JsMode::Decompose => LinderaMode::Decompose(LinderaPenalty::default()),
27        }
28    }
29}
30
31impl From<LinderaMode> for JsMode {
32    fn from(mode: LinderaMode) -> Self {
33        match mode {
34            LinderaMode::Normal => JsMode::Normal,
35            LinderaMode::Decompose(_) => JsMode::Decompose,
36        }
37    }
38}
39
40/// Penalty configuration for decompose mode.
41///
42/// Controls how aggressively compound words are decomposed based on
43/// character type and length thresholds.
44#[napi(object)]
45#[derive(Debug, Clone)]
46pub struct JsPenalty {
47    /// Length threshold for kanji sequences before applying penalty (default: 2).
48    pub kanji_penalty_length_threshold: u32,
49    /// Penalty value for long kanji sequences (default: 3000).
50    pub kanji_penalty_length_penalty: i32,
51    /// Length threshold for other character sequences before applying penalty (default: 7).
52    pub other_penalty_length_threshold: u32,
53    /// Penalty value for long other-character sequences (default: 1700).
54    pub other_penalty_length_penalty: i32,
55}
56
57impl From<JsPenalty> for LinderaPenalty {
58    fn from(penalty: JsPenalty) -> Self {
59        LinderaPenalty {
60            kanji_penalty_length_threshold: penalty.kanji_penalty_length_threshold as usize,
61            kanji_penalty_length_penalty: penalty.kanji_penalty_length_penalty,
62            other_penalty_length_threshold: penalty.other_penalty_length_threshold as usize,
63            other_penalty_length_penalty: penalty.other_penalty_length_penalty,
64        }
65    }
66}
67
68impl From<LinderaPenalty> for JsPenalty {
69    fn from(penalty: LinderaPenalty) -> Self {
70        JsPenalty {
71            kanji_penalty_length_threshold: penalty.kanji_penalty_length_threshold as u32,
72            kanji_penalty_length_penalty: penalty.kanji_penalty_length_penalty,
73            other_penalty_length_threshold: penalty.other_penalty_length_threshold as u32,
74            other_penalty_length_penalty: penalty.other_penalty_length_penalty,
75        }
76    }
77}
78
/// Unit tests for the mode and penalty conversions defined in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// `JsMode::Normal` maps to `LinderaMode::Normal`.
    #[test]
    fn test_js_mode_normal_to_lindera_mode() {
        let lindera_mode: LinderaMode = JsMode::Normal.into();
        assert!(matches!(lindera_mode, LinderaMode::Normal));
    }

    /// `JsMode::Decompose` maps to `LinderaMode::Decompose` (penalty not inspected).
    #[test]
    fn test_js_mode_decompose_to_lindera_mode() {
        let lindera_mode: LinderaMode = JsMode::Decompose.into();
        assert!(matches!(lindera_mode, LinderaMode::Decompose(_)));
    }

    /// `LinderaMode::Normal` maps to `JsMode::Normal`.
    #[test]
    fn test_lindera_mode_normal_to_js_mode() {
        let js_mode: JsMode = LinderaMode::Normal.into();
        assert!(matches!(js_mode, JsMode::Normal));
    }

    /// `LinderaMode::Decompose(_)` maps to `JsMode::Decompose`.
    #[test]
    fn test_lindera_mode_decompose_to_js_mode() {
        let penalty = LinderaPenalty::default();
        let js_mode: JsMode = LinderaMode::Decompose(penalty).into();
        assert!(matches!(js_mode, JsMode::Decompose));
    }

    /// Every `JsPenalty` field is carried over into `LinderaPenalty`.
    #[test]
    fn test_js_penalty_to_lindera_penalty() {
        let js_penalty = JsPenalty {
            kanji_penalty_length_threshold: 3,
            kanji_penalty_length_penalty: 5000,
            other_penalty_length_threshold: 10,
            other_penalty_length_penalty: 2000,
        };
        let lindera_penalty: LinderaPenalty = js_penalty.into();
        assert_eq!(lindera_penalty.kanji_penalty_length_threshold, 3);
        assert_eq!(lindera_penalty.kanji_penalty_length_penalty, 5000);
        assert_eq!(lindera_penalty.other_penalty_length_threshold, 10);
        assert_eq!(lindera_penalty.other_penalty_length_penalty, 2000);
    }

    /// Every `LinderaPenalty` field is carried over into `JsPenalty`.
    #[test]
    fn test_lindera_penalty_to_js_penalty() {
        let lindera_penalty = LinderaPenalty {
            kanji_penalty_length_threshold: 4,
            kanji_penalty_length_penalty: 6000,
            other_penalty_length_threshold: 8,
            other_penalty_length_penalty: 1500,
        };
        let js_penalty: JsPenalty = lindera_penalty.into();
        assert_eq!(js_penalty.kanji_penalty_length_threshold, 4);
        assert_eq!(js_penalty.kanji_penalty_length_penalty, 6000);
        assert_eq!(js_penalty.other_penalty_length_threshold, 8);
        assert_eq!(js_penalty.other_penalty_length_penalty, 1500);
    }

    /// Converting `JsPenalty -> LinderaPenalty -> JsPenalty` preserves all fields.
    #[test]
    fn test_penalty_roundtrip() {
        let original = JsPenalty {
            kanji_penalty_length_threshold: 2,
            kanji_penalty_length_penalty: 3000,
            other_penalty_length_threshold: 7,
            other_penalty_length_penalty: 1700,
        };
        let lindera: LinderaPenalty = original.clone().into();
        let roundtripped: JsPenalty = lindera.into();
        assert_eq!(
            roundtripped.kanji_penalty_length_threshold,
            original.kanji_penalty_length_threshold
        );
        assert_eq!(
            roundtripped.kanji_penalty_length_penalty,
            original.kanji_penalty_length_penalty
        );
        assert_eq!(
            roundtripped.other_penalty_length_threshold,
            original.other_penalty_length_threshold
        );
        assert_eq!(
            roundtripped.other_penalty_length_penalty,
            original.other_penalty_length_penalty
        );
    }
}