// bamboo_core/input_method.rs

1use phf::{Map, phf_map};
2
3use crate::input_method_def::InputMethodDef;
4use crate::utils::{add_mark_to_toneless_char, add_tone_to_char, is_vowel};
5
/// Represents a Vietnamese tone mark.
///
/// The discriminants are stored as raw `u8` values in `Rule::effect` and are
/// decoded again by `Rule::get_tone`, so the numbering here and the decoding
/// there must stay in sync.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Tone {
    /// No tone; selected by the `"XoaDauThanh"` action in the `TONES` table.
    None = 0,
    /// Selected by the `"DauHuyen"` action.
    Grave = 1,
    /// Selected by the `"DauSac"` action.
    Acute = 2,
    /// Selected by the `"DauHoi"` action.
    Hook = 3,
    /// Selected by the `"DauNga"` action.
    Tilde = 4,
    /// Selected by the `"DauNang"` action.
    Dot = 5,
}
17
/// Represents a Vietnamese diacritic mark (marks that change the vowel/consonant).
///
/// The discriminants are stored as raw `u8` values in `Rule::effect` and are
/// decoded again by `Rule::get_mark`; values `0..=4` are also the mark indices
/// this module passes to `add_mark_to_toneless_char`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Mark {
    /// No mark (plain base letter).
    None = 0,
    Hat = 1,   // â, ê, ô
    Breve = 2, // ă
    Horn = 3,  // ư, ơ
    Dash = 4,  // đ
    /// Not used by the current DSL, kept for parity with other ports.
    Raw = 5,
}
30
/// The type of transformation a rule applies.
///
/// The effect type also decides how `Rule::effect` is interpreted: as a
/// `Tone` for `ToneTransformation` and as a `Mark` for `MarkTransformation`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EffectType {
    /// Appends a character (standard typing).
    Appending = 0,
    /// Adds/changes a diacritic mark (e.g., a -> ă).
    MarkTransformation = 1,
    /// Adds/changes a tone mark (e.g., a -> á).
    ToneTransformation = 2,
    /// Replaces a character with another.
    Replacing = 3,
}
44
/// Tone-action names recognised in input-method definitions, mapped to the
/// [`Tone`] they apply.
///
/// `parse_rules` looks a whole definition line up in this table verbatim, so
/// a line only counts as a tone rule when it is exactly one of these names.
static TONES: Map<&'static str, Tone> = phf_map! {
    "XoaDauThanh" => Tone::None,
    "DauSac" => Tone::Acute,
    "DauHuyen" => Tone::Grave,
    "DauNga" => Tone::Tilde,
    "DauNang" => Tone::Dot,
    "DauHoi" => Tone::Hook,
};
53
/// A single transformation bound to one key of an input method.
#[derive(Clone, Debug)]
pub struct Rule {
    /// The key that triggers this rule.
    pub key: char,
    /// Effect value:
    /// - if `effect_type == ToneTransformation`: this is a `Tone` as `u8`
    /// - if `effect_type == MarkTransformation`: this is a `Mark` as `u8`
    ///
    /// The parser in this module writes `0` here for `Appending` rules.
    pub effect: u8,
    /// How the rule transforms text; also selects the meaning of `effect`.
    pub effect_type: EffectType,
    /// The character the rule applies to. The parser sets this to `'\0'` for
    /// tone rules and to the appended character for appending rules.
    pub effect_on: char,
    /// The character produced by the rule. The parser sets this to `'\0'` for
    /// tone rules and to the appended character for appending rules.
    pub result: char,
    /// Follow-up rules. The parser fills this only for appending rules that
    /// list more than one letter: one `Appending` rule per extra letter.
    pub appended_rules: Box<[Rule]>,
}
66
67impl Rule {
68    pub fn set_tone(&mut self, tone: Tone) {
69        self.effect = tone as u8;
70    }
71
72    pub fn set_mark(&mut self, mark: Mark) {
73        self.effect = mark as u8;
74    }
75
76    pub fn get_tone(&self) -> Tone {
77        // Safety: effect is created by parser or engine.
78        match self.effect {
79            1 => Tone::Grave,
80            2 => Tone::Acute,
81            3 => Tone::Hook,
82            4 => Tone::Tilde,
83            5 => Tone::Dot,
84            _ => Tone::None,
85        }
86    }
87
88    pub fn get_mark(&self) -> Mark {
89        match self.effect {
90            1 => Mark::Hat,
91            2 => Mark::Breve,
92            3 => Mark::Horn,
93            4 => Mark::Dash,
94            5 => Mark::Raw,
95            _ => Mark::None,
96        }
97    }
98}
99
/// A collection of rules defining how keys transform text.
///
/// Use the provided static methods (e.g., [`InputMethod::telex()`]) to get
/// standard Vietnamese input methods.
#[derive(Clone, Debug, Default)]
pub struct InputMethod {
    /// Name the input method was parsed under.
    pub name: String,
    /// Every rule produced from the definition, in definition-entry order.
    pub rules: Vec<Rule>,
    /// Keys whose definition line contains the letters "uo" (ASCII
    /// case-insensitive), as detected while parsing.
    pub super_keys: Vec<char>,
    /// The key of every `ToneTransformation` rule in `rules`.
    pub tone_keys: Vec<char>,
    /// The key of every `Appending` rule in `rules`.
    pub appending_keys: Vec<char>,
    /// The first character of every definition entry's key, in entry order.
    pub keys: Vec<char>,
}
113
114impl InputMethod {
115    /// Standard Telex input method.
116    pub fn telex() -> Self {
117        parse_input_method("Telex")
118    }
119
120    /// Standard VNI input method (using number keys).
121    pub fn vni() -> Self {
122        parse_input_method("VNI")
123    }
124
125    /// Standard VIQR input method.
126    pub fn viqr() -> Self {
127        parse_input_method("VIQR")
128    }
129
130    /// Microsoft Standard Vietnamese keyboard layout.
131    pub fn microsoft_layout() -> Self {
132        parse_input_method("Microsoft layout")
133    }
134
135    /// Telex variant that also supports `[` and `]` keys.
136    pub fn telex_2() -> Self {
137        parse_input_method("Telex 2")
138    }
139
140    /// Combined Telex and VNI.
141    pub fn telex_vni() -> Self {
142        parse_input_method("Telex + VNI")
143    }
144
145    /// Combined Telex, VNI, and VIQR.
146    pub fn telex_vni_viqr() -> Self {
147        parse_input_method("Telex + VNI + VIQR")
148    }
149
150    /// VNI for French keyboard layouts.
151    pub fn vni_french_layout() -> Self {
152        parse_input_method("VNI Bàn phím tiếng Pháp")
153    }
154
155    /// Telex variant using `w` for marks and `z` for tone removal.
156    pub fn telex_w() -> Self {
157        parse_input_method("Telex W")
158    }
159}
160
161/// Parse a known input method by name from the built-in definitions.
162pub(crate) fn parse_input_method(im_name: &str) -> InputMethod {
163    let defs = crate::input_method_def::get_input_method_definitions();
164    defs.get(im_name)
165        .copied()
166        .map(|def| parse_input_method_def(im_name, def))
167        .unwrap_or_default()
168}
169
170pub(crate) fn parse_input_method_def(
171    im_name: &str,
172    im_def: &InputMethodDef,
173) -> InputMethod {
174    let mut im =
175        InputMethod { name: im_name.to_string(), ..Default::default() };
176
177    for (key_str, line) in im_def.entries() {
178        let key = match key_str.chars().next() {
179            Some(c) => c,
180            None => continue,
181        };
182
183        im.rules.extend(parse_rules(key, line));
184
185        if contains_uo_case_insensitive(line) {
186            im.super_keys.push(key);
187        }
188        im.keys.push(key);
189    }
190
191    for rule in &im.rules {
192        if rule.effect_type == EffectType::Appending {
193            im.appending_keys.push(rule.key);
194        }
195        if rule.effect_type == EffectType::ToneTransformation {
196            im.tone_keys.push(rule.key);
197        }
198    }
199
200    im
201}
202
/// Returns `true` when `s` contains a `u` immediately followed by an `o`,
/// comparing both letters ASCII case-insensitively.
#[inline]
fn contains_uo_case_insensitive(s: &str) -> bool {
    // Pair every character with its successor and look for a "uo" pair.
    s.chars().zip(s.chars().skip(1)).any(|(first, second)| {
        first.eq_ignore_ascii_case(&'u') && second.eq_ignore_ascii_case(&'o')
    })
}
215
216pub(crate) fn parse_rules(key: char, line: &str) -> Vec<Rule> {
217    if let Some(tone) = TONES.get(line).copied() {
218        return vec![Rule {
219            key,
220            effect_type: EffectType::ToneTransformation,
221            effect: tone as u8,
222            effect_on: '\0',
223            result: '\0',
224            appended_rules: Box::default(),
225        }];
226    }
227
228    parse_toneless_rules(key, line)
229}
230
231pub(crate) fn parse_toneless_rules(key: char, line: &str) -> Vec<Rule> {
232    let lower = line.to_lowercase();
233
234    if let Some((effective_ons, results, rest)) = parse_dsl(&lower) {
235        let mut rules = Vec::new();
236        for (effective_on, result) in
237            effective_ons.into_iter().zip(results.into_iter())
238        {
239            let Some(effect) = find_mark_from_char(result) else {
240                continue;
241            };
242            rules.extend(parse_toneless_rule(
243                key,
244                effective_on,
245                result,
246                effect,
247            ));
248        }
249
250        if let Some(rule) = get_appending_rule(key, rest) {
251            rules.push(rule);
252        }
253
254        return rules;
255    }
256
257    if let Some(rule) = get_appending_rule(key, line) {
258        return vec![rule];
259    }
260
261    Vec::new()
262}
263
264fn parse_toneless_rule(
265    key: char,
266    effective_on: char,
267    result: char,
268    effect: Mark,
269) -> Vec<Rule> {
270    let mut rules = Vec::new();
271
272    for chr in get_mark_family(effective_on) {
273        if chr == result {
274            rules.push(Rule {
275                key,
276                effect_type: EffectType::MarkTransformation,
277                effect: 0,
278                effect_on: result,
279                result: effective_on,
280                appended_rules: Box::default(),
281            });
282            continue;
283        }
284
285        if is_vowel(chr) {
286            for tone in 0u8..=5 {
287                rules.push(Rule {
288                    key,
289                    effect_type: EffectType::MarkTransformation,
290                    effect_on: add_tone_to_char(chr, tone),
291                    effect: effect as u8,
292                    result: add_tone_to_char(result, tone),
293                    appended_rules: Box::default(),
294                });
295            }
296        } else {
297            rules.push(Rule {
298                key,
299                effect_type: EffectType::MarkTransformation,
300                effect_on: chr,
301                effect: effect as u8,
302                result,
303                appended_rules: Box::default(),
304            });
305        }
306    }
307
308    rules
309}
310
/// Splits a mark definition of the shape `<sources>_<targets><rest>`.
///
/// `<sources>` is everything before the first `_` and must be one or more
/// ASCII letters. `<targets>` is the non-empty run of alphabetic characters
/// right after that `_`. `<rest>` is whatever follows, starting at the first
/// non-alphabetic character (possibly empty).
///
/// Returns the source letters, the target letters and `<rest>`, or `None`
/// when the input does not have this shape.
fn parse_dsl(s: &str) -> Option<(Vec<char>, Vec<char>, &str)> {
    let (sources, remainder) = s.split_once('_')?;

    let sources_valid = !sources.is_empty()
        && !sources.chars().any(|c| !c.is_ascii_alphabetic());
    if !sources_valid {
        return None;
    }

    // Byte offset where the leading run of alphabetic characters ends.
    let cut = remainder
        .find(|c: char| !c.is_alphabetic())
        .unwrap_or(remainder.len());
    let (targets, rest) = remainder.split_at(cut);
    if targets.is_empty() {
        return None;
    }

    Some((sources.chars().collect(), targets.chars().collect(), rest))
}
337
338/// Parse: `(_?)_(\p{L}+)`.
339fn get_appending_rule(key: char, value: &str) -> Option<Rule> {
340    if !value.starts_with('_') {
341        return None;
342    }
343
344    // "_x" or "__x" forms.
345    let start = if value.starts_with("__") { 2 } else { 1 };
346    let tail = value.get(start..)?;
347
348    let mut letters = Vec::new();
349    for ch in tail.chars() {
350        if ch.is_alphabetic() {
351            letters.push(ch);
352        } else {
353            break;
354        }
355    }
356
357    let first = *letters.first()?;
358
359    let mut appended_rules = Vec::new();
360    for &ch in letters.iter().skip(1) {
361        appended_rules.push(Rule {
362            key,
363            effect_type: EffectType::Appending,
364            effect: 0,
365            effect_on: ch,
366            result: ch,
367            appended_rules: Box::default(),
368        });
369    }
370
371    Some(Rule {
372        key,
373        effect_type: EffectType::Appending,
374        effect: 0,
375        effect_on: first,
376        result: first,
377        appended_rules: appended_rules.into_boxed_slice(),
378    })
379}
380
381fn get_mark_family(c: char) -> Vec<char> {
382    let base = add_tone_to_char(c, 0);
383    let canonical = add_mark_to_toneless_char(base, 0);
384
385    // Marks are 0..=4 in utils' internal mark table.
386    let mut family: Vec<char> =
387        (0u8..=4).map(|m| add_mark_to_toneless_char(canonical, m)).collect();
388
389    family.sort_unstable();
390    family.dedup();
391    family
392}
393
394fn find_mark_from_char(c: char) -> Option<Mark> {
395    let c = c.to_lowercase().next().unwrap_or(c);
396    let toneless = add_tone_to_char(c, 0);
397    let base = add_mark_to_toneless_char(toneless, 0);
398
399    for m in 0u8..=4 {
400        if add_mark_to_toneless_char(base, m) == toneless {
401            return Some(match m {
402                1 => Mark::Hat,
403                2 => Mark::Breve,
404                3 => Mark::Horn,
405                4 => Mark::Dash,
406                _ => Mark::None,
407            });
408        }
409    }
410
411    None
412}
413
/// Unit tests for the definition parser.
#[cfg(test)]
mod tests {
    use super::*;

    /// Lines that are exactly a tone-action name produce one tone rule.
    #[test]
    fn parse_tone_rules() {
        let rules = parse_rules('z', "XoaDauThanh");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].effect_type, EffectType::ToneTransformation);
        assert_eq!(rules[0].effect, Tone::None as u8);

        let rules = parse_rules('x', "DauNga");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].effect_type, EffectType::ToneTransformation);
        assert_eq!(rules[0].get_tone(), Tone::Tilde);
    }

    /// Mark definitions, plain appending lines, and the combination of both.
    #[test]
    fn parse_toneless_rules_cases() {
        // One forward rule (d -> đ) plus one rule mapping đ back to d.
        let rules = parse_toneless_rules('d', "D_Đ");
        assert_eq!(rules.len(), 2);
        assert_eq!(rules[0].effect_type, EffectType::MarkTransformation);
        assert_eq!(rules[0].effect, Mark::Dash as u8);
        assert_eq!(rules[0].effect_on, 'd');

        // No DSL match: the original (uppercase) letter is kept.
        let rules = parse_toneless_rules('{', "_Ư");
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0].effect_type, EffectType::Appending);
        assert_eq!(rules[0].effect_on, 'Ư');

        // Rules are emitted pair by pair (u->ư, o->ơ, a->ă). The indices
        // below imply 7 rules for the first pair and 13 for each of the
        // other two (7 + 13 + 13 = 33).
        let rules = parse_toneless_rules('w', "UOA_ƯƠĂ");
        assert_eq!(rules.len(), 33);
        assert_eq!(rules[0].effect_type, EffectType::MarkTransformation);
        assert_eq!(rules[0].get_mark(), Mark::Horn);
        assert_eq!(rules[0].effect_on, 'u');
        assert_eq!(rules[7].effect_type, EffectType::MarkTransformation);
        assert_eq!(rules[7].get_mark(), Mark::Horn);
        assert_eq!(rules[7].effect_on, 'o');
        assert_eq!(rules[20].effect_type, EffectType::MarkTransformation);
        assert_eq!(rules[20].get_mark(), Mark::Breve);
        assert_eq!(rules[20].effect_on, 'a');

        // The trailing "__Ư" adds one appending rule after the mark rules;
        // its letter is lowercase because the DSL path lowercases the line.
        let rules = parse_toneless_rules('w', "UOA_ƯƠĂ__Ư");
        assert_eq!(rules.len(), 34);
        assert_eq!(rules[20].effect_type, EffectType::MarkTransformation);
        assert_eq!(rules[20].get_mark(), Mark::Breve);
        assert_eq!(rules[20].effect_on, 'a');
        assert_eq!(rules[33].effect_type, EffectType::Appending);
        assert_eq!(rules[33].effect_on, 'ư');
    }

    /// Multi-letter appending lines chain the extra letters as appended rules.
    #[test]
    fn parse_append_rule() {
        let rules = parse_toneless_rules('[', "__ươ");
        assert_eq!(rules.len(), 1);
        let append_rules = &rules[0].appended_rules;
        assert_eq!(append_rules.len(), 1);
        assert_eq!(append_rules[0].effect_type, EffectType::Appending);
        assert_eq!(append_rules[0].effect_on, 'ơ');

        // Case is preserved on the pure appending path.
        let rules = parse_toneless_rules('{', "__ƯƠ");
        assert_eq!(rules.len(), 1);
        let append_rules = &rules[0].appended_rules;
        assert_eq!(append_rules.len(), 1);
        assert_eq!(append_rules[0].effect_type, EffectType::Appending);
        assert_eq!(append_rules[0].effect_on, 'Ơ');
    }

    /// The built-in Telex definition marks `w` as a super key.
    #[test]
    fn parse_input_method_super_key_detection() {
        let im = parse_input_method("Telex");
        assert!(im.super_keys.contains(&'w'));
    }

    #[test]
    fn parse_telex_o_hat_rule_exists() {
        // In Telex, typing 'o' after an existing 'o' should be able to mark it as 'ô'.
        let rules = parse_toneless_rules('o', "O_Ô");
        assert!(rules.iter().any(|r| {
            r.effect_type == EffectType::MarkTransformation
                && r.get_mark() == Mark::Hat
                && r.effect_on == 'o'
                && r.result == 'ô'
        }));
        assert!(!rules.iter().any(|r| r.effect_type == EffectType::Appending));
    }

    /// The built-in "Telex 2" definition has rules for `o`, none appending.
    #[test]
    fn telex2_has_no_appending_rule_for_o() {
        let im = parse_input_method("Telex 2");
        let o_rules: Vec<_> =
            im.rules.iter().filter(|r| r.key == 'o').collect();
        assert!(!o_rules.is_empty());
        assert!(
            !o_rules.iter().any(|r| r.effect_type == EffectType::Appending)
        );
    }
}
511}