Skip to main content

bamboo_core/
engine.rs

1//! The core engine that processes keypresses and maintains the IME state.
2
3use crate::config::Config;
4use crate::input_method::{InputMethod, Rule};
5use crate::mode::{Mode, OutputOptions};
6
7const MAX_ACTIVE_TRANS: usize = 32;
8
9/// Represents a single keypress or a transformation derived from it (e.g., adding a mark or tone).
10#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash)]
11pub struct Transformation {
12    /// The rule that was applied.
13    pub rule: Rule,
14    /// The index of the transformation in the composition that this transformation targets (if any).
15    /// For example, a tone mark transformation targets an earlier vowel.
16    pub target: Option<usize>,
17    /// Whether the resulting character should be uppercase.
18    pub is_upper_case: bool,
19}
20
21#[inline]
22fn lower(c: char) -> char {
23    if c.is_ascii() { c.to_ascii_lowercase() } else { c.to_lowercase().next().unwrap_or(c) }
24}
25
26#[inline]
27fn is_upper(c: char) -> bool {
28    if c.is_ascii() { c.is_ascii_uppercase() } else { lower(c) != c }
29}
30
31fn uoh_tail_match(s: &str) -> bool {
32    for pat in ["uơ", "ưo"] {
33        if let Some(idx) = s.find(pat) {
34            let after = &s[idx + pat.len()..];
35            if after.chars().next().is_some_and(|c| c.is_alphabetic()) {
36                return true;
37            }
38        }
39    }
40    false
41}
42
43/// The main stateful processor of the Vietnamese Input Method Engine.
44///
45/// It maintains an internal buffer of transformations and produces the correctly marked Vietnamese text.
46pub struct Engine {
47    committed_text: String,
48    /// Stack-allocated buffer for the active composition to avoid heap allocations.
49    active_buffer: [Transformation; MAX_ACTIVE_TRANS],
50    active_len: usize,
51
52    input_method: InputMethod,
53    all_rules: Box<[Rule]>,
54    ascii_rule_indices: [(u16, u16); 128],
55    non_ascii_rule_indices: Box<[(char, (u16, u16))]>,
56    ascii_effect_keys: [bool; 128],
57    non_ascii_effect_keys: Vec<char>,
58    config: Config,
59
60    // Scratch buffers to avoid per-keystroke allocations.
61    work_comp: Vec<Transformation>,
62    scratch_comp: Vec<Transformation>,
63
64    prev_preedit: String,
65    delta_buf: String,
66
67    dfa: crate::dfa::Dfa,
68    current_state_id: u32,
69}
70
71impl Engine {
72    /// Creates a new engine with the specified input method and default configuration.
73    pub fn new(input_method: InputMethod) -> Self {
74        Self::with_config(input_method, Config::default())
75    }
76
77    /// Creates a new engine with a specific input method and configuration.
78    pub fn with_config(input_method: InputMethod, config: Config) -> Self {
79        let mut rules_by_key: std::collections::BTreeMap<char, Vec<Rule>> =
80            std::collections::BTreeMap::new();
81        for rule in &input_method.rules {
82            let key = lower(rule.key);
83            rules_by_key.entry(key).or_default().push(*rule);
84        }
85
86        let total_rules: usize = rules_by_key.values().map(|v| v.len()).sum();
87        let mut all_rules_vec = Vec::with_capacity(total_rules);
88        let mut ascii_rule_indices = [(0u16, 0u16); 128];
89        let mut non_ascii_indices_vec = Vec::new();
90
91        for (key, rules) in rules_by_key {
92            let start = all_rules_vec.len() as u16;
93            all_rules_vec.extend(rules);
94            let end = all_rules_vec.len() as u16;
95            if key.is_ascii() {
96                ascii_rule_indices[key as usize] = (start, end);
97            } else {
98                non_ascii_indices_vec.push((key, (start, end)));
99            }
100        }
101
102        let mut ascii_effect_keys = [false; 128];
103        let mut non_ascii_effect_keys: Vec<char> = Vec::new();
104        for key in &input_method.keys {
105            if key.is_ascii() {
106                ascii_effect_keys[*key as usize] = true;
107            } else {
108                non_ascii_effect_keys.push(*key);
109            }
110        }
111        non_ascii_effect_keys.sort_unstable();
112        non_ascii_effect_keys.dedup();
113
114        Self {
115            committed_text: String::new(),
116            active_buffer: [Transformation::default(); MAX_ACTIVE_TRANS],
117            active_len: 0,
118            input_method,
119            all_rules: all_rules_vec.into_boxed_slice(),
120            ascii_rule_indices,
121            non_ascii_rule_indices: non_ascii_indices_vec.into_boxed_slice(),
122            ascii_effect_keys,
123            non_ascii_effect_keys,
124            config,
125
126            work_comp: Vec::with_capacity(MAX_ACTIVE_TRANS),
127            scratch_comp: Vec::with_capacity(MAX_ACTIVE_TRANS),
128
129            prev_preedit: String::with_capacity(64),
130            delta_buf: String::with_capacity(64),
131            dfa: crate::dfa::Dfa::new(),
132            current_state_id: 0,
133        }
134    }
135
136    #[inline]
137    pub(crate) fn active_slice(&self) -> &[Transformation] {
138        &self.active_buffer[..self.active_len]
139    }
140
141    fn take_active_into(&mut self, out: &mut Vec<Transformation>) {
142        out.clear();
143        out.extend_from_slice(self.active_slice());
144        self.active_len = 0;
145    }
146
147    fn set_active_from_vec(&mut self, src: &mut Vec<Transformation>) {
148        self.active_len = src.len().min(MAX_ACTIVE_TRANS);
149        self.active_buffer[..self.active_len].copy_from_slice(&src[..self.active_len]);
150        src.clear();
151    }
152
153    /// Returns the current configuration of the engine.
154    pub fn config(&self) -> Config {
155        self.config
156    }
157
158    /// Updates the engine configuration.
159    pub fn set_config(&mut self, config: Config) {
160        self.config = config;
161    }
162
163    /// Returns a copy of the current input method.
164    pub fn input_method(&self) -> InputMethod {
165        self.input_method.clone()
166    }
167
168    fn get_applicable_rules(&self, key: char) -> &[Rule] {
169        let key = lower(key);
170        if key.is_ascii() {
171            let (start, end) = self.ascii_rule_indices[key as usize];
172            &self.all_rules[start as usize..end as usize]
173        } else {
174            self.non_ascii_rule_indices
175                .binary_search_by_key(&key, |(k, _)| *k)
176                .map(|idx| {
177                    let (start, end) = self.non_ascii_rule_indices[idx].1;
178                    &self.all_rules[start as usize..end as usize]
179                })
180                .unwrap_or(&[])
181        }
182    }
183
184    fn can_process_key_raw(&self, lower_key: char) -> bool {
185        if crate::utils::is_alpha(lower_key)
186            || (lower_key.is_ascii() && self.ascii_effect_keys[lower_key as usize])
187            || self.non_ascii_effect_keys.binary_search(&lower_key).is_ok()
188        {
189            return true;
190        }
191        if crate::utils::is_word_break_symbol(lower_key) {
192            return false;
193        }
194        crate::utils::is_vietnamese_rune(lower_key)
195    }
196
197    fn generate_transformations(
198        &self,
199        composition: &mut Vec<Transformation>,
200        key: char,
201        is_upper_case: bool,
202    ) {
203        let lower_key = lower(key);
204        let mut transformations = crate::bamboo_util::generate_transformations(
205            composition,
206            self.get_applicable_rules(lower_key),
207            self.config.to_flags(),
208            lower_key,
209            is_upper_case,
210        );
211
212        if transformations.is_empty() {
213            transformations = crate::bamboo_util::generate_fallback_transformations(
214                self.get_applicable_rules(lower_key),
215                lower_key,
216                is_upper_case,
217            );
218            let mut new_comp = composition.clone();
219            new_comp.extend(transformations.clone());
220
221            if !self.input_method.super_keys.is_empty() {
222                let current_str = crate::flattener::flatten_slice(
223                    &new_comp,
224                    OutputOptions::TONE_LESS | OutputOptions::LOWER_CASE,
225                );
226                if uoh_tail_match(&current_str) {
227                    let (target, rule) = crate::bamboo_util::find_target(
228                        &new_comp,
229                        self.get_applicable_rules(self.input_method.super_keys[0]),
230                        self.config.to_flags(),
231                    );
232                    if let (Some(target), Some(mut rule)) = (target, rule) {
233                        rule.key = '\0';
234                        transformations.push(Transformation {
235                            rule,
236                            target: Some(target),
237                            is_upper_case: false,
238                        });
239                    }
240                }
241            }
242        }
243        composition.extend(transformations.iter().cloned());
244        if self.config.to_flags() & crate::bamboo_util::EFREE_TONE_MARKING != 0
245            && self.is_valid_internal(composition, false)
246        {
247            let extra = crate::bamboo_util::refresh_last_tone_target(
248                composition,
249                self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
250            );
251            composition.extend(extra);
252        }
253    }
254
255    fn last_syllable_start(composition: &[Transformation]) -> usize {
256        let mut idx = composition.len();
257        let mut last_is_vowel = false;
258        let mut found_vowel = false;
259
260        while idx > 0 {
261            let tmp = &composition[idx - 1];
262            if tmp.target.is_none() {
263                let is_v = crate::utils::is_vowel(tmp.rule.result);
264                if found_vowel && !is_v && !last_is_vowel {
265                    break;
266                }
267                if is_v {
268                    found_vowel = true;
269                }
270                last_is_vowel = is_v;
271            }
272            idx -= 1;
273        }
274
275        idx
276    }
277
278    fn new_composition_in_place(
279        &self,
280        composition: &mut Vec<Transformation>,
281        scratch: &mut Vec<Transformation>,
282        key: char,
283        is_upper_case: bool,
284    ) {
285        let syllable_abs_start = Self::last_syllable_start(composition);
286
287        scratch.clear();
288        scratch.extend(composition.drain(syllable_abs_start..));
289
290        let offset = syllable_abs_start;
291        if offset != 0 {
292            for t in scratch.iter_mut() {
293                if let Some(target) = t.target {
294                    t.target = Some(target.saturating_sub(offset));
295                }
296            }
297        }
298
299        self.generate_transformations(scratch, key, is_upper_case);
300
301        if offset != 0 {
302            for t in scratch.iter_mut() {
303                if let Some(target) = t.target {
304                    t.target = Some(target + offset);
305                }
306            }
307        }
308
309        composition.append(scratch);
310    }
311
312    /// Processes a string of characters and returns the current active word.
313    pub fn process(&mut self, s: &str, mode: Mode) -> String {
314        self.process_str(s, mode).output()
315    }
316
317    /// Processes a string of characters and returns a reference to the engine.
318    pub fn process_str(&mut self, s: &str, mode: Mode) -> &Self {
319        for key in s.chars() {
320            self.process_key(key, mode);
321        }
322        self
323    }
324
325    fn lcp_chars_and_bytes(a: &str, b: &str) -> (usize, usize) {
326        let mut lcp_chars = 0usize;
327        let mut lcp_bytes = 0usize;
328        for (ac, bc) in a.chars().zip(b.chars()) {
329            if ac == bc {
330                lcp_chars += 1;
331                lcp_bytes += ac.len_utf8();
332            } else {
333                break;
334            }
335        }
336        (lcp_chars, lcp_bytes)
337    }
338
339    /// Processes a single key and returns the "delta" change required for a text editor.
340    ///
341    /// This is useful for IMEs to update the preedit text efficiently without rewriting the entire word.
342    ///
343    /// # Returns
344    /// A tuple containing:
345    /// 1. `backspaces_chars`: Number of characters to delete from the end of the previous preedit.
346    /// 2. `backspaces_bytes`: Number of UTF-8 bytes to delete.
347    /// 3. `inserted`: The new string to append after deletion.
348    pub fn process_key_delta(&mut self, key: char, mode: Mode) -> (usize, usize, &str) {
349        self.process_key(key, mode);
350
351        let active_len = self.active_len;
352        let active = &self.active_buffer[..active_len];
353        crate::flattener::flatten_slice_into(active, OutputOptions::NONE, &mut self.delta_buf);
354
355        let (lcp_chars, lcp_bytes) = Self::lcp_chars_and_bytes(&self.prev_preedit, &self.delta_buf);
356
357        let prev_chars = self.prev_preedit.chars().count();
358
359        let prev_bytes = self.prev_preedit.len();
360
361        let backspaces_chars = prev_chars.saturating_sub(lcp_chars);
362        let backspaces_bytes = prev_bytes.saturating_sub(lcp_bytes);
363
364        std::mem::swap(&mut self.prev_preedit, &mut self.delta_buf);
365        let inserted = &self.prev_preedit[lcp_bytes..];
366        (backspaces_chars, backspaces_bytes, inserted)
367    }
368
369    /// Similar to [`Self::process_key_delta`], but writes the inserted string into a provided buffer.
370    ///
371    /// # Returns
372    /// The number of backspaces (characters) to perform.
373    pub fn process_key_delta_into(
374        &mut self,
375        key: char,
376        mode: Mode,
377        inserted: &mut String,
378    ) -> usize {
379        let (backspaces_chars, _backspaces_bytes, ins) = self.process_key_delta(key, mode);
380        inserted.clear();
381        inserted.push_str(ins);
382        backspaces_chars
383    }
384
385    /// Processes a single character.
386    ///
387    /// The `mode` determines whether to apply Vietnamese transformation rules.
388    pub fn process_key(&mut self, key: char, mode: Mode) {
389        let lower_key = lower(key);
390        let is_upper_case = is_upper(key);
391
392        if mode == Mode::English || !self.can_process_key_raw(lower_key) {
393            if crate::utils::is_word_break_symbol(lower_key) {
394                self.commit();
395            }
396            let trans = crate::bamboo_util::new_appending_trans(lower_key, is_upper_case);
397            self.push_active(trans);
398            if crate::utils::is_word_break_symbol(lower_key) {
399                self.commit();
400            }
401            self.current_state_id = 0;
402            return;
403        }
404
405        // DFA Fast Path
406        if lower_key.is_ascii() && !is_upper_case {
407            let next_state_id =
408                self.dfa.get_state(self.current_state_id).transitions[lower_key as usize];
409            if next_state_id != 0 {
410                self.current_state_id = next_state_id;
411                let next_state = self.dfa.get_state(next_state_id);
412                self.active_len = next_state.composition.len().min(MAX_ACTIVE_TRANS);
413                self.active_buffer[..self.active_len].copy_from_slice(&next_state.composition);
414                return;
415            }
416        }
417
418        let mut work = std::mem::take(&mut self.work_comp);
419        let mut scratch = std::mem::take(&mut self.scratch_comp);
420
421        self.take_active_into(&mut work);
422        self.new_composition_in_place(&mut work, &mut scratch, lower_key, is_upper_case);
423
424        // Try to update DFA (Lazy JIT)
425        if lower_key.is_ascii() && !is_upper_case && work.len() <= MAX_ACTIVE_TRANS {
426            let next_id = self.dfa.add_state(&work);
427            self.dfa.states[self.current_state_id as usize].transitions[lower_key as usize] =
428                next_id;
429            self.current_state_id = next_id;
430        } else {
431            self.current_state_id = self.dfa.find_state(&work).unwrap_or(0);
432        }
433
434        self.set_active_from_vec(&mut work);
435
436        self.work_comp = work;
437        self.scratch_comp = scratch;
438    }
439
440    fn push_active(&mut self, trans: Transformation) {
441        if self.active_len < MAX_ACTIVE_TRANS {
442            self.active_buffer[self.active_len] = trans;
443            self.active_len += 1;
444            self.current_state_id = self.dfa.find_state(self.active_slice()).unwrap_or(0);
445        }
446    }
447
448    /// Clears the active syllable buffer and appends it to the committed text.
449    pub fn commit(&mut self) {
450        if self.active_len == 0 {
451            return;
452        }
453        let word = self.output();
454        self.committed_text.push_str(&word);
455        self.active_len = 0;
456        self.current_state_id = 0;
457    }
458
459    /// Returns the currently active syllable as a string.
460    pub fn output(&self) -> String {
461        crate::flattener::flatten_slice(self.active_slice(), OutputOptions::NONE)
462    }
463
464    /// Returns the processed string according to the specified options.
465    ///
466    /// This can be used to get the full text (committed + active) or variations like toneless text.
467    pub fn get_processed_str(&self, options: OutputOptions) -> String {
468        let active = self.active_slice();
469        if options.contains(OutputOptions::FULL_TEXT) {
470            let mut result = self.committed_text.clone();
471            result.push_str(&crate::flattener::flatten_slice(active, options));
472            return result;
473        }
474        if options.contains(OutputOptions::PUNCTUATION_MODE) {
475            if active.is_empty() {
476                return String::new();
477            }
478            let (_, tail) = crate::bamboo_util::extract_last_word_with_punctuation_marks(
479                active,
480                &self.input_method.keys,
481            );
482            return crate::flattener::flatten_slice(tail, OutputOptions::NONE);
483        }
484        crate::flattener::flatten_slice(active, options)
485    }
486
487    /// Checks if the current composition forms a valid Vietnamese syllable.
488    pub fn is_valid(&self, input_is_full_complete: bool) -> bool {
489        self.is_valid_internal(self.active_slice(), input_is_full_complete)
490    }
491
492    fn is_valid_internal(
493        &self,
494        composition: &[Transformation],
495        input_is_full_complete: bool,
496    ) -> bool {
497        crate::bamboo_util::is_valid(composition, input_is_full_complete)
498    }
499
500    /// Restores the last word in the composition to its un-transformed state.
501    ///
502    /// If `to_vietnamese` is true, it attempts to re-apply Vietnamese transformations.
503    pub fn restore_last_word(&mut self, to_vietnamese: bool) {
504        let mut work = std::mem::take(&mut self.work_comp);
505        let mut scratch = std::mem::take(&mut self.scratch_comp);
506
507        self.take_active_into(&mut work);
508        if work.is_empty() {
509            self.set_active_from_vec(&mut work);
510            self.work_comp = work;
511            self.scratch_comp = scratch;
512            self.current_state_id = 0;
513            return;
514        }
515
516        let (prev_slice, last) =
517            crate::bamboo_util::extract_last_word(&work, Some(&self.input_method.keys));
518        let mut previous = prev_slice.to_vec();
519
520        if last.is_empty() {
521            self.set_active_from_vec(&mut work);
522            self.work_comp = work;
523            self.scratch_comp = scratch;
524            self.current_state_id = 0;
525            return;
526        }
527        if !to_vietnamese {
528            previous.extend(crate::bamboo_util::break_composition_slice(last));
529            self.set_active_from_vec(&mut previous);
530            self.work_comp = work;
531            self.scratch_comp = scratch;
532            self.current_state_id = 0;
533            return;
534        }
535
536        let mut new_comp: Vec<Transformation> = Vec::new();
537        for t in last {
538            if t.rule.key == '\0' {
539                continue;
540            }
541            self.new_composition_in_place(&mut new_comp, &mut scratch, t.rule.key, t.is_upper_case);
542        }
543        previous.extend(new_comp);
544
545        self.set_active_from_vec(&mut previous);
546        self.work_comp = work;
547        self.scratch_comp = scratch;
548        self.current_state_id = 0;
549    }
550
551    /// Removes the last character from the active composition.
552    pub fn remove_last_char(&mut self, refresh_last_tone_target: bool) {
553        let mut work = std::mem::take(&mut self.work_comp);
554
555        self.take_active_into(&mut work);
556        let last_appending_idx = crate::bamboo_util::find_last_appending_trans_idx(&work);
557        let Some(last_idx) = last_appending_idx else {
558            self.set_active_from_vec(&mut work);
559            self.work_comp = work;
560            self.current_state_id = 0;
561            return;
562        };
563
564        let last_appending_key = work[last_idx].rule.key;
565        if !self.can_process_key_raw(last_appending_key) {
566            work.pop();
567            self.set_active_from_vec(&mut work);
568            self.work_comp = work;
569            self.current_state_id = 0;
570            return;
571        }
572
573        if work.is_empty() {
574            self.set_active_from_vec(&mut work);
575            self.work_comp = work;
576            self.current_state_id = 0;
577            return;
578        }
579
580        let (prev_slice, last_comb_slice) =
581            crate::bamboo_util::extract_last_word(&work, Some(&self.input_method.keys));
582        let mut previous = prev_slice.to_vec();
583        let last_comb = last_comb_slice.to_vec();
584
585        let mut new_comb: Vec<Transformation> = Vec::new();
586        let prev_len = previous.len();
587        for (i, t) in last_comb.into_iter().enumerate() {
588            let actual_idx = prev_len + i;
589            if actual_idx == last_idx {
590                continue;
591            }
592            if let Some(target) = t.target
593                && target == last_idx
594            {
595                continue;
596            }
597            new_comb.push(t);
598        }
599
600        if refresh_last_tone_target {
601            let extra = crate::bamboo_util::refresh_last_tone_target(
602                &mut new_comb,
603                self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
604            );
605            new_comb.extend(extra);
606        }
607
608        previous.extend(new_comb);
609        self.set_active_from_vec(&mut previous);
610        self.work_comp = work;
611        self.current_state_id = 0;
612    }
613
614    /// Resets the engine state, clearing committed and active text.
615    pub fn reset(&mut self) {
616        self.committed_text.clear();
617        self.active_len = 0;
618        self.prev_preedit.clear();
619        self.delta_buf.clear();
620        self.current_state_id = 0;
621    }
622}
623
624#[cfg(test)]
625mod tests {
626    use super::*;
627
628    #[test]
629    fn delta_backspaces_and_inserted() {
630        let telex = InputMethod::telex();
631        let mut e = Engine::new(telex);
632
633        let (bs1, _bb1, ins1) = e.process_key_delta('a', Mode::Vietnamese);
634        assert_eq!(bs1, 0, "First 'a' should have 0 backspaces");
635        assert_eq!(ins1, "a");
636
637        let (bs2, _bb2, ins2) = e.process_key_delta('s', Mode::Vietnamese);
638        assert_eq!(bs2, 1, "Adding 's' to 'a' should have 1 backspace for 'á'");
639        assert_eq!(ins2, "á");
640
641        let (bs3, _bb3, ins3) = e.process_key_delta(' ', Mode::Vietnamese);
642        assert_eq!(bs3, 1, "Space should clear the preedit 'á'");
643        assert_eq!(ins3, "");
644    }
645}