Skip to main content

bamboo_core/
engine.rs

1use crate::config::Config;
2use crate::input_method::{InputMethod, Rule};
3use crate::mode::{Mode, OutputOptions};
4
/// Capacity of the engine's active buffer: the largest number of
/// transformations that can be held for the not-yet-committed composition.
const MAX_ACTIVE_TRANS: usize = 32;
6
7/// Represents a single keypress or a transformation derived from it.
8#[derive(Clone, Debug)]
9pub struct Transformation {
10    pub rule: Rule,
11    pub target: Option<usize>,
12    pub is_upper_case: bool,
13}
14
/// Lowercases a single character.
///
/// ASCII goes through the cheap ASCII path. For anything else only the first
/// character of the Unicode lowercase mapping is kept, falling back to `c`
/// itself if the mapping yields nothing.
#[inline]
fn lower(c: char) -> char {
    if !c.is_ascii() {
        return c.to_lowercase().next().unwrap_or(c);
    }
    c.to_ascii_lowercase()
}
23
/// Returns `true` when `c` is an upper-case character.
///
/// ASCII is answered directly. A non-ASCII character counts as upper case
/// exactly when the first character of its lowercase mapping differs from it.
#[inline]
fn is_upper(c: char) -> bool {
    if c.is_ascii() {
        return c.is_ascii_uppercase();
    }
    c.to_lowercase()
        .next()
        .map_or(false, |folded| folded != c)
}
28
/// Reports whether `s` contains `uơ` or `ưo` immediately followed by an
/// alphabetic character.
///
/// Every occurrence of either pattern is examined, so a later occurrence that
/// is followed by a letter still matches even when an earlier one is not.
fn uoh_tail_match(s: &str) -> bool {
    ["uơ", "ưo"].iter().any(|pat| {
        s.match_indices(*pat).any(|(start, matched)| {
            // Slicing at `start + matched.len()` is always on a char boundary
            // because it is the end of a matched substring.
            s[start + matched.len()..]
                .chars()
                .next()
                .is_some_and(char::is_alphabetic)
        })
    })
}
40
41/// The main entry point for the Vietnamese Input Method Engine.
42pub struct Engine {
43    committed_text: String,
44    /// Stack-allocated buffer for the current syllable to avoid heap allocations.
45    active_buffer: [Option<Transformation>; MAX_ACTIVE_TRANS],
46    active_len: usize,
47
48    input_method: InputMethod,
49    all_rules: Box<[Rule]>,
50    ascii_rule_indices: [(u16, u16); 128],
51    non_ascii_rule_indices: Box<[(char, (u16, u16))]>,
52    ascii_effect_keys: [bool; 128],
53    non_ascii_effect_keys: Vec<char>,
54    config: Config,
55}
56
57impl Engine {
58    pub fn new(input_method: InputMethod) -> Self {
59        Self::with_config(input_method, Config::default())
60    }
61
62    pub fn with_config(input_method: InputMethod, config: Config) -> Self {
63        let mut rules_by_key: std::collections::BTreeMap<char, Vec<Rule>> =
64            std::collections::BTreeMap::new();
65        for rule in &input_method.rules {
66            let key = lower(rule.key);
67            rules_by_key.entry(key).or_default().push(rule.clone());
68        }
69
70        let total_rules: usize = rules_by_key.values().map(|v| v.len()).sum();
71        let mut all_rules_vec = Vec::with_capacity(total_rules);
72        let mut ascii_rule_indices = [(0u16, 0u16); 128];
73        let mut non_ascii_indices_vec = Vec::new();
74
75        for (key, rules) in rules_by_key {
76            let start = all_rules_vec.len() as u16;
77            all_rules_vec.extend(rules);
78            let end = all_rules_vec.len() as u16;
79            if key.is_ascii() {
80                ascii_rule_indices[key as usize] = (start, end);
81            } else {
82                non_ascii_indices_vec.push((key, (start, end)));
83            }
84        }
85
86        let mut ascii_effect_keys = [false; 128];
87        let mut non_ascii_effect_keys: Vec<char> = Vec::new();
88        for key in &input_method.keys {
89            if key.is_ascii() {
90                ascii_effect_keys[*key as usize] = true;
91            } else {
92                non_ascii_effect_keys.push(*key);
93            }
94        }
95        non_ascii_effect_keys.sort_unstable();
96        non_ascii_effect_keys.dedup();
97
98        Self {
99            committed_text: String::new(),
100            active_buffer: std::array::from_fn(|_| None),
101            active_len: 0,
102            input_method,
103            all_rules: all_rules_vec.into_boxed_slice(),
104            ascii_rule_indices,
105            non_ascii_rule_indices: non_ascii_indices_vec.into_boxed_slice(),
106            ascii_effect_keys,
107            non_ascii_effect_keys,
108            config,
109        }
110    }
111
112    /// Internal helper to get active composition as a slice of references.
113    fn active_composition(&self) -> Vec<&Transformation> {
114        self.active_buffer[..self.active_len]
115            .iter()
116            .map(|opt| opt.as_ref().unwrap())
117            .collect()
118    }
119
120    /// Internal helper to get active composition as a Vec for mutation.
121    fn active_composition_owned(&self) -> Vec<Transformation> {
122        self.active_buffer[..self.active_len]
123            .iter()
124            .map(|opt| opt.as_ref().unwrap().clone())
125            .collect()
126    }
127
128    fn set_active_composition(&mut self, comp: Vec<Transformation>) {
129        self.active_len = comp.len().min(MAX_ACTIVE_TRANS);
130        for (i, t) in comp.into_iter().enumerate().take(MAX_ACTIVE_TRANS) {
131            self.active_buffer[i] = Some(t);
132        }
133    }
134
135    pub fn config(&self) -> Config {
136        self.config
137    }
138    pub fn set_config(&mut self, config: Config) {
139        self.config = config;
140    }
141    pub fn input_method(&self) -> InputMethod {
142        self.input_method.clone()
143    }
144
145    fn get_applicable_rules(&self, key: char) -> &[Rule] {
146        let key = lower(key);
147        if key.is_ascii() {
148            let (start, end) = self.ascii_rule_indices[key as usize];
149            &self.all_rules[start as usize..end as usize]
150        } else {
151            self.non_ascii_rule_indices
152                .binary_search_by_key(&key, |(k, _)| *k)
153                .map(|idx| {
154                    let (start, end) = self.non_ascii_rule_indices[idx].1;
155                    &self.all_rules[start as usize..end as usize]
156                })
157                .unwrap_or(&[])
158        }
159    }
160
161    fn can_process_key_raw(&self, lower_key: char) -> bool {
162        if crate::utils::is_alpha(lower_key)
163            || (lower_key.is_ascii()
164                && self.ascii_effect_keys[lower_key as usize])
165            || self.non_ascii_effect_keys.binary_search(&lower_key).is_ok()
166        {
167            return true;
168        }
169        if crate::utils::is_word_break_symbol(lower_key) {
170            return false;
171        }
172        crate::utils::is_vietnamese_rune(lower_key)
173    }
174
175    fn generate_transformations(
176        &self,
177        composition: &mut Vec<Transformation>,
178        key: char,
179        is_upper_case: bool,
180    ) {
181        let lower_key = lower(key);
182        let refs: Vec<&Transformation> = composition.iter().collect();
183        let mut transformations = crate::bamboo_util::generate_transformations(
184            &refs,
185            self.get_applicable_rules(lower_key),
186            self.config.to_flags(),
187            lower_key,
188            is_upper_case,
189        );
190
191        if transformations.is_empty() {
192            transformations =
193                crate::bamboo_util::generate_fallback_transformations(
194                    self.get_applicable_rules(lower_key),
195                    lower_key,
196                    is_upper_case,
197                );
198            let mut new_comp = composition.clone();
199            new_comp.extend(transformations.clone());
200            let new_refs: Vec<&Transformation> = new_comp.iter().collect();
201
202            if !self.input_method.super_keys.is_empty() {
203                let current_str = crate::flattener::flatten(
204                    &new_refs,
205                    OutputOptions::TONE_LESS | OutputOptions::LOWER_CASE,
206                );
207                if uoh_tail_match(&current_str) {
208                    let (target, rule) = crate::bamboo_util::find_target(
209                        &new_refs,
210                        self.get_applicable_rules(
211                            self.input_method.super_keys[0],
212                        ),
213                        self.config.to_flags(),
214                    );
215                    if let (Some(target), Some(mut rule)) = (target, rule) {
216                        rule.key = '\0';
217                        transformations.push(Transformation {
218                            rule,
219                            target: Some(target),
220                            is_upper_case: false,
221                        });
222                    }
223                }
224            }
225        }
226        composition.extend(transformations);
227        if self.config.to_flags() & crate::bamboo_util::EFREE_TONE_MARKING != 0
228            && self.is_valid_internal(composition, false)
229        {
230            let extra = crate::bamboo_util::refresh_last_tone_target(
231                composition,
232                self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE
233                    != 0,
234            );
235            composition.extend(extra);
236        }
237    }
238
239    fn new_composition(
240        &self,
241        mut composition: Vec<Transformation>,
242        key: char,
243        is_upper_case: bool,
244    ) -> Vec<Transformation> {
245        let (prev_refs, _) = crate::bamboo_util::extract_last_syllable(
246            &composition,
247            Some(&self.input_method.keys),
248        );
249        let syllable_abs_start = prev_refs.len();
250        let mut syllable = composition.split_off(syllable_abs_start);
251        let mut previous = composition;
252
253        let offset = syllable_abs_start;
254        if offset != 0 {
255            for t in &mut syllable {
256                if let Some(target) = t.target {
257                    t.target = Some(target.saturating_sub(offset));
258                }
259            }
260        }
261        self.generate_transformations(&mut syllable, key, is_upper_case);
262        if offset != 0 {
263            for t in &mut syllable {
264                if let Some(target) = t.target {
265                    t.target = Some(target + offset);
266                }
267            }
268        }
269        previous.extend(syllable);
270        previous
271    }
272
273    pub fn process(&mut self, s: &str, mode: Mode) -> String {
274        self.process_str(s, mode).output()
275    }
276    pub fn process_str(&mut self, s: &str, mode: Mode) -> &Self {
277        for key in s.chars() {
278            self.process_key(key, mode);
279        }
280        self
281    }
282
283    pub fn process_key(&mut self, key: char, mode: Mode) {
284        let lower_key = lower(key);
285        let is_upper_case = is_upper(key);
286
287        if mode == Mode::English || !self.can_process_key_raw(lower_key) {
288            if crate::utils::is_word_break_symbol(lower_key) {
289                self.commit();
290            }
291            let trans = crate::bamboo_util::new_appending_trans(
292                lower_key,
293                is_upper_case,
294            );
295            self.push_active(trans);
296            if crate::utils::is_word_break_symbol(lower_key) {
297                self.commit();
298            }
299            return;
300        }
301
302        let current = self.active_composition_owned();
303        let next = self.new_composition(current, lower_key, is_upper_case);
304        self.set_active_composition(next);
305    }
306
307    fn push_active(&mut self, trans: Transformation) {
308        if self.active_len < MAX_ACTIVE_TRANS {
309            self.active_buffer[self.active_len] = Some(trans);
310            self.active_len += 1;
311        }
312    }
313
314    pub fn commit(&mut self) {
315        if self.active_len == 0 {
316            return;
317        }
318        let word = self.output();
319        self.committed_text.push_str(&word);
320        self.active_len = 0;
321    }
322
323    pub fn output(&self) -> String {
324        let comp = self.active_composition_owned();
325        crate::flattener::flatten_slice(&comp, OutputOptions::NONE)
326    }
327
328    pub fn get_processed_str(&self, options: OutputOptions) -> String {
329        let active_comp = self.active_composition_owned();
330        if options.contains(OutputOptions::FULL_TEXT) {
331            let mut result = self.committed_text.clone();
332            result.push_str(&crate::flattener::flatten_slice(
333                &active_comp,
334                options,
335            ));
336            return result;
337        }
338        if options.contains(OutputOptions::PUNCTUATION_MODE) {
339            let refs = self.active_composition();
340            let (_, tail) = crate::bamboo_util::extract_last_word_with_punctuation_marks_refs(&refs, &self.input_method.keys);
341            return crate::flattener::flatten(&tail, OutputOptions::NONE);
342        }
343        crate::flattener::flatten_slice(&active_comp, options)
344    }
345
346    pub fn is_valid(&self, input_is_full_complete: bool) -> bool {
347        let comp = self.active_composition_owned();
348        self.is_valid_internal(&comp, input_is_full_complete)
349    }
350
351    fn is_valid_internal(
352        &self,
353        composition: &[Transformation],
354        input_is_full_complete: bool,
355    ) -> bool {
356        let refs: Vec<&Transformation> = composition.iter().collect();
357        crate::bamboo_util::is_valid(&refs, input_is_full_complete)
358    }
359
360    pub fn restore_last_word(&mut self, to_vietnamese: bool) {
361        let comp = self.active_composition_owned();
362        let refs: Vec<&Transformation> = comp.iter().collect();
363        let (prev_refs, _) = crate::bamboo_util::extract_last_word(
364            &refs,
365            Some(&self.input_method.keys),
366        );
367        let prev_len = prev_refs.len();
368
369        let mut active = comp;
370        let last = active.split_off(prev_len);
371        let mut previous = active;
372
373        if last.is_empty() {
374            self.set_active_composition(previous);
375            return;
376        }
377        if !to_vietnamese {
378            previous.extend(crate::bamboo_util::break_composition_slice(&last));
379            self.set_active_composition(previous);
380            return;
381        }
382
383        let mut new_comp: Vec<Transformation> = Vec::new();
384        for t in last {
385            if t.rule.key == '\0' {
386                continue;
387            }
388            new_comp =
389                self.new_composition(new_comp, t.rule.key, t.is_upper_case);
390        }
391        previous.extend(new_comp);
392        self.set_active_composition(previous);
393    }
394
395    pub fn remove_last_char(&mut self, refresh_last_tone_target: bool) {
396        let comp = self.active_composition_owned();
397        let last_appending_idx =
398            crate::bamboo_util::find_last_appending_trans_idx(&comp);
399        let Some(last_idx) = last_appending_idx else {
400            return;
401        };
402
403        let last_appending_key = comp[last_idx].rule.key;
404        if !self.can_process_key_raw(last_appending_key) {
405            let mut next = comp;
406            next.pop();
407            self.set_active_composition(next);
408            return;
409        }
410
411        let refs: Vec<&Transformation> = comp.iter().collect();
412        let (previous_slice, _) = crate::bamboo_util::extract_last_word(
413            &refs,
414            Some(&self.input_method.keys),
415        );
416        let prev_len = previous_slice.len();
417
418        let mut previous = comp;
419        let last_comb = previous.split_off(prev_len);
420
421        let mut new_comb: Vec<Transformation> = Vec::new();
422        for (i, t) in last_comb.into_iter().enumerate() {
423            let actual_idx = prev_len + i;
424            if actual_idx == last_idx {
425                continue;
426            }
427            if let Some(target) = t.target
428                && target == last_idx
429            {
430                continue;
431            }
432            new_comb.push(t);
433        }
434
435        if refresh_last_tone_target {
436            let extra = crate::bamboo_util::refresh_last_tone_target(
437                &mut new_comb,
438                self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE
439                    != 0,
440            );
441            new_comb.extend(extra);
442        }
443
444        previous.extend(new_comb);
445        self.set_active_composition(previous);
446    }
447
448    pub fn reset(&mut self) {
449        self.committed_text.clear();
450        self.active_len = 0;
451    }
452}