1use crate::config::Config;
4use crate::input_method::{InputMethod, Rule};
5use crate::mode::{Mode, OutputOptions};
6use crate::utils::{is_upper, lower};
7
/// Capacity, in transformations, of every fixed-size composition buffer in this
/// module ([`TransformationStack`] and the engine's active buffer).
pub const MAX_ACTIVE_TRANS: usize = 16;
10
11#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash)]
13pub struct Transformation {
14 pub rule: Rule,
16 pub target: Option<usize>,
19 pub is_upper_case: bool,
21}
22
23#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash)]
27pub struct TransformationStack {
28 data: [Transformation; MAX_ACTIVE_TRANS],
29 len: usize,
30}
31
32impl TransformationStack {
33 pub fn new() -> Self {
35 Self { data: [Transformation::default(); MAX_ACTIVE_TRANS], len: 0 }
36 }
37
38 pub fn push(&mut self, t: Transformation) {
41 debug_assert!(
42 self.len < MAX_ACTIVE_TRANS,
43 "TransformationStack overflow: max {MAX_ACTIVE_TRANS} reached"
44 );
45 if self.len < MAX_ACTIVE_TRANS {
46 self.data[self.len] = t;
47 self.len += 1;
48 }
49 }
50
51 #[allow(dead_code)]
53 pub fn pop(&mut self) -> Option<Transformation> {
54 if self.len > 0 {
55 self.len -= 1;
56 Some(self.data[self.len])
57 } else {
58 None
59 }
60 }
61
62 pub fn clear(&mut self) {
64 self.len = 0;
65 }
66
67 pub fn len(&self) -> usize {
69 self.len
70 }
71
72 pub fn is_empty(&self) -> bool {
74 self.len == 0
75 }
76
77 pub fn as_slice(&self) -> &[Transformation] {
79 &self.data[..self.len]
80 }
81
82 pub fn as_mut_slice(&mut self) -> &mut [Transformation] {
84 &mut self.data[..self.len]
85 }
86
87 pub fn extend_from_slice(&mut self, other: &[Transformation]) {
89 let to_copy = other.len().min(MAX_ACTIVE_TRANS - self.len);
90 if to_copy > 0 {
91 self.data[self.len..self.len + to_copy].copy_from_slice(&other[..to_copy]);
92 self.len += to_copy;
93 }
94 }
95
96 pub fn drain_to(&mut self, start: usize, target: &mut TransformationStack) {
98 target.clear();
99 if start < self.len {
100 target.extend_from_slice(&self.data[start..self.len]);
101 self.len = start;
102 }
103 }
104}
105
/// Returns `true` when `s` contains "uơ" or "ưo" immediately followed by an
/// alphabetic character.
///
/// Every occurrence of each pattern is examined, so a match is found even when
/// an earlier occurrence is followed by a non-letter or by the end of the string.
#[inline]
fn uoh_tail_match(s: &str) -> bool {
    ["uơ", "ưo"].iter().any(|pat| {
        s.match_indices(*pat).any(|(idx, hit)| {
            // Slicing at `idx + hit.len()` is a char boundary: it is the end of a match.
            s[idx + hit.len()..]
                .chars()
                .next()
                .is_some_and(char::is_alphabetic)
        })
    })
}
118
119pub struct Engine {
124 committed_text: String,
125 active_buffer: [Transformation; MAX_ACTIVE_TRANS],
127 active_len: usize,
128
129 input_method: InputMethod,
130 all_rules: Box<[Rule]>,
131 ascii_rule_indices: [(u16, u16); 128],
132 non_ascii_rule_indices: Box<[(char, (u16, u16))]>,
133 ascii_effect_keys: [bool; 128],
134 non_ascii_effect_keys: Vec<char>,
135 config: Config,
136
137 work_comp: TransformationStack,
139 scratch_comp: TransformationStack,
140
141 prev_preedit: String,
142 delta_buf: String,
143
144 dfa: crate::dfa::Dfa,
145 current_state_id: u32,
146}
147
148impl Engine {
149 pub fn new(input_method: InputMethod) -> Self {
151 Self::with_config(input_method, Config::default())
152 }
153
154 pub fn with_config(input_method: InputMethod, config: Config) -> Self {
156 let mut rules_by_key: std::collections::BTreeMap<char, Vec<Rule>> =
157 std::collections::BTreeMap::new();
158 for rule in &input_method.rules {
159 let key = lower(rule.key);
160 rules_by_key.entry(key).or_default().push(*rule);
161 }
162
163 let total_rules: usize = rules_by_key.values().map(|v| v.len()).sum();
164 let mut all_rules_vec = Vec::with_capacity(total_rules);
165 let mut ascii_rule_indices = [(0u16, 0u16); 128];
166 let mut non_ascii_indices_vec = Vec::new();
167
168 for (key, rules) in rules_by_key {
169 let start = all_rules_vec.len() as u16;
170 all_rules_vec.extend(rules);
171 let end = all_rules_vec.len() as u16;
172 if key.is_ascii() {
173 ascii_rule_indices[key as usize] = (start, end);
174 } else {
175 non_ascii_indices_vec.push((key, (start, end)));
176 }
177 }
178
179 let mut ascii_effect_keys = [false; 128];
180 let mut non_ascii_effect_keys: Vec<char> = Vec::new();
181 for key in &input_method.keys {
182 if key.is_ascii() {
183 ascii_effect_keys[*key as usize] = true;
184 } else {
185 non_ascii_effect_keys.push(*key);
186 }
187 }
188 non_ascii_effect_keys.sort_unstable();
189 non_ascii_effect_keys.dedup();
190
191 Self {
192 committed_text: String::new(),
193 active_buffer: [Transformation::default(); MAX_ACTIVE_TRANS],
194 active_len: 0,
195 input_method,
196 all_rules: all_rules_vec.into_boxed_slice(),
197 ascii_rule_indices,
198 non_ascii_rule_indices: non_ascii_indices_vec.into_boxed_slice(),
199 ascii_effect_keys,
200 non_ascii_effect_keys,
201 config,
202
203 work_comp: TransformationStack::new(),
204 scratch_comp: TransformationStack::new(),
205
206 prev_preedit: String::with_capacity(64),
207 delta_buf: String::with_capacity(64),
208 dfa: crate::dfa::Dfa::new(),
209 current_state_id: 0,
210 }
211 }
212
213 #[inline]
214 pub(crate) fn active_slice(&self) -> &[Transformation] {
215 &self.active_buffer[..self.active_len]
216 }
217
218 fn take_active_into(&mut self, out: &mut TransformationStack) {
219 out.clear();
220 out.extend_from_slice(self.active_slice());
221 self.active_len = 0;
222 }
223
224 fn set_active_from_stack(&mut self, src: &mut TransformationStack) {
225 self.active_len = src.len().min(MAX_ACTIVE_TRANS);
226 self.active_buffer[..self.active_len].copy_from_slice(src.as_slice());
227 src.clear();
228 }
229
230 pub fn config(&self) -> Config {
232 self.config
233 }
234
235 pub fn set_config(&mut self, config: Config) {
237 self.config = config;
238 }
239
240 pub fn input_method(&self) -> InputMethod {
242 self.input_method.clone()
243 }
244
245 #[deprecated(
254 since = "0.3.4",
255 note = "Engine::warm_up() is unstable and may be removed. It uses a Telex-biased heuristic and may regress cold-start or non-Telex workloads."
256 )]
257 pub fn warm_up(&mut self) {
258 let mut compiler = crate::dfa::DfaCompiler::new(&self.input_method, self.config.to_flags());
259 compiler.compile_common();
260 self.dfa = compiler.dfa;
261 self.current_state_id = 0;
262 }
263
264 fn get_applicable_rules(&self, key: char) -> &[Rule] {
265 let key = lower(key);
266 if key.is_ascii() {
267 let (start, end) = self.ascii_rule_indices[key as usize];
268 &self.all_rules[start as usize..end as usize]
269 } else {
270 self.non_ascii_rule_indices
271 .binary_search_by_key(&key, |(k, _)| *k)
272 .map(|idx| {
273 let (start, end) = self.non_ascii_rule_indices[idx].1;
274 &self.all_rules[start as usize..end as usize]
275 })
276 .unwrap_or(&[])
277 }
278 }
279
280 fn can_process_key_raw(&self, lower_key: char) -> bool {
281 if crate::utils::is_alpha(lower_key)
282 || (lower_key.is_ascii() && self.ascii_effect_keys[lower_key as usize])
283 || self.non_ascii_effect_keys.binary_search(&lower_key).is_ok()
284 {
285 return true;
286 }
287 if crate::utils::is_word_break_symbol(lower_key) {
288 return false;
289 }
290 crate::utils::is_vietnamese_rune(lower_key)
291 }
292
293 fn generate_transformations(
294 &self,
295 composition: &mut TransformationStack,
296 key: char,
297 is_upper_case: bool,
298 ) {
299 let lower_key = lower(key);
300 let mut trans_buf = TransformationStack::new();
301
302 crate::bamboo_util::generate_transformations(
303 composition.as_slice(),
304 self.get_applicable_rules(lower_key),
305 self.config.to_flags(),
306 lower_key,
307 is_upper_case,
308 &mut trans_buf,
309 );
310
311 if trans_buf.is_empty() {
312 crate::bamboo_util::generate_fallback_transformations(
313 self.get_applicable_rules(lower_key),
314 lower_key,
315 is_upper_case,
316 &mut trans_buf,
317 );
318
319 let combined_len = composition.len() + trans_buf.len();
321 if combined_len <= MAX_ACTIVE_TRANS {
322 let mut tmp_data = [Transformation::default(); MAX_ACTIVE_TRANS];
323 tmp_data[..composition.len()].copy_from_slice(composition.as_slice());
324 tmp_data[composition.len()..combined_len].copy_from_slice(trans_buf.as_slice());
325
326 if !self.input_method.super_keys.is_empty() {
327 let current_str = crate::flattener::flatten_slice(
328 &tmp_data[..combined_len],
329 OutputOptions::TONE_LESS | OutputOptions::LOWER_CASE,
330 );
331 if uoh_tail_match(¤t_str) {
332 let (target, rule) = crate::bamboo_util::find_target(
333 &tmp_data[..combined_len],
334 self.get_applicable_rules(self.input_method.super_keys[0]),
335 self.config.to_flags(),
336 );
337 if let (Some(target), Some(mut rule)) = (target, rule) {
338 rule.key = '\0';
339 trans_buf.push(Transformation {
340 rule,
341 target: Some(target),
342 is_upper_case: false,
343 });
344 }
345 }
346 }
347 }
348 }
349 composition.extend_from_slice(trans_buf.as_slice());
350 if self.config.to_flags() & crate::bamboo_util::EFREE_TONE_MARKING != 0
351 && self.is_valid_internal(composition.as_slice(), false)
352 {
353 let mut extra = TransformationStack::new();
354 crate::bamboo_util::refresh_last_tone_target_into(
355 composition.as_mut_slice(),
356 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
357 &mut extra,
358 );
359 composition.extend_from_slice(extra.as_slice());
360 }
361 }
362
363 fn last_syllable_start(composition: &[Transformation]) -> usize {
364 let mut idx = composition.len();
365 let mut last_is_vowel = false;
366 let mut found_vowel = false;
367
368 while idx > 0 {
369 let tmp = &composition[idx - 1];
370 if tmp.target.is_none() {
371 let is_v = crate::utils::is_vowel(tmp.rule.result);
372 if found_vowel && !is_v && !last_is_vowel {
373 break;
374 }
375 if is_v {
376 found_vowel = true;
377 }
378 last_is_vowel = is_v;
379 }
380 idx -= 1;
381 }
382
383 idx
384 }
385
386 fn new_composition_in_place(
387 &self,
388 composition: &mut TransformationStack,
389 scratch: &mut TransformationStack,
390 key: char,
391 is_upper_case: bool,
392 ) {
393 let syllable_abs_start = Self::last_syllable_start(composition.as_slice());
394
395 composition.drain_to(syllable_abs_start, scratch);
396
397 let offset = syllable_abs_start;
398 if offset != 0 {
399 for t in scratch.as_mut_slice().iter_mut() {
400 if let Some(target) = t.target {
401 t.target = Some(target.saturating_sub(offset));
402 }
403 }
404 }
405
406 self.generate_transformations(scratch, key, is_upper_case);
407
408 if offset != 0 {
409 for t in scratch.as_mut_slice().iter_mut() {
410 if let Some(target) = t.target {
411 t.target = Some(target + offset);
412 }
413 }
414 }
415
416 composition.extend_from_slice(scratch.as_slice());
417 }
418
419 pub fn process(&mut self, s: &str, mode: Mode) -> String {
423 self.process_str(s, mode).output()
424 }
425
426 pub fn process_str(&mut self, s: &str, mode: Mode) -> &Self {
428 for key in s.chars() {
429 self.process_key(key, mode);
430 }
431 self
432 }
433
/// Measures the longest common prefix of `a` and `b`.
///
/// Returns `(chars, bytes)`: the prefix length counted in characters and in
/// UTF-8 bytes.
fn lcp_chars_and_bytes(a: &str, b: &str) -> (usize, usize) {
    let mut shared = (0usize, 0usize);
    let mut right = b.chars();

    for left in a.chars() {
        match right.next() {
            Some(other) if other == left => {
                shared.0 += 1;
                shared.1 += left.len_utf8();
            }
            // Mismatch, or `b` ran out first.
            _ => break,
        }
    }

    shared
}
447
448 pub fn process_key_delta(&mut self, key: char, mode: Mode) -> (usize, usize, &str) {
458 self.process_key(key, mode);
459
460 let active_len = self.active_len;
461 let active = &self.active_buffer[..active_len];
462 crate::flattener::flatten_slice_into(active, OutputOptions::NONE, &mut self.delta_buf);
463
464 let (_lcp_chars, lcp_bytes) =
465 Self::lcp_chars_and_bytes(&self.prev_preedit, &self.delta_buf);
466
467 let prev_bytes = self.prev_preedit.len();
468
469 let backspaces_chars = self.prev_preedit[lcp_bytes..].chars().count();
471 let backspaces_bytes = prev_bytes.saturating_sub(lcp_bytes);
472
473 std::mem::swap(&mut self.prev_preedit, &mut self.delta_buf);
474 let inserted = &self.prev_preedit[lcp_bytes..];
475 (backspaces_chars, backspaces_bytes, inserted)
476 }
477
478 pub fn process_key_delta_into(
483 &mut self,
484 key: char,
485 mode: Mode,
486 inserted: &mut String,
487 ) -> usize {
488 let (backspaces_chars, _backspaces_bytes, ins) = self.process_key_delta(key, mode);
489 inserted.clear();
490 inserted.push_str(ins);
491 backspaces_chars
492 }
493
494 pub fn process_key(&mut self, key: char, mode: Mode) {
498 let lower_key = lower(key);
499 let is_upper_case = is_upper(key);
500
501 if mode == Mode::English || !self.can_process_key_raw(lower_key) {
502 if crate::utils::is_word_break_symbol(lower_key) {
503 self.commit();
504 }
505 let trans = crate::bamboo_util::new_appending_trans(lower_key, is_upper_case);
506 self.push_active(trans);
507 if crate::utils::is_word_break_symbol(lower_key) {
508 self.commit();
509 }
510 self.current_state_id = 0;
511 return;
512 }
513
514 if lower_key.is_ascii() && !is_upper_case {
516 let next_state_id =
517 self.dfa.get_state(self.current_state_id).transitions[lower_key as usize];
518 if next_state_id != 0 {
519 self.current_state_id = next_state_id;
520 let comp = self.dfa.get_composition(next_state_id);
521 self.active_len = comp.len().min(MAX_ACTIVE_TRANS);
522 self.active_buffer[..self.active_len].copy_from_slice(comp);
523 return;
524 }
525 }
526
527 let mut work = self.work_comp;
528 let mut scratch = self.scratch_comp;
529
530 self.take_active_into(&mut work);
531 self.new_composition_in_place(&mut work, &mut scratch, lower_key, is_upper_case);
532
533 if lower_key.is_ascii() && !is_upper_case && work.len() <= MAX_ACTIVE_TRANS {
535 let next_id = self.dfa.add_state(work.as_slice());
536 self.dfa.states[self.current_state_id as usize].transitions[lower_key as usize] =
537 next_id;
538 self.current_state_id = next_id;
539 } else {
540 self.current_state_id = self.dfa.find_state(work.as_slice()).unwrap_or(0);
541 }
542
543 self.set_active_from_stack(&mut work);
544
545 self.work_comp = work;
546 self.scratch_comp = scratch;
547 }
548
549 fn push_active(&mut self, trans: Transformation) {
550 if self.active_len >= MAX_ACTIVE_TRANS {
551 self.commit();
553 }
554 self.active_buffer[self.active_len] = trans;
555 self.active_len += 1;
556 self.current_state_id = self.dfa.find_state(self.active_slice()).unwrap_or(0);
557 }
558
559 pub fn commit(&mut self) {
561 if self.active_len == 0 {
562 return;
563 }
564 let word = self.output();
565 self.committed_text.push_str(&word);
566 self.active_len = 0;
567 self.current_state_id = 0;
568 }
569
570 pub fn output(&self) -> String {
572 crate::flattener::flatten_slice(self.active_slice(), OutputOptions::NONE)
573 }
574
575 pub fn get_processed_str(&self, options: OutputOptions) -> String {
579 let active = self.active_slice();
580 if options.contains(OutputOptions::FULL_TEXT) {
581 let mut result = self.committed_text.clone();
582 result.push_str(&crate::flattener::flatten_slice(active, options));
583 return result;
584 }
585 if options.contains(OutputOptions::PUNCTUATION_MODE) {
586 if active.is_empty() {
587 return String::new();
588 }
589 let (_, tail) = crate::bamboo_util::extract_last_word_with_punctuation_marks(
590 active,
591 &self.input_method.keys,
592 );
593 return crate::flattener::flatten_slice(tail, OutputOptions::NONE);
594 }
595 crate::flattener::flatten_slice(active, options)
596 }
597
598 pub fn is_valid(&self, input_is_full_complete: bool) -> bool {
600 self.is_valid_internal(self.active_slice(), input_is_full_complete)
601 }
602
603 fn is_valid_internal(
604 &self,
605 composition: &[Transformation],
606 input_is_full_complete: bool,
607 ) -> bool {
608 crate::bamboo_util::is_valid(composition, input_is_full_complete)
609 }
610
611 pub fn restore_last_word(&mut self, to_vietnamese: bool) {
615 let mut work = self.work_comp;
616
617 self.take_active_into(&mut work);
618 if work.is_empty() {
619 self.set_active_from_stack(&mut work);
620 self.current_state_id = 0;
621 return;
622 }
623
624 let (prev_slice, last) =
625 crate::bamboo_util::extract_last_word(work.as_slice(), Some(&self.input_method.keys));
626
627 let mut previous = TransformationStack::new();
628 previous.extend_from_slice(prev_slice);
629
630 if last.is_empty() {
631 self.set_active_from_stack(&mut work);
632 self.current_state_id = 0;
633 return;
634 }
635 if !to_vietnamese {
636 previous.extend_from_slice(&crate::bamboo_util::break_composition_slice(last));
637 self.set_active_from_stack(&mut previous);
638 self.current_state_id = 0;
639 return;
640 }
641
642 let mut new_comp = TransformationStack::new();
643 let mut temp_engine = Self::with_config(self.input_method.clone(), self.config);
644
645 for t in last {
646 if t.rule.key == '\0' {
647 continue;
648 }
649 temp_engine.process_key(t.rule.key, Mode::Vietnamese);
650 }
651 new_comp.extend_from_slice(temp_engine.active_slice());
652
653 previous.extend_from_slice(new_comp.as_slice());
654
655 self.set_active_from_stack(&mut previous);
656 self.current_state_id = 0;
657 }
658
659 pub fn remove_last_char(&mut self, refresh_last_tone_target: bool) {
661 let mut work = self.work_comp;
662
663 self.take_active_into(&mut work);
664
665 let last_key_idx = work
667 .as_slice()
668 .iter()
669 .enumerate()
670 .rev()
671 .find(|(_, t)| t.rule.key != '\0')
672 .map(|(i, _)| i);
673
674 let Some(idx) = last_key_idx else {
675 self.set_active_from_stack(&mut work);
676 self.current_state_id = 0;
677 return;
678 };
679
680 let (prev_slice, last_comb_slice) =
681 crate::bamboo_util::extract_last_word(work.as_slice(), Some(&self.input_method.keys));
682
683 let mut previous = TransformationStack::new();
684 previous.extend_from_slice(prev_slice);
685
686 let last_comb = last_comb_slice;
687 let idx_in_last = idx as isize - prev_slice.len() as isize;
688
689 let mut new_word_comp = TransformationStack::new();
690 let mut temp_engine = Self::with_config(self.input_method.clone(), self.config);
691
692 for (i, t) in last_comb.iter().enumerate() {
693 if i as isize == idx_in_last {
694 continue;
695 }
696 if t.rule.key == '\0' {
697 continue;
698 }
699 temp_engine.process_key(t.rule.key, Mode::Vietnamese);
700 }
701
702 new_word_comp.extend_from_slice(temp_engine.active_slice());
703
704 if refresh_last_tone_target {
705 let mut extra = TransformationStack::new();
706 crate::bamboo_util::refresh_last_tone_target_into(
707 new_word_comp.as_mut_slice(),
708 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
709 &mut extra,
710 );
711 new_word_comp.extend_from_slice(extra.as_slice());
712 }
713
714 previous.extend_from_slice(new_word_comp.as_slice());
715 self.set_active_from_stack(&mut previous);
716 self.current_state_id = self.dfa.find_state(self.active_slice()).unwrap_or(0);
717 }
718
719 pub fn reset(&mut self) {
721 self.committed_text.clear();
722 self.active_len = 0;
723 self.prev_preedit.clear();
724 self.delta_buf.clear();
725 self.current_state_id = 0;
726 }
727}
728
#[cfg(test)]
mod tests {
    use super::*;

    /// Types "as" followed by a space on Telex and checks the reported delta
    /// after every key.
    #[test]
    fn delta_backspaces_and_inserted() {
        let mut engine = Engine::new(InputMethod::telex());

        // (key, expected backspaces, expected inserted text, failure message)
        let steps = [
            ('a', 0usize, "a", "First 'a' should have 0 backspaces"),
            ('s', 1, "á", "Adding 's' to 'a' should have 1 backspace for 'á'"),
            (' ', 1, "", "Space should clear the preedit 'á'"),
        ];

        for (key, want_backspaces, want_inserted, why) in steps {
            let (backspaces, _bytes, inserted) =
                engine.process_key_delta(key, Mode::Vietnamese);
            assert_eq!(backspaces, want_backspaces, "{}", why);
            assert_eq!(inserted, want_inserted);
        }
    }
}