1use crate::config::Config;
4use crate::input_method::{InputMethod, Rule};
5use crate::mode::{Mode, OutputOptions};
6
7pub const MAX_ACTIVE_TRANS: usize = 24;
9
10#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash)]
12pub struct Transformation {
13 pub rule: Rule,
15 pub target: Option<usize>,
18 pub is_upper_case: bool,
20}
21
22#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash)]
26pub struct TransformationStack {
27 data: [Transformation; MAX_ACTIVE_TRANS],
28 len: usize,
29}
30
31impl TransformationStack {
32 pub fn new() -> Self {
34 Self { data: [Transformation::default(); MAX_ACTIVE_TRANS], len: 0 }
35 }
36
37 pub fn push(&mut self, t: Transformation) {
40 if self.len < MAX_ACTIVE_TRANS {
41 self.data[self.len] = t;
42 self.len += 1;
43 }
44 }
45
46 #[allow(dead_code)]
48 pub fn pop(&mut self) -> Option<Transformation> {
49 if self.len > 0 {
50 self.len -= 1;
51 Some(self.data[self.len])
52 } else {
53 None
54 }
55 }
56
57 pub fn clear(&mut self) {
59 self.len = 0;
60 }
61
62 pub fn len(&self) -> usize {
64 self.len
65 }
66
67 pub fn is_empty(&self) -> bool {
69 self.len == 0
70 }
71
72 pub fn as_slice(&self) -> &[Transformation] {
74 &self.data[..self.len]
75 }
76
77 pub fn as_mut_slice(&mut self) -> &mut [Transformation] {
79 &mut self.data[..self.len]
80 }
81
82 pub fn extend_from_slice(&mut self, other: &[Transformation]) {
84 let to_copy = other.len().min(MAX_ACTIVE_TRANS - self.len);
85 if to_copy > 0 {
86 self.data[self.len..self.len + to_copy].copy_from_slice(&other[..to_copy]);
87 self.len += to_copy;
88 }
89 }
90
91 pub fn drain_to(&mut self, start: usize, target: &mut TransformationStack) {
93 target.clear();
94 if start < self.len {
95 target.extend_from_slice(&self.data[start..self.len]);
96 self.len = start;
97 }
98 }
99}
100
/// Lower-cases a single character.
///
/// ASCII takes the cheap path; for other characters the first character of the
/// Unicode lower-case mapping is used, falling back to `c` itself.
#[inline]
fn lower(c: char) -> char {
    if c.is_ascii() {
        return c.to_ascii_lowercase();
    }
    let mut folded = c.to_lowercase();
    folded.next().unwrap_or(c)
}
105
/// Reports whether `c` is an upper-case character.
///
/// ASCII is tested directly; any other character counts as upper case when the
/// first character of its Unicode lower-case mapping differs from it.
#[inline]
fn is_upper(c: char) -> bool {
    match c.is_ascii() {
        true => c.is_ascii_uppercase(),
        false => c.to_lowercase().next().is_some_and(|folded| folded != c),
    }
}
110
/// Returns `true` when `s` contains `uơ` or `ưo` immediately followed by a letter.
///
/// Every occurrence of each pattern is examined, so an early occurrence that is
/// followed by a non-letter (or by nothing) does not hide a later one that
/// qualifies.
fn uoh_tail_match(s: &str) -> bool {
    ["uơ", "ưo"].iter().any(|&pat| {
        s.match_indices(pat).any(|(idx, hit)| {
            s[idx + hit.len()..]
                .chars()
                .next()
                .is_some_and(char::is_alphabetic)
        })
    })
}
122
123pub struct Engine {
128 committed_text: String,
129 active_buffer: [Transformation; MAX_ACTIVE_TRANS],
131 active_len: usize,
132
133 input_method: InputMethod,
134 all_rules: Box<[Rule]>,
135 ascii_rule_indices: [(u16, u16); 128],
136 non_ascii_rule_indices: Box<[(char, (u16, u16))]>,
137 ascii_effect_keys: [bool; 128],
138 non_ascii_effect_keys: Vec<char>,
139 config: Config,
140
141 work_comp: TransformationStack,
143 scratch_comp: TransformationStack,
144
145 prev_preedit: String,
146 delta_buf: String,
147
148 dfa: crate::dfa::Dfa,
149 current_state_id: u32,
150}
151
152impl Engine {
153 pub fn new(input_method: InputMethod) -> Self {
155 Self::with_config(input_method, Config::default())
156 }
157
158 pub fn with_config(input_method: InputMethod, config: Config) -> Self {
160 let mut rules_by_key: std::collections::BTreeMap<char, Vec<Rule>> =
161 std::collections::BTreeMap::new();
162 for rule in &input_method.rules {
163 let key = lower(rule.key);
164 rules_by_key.entry(key).or_default().push(*rule);
165 }
166
167 let total_rules: usize = rules_by_key.values().map(|v| v.len()).sum();
168 let mut all_rules_vec = Vec::with_capacity(total_rules);
169 let mut ascii_rule_indices = [(0u16, 0u16); 128];
170 let mut non_ascii_indices_vec = Vec::new();
171
172 for (key, rules) in rules_by_key {
173 let start = all_rules_vec.len() as u16;
174 all_rules_vec.extend(rules);
175 let end = all_rules_vec.len() as u16;
176 if key.is_ascii() {
177 ascii_rule_indices[key as usize] = (start, end);
178 } else {
179 non_ascii_indices_vec.push((key, (start, end)));
180 }
181 }
182
183 let mut ascii_effect_keys = [false; 128];
184 let mut non_ascii_effect_keys: Vec<char> = Vec::new();
185 for key in &input_method.keys {
186 if key.is_ascii() {
187 ascii_effect_keys[*key as usize] = true;
188 } else {
189 non_ascii_effect_keys.push(*key);
190 }
191 }
192 non_ascii_effect_keys.sort_unstable();
193 non_ascii_effect_keys.dedup();
194
195 Self {
196 committed_text: String::new(),
197 active_buffer: [Transformation::default(); MAX_ACTIVE_TRANS],
198 active_len: 0,
199 input_method,
200 all_rules: all_rules_vec.into_boxed_slice(),
201 ascii_rule_indices,
202 non_ascii_rule_indices: non_ascii_indices_vec.into_boxed_slice(),
203 ascii_effect_keys,
204 non_ascii_effect_keys,
205 config,
206
207 work_comp: TransformationStack::new(),
208 scratch_comp: TransformationStack::new(),
209
210 prev_preedit: String::with_capacity(64),
211 delta_buf: String::with_capacity(64),
212 dfa: crate::dfa::Dfa::new(),
213 current_state_id: 0,
214 }
215 }
216
217 #[inline]
218 pub(crate) fn active_slice(&self) -> &[Transformation] {
219 &self.active_buffer[..self.active_len]
220 }
221
222 fn take_active_into(&mut self, out: &mut TransformationStack) {
223 out.clear();
224 out.extend_from_slice(self.active_slice());
225 self.active_len = 0;
226 }
227
228 fn set_active_from_stack(&mut self, src: &mut TransformationStack) {
229 self.active_len = src.len().min(MAX_ACTIVE_TRANS);
230 self.active_buffer[..self.active_len].copy_from_slice(src.as_slice());
231 src.clear();
232 }
233
234 pub fn config(&self) -> Config {
236 self.config
237 }
238
239 pub fn set_config(&mut self, config: Config) {
241 self.config = config;
242 }
243
244 pub fn input_method(&self) -> InputMethod {
246 self.input_method.clone()
247 }
248
249 pub fn warm_up(&mut self) {
252 let mut compiler = crate::dfa::DfaCompiler::new(&self.input_method, self.config.to_flags());
253 compiler.compile_common();
254 self.dfa = compiler.dfa;
255 self.current_state_id = 0;
256 }
257
258 fn get_applicable_rules(&self, key: char) -> &[Rule] {
259 let key = lower(key);
260 if key.is_ascii() {
261 let (start, end) = self.ascii_rule_indices[key as usize];
262 &self.all_rules[start as usize..end as usize]
263 } else {
264 self.non_ascii_rule_indices
265 .binary_search_by_key(&key, |(k, _)| *k)
266 .map(|idx| {
267 let (start, end) = self.non_ascii_rule_indices[idx].1;
268 &self.all_rules[start as usize..end as usize]
269 })
270 .unwrap_or(&[])
271 }
272 }
273
274 fn can_process_key_raw(&self, lower_key: char) -> bool {
275 if crate::utils::is_alpha(lower_key)
276 || (lower_key.is_ascii() && self.ascii_effect_keys[lower_key as usize])
277 || self.non_ascii_effect_keys.binary_search(&lower_key).is_ok()
278 {
279 return true;
280 }
281 if crate::utils::is_word_break_symbol(lower_key) {
282 return false;
283 }
284 crate::utils::is_vietnamese_rune(lower_key)
285 }
286
287 fn generate_transformations(
288 &self,
289 composition: &mut TransformationStack,
290 key: char,
291 is_upper_case: bool,
292 ) {
293 let lower_key = lower(key);
294 let mut trans_buf = TransformationStack::new();
295
296 crate::bamboo_util::generate_transformations(
297 composition.as_slice(),
298 self.get_applicable_rules(lower_key),
299 self.config.to_flags(),
300 lower_key,
301 is_upper_case,
302 &mut trans_buf,
303 );
304
305 if trans_buf.is_empty() {
306 crate::bamboo_util::generate_fallback_transformations(
307 self.get_applicable_rules(lower_key),
308 lower_key,
309 is_upper_case,
310 &mut trans_buf,
311 );
312
313 let combined_len = composition.len() + trans_buf.len();
315 if combined_len <= MAX_ACTIVE_TRANS {
316 let mut tmp_data = [Transformation::default(); MAX_ACTIVE_TRANS];
317 tmp_data[..composition.len()].copy_from_slice(composition.as_slice());
318 tmp_data[composition.len()..combined_len].copy_from_slice(trans_buf.as_slice());
319
320 if !self.input_method.super_keys.is_empty() {
321 let current_str = crate::flattener::flatten_slice(
322 &tmp_data[..combined_len],
323 OutputOptions::TONE_LESS | OutputOptions::LOWER_CASE,
324 );
325 if uoh_tail_match(¤t_str) {
326 let (target, rule) = crate::bamboo_util::find_target(
327 &tmp_data[..combined_len],
328 self.get_applicable_rules(self.input_method.super_keys[0]),
329 self.config.to_flags(),
330 );
331 if let (Some(target), Some(mut rule)) = (target, rule) {
332 rule.key = '\0';
333 trans_buf.push(Transformation {
334 rule,
335 target: Some(target),
336 is_upper_case: false,
337 });
338 }
339 }
340 }
341 }
342 }
343 composition.extend_from_slice(trans_buf.as_slice());
344 if self.config.to_flags() & crate::bamboo_util::EFREE_TONE_MARKING != 0
345 && self.is_valid_internal(composition.as_slice(), false)
346 {
347 let mut extra = TransformationStack::new();
348 crate::bamboo_util::refresh_last_tone_target_into(
349 composition.as_mut_slice(),
350 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
351 &mut extra,
352 );
353 composition.extend_from_slice(extra.as_slice());
354 }
355 }
356
357 fn last_syllable_start(composition: &[Transformation]) -> usize {
358 let mut idx = composition.len();
359 let mut last_is_vowel = false;
360 let mut found_vowel = false;
361
362 while idx > 0 {
363 let tmp = &composition[idx - 1];
364 if tmp.target.is_none() {
365 let is_v = crate::utils::is_vowel(tmp.rule.result);
366 if found_vowel && !is_v && !last_is_vowel {
367 break;
368 }
369 if is_v {
370 found_vowel = true;
371 }
372 last_is_vowel = is_v;
373 }
374 idx -= 1;
375 }
376
377 idx
378 }
379
380 fn new_composition_in_place(
381 &self,
382 composition: &mut TransformationStack,
383 scratch: &mut TransformationStack,
384 key: char,
385 is_upper_case: bool,
386 ) {
387 let syllable_abs_start = Self::last_syllable_start(composition.as_slice());
388
389 composition.drain_to(syllable_abs_start, scratch);
390
391 let offset = syllable_abs_start;
392 if offset != 0 {
393 for t in scratch.as_mut_slice().iter_mut() {
394 if let Some(target) = t.target {
395 t.target = Some(target.saturating_sub(offset));
396 }
397 }
398 }
399
400 self.generate_transformations(scratch, key, is_upper_case);
401
402 if offset != 0 {
403 for t in scratch.as_mut_slice().iter_mut() {
404 if let Some(target) = t.target {
405 t.target = Some(target + offset);
406 }
407 }
408 }
409
410 composition.extend_from_slice(scratch.as_slice());
411 }
412
413 pub fn process(&mut self, s: &str, mode: Mode) -> String {
417 self.process_str(s, mode).output()
418 }
419
420 pub fn process_str(&mut self, s: &str, mode: Mode) -> &Self {
422 for key in s.chars() {
423 self.process_key(key, mode);
424 }
425 self
426 }
427
/// Length of the longest common prefix of `a` and `b`, returned as
/// `(characters, bytes)`.
fn lcp_chars_and_bytes(a: &str, b: &str) -> (usize, usize) {
    a.chars()
        .zip(b.chars())
        .take_while(|(x, y)| x == y)
        .fold((0, 0), |(chars, bytes), (x, _)| {
            (chars + 1, bytes + x.len_utf8())
        })
}
441
442 pub fn process_key_delta(&mut self, key: char, mode: Mode) -> (usize, usize, &str) {
452 self.process_key(key, mode);
453
454 let active_len = self.active_len;
455 let active = &self.active_buffer[..active_len];
456 crate::flattener::flatten_slice_into(active, OutputOptions::NONE, &mut self.delta_buf);
457
458 let (lcp_chars, lcp_bytes) = Self::lcp_chars_and_bytes(&self.prev_preedit, &self.delta_buf);
459
460 let prev_chars = self.prev_preedit.chars().count();
461
462 let prev_bytes = self.prev_preedit.len();
463
464 let backspaces_chars = prev_chars.saturating_sub(lcp_chars);
465 let backspaces_bytes = prev_bytes.saturating_sub(lcp_bytes);
466
467 std::mem::swap(&mut self.prev_preedit, &mut self.delta_buf);
468 let inserted = &self.prev_preedit[lcp_bytes..];
469 (backspaces_chars, backspaces_bytes, inserted)
470 }
471
472 pub fn process_key_delta_into(
477 &mut self,
478 key: char,
479 mode: Mode,
480 inserted: &mut String,
481 ) -> usize {
482 let (backspaces_chars, _backspaces_bytes, ins) = self.process_key_delta(key, mode);
483 inserted.clear();
484 inserted.push_str(ins);
485 backspaces_chars
486 }
487
488 pub fn process_key(&mut self, key: char, mode: Mode) {
492 let lower_key = lower(key);
493 let is_upper_case = is_upper(key);
494
495 if mode == Mode::English || !self.can_process_key_raw(lower_key) {
496 if crate::utils::is_word_break_symbol(lower_key) {
497 self.commit();
498 }
499 let trans = crate::bamboo_util::new_appending_trans(lower_key, is_upper_case);
500 self.push_active(trans);
501 if crate::utils::is_word_break_symbol(lower_key) {
502 self.commit();
503 }
504 self.current_state_id = 0;
505 return;
506 }
507
508 if lower_key.is_ascii() && !is_upper_case {
510 let next_state_id =
511 self.dfa.get_state(self.current_state_id).transitions[lower_key as usize];
512 if next_state_id != 0 {
513 self.current_state_id = next_state_id;
514 let comp = self.dfa.get_composition(next_state_id);
515 self.active_len = comp.len().min(MAX_ACTIVE_TRANS);
516 self.active_buffer[..self.active_len].copy_from_slice(comp);
517 return;
518 }
519 }
520
521 let mut work = self.work_comp;
522 let mut scratch = self.scratch_comp;
523
524 self.take_active_into(&mut work);
525 self.new_composition_in_place(&mut work, &mut scratch, lower_key, is_upper_case);
526
527 if lower_key.is_ascii() && !is_upper_case && work.len() <= MAX_ACTIVE_TRANS {
529 let next_id = self.dfa.add_state(work.as_slice());
530 self.dfa.states[self.current_state_id as usize].transitions[lower_key as usize] =
531 next_id;
532 self.current_state_id = next_id;
533 } else {
534 self.current_state_id = self.dfa.find_state(work.as_slice()).unwrap_or(0);
535 }
536
537 self.set_active_from_stack(&mut work);
538
539 self.work_comp = work;
540 self.scratch_comp = scratch;
541 }
542
543 fn push_active(&mut self, trans: Transformation) {
544 if self.active_len < MAX_ACTIVE_TRANS {
545 self.active_buffer[self.active_len] = trans;
546 self.active_len += 1;
547 self.current_state_id = self.dfa.find_state(self.active_slice()).unwrap_or(0);
548 }
549 }
550
551 pub fn commit(&mut self) {
553 if self.active_len == 0 {
554 return;
555 }
556 let word = self.output();
557 self.committed_text.push_str(&word);
558 self.active_len = 0;
559 self.current_state_id = 0;
560 }
561
562 pub fn output(&self) -> String {
564 crate::flattener::flatten_slice(self.active_slice(), OutputOptions::NONE)
565 }
566
567 pub fn get_processed_str(&self, options: OutputOptions) -> String {
571 let active = self.active_slice();
572 if options.contains(OutputOptions::FULL_TEXT) {
573 let mut result = self.committed_text.clone();
574 result.push_str(&crate::flattener::flatten_slice(active, options));
575 return result;
576 }
577 if options.contains(OutputOptions::PUNCTUATION_MODE) {
578 if active.is_empty() {
579 return String::new();
580 }
581 let (_, tail) = crate::bamboo_util::extract_last_word_with_punctuation_marks(
582 active,
583 &self.input_method.keys,
584 );
585 return crate::flattener::flatten_slice(tail, OutputOptions::NONE);
586 }
587 crate::flattener::flatten_slice(active, options)
588 }
589
590 pub fn is_valid(&self, input_is_full_complete: bool) -> bool {
592 self.is_valid_internal(self.active_slice(), input_is_full_complete)
593 }
594
595 fn is_valid_internal(
596 &self,
597 composition: &[Transformation],
598 input_is_full_complete: bool,
599 ) -> bool {
600 crate::bamboo_util::is_valid(composition, input_is_full_complete)
601 }
602
603 pub fn restore_last_word(&mut self, to_vietnamese: bool) {
607 let mut work = self.work_comp;
608 let mut scratch = self.scratch_comp;
609
610 self.take_active_into(&mut work);
611 if work.is_empty() {
612 self.set_active_from_stack(&mut work);
613 self.current_state_id = 0;
614 return;
615 }
616
617 let (prev_slice, last) =
618 crate::bamboo_util::extract_last_word(work.as_slice(), Some(&self.input_method.keys));
619
620 let mut previous = TransformationStack::new();
621 previous.extend_from_slice(prev_slice);
622
623 if last.is_empty() {
624 self.set_active_from_stack(&mut work);
625 self.current_state_id = 0;
626 return;
627 }
628 if !to_vietnamese {
629 previous.extend_from_slice(&crate::bamboo_util::break_composition_slice(last));
630 self.set_active_from_stack(&mut previous);
631 self.current_state_id = 0;
632 return;
633 }
634
635 let mut new_comp = TransformationStack::new();
636 for t in last {
637 if t.rule.key == '\0' {
638 continue;
639 }
640 self.new_composition_in_place(&mut new_comp, &mut scratch, t.rule.key, t.is_upper_case);
641 }
642 previous.extend_from_slice(new_comp.as_slice());
643
644 self.set_active_from_stack(&mut previous);
645 self.current_state_id = 0;
646 }
647
648 pub fn remove_last_char(&mut self, refresh_last_tone_target: bool) {
650 let mut work = self.work_comp;
651 let mut scratch = self.scratch_comp;
652
653 self.take_active_into(&mut work);
654
655 let last_key_idx = work
657 .as_slice()
658 .iter()
659 .enumerate()
660 .rev()
661 .find(|(_, t)| t.rule.key != '\0')
662 .map(|(i, _)| i);
663
664 let Some(idx) = last_key_idx else {
665 self.set_active_from_stack(&mut work);
666 self.current_state_id = 0;
667 return;
668 };
669
670 let (prev_slice, last_comb_slice) =
671 crate::bamboo_util::extract_last_word(work.as_slice(), Some(&self.input_method.keys));
672
673 let mut previous = TransformationStack::new();
674 previous.extend_from_slice(prev_slice);
675
676 let last_comb = last_comb_slice;
677 let idx_in_last = idx as isize - prev_slice.len() as isize;
678
679 let mut new_word_comp = TransformationStack::new();
680 for (i, t) in last_comb.iter().enumerate() {
681 if i as isize == idx_in_last {
682 continue; }
684 if t.rule.key == '\0' {
685 continue; }
687 self.new_composition_in_place(
689 &mut new_word_comp,
690 &mut scratch,
691 t.rule.key,
692 t.is_upper_case,
693 );
694 }
695
696 if refresh_last_tone_target {
697 let mut extra = TransformationStack::new();
698 crate::bamboo_util::refresh_last_tone_target_into(
699 new_word_comp.as_mut_slice(),
700 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
701 &mut extra,
702 );
703 new_word_comp.extend_from_slice(extra.as_slice());
704 }
705
706 previous.extend_from_slice(new_word_comp.as_slice());
707 self.set_active_from_stack(&mut previous);
708 self.current_state_id = self.dfa.find_state(self.active_slice()).unwrap_or(0);
709 }
710
711 pub fn reset(&mut self) {
713 self.committed_text.clear();
714 self.active_len = 0;
715 self.prev_preedit.clear();
716 self.delta_buf.clear();
717 self.current_state_id = 0;
718 }
719}
720
#[cfg(test)]
mod tests {
    use super::*;

    /// Types `a`, `s`, space with Telex and checks the reported delta after each key.
    #[test]
    fn delta_backspaces_and_inserted() {
        let mut engine = Engine::new(InputMethod::telex());

        // Plain letter: nothing to delete, the letter itself is inserted.
        let (backspaces, _, inserted) = engine.process_key_delta('a', Mode::Vietnamese);
        assert_eq!(backspaces, 0, "First 'a' should have 0 backspaces");
        assert_eq!(inserted, "a");

        // Tone key: the previous letter is replaced by its toned form.
        let (backspaces, _, inserted) = engine.process_key_delta('s', Mode::Vietnamese);
        assert_eq!(backspaces, 1, "Adding 's' to 'a' should have 1 backspace for 'á'");
        assert_eq!(inserted, "á");

        // Word break: the composition is committed, so the pre-edit becomes empty.
        let (backspaces, _, inserted) = engine.process_key_delta(' ', Mode::Vietnamese);
        assert_eq!(backspaces, 1, "Space should clear the preedit 'á'");
        assert_eq!(inserted, "");
    }
}
742}