1use crate::config::Config;
4use crate::input_method::{InputMethod, Rule};
5use crate::mode::{Mode, OutputOptions};
6
7const MAX_ACTIVE_TRANS: usize = 32;
8
9#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Hash)]
11pub struct Transformation {
12 pub rule: Rule,
14 pub target: Option<usize>,
17 pub is_upper_case: bool,
19}
20
21#[inline]
22fn lower(c: char) -> char {
23 if c.is_ascii() { c.to_ascii_lowercase() } else { c.to_lowercase().next().unwrap_or(c) }
24}
25
26#[inline]
27fn is_upper(c: char) -> bool {
28 if c.is_ascii() { c.is_ascii_uppercase() } else { lower(c) != c }
29}
30
31fn uoh_tail_match(s: &str) -> bool {
32 for pat in ["uơ", "ưo"] {
33 if let Some(idx) = s.find(pat) {
34 let after = &s[idx + pat.len()..];
35 if after.chars().next().is_some_and(|c| c.is_alphabetic()) {
36 return true;
37 }
38 }
39 }
40 false
41}
42
43pub struct Engine {
47 committed_text: String,
48 active_buffer: [Transformation; MAX_ACTIVE_TRANS],
50 active_len: usize,
51
52 input_method: InputMethod,
53 all_rules: Box<[Rule]>,
54 ascii_rule_indices: [(u16, u16); 128],
55 non_ascii_rule_indices: Box<[(char, (u16, u16))]>,
56 ascii_effect_keys: [bool; 128],
57 non_ascii_effect_keys: Vec<char>,
58 config: Config,
59
60 work_comp: Vec<Transformation>,
62 scratch_comp: Vec<Transformation>,
63
64 prev_preedit: String,
65 delta_buf: String,
66
67 dfa: crate::dfa::Dfa,
68 current_state_id: u32,
69}
70
71impl Engine {
72 pub fn new(input_method: InputMethod) -> Self {
74 Self::with_config(input_method, Config::default())
75 }
76
77 pub fn with_config(input_method: InputMethod, config: Config) -> Self {
79 let mut rules_by_key: std::collections::BTreeMap<char, Vec<Rule>> =
80 std::collections::BTreeMap::new();
81 for rule in &input_method.rules {
82 let key = lower(rule.key);
83 rules_by_key.entry(key).or_default().push(*rule);
84 }
85
86 let total_rules: usize = rules_by_key.values().map(|v| v.len()).sum();
87 let mut all_rules_vec = Vec::with_capacity(total_rules);
88 let mut ascii_rule_indices = [(0u16, 0u16); 128];
89 let mut non_ascii_indices_vec = Vec::new();
90
91 for (key, rules) in rules_by_key {
92 let start = all_rules_vec.len() as u16;
93 all_rules_vec.extend(rules);
94 let end = all_rules_vec.len() as u16;
95 if key.is_ascii() {
96 ascii_rule_indices[key as usize] = (start, end);
97 } else {
98 non_ascii_indices_vec.push((key, (start, end)));
99 }
100 }
101
102 let mut ascii_effect_keys = [false; 128];
103 let mut non_ascii_effect_keys: Vec<char> = Vec::new();
104 for key in &input_method.keys {
105 if key.is_ascii() {
106 ascii_effect_keys[*key as usize] = true;
107 } else {
108 non_ascii_effect_keys.push(*key);
109 }
110 }
111 non_ascii_effect_keys.sort_unstable();
112 non_ascii_effect_keys.dedup();
113
114 Self {
115 committed_text: String::new(),
116 active_buffer: [Transformation::default(); MAX_ACTIVE_TRANS],
117 active_len: 0,
118 input_method,
119 all_rules: all_rules_vec.into_boxed_slice(),
120 ascii_rule_indices,
121 non_ascii_rule_indices: non_ascii_indices_vec.into_boxed_slice(),
122 ascii_effect_keys,
123 non_ascii_effect_keys,
124 config,
125
126 work_comp: Vec::with_capacity(MAX_ACTIVE_TRANS),
127 scratch_comp: Vec::with_capacity(MAX_ACTIVE_TRANS),
128
129 prev_preedit: String::with_capacity(64),
130 delta_buf: String::with_capacity(64),
131 dfa: crate::dfa::Dfa::new(),
132 current_state_id: 0,
133 }
134 }
135
136 #[inline]
137 pub(crate) fn active_slice(&self) -> &[Transformation] {
138 &self.active_buffer[..self.active_len]
139 }
140
141 fn take_active_into(&mut self, out: &mut Vec<Transformation>) {
142 out.clear();
143 out.extend_from_slice(self.active_slice());
144 self.active_len = 0;
145 }
146
147 fn set_active_from_vec(&mut self, src: &mut Vec<Transformation>) {
148 self.active_len = src.len().min(MAX_ACTIVE_TRANS);
149 self.active_buffer[..self.active_len].copy_from_slice(&src[..self.active_len]);
150 src.clear();
151 }
152
153 pub fn config(&self) -> Config {
155 self.config
156 }
157
158 pub fn set_config(&mut self, config: Config) {
160 self.config = config;
161 }
162
163 pub fn input_method(&self) -> InputMethod {
165 self.input_method.clone()
166 }
167
168 fn get_applicable_rules(&self, key: char) -> &[Rule] {
169 let key = lower(key);
170 if key.is_ascii() {
171 let (start, end) = self.ascii_rule_indices[key as usize];
172 &self.all_rules[start as usize..end as usize]
173 } else {
174 self.non_ascii_rule_indices
175 .binary_search_by_key(&key, |(k, _)| *k)
176 .map(|idx| {
177 let (start, end) = self.non_ascii_rule_indices[idx].1;
178 &self.all_rules[start as usize..end as usize]
179 })
180 .unwrap_or(&[])
181 }
182 }
183
184 fn can_process_key_raw(&self, lower_key: char) -> bool {
185 if crate::utils::is_alpha(lower_key)
186 || (lower_key.is_ascii() && self.ascii_effect_keys[lower_key as usize])
187 || self.non_ascii_effect_keys.binary_search(&lower_key).is_ok()
188 {
189 return true;
190 }
191 if crate::utils::is_word_break_symbol(lower_key) {
192 return false;
193 }
194 crate::utils::is_vietnamese_rune(lower_key)
195 }
196
197 fn generate_transformations(
198 &self,
199 composition: &mut Vec<Transformation>,
200 key: char,
201 is_upper_case: bool,
202 ) {
203 let lower_key = lower(key);
204 let mut transformations = crate::bamboo_util::generate_transformations(
205 composition,
206 self.get_applicable_rules(lower_key),
207 self.config.to_flags(),
208 lower_key,
209 is_upper_case,
210 );
211
212 if transformations.is_empty() {
213 transformations = crate::bamboo_util::generate_fallback_transformations(
214 self.get_applicable_rules(lower_key),
215 lower_key,
216 is_upper_case,
217 );
218 let mut new_comp = composition.clone();
219 new_comp.extend(transformations.clone());
220
221 if !self.input_method.super_keys.is_empty() {
222 let current_str = crate::flattener::flatten_slice(
223 &new_comp,
224 OutputOptions::TONE_LESS | OutputOptions::LOWER_CASE,
225 );
226 if uoh_tail_match(¤t_str) {
227 let (target, rule) = crate::bamboo_util::find_target(
228 &new_comp,
229 self.get_applicable_rules(self.input_method.super_keys[0]),
230 self.config.to_flags(),
231 );
232 if let (Some(target), Some(mut rule)) = (target, rule) {
233 rule.key = '\0';
234 transformations.push(Transformation {
235 rule,
236 target: Some(target),
237 is_upper_case: false,
238 });
239 }
240 }
241 }
242 }
243 composition.extend(transformations.iter().cloned());
244 if self.config.to_flags() & crate::bamboo_util::EFREE_TONE_MARKING != 0
245 && self.is_valid_internal(composition, false)
246 {
247 let extra = crate::bamboo_util::refresh_last_tone_target(
248 composition,
249 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
250 );
251 composition.extend(extra);
252 }
253 }
254
255 fn last_syllable_start(composition: &[Transformation]) -> usize {
256 let mut idx = composition.len();
257 let mut last_is_vowel = false;
258 let mut found_vowel = false;
259
260 while idx > 0 {
261 let tmp = &composition[idx - 1];
262 if tmp.target.is_none() {
263 let is_v = crate::utils::is_vowel(tmp.rule.result);
264 if found_vowel && !is_v && !last_is_vowel {
265 break;
266 }
267 if is_v {
268 found_vowel = true;
269 }
270 last_is_vowel = is_v;
271 }
272 idx -= 1;
273 }
274
275 idx
276 }
277
278 fn new_composition_in_place(
279 &self,
280 composition: &mut Vec<Transformation>,
281 scratch: &mut Vec<Transformation>,
282 key: char,
283 is_upper_case: bool,
284 ) {
285 let syllable_abs_start = Self::last_syllable_start(composition);
286
287 scratch.clear();
288 scratch.extend(composition.drain(syllable_abs_start..));
289
290 let offset = syllable_abs_start;
291 if offset != 0 {
292 for t in scratch.iter_mut() {
293 if let Some(target) = t.target {
294 t.target = Some(target.saturating_sub(offset));
295 }
296 }
297 }
298
299 self.generate_transformations(scratch, key, is_upper_case);
300
301 if offset != 0 {
302 for t in scratch.iter_mut() {
303 if let Some(target) = t.target {
304 t.target = Some(target + offset);
305 }
306 }
307 }
308
309 composition.append(scratch);
310 }
311
312 pub fn process(&mut self, s: &str, mode: Mode) -> String {
314 self.process_str(s, mode).output()
315 }
316
317 pub fn process_str(&mut self, s: &str, mode: Mode) -> &Self {
319 for key in s.chars() {
320 self.process_key(key, mode);
321 }
322 self
323 }
324
325 fn lcp_chars_and_bytes(a: &str, b: &str) -> (usize, usize) {
326 let mut lcp_chars = 0usize;
327 let mut lcp_bytes = 0usize;
328 for (ac, bc) in a.chars().zip(b.chars()) {
329 if ac == bc {
330 lcp_chars += 1;
331 lcp_bytes += ac.len_utf8();
332 } else {
333 break;
334 }
335 }
336 (lcp_chars, lcp_bytes)
337 }
338
339 pub fn process_key_delta(&mut self, key: char, mode: Mode) -> (usize, usize, &str) {
349 self.process_key(key, mode);
350
351 let active_len = self.active_len;
352 let active = &self.active_buffer[..active_len];
353 crate::flattener::flatten_slice_into(active, OutputOptions::NONE, &mut self.delta_buf);
354
355 let (lcp_chars, lcp_bytes) = Self::lcp_chars_and_bytes(&self.prev_preedit, &self.delta_buf);
356
357 let prev_chars = self.prev_preedit.chars().count();
358
359 let prev_bytes = self.prev_preedit.len();
360
361 let backspaces_chars = prev_chars.saturating_sub(lcp_chars);
362 let backspaces_bytes = prev_bytes.saturating_sub(lcp_bytes);
363
364 std::mem::swap(&mut self.prev_preedit, &mut self.delta_buf);
365 let inserted = &self.prev_preedit[lcp_bytes..];
366 (backspaces_chars, backspaces_bytes, inserted)
367 }
368
369 pub fn process_key_delta_into(
374 &mut self,
375 key: char,
376 mode: Mode,
377 inserted: &mut String,
378 ) -> usize {
379 let (backspaces_chars, _backspaces_bytes, ins) = self.process_key_delta(key, mode);
380 inserted.clear();
381 inserted.push_str(ins);
382 backspaces_chars
383 }
384
385 pub fn process_key(&mut self, key: char, mode: Mode) {
389 let lower_key = lower(key);
390 let is_upper_case = is_upper(key);
391
392 if mode == Mode::English || !self.can_process_key_raw(lower_key) {
393 if crate::utils::is_word_break_symbol(lower_key) {
394 self.commit();
395 }
396 let trans = crate::bamboo_util::new_appending_trans(lower_key, is_upper_case);
397 self.push_active(trans);
398 if crate::utils::is_word_break_symbol(lower_key) {
399 self.commit();
400 }
401 self.current_state_id = 0;
402 return;
403 }
404
405 if lower_key.is_ascii() && !is_upper_case {
407 let next_state_id =
408 self.dfa.get_state(self.current_state_id).transitions[lower_key as usize];
409 if next_state_id != 0 {
410 self.current_state_id = next_state_id;
411 let next_state = self.dfa.get_state(next_state_id);
412 self.active_len = next_state.composition.len().min(MAX_ACTIVE_TRANS);
413 self.active_buffer[..self.active_len].copy_from_slice(&next_state.composition);
414 return;
415 }
416 }
417
418 let mut work = std::mem::take(&mut self.work_comp);
419 let mut scratch = std::mem::take(&mut self.scratch_comp);
420
421 self.take_active_into(&mut work);
422 self.new_composition_in_place(&mut work, &mut scratch, lower_key, is_upper_case);
423
424 if lower_key.is_ascii() && !is_upper_case && work.len() <= MAX_ACTIVE_TRANS {
426 let next_id = self.dfa.add_state(&work);
427 self.dfa.states[self.current_state_id as usize].transitions[lower_key as usize] =
428 next_id;
429 self.current_state_id = next_id;
430 } else {
431 self.current_state_id = self.dfa.find_state(&work).unwrap_or(0);
432 }
433
434 self.set_active_from_vec(&mut work);
435
436 self.work_comp = work;
437 self.scratch_comp = scratch;
438 }
439
440 fn push_active(&mut self, trans: Transformation) {
441 if self.active_len < MAX_ACTIVE_TRANS {
442 self.active_buffer[self.active_len] = trans;
443 self.active_len += 1;
444 self.current_state_id = self.dfa.find_state(self.active_slice()).unwrap_or(0);
445 }
446 }
447
448 pub fn commit(&mut self) {
450 if self.active_len == 0 {
451 return;
452 }
453 let word = self.output();
454 self.committed_text.push_str(&word);
455 self.active_len = 0;
456 self.current_state_id = 0;
457 }
458
459 pub fn output(&self) -> String {
461 crate::flattener::flatten_slice(self.active_slice(), OutputOptions::NONE)
462 }
463
464 pub fn get_processed_str(&self, options: OutputOptions) -> String {
468 let active = self.active_slice();
469 if options.contains(OutputOptions::FULL_TEXT) {
470 let mut result = self.committed_text.clone();
471 result.push_str(&crate::flattener::flatten_slice(active, options));
472 return result;
473 }
474 if options.contains(OutputOptions::PUNCTUATION_MODE) {
475 if active.is_empty() {
476 return String::new();
477 }
478 let (_, tail) = crate::bamboo_util::extract_last_word_with_punctuation_marks(
479 active,
480 &self.input_method.keys,
481 );
482 return crate::flattener::flatten_slice(tail, OutputOptions::NONE);
483 }
484 crate::flattener::flatten_slice(active, options)
485 }
486
487 pub fn is_valid(&self, input_is_full_complete: bool) -> bool {
489 self.is_valid_internal(self.active_slice(), input_is_full_complete)
490 }
491
492 fn is_valid_internal(
493 &self,
494 composition: &[Transformation],
495 input_is_full_complete: bool,
496 ) -> bool {
497 crate::bamboo_util::is_valid(composition, input_is_full_complete)
498 }
499
500 pub fn restore_last_word(&mut self, to_vietnamese: bool) {
504 let mut work = std::mem::take(&mut self.work_comp);
505 let mut scratch = std::mem::take(&mut self.scratch_comp);
506
507 self.take_active_into(&mut work);
508 if work.is_empty() {
509 self.set_active_from_vec(&mut work);
510 self.work_comp = work;
511 self.scratch_comp = scratch;
512 self.current_state_id = 0;
513 return;
514 }
515
516 let (prev_slice, last) =
517 crate::bamboo_util::extract_last_word(&work, Some(&self.input_method.keys));
518 let mut previous = prev_slice.to_vec();
519
520 if last.is_empty() {
521 self.set_active_from_vec(&mut work);
522 self.work_comp = work;
523 self.scratch_comp = scratch;
524 self.current_state_id = 0;
525 return;
526 }
527 if !to_vietnamese {
528 previous.extend(crate::bamboo_util::break_composition_slice(last));
529 self.set_active_from_vec(&mut previous);
530 self.work_comp = work;
531 self.scratch_comp = scratch;
532 self.current_state_id = 0;
533 return;
534 }
535
536 let mut new_comp: Vec<Transformation> = Vec::new();
537 for t in last {
538 if t.rule.key == '\0' {
539 continue;
540 }
541 self.new_composition_in_place(&mut new_comp, &mut scratch, t.rule.key, t.is_upper_case);
542 }
543 previous.extend(new_comp);
544
545 self.set_active_from_vec(&mut previous);
546 self.work_comp = work;
547 self.scratch_comp = scratch;
548 self.current_state_id = 0;
549 }
550
551 pub fn remove_last_char(&mut self, refresh_last_tone_target: bool) {
553 let mut work = std::mem::take(&mut self.work_comp);
554
555 self.take_active_into(&mut work);
556 let last_appending_idx = crate::bamboo_util::find_last_appending_trans_idx(&work);
557 let Some(last_idx) = last_appending_idx else {
558 self.set_active_from_vec(&mut work);
559 self.work_comp = work;
560 self.current_state_id = 0;
561 return;
562 };
563
564 let last_appending_key = work[last_idx].rule.key;
565 if !self.can_process_key_raw(last_appending_key) {
566 work.pop();
567 self.set_active_from_vec(&mut work);
568 self.work_comp = work;
569 self.current_state_id = 0;
570 return;
571 }
572
573 if work.is_empty() {
574 self.set_active_from_vec(&mut work);
575 self.work_comp = work;
576 self.current_state_id = 0;
577 return;
578 }
579
580 let (prev_slice, last_comb_slice) =
581 crate::bamboo_util::extract_last_word(&work, Some(&self.input_method.keys));
582 let mut previous = prev_slice.to_vec();
583 let last_comb = last_comb_slice.to_vec();
584
585 let mut new_comb: Vec<Transformation> = Vec::new();
586 let prev_len = previous.len();
587 for (i, t) in last_comb.into_iter().enumerate() {
588 let actual_idx = prev_len + i;
589 if actual_idx == last_idx {
590 continue;
591 }
592 if let Some(target) = t.target
593 && target == last_idx
594 {
595 continue;
596 }
597 new_comb.push(t);
598 }
599
600 if refresh_last_tone_target {
601 let extra = crate::bamboo_util::refresh_last_tone_target(
602 &mut new_comb,
603 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE != 0,
604 );
605 new_comb.extend(extra);
606 }
607
608 previous.extend(new_comb);
609 self.set_active_from_vec(&mut previous);
610 self.work_comp = work;
611 self.current_state_id = 0;
612 }
613
614 pub fn reset(&mut self) {
616 self.committed_text.clear();
617 self.active_len = 0;
618 self.prev_preedit.clear();
619 self.delta_buf.clear();
620 self.current_state_id = 0;
621 }
622}
623
624#[cfg(test)]
625mod tests {
626 use super::*;
627
628 #[test]
629 fn delta_backspaces_and_inserted() {
630 let telex = InputMethod::telex();
631 let mut e = Engine::new(telex);
632
633 let (bs1, _bb1, ins1) = e.process_key_delta('a', Mode::Vietnamese);
634 assert_eq!(bs1, 0, "First 'a' should have 0 backspaces");
635 assert_eq!(ins1, "a");
636
637 let (bs2, _bb2, ins2) = e.process_key_delta('s', Mode::Vietnamese);
638 assert_eq!(bs2, 1, "Adding 's' to 'a' should have 1 backspace for 'á'");
639 assert_eq!(ins2, "á");
640
641 let (bs3, _bb3, ins3) = e.process_key_delta(' ', Mode::Vietnamese);
642 assert_eq!(bs3, 1, "Space should clear the preedit 'á'");
643 assert_eq!(ins3, "");
644 }
645}