1use crate::config::Config;
2use crate::input_method::{InputMethod, Rule};
3use crate::mode::{Mode, OutputOptions};
4
/// Number of slots in `Engine::active_buffer`, i.e. the largest in-progress
/// composition the engine keeps; transformations beyond this are not stored.
const MAX_ACTIVE_TRANS: usize = 32;
6
/// One step of a composition: a rule applied in response to a key press.
#[derive(Clone, Debug)]
pub struct Transformation {
    /// The rule that produced this step. The engine sets `rule.key` to `'\0'`
    /// on steps it synthesizes itself, and skips such steps when replaying.
    pub rule: Rule,
    /// Index, within the composition, of the transformation this one acts on;
    /// `None` when it has no target.
    pub target: Option<usize>,
    /// Whether the originating key was typed in upper case.
    pub is_upper_case: bool,
}
14
/// Lower-cases a single character.
///
/// ASCII goes through the cheap ASCII path; any other character uses the
/// first character of its Unicode lowercase mapping, falling back to the
/// input itself if the mapping yields nothing.
#[inline]
fn lower(c: char) -> char {
    match c {
        ascii if ascii.is_ascii() => ascii.to_ascii_lowercase(),
        other => other.to_lowercase().next().unwrap_or(other),
    }
}
23
24#[inline]
25fn is_upper(c: char) -> bool {
26 if c.is_ascii() { c.is_ascii_uppercase() } else { lower(c) != c }
27}
28
/// Returns `true` when `s` contains `"uơ"` or `"ưo"` immediately followed by
/// an alphabetic character.
///
/// Every occurrence of each pattern is examined, so a match is still found
/// when an earlier occurrence is followed by a non-letter (or by nothing).
fn uoh_tail_match(s: &str) -> bool {
    ["uơ", "ưo"].iter().any(|&pat| {
        s.match_indices(pat).any(|(idx, hit)| {
            // `idx + hit.len()` is the byte offset just past this occurrence,
            // which is always a char boundary.
            s[idx + hit.len()..]
                .chars()
                .next()
                .is_some_and(char::is_alphabetic)
        })
    })
}
40
/// Input-method engine: holds the text committed so far, the in-progress
/// composition, and lookup tables derived from the input method's rules.
pub struct Engine {
    /// Text already finalized by `commit`; cleared by `reset`.
    committed_text: String,
    /// Fixed-capacity storage for the in-progress composition. Only the first
    /// `active_len` slots are live; later slots may hold stale values.
    active_buffer: [Option<Transformation>; MAX_ACTIVE_TRANS],
    /// Number of live slots at the front of `active_buffer`.
    active_len: usize,

    /// The input method this engine was built from.
    input_method: InputMethod,
    /// All rules of the input method, grouped by lower-cased trigger key.
    all_rules: Box<[Rule]>,
    /// For each ASCII key, the half-open `(start, end)` range of its rules in
    /// `all_rules`; `(0, 0)` when the key has no rules.
    ascii_rule_indices: [(u16, u16); 128],
    /// `(key, (start, end))` ranges for non-ASCII keys, sorted by key so they
    /// can be binary-searched.
    non_ascii_rule_indices: Box<[(char, (u16, u16))]>,
    /// `true` for each ASCII character listed in the input method's `keys`.
    ascii_effect_keys: [bool; 128],
    /// Sorted, de-duplicated non-ASCII characters from the input method's `keys`.
    non_ascii_effect_keys: Vec<char>,
    /// Current engine configuration.
    config: Config,
}
56
57impl Engine {
58 pub fn new(input_method: InputMethod) -> Self {
59 Self::with_config(input_method, Config::default())
60 }
61
62 pub fn with_config(input_method: InputMethod, config: Config) -> Self {
63 let mut rules_by_key: std::collections::BTreeMap<char, Vec<Rule>> =
64 std::collections::BTreeMap::new();
65 for rule in &input_method.rules {
66 let key = lower(rule.key);
67 rules_by_key.entry(key).or_default().push(rule.clone());
68 }
69
70 let total_rules: usize = rules_by_key.values().map(|v| v.len()).sum();
71 let mut all_rules_vec = Vec::with_capacity(total_rules);
72 let mut ascii_rule_indices = [(0u16, 0u16); 128];
73 let mut non_ascii_indices_vec = Vec::new();
74
75 for (key, rules) in rules_by_key {
76 let start = all_rules_vec.len() as u16;
77 all_rules_vec.extend(rules);
78 let end = all_rules_vec.len() as u16;
79 if key.is_ascii() {
80 ascii_rule_indices[key as usize] = (start, end);
81 } else {
82 non_ascii_indices_vec.push((key, (start, end)));
83 }
84 }
85
86 let mut ascii_effect_keys = [false; 128];
87 let mut non_ascii_effect_keys: Vec<char> = Vec::new();
88 for key in &input_method.keys {
89 if key.is_ascii() {
90 ascii_effect_keys[*key as usize] = true;
91 } else {
92 non_ascii_effect_keys.push(*key);
93 }
94 }
95 non_ascii_effect_keys.sort_unstable();
96 non_ascii_effect_keys.dedup();
97
98 Self {
99 committed_text: String::new(),
100 active_buffer: std::array::from_fn(|_| None),
101 active_len: 0,
102 input_method,
103 all_rules: all_rules_vec.into_boxed_slice(),
104 ascii_rule_indices,
105 non_ascii_rule_indices: non_ascii_indices_vec.into_boxed_slice(),
106 ascii_effect_keys,
107 non_ascii_effect_keys,
108 config,
109 }
110 }
111
112 fn active_composition(&self) -> Vec<&Transformation> {
114 self.active_buffer[..self.active_len]
115 .iter()
116 .map(|opt| opt.as_ref().unwrap())
117 .collect()
118 }
119
120 fn active_composition_owned(&self) -> Vec<Transformation> {
122 self.active_buffer[..self.active_len]
123 .iter()
124 .map(|opt| opt.as_ref().unwrap().clone())
125 .collect()
126 }
127
128 fn set_active_composition(&mut self, comp: Vec<Transformation>) {
129 self.active_len = comp.len().min(MAX_ACTIVE_TRANS);
130 for (i, t) in comp.into_iter().enumerate().take(MAX_ACTIVE_TRANS) {
131 self.active_buffer[i] = Some(t);
132 }
133 }
134
    /// Returns a copy of the current configuration.
    pub fn config(&self) -> Config {
        self.config
    }
    /// Replaces the current configuration with `config`.
    pub fn set_config(&mut self, config: Config) {
        self.config = config;
    }
    /// Returns a clone of the input method this engine was built with.
    pub fn input_method(&self) -> InputMethod {
        self.input_method.clone()
    }
144
145 fn get_applicable_rules(&self, key: char) -> &[Rule] {
146 let key = lower(key);
147 if key.is_ascii() {
148 let (start, end) = self.ascii_rule_indices[key as usize];
149 &self.all_rules[start as usize..end as usize]
150 } else {
151 self.non_ascii_rule_indices
152 .binary_search_by_key(&key, |(k, _)| *k)
153 .map(|idx| {
154 let (start, end) = self.non_ascii_rule_indices[idx].1;
155 &self.all_rules[start as usize..end as usize]
156 })
157 .unwrap_or(&[])
158 }
159 }
160
161 fn can_process_key_raw(&self, lower_key: char) -> bool {
162 if crate::utils::is_alpha(lower_key)
163 || (lower_key.is_ascii()
164 && self.ascii_effect_keys[lower_key as usize])
165 || self.non_ascii_effect_keys.binary_search(&lower_key).is_ok()
166 {
167 return true;
168 }
169 if crate::utils::is_word_break_symbol(lower_key) {
170 return false;
171 }
172 crate::utils::is_vietnamese_rune(lower_key)
173 }
174
175 fn generate_transformations(
176 &self,
177 composition: &mut Vec<Transformation>,
178 key: char,
179 is_upper_case: bool,
180 ) {
181 let lower_key = lower(key);
182 let refs: Vec<&Transformation> = composition.iter().collect();
183 let mut transformations = crate::bamboo_util::generate_transformations(
184 &refs,
185 self.get_applicable_rules(lower_key),
186 self.config.to_flags(),
187 lower_key,
188 is_upper_case,
189 );
190
191 if transformations.is_empty() {
192 transformations =
193 crate::bamboo_util::generate_fallback_transformations(
194 self.get_applicable_rules(lower_key),
195 lower_key,
196 is_upper_case,
197 );
198 let mut new_comp = composition.clone();
199 new_comp.extend(transformations.clone());
200 let new_refs: Vec<&Transformation> = new_comp.iter().collect();
201
202 if !self.input_method.super_keys.is_empty() {
203 let current_str = crate::flattener::flatten(
204 &new_refs,
205 OutputOptions::TONE_LESS | OutputOptions::LOWER_CASE,
206 );
207 if uoh_tail_match(¤t_str) {
208 let (target, rule) = crate::bamboo_util::find_target(
209 &new_refs,
210 self.get_applicable_rules(
211 self.input_method.super_keys[0],
212 ),
213 self.config.to_flags(),
214 );
215 if let (Some(target), Some(mut rule)) = (target, rule) {
216 rule.key = '\0';
217 transformations.push(Transformation {
218 rule,
219 target: Some(target),
220 is_upper_case: false,
221 });
222 }
223 }
224 }
225 }
226 composition.extend(transformations);
227 if self.config.to_flags() & crate::bamboo_util::EFREE_TONE_MARKING != 0
228 && self.is_valid_internal(composition, false)
229 {
230 let extra = crate::bamboo_util::refresh_last_tone_target(
231 composition,
232 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE
233 != 0,
234 );
235 composition.extend(extra);
236 }
237 }
238
239 fn new_composition(
240 &self,
241 mut composition: Vec<Transformation>,
242 key: char,
243 is_upper_case: bool,
244 ) -> Vec<Transformation> {
245 let (prev_refs, _) = crate::bamboo_util::extract_last_syllable(
246 &composition,
247 Some(&self.input_method.keys),
248 );
249 let syllable_abs_start = prev_refs.len();
250 let mut syllable = composition.split_off(syllable_abs_start);
251 let mut previous = composition;
252
253 let offset = syllable_abs_start;
254 if offset != 0 {
255 for t in &mut syllable {
256 if let Some(target) = t.target {
257 t.target = Some(target.saturating_sub(offset));
258 }
259 }
260 }
261 self.generate_transformations(&mut syllable, key, is_upper_case);
262 if offset != 0 {
263 for t in &mut syllable {
264 if let Some(target) = t.target {
265 t.target = Some(target + offset);
266 }
267 }
268 }
269 previous.extend(syllable);
270 previous
271 }
272
273 pub fn process(&mut self, s: &str, mode: Mode) -> String {
274 self.process_str(s, mode).output()
275 }
276 pub fn process_str(&mut self, s: &str, mode: Mode) -> &Self {
277 for key in s.chars() {
278 self.process_key(key, mode);
279 }
280 self
281 }
282
283 pub fn process_key(&mut self, key: char, mode: Mode) {
284 let lower_key = lower(key);
285 let is_upper_case = is_upper(key);
286
287 if mode == Mode::English || !self.can_process_key_raw(lower_key) {
288 if crate::utils::is_word_break_symbol(lower_key) {
289 self.commit();
290 }
291 let trans = crate::bamboo_util::new_appending_trans(
292 lower_key,
293 is_upper_case,
294 );
295 self.push_active(trans);
296 if crate::utils::is_word_break_symbol(lower_key) {
297 self.commit();
298 }
299 return;
300 }
301
302 let current = self.active_composition_owned();
303 let next = self.new_composition(current, lower_key, is_upper_case);
304 self.set_active_composition(next);
305 }
306
307 fn push_active(&mut self, trans: Transformation) {
308 if self.active_len < MAX_ACTIVE_TRANS {
309 self.active_buffer[self.active_len] = Some(trans);
310 self.active_len += 1;
311 }
312 }
313
314 pub fn commit(&mut self) {
315 if self.active_len == 0 {
316 return;
317 }
318 let word = self.output();
319 self.committed_text.push_str(&word);
320 self.active_len = 0;
321 }
322
323 pub fn output(&self) -> String {
324 let comp = self.active_composition_owned();
325 crate::flattener::flatten_slice(&comp, OutputOptions::NONE)
326 }
327
328 pub fn get_processed_str(&self, options: OutputOptions) -> String {
329 let active_comp = self.active_composition_owned();
330 if options.contains(OutputOptions::FULL_TEXT) {
331 let mut result = self.committed_text.clone();
332 result.push_str(&crate::flattener::flatten_slice(
333 &active_comp,
334 options,
335 ));
336 return result;
337 }
338 if options.contains(OutputOptions::PUNCTUATION_MODE) {
339 let refs = self.active_composition();
340 let (_, tail) = crate::bamboo_util::extract_last_word_with_punctuation_marks_refs(&refs, &self.input_method.keys);
341 return crate::flattener::flatten(&tail, OutputOptions::NONE);
342 }
343 crate::flattener::flatten_slice(&active_comp, options)
344 }
345
346 pub fn is_valid(&self, input_is_full_complete: bool) -> bool {
347 let comp = self.active_composition_owned();
348 self.is_valid_internal(&comp, input_is_full_complete)
349 }
350
351 fn is_valid_internal(
352 &self,
353 composition: &[Transformation],
354 input_is_full_complete: bool,
355 ) -> bool {
356 let refs: Vec<&Transformation> = composition.iter().collect();
357 crate::bamboo_util::is_valid(&refs, input_is_full_complete)
358 }
359
360 pub fn restore_last_word(&mut self, to_vietnamese: bool) {
361 let comp = self.active_composition_owned();
362 let refs: Vec<&Transformation> = comp.iter().collect();
363 let (prev_refs, _) = crate::bamboo_util::extract_last_word(
364 &refs,
365 Some(&self.input_method.keys),
366 );
367 let prev_len = prev_refs.len();
368
369 let mut active = comp;
370 let last = active.split_off(prev_len);
371 let mut previous = active;
372
373 if last.is_empty() {
374 self.set_active_composition(previous);
375 return;
376 }
377 if !to_vietnamese {
378 previous.extend(crate::bamboo_util::break_composition_slice(&last));
379 self.set_active_composition(previous);
380 return;
381 }
382
383 let mut new_comp: Vec<Transformation> = Vec::new();
384 for t in last {
385 if t.rule.key == '\0' {
386 continue;
387 }
388 new_comp =
389 self.new_composition(new_comp, t.rule.key, t.is_upper_case);
390 }
391 previous.extend(new_comp);
392 self.set_active_composition(previous);
393 }
394
395 pub fn remove_last_char(&mut self, refresh_last_tone_target: bool) {
396 let comp = self.active_composition_owned();
397 let last_appending_idx =
398 crate::bamboo_util::find_last_appending_trans_idx(&comp);
399 let Some(last_idx) = last_appending_idx else {
400 return;
401 };
402
403 let last_appending_key = comp[last_idx].rule.key;
404 if !self.can_process_key_raw(last_appending_key) {
405 let mut next = comp;
406 next.pop();
407 self.set_active_composition(next);
408 return;
409 }
410
411 let refs: Vec<&Transformation> = comp.iter().collect();
412 let (previous_slice, _) = crate::bamboo_util::extract_last_word(
413 &refs,
414 Some(&self.input_method.keys),
415 );
416 let prev_len = previous_slice.len();
417
418 let mut previous = comp;
419 let last_comb = previous.split_off(prev_len);
420
421 let mut new_comb: Vec<Transformation> = Vec::new();
422 for (i, t) in last_comb.into_iter().enumerate() {
423 let actual_idx = prev_len + i;
424 if actual_idx == last_idx {
425 continue;
426 }
427 if let Some(target) = t.target
428 && target == last_idx
429 {
430 continue;
431 }
432 new_comb.push(t);
433 }
434
435 if refresh_last_tone_target {
436 let extra = crate::bamboo_util::refresh_last_tone_target(
437 &mut new_comb,
438 self.config.to_flags() & crate::bamboo_util::ESTD_TONE_STYLE
439 != 0,
440 );
441 new_comb.extend(extra);
442 }
443
444 previous.extend(new_comb);
445 self.set_active_composition(previous);
446 }
447
448 pub fn reset(&mut self) {
449 self.committed_text.clear();
450 self.active_len = 0;
451 }
452}