1use phf::{Map, phf_map};
2
3use crate::input_method_def::InputMethodDef;
4use crate::utils::{add_mark_to_toneless_char, add_tone_to_char, is_vowel};
5
/// Tone that a rule can apply, represented as a `u8` code.
///
/// The discriminants are the values stored in `Rule::effect` for
/// tone-transformation rules and decoded again by `Rule::get_tone`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Tone {
    /// No tone; named `"XoaDauThanh"` in the tone-name table.
    None = 0,
    /// Named `"DauHuyen"` in the tone-name table.
    Grave = 1,
    /// Named `"DauSac"` in the tone-name table.
    Acute = 2,
    /// Named `"DauHoi"` in the tone-name table.
    Hook = 3,
    /// Named `"DauNga"` in the tone-name table.
    Tilde = 4,
    /// Named `"DauNang"` in the tone-name table.
    Dot = 5,
}
23
/// Mark (letter modification) that a rule can apply, represented as a `u8` code.
///
/// The discriminants are the values stored in `Rule::effect` for
/// mark-transformation rules and decoded again by `Rule::get_mark`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Mark {
    /// No mark.
    None = 0,
    /// The mark that turns `o` into `ô`.
    Hat = 1,
    /// The mark that turns `a` into `ă`.
    Breve = 2,
    /// The mark that turns `u` into `ư` and `o` into `ơ`.
    Horn = 3,
    /// The mark that turns `d` into `đ`.
    Dash = 4,
    /// Decoded by `Rule::get_mark`, but never produced by the parsers in this file.
    Raw = 5,
}
41
/// Kind of effect a `Rule` has; determines how `Rule::effect` is interpreted.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum EffectType {
    /// The rule appends characters (built by `get_appending_rule`).
    /// This is the default variant.
    #[default]
    Appending = 0,
    /// The rule changes the mark of a character; `Rule::effect` holds a `Mark` code.
    MarkTransformation = 1,
    /// The rule changes the tone; `Rule::effect` holds a `Tone` code.
    ToneTransformation = 2,
    /// Not produced by any parser in this file.
    Replacing = 3,
}
56
57static TONES: Map<&'static str, Tone> = phf_map! {
58 "XoaDauThanh" => Tone::None,
59 "DauSac" => Tone::Acute,
60 "DauHuyen" => Tone::Grave,
61 "DauNga" => Tone::Tilde,
62 "DauNang" => Tone::Dot,
63 "DauHoi" => Tone::Hook,
64};
65
66#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
68pub struct Rule {
69 pub key: char,
71 pub effect: u8,
75 pub effect_type: EffectType,
77 pub effect_on: char,
79 pub result: char,
81 pub appended: [char; 2],
83 pub appended_len: u8,
85}
86
87impl Rule {
88 pub fn set_tone(&mut self, tone: Tone) {
90 self.effect = tone as u8;
91 }
92
93 pub fn set_mark(&mut self, mark: Mark) {
95 self.effect = mark as u8;
96 }
97
98 pub fn get_tone(&self) -> Tone {
100 match self.effect {
102 1 => Tone::Grave,
103 2 => Tone::Acute,
104 3 => Tone::Hook,
105 4 => Tone::Tilde,
106 5 => Tone::Dot,
107 _ => Tone::None,
108 }
109 }
110
111 pub fn get_mark(&self) -> Mark {
113 match self.effect {
114 1 => Mark::Hat,
115 2 => Mark::Breve,
116 3 => Mark::Horn,
117 4 => Mark::Dash,
118 5 => Mark::Raw,
119 _ => Mark::None,
120 }
121 }
122}
123
124#[derive(Clone, Debug, Default)]
129pub struct InputMethod {
130 pub name: String,
132 pub rules: Vec<Rule>,
134 pub super_keys: Vec<char>,
136 pub tone_keys: Vec<char>,
138 pub appending_keys: Vec<char>,
140 pub keys: Vec<char>,
142}
143
144impl InputMethod {
145 pub fn telex() -> Self {
147 parse_input_method("Telex")
148 }
149
150 pub fn vni() -> Self {
152 parse_input_method("VNI")
153 }
154
155 pub fn viqr() -> Self {
157 parse_input_method("VIQR")
158 }
159
160 pub fn microsoft_layout() -> Self {
162 parse_input_method("Microsoft layout")
163 }
164
165 pub fn telex_2() -> Self {
167 parse_input_method("Telex 2")
168 }
169
170 pub fn telex_vni() -> Self {
172 parse_input_method("Telex + VNI")
173 }
174
175 pub fn telex_vni_viqr() -> Self {
177 parse_input_method("Telex + VNI + VIQR")
178 }
179
180 pub fn vni_french_layout() -> Self {
182 parse_input_method("VNI Bàn phím tiếng Pháp")
183 }
184
185 pub fn telex_w() -> Self {
187 parse_input_method("Telex W")
188 }
189}
190
191pub(crate) fn parse_input_method(im_name: &str) -> InputMethod {
193 let defs = crate::input_method_def::get_input_method_definitions();
194 defs.get(im_name).copied().map(|def| parse_input_method_def(im_name, def)).unwrap_or_default()
195}
196
197pub(crate) fn parse_input_method_def(im_name: &str, im_def: &InputMethodDef) -> InputMethod {
199 let mut im = InputMethod { name: im_name.to_string(), ..Default::default() };
200
201 for (key_str, line) in im_def.entries() {
202 let key = match key_str.chars().next() {
203 Some(c) => c,
204 None => continue,
205 };
206
207 im.rules.extend(parse_rules(key, line));
208
209 if contains_uo_case_insensitive(line) {
210 im.super_keys.push(key);
211 }
212 im.keys.push(key);
213 }
214
215 for rule in &im.rules {
216 if rule.effect_type == EffectType::Appending {
217 im.appending_keys.push(rule.key);
218 }
219 if rule.effect_type == EffectType::ToneTransformation {
220 im.tone_keys.push(rule.key);
221 }
222 }
223
224 im
225}
226
/// Returns `true` when `s` contains the letters `u` and `o` directly next to
/// each other, ignoring ASCII case (`"uo"`, `"UO"`, `"Uo"`, `"uO"`).
#[inline]
fn contains_uo_case_insensitive(s: &str) -> bool {
    // Both letters are ASCII, and ASCII bytes never occur inside a multi-byte
    // UTF-8 sequence, so scanning adjacent bytes is the same as scanning
    // adjacent characters.
    s.as_bytes().windows(2).any(|pair| pair.eq_ignore_ascii_case(b"uo"))
}
239
240pub(crate) fn parse_rules(key: char, line: &str) -> Vec<Rule> {
242 if let Some(tone) = TONES.get(line).copied() {
243 return vec![Rule {
244 key,
245 effect_type: EffectType::ToneTransformation,
246 effect: tone as u8,
247 effect_on: '\0',
248 result: '\0',
249 appended: ['\0'; 2],
250 appended_len: 0,
251 }];
252 }
253
254 parse_toneless_rules(key, line)
255}
256
257pub(crate) fn parse_toneless_rules(key: char, line: &str) -> Vec<Rule> {
259 let lower = line.to_lowercase();
260
261 if let Some((effective_ons, results, rest)) = parse_dsl(&lower) {
262 let mut rules = Vec::new();
263 for (effective_on, result) in effective_ons.into_iter().zip(results.into_iter()) {
264 let Some(effect) = find_mark_from_char(result) else {
265 continue;
266 };
267 rules.extend(parse_toneless_rule(key, effective_on, result, effect));
268 }
269
270 if let Some(rule) = get_appending_rule(key, rest) {
271 rules.push(rule);
272 }
273
274 return rules;
275 }
276
277 if let Some(rule) = get_appending_rule(key, line) {
278 return vec![rule];
279 }
280
281 Vec::new()
282}
283
284fn parse_toneless_rule(key: char, effective_on: char, result: char, effect: Mark) -> Vec<Rule> {
285 let mut rules = Vec::new();
286
287 for chr in get_mark_family(effective_on) {
288 if chr == result {
289 rules.push(Rule {
290 key,
291 effect_type: EffectType::MarkTransformation,
292 effect: 0,
293 effect_on: result,
294 result: effective_on,
295 appended: ['\0'; 2],
296 appended_len: 0,
297 });
298 continue;
299 }
300
301 if is_vowel(chr) {
302 for tone in 0u8..=5 {
303 rules.push(Rule {
304 key,
305 effect_type: EffectType::MarkTransformation,
306 effect_on: add_tone_to_char(chr, tone),
307 effect: effect as u8,
308 result: add_tone_to_char(result, tone),
309 appended: ['\0'; 2],
310 appended_len: 0,
311 });
312 }
313 } else {
314 rules.push(Rule {
315 key,
316 effect_type: EffectType::MarkTransformation,
317 effect_on: chr,
318 effect: effect as u8,
319 result,
320 appended: ['\0'; 2],
321 appended_len: 0,
322 });
323 }
324 }
325
326 rules
327}
328
/// Splits a definition of the form `SOURCES_TARGETS[rest]`.
///
/// * `SOURCES` is everything before the first `_`; it must be non-empty and
///   consist only of ASCII letters.
/// * `TARGETS` is the longest run of alphabetic characters right after that
///   `_`; it must be non-empty.
/// * `rest` is whatever follows `TARGETS` (possibly empty), borrowed from `s`.
///
/// Returns `(sources, targets, rest)`, or `None` when `s` does not have this
/// shape.
fn parse_dsl(s: &str) -> Option<(Vec<char>, Vec<char>, &str)> {
    let (sources, tail) = s.split_once('_')?;

    let sources_valid = !sources.is_empty() && sources.chars().all(|c| c.is_ascii_alphabetic());
    if !sources_valid {
        return None;
    }

    // Byte offset of the first non-alphabetic character, i.e. where `rest` begins.
    let targets_end = tail.find(|c: char| !c.is_alphabetic()).unwrap_or(tail.len());
    if targets_end == 0 {
        return None;
    }

    let (targets, rest) = tail.split_at(targets_end);
    Some((sources.chars().collect(), targets.chars().collect(), rest))
}
355
356fn get_appending_rule(key: char, value: &str) -> Option<Rule> {
358 if !value.starts_with('_') {
359 return None;
360 }
361
362 let start = if value.starts_with("__") { 2 } else { 1 };
364 let tail = value.get(start..)?;
365
366 let mut letters = Vec::new();
367 for ch in tail.chars() {
368 if ch.is_alphabetic() {
369 letters.push(ch);
370 } else {
371 break;
372 }
373 }
374
375 let first = *letters.first()?;
376
377 let mut appended = ['\0'; 2];
378 let mut appended_len = 0u8;
379 for &ch in letters.iter().skip(1) {
380 if (appended_len as usize) < appended.len() {
381 appended[appended_len as usize] = ch;
382 appended_len += 1;
383 }
384 }
385
386 Some(Rule {
387 key,
388 effect_type: EffectType::Appending,
389 effect: 0,
390 effect_on: first,
391 result: first,
392 appended,
393 appended_len,
394 })
395}
396
397fn get_mark_family(c: char) -> Vec<char> {
398 let base = add_tone_to_char(c, 0);
399 let canonical = add_mark_to_toneless_char(base, 0);
400
401 let mut family: Vec<char> =
403 (0u8..=4).map(|m| add_mark_to_toneless_char(canonical, m)).collect();
404
405 family.sort_unstable();
406 family.dedup();
407 family
408}
409
410fn find_mark_from_char(c: char) -> Option<Mark> {
411 let c = c.to_lowercase().next().unwrap_or(c);
412 let toneless = add_tone_to_char(c, 0);
413 let base = add_mark_to_toneless_char(toneless, 0);
414
415 for m in 0u8..=4 {
416 if add_mark_to_toneless_char(base, m) == toneless {
417 return Some(match m {
418 1 => Mark::Hat,
419 2 => Mark::Breve,
420 3 => Mark::Horn,
421 4 => Mark::Dash,
422 _ => Mark::None,
423 });
424 }
425 }
426
427 None
428}
429
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `rule` is a mark transformation applying `mark` to `on`.
    fn assert_mark_rule(rule: &Rule, mark: Mark, on: char) {
        assert_eq!(rule.effect_type, EffectType::MarkTransformation);
        assert_eq!(rule.get_mark(), mark);
        assert_eq!(rule.effect_on, on);
    }

    // Tone names map to a single tone-transformation rule.
    #[test]
    fn parse_tone_rules() {
        let clear = parse_rules('z', "XoaDauThanh");
        assert_eq!(clear.len(), 1);
        assert_eq!(clear[0].effect_type, EffectType::ToneTransformation);
        assert_eq!(clear[0].effect, Tone::None as u8);

        let tilde = parse_rules('x', "DauNga");
        assert_eq!(tilde.len(), 1);
        assert_eq!(tilde[0].effect_type, EffectType::ToneTransformation);
        assert_eq!(tilde[0].get_tone(), Tone::Tilde);
    }

    // Mark definitions, pure appending definitions, and both combined.
    #[test]
    fn parse_toneless_rules_cases() {
        let dash = parse_toneless_rules('d', "D_Đ");
        assert_eq!(dash.len(), 2);
        assert_eq!(dash[0].effect_type, EffectType::MarkTransformation);
        assert_eq!(dash[0].effect, Mark::Dash as u8);
        assert_eq!(dash[0].effect_on, 'd');

        let append_only = parse_toneless_rules('{', "_Ư");
        assert_eq!(append_only.len(), 1);
        assert_eq!(append_only[0].effect_type, EffectType::Appending);
        assert_eq!(append_only[0].effect_on, 'Ư');

        let marks = parse_toneless_rules('w', "UOA_ƯƠĂ");
        assert_eq!(marks.len(), 33);
        assert_mark_rule(&marks[0], Mark::Horn, 'u');
        assert_mark_rule(&marks[7], Mark::Horn, 'o');
        assert_mark_rule(&marks[20], Mark::Breve, 'a');

        let marks_then_append = parse_toneless_rules('w', "UOA_ƯƠĂ__Ư");
        assert_eq!(marks_then_append.len(), 34);
        assert_mark_rule(&marks_then_append[20], Mark::Breve, 'a');
        assert_eq!(marks_then_append[33].effect_type, EffectType::Appending);
        // The remainder after the DSL part is parsed from the lowercased line.
        assert_eq!(marks_then_append[33].effect_on, 'ư');
    }

    // Pure appending definitions keep the case of the original line.
    #[test]
    fn parse_append_rule() {
        let lower = parse_toneless_rules('[', "__ươ");
        assert_eq!(lower.len(), 1);
        assert_eq!(lower[0].appended_len, 1);
        assert_eq!(lower[0].appended[0], 'ơ');

        let upper = parse_toneless_rules('{', "__ƯƠ");
        assert_eq!(upper.len(), 1);
        assert_eq!(upper[0].appended_len, 1);
        assert_eq!(upper[0].appended[0], 'Ơ');
    }

    #[test]
    fn parse_input_method_super_key_detection() {
        let telex = parse_input_method("Telex");
        assert!(telex.super_keys.contains(&'w'));
    }

    #[test]
    fn parse_telex_o_hat_rule_exists() {
        let rules = parse_toneless_rules('o', "O_Ô");

        let has_hat_rule = rules.iter().any(|r| {
            r.effect_type == EffectType::MarkTransformation
                && r.get_mark() == Mark::Hat
                && r.effect_on == 'o'
                && r.result == 'ô'
        });
        assert!(has_hat_rule);
        assert!(rules.iter().all(|r| r.effect_type != EffectType::Appending));
    }

    #[test]
    fn telex2_has_no_appending_rule_for_o() {
        let telex2 = parse_input_method("Telex 2");
        let o_rules: Vec<_> = telex2.rules.iter().filter(|r| r.key == 'o').collect();
        assert!(!o_rules.is_empty());
        assert!(o_rules.iter().all(|r| r.effect_type != EffectType::Appending));
    }
}