1use phf::{Map, phf_map};
2
3use crate::input_method_def::InputMethodDef;
4use crate::utils::{add_mark_to_toneless_char, add_tone_to_char, is_vowel};
5
/// Tone carried by a tone-transformation rule.
///
/// The explicit discriminants are the `u8` codes stored in a rule's
/// `effect` field, so they must not be renumbered.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Tone {
    /// No tone (code 0).
    None = 0,
    /// Grave tone (code 1).
    Grave = 1,
    /// Acute tone (code 2).
    Acute = 2,
    /// Hook tone (code 3).
    Hook = 3,
    /// Tilde tone (code 4).
    Tilde = 4,
    /// Dot tone (code 5).
    Dot = 5,
}
17
/// Mark carried by a mark-transformation rule.
///
/// The explicit discriminants are the `u8` codes stored in a rule's
/// `effect` field, so they must not be renumbered.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mark {
    /// No mark (code 0).
    None = 0,
    /// Hat mark (code 1), e.g. the mark found on `ô`.
    Hat = 1,
    /// Breve mark (code 2), e.g. the mark found on `ă`.
    Breve = 2,
    /// Horn mark (code 3), e.g. the mark found on `ư` and `ơ`.
    Horn = 3,
    /// Dash mark (code 4), e.g. the mark found on `đ`.
    Dash = 4,
    /// Code 5. NOTE(review): never produced by the parsing code in this
    /// file; presumably consumed elsewhere in the crate — confirm.
    Raw = 5,
}
30
/// Kind of effect a rule has when its key is pressed.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EffectType {
    /// The rule appends a character (code 0).
    Appending = 0,
    /// The rule changes the mark of a character (code 1).
    MarkTransformation = 1,
    /// The rule changes the tone (code 2).
    ToneTransformation = 2,
    /// Code 3. NOTE(review): no rule built in this file uses this kind;
    /// presumably used elsewhere in the crate — confirm.
    Replacing = 3,
}
44
45static TONES: Map<&'static str, Tone> = phf_map! {
46 "XoaDauThanh" => Tone::None,
47 "DauSac" => Tone::Acute,
48 "DauHuyen" => Tone::Grave,
49 "DauNga" => Tone::Tilde,
50 "DauNang" => Tone::Dot,
51 "DauHoi" => Tone::Hook,
52};
53
54#[derive(Clone, Debug)]
55pub struct Rule {
56 pub key: char,
57 pub effect: u8,
61 pub effect_type: EffectType,
62 pub effect_on: char,
63 pub result: char,
64 pub appended_rules: Box<[Rule]>,
65}
66
67impl Rule {
68 pub fn set_tone(&mut self, tone: Tone) {
69 self.effect = tone as u8;
70 }
71
72 pub fn set_mark(&mut self, mark: Mark) {
73 self.effect = mark as u8;
74 }
75
76 pub fn get_tone(&self) -> Tone {
77 match self.effect {
79 1 => Tone::Grave,
80 2 => Tone::Acute,
81 3 => Tone::Hook,
82 4 => Tone::Tilde,
83 5 => Tone::Dot,
84 _ => Tone::None,
85 }
86 }
87
88 pub fn get_mark(&self) -> Mark {
89 match self.effect {
90 1 => Mark::Hat,
91 2 => Mark::Breve,
92 3 => Mark::Horn,
93 4 => Mark::Dash,
94 5 => Mark::Raw,
95 _ => Mark::None,
96 }
97 }
98}
99
100#[derive(Clone, Debug, Default)]
105pub struct InputMethod {
106 pub name: String,
107 pub rules: Vec<Rule>,
108 pub super_keys: Vec<char>,
109 pub tone_keys: Vec<char>,
110 pub appending_keys: Vec<char>,
111 pub keys: Vec<char>,
112}
113
114impl InputMethod {
115 pub fn telex() -> Self {
117 parse_input_method("Telex")
118 }
119
120 pub fn vni() -> Self {
122 parse_input_method("VNI")
123 }
124
125 pub fn viqr() -> Self {
127 parse_input_method("VIQR")
128 }
129
130 pub fn microsoft_layout() -> Self {
132 parse_input_method("Microsoft layout")
133 }
134
135 pub fn telex_2() -> Self {
137 parse_input_method("Telex 2")
138 }
139
140 pub fn telex_vni() -> Self {
142 parse_input_method("Telex + VNI")
143 }
144
145 pub fn telex_vni_viqr() -> Self {
147 parse_input_method("Telex + VNI + VIQR")
148 }
149
150 pub fn vni_french_layout() -> Self {
152 parse_input_method("VNI Bàn phím tiếng Pháp")
153 }
154
155 pub fn telex_w() -> Self {
157 parse_input_method("Telex W")
158 }
159}
160
161pub(crate) fn parse_input_method(im_name: &str) -> InputMethod {
163 let defs = crate::input_method_def::get_input_method_definitions();
164 defs.get(im_name)
165 .copied()
166 .map(|def| parse_input_method_def(im_name, def))
167 .unwrap_or_default()
168}
169
170pub(crate) fn parse_input_method_def(
171 im_name: &str,
172 im_def: &InputMethodDef,
173) -> InputMethod {
174 let mut im =
175 InputMethod { name: im_name.to_string(), ..Default::default() };
176
177 for (key_str, line) in im_def.entries() {
178 let key = match key_str.chars().next() {
179 Some(c) => c,
180 None => continue,
181 };
182
183 im.rules.extend(parse_rules(key, line));
184
185 if contains_uo_case_insensitive(line) {
186 im.super_keys.push(key);
187 }
188 im.keys.push(key);
189 }
190
191 for rule in &im.rules {
192 if rule.effect_type == EffectType::Appending {
193 im.appending_keys.push(rule.key);
194 }
195 if rule.effect_type == EffectType::ToneTransformation {
196 im.tone_keys.push(rule.key);
197 }
198 }
199
200 im
201}
202
/// Returns `true` when `s` contains a `u` immediately followed by an `o`,
/// comparing ASCII letters case-insensitively.
#[inline]
fn contains_uo_case_insensitive(s: &str) -> bool {
    // ASCII bytes never occur inside a multi-byte UTF-8 sequence, so looking
    // at adjacent bytes is the same as looking at adjacent characters here.
    s.as_bytes()
        .windows(2)
        .any(|pair| pair.eq_ignore_ascii_case(b"uo"))
}
215
216pub(crate) fn parse_rules(key: char, line: &str) -> Vec<Rule> {
217 if let Some(tone) = TONES.get(line).copied() {
218 return vec![Rule {
219 key,
220 effect_type: EffectType::ToneTransformation,
221 effect: tone as u8,
222 effect_on: '\0',
223 result: '\0',
224 appended_rules: Box::default(),
225 }];
226 }
227
228 parse_toneless_rules(key, line)
229}
230
231pub(crate) fn parse_toneless_rules(key: char, line: &str) -> Vec<Rule> {
232 let lower = line.to_lowercase();
233
234 if let Some((effective_ons, results, rest)) = parse_dsl(&lower) {
235 let mut rules = Vec::new();
236 for (effective_on, result) in
237 effective_ons.into_iter().zip(results.into_iter())
238 {
239 let Some(effect) = find_mark_from_char(result) else {
240 continue;
241 };
242 rules.extend(parse_toneless_rule(
243 key,
244 effective_on,
245 result,
246 effect,
247 ));
248 }
249
250 if let Some(rule) = get_appending_rule(key, rest) {
251 rules.push(rule);
252 }
253
254 return rules;
255 }
256
257 if let Some(rule) = get_appending_rule(key, line) {
258 return vec![rule];
259 }
260
261 Vec::new()
262}
263
264fn parse_toneless_rule(
265 key: char,
266 effective_on: char,
267 result: char,
268 effect: Mark,
269) -> Vec<Rule> {
270 let mut rules = Vec::new();
271
272 for chr in get_mark_family(effective_on) {
273 if chr == result {
274 rules.push(Rule {
275 key,
276 effect_type: EffectType::MarkTransformation,
277 effect: 0,
278 effect_on: result,
279 result: effective_on,
280 appended_rules: Box::default(),
281 });
282 continue;
283 }
284
285 if is_vowel(chr) {
286 for tone in 0u8..=5 {
287 rules.push(Rule {
288 key,
289 effect_type: EffectType::MarkTransformation,
290 effect_on: add_tone_to_char(chr, tone),
291 effect: effect as u8,
292 result: add_tone_to_char(result, tone),
293 appended_rules: Box::default(),
294 });
295 }
296 } else {
297 rules.push(Rule {
298 key,
299 effect_type: EffectType::MarkTransformation,
300 effect_on: chr,
301 effect: effect as u8,
302 result,
303 appended_rules: Box::default(),
304 });
305 }
306 }
307
308 rules
309}
310
/// Splits a DSL line of the form `targets_results[rest]`.
///
/// `targets` is the text before the first `_` and must be a non-empty run of
/// ASCII letters. `results` is the leading run of alphabetic characters
/// after that `_` and must be non-empty. `rest` is whatever follows the
/// results (possibly empty). Returns `None` when the line has no `_` or
/// either run is invalid.
fn parse_dsl(s: &str) -> Option<(Vec<char>, Vec<char>, &str)> {
    let (targets, tail) = s.split_once('_')?;
    let targets_valid =
        !targets.is_empty() && targets.chars().all(|c| c.is_ascii_alphabetic());
    if !targets_valid {
        return None;
    }

    // Byte offset of the first non-letter, or the end of the tail.
    let letters_end = tail
        .find(|c: char| !c.is_alphabetic())
        .unwrap_or(tail.len());
    let (letters, rest) = tail.split_at(letters_end);
    if letters.is_empty() {
        return None;
    }

    Some((targets.chars().collect(), letters.chars().collect(), rest))
}
337
338fn get_appending_rule(key: char, value: &str) -> Option<Rule> {
340 if !value.starts_with('_') {
341 return None;
342 }
343
344 let start = if value.starts_with("__") { 2 } else { 1 };
346 let tail = value.get(start..)?;
347
348 let mut letters = Vec::new();
349 for ch in tail.chars() {
350 if ch.is_alphabetic() {
351 letters.push(ch);
352 } else {
353 break;
354 }
355 }
356
357 let first = *letters.first()?;
358
359 let mut appended_rules = Vec::new();
360 for &ch in letters.iter().skip(1) {
361 appended_rules.push(Rule {
362 key,
363 effect_type: EffectType::Appending,
364 effect: 0,
365 effect_on: ch,
366 result: ch,
367 appended_rules: Box::default(),
368 });
369 }
370
371 Some(Rule {
372 key,
373 effect_type: EffectType::Appending,
374 effect: 0,
375 effect_on: first,
376 result: first,
377 appended_rules: appended_rules.into_boxed_slice(),
378 })
379}
380
381fn get_mark_family(c: char) -> Vec<char> {
382 let base = add_tone_to_char(c, 0);
383 let canonical = add_mark_to_toneless_char(base, 0);
384
385 let mut family: Vec<char> =
387 (0u8..=4).map(|m| add_mark_to_toneless_char(canonical, m)).collect();
388
389 family.sort_unstable();
390 family.dedup();
391 family
392}
393
394fn find_mark_from_char(c: char) -> Option<Mark> {
395 let c = c.to_lowercase().next().unwrap_or(c);
396 let toneless = add_tone_to_char(c, 0);
397 let base = add_mark_to_toneless_char(toneless, 0);
398
399 for m in 0u8..=4 {
400 if add_mark_to_toneless_char(base, m) == toneless {
401 return Some(match m {
402 1 => Mark::Hat,
403 2 => Mark::Breve,
404 3 => Mark::Horn,
405 4 => Mark::Dash,
406 _ => Mark::None,
407 });
408 }
409 }
410
411 None
412}
413
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `rule` is a mark transformation of `effect_on` with `mark`.
    fn assert_mark_rule(rule: &Rule, mark: Mark, effect_on: char) {
        assert_eq!(rule.effect_type, EffectType::MarkTransformation);
        assert_eq!(rule.get_mark(), mark);
        assert_eq!(rule.effect_on, effect_on);
    }

    /// Asserts that `line` parses to one rule with exactly one follow-up
    /// appending rule acting on `follower`.
    fn assert_single_follower(key: char, line: &str, follower: char) {
        let rules = parse_toneless_rules(key, line);
        assert_eq!(rules.len(), 1);
        let followers = &rules[0].appended_rules;
        assert_eq!(followers.len(), 1);
        assert_eq!(followers[0].effect_type, EffectType::Appending);
        assert_eq!(followers[0].effect_on, follower);
    }

    #[test]
    fn parse_tone_rules() {
        let cleared = parse_rules('z', "XoaDauThanh");
        assert_eq!(cleared.len(), 1);
        assert_eq!(cleared[0].effect_type, EffectType::ToneTransformation);
        assert_eq!(cleared[0].effect, Tone::None as u8);

        let tilde = parse_rules('x', "DauNga");
        assert_eq!(tilde.len(), 1);
        assert_eq!(tilde[0].effect_type, EffectType::ToneTransformation);
        assert_eq!(tilde[0].get_tone(), Tone::Tilde);
    }

    #[test]
    fn parse_toneless_rules_cases() {
        let dash = parse_toneless_rules('d', "D_Đ");
        assert_eq!(dash.len(), 2);
        assert_eq!(dash[0].effect_type, EffectType::MarkTransformation);
        assert_eq!(dash[0].effect, Mark::Dash as u8);
        assert_eq!(dash[0].effect_on, 'd');

        let appended = parse_toneless_rules('{', "_Ư");
        assert_eq!(appended.len(), 1);
        assert_eq!(appended[0].effect_type, EffectType::Appending);
        assert_eq!(appended[0].effect_on, 'Ư');

        let marks = parse_toneless_rules('w', "UOA_ƯƠĂ");
        assert_eq!(marks.len(), 33);
        for (index, mark, effect_on) in
            [(0, Mark::Horn, 'u'), (7, Mark::Horn, 'o'), (20, Mark::Breve, 'a')]
        {
            assert_mark_rule(&marks[index], mark, effect_on);
        }

        let marks_and_append = parse_toneless_rules('w', "UOA_ƯƠĂ__Ư");
        assert_eq!(marks_and_append.len(), 34);
        assert_mark_rule(&marks_and_append[20], Mark::Breve, 'a');
        assert_eq!(marks_and_append[33].effect_type, EffectType::Appending);
        assert_eq!(marks_and_append[33].effect_on, 'ư');
    }

    #[test]
    fn parse_append_rule() {
        assert_single_follower('[', "__ươ", 'ơ');
        assert_single_follower('{', "__ƯƠ", 'Ơ');
    }

    #[test]
    fn parse_input_method_super_key_detection() {
        let telex = parse_input_method("Telex");
        assert!(telex.super_keys.contains(&'w'));
    }

    #[test]
    fn parse_telex_o_hat_rule_exists() {
        let rules = parse_toneless_rules('o', "O_Ô");

        let has_hat_rule = rules.iter().any(|rule| {
            rule.effect_type == EffectType::MarkTransformation
                && rule.get_mark() == Mark::Hat
                && rule.effect_on == 'o'
                && rule.result == 'ô'
        });
        assert!(has_hat_rule);

        let has_appending = rules
            .iter()
            .any(|rule| rule.effect_type == EffectType::Appending);
        assert!(!has_appending);
    }

    #[test]
    fn telex2_has_no_appending_rule_for_o() {
        let telex_2 = parse_input_method("Telex 2");
        let o_rules: Vec<&Rule> =
            telex_2.rules.iter().filter(|rule| rule.key == 'o').collect();

        assert!(!o_rules.is_empty());
        assert!(
            o_rules
                .iter()
                .all(|rule| rule.effect_type != EffectType::Appending)
        );
    }
}