Skip to main content

bamboo_core/
dfa.rs

1//! DFA-based engine for high-performance Vietnamese input method.
2
3use crate::engine::Transformation;
4use crate::input_method::InputMethod;
5use std::collections::HashMap;
6
7/// A DFA state representing a unique syllable composition.
8#[derive(Clone, Debug)]
9pub struct State {
10    /// State transition table for 128 ASCII characters.
11    pub transitions: [u32; 128],
12    /// Start index in the DFA arena.
13    pub comp_offset: u32,
14    /// Number of transformations in this state.
15    pub comp_len: u8,
16}
17
18impl Default for State {
19    fn default() -> Self {
20        Self { transitions: [0; 128], comp_offset: 0, comp_len: 0 }
21    }
22}
23
24/// The DFA core that manages states and transitions with Arena Allocation.
25pub struct Dfa {
26    pub states: Vec<State>,
27    pub arena: Vec<Transformation>,
28    pub composition_to_state: HashMap<Box<[Transformation]>, u32>,
29}
30
31impl Default for Dfa {
32    fn default() -> Self {
33        Self::new()
34    }
35}
36
37impl Dfa {
38    /// Creates a new DFA with an initial empty state.
39    pub fn new() -> Self {
40        let mut dfa = Self {
41            states: Vec::with_capacity(1024),
42            arena: Vec::with_capacity(4096),
43            composition_to_state: HashMap::new(),
44        };
45        let empty_comp: Box<[Transformation]> = Box::new([]);
46        dfa.states.push(State::default());
47        dfa.composition_to_state.insert(empty_comp, 0);
48        dfa
49    }
50
51    pub fn get_state(&self, id: u32) -> &State {
52        &self.states[id as usize]
53    }
54
55    pub fn get_composition(&self, state_id: u32) -> &[Transformation] {
56        let state = &self.states[state_id as usize];
57        let start = state.comp_offset as usize;
58        let end = start + state.comp_len as usize;
59        &self.arena[start..end]
60    }
61
62    pub fn add_state(&mut self, composition: &[Transformation]) -> u32 {
63        if let Some(&id) = self.composition_to_state.get(composition) {
64            return id;
65        }
66
67        let id = self.states.len() as u32;
68        let comp_offset = self.arena.len() as u32;
69        let comp_len = composition.len() as u8;
70
71        self.arena.extend_from_slice(composition);
72        self.states.push(State { transitions: [0; 128], comp_offset, comp_len });
73
74        self.composition_to_state.insert(composition.to_vec().into_boxed_slice(), id);
75        id
76    }
77
78    pub fn find_state(&self, composition: &[Transformation]) -> Option<u32> {
79        self.composition_to_state.get(composition).copied()
80    }
81}
82
83/// A DFA compiler that supports pre-initializing common states.
84pub struct DfaCompiler<'a> {
85    pub input_method: &'a InputMethod,
86    pub flags: u32,
87    pub dfa: Dfa,
88}
89
90impl<'a> DfaCompiler<'a> {
91    pub fn new(im: &'a InputMethod, flags: u32) -> Self {
92        Self { input_method: im, flags, dfa: Dfa::new() }
93    }
94
95    /// Compiles common Vietnamese syllables into the DFA.
96    pub fn compile_common(&mut self) {
97        let fc = [
98            "", "b", "c", "ch", "d", "dd", "g", "gh", "h", "k", "kh", "l", "m", "n", "nh", "ng",
99            "ngh", "p", "ph", "q", "r", "s", "t", "th", "tr", "v", "x",
100        ];
101        let vowels = [
102            "a", "e", "i", "o", "u", "y", "aa", "ee", "oo", "aw", "ow", "uw", "ai", "ao", "au",
103            "ay", "ie", "oa", "oe", "oi", "ua", "ue", "ui", "uo", "uy",
104        ];
105        let tones = ["", "s", "f", "r", "x", "j"];
106
107        for &f in &fc {
108            for &v in &vowels {
109                for &t in &tones {
110                    let mut seq = String::with_capacity(8);
111                    seq.push_str(f);
112                    seq.push_str(v);
113                    seq.push_str(t);
114                    self.simulate_str(&seq);
115                }
116            }
117        }
118    }
119
120    fn simulate_str(&mut self, s: &str) {
121        let mut engine = crate::Engine::with_config(
122            self.input_method.clone(),
123            crate::Config::from_flags(self.flags),
124        );
125
126        let mut current_state = 0u32;
127        for k in s.chars() {
128            if !k.is_ascii() {
129                continue;
130            }
131
132            let prev_state = current_state;
133            engine.process_key(k, crate::Mode::Vietnamese);
134
135            let comp = engine.active_slice();
136            current_state = self.dfa.add_state(comp);
137
138            // Link the transition
139            self.dfa.states[prev_state as usize].transitions[k as usize] = current_state;
140        }
141    }
142}