Skip to main content

bamboo_core/
dfa.rs

1//! DFA-based engine for high-performance Vietnamese input method.
2
3use crate::engine::Transformation;
4use crate::input_method::InputMethod;
5use std::collections::HashMap;
6
7/// A DFA state representing a unique syllable composition.
8///
9/// Each state stores transitions to next states based on ASCII keys
10/// and the corresponding transformation stack (composition).
11#[derive(Clone, Debug)]
12pub struct State {
13    /// State transition table for 128 ASCII characters.
14    /// A value of 0 indicates no transition or fallback to the Rule Engine.
15    pub transitions: [u32; 128],
16    /// The transformation stack that makes up this state.
17    pub composition: Box<[Transformation]>,
18}
19
20impl Default for State {
21    fn default() -> Self {
22        Self { transitions: [0; 128], composition: Box::new([]) }
23    }
24}
25
26/// The DFA (Deterministic Finite Automaton) core that manages states and transitions.
27///
28/// This DFA supports a Lazy JIT mechanism, allowing the Engine to learn new states
29/// during execution to optimize performance.
30pub struct Dfa {
31    /// List of states in the DFA.
32    pub states: Vec<State>,
33    /// Hash map mapping transformation stacks to state IDs to avoid duplicates.
34    pub composition_to_state: HashMap<Box<[Transformation]>, u32>,
35}
36
37impl Default for Dfa {
38    fn default() -> Self {
39        Self::new()
40    }
41}
42
43impl Dfa {
44    /// Creates a new DFA with an initial empty state (empty syllable).
45    pub fn new() -> Self {
46        let mut dfa =
47            Self { states: Vec::with_capacity(1024), composition_to_state: HashMap::new() };
48        // Initial empty state
49        let empty_comp = Box::new([]);
50        dfa.states.push(State::default());
51        dfa.composition_to_state.insert(empty_comp, 0);
52        dfa
53    }
54
55    /// Retrieves a reference to a state by its ID.
56    pub fn get_state(&self, id: u32) -> &State {
57        &self.states[id as usize]
58    }
59
60    /// Adds a new state to the DFA from a transformation stack.
61    /// If the state already exists, it returns the existing state ID.
62    pub fn add_state(&mut self, composition: &[Transformation]) -> u32 {
63        let comp_box: Box<[Transformation]> = composition.to_vec().into_boxed_slice();
64        if let Some(&id) = self.composition_to_state.get(&comp_box) {
65            return id;
66        }
67
68        let id = self.states.len() as u32;
69        self.states.push(State { transitions: [0; 128], composition: comp_box.clone() });
70        self.composition_to_state.insert(comp_box, id);
71        id
72    }
73
74    /// Finds the state ID corresponding to a transformation stack.
75    pub fn find_state(&self, composition: &[Transformation]) -> Option<u32> {
76        self.composition_to_state.get(composition).copied()
77    }
78}
79
80/// A DFA compiler that supports pre-initializing common states.
81#[allow(dead_code)]
82pub struct DfaCompiler<'a> {
83    pub input_method: &'a InputMethod,
84    pub flags: u32,
85    pub composition_to_state: HashMap<Vec<Transformation>, u32>,
86    pub dfa: Dfa,
87}
88
89#[allow(dead_code)]
90impl<'a> DfaCompiler<'a> {
91    pub fn new(im: &'a InputMethod, flags: u32) -> Self {
92        let mut compiler =
93            Self { input_method: im, flags, composition_to_state: HashMap::new(), dfa: Dfa::new() };
94        compiler.composition_to_state.insert(Vec::new(), 0);
95        compiler
96    }
97
98    /// Compiles common Vietnamese syllables into the DFA to reduce initial latency.
99    pub fn compile_common(&mut self) {
100        let vowels = ['a', 'e', 'i', 'o', 'u', 'y'];
101        let tones = ['s', 'f', 'r', 'x', 'j']; // Telex tone keys
102
103        // Compile single vowels and basic tone combinations
104        for &v in &vowels {
105            self.simulate_key(v);
106            for &t in &tones {
107                self.simulate_sequence(&[v, t]);
108            }
109        }
110
111        // Common double vowel combinations
112        let double_vowels = ["aa", "ee", "oo", "aw", "ow", "uw"];
113        for s in double_vowels {
114            self.simulate_str(s);
115        }
116    }
117
118    fn simulate_key(&mut self, key: char) -> u32 {
119        let mut engine = crate::Engine::with_config(
120            self.input_method.clone(),
121            crate::Config::from_flags(self.flags),
122        );
123        engine.process_key(key, crate::Mode::Vietnamese);
124        self.dfa.add_state(engine.active_slice())
125    }
126
127    fn simulate_sequence(&mut self, keys: &[char]) -> u32 {
128        let mut engine = crate::Engine::with_config(
129            self.input_method.clone(),
130            crate::Config::from_flags(self.flags),
131        );
132        for &k in keys {
133            engine.process_key(k, crate::Mode::Vietnamese);
134        }
135        self.dfa.add_state(engine.active_slice())
136    }
137
138    fn simulate_str(&mut self, s: &str) {
139        let mut engine = crate::Engine::with_config(
140            self.input_method.clone(),
141            crate::Config::from_flags(self.flags),
142        );
143        for k in s.chars() {
144            engine.process_key(k, crate::Mode::Vietnamese);
145        }
146        self.dfa.add_state(engine.active_slice());
147    }
148}