gerber/
nfa.rs

1use corrida::Corrida;
2use smallmap::Map;
3use std::collections::HashMap;
4use std::{collections::HashSet, ptr::NonNull};
5use std::hash::Hash;
6use smallvec::{Array, SmallVec};
7use crate::dfa::{Dfa, PartialState, State as DfaState};
8use crate::dfa_state_creator;
9
10
11type Transitions<const TARGETS_HINT: usize, Σ> = SmallVec<[NonNull<State<{TARGETS_HINT}, Σ>>; TARGETS_HINT]>;
12
13// MARK: State
14/// A state in the NFA, optimized for NFA which mostly contain nodes with small number of transitions. If a node has more than 'TARGETS_HINT' transitions on a given symbol, the target list will be heap allocated.
15pub struct State<const TARGETS_HINT: usize, Σ: Eq + Hash + Copy> 
16where 
17    [NonNull<State<TARGETS_HINT, Σ>>; TARGETS_HINT]: Array<Item = NonNull<State<TARGETS_HINT, Σ>>>,
18{
19    transitions: Map<Option<Σ>, Transitions<TARGETS_HINT, Σ>>,
20    is_accept: bool,
21}
22
23/// An iterator over the targets of the transitions from a state for a given symbol.
24pub struct HomoIter<'a, const TARGETS_HINT: usize, Σ: Eq + Hash + Copy> 
25where 
26    [NonNull<State<TARGETS_HINT, Σ>>; TARGETS_HINT]: Array<Item = NonNull<State<TARGETS_HINT, Σ>>>,
27{
28    _self: &'a State<TARGETS_HINT, Σ>,
29    transitions_vec: &'a [NonNull<State<TARGETS_HINT, Σ>>],
30    index: usize,
31}
32
33impl <'a, const TARGETS_HINT: usize, Σ: Eq + Hash + Copy> Iterator for HomoIter<'a, TARGETS_HINT, Σ> 
34where 
35    [NonNull<State<TARGETS_HINT, Σ>>; TARGETS_HINT]: Array<Item = NonNull<State<TARGETS_HINT, Σ>>>,
36{
37    type Item = &'a State<TARGETS_HINT, Σ>;
38
39    fn next(&mut self) -> Option<Self::Item> {
40        if self.index >= self.transitions_vec.len() {
41            None
42        } else {
43            let next = unsafe { self.transitions_vec[self.index].as_ref() };
44            self.index += 1;
45            Some(next)
46        }
47    }
48}
49
50impl<const TARGETS_HINT: usize, Σ: Eq + Hash + Copy> State<TARGETS_HINT, Σ> 
51where
52    [NonNull<State<TARGETS_HINT, Σ>>; TARGETS_HINT]: Array<Item = NonNull<State<TARGETS_HINT, Σ>>>,
53{
54    /// Creates a new state with no transitions and not accepting.
55    pub fn new(is_accept: bool) -> Self {
56        Self {
57            transitions: Map::new(),
58            is_accept,
59        }
60    }
61
62    /// Pushes a transition to the state.
63    pub fn push_transition(&mut self, symbol: Option<Σ>, target: Option<&Self>) {
64        let target = (match target {
65            Some(target) => NonNull::new(target as *const Self as *mut Self),
66            None => NonNull::new(self as *const Self as *mut Self),
67        }).unwrap();
68
69        let vec = match self.transitions.get_mut(&symbol) {
70            Some(existing) => existing,
71            None => {
72                self.transitions.insert(symbol, SmallVec::new());
73                self.transitions.get_mut(&symbol).unwrap()
74            }
75        };
76
77        vec.push(target);
78    }
79
80    /// Sets the state to be accepting or not.
81    pub fn set_accept(&mut self, is_accept: bool) {
82        self.is_accept = is_accept;
83    }
84
85    /// Returns an iterator over the transitions for the given symbol.
86    pub fn get_transitions(&self, symbol: Option<Σ>) -> HomoIter<'_, TARGETS_HINT, Σ> {
87        HomoIter {
88            _self: self,
89            transitions_vec: self.transitions.get(&symbol).map(|smallvec| smallvec.as_slice()).unwrap_or(&[]),
90            index: 0,
91        }
92    }
93
94    /// Returns if the state is accepting.
95    pub fn is_accept(&self) -> bool {
96        self.is_accept
97    }
98}
99
100/// Creates a macro for creating states in the NFA.
101#[macro_export]
102macro_rules! nfa_state_creator {
103    (($d: tt), $func_name: ident, $arena: expr, $symbol: ty, $TARGETS_HINT: expr) => {
104        macro_rules! $func_name {
105            ($d($is_accept:expr $d(,$transitions: expr)?)? ) => {
106                {
107                    let mut accept_state = false;
108                    $d(
109                        accept_state = $is_accept;
110                    )?
111                    let new_state = $arena.alloc(State::<$TARGETS_HINT, $symbol>::new(accept_state));
112                    $d(
113                        $d(
114                            let transitions: &[(_, Option<&State::<$TARGETS_HINT, $symbol>>)] = $transitions;
115                            transitions.iter().for_each(|&(symbol, target)| new_state.push_transition(symbol, target));
116                        )?
117                    )?
118                    new_state
119                }
120            };
121        }        
122    }
123}
124
125// MARK: NFA
126/// A non-deterministic fintie automaton.
127pub struct Nfa<'a ,T> {
128    start_node: &'a T,
129}
130
131type SymbolMapValue<'a, const TARGETS_HINT: usize, Σ> = (SmallVec<[&'a State<{TARGETS_HINT}, Σ>; 32]>, HashSet<*const State<TARGETS_HINT, Σ>>);
132impl<'a, const TARGETS_HINT:usize, Σ: Eq + Hash + Copy>  Nfa<'a, State<TARGETS_HINT, Σ>> 
133where 
134    [NonNull<State<TARGETS_HINT, Σ>>; TARGETS_HINT]: Array<Item = NonNull<State<TARGETS_HINT, Σ>>>,
135{
136    /// Creates a new NFA with the given start node, consumes the arena where the nodes were made.
137    /// TODO: This can just take any arena, and then we would be able to change the NFA after creation. is that a good idea?
138    pub fn new(start_node: &'a State<TARGETS_HINT, Σ>) -> Self {
139        Self {
140            start_node
141        }
142    }
143
144
145    //? Possibly my worst work yet.
146    /// Converts the NFA to a DFA using subset construction.
147    pub fn as_dfa(&self, arena: &Corrida) -> Dfa<'a, Σ, PartialState<Σ>> {
148        
149        dfa_state_creator!(($), new_state, arena, PartialState<Σ>);
150                
151        let mut hash_map = HashMap::new();
152
153        let set_hash = |set: &SmallVec<[&State<TARGETS_HINT, Σ>; 32]>| -> Vec<*const State<TARGETS_HINT, Σ>> {
154            set.iter().map(|&r| r as *const State<TARGETS_HINT, Σ>).collect()
155        };
156
157        let mut current_states: SmallVec<[&State<TARGETS_HINT, Σ>; 32]> = SmallVec::from_elem(self.start_node, 1);
158        let mut set: HashSet<*const State<TARGETS_HINT, Σ>> = HashSet::from([self.start_node as *const State<TARGETS_HINT, Σ>]);
159
160        let mut i = 0;
161        while i < current_states.len() {
162            let state = current_states[i];
163            for next in state.get_transitions(None) {
164                if set.insert(next as *const State<TARGETS_HINT, Σ>) {
165                    current_states.push(next);
166                }
167            }
168            i += 1;
169        }
170
171        // We have our start state now. 
172        let hash = set_hash(&current_states);
173        let mut is_accept = false;
174        for state in &current_states {
175            if state.is_accept {
176                is_accept = true;
177                break;
178            }
179        }
180        hash_map.insert(hash.clone(), (new_state!(is_accept) as *mut PartialState<Σ>, false));
181
182        
183        let mut queue = vec![(current_states, hash.clone())];
184        while let Some((subset, hash)) = queue.pop() {
185            let (my_dfa_node_ptr, processed) = hash_map.get_mut(&hash).unwrap();
186            let my_dfa_node = unsafe { my_dfa_node_ptr.as_mut().unwrap() };
187            *processed = true;
188
189
190            let mut symbol_map: HashMap<Σ, SymbolMapValue<'a, TARGETS_HINT, Σ>> = HashMap::new();
191            for state in subset {
192                for (symbol, next) in state.transitions.iter().filter(|(symbol, _)| symbol.is_some()) {
193                    let (vecb, setb) = symbol_map.entry(symbol.unwrap()).or_insert((SmallVec::new(), HashSet::new()));
194                    for next in next {
195                        if setb.insert(next.as_ptr()) {
196                            vecb.push(unsafe { next.as_ref() });
197                        }
198                    }
199                }
200            }
201
202            for (vec, subset) in symbol_map.values_mut() {
203                let mut i = 0;
204                while i < vec.len() {
205                    let state = vec[i];
206                    for next in state.get_transitions(None) {
207                        if subset.insert(next as *const State<TARGETS_HINT, Σ>) {
208                            vec.push(next);
209                        }
210                    }
211                    i += 1;
212                }
213            }
214
215            for (symbol, (subset, _)) in symbol_map {
216                let hash = set_hash(&subset);
217                let mut is_accept = false;
218                for state in &subset {
219                    if state.is_accept {
220                        is_accept = true;
221                        break;
222                    }
223                }
224                let &mut (dfa_node, processed) = hash_map.entry(hash.clone()).or_insert_with(|| (new_state!(is_accept) as *mut PartialState<Σ>, false));
225                my_dfa_node.add_transition((symbol, Some(unsafe { dfa_node.as_ref().unwrap()})));
226                if !processed {
227                    queue.push((subset, hash));
228                }
229            }
230        }
231
232        Dfa::<Σ, PartialState<Σ>>::new(unsafe { hash_map.get(&hash).unwrap().0.as_ref().unwrap() })
233    }
234
235    /// Simulates the NFA on the given input, returning if the NFA accepts the input.
236    pub fn simulate_iter(&self, input: impl Iterator<Item = Σ>) -> bool {
237        let mut current_states: SmallVec<[&State<TARGETS_HINT, Σ>; 32]> = SmallVec::from_elem(self.start_node, 1);
238        let mut set: HashSet<*const State<TARGETS_HINT, Σ>> = HashSet::from([self.start_node as *const State<TARGETS_HINT, Σ>]);
239        let mut next_states: SmallVec<[&State<TARGETS_HINT, Σ>; 32]> = SmallVec::new();
240
241        let mut i = 0;
242        while i < current_states.len() {
243            let state = current_states[i];
244            for next in state.get_transitions(None) {
245                if set.insert(next as *const State<TARGETS_HINT, Σ>) {
246                    current_states.push(next);
247                }
248            }
249            i += 1;
250        }
251
252        for symbol in input {
253            set.clear();
254
255            // Symbol Transition
256            for cur in current_states.into_iter() {
257                for next in cur.get_transitions(Some(symbol)) {
258                    if set.insert(next as *const State<TARGETS_HINT, Σ>) {
259                        next_states.push(next);
260                    }
261                }
262            }
263
264            // Epsilon Closure
265            let mut i = 0;
266            while i < next_states.len() {
267                let state = next_states[i];
268                for next in state.get_transitions(None) {
269                    if set.insert(next as *const State<TARGETS_HINT, Σ>) {
270                        next_states.push(next);
271                    }
272                }
273                i += 1;
274            }
275
276            (current_states, next_states) = (next_states, SmallVec::new());
277        }
278
279        current_states.into_iter().any(|state| state.is_accept)
280    }
281
282    /// Simulates the NFA on the given input, returning if the NFA accepts the input.
283    pub fn simulate_slice(&self, input: &[Σ]) -> bool {
284        self.simulate_iter(input.iter().copied())
285    }
286
287    /// Simulates the NFA on the given input, returning if the NFA accepts the input. Will infinite loop on epsilon loops, so only use for 'friendly' NFAs where specific states are not reached many times at the same token.
288    pub fn simulate_iter_friendly(&self, input: impl Iterator<Item = Σ>) -> bool {
289        let mut current_states: SmallVec<[&State<TARGETS_HINT, Σ>; 32]> = SmallVec::from_elem(self.start_node, 1);
290        let mut next_states: SmallVec<[&State<TARGETS_HINT, Σ>; 32]> = SmallVec::new();
291
292        let mut i = 0;
293        while i < current_states.len() {
294            let state = current_states[i];
295            for next in state.get_transitions(None) {
296                current_states.push(next);
297            }
298            i += 1;
299        }
300
301        //? In a well formed NFA, i believe that reaching the same state from two different paths is very rare.
302        for symbol in input {
303            for cur in current_states.into_iter() {
304                for next in cur.get_transitions(Some(symbol)) {
305                    next_states.push(next);
306                }
307            }
308
309            let mut i = 0;
310            while i < next_states.len() {
311                
312                for next in next_states[i].get_transitions(None) {
313                    next_states.push(next);
314                }
315                i += 1;
316            }
317            (current_states, next_states) = (next_states, SmallVec::new());
318        }
319
320        current_states.into_iter().any(|state| state.is_accept)
321    }
322
323    /// Simulates the NFA on the given input, returning if the NFA accepts the input. Will infinite loop on epsilon loops, so only use for 'friendly' NFAs where specific states are not reached many times at the same token.
324    pub fn simulate_slice_friendly(&self, input: &[Σ]) -> bool {
325        self.simulate_iter_friendly(input.iter().copied())
326    }
327}
328
329//MARK: Tests
330#[cfg(test)]
331mod test {
332    use std::time::Instant;
333
334    use super::*;
335
336    use corrida::Corrida;
337
338    #[test]
339    fn test_homo() {
340        let arena = Corrida::new(None);
341
342        nfa_state_creator!(($), new_state, arena, char, 2);
343
344        let start_node = {
345            let s_0 = new_state!();
346
347            let s_1 = new_state!(true, &[(Some('1'), None)]);
348            let s_2 = new_state!(false, &[(Some('1'), None)]);
349            s_2.push_transition(Some('0'), Some(s_1));
350            s_1.push_transition(Some('0'), Some(s_2));
351            s_0.push_transition(None, Some(s_1));
352
353            let s_3 = new_state!(true, &[(Some('0'), None)]);
354            let s_4 = new_state!(false, &[(Some('0'), None)]);
355            s_4.push_transition(Some('1'), Some(s_3));
356            s_3.push_transition(Some('1'), Some(s_4));
357            s_0.push_transition(None, Some(s_3));
358
359            s_0
360        };
361
362        let nfa = Nfa::new(start_node);
363        assert!(nfa.simulate_slice_friendly(&[]));
364        assert!(nfa.simulate_slice_friendly(&['0','0']));
365        assert!(!nfa.simulate_slice_friendly(&['0','1']));
366        assert!(nfa.simulate_slice_friendly(&['1','1']));
367        assert!(nfa.simulate_slice_friendly(&['0','0','0','1','0','1','0']));
368        assert!(!nfa.simulate_slice_friendly(&['0','0','0','1','1','1','0','0']));
369    }
370
371    #[test]
372    pub fn test_big() {
373        let arena = Corrida::new(None);
374        nfa_state_creator!(($), new_state, arena, u8, 2);
375
376        let start_node = {
377            let s_0 = new_state!(false, &[(Some(1), None), (Some(0), None)]);
378            let s_1 = new_state!();
379            s_0.push_transition(Some(1), Some(s_1));
380            let s_2 = new_state!();
381            s_1.push_transition(Some(0), Some(s_2));
382            s_1.push_transition(Some(1), Some(s_2));
383            let s_3 = new_state!();
384            s_2.push_transition(Some(0), Some(s_3));
385            s_2.push_transition(Some(1), Some(s_3));
386            let s_4 = new_state!(true);
387            s_3.push_transition(Some(0), Some(s_4));
388            s_3.push_transition(Some(1), Some(s_4));
389            
390            s_0
391        };
392
393        let nfa = Nfa::new(start_node);
394        let mut test = vec![1; 1_000_000];
395        test.extend([0,0,0]);
396
397        let start = Instant::now();
398        assert!(nfa.simulate_slice_friendly(&test));
399        test.push(1);
400        assert!(!nfa.simulate_slice_friendly(&test));
401        let a = start.elapsed();
402
403        test.pop();
404
405        let start = Instant::now();
406        assert!(nfa.simulate_slice(&test));
407        test.push(1);
408        assert!(!nfa.simulate_slice(&test));
409        let b = start.elapsed();
410
411        test.pop();
412
413        let start = Instant::now();
414        let dfa = nfa.as_dfa(&arena);
415        assert!(dfa.simulate_slice(&test));
416        test.push(1);
417        assert!(!dfa.simulate_slice(&test));
418        let c = start.elapsed();
419
420        println!("Big Test -- Friendly NFA: {:?} NFA: {:?} DFA: {:?}", a, b, c);
421    }
422
423    #[test]
424    pub fn test_loop() {
425        let arena = Corrida::new(None);
426        nfa_state_creator!(($), new_state, arena, u8, 2);
427
428        let start_node = {
429            let s_0 = new_state!(false, &[(Some(1), None), (Some(0), None)]);
430            let mut cur = new_state!(false);
431            s_0.push_transition(None, Some(cur));
432
433            for _ in (0..100).rev() {
434                let new = new_state!(false);
435                cur.push_transition(None, Some(new));
436                cur = new;
437            }
438            cur.set_accept(true);
439            cur.push_transition(None, Some(s_0));
440            
441            s_0
442        };
443
444        let nfa = Nfa::new(start_node);
445        let test = vec![1; 100_000];
446        let start = Instant::now();
447        assert!(nfa.simulate_slice(&test));
448        let a = start.elapsed();
449
450        let dfa = nfa.as_dfa(&arena);
451        let start = Instant::now();
452        assert!(dfa.simulate_slice(&test));
453        let b = start.elapsed();
454
455        println!("Loop -- NFA: {:?} DFA: {:?}", a, b);
456    }
457}
458
459
460
461