smt_str/automata/
mod.rs

1pub mod comp;
2mod compile;
3pub mod det;
4mod dot;
5pub mod inter;
6
7use std::collections::BinaryHeap;
8use std::error::Error;
9use std::{
10    collections::{HashMap, HashSet, VecDeque},
11    fmt::Display,
12};
13
14use crate::alphabet::Alphabet;
15use crate::{
16    alphabet::{partition::AlphabetPartitionMap, CharRange},
17    SmtChar, SmtString,
18};
19
20extern crate dot as dotlib;
21
22pub use compile::compile;
23
24/// The type of a transition in a nondeterministic finite automaton.
25/// Every transition in an automaton is labeled with a type that determines the behavior of the transition.
26/// The type can be a character range, a negated character range, or an epsilon transition.
27/// For a character range transition, the transition is taken if the input character is in the range.
28/// For a negated character range transition, the transition is taken if the input character is not in the range.
29/// An epsilon transition is taken without consuming any input.
30#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
31pub enum TransitionType {
32    /// A transition that is taken if the input character is in the given range.
33    Range(CharRange),
34    /// A transition that is taken if the input character is not in the given range.
35    NotRange(CharRange),
36    /// An epsilon transition that is taken without consuming any input.
37    Epsilon,
38}
39
40impl TransitionType {
41    /// Returns true if the transition is an epsilon transition.
42    pub fn is_epsilon(&self) -> bool {
43        matches!(self, TransitionType::Epsilon)
44    }
45
46    /// Returns true if the transition is an empty transition, i.e. a transition that can never be taken.
47    pub fn is_empty(&self) -> bool {
48        matches!(self, TransitionType::Epsilon)
49    }
50}
51
52/// A transition from one state to another.
53/// The transition can be of different types, e.g. a character range or an epsilon transition.
54/// The destination state is stored as an index that is unique within the automaton.
55#[derive(Debug, Clone, Eq, Hash, PartialEq)]
56pub struct Transition {
57    /// The type of the transition.
58    label: TransitionType,
59    /// The index of the destination state.
60    destination: usize,
61}
62
63impl Transition {
64    pub fn new(label: TransitionType, destination: usize) -> Self {
65        Self { label, destination }
66    }
67
68    /// Returns the destination state of the transition.
69    pub fn get_dest(&self) -> usize {
70        self.destination
71    }
72
73    /// Returns the type of the transition.
74    pub fn get_type(&self) -> &TransitionType {
75        &self.label
76    }
77
78    /// Returns if the transition is an epsilon transition.
79    pub fn is_epsilon(&self) -> bool {
80        matches!(self.label, TransitionType::Epsilon)
81    }
82}
83
84/// A state in a nondeterministic finite automaton.
85/// A state is merely a collection of transitions to other states.
86/// In a nondeterministic automaton, a state can have multiple transitions with the same input leading to different states.
87/// If the state has no transitions, it is a dead end state.
88/// If the state is a final state, it accepts the input.
89#[derive(Debug, Clone, Eq, Hash, PartialEq)]
90pub struct State {
91    transitions: Vec<Transition>,
92}
93
94impl State {
95    /// Returns an iterator over the transitions of the state.
96    fn transitions(&self) -> impl Iterator<Item = &Transition> {
97        self.transitions.iter()
98    }
99
100    /// Adds a transition to the state.
101    /// If the transition already exists, it is not added again.
102    /// Needs a linear scan over the transitions to check for duplicates.
103    fn add_transition(&mut self, label: TransitionType, destination: StateId) {
104        let transition = Transition { label, destination };
105        if !self.transitions.contains(&transition) {
106            self.transitions.push(transition);
107        }
108    }
109
110    /// Consumes the input character and returns the set of states that can be reached from this state.
111    fn consume(&self, input: SmtChar) -> HashSet<StateId> {
112        let mut res = HashSet::new();
113        for t in self.transitions() {
114            match t.label {
115                TransitionType::Range(r) if r.contains(input) => {
116                    res.insert(t.destination);
117                }
118                TransitionType::NotRange(r) if !r.contains(input) => {
119                    res.insert(t.destination);
120                }
121                _ => {}
122            }
123        }
124        res
125    }
126
127    /// Checks if the state is deterministic.
128    /// A state is deterministic if it has at most one transition for each input character.
129    /// A state is not deterministic if it has multiple transitions for the same input character or an epsilon transition.
130    pub fn is_det(&self) -> bool {
131        let mut map = AlphabetPartitionMap::empty();
132        for t in self.transitions.iter() {
133            match t.label {
134                TransitionType::Range(r) => {
135                    if map.insert(r, t.get_dest()).is_err() {
136                        return false;
137                    }
138                }
139                TransitionType::NotRange(r) => {
140                    for r in r.complement() {
141                        if map.insert(r, t.get_dest()).is_err() {
142                            return false;
143                        }
144                    }
145                }
146                _ => return false,
147            }
148        }
149        true
150    }
151}
152
153#[derive(Debug, Clone, Eq, Hash, PartialEq)]
154pub struct StateNotFound(pub StateId);
155
156impl Display for StateNotFound {
157    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
158        write!(f, "State not found: {}", self.0)
159    }
160}
161
162impl Error for StateNotFound {}
163
164/// Every state in a nondeterministic automaton is identified by a unique index.
165pub type StateId = usize;
166
167/// A nondeterministic finite automaton.
168/// The automaton consists of a collection of states, an initial state, and a set of final states.
169#[derive(Debug, Clone, Default)]
170pub struct NFA {
171    states: Vec<State>,
172    initial: Option<StateId>,
173    finals: HashSet<StateId>,
174
175    /// Flag that indicates whether the automaton is trim.
176    /// This is true if the automaton has been trimmed, i.e. all states are reachable from the initial state and all states can reach a final state.
177    /// This set to false whenever a new state is added to the automaton.
178    /// Note that adding a transition does not affect the trim flag.
179    /// The empty automaton  is considered trim.
180    trim: bool,
181    /// Flag that indicates whether the automaton is epsilon-free.
182    /// This is true if the automaton has no epsilon transitions.
183    efree: bool,
184}
185
186impl NFA {
187    /// Create a new, empty nondeterministic finite automaton.
188    pub fn new() -> Self {
189        Self {
190            states: Vec::new(),
191            initial: None,
192            finals: HashSet::new(),
193            trim: true,
194            efree: true,
195        }
196    }
197
198    /// Merges the other automaton into this automaton.
199    /// The states and transitions of the other automaton are copied into this automaton.
200    /// The transitions are updated to point to the corresponding states in this automaton.
201    /// The initial state and final states are not changed.
202    /// The method returns the offset of the states in the other automaton.
203    pub(crate) fn merge(&mut self, other: &Self) -> usize {
204        let offset = self.states.len();
205
206        self.states.extend(other.states.iter().cloned());
207        // Change the destinations of the transitions to point to the new states
208        for state in self.states.iter_mut().skip(offset) {
209            for t in state.transitions.iter_mut() {
210                t.destination += offset;
211            }
212        }
213        offset
214    }
215
216    /// Create a new nondeterministic finite automaton that accepts all strings.
217    fn universal() -> Self {
218        let mut aut = Self::new();
219        let q0 = aut.new_state();
220        aut.set_initial(q0).unwrap();
221        aut.add_final(q0).unwrap();
222        aut.add_transition(q0, q0, TransitionType::Range(CharRange::all()))
223            .unwrap();
224        aut
225    }
226
227    /// Add a new state to the automaton and return its index.
228    pub fn new_state(&mut self) -> StateId {
229        let id = self.states.len();
230        self.states.push(State {
231            transitions: Vec::new(),
232        });
233        self.trim = false;
234        id
235    }
236
237    /// Set the initial state of the automaton.
238    /// The index must be a valid state index, otherwise an error is returned.
239    pub fn set_initial(&mut self, state: StateId) -> Result<(), StateNotFound> {
240        if state < self.states.len() {
241            self.initial = Some(state);
242            Ok(())
243        } else {
244            Err(StateNotFound(state))
245        }
246    }
247
248    /// Returns the initial state of the automaton, if it exists.
249    pub fn initial(&self) -> Option<StateId> {
250        self.initial
251    }
252
253    /// Add a state to the set of final states.
254    /// The index must be a valid state index, otherwise an error is returned.
255    pub fn add_final(&mut self, state: StateId) -> Result<(), StateNotFound> {
256        if state < self.states.len() {
257            self.finals.insert(state);
258            Ok(())
259        } else {
260            Err(StateNotFound(state))
261        }
262    }
263
264    /// Returns an iterator over the final states of the automaton.
265    pub fn finals(&self) -> impl Iterator<Item = StateId> + '_ {
266        self.finals.iter().copied()
267    }
268
269    /// Returns if a state is a final state.
270    /// Invalid indices are not considered final states.
271    pub fn is_final(&self, state: StateId) -> bool {
272        self.finals.contains(&state)
273    }
274
275    /// Add a transition from one state to another.
276    /// The indices must be valid state indices, otherwise an error is returned.
277    pub fn add_transition(
278        &mut self,
279        from: StateId,
280        to: StateId,
281        label: TransitionType,
282    ) -> Result<(), StateNotFound> {
283        if to >= self.states.len() {
284            return Err(StateNotFound(to));
285        }
286        self.efree &= !label.is_epsilon();
287        if let Some(s) = self.states.get_mut(from) {
288            s.add_transition(label, to);
289            Ok(())
290        } else {
291            Err(StateNotFound(from))
292        }
293    }
294
295    /// Returns the number of states in the automaton.
296    pub fn num_states(&self) -> usize {
297        self.states.len()
298    }
299
300    /// Returns the number of transitions in the automaton.
301    /// This is the sum of the number of transitions of each state.
302    /// Note that this require a linear scan over all states and transitions.
303    pub fn num_transitions(&self) -> usize {
304        self.states.iter().map(|s| s.transitions.len()).sum()
305    }
306
307    /// Returns an iterator over the states of the automaton.
308    pub fn states(&self) -> impl Iterator<Item = StateId> {
309        0..self.states.len()
310    }
311
312    /// Returns an iterator over the transitions of the automaton.
313    /// The iterator yields pairs of a state and its outgoing transitions.
314    pub fn transitions(&self) -> impl Iterator<Item = (StateId, &Vec<Transition>)> {
315        self.states
316            .iter()
317            .enumerate()
318            .map(|(i, s)| (i, &s.transitions))
319    }
320
321    /// Returns an iterator over the transitions from a state.
322    /// If the state is not a valid state index, an error is returned.
323    pub fn transitions_from(
324        &self,
325        state: StateId,
326    ) -> Result<impl Iterator<Item = &Transition>, StateNotFound> {
327        if state < self.states.len() {
328            Ok(self.states[state].transitions())
329        } else {
330            Err(StateNotFound(state))
331        }
332    }
333
334    /// Returns a map from states to the set of their predecessors.
335    /// A state p has predecessor q if there is a transition from q to p.
336    pub fn predecessors(&self) -> HashMap<StateId, HashSet<StateId>> {
337        let mut queue = VecDeque::new();
338        let mut seen = HashSet::new();
339        let mut preds = HashMap::new();
340        if let Some(q0) = self.initial {
341            queue.push_back(q0);
342            seen.insert(q0);
343        }
344        while let Some(q) = queue.pop_front() {
345            for t in self.transitions_from(q).unwrap() {
346                let dest = t.destination;
347                if !seen.contains(&dest) {
348                    queue.push_back(dest);
349                    seen.insert(dest);
350                }
351                preds.entry(dest).or_insert(HashSet::new()).insert(q);
352            }
353        }
354        preds
355    }
356
357    /// Checks if the automaton is deterministic.
358    /// An automaton is deterministic if all states are deterministic.
359    /// A state is deterministic if it has at most one transition for each input character and no epsilon transitions.
360    /// Checking for determinism is done in O(|V| + |E|) time, where |V| is the number of states and |E| is the number of transitions.
361    pub fn is_det(&self) -> bool {
362        self.states.iter().all(|s| s.is_det())
363    }
364
365    /// Trims the automaton by removing unreachable and dead states.
366    /// This ensures that all states are reachable from the initial state AND can reach a final state.
367    /// Runs in **O(V + E)** time using two BFS traversals.
368    pub fn trim(&self) -> Self {
369        if self.trim {
370            return self.clone();
371        }
372        if self.finals.is_empty() || self.initial.is_none() {
373            return Self::new(); // Empty automaton if no final or initial state
374        }
375
376        let initial = self.initial.unwrap();
377
378        // Forward BFS to find reachable states
379        let mut reachable = HashSet::new();
380        let mut queue = VecDeque::new();
381        queue.push_back(initial);
382        reachable.insert(initial);
383
384        while let Some(state) = queue.pop_front() {
385            for t in self.states[state].transitions() {
386                let dest = t.destination;
387                if reachable.insert(dest) {
388                    // Insert returns true if newly inserted
389                    queue.push_back(dest);
390                }
391            }
392        }
393
394        // Reverse BFS from final states to find states that can reach a final state
395        // We are left with `useful` states that are reachable from the initial state and can reach a final state
396        let mut useful = HashSet::new();
397        let mut queue = VecDeque::new();
398        let predecessors = self.predecessors();
399
400        for &f in &self.finals {
401            if reachable.contains(&f) {
402                // Only consider reachable final states
403                queue.push_back(f);
404                useful.insert(f);
405            }
406        }
407
408        while let Some(state) = queue.pop_front() {
409            if let Some(preds) = predecessors.get(&state) {
410                for &src in preds {
411                    if reachable.contains(&src) && useful.insert(src) {
412                        queue.push_back(src);
413                    }
414                }
415            }
416        }
417
418        //  Build the new trimmed automaton
419        let mut aut = Self::new();
420        let mut state_map = HashMap::new();
421
422        for &state in useful.iter() {
423            let new_state = aut.new_state();
424            state_map.insert(state, new_state);
425            if self.finals.contains(&state) {
426                aut.add_final(new_state).unwrap();
427            }
428            if state == initial {
429                aut.set_initial(new_state).unwrap();
430            }
431        }
432
433        // Copy  transitions
434        for &state in useful.iter() {
435            let new_state = state_map[&state];
436            for t in self.states[state].transitions() {
437                let dest = t.destination;
438                if useful.contains(&dest) {
439                    let new_dest = state_map[&dest];
440                    aut.add_transition(new_state, new_dest, t.label).unwrap();
441                }
442            }
443        }
444
445        aut
446    }
447
448    /// Compresesses transitions from a state if they have the same destination but their ranges overlap or are adjacent.
449    /// If there are two transitions from state q to state p with ranges r1 and r2 and the ranges can be merged into a single range r, then the transitions are replaced by a single transition from q to p with range r.
450    pub fn compress_ranges(&mut self) {
451        for q in self.states() {
452            let mut dest_map = HashMap::new();
453            let mut compressed: Vec<Transition> = Vec::new();
454            for t in self.transitions_from(q).unwrap() {
455                match t.get_type() {
456                    TransitionType::Range(r) => dest_map
457                        .entry(t.get_dest())
458                        .or_insert_with(Alphabet::default)
459                        .insert(*r),
460                    _ => compressed.push(t.clone()),
461                }
462            }
463
464            for (d, ranges) in dest_map {
465                for r in ranges.iter_ranges() {
466                    compressed.push(Transition::new(TransitionType::Range(r), d));
467                }
468            }
469
470            self.states[q].transitions = compressed;
471        }
472    }
473
474    /// Returns the epsilon closure of a state.
475    /// The epsilon closure of a state is the set of states that can be reached from the state by following epsilon transitions.
476    /// If the state is not a valid state index, an error is returned.
477    pub fn epsilon_closure(&self, state: StateId) -> Result<HashSet<StateId>, StateNotFound> {
478        if state >= self.states.len() {
479            return Err(StateNotFound(state));
480        }
481        let mut closure = HashSet::new();
482        let mut stack = Vec::new();
483        stack.push(state);
484        while let Some(s) = stack.pop() {
485            closure.insert(s);
486            for t in self.states[s].transitions.iter() {
487                if t.is_epsilon() {
488                    let dest = t.destination;
489                    if !closure.contains(&dest) {
490                        stack.push(dest);
491                    }
492                }
493            }
494        }
495        Ok(closure)
496    }
497
498    /// Performs the standard epsilon removal algorithm on the automaton.
499    /// The result is a new automaton that accepts the same language as this automaton but has no epsilon transitions.
500    /// If the automaton is already epsilon-free, this is a no-op.
501    /// The resulting automaton is also trim as a side effect.
502    ///
503    /// The algorithm works as follows:
504    /// We do a breadth-first search starting from the initial state and compute the epsilon closure of each state.
505    /// The epsilon closure of a state is the set of states that can be reached from the state by following epsilon transitions.
506    /// We then add transitions for all non-epsilon transitions from the state to the destination states of the epsilon closure.
507    /// If any state in the epsilon closure is a final state, we mark the new state as final.
508    ///
509    /// The algorithm runs in O(|V| + |E|) time, where |V| is the number of states and |E| is the number of transitions.
510    pub fn eliminate_epsilon(self) -> Self {
511        if self.efree {
512            return self;
513        }
514        // We do a breadth-first search starting from the initial state
515        // and compute the epsilon closure of each state.
516        // The epsilon closure of a state is the set of states that can be reached from the state by following epsilon transitions.
517        // We then add transitions for all non-epsilon transitions from the state to the destination states of the epsilon closure.
518        // If any state in the epsilon closure is a final state, we mark the new state as final.
519
520        let mut queue = VecDeque::new();
521        let mut aut = Self::new();
522        // The automaton is epsilon-free after eliminating epsilon transitions. As a side effect, the automaton is also trimmed.
523        aut.efree = true;
524        aut.trim = true;
525        let mut old_to_new = HashMap::new();
526
527        if let Some(q0) = self.initial {
528            let q0_new = aut.new_state();
529            aut.set_initial(q0_new).unwrap();
530            queue.push_back(q0);
531            old_to_new.insert(q0, q0_new);
532        } else {
533            return aut;
534        }
535
536        while let Some(q) = queue.pop_front() {
537            // we ensure that a state is in the old_to_new map before adding it to the queue
538            let q_new = *old_to_new.get(&q).unwrap();
539            // obtain the epsilon closure of the current state
540            let closure = self.epsilon_closure(q).unwrap();
541            for q_i in closure {
542                if self.finals.contains(&q_i) {
543                    aut.add_final(q_new).unwrap();
544                }
545                // For all non-epsilon transitions from the state, add a transition to the destination state
546                for t in self.states[q_i].transitions() {
547                    if !t.is_epsilon() {
548                        let q_j = t.destination;
549                        let q_j_new = match old_to_new.get(&q_j) {
550                            Some(q_j_new) => *q_j_new,
551                            None => {
552                                let q_j_new = aut.new_state();
553                                old_to_new.insert(q_j, q_j_new);
554                                queue.push_back(q_j);
555                                q_j_new
556                            }
557                        };
558                        aut.add_transition(q_new, q_j_new, t.label).unwrap();
559                    }
560                }
561            }
562        }
563        aut
564    }
565
566    /// Returns the longest non-cyclic path in the automaton from the initial state to a final state, if it exists.
567    /// The length of the path is the number of transitions in the path and, therefore, the number of characters in the longest word accepted by the automaton.
568    /// Epsilon transitions are not counted in the path length.
569    /// If the automaton is empty or contains a cycle on an accepting path, this returns None.
570    pub fn longest_path(&self) -> Option<usize> {
571        let mut queue = VecDeque::new();
572        let mut seen = HashSet::new();
573        queue.push_back((self.initial?, 0));
574        seen.insert(self.initial?);
575
576        let mut longest = -1;
577        while let Some((q, len)) = queue.pop_front() {
578            for t in self.transitions_from(q).unwrap() {
579                let d = if t.is_epsilon() { 0 } else { 1 };
580                let dest = t.get_dest();
581                if !seen.contains(&dest) {
582                    seen.insert(dest);
583                    queue.push_back((dest, len + d));
584                    if self.is_final(dest) {
585                        longest = longest.max(len + d);
586                    }
587                } else {
588                    // If we have a cycle, we can't have a longest path
589                    return None;
590                }
591            }
592        }
593        if longest >= 0 {
594            Some(longest as usize)
595        } else {
596            None
597        }
598    }
599
600    /// Returns the shortest path in the automaton from the initial state to a final state, if it exists.
601    /// The length of the path is the number of transitions in the path and, therefore, the number of characters in the shortest word accepted by the automaton.
602    /// Epsilon transitions are not counted in the path length.
603    /// If the automaton is empty, this returns None.
604    pub fn shortest_path(&self) -> Option<usize> {
605        let mut queue = VecDeque::new();
606        let mut seen = HashSet::new();
607        queue.push_back((self.initial?, 0));
608        seen.insert(self.initial?);
609        while let Some((q, len)) = queue.pop_front() {
610            if self.is_final(q) {
611                return Some(len);
612            }
613            for t in self.transitions_from(q).unwrap() {
614                let d = if t.is_epsilon() { 0 } else { 1 };
615                let dest = t.get_dest();
616                if !seen.contains(&dest) {
617                    seen.insert(dest);
618                    if t.is_epsilon() {
619                        queue.push_front((dest, len));
620                    } else {
621                        queue.push_back((dest, len + d));
622                    }
623                }
624            }
625        }
626        None
627    }
628
629    /// Compute the length of the shortest path from the initial state to any state.
630    /// Returns a map from state indices to the length of the shortest path to that state.
631    /// Epsilon transitions are not counted in the path length, i.e., an epsilon transition has length 0.
632    ///
633    /// If a state is not reachable from the initial state, it is not included in the map.
634    /// In particular, if the automaton has no final states, the map is empty.
635    pub fn lengths_from_initial(&self) -> HashMap<StateId, usize> {
636        if let Some(q0) = self.initial {
637            self.shortest_paths(&[q0])
638        } else {
639            HashMap::new()
640        }
641    }
642
643    /// Compute the length of the shortest path to a final state from any state.
644    /// Returns a map from state indices to the length of the shortest path to a final state from that state.
645    /// Epsilon transitions are not counted in the path length, i.e., an epsilon transition has length 0.
646    ///
647    /// If a state cannot reach a final state, it is not included in the map.
648    /// If the automaton has no final states, the map is empty.
649    pub fn lengths_to_final(&self) -> HashMap<StateId, usize> {
650        let reversed = self.reversed();
651        reversed.lengths_from_initial()
652    }
653
654    /// Computes the length of the shortest paths from the states in `sources` to any state.
655    /// Returns a map from state indices to the length of the shortest path to that state from any of the sources.
656    fn shortest_paths(&self, sources: &[StateId]) -> HashMap<StateId, usize> {
657        let mut queue = BinaryHeap::new();
658        let mut length = HashMap::new();
659
660        for &q0 in sources {
661            queue.push((0, q0));
662            length.insert(q0, 0);
663        }
664
665        while let Some((len, q)) = queue.pop() {
666            for t in self.transitions_from(q).unwrap() {
667                let d = if t.is_epsilon() { 0 } else { 1 };
668                let dest = t.get_dest();
669                let new_len = len + d;
670                if !length.contains_key(&dest) || new_len < length[&dest] {
671                    length.insert(dest, new_len);
672                    queue.push((new_len, dest));
673                }
674            }
675        }
676
677        length
678    }
679
680    /// Reverse the automaton. The resulting automaton
681    ///
682    /// - has all transitions reversed,
683    /// - has a new initial state with and epsilon transition to every old final states,
684    /// - has all old initial states as final states.
685    ///
686    /// The reversed automaton accepts the reverse of the language of the original automaton.
687    pub fn reversed(&self) -> Self {
688        let mut aut = self.clone();
689        aut.finals.clear();
690        aut.initial = None;
691        // Reverse transitions
692        for q in aut.states() {
693            let transitions = aut.states[q].transitions.clone();
694            aut.states[q].transitions.clear();
695            for t in transitions {
696                aut.add_transition(t.destination, q, t.label).unwrap();
697            }
698        }
699
700        // Create a new initial state with epsilon transitions to all old final states
701        let q0 = aut.new_state();
702        aut.set_initial(q0).unwrap();
703        for f in self.finals() {
704            aut.add_transition(q0, f, TransitionType::Epsilon).unwrap();
705        }
706
707        // Set the old initial states as final states
708        if let Some(q0) = self.initial() {
709            aut.add_final(q0).unwrap();
710        }
711
712        aut
713    }
714
715    /// Returns whether the automaton is empty.
716    /// An automaton is empty if either
717    ///
718    /// - it has no final states,
719    /// - it has no initial state, or
720    /// - there is no path from the initial state to a final state.
721    pub fn is_empty(&self) -> bool {
722        if self.trim {
723            self.finals.is_empty() || self.initial.is_none()
724        } else {
725            // Check if there is a path from the initial state to a final state
726            // If a path exists, then also a shortest path exists
727            self.shortest_path().is_none()
728        }
729    }
730
731    /// Returns the set of states that can be reached from the initial state by consuming the given word.
732    pub fn run(&self, word: &SmtString) -> HashSet<StateId> {
733        let mut current = HashSet::new();
734
735        if let Some(initial) = self.initial {
736            current = self.epsilon_closure(initial).unwrap();
737        }
738
739        for c in word.iter() {
740            let mut next_states = HashSet::new();
741            for s in current {
742                for reached in self.states[s].consume(*c) {
743                    next_states.extend(self.epsilon_closure(reached).unwrap());
744                }
745            }
746            current = next_states;
747        }
748
749        current
750    }
751
752    /// Returns if the automaton accepts the given word.
753    /// A word is accepted if there is a path from the initial state to a final state by consuming the word.
754    pub fn accepts(&self, word: &SmtString) -> bool {
755        let reached = self.run(word);
756        !reached.is_disjoint(&self.finals)
757    }
758
759    /// Returns the DOT representation of the automaton.
760    /// The DOT representation can be used to visualize the automaton using Graphviz.
761    pub fn dot(&self) -> String {
762        let mut buf = Vec::new();
763        dotlib::render(self, &mut buf).unwrap();
764        String::from_utf8(buf).expect("Failed to convert DOT to string")
765    }
766}
767
768impl Display for TransitionType {
769    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
770        match self {
771            TransitionType::Range(r) => write!(f, "{}", r),
772            TransitionType::NotRange(r) => write!(f, "not({})", r),
773            TransitionType::Epsilon => write!(f, ""),
774        }
775    }
776}
777
778impl Display for NFA {
779    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
780        writeln!(f, "NFA {{")?;
781        writeln!(f, "\tStates:")?;
782        for (i, state) in self.states.iter().enumerate() {
783            write!(f, "\t\t{}: ", i)?;
784            for t in state.transitions() {
785                write!(f, "{} -> {}, ", t.label, t.destination)?;
786            }
787            writeln!(f)?;
788        }
789        if let Some(q0) = self.initial {
790            writeln!(f, "\tInitial: {q0}")?;
791        } else {
792            writeln!(f, "\tInitial: None")?;
793        }
794        writeln!(f, "\tFinals: {:?}", self.finals)?;
795        writeln!(f, "}}")
796    }
797}
798
799#[cfg(test)]
800mod tests {
801
802    use super::*;
803
804    #[test]
805    fn test_invalid_initial_state() {
806        let mut a = NFA::new();
807        let result = a.set_initial(0);
808        assert_eq!(result, Err(StateNotFound(0)));
809    }
810
811    #[test]
812    fn test_valid_initial_state() {
813        let mut a = NFA::new();
814        let state = a.new_state();
815        let result = a.set_initial(state);
816        assert!(result.is_ok());
817        assert_eq!(a.initial, Some(state));
818    }
819
820    #[test]
821    fn test_invalid_final_state() {
822        let mut a = NFA::new();
823        let result = a.add_final(0);
824        assert_eq!(result, Err(StateNotFound(0)));
825    }
826
827    #[test]
828    fn test_valid_final_state() {
829        let mut a = NFA::new();
830        let state = a.new_state();
831        let result = a.add_final(state);
832        assert!(result.is_ok());
833        assert!(a.finals.contains(&state));
834    }
835
836    #[test]
837    fn test_invalid_transition_from() {
838        let mut a = NFA::new();
839        let s = a.new_state();
840        let unknown_state = a.num_states() + 1;
841        let result = a.add_transition(unknown_state, s, TransitionType::Epsilon);
842        assert_eq!(result, Err(StateNotFound(unknown_state)));
843    }
844
845    #[test]
846    fn test_invalid_transition_to() {
847        let mut a = NFA::new();
848        let state = a.new_state();
849        let result = a.add_transition(state, 1, TransitionType::Epsilon);
850        assert_eq!(result, Err(StateNotFound(1)));
851    }
852
853    #[test]
854    fn test_valid_transition() {
855        let mut a = NFA::new();
856        let state1 = a.new_state();
857        let state2 = a.new_state();
858        let result = a.add_transition(state1, state2, TransitionType::Epsilon);
859        assert!(result.is_ok());
860        assert_eq!(a.states[state1].transitions.len(), 1);
861        assert_eq!(a.states[state1].transitions[0].destination, state2);
862        assert_eq!(
863            a.states[state1].transitions[0].label,
864            TransitionType::Epsilon
865        );
866    }
867
868    /* Tests for epsilon removal */
869
870    #[test]
871    fn test_epsilon_closure() {
872        let mut a = NFA::new();
873        let s0 = a.new_state();
874        let s1 = a.new_state();
875        let s2 = a.new_state();
876        let s3 = a.new_state();
877        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
878        a.add_transition(s1, s2, TransitionType::Epsilon).unwrap();
879        a.add_transition(s2, s3, TransitionType::Epsilon).unwrap();
880        let result = a.epsilon_closure(s0).unwrap();
881        assert_eq!(result.len(), 4);
882        assert!(result.contains(&s0));
883        assert!(result.contains(&s1));
884        assert!(result.contains(&s2));
885        assert!(result.contains(&s3));
886    }
887
888    #[test]
889    fn test_elim_epsilon() {
890        // s0 --ε--> s1 --ε--> s2 --ε--> *s3*
891        let mut a = NFA::new();
892        let s0 = a.new_state();
893        let s1 = a.new_state();
894        let s2 = a.new_state();
895        let s3 = a.new_state();
896        a.set_initial(s0).unwrap();
897        a.add_final(s3).unwrap();
898        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
899        a.add_transition(s1, s2, TransitionType::Epsilon).unwrap();
900        a.add_transition(s2, s3, TransitionType::Epsilon).unwrap();
901        let result = a.eliminate_epsilon();
902        assert_eq!(result.states.len(), 1);
903        assert_eq!(result.initial, Some(0));
904        assert_eq!(result.finals.len(), 1);
905    }
906
907    #[test]
908    fn test_elim_epsilon_with_direct_transition() {
909        // s0 --(a|ε)--> s1
910        let mut a = NFA::new();
911        let s0 = a.new_state();
912        let s1 = a.new_state();
913        a.set_initial(s0).unwrap();
914        a.add_final(s1).unwrap();
915        a.add_transition(s0, s1, TransitionType::Range(CharRange::singleton('a')))
916            .unwrap();
917        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
918        let result = a.eliminate_epsilon();
919        assert_eq!(result.states.len(), 2);
920        assert_eq!(result.initial, Some(0));
921        assert_eq!(result.finals.len(), 2);
922    }
923
924    /* Tests for trimming */
925
926    #[test]
927    fn test_trim_empty() {
928        let a = NFA::new();
929        let result = a.trim();
930        assert_eq!(result.states.len(), 0);
931        assert_eq!(result.initial, None);
932        assert_eq!(result.finals.len(), 0);
933    }
934
935    #[test]
936    fn test_trim_no_finals() {
937        let mut a = NFA::new();
938        let s0 = a.new_state();
939        let s1 = a.new_state();
940        a.set_initial(s0).unwrap();
941        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
942        let result = a.trim();
943        assert_eq!(result.states.len(), 0);
944        assert_eq!(result.initial, None);
945        assert_eq!(result.finals.len(), 0);
946    }
947
948    #[test]
949    fn test_trim_no_initial() {
950        let mut a = NFA::new();
951        let s0 = a.new_state();
952        let s1 = a.new_state();
953        a.add_final(s1).unwrap();
954        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
955        let result = a.trim();
956        assert_eq!(result.states.len(), 0);
957        assert_eq!(result.initial, None);
958        assert_eq!(result.finals.len(), 0);
959    }
960
961    #[test]
962    fn test_trim_single_state_automaton() {
963        let mut nfa = NFA::new();
964        let q0 = nfa.new_state();
965        nfa.set_initial(q0).unwrap();
966        nfa.add_final(q0).unwrap(); // The only state is both initial and final
967
968        let trimmed = nfa.trim();
969
970        assert!(trimmed.accepts(&"".into())); // Should accept the empty string
971        assert_eq!(trimmed.num_states(), 1);
972    }
973
974    #[test]
975    fn test_trim_disconnected_states() {
976        let mut nfa = NFA::new();
977        let q0 = nfa.new_state();
978        let q1 = nfa.new_state();
979        let q2 = nfa.new_state();
980        let q3 = nfa.new_state(); // Completely disconnected state
981
982        nfa.set_initial(q0).unwrap();
983        nfa.add_final(q2).unwrap();
984        nfa.add_final(q3).unwrap(); // Should be removed because it's unreachable
985
986        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('x', 'x')))
987            .unwrap();
988        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('y', 'y')))
989            .unwrap();
990
991        let trimmed = nfa.trim();
992
993        assert!(trimmed.accepts(&"xy".into()));
994        assert_eq!(trimmed.num_states(), 3);
995    }
996
997    #[test]
998    fn test_trim_dead_states() {
999        let mut nfa = NFA::new();
1000        let q0 = nfa.new_state();
1001        let q1 = nfa.new_state();
1002        let q2 = nfa.new_state();
1003        let q3 = nfa.new_state(); // This state has no path to final states
1004
1005        nfa.set_initial(q0).unwrap();
1006        nfa.add_final(q2).unwrap();
1007
1008        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
1009            .unwrap();
1010        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('b', 'b')))
1011            .unwrap();
1012        nfa.add_transition(q3, q3, TransitionType::Range(CharRange::new('c', 'c')))
1013            .unwrap(); // Useless state
1014
1015        let trimmed = nfa.trim();
1016
1017        assert!(trimmed.accepts(&"ab".into()));
1018        assert_eq!(trimmed.num_states(), 3); // Dead-end state removed
1019    }
1020
1021    #[test]
1022    fn test_trim_already_trimmed() {
1023        let mut nfa = NFA::new();
1024        let q0 = nfa.new_state();
1025        let q1 = nfa.new_state();
1026        let q2 = nfa.new_state();
1027
1028        nfa.set_initial(q0).unwrap();
1029        nfa.add_final(q2).unwrap();
1030
1031        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
1032            .unwrap();
1033        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('b', 'b')))
1034            .unwrap();
1035
1036        let trimmed = nfa.trim();
1037
1038        assert_eq!(nfa.num_states(), trimmed.num_states()); // Should be the same size
1039        assert!(trimmed.accepts(&"ab".into()));
1040    }
1041}