smt_str/automata/
mod.rs

1pub mod comp;
2mod compile;
3pub mod det;
4mod dot;
5pub mod inter;
6
7use std::collections::BinaryHeap;
8use std::error::Error;
9use std::{
10    collections::{HashMap, HashSet, VecDeque},
11    fmt::Display,
12};
13
14use crate::alphabet::Alphabet;
15use crate::{
16    alphabet::{partition::AlphabetPartitionMap, CharRange},
17    SmtChar, SmtString,
18};
19
20extern crate dot as dotlib;
21
22pub use compile::compile;
23
24/// The type of a transition in a nondeterministic finite automaton.
25/// Every transition in an automaton is labeled with a type that determines the behavior of the transition.
26/// The type can be a character range, a negated character range, or an epsilon transition.
27/// For a character range transition, the transition is taken if the input character is in the range.
28/// For a negated character range transition, the transition is taken if the input character is not in the range.
29/// An epsilon transition is taken without consuming any input.
30#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
31pub enum TransitionType {
32    /// A transition that is taken if the input character is in the given range.
33    Range(CharRange),
34    /// A transition that is taken if the input character is not in the given range.
35    NotRange(CharRange),
36    /// An epsilon transition that is taken without consuming any input.
37    Epsilon,
38}
39
40impl TransitionType {
41    /// Returns true if the transition is an epsilon transition.
42    pub fn is_epsilon(&self) -> bool {
43        matches!(self, TransitionType::Epsilon)
44    }
45
46    /// Returns true if the transition is an empty transition, i.e. a transition that can never be taken.
47    pub fn is_empty(&self) -> bool {
48        matches!(self, TransitionType::Epsilon)
49    }
50}
51
52/// A transition from one state to another.
53/// The transition can be of different types, e.g. a character range or an epsilon transition.
54/// The destination state is stored as an index that is unique within the automaton.
55#[derive(Debug, Clone, Eq, Hash, PartialEq)]
56pub struct Transition {
57    /// The type of the transition.
58    label: TransitionType,
59    /// The index of the destination state.
60    destination: usize,
61}
62
63impl Transition {
64    pub fn new(label: TransitionType, destination: usize) -> Self {
65        Self { label, destination }
66    }
67
68    /// Returns the destination state of the transition.
69    pub fn get_dest(&self) -> usize {
70        self.destination
71    }
72
73    /// Returns the type of the transition.
74    pub fn get_type(&self) -> &TransitionType {
75        &self.label
76    }
77
78    /// Returns if the transition is an epsilon transition.
79    pub fn is_epsilon(&self) -> bool {
80        matches!(self.label, TransitionType::Epsilon)
81    }
82}
83
84/// A state in a nondeterministic finite automaton.
85/// A state is merely a collection of transitions to other states.
86/// In a nondeterministic automaton, a state can have multiple transitions with the same input leading to different states.
87/// If the state has no transitions, it is a dead end state.
88/// If the state is a final state, it accepts the input.
89#[derive(Debug, Clone, Eq, Hash, PartialEq)]
90pub struct State {
91    transitions: Vec<Transition>,
92}
93
94impl State {
95    /// Returns an iterator over the transitions of the state.
96    fn transitions(&self) -> impl Iterator<Item = &Transition> {
97        self.transitions.iter()
98    }
99
100    /// Adds a transition to the state.
101    /// If the transition already exists, it is not added again.
102    /// Needs a linear scan over the transitions to check for duplicates.
103    fn add_transition(&mut self, label: TransitionType, destination: StateId) {
104        let transition = Transition { label, destination };
105        if !self.transitions.contains(&transition) {
106            self.transitions.push(transition);
107        }
108    }
109
110    /// Consumes the input character and returns the set of states that can be reached from this state.
111    fn consume(&self, input: SmtChar) -> HashSet<StateId> {
112        let mut res = HashSet::new();
113        for t in self.transitions() {
114            match t.label {
115                TransitionType::Range(r) if r.contains(input) => {
116                    res.insert(t.destination);
117                }
118                TransitionType::NotRange(r) if !r.contains(input) => {
119                    res.insert(t.destination);
120                }
121                _ => {}
122            }
123        }
124        res
125    }
126
127    /// Checks if the state is deterministic.
128    /// A state is deterministic if it has at most one transition for each input character.
129    /// A state is not deterministic if it has multiple transitions for the same input character or an epsilon transition.
130    pub fn is_det(&self) -> bool {
131        let mut map = AlphabetPartitionMap::empty();
132        for t in self.transitions.iter() {
133            match t.label {
134                TransitionType::Range(r) => {
135                    if map.insert(r, t.get_dest()).is_err() {
136                        return false;
137                    }
138                }
139                TransitionType::NotRange(r) => {
140                    for r in r.complement() {
141                        if map.insert(r, t.get_dest()).is_err() {
142                            return false;
143                        }
144                    }
145                }
146                _ => return false,
147            }
148        }
149        true
150    }
151}
152
153#[derive(Debug, Clone, Eq, Hash, PartialEq)]
154pub struct StateNotFound(pub StateId);
155
156impl Display for StateNotFound {
157    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
158        write!(f, "State not found: {}", self.0)
159    }
160}
161
162impl Error for StateNotFound {}
163
164/// Every state in a nondeterministic automaton is identified by a unique index.
165pub type StateId = usize;
166
167/// A nondeterministic finite automaton.
168/// The automaton consists of a collection of states, an initial state, and a set of final states.
169#[derive(Debug, Clone, Default)]
170pub struct NFA {
171    states: Vec<State>,
172    initial: Option<StateId>,
173    finals: HashSet<StateId>,
174
175    /// Flag that indicates whether the automaton is trim.
176    /// This is true if the automaton has been trimmed, i.e. all states are reachable from the initial state and all states can reach a final state.
177    /// This set to false whenever a new state is added to the automaton.
178    /// Note that adding a transition does not affect the trim flag.
179    /// The empty automaton  is considered trim.
180    trim: bool,
181    /// Flag that indicates whether the automaton is epsilon-free.
182    /// This is true if the automaton has no epsilon transitions.
183    efree: bool,
184}
185
186impl NFA {
187    /// Create a new, empty nondeterministic finite automaton.
188    pub fn new() -> Self {
189        Self {
190            states: Vec::new(),
191            initial: None,
192            finals: HashSet::new(),
193            trim: true,
194            efree: true,
195        }
196    }
197
198    /// Merges the other automaton into this automaton.
199    /// The states and transitions of the other automaton are copied into this automaton.
200    /// The transitions are updated to point to the corresponding states in this automaton.
201    /// The initial state and final states are not changed.
202    /// The method returns the offset of the states in the other automaton.
203    pub(crate) fn merge(&mut self, other: &Self) -> usize {
204        let offset = self.states.len();
205
206        self.states.extend(other.states.iter().cloned());
207        // Change the destinations of the transitions to point to the new states
208        for state in self.states.iter_mut().skip(offset) {
209            for t in state.transitions.iter_mut() {
210                t.destination += offset;
211            }
212        }
213        offset
214    }
215
216    /// Create a new nondeterministic finite automaton that accepts all strings.
217    fn universal() -> Self {
218        let mut aut = Self::new();
219        let q0 = aut.new_state();
220        aut.set_initial(q0).unwrap();
221        aut.add_final(q0).unwrap();
222        aut.add_transition(q0, q0, TransitionType::Range(CharRange::all()))
223            .unwrap();
224        aut
225    }
226
227    /// Add a new state to the automaton and return its index.
228    pub fn new_state(&mut self) -> StateId {
229        let id = self.states.len();
230        self.states.push(State {
231            transitions: Vec::new(),
232        });
233        self.trim = false;
234        id
235    }
236
237    /// Set the initial state of the automaton.
238    /// The index must be a valid state index, otherwise an error is returned.
239    pub fn set_initial(&mut self, state: StateId) -> Result<(), StateNotFound> {
240        if state < self.states.len() {
241            self.initial = Some(state);
242            Ok(())
243        } else {
244            Err(StateNotFound(state))
245        }
246    }
247
248    /// Returns the initial state of the automaton, if it exists.
249    pub fn initial(&self) -> Option<StateId> {
250        self.initial
251    }
252
253    /// Add a state to the set of final states.
254    /// The index must be a valid state index, otherwise an error is returned.
255    pub fn add_final(&mut self, state: StateId) -> Result<(), StateNotFound> {
256        if state < self.states.len() {
257            self.finals.insert(state);
258            Ok(())
259        } else {
260            Err(StateNotFound(state))
261        }
262    }
263
264    /// Returns an iterator over the final states of the automaton.
265    pub fn finals(&self) -> impl Iterator<Item = StateId> + '_ {
266        self.finals.iter().copied()
267    }
268
269    /// Returns if a state is a final state.
270    /// Invalid indices are not considered final states.
271    pub fn is_final(&self, state: StateId) -> bool {
272        self.finals.contains(&state)
273    }
274
275    /// Add a transition from one state to another.
276    /// The indices must be valid state indices, otherwise an error is returned.
277    pub fn add_transition(
278        &mut self,
279        from: StateId,
280        to: StateId,
281        label: TransitionType,
282    ) -> Result<(), StateNotFound> {
283        if to >= self.states.len() {
284            return Err(StateNotFound(to));
285        }
286        self.efree &= !label.is_epsilon();
287        if let Some(s) = self.states.get_mut(from) {
288            s.add_transition(label, to);
289            Ok(())
290        } else {
291            Err(StateNotFound(from))
292        }
293    }
294
295    /// Returns the number of states in the automaton.
296    pub fn num_states(&self) -> usize {
297        self.states.len()
298    }
299
300    /// Returns the number of transitions in the automaton.
301    /// This is the sum of the number of transitions of each state.
302    /// Note that this require a linear scan over all states and transitions.
303    pub fn num_transitions(&self) -> usize {
304        self.states.iter().map(|s| s.transitions.len()).sum()
305    }
306
307    /// Returns an iterator over the states of the automaton.
308    pub fn states(&self) -> impl Iterator<Item = StateId> {
309        0..self.states.len()
310    }
311
312    /// Returns an iterator over the transitions of the automaton.
313    /// The iterator yields pairs of a state and its outgoing transitions.
314    pub fn transitions(&self) -> impl Iterator<Item = (StateId, &Vec<Transition>)> {
315        self.states
316            .iter()
317            .enumerate()
318            .map(|(i, s)| (i, &s.transitions))
319    }
320
321    /// Returns an iterator over the transitions from a state.
322    /// If the state is not a valid state index, an error is returned.
323    pub fn transitions_from(
324        &self,
325        state: StateId,
326    ) -> Result<impl Iterator<Item = &Transition>, StateNotFound> {
327        if state < self.states.len() {
328            Ok(self.states[state].transitions())
329        } else {
330            Err(StateNotFound(state))
331        }
332    }
333
334    /// Consumes an [SmtChar] from the given state.
335    /// Returns all states reached when reading `c` in state `state`.
336    /// If the set is empty, then the NFA cannot proceed from the given state by reading the given character.
337    ///
338    /// Returns a [StateNotFound] error if the given state is not a state in the NFA.
339    pub fn consume(&self, state: StateId, c: SmtChar) -> Result<HashSet<StateId>, StateNotFound> {
340        self.states
341            .get(state)
342            .ok_or(StateNotFound(state))
343            .map(|s| s.consume(c))
344    }
345
346    /// Returns a map from states to the set of their predecessors.
347    /// A state p has predecessor q if there is a transition from q to p.
348    pub fn predecessors(&self) -> HashMap<StateId, HashSet<StateId>> {
349        let mut queue = VecDeque::new();
350        let mut seen = HashSet::new();
351        let mut preds = HashMap::new();
352        if let Some(q0) = self.initial {
353            queue.push_back(q0);
354            seen.insert(q0);
355        }
356        while let Some(q) = queue.pop_front() {
357            for t in self.transitions_from(q).unwrap() {
358                let dest = t.destination;
359                if !seen.contains(&dest) {
360                    queue.push_back(dest);
361                    seen.insert(dest);
362                }
363                preds.entry(dest).or_insert(HashSet::new()).insert(q);
364            }
365        }
366        preds
367    }
368
369    /// Checks if the automaton is deterministic.
370    /// An automaton is deterministic if all states are deterministic.
371    /// A state is deterministic if it has at most one transition for each input character and no epsilon transitions.
372    /// Checking for determinism is done in O(|V| + |E|) time, where |V| is the number of states and |E| is the number of transitions.
373    pub fn is_det(&self) -> bool {
374        self.states.iter().all(|s| s.is_det())
375    }
376
377    /// Trims the automaton by removing unreachable and dead states.
378    /// This ensures that all states are reachable from the initial state AND can reach a final state.
379    /// Runs in **O(V + E)** time using two BFS traversals.
380    pub fn trim(&self) -> Self {
381        if self.trim {
382            return self.clone();
383        }
384        if self.finals.is_empty() || self.initial.is_none() {
385            return Self::new(); // Empty automaton if no final or initial state
386        }
387
388        let initial = self.initial.unwrap();
389
390        // Forward BFS to find reachable states
391        let mut reachable = HashSet::new();
392        let mut queue = VecDeque::new();
393        queue.push_back(initial);
394        reachable.insert(initial);
395
396        while let Some(state) = queue.pop_front() {
397            for t in self.states[state].transitions() {
398                let dest = t.destination;
399                if reachable.insert(dest) {
400                    // Insert returns true if newly inserted
401                    queue.push_back(dest);
402                }
403            }
404        }
405
406        // Reverse BFS from final states to find states that can reach a final state
407        // We are left with `useful` states that are reachable from the initial state and can reach a final state
408        let mut useful = HashSet::new();
409        let mut queue = VecDeque::new();
410        let predecessors = self.predecessors();
411
412        for &f in &self.finals {
413            if reachable.contains(&f) {
414                // Only consider reachable final states
415                queue.push_back(f);
416                useful.insert(f);
417            }
418        }
419
420        while let Some(state) = queue.pop_front() {
421            if let Some(preds) = predecessors.get(&state) {
422                for &src in preds {
423                    if reachable.contains(&src) && useful.insert(src) {
424                        queue.push_back(src);
425                    }
426                }
427            }
428        }
429
430        //  Build the new trimmed automaton
431        let mut aut = Self::new();
432        let mut state_map = HashMap::new();
433
434        for &state in useful.iter() {
435            let new_state = aut.new_state();
436            state_map.insert(state, new_state);
437            if self.finals.contains(&state) {
438                aut.add_final(new_state).unwrap();
439            }
440            if state == initial {
441                aut.set_initial(new_state).unwrap();
442            }
443        }
444
445        // Copy  transitions
446        for &state in useful.iter() {
447            let new_state = state_map[&state];
448            for t in self.states[state].transitions() {
449                let dest = t.destination;
450                if useful.contains(&dest) {
451                    let new_dest = state_map[&dest];
452                    aut.add_transition(new_state, new_dest, t.label).unwrap();
453                }
454            }
455        }
456
457        aut
458    }
459
460    /// Compresesses transitions from a state if they have the same destination but their ranges overlap or are adjacent.
461    /// If there are two transitions from state q to state p with ranges r1 and r2 and the ranges can be merged into a single range r, then the transitions are replaced by a single transition from q to p with range r.
462    pub fn compress_ranges(&mut self) {
463        for q in self.states() {
464            let mut dest_map = HashMap::new();
465            let mut compressed: Vec<Transition> = Vec::new();
466            for t in self.transitions_from(q).unwrap() {
467                match t.get_type() {
468                    TransitionType::Range(r) => dest_map
469                        .entry(t.get_dest())
470                        .or_insert_with(Alphabet::default)
471                        .insert(*r),
472                    _ => compressed.push(t.clone()),
473                }
474            }
475
476            for (d, ranges) in dest_map {
477                for r in ranges.iter_ranges() {
478                    compressed.push(Transition::new(TransitionType::Range(r), d));
479                }
480            }
481
482            self.states[q].transitions = compressed;
483        }
484    }
485
486    /// Returns the epsilon closure of a state.
487    /// The epsilon closure of a state is the set of states that can be reached from the state by following epsilon transitions.
488    /// If the state is not a valid state index, an error is returned.
489    pub fn epsilon_closure(&self, state: StateId) -> Result<HashSet<StateId>, StateNotFound> {
490        if state >= self.states.len() {
491            return Err(StateNotFound(state));
492        }
493        let mut closure = HashSet::new();
494        let mut stack = Vec::new();
495        stack.push(state);
496        while let Some(s) = stack.pop() {
497            closure.insert(s);
498            for t in self.states[s].transitions.iter() {
499                if t.is_epsilon() {
500                    let dest = t.destination;
501                    if !closure.contains(&dest) {
502                        stack.push(dest);
503                    }
504                }
505            }
506        }
507        Ok(closure)
508    }
509
510    /// Performs the standard epsilon removal algorithm on the automaton.
511    /// The result is a new automaton that accepts the same language as this automaton but has no epsilon transitions.
512    /// If the automaton is already epsilon-free, this is a no-op.
513    /// The resulting automaton is also trim as a side effect.
514    ///
515    /// The algorithm works as follows:
516    /// We do a breadth-first search starting from the initial state and compute the epsilon closure of each state.
517    /// The epsilon closure of a state is the set of states that can be reached from the state by following epsilon transitions.
518    /// We then add transitions for all non-epsilon transitions from the state to the destination states of the epsilon closure.
519    /// If any state in the epsilon closure is a final state, we mark the new state as final.
520    ///
521    /// The algorithm runs in O(|V| + |E|) time, where |V| is the number of states and |E| is the number of transitions.
522    pub fn eliminate_epsilon(self) -> Self {
523        if self.efree {
524            return self;
525        }
526        // We do a breadth-first search starting from the initial state
527        // and compute the epsilon closure of each state.
528        // The epsilon closure of a state is the set of states that can be reached from the state by following epsilon transitions.
529        // We then add transitions for all non-epsilon transitions from the state to the destination states of the epsilon closure.
530        // If any state in the epsilon closure is a final state, we mark the new state as final.
531
532        let mut queue = VecDeque::new();
533        let mut aut = Self::new();
534        // The automaton is epsilon-free after eliminating epsilon transitions. As a side effect, the automaton is also trimmed.
535        aut.efree = true;
536        aut.trim = true;
537        let mut old_to_new = HashMap::new();
538
539        if let Some(q0) = self.initial {
540            let q0_new = aut.new_state();
541            aut.set_initial(q0_new).unwrap();
542            queue.push_back(q0);
543            old_to_new.insert(q0, q0_new);
544        } else {
545            return aut;
546        }
547
548        while let Some(q) = queue.pop_front() {
549            // we ensure that a state is in the old_to_new map before adding it to the queue
550            let q_new = *old_to_new.get(&q).unwrap();
551            // obtain the epsilon closure of the current state
552            let closure = self.epsilon_closure(q).unwrap();
553            for q_i in closure {
554                if self.finals.contains(&q_i) {
555                    aut.add_final(q_new).unwrap();
556                }
557                // For all non-epsilon transitions from the state, add a transition to the destination state
558                for t in self.states[q_i].transitions() {
559                    if !t.is_epsilon() {
560                        let q_j = t.destination;
561                        let q_j_new = match old_to_new.get(&q_j) {
562                            Some(q_j_new) => *q_j_new,
563                            None => {
564                                let q_j_new = aut.new_state();
565                                old_to_new.insert(q_j, q_j_new);
566                                queue.push_back(q_j);
567                                q_j_new
568                            }
569                        };
570                        aut.add_transition(q_new, q_j_new, t.label).unwrap();
571                    }
572                }
573            }
574        }
575        aut
576    }
577
578    /// Returns the longest non-cyclic path in the automaton from the initial state to a final state, if it exists.
579    /// The length of the path is the number of transitions in the path and, therefore, the number of characters in the longest word accepted by the automaton.
580    /// Epsilon transitions are not counted in the path length.
581    /// If the automaton is empty or contains a cycle on an accepting path, this returns None.
582    pub fn longest_path(&self) -> Option<usize> {
583        let mut queue = VecDeque::new();
584        let mut seen = HashSet::new();
585        queue.push_back((self.initial?, 0));
586        seen.insert(self.initial?);
587
588        let mut longest = -1;
589        while let Some((q, len)) = queue.pop_front() {
590            for t in self.transitions_from(q).unwrap() {
591                let d = if t.is_epsilon() { 0 } else { 1 };
592                let dest = t.get_dest();
593                if !seen.contains(&dest) {
594                    seen.insert(dest);
595                    queue.push_back((dest, len + d));
596                    if self.is_final(dest) {
597                        longest = longest.max(len + d);
598                    }
599                } else {
600                    // If we have a cycle, we can't have a longest path
601                    return None;
602                }
603            }
604        }
605        if longest >= 0 {
606            Some(longest as usize)
607        } else {
608            None
609        }
610    }
611
612    /// Returns the shortest path in the automaton from the initial state to a final state, if it exists.
613    /// The length of the path is the number of transitions in the path and, therefore, the number of characters in the shortest word accepted by the automaton.
614    /// Epsilon transitions are not counted in the path length.
615    /// If the automaton is empty, this returns None.
616    pub fn shortest_path(&self) -> Option<usize> {
617        let mut queue = VecDeque::new();
618        let mut seen = HashSet::new();
619        queue.push_back((self.initial?, 0));
620        seen.insert(self.initial?);
621        while let Some((q, len)) = queue.pop_front() {
622            if self.is_final(q) {
623                return Some(len);
624            }
625            for t in self.transitions_from(q).unwrap() {
626                let d = if t.is_epsilon() { 0 } else { 1 };
627                let dest = t.get_dest();
628                if !seen.contains(&dest) {
629                    seen.insert(dest);
630                    if t.is_epsilon() {
631                        queue.push_front((dest, len));
632                    } else {
633                        queue.push_back((dest, len + d));
634                    }
635                }
636            }
637        }
638        None
639    }
640
641    /// Compute the length of the shortest path from the initial state to any state.
642    /// Returns a map from state indices to the length of the shortest path to that state.
643    /// Epsilon transitions are not counted in the path length, i.e., an epsilon transition has length 0.
644    ///
645    /// If a state is not reachable from the initial state, it is not included in the map.
646    /// In particular, if the automaton has no final states, the map is empty.
647    pub fn lengths_from_initial(&self) -> HashMap<StateId, usize> {
648        if let Some(q0) = self.initial {
649            self.shortest_paths(&[q0])
650        } else {
651            HashMap::new()
652        }
653    }
654
655    /// Compute the length of the shortest path to a final state from any state.
656    /// Returns a map from state indices to the length of the shortest path to a final state from that state.
657    /// Epsilon transitions are not counted in the path length, i.e., an epsilon transition has length 0.
658    ///
659    /// If a state cannot reach a final state, it is not included in the map.
660    /// If the automaton has no final states, the map is empty.
661    pub fn lengths_to_final(&self) -> HashMap<StateId, usize> {
662        let reversed = self.reversed();
663        reversed.lengths_from_initial()
664    }
665
666    /// Computes the length of the shortest paths from the states in `sources` to any state.
667    /// Returns a map from state indices to the length of the shortest path to that state from any of the sources.
668    fn shortest_paths(&self, sources: &[StateId]) -> HashMap<StateId, usize> {
669        let mut queue = BinaryHeap::new();
670        let mut length = HashMap::new();
671
672        for &q0 in sources {
673            queue.push((0, q0));
674            length.insert(q0, 0);
675        }
676
677        while let Some((len, q)) = queue.pop() {
678            for t in self.transitions_from(q).unwrap() {
679                let d = if t.is_epsilon() { 0 } else { 1 };
680                let dest = t.get_dest();
681                let new_len = len + d;
682                if !length.contains_key(&dest) || new_len < length[&dest] {
683                    length.insert(dest, new_len);
684                    queue.push((new_len, dest));
685                }
686            }
687        }
688
689        length
690    }
691
692    /// Reverse the automaton. The resulting automaton
693    ///
694    /// - has all transitions reversed,
695    /// - has a new initial state with and epsilon transition to every old final states,
696    /// - has all old initial states as final states.
697    ///
698    /// The reversed automaton accepts the reverse of the language of the original automaton.
699    pub fn reversed(&self) -> Self {
700        let mut aut = self.clone();
701        aut.finals.clear();
702        aut.initial = None;
703        // Reverse transitions
704        for q in aut.states() {
705            let transitions = aut.states[q].transitions.clone();
706            aut.states[q].transitions.clear();
707            for t in transitions {
708                aut.add_transition(t.destination, q, t.label).unwrap();
709            }
710        }
711
712        // Create a new initial state with epsilon transitions to all old final states
713        let q0 = aut.new_state();
714        aut.set_initial(q0).unwrap();
715        for f in self.finals() {
716            aut.add_transition(q0, f, TransitionType::Epsilon).unwrap();
717        }
718
719        // Set the old initial states as final states
720        if let Some(q0) = self.initial() {
721            aut.add_final(q0).unwrap();
722        }
723
724        aut
725    }
726
727    /// Returns whether the automaton is empty.
728    /// An automaton is empty if either
729    ///
730    /// - it has no final states,
731    /// - it has no initial state, or
732    /// - there is no path from the initial state to a final state.
733    pub fn is_empty(&self) -> bool {
734        if self.trim {
735            self.finals.is_empty() || self.initial.is_none()
736        } else {
737            // Check if there is a path from the initial state to a final state
738            // If a path exists, then also a shortest path exists
739            self.shortest_path().is_none()
740        }
741    }
742
743    /// Returns the set of states that can be reached from the initial state by consuming the given word.
744    pub fn run(&self, word: &SmtString) -> HashSet<StateId> {
745        let mut current = HashSet::new();
746
747        if let Some(initial) = self.initial {
748            current = self.epsilon_closure(initial).unwrap();
749        }
750
751        for c in word.iter() {
752            let mut next_states = HashSet::new();
753            for s in current {
754                for reached in self.consume(s, *c).unwrap() {
755                    next_states.extend(self.epsilon_closure(reached).unwrap());
756                }
757            }
758            current = next_states;
759        }
760
761        current
762    }
763
764    /// Returns if the automaton accepts the given word.
765    /// A word is accepted if there is a path from the initial state to a final state by consuming the word.
766    pub fn accepts(&self, word: &SmtString) -> bool {
767        let reached = self.run(word);
768        !reached.is_disjoint(&self.finals)
769    }
770
771    /// Returns the DOT representation of the automaton.
772    /// The DOT representation can be used to visualize the automaton using Graphviz.
773    pub fn dot(&self) -> String {
774        let mut buf = Vec::new();
775        dotlib::render(self, &mut buf).unwrap();
776        String::from_utf8(buf).expect("Failed to convert DOT to string")
777    }
778}
779
780impl Display for TransitionType {
781    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
782        match self {
783            TransitionType::Range(r) => write!(f, "{}", r),
784            TransitionType::NotRange(r) => write!(f, "not({})", r),
785            TransitionType::Epsilon => write!(f, ""),
786        }
787    }
788}
789
790impl Display for NFA {
791    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
792        writeln!(f, "NFA {{")?;
793        writeln!(f, "\tStates:")?;
794        for (i, state) in self.states.iter().enumerate() {
795            write!(f, "\t\t{}: ", i)?;
796            for t in state.transitions() {
797                write!(f, "{} -> {}, ", t.label, t.destination)?;
798            }
799            writeln!(f)?;
800        }
801        if let Some(q0) = self.initial {
802            writeln!(f, "\tInitial: {q0}")?;
803        } else {
804            writeln!(f, "\tInitial: None")?;
805        }
806        writeln!(f, "\tFinals: {:?}", self.finals)?;
807        writeln!(f, "}}")
808    }
809}
810
811#[cfg(test)]
812mod tests {
813
814    use super::*;
815
816    #[test]
817    fn test_invalid_initial_state() {
818        let mut a = NFA::new();
819        let result = a.set_initial(0);
820        assert_eq!(result, Err(StateNotFound(0)));
821    }
822
823    #[test]
824    fn test_valid_initial_state() {
825        let mut a = NFA::new();
826        let state = a.new_state();
827        let result = a.set_initial(state);
828        assert!(result.is_ok());
829        assert_eq!(a.initial, Some(state));
830    }
831
832    #[test]
833    fn test_invalid_final_state() {
834        let mut a = NFA::new();
835        let result = a.add_final(0);
836        assert_eq!(result, Err(StateNotFound(0)));
837    }
838
839    #[test]
840    fn test_valid_final_state() {
841        let mut a = NFA::new();
842        let state = a.new_state();
843        let result = a.add_final(state);
844        assert!(result.is_ok());
845        assert!(a.finals.contains(&state));
846    }
847
848    #[test]
849    fn test_invalid_transition_from() {
850        let mut a = NFA::new();
851        let s = a.new_state();
852        let unknown_state = a.num_states() + 1;
853        let result = a.add_transition(unknown_state, s, TransitionType::Epsilon);
854        assert_eq!(result, Err(StateNotFound(unknown_state)));
855    }
856
857    #[test]
858    fn test_invalid_transition_to() {
859        let mut a = NFA::new();
860        let state = a.new_state();
861        let result = a.add_transition(state, 1, TransitionType::Epsilon);
862        assert_eq!(result, Err(StateNotFound(1)));
863    }
864
865    #[test]
866    fn test_valid_transition() {
867        let mut a = NFA::new();
868        let state1 = a.new_state();
869        let state2 = a.new_state();
870        let result = a.add_transition(state1, state2, TransitionType::Epsilon);
871        assert!(result.is_ok());
872        assert_eq!(a.states[state1].transitions.len(), 1);
873        assert_eq!(a.states[state1].transitions[0].destination, state2);
874        assert_eq!(
875            a.states[state1].transitions[0].label,
876            TransitionType::Epsilon
877        );
878    }
879
880    /* Tests for epsilon removal */
881
882    #[test]
883    fn test_epsilon_closure() {
884        let mut a = NFA::new();
885        let s0 = a.new_state();
886        let s1 = a.new_state();
887        let s2 = a.new_state();
888        let s3 = a.new_state();
889        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
890        a.add_transition(s1, s2, TransitionType::Epsilon).unwrap();
891        a.add_transition(s2, s3, TransitionType::Epsilon).unwrap();
892        let result = a.epsilon_closure(s0).unwrap();
893        assert_eq!(result.len(), 4);
894        assert!(result.contains(&s0));
895        assert!(result.contains(&s1));
896        assert!(result.contains(&s2));
897        assert!(result.contains(&s3));
898    }
899
900    #[test]
901    fn test_elim_epsilon() {
902        // s0 --ε--> s1 --ε--> s2 --ε--> *s3*
903        let mut a = NFA::new();
904        let s0 = a.new_state();
905        let s1 = a.new_state();
906        let s2 = a.new_state();
907        let s3 = a.new_state();
908        a.set_initial(s0).unwrap();
909        a.add_final(s3).unwrap();
910        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
911        a.add_transition(s1, s2, TransitionType::Epsilon).unwrap();
912        a.add_transition(s2, s3, TransitionType::Epsilon).unwrap();
913        let result = a.eliminate_epsilon();
914        assert_eq!(result.states.len(), 1);
915        assert_eq!(result.initial, Some(0));
916        assert_eq!(result.finals.len(), 1);
917    }
918
919    #[test]
920    fn test_elim_epsilon_with_direct_transition() {
921        // s0 --(a|ε)--> s1
922        let mut a = NFA::new();
923        let s0 = a.new_state();
924        let s1 = a.new_state();
925        a.set_initial(s0).unwrap();
926        a.add_final(s1).unwrap();
927        a.add_transition(s0, s1, TransitionType::Range(CharRange::singleton('a')))
928            .unwrap();
929        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
930        let result = a.eliminate_epsilon();
931        assert_eq!(result.states.len(), 2);
932        assert_eq!(result.initial, Some(0));
933        assert_eq!(result.finals.len(), 2);
934    }
935
936    /* Tests for trimming */
937
938    #[test]
939    fn test_trim_empty() {
940        let a = NFA::new();
941        let result = a.trim();
942        assert_eq!(result.states.len(), 0);
943        assert_eq!(result.initial, None);
944        assert_eq!(result.finals.len(), 0);
945    }
946
947    #[test]
948    fn test_trim_no_finals() {
949        let mut a = NFA::new();
950        let s0 = a.new_state();
951        let s1 = a.new_state();
952        a.set_initial(s0).unwrap();
953        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
954        let result = a.trim();
955        assert_eq!(result.states.len(), 0);
956        assert_eq!(result.initial, None);
957        assert_eq!(result.finals.len(), 0);
958    }
959
960    #[test]
961    fn test_trim_no_initial() {
962        let mut a = NFA::new();
963        let s0 = a.new_state();
964        let s1 = a.new_state();
965        a.add_final(s1).unwrap();
966        a.add_transition(s0, s1, TransitionType::Epsilon).unwrap();
967        let result = a.trim();
968        assert_eq!(result.states.len(), 0);
969        assert_eq!(result.initial, None);
970        assert_eq!(result.finals.len(), 0);
971    }
972
973    #[test]
974    fn test_trim_single_state_automaton() {
975        let mut nfa = NFA::new();
976        let q0 = nfa.new_state();
977        nfa.set_initial(q0).unwrap();
978        nfa.add_final(q0).unwrap(); // The only state is both initial and final
979
980        let trimmed = nfa.trim();
981
982        assert!(trimmed.accepts(&"".into())); // Should accept the empty string
983        assert_eq!(trimmed.num_states(), 1);
984    }
985
986    #[test]
987    fn test_trim_disconnected_states() {
988        let mut nfa = NFA::new();
989        let q0 = nfa.new_state();
990        let q1 = nfa.new_state();
991        let q2 = nfa.new_state();
992        let q3 = nfa.new_state(); // Completely disconnected state
993
994        nfa.set_initial(q0).unwrap();
995        nfa.add_final(q2).unwrap();
996        nfa.add_final(q3).unwrap(); // Should be removed because it's unreachable
997
998        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('x', 'x')))
999            .unwrap();
1000        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('y', 'y')))
1001            .unwrap();
1002
1003        let trimmed = nfa.trim();
1004
1005        assert!(trimmed.accepts(&"xy".into()));
1006        assert_eq!(trimmed.num_states(), 3);
1007    }
1008
1009    #[test]
1010    fn test_trim_dead_states() {
1011        let mut nfa = NFA::new();
1012        let q0 = nfa.new_state();
1013        let q1 = nfa.new_state();
1014        let q2 = nfa.new_state();
1015        let q3 = nfa.new_state(); // This state has no path to final states
1016
1017        nfa.set_initial(q0).unwrap();
1018        nfa.add_final(q2).unwrap();
1019
1020        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
1021            .unwrap();
1022        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('b', 'b')))
1023            .unwrap();
1024        nfa.add_transition(q3, q3, TransitionType::Range(CharRange::new('c', 'c')))
1025            .unwrap(); // Useless state
1026
1027        let trimmed = nfa.trim();
1028
1029        assert!(trimmed.accepts(&"ab".into()));
1030        assert_eq!(trimmed.num_states(), 3); // Dead-end state removed
1031    }
1032
1033    #[test]
1034    fn test_trim_already_trimmed() {
1035        let mut nfa = NFA::new();
1036        let q0 = nfa.new_state();
1037        let q1 = nfa.new_state();
1038        let q2 = nfa.new_state();
1039
1040        nfa.set_initial(q0).unwrap();
1041        nfa.add_final(q2).unwrap();
1042
1043        nfa.add_transition(q0, q1, TransitionType::Range(CharRange::new('a', 'a')))
1044            .unwrap();
1045        nfa.add_transition(q1, q2, TransitionType::Range(CharRange::new('b', 'b')))
1046            .unwrap();
1047
1048        let trimmed = nfa.trim();
1049
1050        assert_eq!(nfa.num_states(), trimmed.num_states()); // Should be the same size
1051        assert!(trimmed.accepts(&"ab".into()));
1052    }
1053}