aws_smt_strings/
automata.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Deterministic finite-state automata
6//!
7//! States are indexed by an integer from 0 to N-1 where N is the number of states.
8//!
9//! The alphabet is divided in equivalence classes such that all characters in an equivalence
10//! class have the same transitions.
11//!
12//! More precisely, a character partition is attached to every state of an automaton.
13//! The partition consists of disjoint character intervals (see [character_sets](crate::character_sets)).
14//! If the i-th interval in the partition of state `s` is interval [a<sub>i</sub>, b<sub>i</sub>] then all
15//! character in this interval have the same successors `delta(s, i)`.
16//! In addition, state `s` may have a default successor, that is, the successor state of `s` for characters
17//! that do not belong to any interval [a<sub>i</sub>, b<sub>i</sub>].
18//!
19//! Function [combined_char_partition](Automaton::combined_char_partition) constructs a
20//! partition of the alphabet that is valid for all states of an automaton. If
21//! two characters `c1` and `c2` are in the same class in the combined partition, then we
22//! have `delta(s, c1) = delta(s, c2)` for any state `s` of the automaton.
23//!
24//! Function [pick_alphabet](Automaton::pick_alphabet) returns a vector of representative
25//! characters (i.e., one element in each class of the combined partition).
26//!
27use std::{collections::HashMap, fmt::Display, hash::Hash};
28
29use crate::{
30    bfs_queues::BfsQueue,
31    character_sets::*,
32    compact_tables::{CompactTable, CompactTableBuilder},
33    errors::*,
34    minimizer::Minimizer,
35    partitions::Partition,
36    smt_strings::SmtString,
37};
38
39///
40/// Deterministic finite state automaton
41///
42#[derive(Debug)]
43pub struct Automaton {
44    // number of states
45    num_states: usize,
46    // number of final states
47    num_final_states: usize,
48    // index of the initial state
49    initial_state: usize,
50    // array of states
51    states: Box<[State]>,
52}
53
54///
55/// State of an automaton
56///
57#[derive(Debug)]
58pub struct State {
59    // id of a state (an index between 0 and num_states)
60    id: usize,
61    // whether this state is final
62    is_final: bool,
63    // character classes for which successors are defined
64    classes: CharPartition,
65    // successor[i] = index of the successor state for the i-th interval in the class partition
66    // so the successor array has as many elements as classes has intervals.
67    successor: Box<[usize]>,
68    // default successor for character outside any of the class partition intervals
69    default_successor: Option<usize>,
70}
71
72///
73/// Iterator to list the transitions from a state
74/// - successors are given as pairs (class id, reference to successor state)
75///
76#[derive(Debug)]
77pub struct EdgeIterator<'a> {
78    state_array: &'a [State],
79    state: &'a State,
80    index: usize,
81}
82
83///
84/// Iterator to enumerate the final states of an automaton
85///
86#[derive(Debug)]
87pub struct FinalStateIterator<'a> {
88    state_array: &'a [State],
89    index: usize,
90}
91
92//
93// Remap structure for automata minimization
94// 1) assume equivalent nodes are mapped to a new id
95// 2) for every equivalence class, one node is taken as representative
96// The mapping stores this in two arrays:
97// - for every old node id i: new_id[i] = new id for node
98// - for every new node id j: old_id[j] = id of the old node that's representative
99//   of the class of j.
100//
101#[derive(Debug)]
102struct StateMapping {
103    new_id: Vec<usize>,
104    old_id: Vec<usize>,
105}
106
107impl StateMapping {
108    // Build a mapping from an array of nodes to keep
109    // - nodes_to_keep must not contain duplicates
110    // - the mapping assigns new id k to node j such that nodes_to_keep[k] = j.
111    // - num_nodes = number of old nodes
112    fn from_array(num_nodes: usize, nodes_to_keep: &[usize]) -> Self {
113        let mut new_id = vec![0; num_nodes];
114        let num_new_nodes = nodes_to_keep.len();
115        let mut old_id = vec![0; num_new_nodes];
116        for (i, &node_id) in nodes_to_keep.iter().enumerate() {
117            new_id[node_id] = i;
118            old_id[i] = node_id;
119        }
120        StateMapping { new_id, old_id }
121    }
122
123    //
124    // Build a mapping from a state partition
125    // - pick one state to keep in each block of the partition
126    // - map nodes to their block id - 1 (since block 0 is empty)
127    //
128    fn from_partition(p: &Partition) -> Self {
129        let num_nodes = p.size() as usize;
130        let mut new_id = vec![0; num_nodes];
131        let num_new_nodes = p.index() as usize;
132        let mut old_id = vec![0; num_new_nodes];
133        for s in 0..p.size() {
134            new_id[s as usize] = (p.block_id(s) - 1) as usize;
135        }
136        for b in 1..p.num_blocks() {
137            let s = p.pick_element(b);
138            old_id[b as usize - 1] = s as usize;
139        }
140        StateMapping { new_id, old_id }
141    }
142
143    // Number of new states
144    fn num_new_states(&self) -> usize {
145        self.old_id.len()
146    }
147
148    // Check whether old id is representative of its class
149    fn is_class_rep(&self, id: usize) -> bool {
150        self.old_id[self.new_id[id]] == id
151    }
152}
153
154impl Automaton {
155    /// Get the initial state
156    pub fn initial_state(&self) -> &State {
157        &self.states[self.initial_state]
158    }
159
160    /// Get a state from its id
161    /// panics if the id is out of range
162    pub fn state(&self, id: usize) -> &State {
163        &self.states[id]
164    }
165
166    /// Number of states
167    pub fn num_states(&self) -> usize {
168        self.num_states
169    }
170
171    /// Number of final states
172    pub fn num_final_states(&self) -> usize {
173        self.num_final_states
174    }
175
176    /// Default successor of a state
177    pub fn default_successor(&self, s: &State) -> Option<&State> {
178        s.default_successor.map(|i| &self.states[i])
179    }
180
181    ///
182    /// Successor for a character class
183    ///
184    /// The class id must be valid for state s
185    ///
186    /// # Panics
187    ///
188    /// If cid is of the form ClassId::Interval(i) but i is larger than
189    /// the number of successors of s.
190    ///
191    /// If cid is of the form ClassId::Complement but s has no default successor.
192    ///
193    pub fn class_next(&self, s: &State, cid: ClassId) -> &State {
194        debug_assert!(s.valid_class_id(cid));
195        let i = match cid {
196            ClassId::Interval(i) => s.successor[i],
197            ClassId::Complement => s.default_successor.unwrap(),
198        };
199        &self.states[i]
200    }
201
202    ///
203    /// Successor of a state via a character set
204    ///
205    /// # Errors
206    ///
207    /// If this set overlaps two or more successor classes,
208    /// produce [Error::AmbiguousCharSet].
209    ///
210    pub fn char_set_next(&self, s: &State, set: &CharSet) -> Result<&State, Error> {
211        let cid = s.classes.class_of_set(set)?;
212        Ok(self.class_next(s, cid))
213    }
214
215    ///
216    /// Successor of a state via a character
217    ///
218    pub fn next(&self, s: &State, c: u32) -> &State {
219        let cid = s.classes.class_of_char(c);
220        self.class_next(s, cid)
221    }
222
223    /// Successor of a state via an SMT string
224    pub fn str_next<'a>(&'a self, s: &'a State, str: &SmtString) -> &'a State {
225        str.iter().fold(s, |s1, c| self.next(s1, *c))
226    }
227
228    /// Check whether an SMT string is accepted
229    pub fn accepts(&self, str: &SmtString) -> bool {
230        self.str_next(self.initial_state(), str).is_final
231    }
232
233    /// Iterator to got through the states
234    pub fn states(&self) -> impl Iterator<Item = &State> {
235        self.states.iter()
236    }
237
238    /// Iterator to list the out edges of a state
239    ///
240    /// The iterator produces a list of pairs (ClassId, SuccessorState)
241    /// for every class in the state's character partition.
242    ///
243    pub fn edges<'a>(&'a self, s: &'a State) -> EdgeIterator<'a> {
244        EdgeIterator {
245            state_array: &self.states,
246            state: s,
247            index: 0,
248        }
249    }
250
251    /// Iterator to list the final states
252    pub fn final_states(&self) -> FinalStateIterator<'_> {
253        FinalStateIterator {
254            state_array: &self.states,
255            index: 0,
256        }
257    }
258
259    ///
260    /// Merge the state partitions and return the result.
261    /// This returns an abstraction of the automaton's alphabet.
262    ///
263    /// Every class in the returned partition contains equivalent
264    /// characters: if c1 and c2 are in one class, then
265    /// delta(s, c1) = delta(s, c2) for every state s of the automaton.
266    ///
267    pub fn combined_char_partition(&self) -> CharPartition {
268        merge_partition_list(self.states().map(|x| &x.classes))
269    }
270
271    /// Get a representative character from the combined partition
272    pub fn pick_alphabet(&self) -> Vec<u32> {
273        self.combined_char_partition().picks().collect()
274    }
275
276    ///
277    /// Compile the successor function.
278    ///
279    /// The successor function is defined on state indices and character indices
280    /// and it returns a state index. More precisely, it starts with building the
281    /// representative alphabet for this automaton (see [pick_alphabet](Self::pick_alphabet)). If
282    /// the automaton has N states and the representative alphabet has M characters,
283    /// then the compiled successor function is defined on [0 .. N-1] x [0 .. M-1]
284    /// and return an index in [0 .. N-1].
285    ///
286    pub fn compile_successors(&self) -> CompactTable {
287        let alphabet = self.pick_alphabet();
288        let num_states = self.num_states as u32;
289        let alpha_size = alphabet.len() as u32;
290        let mut builder = CompactTableBuilder::new(num_states, alpha_size);
291        for s in self.states() {
292            let id = s.id as u32;
293            if s.has_default_successor() {
294                let d = s.default_successor.unwrap();
295                builder.set_default(id, d as u32);
296            }
297            let suc: Vec<(u32, u32)> = alphabet
298                .iter()
299                .enumerate()
300                .filter(|(_, &c)| !s.char_maps_to_default(c))
301                .map(|(i, &c)| (i as u32, self.next(s, c).id as u32))
302                .collect();
303            builder.set_successors(id, &suc);
304        }
305        builder.build()
306    }
307
308    /// Apply a remapping
309    fn remap_nodes(&mut self, remap: &StateMapping) {
310        let i = self.initial_state;
311        self.initial_state = remap.new_id[i];
312
313        self.num_final_states = 0;
314        let num_new_nodes = remap.num_new_states();
315        let mut new_states = Vec::with_capacity(num_new_nodes);
316        for i in 0..num_new_nodes {
317            let old_id = remap.old_id[i];
318            let s = &mut self.states[old_id];
319            new_states.push(s.remap_nodes(remap));
320            debug_assert!(new_states[i].id == i);
321            if s.is_final {
322                debug_assert!(new_states[i].is_final);
323                self.num_final_states += 1;
324            }
325        }
326
327        self.num_states = num_new_nodes;
328        self.states = new_states.into();
329    }
330
331    /// Remove unreachable states
332    pub fn remove_unreachable_states(&mut self) {
333        let mut queue = BfsQueue::new();
334        let mut reachable = Vec::new();
335        queue.push(self.initial_state);
336        while let Some(i) = queue.pop() {
337            reachable.push(i);
338            let s = self.state(i);
339            for (_, next) in self.edges(s) {
340                queue.push(next.id);
341            }
342        }
343        reachable.sort_unstable();
344        let remap = StateMapping::from_array(self.num_states, &reachable);
345        self.remap_nodes(&remap);
346    }
347
348    /// Minimize the automaton
349    pub fn minimize(&mut self) {
350        let is_final = |i: u32| self.state(i as usize).is_final;
351        let transition_map = self.compile_successors();
352        let delta = |i, j| transition_map.eval(i, j);
353        let num_states = self.num_states as u32;
354        let alphabet_size = transition_map.alphabet_size() as u32;
355        let mut minimizer = Minimizer::new(num_states, alphabet_size, delta, is_final);
356        let p = minimizer.refine();
357        if (p.index() as usize) < self.num_states {
358            let remap = StateMapping::from_partition(p);
359            self.remap_nodes(&remap)
360        }
361    }
362
363    /// test minimizer
364    #[cfg(test)]
365    pub fn test_minimizer(&self) {
366        let is_final = |i: u32| self.state(i as usize).is_final;
367        let transition_map = self.compile_successors();
368        let delta = |i, j| transition_map.eval(i, j);
369        let num_states = self.num_states as u32;
370        let alphabet_size = transition_map.alphabet_size() as u32;
371        let mut minimizer = Minimizer::new(num_states, alphabet_size, delta, is_final);
372        minimizer.refine_and_trace();
373    }
374}
375
376impl State {
377    /// State id
378    pub fn id(&self) -> usize {
379        self.id
380    }
381
382    /// Check whether a state is final
383    pub fn is_final(&self) -> bool {
384        self.is_final
385    }
386
387    /// Number of transitions, not including the default
388    pub fn num_successors(&self) -> usize {
389        self.classes.len()
390    }
391
392    /// Check whether a state has a default successor
393    pub fn has_default_successor(&self) -> bool {
394        self.default_successor.is_some()
395    }
396
397    /// Get the default successor
398    pub fn default_successor(&self) -> Option<usize> {
399        self.default_successor
400    }
401
402    /// Check whether a class id is valid for this state
403    /// - a class id Interval(i) is valid if i < number of transitions
404    /// - class id Complement is valid if there's a default successor defined
405    pub fn valid_class_id(&self, cid: ClassId) -> bool {
406        self.classes.valid_class_id(cid)
407    }
408
409    /// Check whether character c maps to the default successor
410    pub fn char_maps_to_default(&self, c: u32) -> bool {
411        self.has_default_successor() && self.classes.class_of_char(c) == ClassId::Complement
412    }
413
414    /// Iterate through all valid class ids for this state
415    pub fn char_classes(&self) -> ClassIdIterator<'_> {
416        self.classes.class_ids()
417    }
418
419    /// Return the class id for a character
420    pub fn class_of_char(&self, x: u32) -> ClassId {
421        self.classes.class_of_char(x)
422    }
423
424    /// Pick a character in each class of this state's character partition
425    pub fn char_picks(&self) -> PickIterator<'_> {
426        self.classes.picks()
427    }
428
429    /// Iterate through the character ranges defined for this state.
430    ///
431    /// Same as [char_classes](Self::char_classes), except that the
432    /// complementary class is not
433    /// included.
434    pub fn char_ranges(&self) -> impl Iterator<Item = &CharSet> {
435        self.classes.ranges()
436    }
437
438    /// Apply remapping to a state and return the updated state
439    /// - `self` must be a state to keep as defined by remap
440    ///
441    /// This operation is destructive:
442    /// - self.classes is modified (replaced by an empty partition)
443    /// - self.successors is lost too.
444    fn remap_nodes(&mut self, remap: &StateMapping) -> Self {
445        let old_id = self.id;
446        debug_assert!(remap.is_class_rep(old_id));
447        let new_id = remap.new_id[old_id];
448        let new_default = self.default_successor.map(|i| remap.new_id[i]);
449        // take then modify the successors array in place
450        let mut new_successors = std::mem::take(&mut self.successor);
451        for s in new_successors.as_mut() {
452            *s = remap.new_id[*s];
453        }
454        let new_classes = std::mem::take(&mut self.classes);
455        State {
456            id: new_id,
457            is_final: self.is_final,
458            default_successor: new_default,
459            successor: new_successors,
460            classes: new_classes,
461        }
462    }
463}
464
465impl Display for State {
466    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
467        write!(f, "s{}", self.id)
468    }
469}
470
471impl Display for Automaton {
472    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
473        fn plural(n: usize) -> &'static str {
474            if n == 1 {
475                ""
476            } else {
477                "s"
478            }
479        }
480
481        writeln!(f, "{} states", self.num_states)?;
482        writeln!(f, "initial state: {}", self.initial_state())?;
483        write!(f, "final state{}:", plural(self.num_final_states))?;
484        for s in self.final_states() {
485            write!(f, " {}", &s)?;
486        }
487        writeln!(f)?;
488        writeln!(f, "transitions:")?;
489        for s in self.states.iter() {
490            for c in s.char_ranges() {
491                let d = self.char_set_next(s, c).unwrap();
492                writeln!(f, "  \u{03B4}({s}, {c}) = {d}")?;
493            }
494            if s.has_default_successor() {
495                let d = s.default_successor().unwrap();
496                writeln!(f, "  \u{03B4}({s}, ...) = s{d}")?;
497            }
498        }
499        writeln!(f)?;
500        Ok(())
501    }
502}
503
504impl<'a> Iterator for EdgeIterator<'a> {
505    type Item = (ClassId, &'a State);
506
507    fn next(&mut self) -> Option<Self::Item> {
508        let i = self.index;
509        let source = self.state;
510        if i < source.num_successors() {
511            self.index += 1;
512            let next_id = source.successor[i];
513            Some((ClassId::Interval(i), &self.state_array[next_id]))
514        } else if i == source.num_successors() && source.has_default_successor() {
515            self.index += 1;
516            let next_id = source.default_successor.unwrap();
517            Some((ClassId::Complement, &self.state_array[next_id]))
518        } else {
519            None
520        }
521    }
522}
523
524impl<'a> Iterator for FinalStateIterator<'a> {
525    type Item = &'a State;
526
527    fn next(&mut self) -> Option<Self::Item> {
528        let mut i = self.index;
529        let a = self.state_array;
530        while i < a.len() {
531            if a[i].is_final {
532                self.index = i + 1;
533                return Some(&a[i]);
534            }
535            i += 1;
536        }
537        self.index = i;
538        None
539    }
540}
541
542#[derive(Debug)]
543struct StateInConstruction {
544    is_final: bool,
545    default_successor: Option<usize>,
546    transitions: Vec<(CharSet, usize)>,
547}
548
549impl StateInConstruction {
550    fn new() -> StateInConstruction {
551        StateInConstruction {
552            is_final: false,
553            default_successor: None,
554            transitions: Vec::new(),
555        }
556    }
557
558    fn set_default_successor(&mut self, next_id: usize) {
559        self.default_successor = Some(next_id);
560    }
561
562    fn add_transition(&mut self, set: &CharSet, next_id: usize) {
563        self.transitions.push((*set, next_id))
564    }
565
566    // choose a default successor for this state if there isn't one already
567    fn choose_default_successor(&mut self) {
568        // Boyer-Moore maj: first pass
569        // assumes s is not empty
570        fn maj_candidate(s: &[(CharSet, usize)]) -> usize {
571            let mut maj = s[0].1;
572            let mut k = 1;
573            for (_, x) in &s[1..] {
574                let x = *x;
575                if k == 0 {
576                    maj = x;
577                    k = 1;
578                } else if x == maj {
579                    k += 1;
580                } else {
581                    k -= 1;
582                }
583            }
584            maj
585        }
586
587        // number of occurrences of m in s
588        fn count(s: &[(CharSet, usize)], m: usize) -> usize {
589            let mut n = 0;
590            for (_, x) in s {
591                if *x == m {
592                    n += 1;
593                }
594            }
595            n
596        }
597
598        if self.default_successor.is_none() && !self.transitions.is_empty() {
599            let m = maj_candidate(&self.transitions);
600            let n = count(&self.transitions, m);
601            if n >= self.transitions.len() / 2 {
602                self.set_default_successor(m);
603            }
604        }
605    }
606
607    fn remove_transitions_to_default(&mut self) {
608        if let Some(i) = self.default_successor {
609            self.transitions.retain(|x| x.1 != i)
610        }
611    }
612
613    fn cleanup(&mut self) {
614        self.choose_default_successor();
615        self.remove_transitions_to_default();
616    }
617
618    fn make_partition(&self) -> Result<CharPartition, Error> {
619        CharPartition::try_from_iter(self.transitions.iter().map(|x| x.0))
620    }
621
622    fn make_successor(&self, p: &CharPartition) -> Box<[usize]> {
623        let n = p.len();
624        let mut result = vec![0; n];
625        for s in &self.transitions {
626            let c = s.0.pick();
627            if let ClassId::Interval(i) = p.class_of_char(c) {
628                result[i] = s.1;
629            } else {
630                panic!();
631            }
632        }
633        result.into()
634    }
635}
636
637///
638/// Automaton builder
639///
640#[derive(Debug)]
641pub struct AutomatonBuilder<T> {
642    size: usize,
643    id_map: HashMap<T, usize>,
644    states: Vec<StateInConstruction>,
645}
646
647impl<T: Eq + Hash + Clone> AutomatonBuilder<T> {
648    fn get_state_id(&mut self, state: &T) -> usize {
649        match self.id_map.get(state) {
650            Some(i) => *i,
651            None => {
652                let i = self.size;
653                let new_state = StateInConstruction::new();
654                self.states.push(new_state);
655                self.id_map.insert(state.clone(), i);
656                self.size += 1;
657                i
658            }
659        }
660    }
661
662    ///
663    /// Create a new builder
664    ///
665    /// - initial_state = initial state for the resulting automaton
666    ///
667    pub fn new(initial_state: &T) -> Self {
668        let mut new = AutomatonBuilder {
669            size: 0,
670            id_map: HashMap::new(),
671            states: Vec::new(),
672        };
673        new.get_state_id(initial_state);
674        new
675    }
676
677    ///
678    /// Mark a final state
679    ///
680    pub fn mark_final(&mut self, state: &T) -> &mut Self {
681        let i = self.get_state_id(state);
682        self.states[i].is_final = true;
683        self
684    }
685
686    ///
687    /// Set the default successor of a state
688    ///
689    pub fn set_default_successor(&mut self, state: &T, next: &T) -> &mut Self {
690        let i = self.get_state_id(state);
691        let j = self.get_state_id(next);
692        self.states[i].set_default_successor(j);
693        self
694    }
695
696    ///
697    /// Add a transition
698    ///
699    pub fn add_transition(&mut self, state: &T, set: &CharSet, next: &T) -> &mut Self {
700        let i = self.get_state_id(state);
701        let j = self.get_state_id(next);
702        self.states[i].add_transition(set, j);
703        self
704    }
705
706    ///
707    /// Construct an automaton
708    /// - fails if a state `s` has non-deterministic transitions,
709    ///   i.e., if two distinct transitions from s have non-disjoint labels (i.e., character sets).
710    /// - fails if a state `s` has a default successor but its transitions
711    ///   already cover the full alphabet.
712    /// - fails if a state `s` doesn't have a default successor but its
713    ///   transitions do not cover the full alphabet.
714    ///
715    pub fn build(&mut self) -> Result<Automaton, Error> {
716        let n = self.size;
717        let mut num_final_states = 0;
718        let mut state_array = Vec::with_capacity(n);
719        for (i, s) in self.states.iter_mut().enumerate() {
720            s.cleanup();
721            let p = s.make_partition()?;
722            if s.default_successor.is_some() && p.empty_complement() {
723                return Err(Error::EmptyComplementaryClass);
724            }
725            if s.default_successor.is_none() && !p.empty_complement() {
726                return Err(Error::MissingDefaultSuccessor);
727            }
728            let successor = s.make_successor(&p);
729            if s.is_final {
730                num_final_states += 1;
731            }
732            let new_state = State {
733                id: i,
734                is_final: s.is_final,
735                successor,
736                classes: p,
737                default_successor: s.default_successor,
738            };
739            state_array.push(new_state);
740        }
741        Ok(Automaton {
742            num_states: n,
743            num_final_states,
744            initial_state: 0,
745            states: state_array.into(),
746        })
747    }
748
749    ///
750    /// Build without checking
751    ///
752    pub fn build_unchecked(&mut self) -> Automaton {
753        let num_states = self.size;
754        let mut num_final_states = 0;
755        let mut state_array = Vec::with_capacity(num_states);
756        for (i, s) in self.states.iter_mut().enumerate() {
757            s.cleanup();
758            let p = s.make_partition().unwrap();
759            let successor = s.make_successor(&p);
760            if s.is_final {
761                num_final_states += 1;
762            }
763            state_array.push(State {
764                id: i,
765                is_final: s.is_final,
766                successor,
767                classes: p,
768                default_successor: s.default_successor,
769            })
770        }
771        Automaton {
772            num_states,
773            num_final_states,
774            initial_state: 0,
775            states: state_array.into(),
776        }
777    }
778}
779
780#[cfg(test)]
781mod test {
782    use crate::smt_strings::char_to_smt;
783
784    use super::*;
785
786    //
787    // Test automaton:
788    /// 5 states
789    /// state 0 is the initial state
790    /// state 3 is the only final state
791    /// state 4 is a sink: all default transitions do to 4
792    ///
793    /// other transitions:
794    //   0 --a--> 0
795    //   0 --b--> 1
796    //   0 --c--> 2
797    //   1 --a--> 3
798    //   1 --c--> 2
799    //   2 --b--> 3
800    //   2 --c--> 3
801    //   3 --a--> 0
802    //   3 --b--> 1
803    //   3 --c--> 3
804    //
805    fn graph() -> Vec<(u32, char, u32)> {
806        vec![
807            (0, 'a', 0),
808            (0, 'b', 1),
809            (0, 'c', 2),
810            (1, 'a', 3),
811            (1, 'c', 2),
812            (2, 'b', 3),
813            (2, 'c', 3),
814            (3, 'a', 0),
815            (3, 'b', 1),
816            (3, 'c', 3),
817        ]
818    }
819
820    // Check that automaton is correct
821    fn check_automaton(automaton: &Automaton) {
822        assert_eq!(automaton.num_states(), 5);
823        assert_eq!(automaton.initial_state().id, 0);
824        assert_eq!(automaton.num_final_states(), 1);
825
826        let s0 = automaton.state(0);
827        assert_eq!(automaton.next(s0, 'a' as u32).id, 0);
828        assert_eq!(automaton.next(s0, 'b' as u32).id, 1);
829        assert_eq!(automaton.next(s0, 'c' as u32).id, 2);
830        assert_eq!(automaton.next(s0, 'd' as u32).id, 4);
831        assert!(!s0.is_final());
832
833        let s1 = automaton.state(1);
834        assert_eq!(automaton.next(s1, 'a' as u32).id, 3);
835        assert_eq!(automaton.next(s1, 'c' as u32).id, 2);
836        assert_eq!(automaton.next(s1, 'd' as u32).id, 4);
837        assert!(!s1.is_final());
838
839        let s2 = automaton.state(2);
840        assert_eq!(automaton.next(s2, 'a' as u32).id, 4);
841        assert_eq!(automaton.next(s2, 'b' as u32).id, 3);
842        assert_eq!(automaton.next(s2, 'c' as u32).id, 3);
843        assert_eq!(automaton.next(s2, 'd' as u32).id, 4);
844        assert!(!s2.is_final());
845
846        let s3 = automaton.state(3);
847        assert_eq!(automaton.next(s3, 'a' as u32).id, 0);
848        assert_eq!(automaton.next(s3, 'b' as u32).id, 1);
849        assert_eq!(automaton.next(s3, 'c' as u32).id, 3);
850        assert_eq!(automaton.next(s3, 'd' as u32).id, 4);
851        assert!(s3.is_final());
852    }
853
854    #[test]
855    fn test_builder() {
856        let g = graph();
857        let mut builder = AutomatonBuilder::new(&0);
858        for (source, label, dest) in &g {
859            builder.add_transition(source, &CharSet::singleton(*label as u32), dest);
860        }
861        builder
862            .set_default_successor(&0, &4)
863            .set_default_successor(&1, &4)
864            .set_default_successor(&2, &4)
865            .set_default_successor(&3, &4)
866            .set_default_successor(&4, &4)
867            .mark_final(&3);
868        let automaton = builder.build().unwrap();
869
870        println!(
871            "Result automaton: {} states, initial state: {}",
872            automaton.num_states(),
873            automaton.initial_state()
874        );
875        for state in automaton.states.iter() {
876            println!("State {state}");
877            if state.is_final() {
878                println!("  final");
879            }
880            match state.default_successor() {
881                None => println!("  no default successor"),
882                Some(x) => println!("  default successor: {x}"),
883            }
884            println!("  transitions:");
885            for (cid, next) in automaton.edges(state) {
886                println!("  delta({state}, {cid}) = {next}");
887            }
888        }
889
890        println!("{automaton}");
891
892        check_automaton(&automaton);
893
894        let map = automaton.compile_successors();
895        let alphabet = automaton.pick_alphabet();
896        print!("Alphabet:");
897        for c in &alphabet {
898            print!(" {}", char_to_smt(*c));
899        }
900        println!();
901        let alphabet_size = alphabet.len() as u32;
902        for s in 0..automaton.num_states() as u32 {
903            for c in 0..alphabet_size {
904                let next = map.eval(s, c);
905                let char = alphabet[c as usize];
906                println!(
907                    "  d({}, {}) = {}  <=>  \u{03B4}(s{}, {}) = s{}",
908                    s,
909                    c,
910                    next,
911                    s,
912                    char_to_smt(char),
913                    next
914                );
915            }
916            println!();
917        }
918
919        println!("Compact transition map: {map}");
920
921        automaton.test_minimizer();
922    }
923
924    #[test]
925    fn test_remove_unreachable() {
926        let g = graph();
927        let mut builder = AutomatonBuilder::new(&0);
928
929        // unreachable nodes
930        builder.add_transition(&5, &CharSet::singleton('z' as u32), &6);
931        builder.set_default_successor(&5, &8);
932        builder.add_transition(&6, &CharSet::singleton('y' as u32), &5);
933        builder.set_default_successor(&6, &8);
934        builder.set_default_successor(&8, &5);
935        builder.mark_final(&6);
936
937        for (source, label, dest) in &g {
938            builder.add_transition(source, &CharSet::singleton(*label as u32), dest);
939        }
940        builder
941            .set_default_successor(&0, &4)
942            .set_default_successor(&1, &4)
943            .set_default_successor(&2, &4)
944            .set_default_successor(&3, &4)
945            .set_default_successor(&4, &4)
946            .mark_final(&3);
947
948        let mut automaton = builder.build().unwrap();
949
950        println!(
951            "Result automaton: {} states, initial state: {}",
952            automaton.num_states(),
953            automaton.initial_state()
954        );
955
956        assert_eq!(automaton.num_states(), 8);
957
958        println!("{automaton}");
959
960        automaton.remove_unreachable_states();
961        println!(
962            "After removing unreachable states: {} states, initial state: {}",
963            automaton.num_states(),
964            automaton.initial_state()
965        );
966
967        println!("{automaton}");
968        check_automaton(&automaton);
969    }
970
971    #[test]
972    fn test_minimizer() {
973        let builder = &mut AutomatonBuilder::new(&0);
974
975        // language abc(a*)
976        builder.add_transition(&0, &CharSet::singleton('a' as u32), &1);
977        builder.add_transition(&1, &CharSet::singleton('b' as u32), &2);
978        builder.add_transition(&2, &CharSet::singleton('c' as u32), &3);
979
980        // states 3, 4, 5 are equivalent
981        builder.add_transition(&3, &CharSet::singleton('a' as u32), &4);
982        builder.mark_final(&3);
983
984        builder.add_transition(&4, &CharSet::singleton('a' as u32), &5);
985        builder.mark_final(&4);
986
987        builder.add_transition(&5, &CharSet::singleton('a' as u32), &3);
988        builder.mark_final(&5);
989
990        // add transitions to sink states 6, 7, 8
991        builder.set_default_successor(&0, &6);
992        builder.set_default_successor(&1, &7);
993        builder.set_default_successor(&2, &7);
994        builder.set_default_successor(&3, &8);
995        builder.set_default_successor(&4, &6);
996        builder.set_default_successor(&5, &7);
997
998        // sink states 6, 7, 8 are equivalent
999        builder.set_default_successor(&6, &7);
1000        builder.set_default_successor(&7, &8);
1001        builder.set_default_successor(&8, &6);
1002
1003        let automaton = &mut builder.build().unwrap();
1004
1005        println!(
1006            "Result automaton: {} states, initial state: {}",
1007            automaton.num_states(),
1008            automaton.initial_state()
1009        );
1010        println!("{automaton}");
1011
1012        automaton.test_minimizer();
1013
1014        automaton.minimize();
1015        println!(
1016            "\nAfter minimization: {} states, initial state: {}",
1017            automaton.num_states(),
1018            automaton.initial_state(),
1019        );
1020        println!("{automaton}");
1021    }
1022}