fsa/
dfa.rs

1use std::iter;
2use nfa;
3use std::collections::HashMap;
4use std::fmt::Display;
5use std::hash::Hash;
6use std::io::Write;
7use bit_set::BitSet;
8
9/* deterministic finite automaton */
10
11pub struct State<T> {
12    // this is not strictly speaking
13    // a DFA since there may be no
14    // transitions for a given input
15    pub trans: [usize; 256],
16
17    // for DFA determinization
18    // remember the set of NFA states
19    // that this state corresponds to
20    states: BitSet,
21    pub data: T
22}
23
24pub struct Automaton<T> {
25    pub states: Vec<State<T>>,
26    pub initials: Vec<usize>
27}
28
29impl<T> Automaton<T> where T: nfa::StateData {
30    // determinize a nondeterministic finite automaton and "adds" it to this
31    // deterministic automaton. adding it means that the newly built DFA will
32    // use the next state number available in this DFA but there will be no
33    // transition between the differents DFA.
34    // The resulting DFA is thus not strictly a DFA but this is needed to
35    // implement "conditions" in the lexical analysers
36    // returns the index of the initial state of the created DFA in the
37    // `initials' vector
38    pub fn determinize<S: nfa::State<Data = T>>(&mut self, nfa: &nfa::Automaton<S>)
39        -> usize {
40        let (eclos, data) = nfa.eclosure_(nfa.initial);
41        let ini = self.create_state(data, Some(eclos));
42        let mut unmarked = vec!(ini);
43
44        while !unmarked.is_empty() {
45            let next = unmarked.pop().unwrap();
46
47            for ch in 0 .. 256 {
48                let moves = nfa.moves(&self.states[next].states, ch as u8);
49                let (clos, action) = nfa.eclosure(&moves);
50                if clos.is_empty() {
51                    continue;
52                }
53
54                // do we have clos in dstates ?
55                let mut i = ini;
56                let mut dst = None;
57                for s in self.states.iter().skip(ini) {
58                    if s.states == clos {
59                        dst = Some(i);
60                        break;
61                    }
62                    i += 1;
63                }
64
65                match dst {
66                    // in any case, add a transition
67                    Some(i) => self.states[next].trans[ch] = i,
68                    None => {
69                        // create a new DFA state for this set
70                        let st = self.create_state(action, Some(clos));
71                        self.states[next].trans[ch] = st;
72                        unmarked.push(st);
73                    }
74                }
75            }
76        }
77
78        let ret = self.initials.len();
79        self.initials.push(ini);
80        ret
81    }
82
83    pub fn new() -> Automaton<T> {
84        let mut ret = Automaton {
85            states: vec!(),
86            initials: vec!()
87        };
88
89        // create a dead state
90        ret.create_state(T::no_data(), None);
91        ret
92    }
93
94    #[inline(always)]
95    fn create_state(&mut self, act: T, states: Option<BitSet>) -> usize {
96        self.states.push(State {
97            trans: [0; 256],
98            states: match states {
99                Some(s) => s,
100                None => BitSet::new()
101            },
102            data: act
103        });
104
105        self.states.len() - 1
106    }
107
108    // construct an equivalent DFA whose number of state is minimal for the
109    // recognized input langage
110    pub fn minimize(&self) -> Automaton<T> where T: Clone + Eq + Hash {
111        // groups are stored as an array indexed by a state number
112        // giving a group number.
113        let mut groups = Vec::with_capacity(self.states.len());
114
115        let mut group_of_action = HashMap::new();
116        let mut next_group = 0;
117
118        // create one subgroup per action
119        for st in self.states.iter() {
120            groups.push(*group_of_action.entry(&st.data).or_insert_with(|| {
121                let ret = next_group;
122                next_group += 1;
123                ret
124            }));
125        }
126
127        // now iterate over the states and split the groups into
128        // subgroups. This records the subgroups we have created for a
129        // given group. It is indexed by a group number and give a list
130        // of subgroups of the form (gr, st) where gr is the number of the
131        // subgroup (it may be the same as the original group), and st the
132        // number of a representing state
133        let mut subgroups: Vec<Vec<(usize, usize)>> =
134            iter::repeat(vec!()).take(next_group).collect();
135        loop {
136            // subgroups become groups, reinitialize subgroups
137            for i in subgroups.iter_mut() {
138                *i = vec!();
139            }
140
141            // create a new partition
142            let mut new_groups = Vec::with_capacity(self.states.len());
143            let mut modified = false;
144
145            'g: for s in 0 .. groups.len() {
146                let group = groups[s];
147
148                // check if we have a subgroup of the group of s
149                // that matches its transitions. st is the representing
150                // state of the subgroup subgr
151                'h: for &(subgr, st) in subgroups[group].iter() {
152                    // if st and s are similar, s goes to subgr
153                    // 2 states are said similar if for each input
154                    // symbol they have a transition to states that
155                    // are in the same group of the current partition
156                    for i in 0 .. 255usize {
157                        let (s1, s2) = (
158                            self.states[st].trans[i],
159                            self.states[s].trans[i]
160                        );
161                        if groups[s1] != groups[s2] {
162                            continue 'h;
163                        }
164                    }
165
166                    // okay, we have found a subgroup for s
167                    // it may as well be the same so we may not have
168                    // modified the partition here
169                    new_groups.push(subgr);
170                    continue 'g;
171                }
172
173                // no subgroup, create one
174                // if there is no subgroup for this group, reuse the
175                // same index
176                if subgroups[group].is_empty() {
177                    subgroups[group].push((group, s));
178                    new_groups.push(group);
179                } else {
180                    // create a new subgroup with a new index
181                    // take this state as a representing state
182                    let subgroup = subgroups.len();
183                    subgroups.push(vec!());
184                    subgroups[group].push((subgroup, s));
185                    new_groups.push(subgroup);
186                    modified = true;
187                }
188            }
189
190            groups = new_groups;
191
192            // we stop when the partition is the same as before,
193            // i.e. when we cannot create new subgroups anymore.
194            if !modified {
195                break;
196            }
197        }
198
199        // construct the minimal DFA
200        let mut ret = Automaton {
201            states: Vec::with_capacity(subgroups.len()),
202            initials: Vec::with_capacity(self.initials.len()),
203        };
204
205        // create the dead state
206        // FIXME: is this really necessary ? it works
207        // but in fine it looks like we end up with two
208        // dead states. one should check if the dead state
209        // of the initial automata is always preserved and
210        // keep it instead of creating a new one
211        ret.create_state(T::no_data(), None);
212
213        // build representing states
214        // now that we are here
215        // - groups contains the final partition and lets us find the group
216        //   in which a state of the initial DFA will be
217        // - subgroups contains only one subgroup for each group because we
218        //   didn't created new subgroups at the last iteration, so this will
219        //   allow us to find representing states for each groups
220        // the number of a state of the new DFA will be the number of the
221        // group of which it is a representing state
222        for gr in subgroups.iter() {
223            let (_, st) = gr[0];
224            let st = &self.states[st];
225            let state = ret.create_state(st.data.clone(), None);
226            let state = &mut ret.states[state];
227
228            // adjust transitions
229            // the new state transitions to the representing state of the group
230            // that contains the state to which is previously transitionned
231            let mut ch = 0usize;
232            for t in st.trans.iter() {
233                match *t {
234                    0 => state.trans[ch] = 0,
235                    _ => state.trans[ch] = groups[*t] + 1
236                }
237                ch += 1
238            };
239        }
240
241        // update the initial state numbers of each condition
242        for c in self.initials.iter() {
243            ret.initials.push(groups[*c] + 1);
244        }
245
246        ret
247    }
248
249    #[allow(dead_code)]
250    #[allow(unused_must_use)]
251    // outs the automaton as a dot file for graphviz
252    // for debugging purposes
253    pub fn todot(&self, out: &mut Write) where T: Display + Eq {
254        writeln!(out, "digraph automata {{");
255        writeln!(out, "\trankdir = LR;");
256        writeln!(out, "\tsize = \"4,4\";");
257
258        for i in self.initials.iter() {
259            writeln!(out, "\tnode [shape=box]; {};", i);
260        }
261
262        let mut i = 0usize;
263
264        // outputs final states as doublecircle-shaped nodes
265        for st in self.states.iter() {
266            if st.data != T::no_data() {
267                writeln!(out, "\tnode [shape=doublecircle, label=\"{} ({})\"] {};",
268                    i, st.data, i);
269            }
270
271            i += 1;
272        }
273
274        writeln!(out, "\tnode [shape=circle];");
275
276        let mut i = 0usize;
277        for st in self.states.iter() {
278            for ch in 0 .. 256usize {
279                match st.trans[ch] {
280                    0 => (),
281                    dst => {
282                        let mut esc = String::new();
283                        esc.extend((ch as u8 as char).escape_default());
284                        writeln!(out, "\t{} -> {} [label=\"{}\"];",
285                            i, dst, esc);
286                    }
287                }
288            }
289
290            i += 1;
291        }
292
293        writeln!(out, "}}");
294    }
295}