fsa/dfa.rs
1use std::iter;
2use nfa;
3use std::collections::HashMap;
4use std::fmt::Display;
5use std::hash::Hash;
6use std::io::Write;
7use bit_set::BitSet;
8
9/* deterministic finite automaton */
10
11pub struct State<T> {
12 // this is not strictly speaking
13 // a DFA since there may be no
14 // transitions for a given input
15 pub trans: [usize; 256],
16
17 // for DFA determinization
18 // remember the set of NFA states
19 // that this state corresponds to
20 states: BitSet,
21 pub data: T
22}
23
24pub struct Automaton<T> {
25 pub states: Vec<State<T>>,
26 pub initials: Vec<usize>
27}
28
29impl<T> Automaton<T> where T: nfa::StateData {
30 // determinize a nondeterministic finite automaton and "adds" it to this
31 // deterministic automaton. adding it means that the newly built DFA will
32 // use the next state number available in this DFA but there will be no
33 // transition between the differents DFA.
34 // The resulting DFA is thus not strictly a DFA but this is needed to
35 // implement "conditions" in the lexical analysers
36 // returns the index of the initial state of the created DFA in the
37 // `initials' vector
38 pub fn determinize<S: nfa::State<Data = T>>(&mut self, nfa: &nfa::Automaton<S>)
39 -> usize {
40 let (eclos, data) = nfa.eclosure_(nfa.initial);
41 let ini = self.create_state(data, Some(eclos));
42 let mut unmarked = vec!(ini);
43
44 while !unmarked.is_empty() {
45 let next = unmarked.pop().unwrap();
46
47 for ch in 0 .. 256 {
48 let moves = nfa.moves(&self.states[next].states, ch as u8);
49 let (clos, action) = nfa.eclosure(&moves);
50 if clos.is_empty() {
51 continue;
52 }
53
54 // do we have clos in dstates ?
55 let mut i = ini;
56 let mut dst = None;
57 for s in self.states.iter().skip(ini) {
58 if s.states == clos {
59 dst = Some(i);
60 break;
61 }
62 i += 1;
63 }
64
65 match dst {
66 // in any case, add a transition
67 Some(i) => self.states[next].trans[ch] = i,
68 None => {
69 // create a new DFA state for this set
70 let st = self.create_state(action, Some(clos));
71 self.states[next].trans[ch] = st;
72 unmarked.push(st);
73 }
74 }
75 }
76 }
77
78 let ret = self.initials.len();
79 self.initials.push(ini);
80 ret
81 }
82
83 pub fn new() -> Automaton<T> {
84 let mut ret = Automaton {
85 states: vec!(),
86 initials: vec!()
87 };
88
89 // create a dead state
90 ret.create_state(T::no_data(), None);
91 ret
92 }
93
94 #[inline(always)]
95 fn create_state(&mut self, act: T, states: Option<BitSet>) -> usize {
96 self.states.push(State {
97 trans: [0; 256],
98 states: match states {
99 Some(s) => s,
100 None => BitSet::new()
101 },
102 data: act
103 });
104
105 self.states.len() - 1
106 }
107
108 // construct an equivalent DFA whose number of state is minimal for the
109 // recognized input langage
110 pub fn minimize(&self) -> Automaton<T> where T: Clone + Eq + Hash {
111 // groups are stored as an array indexed by a state number
112 // giving a group number.
113 let mut groups = Vec::with_capacity(self.states.len());
114
115 let mut group_of_action = HashMap::new();
116 let mut next_group = 0;
117
118 // create one subgroup per action
119 for st in self.states.iter() {
120 groups.push(*group_of_action.entry(&st.data).or_insert_with(|| {
121 let ret = next_group;
122 next_group += 1;
123 ret
124 }));
125 }
126
127 // now iterate over the states and split the groups into
128 // subgroups. This records the subgroups we have created for a
129 // given group. It is indexed by a group number and give a list
130 // of subgroups of the form (gr, st) where gr is the number of the
131 // subgroup (it may be the same as the original group), and st the
132 // number of a representing state
133 let mut subgroups: Vec<Vec<(usize, usize)>> =
134 iter::repeat(vec!()).take(next_group).collect();
135 loop {
136 // subgroups become groups, reinitialize subgroups
137 for i in subgroups.iter_mut() {
138 *i = vec!();
139 }
140
141 // create a new partition
142 let mut new_groups = Vec::with_capacity(self.states.len());
143 let mut modified = false;
144
145 'g: for s in 0 .. groups.len() {
146 let group = groups[s];
147
148 // check if we have a subgroup of the group of s
149 // that matches its transitions. st is the representing
150 // state of the subgroup subgr
151 'h: for &(subgr, st) in subgroups[group].iter() {
152 // if st and s are similar, s goes to subgr
153 // 2 states are said similar if for each input
154 // symbol they have a transition to states that
155 // are in the same group of the current partition
156 for i in 0 .. 255usize {
157 let (s1, s2) = (
158 self.states[st].trans[i],
159 self.states[s].trans[i]
160 );
161 if groups[s1] != groups[s2] {
162 continue 'h;
163 }
164 }
165
166 // okay, we have found a subgroup for s
167 // it may as well be the same so we may not have
168 // modified the partition here
169 new_groups.push(subgr);
170 continue 'g;
171 }
172
173 // no subgroup, create one
174 // if there is no subgroup for this group, reuse the
175 // same index
176 if subgroups[group].is_empty() {
177 subgroups[group].push((group, s));
178 new_groups.push(group);
179 } else {
180 // create a new subgroup with a new index
181 // take this state as a representing state
182 let subgroup = subgroups.len();
183 subgroups.push(vec!());
184 subgroups[group].push((subgroup, s));
185 new_groups.push(subgroup);
186 modified = true;
187 }
188 }
189
190 groups = new_groups;
191
192 // we stop when the partition is the same as before,
193 // i.e. when we cannot create new subgroups anymore.
194 if !modified {
195 break;
196 }
197 }
198
199 // construct the minimal DFA
200 let mut ret = Automaton {
201 states: Vec::with_capacity(subgroups.len()),
202 initials: Vec::with_capacity(self.initials.len()),
203 };
204
205 // create the dead state
206 // FIXME: is this really necessary ? it works
207 // but in fine it looks like we end up with two
208 // dead states. one should check if the dead state
209 // of the initial automata is always preserved and
210 // keep it instead of creating a new one
211 ret.create_state(T::no_data(), None);
212
213 // build representing states
214 // now that we are here
215 // - groups contains the final partition and lets us find the group
216 // in which a state of the initial DFA will be
217 // - subgroups contains only one subgroup for each group because we
218 // didn't created new subgroups at the last iteration, so this will
219 // allow us to find representing states for each groups
220 // the number of a state of the new DFA will be the number of the
221 // group of which it is a representing state
222 for gr in subgroups.iter() {
223 let (_, st) = gr[0];
224 let st = &self.states[st];
225 let state = ret.create_state(st.data.clone(), None);
226 let state = &mut ret.states[state];
227
228 // adjust transitions
229 // the new state transitions to the representing state of the group
230 // that contains the state to which is previously transitionned
231 let mut ch = 0usize;
232 for t in st.trans.iter() {
233 match *t {
234 0 => state.trans[ch] = 0,
235 _ => state.trans[ch] = groups[*t] + 1
236 }
237 ch += 1
238 };
239 }
240
241 // update the initial state numbers of each condition
242 for c in self.initials.iter() {
243 ret.initials.push(groups[*c] + 1);
244 }
245
246 ret
247 }
248
249 #[allow(dead_code)]
250 #[allow(unused_must_use)]
251 // outs the automaton as a dot file for graphviz
252 // for debugging purposes
253 pub fn todot(&self, out: &mut Write) where T: Display + Eq {
254 writeln!(out, "digraph automata {{");
255 writeln!(out, "\trankdir = LR;");
256 writeln!(out, "\tsize = \"4,4\";");
257
258 for i in self.initials.iter() {
259 writeln!(out, "\tnode [shape=box]; {};", i);
260 }
261
262 let mut i = 0usize;
263
264 // outputs final states as doublecircle-shaped nodes
265 for st in self.states.iter() {
266 if st.data != T::no_data() {
267 writeln!(out, "\tnode [shape=doublecircle, label=\"{} ({})\"] {};",
268 i, st.data, i);
269 }
270
271 i += 1;
272 }
273
274 writeln!(out, "\tnode [shape=circle];");
275
276 let mut i = 0usize;
277 for st in self.states.iter() {
278 for ch in 0 .. 256usize {
279 match st.trans[ch] {
280 0 => (),
281 dst => {
282 let mut esc = String::new();
283 esc.extend((ch as u8 as char).escape_default());
284 writeln!(out, "\t{} -> {} [label=\"{}\"];",
285 i, dst, esc);
286 }
287 }
288 }
289
290 i += 1;
291 }
292
293 writeln!(out, "}}");
294 }
295}