regex_dfa_gen/
dfa.rs

1
2use sorted_vec::SortedVec;
3use crate::set::*;
4use crate::nfa::*;
5use std::borrow::Cow;
6use std::collections::HashMap;
7pub struct DfaBuilder<'a>{
8    // SortedVec<usize> store nfa_states
9    pub(crate) states: Vec<(DfaState, SortedVec<usize>)>,
10    // pub(crate) hashmap: HashMap<DfaState, usize>,
11    pub(crate) nfa: &'a Nfa
12}
13#[derive(Hash, Eq, PartialEq, Clone, Debug)]
14pub struct DfaState {
15    // CharRange, usize(dfa_state), is_greedy
16    pub table: Vec<(CharRange, usize, bool)>,
17    pub end_num: Option<usize>
18}
19
20
21impl<'a> DfaBuilder<'a> {
22    fn new(nfa: &'a Nfa) -> Self {
23        Self {
24            states: Vec::with_capacity(0),
25            // hashmap: HashMap::new(),
26            nfa,
27        }
28    }
29    fn push(&mut self, state: DfaState, nfa_states: SortedVec<usize>) -> usize {
30        let ret = self.states.len();
31        self.states.push((state, nfa_states));
32        ret
33    }
34    fn build(&mut self, nfa_states: SortedVec<usize>) -> usize {
35        let ret = self.states.iter().position(|state| state.1 == nfa_states);
36        // dbg!(&nfa_states, ret);
37        if let Some(index) = ret {
38            index
39        } else {
40            let iter: _ = nfa_states.iter().flat_map(|nfa_state| &self.nfa.states[*nfa_state].table).copied();
41            let maps: _ = self.iter_to_map(iter);
42            self.build_from_vec(nfa_states, maps)
43        }
44    }
45
46    fn iter_to_map(&self, targets: impl Iterator<Item=usize>) -> RangeMap::<char, usize> {
47        let mut maps = RangeMap::<char, usize>::new();
48        for i in targets {
49            let target = &self.nfa.states[i];
50            maps.insert(target.ch.clone(), i);
51        }
52        maps
53    }
54
55    fn state_init(&mut self, nfa_states: SortedVec<usize>) -> usize {
56        let end_num = nfa_states.iter()
57        .filter_map(|&x| self.nfa.states[x].end_num)
58        .max_by(|x,y| x.cmp(y));
59        self.push(
60            DfaState{
61                table: Vec::new(),
62                end_num,
63            },
64            nfa_states,
65        )
66    }
67
68    #[inline]
69    fn build_from_vec(&mut self, nfa_states: SortedVec<usize>, vec: RangeMap::<char, usize>) -> usize {
70        let vec = vec.0;
71        let ret = self.state_init(nfa_states);
72
73        let mut table = Vec::new();
74        for (k, v) in vec {
75            let mut v = SortedVec::from_unsorted(v);
76            v.dedup();
77            let is_greedy = v.iter().map(|&i| self.nfa.states[i].is_greedy)
78                .fold(false, |a,b| a | b);
79            table.push((
80                k, self.build(v), is_greedy
81            ))
82        }
83        self.states[ret].0.table = table;
84
85        ret
86    }
87    /// get the builder from nfa.
88    pub fn from_nfa(nfa: &'a Nfa) -> Self {
89        let mut ret = Self::new(&nfa);
90        let maps = ret.iter_to_map(nfa.node.0.iter().copied());
91        ret.build_from_vec(SortedVec::new(), maps);
92        ret
93    }
94    
95    /// get the dfa.
96    pub fn to_dfa(self) -> Dfa {
97        Dfa {
98            states:
99            self.states.into_iter().map(|(state, _)|
100                state
101            ).collect()
102        }
103    }
104}
105
106
107pub struct Dfa {
108    pub states: Vec<DfaState>,
109}
110
111
112type Nd = (usize, Option<usize>);
113type Ed = (usize, usize, CharRange, bool);
114use std::io;
115
116impl Dfa {
117    /// get the dot file.
118    /// 
119    /// ```
120    /// use regex_dfa_gen::ast::AstNode;
121    /// use regex_dfa_gen::nfa::Nfa;
122    /// use regex_dfa_gen::dfa::{ DfaBuilder, Dfa };
123    /// use std::fs::File;
124    /// 
125    /// let ast : AstNode = r"([A-Z]*|A[a-z]*?)H".parse::<AstNode>().unwrap();
126    /// let nfa = Nfa::from_ast(&ast);
127    /// let dfa = DfaBuilder::from_nfa(&nfa).to_dfa();
128    /// 
129    /// let mut f = File::create("dfa.dot").unwrap();
130    /// dfa.render_to(&mut f).expect("msg");
131    /// ```
132    pub fn render_to<W: io::Write>(&self, w: &mut W) -> io::Result<()> {
133        dot::render(self, w)
134    }
135
136    pub fn from_nfa(nfa: &Nfa) -> Dfa {
137        DfaBuilder::from_nfa(&nfa).to_dfa()
138    }
139    pub fn replace(self, pair: HashMap<usize, usize>) -> Dfa {
140        let mut ret = Vec::new();
141        let mut maps= Vec::new();
142
143        let mut ind = 0;
144        for i in 0..self.states.len() {
145            if let Some(&j) = pair.get(&i) {
146                maps.push(maps[j])
147            } else {
148                maps.push(ind);
149                ind += 1;
150            }
151        }
152
153
154        for (i,mut s) in self.states.into_iter().enumerate() {
155            if !pair.contains_key(&i) {
156                for (_, arc, _) in s.table.iter_mut() {
157                    *arc = maps[*arc];
158                }
159                ret.push(s);
160            }
161        }
162        Dfa{states: ret}
163    }
164    
165    pub fn opt(self) -> Dfa {
166        let mut maps = HashMap::new();
167        let mut pair = HashMap::new();
168        for (i, state) in self.states.iter().enumerate() {
169            if let Some(&j) = maps.get(&state) {
170                pair.insert(i, j);
171            }
172            maps.insert(state, i);
173        }
174        let ret = self.replace(pair);
175        ret
176    }
177}
178
179
180impl<'a> dot::Labeller<'a, Nd, Ed> for Dfa {
181    fn graph_id(&'a self) -> dot::Id<'a> { dot::Id::new("example1").unwrap() }
182
183    fn node_id(&'a self, n: &Nd) -> dot::Id<'a> {
184        dot::Id::new(format!("N{}", n.0)).unwrap()
185    }
186
187    fn node_label<'b>(&'b self, n: &Nd) -> dot::LabelText<'b> {
188        let (i, num) = n;
189        if let Some(n) = num {
190            dot::LabelText::LabelStr(format!("{}({})", i, n).into())
191        }else {
192            dot::LabelText::LabelStr(format!("{}", i).into())
193        }
194    }
195    fn edge_label<'b>(&'b self, (_, _, ch, _): &Ed) -> dot::LabelText<'b> {
196        dot::LabelText::LabelStr(format!("{}", show_char_range(ch.clone())).into())
197    }
198    fn edge_color<'b>(&'b self, (_, _, _, is_greedy): &Ed) -> Option<dot::LabelText<'b>>{
199        if *is_greedy {
200            Some(dot::LabelText::LabelStr("red4".into()))
201        } else {
202            None
203        }
204    }
205}
206
207impl<'a> dot::GraphWalk<'a, Nd, Ed> for Dfa {
208    fn nodes(&self) -> dot::Nodes<'a,Nd> {
209        let nodes: Vec<_> = self.states.iter().enumerate()
210            .map(|(i,state)| (i, state.end_num.clone()))
211            .collect();
212        Cow::Owned(nodes)
213    }
214
215    fn edges(&'a self) -> dot::Edges<'a, Ed> {
216        self.states.iter().enumerate()
217        .flat_map(|(i, x)| x.table.iter().map(
218            move |(range, j, is_greedy)| (i, *j, range.clone(), *is_greedy)
219        )).collect()
220    }
221
222    fn source(&self, e: &Ed) -> Nd { (e.0, self.states[e.0].end_num) }
223
224    fn target(&self, e: &Ed) -> Nd { (e.1, self.states[e.1].end_num) }
225}
226
227
228
229#[cfg(test)]
230mod test {
231    use super::*;
232    use std::assert_eq;
233    use std::fs::File;
234    use crate::ast::*;
235    #[test]
236    fn test0() {
237        let ast: AstNode = r"12".parse::<AstNode>().unwrap();
238        let nfa = Nfa::from_ast(&ast);
239        let dfa = DfaBuilder::from_nfa(&nfa).to_dfa();
240        assert_eq!(dfa.states.len(), 3);
241
242        let ast: AstNode = r"([A-Z]*|A[a-z]*?)H".parse::<AstNode>().unwrap();
243        let nfa = Nfa::from_ast(&ast);
244        let dfa = DfaBuilder::from_nfa(&nfa).to_dfa();
245
246        let mut f = File::create("dfa.dot").unwrap();
247        dfa.render_to(&mut f).expect("msg");
248
249        assert_eq!(nfa.states.len(), 4);
250        assert_eq!(dfa.states.len(), 6);
251    }
252}