1pub struct DFA {
4 pub states: Vec<bool>,
5 pub transitions: std::collections::HashMap<(u64,char),u64>,
6}
7
8impl DFA {
9 pub fn accepts(self: &DFA, s: &str) -> bool {
10 let d = self;
11 let mut at:u64 = 0;
12 for c in s.chars() {
13 if d.transitions.contains_key(&(at,c)) {
14 at = d.transitions.get(&(at,c)).unwrap().clone();
15 } else {
16 return false;
17 }
18 };
19 d.states[at as usize]
20 }
21
22 pub fn intersect(self: &DFA, right: &DFA) -> DFA {
23 let left = self;
24 let mut p = DFA {
25 states: vec![false; left.states.len() * right.states.len()],
26 transitions: std::collections::HashMap::new(),
27 };
28 for pi in 0..p.states.len() {
29 let li = pi / right.states.len();
30 let ri = pi % right.states.len();
31 if left.states[li] && right.states[ri] {
32 p.states[pi] = true;
33 }
34 }
35 for ((ls,lc),lt) in left.transitions.iter() {
36 for ((rs,rc),rt) in right.transitions.iter() {
37 if lc==rc { let ps = ls * (right.states.len() as u64) + rs;
39 let pt = lt * (right.states.len() as u64) + rt;
40 p.transitions.insert((ps,*lc),pt);
41 }}}
42 p
43 }
44
45 pub fn union(self: &DFA, right: &DFA) -> DFA {
46 let left = self;
47 let mut p = DFA {
48 states: vec![false; left.states.len() * right.states.len()],
49 transitions: std::collections::HashMap::new(),
50 };
51 for pi in 0..p.states.len() {
52 let li = pi / right.states.len();
53 let ri = pi % right.states.len();
54 if left.states[li] || right.states[ri] {
55 p.states[pi] = true;
56 }
57 }
58 for ((ls,lc),lt) in left.transitions.iter() {
59 for ((rs,rc),rt) in right.transitions.iter() {
60 let ps = ls * (right.states.len() as u64) + rs;
61 let pt = lt * (right.states.len() as u64) + rt;
62 p.transitions.insert((ps,*lc),pt);
63 p.transitions.insert((ps,*rc),pt);
64 }}
65 p
66 }
67
68 pub fn complement(self: &DFA) -> DFA {
69 let d = self;
70 DFA {
71 states: d.states.iter().map(|c| !c).collect::<Vec<bool>>(),
72 transitions: self.transitions.clone(),
73 }
74 }
75
76 pub fn is_empty(self: &DFA) -> bool {
77 let mut reached = 0;
78 let mut reach = std::collections::HashSet::new();
79 reach.insert(0);
80 while reach.len() > reached {
81 reached = reach.len();
82 for ((ls,_lc),lt) in self.transitions.iter() {
83 if reach.contains(ls) {
84 reach.insert(*lt);
85 }}
86 }
87 for ri in reach.iter() {
88 if self.states[*ri as usize] {
89 return false;
90 }}
91 true
92 }
93
94 pub fn is_subset_of(self: &DFA, right: &DFA) -> bool {
95 self.intersect( &right.complement() ).is_empty()
96 }
97
98 pub fn concatenate(self: &DFA, _right: &DFA) -> DFA {
99 unimplemented!("DFA::concatenate")
100 }
101}
102
103use regex_syntax::Parser;
104use regex_syntax::hir::{self,HirKind};
105
106pub fn try_parse(regex: &str) -> Option<DFA> {
107 let Ok(hir) = Parser::new().parse(regex)
108 else { return None; };
109 Some(compile_ast(hir.kind()))
110}
111
112fn compile_literal(cs: &str) -> DFA {
113 println!("compile literal: {}", cs);
114 let cs = cs.chars().collect::<Vec<char>>();
115 let mut states = vec![false; cs.len()+1];
116 states[cs.len()] = true;
117 let mut transitions = std::collections::HashMap::new();
118 for ci in 0..cs.len() {
119 transitions.insert((ci as u64,cs[ci]),(ci+1) as u64);
120 }
121 DFA {
122 states: states,
123 transitions: transitions,
124 }
125}
126
127fn compile_ast(hir: &HirKind) -> DFA {
128 match hir {
129 HirKind::Empty => unimplemented!("try_compile Empty Regex"),
130 HirKind::Literal(_l) => unimplemented!("try_compile Literal Regex"),
131 HirKind::Class(_c) => unimplemented!("try_compile Class Regex"),
132 HirKind::Anchor(_a) => unimplemented!("try_compile Anchor Regex"),
133 HirKind::WordBoundary(_wb) => unimplemented!("try_compile Word Boundary Regex"),
134 HirKind::Repetition(_r) => unimplemented!("try_compile Repetition Regex"),
135 HirKind::Group(_g) => unimplemented!("try_compile Group Regex"),
136 HirKind::Concat(cs) => {
137 enum C { S(String), K(HirKind) }
138 let mut ds: Vec<C> = Vec::new();
139 let mut sbuf = "".to_string();
140 for c in cs.iter() {
141 let ck = c.kind();
142 if let HirKind::Literal(kl) = ck {
143 match kl {
144 hir::Literal::Unicode(c) => { sbuf.push(*c); }
145 hir::Literal::Byte(b) => { sbuf.push(*b as char); }
146 }} else {
147 if sbuf.len()>0 {
148 ds.push(C::S(sbuf.clone()));
149 sbuf.clear();
150 }
151 ds.push(C::K(ck.clone()));
152 }
153 }
154 if sbuf.len()>0 {
155 ds.push(C::S(sbuf.clone()));
156 }
157 let mut dfa = match &ds[0] {
158 C::S(s) => { compile_literal(&s) },
159 C::K(k) => { compile_ast(&k) },
160 };
161 for di in 1..ds.len() {
162 match &ds[di] {
163 C::S(s) => { dfa = dfa.concatenate(&compile_literal(&s)) },
164 C::K(k) => { dfa = dfa.concatenate(&compile_ast(&k)) },
165 }}
166 dfa
167 },
168 HirKind::Alternation(a) => {
169 let mut dfa = compile_ast(a[0].kind());
170 for ai in 1..a.len() {
171 dfa = dfa.union(&compile_ast(a[ai].kind()));
172 }
173 dfa
174 }
175 }
176}