l1_dfa/
lib.rs

1//Manually Transpiled, TODO generate from L1 specification
2
3pub struct DFA {
4  pub states: Vec<bool>,
5  pub transitions: std::collections::HashMap<(u64,char),u64>,
6}
7
8impl DFA {
9  pub fn accepts(self: &DFA, s: &str) -> bool {
10    let d = self;
11    let mut at:u64 = 0;
12    for c in s.chars() {
13      if d.transitions.contains_key(&(at,c)) {
14         at = d.transitions.get(&(at,c)).unwrap().clone();
15      } else {
16         return false;
17      }
18    };
19    d.states[at as usize]
20  }
21
22  pub fn intersect(self: &DFA, right: &DFA) -> DFA {
23    let left = self;
24    let mut p = DFA {
25      states: vec![false; left.states.len() * right.states.len()],
26      transitions: std::collections::HashMap::new(),
27    };
28    for pi in 0..p.states.len() {
29      let li = pi / right.states.len();
30      let ri = pi % right.states.len();
31      if left.states[li] && right.states[ri] {
32        p.states[pi] = true;
33      }
34    }
35    for ((ls,lc),lt) in left.transitions.iter() {
36    for ((rs,rc),rt) in right.transitions.iter() {
37    if lc==rc { //this is more strict than the simple product
38      let ps = ls * (right.states.len() as u64) + rs;
39      let pt = lt * (right.states.len() as u64) + rt;
40      p.transitions.insert((ps,*lc),pt);
41    }}}
42    p
43  }
44
45  pub fn union(self: &DFA, right: &DFA) -> DFA {
46    let left = self;
47    let mut p = DFA {
48      states: vec![false; left.states.len() * right.states.len()],
49      transitions: std::collections::HashMap::new(),
50    };
51    for pi in 0..p.states.len() {
52      let li = pi / right.states.len();
53      let ri = pi % right.states.len();
54      if left.states[li] || right.states[ri] {
55        p.states[pi] = true;
56      }
57    }
58    for ((ls,lc),lt) in left.transitions.iter() {
59    for ((rs,rc),rt) in right.transitions.iter() {
60      let ps = ls * (right.states.len() as u64) + rs;
61      let pt = lt * (right.states.len() as u64) + rt;
62      p.transitions.insert((ps,*lc),pt);
63      p.transitions.insert((ps,*rc),pt);
64    }}
65    p
66  }
67
68  pub fn complement(self: &DFA) -> DFA {
69    let d = self;
70    DFA {
71      states: d.states.iter().map(|c| !c).collect::<Vec<bool>>(),
72      transitions: self.transitions.clone(),
73    }
74  }
75
76  pub fn is_empty(self: &DFA) -> bool {
77    let mut reached = 0;
78    let mut reach = std::collections::HashSet::new();
79    reach.insert(0);
80    while reach.len() > reached {
81      reached = reach.len();
82      for ((ls,_lc),lt) in self.transitions.iter() {
83      if reach.contains(ls) {
84        reach.insert(*lt);
85      }}
86    }
87    for ri in reach.iter() {
88    if self.states[*ri as usize] {
89      return false;
90    }}
91    true
92  }
93
94  pub fn is_subset_of(self: &DFA, right: &DFA) -> bool {
95     self.intersect( &right.complement() ).is_empty()
96  }
97
98  pub fn concatenate(self: &DFA, _right: &DFA) -> DFA {
99     unimplemented!("DFA::concatenate")
100  }
101}
102
103use regex_syntax::Parser;
104use regex_syntax::hir::{self,HirKind};
105
106pub fn try_parse(regex: &str) -> Option<DFA> {
107   let Ok(hir) = Parser::new().parse(regex)
108   else { return None; };
109   Some(compile_ast(hir.kind()))
110}
111
112fn compile_literal(cs: &str) -> DFA {
113   println!("compile literal: {}", cs);
114   let cs = cs.chars().collect::<Vec<char>>();
115   let mut states = vec![false; cs.len()+1];
116   states[cs.len()] = true;
117   let mut transitions = std::collections::HashMap::new();
118   for ci in 0..cs.len() {
119      transitions.insert((ci as u64,cs[ci]),(ci+1) as u64);
120   }
121   DFA {
122      states: states,
123      transitions: transitions,
124   }
125}
126
127fn compile_ast(hir: &HirKind) -> DFA {
128   match hir {
129      HirKind::Empty => unimplemented!("try_compile Empty Regex"),
130      HirKind::Literal(_l) => unimplemented!("try_compile Literal Regex"),
131      HirKind::Class(_c) => unimplemented!("try_compile Class Regex"),
132      HirKind::Anchor(_a) => unimplemented!("try_compile Anchor Regex"),
133      HirKind::WordBoundary(_wb) => unimplemented!("try_compile Word Boundary Regex"),
134      HirKind::Repetition(_r) => unimplemented!("try_compile Repetition Regex"),
135      HirKind::Group(_g) => unimplemented!("try_compile Group Regex"),
136      HirKind::Concat(cs) => {
137         enum C { S(String), K(HirKind) }
138         let mut ds: Vec<C> = Vec::new();
139         let mut sbuf = "".to_string();
140         for c in cs.iter() {
141            let ck = c.kind();
142            if let HirKind::Literal(kl) = ck {
143            match kl {
144               hir::Literal::Unicode(c) => { sbuf.push(*c); }
145               hir::Literal::Byte(b) => { sbuf.push(*b as char); }
146            }} else {
147               if sbuf.len()>0 {
148                  ds.push(C::S(sbuf.clone()));
149                  sbuf.clear();
150               }
151               ds.push(C::K(ck.clone()));
152            }
153         }
154         if sbuf.len()>0 {
155            ds.push(C::S(sbuf.clone()));
156         }
157         let mut dfa = match &ds[0] {
158            C::S(s) => { compile_literal(&s) },
159            C::K(k) => { compile_ast(&k) },
160         };
161         for di in 1..ds.len() {
162         match &ds[di] {
163            C::S(s) => { dfa = dfa.concatenate(&compile_literal(&s)) },
164            C::K(k) => { dfa = dfa.concatenate(&compile_ast(&k)) },
165         }}
166         dfa
167      },
168      HirKind::Alternation(a) => {
169         let mut dfa = compile_ast(a[0].kind());
170         for ai in 1..a.len() {
171            dfa = dfa.union(&compile_ast(a[ai].kind()));
172         }
173         dfa
174      }
175   }
176}