1use crate::compile::AlphabetOrder;
2use proc_macro2::TokenStream;
3use std::collections::HashSet;
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::{Hash, Hasher};
8use std::ops::Deref;
9use std::rc::Rc;
10use syn::Path;
11
12pub struct Symbol {
13 pub(super) name: Path,
14}
15
16impl Eq for Symbol {}
17
18impl PartialEq for Symbol {
19 fn eq(&self, other: &Self) -> bool {
20 self.to_string() == other.to_string()
21 }
22}
23
24impl Debug for Symbol {
25 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
26 Display::fmt(self, f)
27 }
28}
29
30impl Hash for Symbol {
31 fn hash<H: Hasher>(&self, state: &mut H) {
32 self.to_string().hash(state)
33 }
34}
35
36impl Display for Symbol {
37 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
38 let name = &self.name;
39 write!(f, "{}", quote::quote!(#name))
40 }
41}
42
43impl From<&str> for Symbol {
44 fn from(value: &str) -> Self {
45 Self {
46 name: syn::parse2(value.parse::<TokenStream>().unwrap()).unwrap(),
47 }
48 }
49}
50
51#[derive(Hash, Clone, PartialEq, Eq)]
55pub enum Regex {
56 EmptyString,
58 EmptySet,
60 Symbol(Rc<Symbol>),
62 Repeat(Rc<Regex>),
64 Complement(Rc<Regex>),
66 Or(Rc<Regex>, Rc<Regex>),
68 And(Rc<Regex>, Rc<Regex>),
70 Concat(Rc<Regex>, Rc<Regex>),
72}
73
74impl Debug for Regex {
75 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
76 match self {
77 Self::EmptyString => write!(f, "e"),
78 Self::EmptySet => write!(f, "0"),
79 Self::Symbol(sym) => write!(f, "{:?}", sym.name.get_ident()),
80 Self::Repeat(re) => write!(f, "{:?}*", *re),
81 Self::Complement(re) => write!(f, "~{:?}", *re),
82 Self::Or(l, r) => write!(f, "{:?} | {:?}", *l, *r),
83 Self::And(l, r) => write!(f, "{:?} & {:?}", *l, *r),
84 Self::Concat(l, r) => write!(f, "{:?} {:?}", *l, *r),
85 }
86 }
87}
88
89impl Regex {
90 pub(crate) fn derive(&self, symbol: Option<&Rc<Symbol>>) -> Rc<Regex> {
101 match self {
102 Regex::EmptySet => Regex::EmptySet.into(),
104 Regex::EmptyString => Regex::EmptySet.into(),
106 Regex::Symbol(s) => {
109 if Some(s) == symbol {
110 Regex::EmptyString.into()
111 } else {
112 Regex::EmptySet.into()
113 }
114 }
115 Regex::Repeat(r) => {
117 Regex::Concat(r.derive(symbol), Regex::Repeat(r.clone()).into()).into()
118 }
119 Regex::Complement(c) => Regex::Complement(c.derive(symbol)).into(),
121 Regex::Or(l, r) => Regex::Or(l.derive(symbol), r.derive(symbol)).into(),
123 Regex::And(l, r) => Regex::And(l.derive(symbol), r.derive(symbol)).into(),
125 Regex::Concat(l, r) => {
127 let new = Regex::Concat(l.derive(symbol), r.clone()).into();
128
129 if l.is_nullable() {
130 Regex::Or(new, r.derive(symbol)).into()
131 } else {
132 new
133 }
134 }
135 }
136 }
137
138 pub(crate) fn normalize(self: &Rc<Self>, ab: &AlphabetOrder) -> Rc<Regex> {
142 match self.deref() {
143 Regex::EmptyString => self.clone(),
144 Regex::EmptySet => self.clone(),
145 Regex::Symbol(_) => self.clone(),
146 Regex::Repeat(r) => {
147 let r = r.normalize(ab);
148 match r.deref() {
149 Regex::EmptySet => Regex::EmptyString.into(),
151 Regex::EmptyString => r,
153 Regex::Repeat(_) => r,
155 _ => Regex::Repeat(r).into(),
156 }
157 }
158 Regex::Complement(c) => {
159 let c = c.normalize(ab);
160 match c.deref() {
161 Regex::Complement(c) => c.clone(),
163 _ => Regex::Complement(c).into(),
164 }
165 }
166 Regex::Or(l, r) => normalize_or(l, r, ab),
167 Regex::And(l, r) => normalize_and(l, r, ab),
168 Regex::Concat(l, r) => {
169 let l = l.normalize(ab);
170 let r = r.normalize(ab);
171
172 match (l.deref(), r.deref()) {
173 (Regex::EmptySet, _) => l,
175 (Regex::EmptyString, _) => r,
177 (Regex::Concat(il, ir), _) => {
179 Regex::Concat(il.clone(), Regex::Concat(ir.clone(), r).into()).into()
180 }
181 (_, Regex::EmptySet) => r,
183 (_, Regex::EmptyString) => l,
185 _ => Regex::Concat(l, r).into(),
186 }
187 }
188 }
189 }
190
191 pub fn is_nullable(&self) -> bool {
200 match self {
201 Regex::EmptySet => false,
202 Regex::EmptyString => true,
203 Regex::Symbol(_) => false,
204 Regex::Concat(l, r) => l.is_nullable() && r.is_nullable(),
205 Regex::Repeat(_) => true,
206 Regex::Or(l, r) => l.is_nullable() || r.is_nullable(),
207 Regex::And(l, r) => l.is_nullable() && r.is_nullable(),
208 Regex::Complement(i) => !i.is_nullable(),
209 }
210 }
211
212 pub(crate) fn alphabet(&self) -> HashSet<Rc<Symbol>> {
216 let mut alphabet = HashSet::new();
217 self.search_alphabet(&mut alphabet);
218 alphabet
219 }
220
221 fn search_alphabet(&self, alphabet: &mut HashSet<Rc<Symbol>>) {
222 match self {
223 Regex::EmptyString => {}
224 Regex::EmptySet => {}
225 Regex::Symbol(s) => {
226 alphabet.insert(s.clone());
227 }
228 Regex::Repeat(i) | Regex::Complement(i) => i.search_alphabet(alphabet),
229 Regex::Or(l, r) | Regex::And(l, r) | Regex::Concat(l, r) => {
230 l.search_alphabet(alphabet);
231 r.search_alphabet(alphabet);
232 }
233 }
234 }
235
236 fn order(&self, ab: &AlphabetOrder) -> i64 {
237 match self {
242 Regex::EmptySet => 1,
243 Regex::EmptyString => 2,
244 Regex::Concat(_, _) => 3,
245 Regex::Repeat(_) => 4,
246 Regex::Or(_, _) => 5,
247 Regex::And(_, _) => 6,
248 Regex::Complement(_) => 7,
249 Regex::Symbol(s) => ab.get(s) + 8,
250 }
251 }
252
253 fn compare(&self, other: &Regex, ab: &AlphabetOrder) -> i64 {
254 match (self, other) {
255 (Regex::Concat(l1, r1), Regex::Concat(l2, r2)) => {
256 let ans = l1.compare(l2, ab);
257 if ans == 0 {
258 r1.compare(r2, ab)
259 } else {
260 ans
261 }
262 }
263 (Regex::Repeat(l), Regex::Repeat(r)) => l.compare(r, ab),
264 (Regex::Or(l1, r1), Regex::Or(l2, r2)) => {
265 let ans = l1.compare(l2, ab);
266 if ans == 0 {
267 r1.compare(r2, ab)
268 } else {
269 ans
270 }
271 }
272 (Regex::And(l1, r1), Regex::And(l2, r2)) => {
273 let ans = l1.compare(l2, ab);
274 if ans == 0 {
275 r1.compare(r2, ab)
276 } else {
277 ans
278 }
279 }
280 (Regex::Complement(l), Regex::Complement(r)) => l.compare(r, ab),
281 _ => self.order(ab) - other.order(ab),
282 }
283 }
284}
285
286fn normalize_or(l: &Rc<Regex>, r: &Rc<Regex>, ab: &AlphabetOrder) -> Rc<Regex> {
287 let l = l.normalize(ab);
288 let r = r.normalize(ab);
289
290 match (l.deref(), r.deref()) {
291 (a, b) if a == b => l,
293 (Regex::EmptySet, _) => r,
295 (Regex::EmptyString, rr) if rr.is_nullable() => r,
297 (Regex::Or(il, ir), _) => normalize_or(il, &normalize_or(ir, &r, ab), ab),
299 (Regex::Complement(c), _) if c.deref() == &Regex::EmptySet => l,
301 (_, Regex::Or(il, ir)) => {
303 if l.compare(il, ab) > 0 {
304 normalize_or(il, &normalize_or(&l, ir, ab), ab)
305 } else {
306 Regex::Or(l, r).into()
307 }
308 }
309 _ if l.compare(&r, ab) > 0 => normalize_or(&r, &l, ab),
311 _ => Regex::Or(l, r).into(),
312 }
313}
314
315fn normalize_and(l: &Rc<Regex>, r: &Rc<Regex>, ab: &AlphabetOrder) -> Rc<Regex> {
316 let l = l.normalize(ab);
317 let r = r.normalize(ab);
318
319 match (l.deref(), r.deref()) {
320 (a, b) if a == b => l,
322 (Regex::EmptySet, _) => l,
324 (Regex::EmptyString, r) if r.is_nullable() => l,
326 (Regex::EmptyString, _) => Regex::EmptySet.into(),
328 (Regex::And(il, ir), _) => normalize_and(il, &normalize_and(ir, &r, ab), ab),
330 (Regex::Complement(c), _) if c.normalize(ab).deref() == &Regex::EmptySet => r,
332 (_, Regex::And(il, ir)) if l.compare(ir, ab) > 0 => {
334 normalize_and(il, &normalize_and(&l, ir, ab), ab)
335 }
336 _ if l.compare(&r, ab) > 0 => normalize_and(&r, &l, ab),
338 _ => Regex::And(l, r).into(),
339 }
340}
341
342impl Display for Regex {
343 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
344 match self {
345 Regex::EmptyString => write!(f, "e"),
346 Regex::EmptySet => write!(f, "0"),
347 Regex::Symbol(s) => write!(f, "{s}"),
348 Regex::Repeat(r) => write!(f, "({r})*"),
349 Regex::Complement(c) => write!(f, "~({c})"),
350 Regex::Or(a, b) => write!(f, "{a} | {b}"),
351 Regex::And(a, b) => write!(f, "{a} | {b}"),
352 Regex::Concat(a, b) => write!(f, "{a} {b}"),
353 }
354 }
355}
356
357#[cfg(test)]
358mod tests {
359 use crate::compile::AlphabetOrder;
360 use crate::parse_regex;
361 use crate::regex::Symbol;
362 use std::rc::Rc;
363
364 #[test]
365 fn nullable() {
366 assert!(parse_regex("e").unwrap().is_nullable());
367 assert!(!parse_regex("0").unwrap().is_nullable());
368
369 assert!(parse_regex("e | e").unwrap().is_nullable());
370 assert!(parse_regex("e | A").unwrap().is_nullable());
371 assert!(parse_regex("A | e").unwrap().is_nullable());
372 assert!(!parse_regex("A | B").unwrap().is_nullable());
373
374 assert!(parse_regex("e | e").unwrap().is_nullable());
375 assert!(!parse_regex("e & A").unwrap().is_nullable());
376 assert!(!parse_regex("e & A").unwrap().is_nullable());
377 assert!(!parse_regex("A & B").unwrap().is_nullable());
378
379 assert!(parse_regex("e | e").unwrap().is_nullable());
380 assert!(!parse_regex("A e").unwrap().is_nullable());
381 assert!(!parse_regex("e B").unwrap().is_nullable());
382 assert!(!parse_regex("A B").unwrap().is_nullable());
383
384 assert!(parse_regex("A*").unwrap().is_nullable());
385 assert!(!parse_regex("A+").unwrap().is_nullable());
386 assert!(parse_regex("A?").unwrap().is_nullable());
387 }
388
389 #[test]
390 fn apply_symbol() {
391 let a = Rc::new(Symbol::from("a"));
392 let b = Rc::new(Symbol::from("b"));
393 let c = Rc::new(Symbol::from("c"));
394 let ab = AlphabetOrder::new(&[a.clone(), b.clone(), c.clone()].into_iter().collect());
395
396 assert_eq!(
397 parse_regex("a b").unwrap().derive(Some(&a)).normalize(&ab),
398 parse_regex("b").unwrap().into()
399 );
400 assert_eq!(
401 parse_regex("a b").unwrap().derive(Some(&b)).normalize(&ab),
402 parse_regex("0").unwrap().into()
403 );
404 }
405}