1use nom::branch::alt;
4use nom::character::complete::{char, none_of, one_of};
5use nom::combinator::{all_consuming, flat_map, success};
6use nom::error::Error as NomError;
7use nom::multi::{fold_many0, many0_count, many1, separated_list1};
8use nom::sequence::{delimited, preceded};
9use nom::{Finish, IResult, Parser};
10
11#[derive(Clone, Debug, Eq, PartialEq)]
13pub enum Regex {
14 Empty,
16 Dot,
18 Literal(char),
20 Star(Box<Regex>),
22 Concat(Box<Regex>, Box<Regex>),
24 Or(Box<Regex>, Box<Regex>),
26 Plus(Box<Regex>),
28 Maybe(Box<Regex>),
30 Not(Box<Regex>),
32 And(Box<Regex>, Box<Regex>),
34}
35
36pub trait Matcher {
38 fn start(&mut self) -> bool;
42
43 fn push(&mut self, c: char) -> bool;
47
48 fn matches(&mut self, text: &str) -> bool {
50 text.chars().fold(self.start(), |_, c| self.push(c))
51 }
52}
53
54impl<T: Matcher + ?Sized> Matcher for Box<T> {
55 fn start(&mut self) -> bool {
56 self.as_mut().start()
57 }
58
59 fn push(&mut self, c: char) -> bool {
60 self.as_mut().push(c)
61 }
62}
63
64pub trait MatcherExt: Matcher + Sized {
66 fn star(self) -> Star<Self> {
68 Star(self)
69 }
70
71 fn concat<T: Matcher>(self, other: T) -> Concat<Self, T> {
73 Concat::new(self, other)
74 }
75
76 fn or<T: Matcher>(self, other: T) -> Or<Self, T> {
78 Or(self, other)
79 }
80
81 fn not(self) -> Not<Self> {
83 Not(self)
84 }
85
86 fn and<T: Matcher>(self, other: T) -> And<Self, T> {
88 And(self, other)
89 }
90}
91
92impl<T: Matcher> MatcherExt for T {}
93
94pub type ParseError<'a> = NomError<&'a str>;
96
97fn reduce<T>(v: Vec<T>, f: impl FnMut(T, T) -> T) -> T {
99 v.into_iter().reduce(f).unwrap()
100}
101
102fn regex(pattern: &str) -> IResult<&str, Regex> {
104 let empty = success(Regex::Empty);
106
107 let group = delimited(char('('), regex, char(')'));
109
110 let dot = char('.').map(|_| Regex::Dot);
112
113 let meta = r"\().*|+?!&";
115
116 let literal = none_of(meta).map(Regex::Literal);
118
119 let escape = preceded(char('\\'), one_of(meta))
121 .map(Regex::Literal);
122
123 let atom = alt((literal, escape, dot, group));
125
126 let repeat = flat_map(atom, |r| fold_many0(
131 one_of("*+?"),
132 move || r.clone(),
133 |r, c| match c {
134 '*' => r.star(),
135 '+' => r.plus(),
136 '?' => r.maybe(),
137 _ => unreachable!(),
138 },
139 ));
140
141 let word = many1(repeat)
143 .map(|v| reduce(v, Regex::concat));
144
145 let not_word = many0_count(char('!'))
148 .and(word)
149 .map(|(n, r)| (0..n).fold(r, |r, _| r.not()));
150
151 let chunk = alt((not_word, empty));
153
154 let clause = separated_list1(char('&'), chunk)
157 .map(|v| reduce(v, Regex::and));
158
159 separated_list1(char('|'), clause)
162 .map(|v| reduce(v, Regex::or))
163 .parse(pattern)
164}
165
166impl Regex {
167 pub fn parse(pattern: &str) -> Result<Self, ParseError<'_>> {
169 all_consuming(regex)
170 .parse(pattern)
171 .finish()
172 .map(|(_, r)| r)
173 }
174
175 pub fn matcher(&self) -> Box<dyn Matcher> {
177 match self {
178 Self::Empty => Box::new(
179 Empty::default()
180 ),
181 Self::Dot => Box::new(
182 Dot::default()
183 ),
184 Self::Literal(c) => Box::new(
185 Literal::new(*c)
186 ),
187 Self::Star(r) => Box::new(
188 r.matcher().star()
189 ),
190 Self::Concat(a, b) => Box::new(
191 a.matcher().concat(b.matcher())
192 ),
193 Self::Or(a, b) => Box::new(
194 a.matcher().or(b.matcher())
195 ),
196 Self::Plus(r) => Box::new(
197 r.matcher().concat(r.matcher().star())
198 ),
199 Self::Maybe(r) => Box::new(
200 r.matcher().or(Empty::default())
201 ),
202 Self::Not(r) => Box::new(
203 r.matcher().not()
204 ),
205 Self::And(a, b) => Box::new(
206 a.matcher().and(b.matcher())
207 ),
208 }
209 }
210
211 pub fn matches(&self, text: &str) -> bool {
213 self.matcher().matches(text)
214 }
215
216 pub fn star(self) -> Self {
218 Self::Star(self.into())
219 }
220
221 pub fn concat(self, other: Self) -> Self {
223 Self::Concat(self.into(), other.into())
224 }
225
226 pub fn or(self, other: Self) -> Self {
228 Self::Or(self.into(), other.into())
229 }
230
231 pub fn plus(self) -> Self {
233 Self::Plus(self.into())
234 }
235
236 pub fn maybe(self) -> Self {
238 Self::Maybe(self.into())
239 }
240
241 pub fn not(self) -> Self {
243 Self::Not(self.into())
244 }
245
246 pub fn and(self, other: Self) -> Self {
248 Self::And(self.into(), other.into())
249 }
250}
251
252#[derive(Debug, Default)]
254pub struct Empty {
255 matched: bool,
256}
257
258impl Matcher for Empty {
259 fn start(&mut self) -> bool {
260 self.matched = true;
261 true
262 }
263
264 fn push(&mut self, _: char) -> bool {
265 self.matched = false;
266 false
267 }
268}
269
270#[derive(Debug, Default)]
272pub struct Dot {
273 started: bool,
274 matched: bool,
275}
276
277impl Matcher for Dot {
278 fn start(&mut self) -> bool {
279 self.started = true;
280 self.matched
281 }
282
283 fn push(&mut self, _: char) -> bool {
284 self.matched = self.started;
285 self.started = false;
286 self.matched
287 }
288}
289
290#[derive(Debug)]
292pub struct Literal {
293 c: char,
294 started: bool,
295 matched: bool,
296}
297
298impl Literal {
299 fn new(c: char) -> Self {
301 Self {
302 c,
303 started: false,
304 matched: false,
305 }
306 }
307}
308
309impl Matcher for Literal {
310 fn start(&mut self) -> bool {
311 self.started = true;
312 self.matched
313 }
314
315 fn push(&mut self, c: char) -> bool {
316 self.matched = self.started && c == self.c;
317 self.started = false;
318 self.matched
319 }
320}
321
322#[derive(Debug)]
324pub struct Star<T>(T);
325
326impl<T: Matcher> Matcher for Star<T> {
327 fn start(&mut self) -> bool {
328 self.0.start() || true
329 }
330
331 fn push(&mut self, c: char) -> bool {
332 self.0.push(c) && self.start()
333 }
334}
335
336#[derive(Debug)]
338pub struct Concat<L, R> {
339 left: L,
340 right: R,
341 right_started: bool,
342}
343
344impl<L: Matcher, R: Matcher> Concat<L, R> {
345 fn new(left: L, right: R) -> Self {
346 Self {
347 left,
348 right,
349 right_started: false,
350 }
351 }
352
353 fn start_right(&mut self) -> bool {
354 self.right_started = true;
355 self.right.start()
356 }
357}
358
359impl<L: Matcher, R: Matcher> Matcher for Concat<L, R> {
360 fn start(&mut self) -> bool {
361 self.left.start() && self.start_right()
362 }
363
364 fn push(&mut self, c: char) -> bool {
365 (self.right_started && self.right.push(c))
366 | (self.left.push(c) && self.start_right())
367 }
368}
369
370#[derive(Debug)]
372pub struct Or<T, U>(T, U);
373
374impl<T: Matcher, U: Matcher> Matcher for Or<T, U> {
375 fn start(&mut self) -> bool {
376 self.0.start() | self.1.start()
377 }
378
379 fn push(&mut self, c: char) -> bool {
380 self.0.push(c) | self.1.push(c)
381 }
382}
383
384#[derive(Debug)]
386pub struct Not<T>(T);
387
388impl<T: Matcher> Matcher for Not<T> {
389 fn start(&mut self) -> bool {
390 !self.0.start()
391 }
392
393 fn push(&mut self, c: char) -> bool {
394 !self.0.push(c)
395 }
396}
397
398#[derive(Debug)]
400pub struct And<T, U>(T, U);
401
402impl<T: Matcher, U: Matcher> Matcher for And<T, U> {
403 fn start(&mut self) -> bool {
404 self.0.start() & self.1.start()
405 }
406
407 fn push(&mut self, c: char) -> bool {
408 self.0.push(c) & self.1.push(c)
409 }
410}
411
412#[cfg(test)]
413mod tests {
414 use super::*;
415
416 fn parse(pattern: &str) -> Regex {
418 Regex::parse(pattern)
419 .expect(&format!("Pattern '{pattern}' should parse successfully"))
420 }
421
422 fn assert_parse(pattern: &str, regex: Regex) {
424 assert_eq!(parse(pattern), regex, "'{pattern}'");
425 }
426
427 fn assert_parse_err(pattern: &str) {
429 Regex::parse(pattern)
430 .expect_err(&format!("Pattern '{pattern}' should fail to parse"));
431 }
432
433 #[test]
434 fn parsing() {
435 assert_parse(r"", Regex::Empty);
436 assert_parse(r"()", Regex::Empty);
437 assert_parse(r"(())", Regex::Empty);
438
439 assert_parse(r".", Regex::Dot);
440 assert_parse(r"(.)", Regex::Dot);
441
442 assert_parse(r"a", Regex::Literal('a'));
443 assert_parse(r"\\", Regex::Literal('\\'));
444 assert_parse(r"\(", Regex::Literal('('));
445 assert_parse(r"\)", Regex::Literal(')'));
446 assert_parse(r"\.", Regex::Literal('.'));
447 assert_parse(r"\*", Regex::Literal('*'));
448 assert_parse(r"\+", Regex::Literal('+'));
449 assert_parse(r"\?", Regex::Literal('?'));
450 assert_parse(r"\|", Regex::Literal('|'));
451 assert_parse(r"\!", Regex::Literal('!'));
452 assert_parse(r"\&", Regex::Literal('&'));
453
454 assert_parse(r"()*", Regex::Empty.star());
455 assert_parse(r".*", Regex::Dot.star());
456 assert_parse(r"a*", Regex::Literal('a').star());
457 assert_parse(r"\**", Regex::Literal('*').star());
458 assert_parse(r".**", Regex::Dot.star().star());
459
460 let a = || Regex::Literal('a');
461 let b = || Regex::Literal('b');
462
463 assert_parse("ab", a().concat(b()));
464 assert_parse("ba", b().concat(a()));
465 assert_parse("a*b", a().star().concat(b()));
466 assert_parse("ab*", a().concat(b().star()));
467 assert_parse("(ab)*", a().concat(b()).star());
468
469 assert_parse("a|b", a().or(b()));
470 assert_parse("a|b*", a().or(b().star()));
471 assert_parse("ab|b", a().concat(b()).or(b()));
472 assert_parse("(ab|b)*", a().concat(b()).or(b()).star());
473
474 assert_parse("a+", a().plus());
475 assert_parse("a+b*", a().plus().concat(b().star()));
476 assert_parse("a+*", a().plus().star());
477 assert_parse("a*+", a().star().plus());
478
479 assert_parse("a?", a().maybe());
480
481 assert_parse("!a", a().not());
482 assert_parse("!.*", Regex::Dot.star().not());
483 assert_parse("!a|b", a().not().or(b()));
484
485 assert_parse("a&b", a().and(b()));
486 assert_parse("a&!b|b", a().and(b().not()).or(b()));
487 assert_parse("a&(!b|b)", a().and(b().not().or(b())));
488
489 assert_parse_err(r"\");
490 assert_parse_err(r"(");
491 assert_parse_err(r")");
492 }
493
494 fn assert_matches(pattern: &str, matches: &[&str], non_matches: &[&str]) {
496 let regex = parse(pattern);
497
498 for text in matches {
499 assert!(regex.matches(text), "'{pattern}' should match '{text}'");
500 }
501
502 for text in non_matches {
503 assert!(!regex.matches(text), "'{pattern}' should not match '{text}'");
504 }
505 }
506
507 #[test]
508 fn matching() {
509 assert_matches(
510 r"",
511 &[""],
512 &["a", "b", "ab"],
513 );
514
515 assert_matches(
516 r".",
517 &["a", "b"],
518 &["", "ab", "abc"],
519 );
520
521 assert_matches(
522 r"a",
523 &["a"],
524 &["", "b", "ab"],
525 );
526
527 assert_matches(
528 r"()*",
529 &[""],
530 &["a", "b", "ab"],
531 );
532
533 assert_matches(
534 r".*",
535 &["", "a", "b", "ab", "abc", "abcd"],
536 &[],
537 );
538
539 assert_matches(
540 r"a*",
541 &["", "a", "aa", "aaa"],
542 &["b", "ab", "ba", "aab", "aba", "baa"],
543 );
544
545 assert_matches(
546 r"a**",
547 &["", "a", "aa", "aaa"],
548 &["b", "ab", "ba", "aab", "aba", "baa"],
549 );
550
551 assert_matches(
552 r"a.c",
553 &["aac", "abc", "acc", "adc"],
554 &["", "a", "c", "ac", "aab", "bbc"],
555 );
556
557 assert_matches(
558 r"ab",
559 &["ab"],
560 &["", "a", "b", "ba", "abc"],
561 );
562
563 assert_matches(
564 r"a*b",
565 &["b", "ab", "aab"],
566 &["", "a", "ac", "abc"],
567 );
568
569 assert_matches(
570 r"ab*",
571 &["a", "ab", "abb"],
572 &["", "b", "ac", "abc"],
573 );
574
575 assert_matches(
576 r"a|b",
577 &["a", "b"],
578 &["", "aa", "ab", "bb"],
579 );
580
581 assert_matches(
582 r"(ab*|c)*",
583 &["", "a", "ab", "c", "abc", "abbacca"],
584 &["b", "acb"],
585 );
586
587 assert_matches(
588 r"a+",
589 &["a", "aa", "aaa"],
590 &["", "ab", "ba"],
591 );
592
593 assert_matches(
594 r"a?",
595 &["", "a"],
596 &["b", "aa"],
597 );
598
599 assert_matches(
600 r"!a",
601 &["", "b", "aa", "abc"],
602 &["a"],
603 );
604
605 assert_matches(
606 r"!.*",
607 &[],
608 &["", "a", "ab", "abc"],
609 );
610
611 assert_matches(
612 r"!(!(.*hello.*)|!(.*world.*))",
613 &["hello world", "world hello"],
614 &["", "hello", "world"],
615 );
616
617 assert_matches(
618 r"a&a",
619 &["a"],
620 &["", "b", "ab"],
621 );
622
623 assert_matches(
624 r"a&b",
625 &[],
626 &["", "a", "b", "ab"],
627 );
628
629 assert_matches(
630 r"(.*hello.*)&(.*world.*)",
631 &["hello world", "world hello"],
632 &["", "hello", "world"],
633 );
634 }
635}