Skip to main content

chumsky/
regex.rs

1//! Implementations of regex-based parsers
2
3use super::*;
4use regex_automata::{meta, Anchored, Input as ReInput};
5
6/// See [`regex()`].
7pub struct Regex<I, E> {
8    regex: meta::Regex,
9    #[allow(dead_code)]
10    phantom: EmptyPhantom<(E, I)>,
11}
12
13impl<I, E> Clone for Regex<I, E> {
14    fn clone(&self) -> Self {
15        Self {
16            regex: self.regex.clone(),
17            phantom: EmptyPhantom::new(),
18        }
19    }
20}
21
22/// Match input based on a provided regex pattern
23pub fn regex<I, E>(pattern: &str) -> Regex<I, E> {
24    Regex {
25        regex: meta::Regex::new(pattern).expect("Failed to compile regex"),
26        phantom: EmptyPhantom::new(),
27    }
28}
29
30impl<'src, S, I, E> Parser<'src, I, &'src S, E> for Regex<I, E>
31where
32    I: StrInput<'src, Slice = &'src S>,
33    I::Token: Char,
34    S: ?Sized + AsRef<[u8]> + 'src,
35    E: ParserExtra<'src, I>,
36{
37    #[inline]
38    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, &'src S> {
39        let before = inp.cursor();
40
41        let re_in = ReInput::new(inp.full_slice())
42            .anchored(Anchored::Yes)
43            .range(before.inner..);
44
45        let res = self.regex.find(re_in).map(|m| m.len());
46
47        match res {
48            Some(len) => {
49                let before = inp.cursor();
50                // SAFETY: `len` *must* be no greater than the byte length of the remaining string
51                unsafe {
52                    inp.skip_bytes(len);
53                }
54                let after = inp.cursor();
55                Ok(M::bind(|| inp.slice(&before..&after)))
56            }
57            None => {
58                // TODO: Improve error
59                let span = inp.span_since(&before);
60                inp.add_alt([DefaultExpected::SomethingElse], None, span);
61                Err(())
62            }
63        }
64    }
65
66    go_extra!(&'src S);
67}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// An identifier regex, padded and repeated, should split both `&str` and
    /// `&[u8]` input into its individual words.
    #[test]
    fn regex_parser() {
        use self::prelude::*;
        use self::regex::*;

        // Generic over the input type so the very same parser is exercised on
        // text and on raw bytes.
        fn parser<'src, S, I>() -> impl Parser<'src, I, Vec<&'src S>>
        where
            I: StrInput<'src, Slice = &'src S>,
            I::Token: Char,
            S: ?Sized + AsRef<[u8]> + 'src,
        {
            let ident = regex("[a-zA-Z_][a-zA-Z0-9_]*");
            ident.padded().repeated().collect()
        }

        // String input.
        let text_words = parser().parse("hello world this works").into_result();
        assert_eq!(text_words, Ok(vec!["hello", "world", "this", "works"]));

        // Byte-slice input.
        let byte_words = parser()
            .parse(b"hello world this works" as &[_])
            .into_result();
        let expected_bytes = vec![
            b"hello" as &[_],
            b"world" as &[_],
            b"this" as &[_],
            b"works" as &[_],
        ];
        assert_eq!(byte_words, Ok(expected_bytes));
    }
}