1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
use crate::{CreateParserState, Parser};
use regex_automata::{
    dfa::{sparse, Automaton},
    util::primitives::StateID,
};

/// A parser that uses a regex pattern to parse input.
///
/// The pattern is compiled once into a sparse DFA by [`RegexParser::new`];
/// parsing then steps that DFA one byte at a time.
#[derive(Debug)]
pub struct RegexParser {
    /// Compiled automaton for the pattern.
    dfa: sparse::DFA<Vec<u8>>,
    /// Start configuration used to look up the DFA's initial state.
    config: regex_automata::util::start::Config,
}

impl RegexParser {
    /// Create a new `RegexParser` from a regex pattern.
    pub fn new(regex: &str) -> anyhow::Result<Self> {
        let dfa = sparse::DFA::new(regex)?;

        let config =
            regex_automata::util::start::Config::new().anchored(regex_automata::Anchored::Yes);

        Ok(Self { dfa, config })
    }
}

impl CreateParserState for RegexParser {
    /// Return the DFA state from which parsing of a fresh input begins.
    ///
    /// # Panics
    ///
    /// Panics if the DFA cannot produce a start state for the stored start
    /// configuration.
    fn create_parser_state(&self) -> <Self as Parser>::PartialState {
        self.dfa
            .start_state(&self.config)
            .expect("DFA should provide a start state for the anchored start configuration")
    }
}

impl Parser for RegexParser {
    type Error = regex_automata::MatchError;
    type Output = ();
    type PartialState = StateID;

    /// Feed `input` through the DFA one byte at a time, starting from `state`.
    ///
    /// # Returns
    ///
    /// * `ParseResult::Finished` as soon as the bytes consumed so far form a
    ///   match; any bytes of `input` that were not consumed are handed back in
    ///   `remaining`.
    /// * `ParseResult::Incomplete` with the DFA state reached when `input` runs
    ///   out before a match is found. An empty `input` therefore returns
    ///   `Incomplete` with the state unchanged.
    ///
    /// # Errors
    ///
    /// Returns `MatchError::quit` (carrying the offending byte and offset `0`)
    /// when a byte moves the DFA into a dead or quit state.
    fn parse<'a>(
        &self,
        state: &Self::PartialState,
        input: &'a [u8],
    ) -> Result<crate::ParseResult<'a, Self::PartialState, Self::Output>, Self::Error> {
        let mut state = *state;
        for (idx, &b) in input.iter().enumerate() {
            state = self.dfa.next_state(state, b);

            // Check for failure first so the end-of-input probe below is only
            // ever taken from a live state.
            if self.dfa.is_dead_state(state) || self.dfa.is_quit_state(state) {
                return Err(regex_automata::MatchError::quit(b, 0));
            }

            // regex-automata DFAs report a match one byte late, so a match
            // ending on `b` only becomes visible after one more transition.
            // Walking the end-of-input transition reveals it without consuming
            // a real byte.
            if self.dfa.is_match_state(self.dfa.next_eoi_state(state)) {
                return Ok(crate::ParseResult::Finished {
                    result: (),
                    remaining: &input[idx + 1..],
                });
            }

            // A match state reached directly signals a match that ended just
            // before `b`, so `b` is not part of it and is handed back.
            if self.dfa.is_match_state(state) {
                return Ok(crate::ParseResult::Finished {
                    result: (),
                    remaining: &input[idx..],
                });
            }
        }

        Ok(crate::ParseResult::Incomplete {
            new_state: state,
            required_next: "".into(),
        })
    }
}