ere_core/engines/
nfa_static.rs

1//! Implements a version of [`WorkingNFA`] that can be serialized statically into a binary.
2//!
3//! This may not be the most efficient engine to use in most cases,
4//! but is typically the most interpretable.
5
6use std::fmt::Write as _;
7
8use crate::{
9    parse_tree::{Atom, BracketExpressionTerm, CharClass},
10    working_nfa::{EpsilonTransition, EpsilonType, WorkingNFA, WorkingState, WorkingTransition},
11};
12use quote::quote;
13
14/// A statically serializable version of an [`Atom`]
15#[derive(Debug)]
16pub enum AtomStatic {
17    /// Includes normal char and escaped chars
18    NormalChar(char),
19    CharClass(CharClass),
20    /// A matching bracket expression
21    MatchingList(&'static [BracketExpressionTerm]),
22    /// A nonmatching bracket expression
23    NonmatchingList(&'static [BracketExpressionTerm]),
24}
25impl AtomStatic {
26    pub fn check(&self, c: char) -> bool {
27        return match self {
28            AtomStatic::NormalChar(a) => *a == c,
29            AtomStatic::CharClass(char_class) => char_class.check(c),
30            AtomStatic::MatchingList(arr) => arr.into_iter().any(|b| b.check(c)),
31            AtomStatic::NonmatchingList(arr) => !arr.into_iter().any(|b| b.check(c)),
32        };
33    }
34    /// Serialize as [`Atom`], deserialize as [`AtomStatic`]
35    pub(crate) fn serialize_as_token_stream(atom: &Atom) -> proc_macro2::TokenStream {
36        return match atom {
37            Atom::NormalChar(c) => quote! {
38                ::ere::nfa_static::AtomStatic::NormalChar(#c)
39            },
40            Atom::CharClass(char_class) => quote! {
41                ::ere::nfa_static::AtomStatic::CharClass(#char_class)
42            },
43            Atom::MatchingList(bracket_expression_terms) => {
44                quote! {{
45                    const terms: &'static [::ere::parse_tree::BracketExpressionTerm] = &[#(#bracket_expression_terms),*];
46                    ::ere::nfa_static::AtomStatic::MatchingList(terms)
47                }}
48            }
49            Atom::NonmatchingList(bracket_expression_terms) => {
50                quote! {{
51                    const terms: &'static [::ere::parse_tree::BracketExpressionTerm] = &[#(#bracket_expression_terms),*];
52                    ::ere::nfa_static::AtomStatic::NonmatchingList(terms)
53                }}
54            }
55        };
56    }
57}
58impl std::fmt::Display for AtomStatic {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        return match self {
61            AtomStatic::NormalChar(c) if crate::parse_tree::is_escapable_character(*c) => {
62                write!(f, "\\{c}")
63            }
64            AtomStatic::NormalChar(c) => f.write_char(*c),
65            AtomStatic::CharClass(c) => c.fmt(f),
66            AtomStatic::MatchingList(vec) => {
67                f.write_char('[')?;
68                for term in *vec {
69                    write!(f, "{term}")?;
70                }
71                f.write_char(']')
72            }
73            AtomStatic::NonmatchingList(vec) => {
74                f.write_str("[^")?;
75                for term in *vec {
76                    write!(f, "{term}")?;
77                }
78                f.write_char(']')
79            }
80        };
81    }
82}
83
84#[derive(Debug)]
85pub struct NFATransitionStatic {
86    pub(crate) to: usize,
87    pub(crate) symbol: AtomStatic,
88}
89impl NFATransitionStatic {
90    /// Only intended for internal use by macros.
91    pub const fn __load(to: usize, symbol: AtomStatic) -> NFATransitionStatic {
92        return NFATransitionStatic { to, symbol };
93    }
94    fn serialize_as_token_stream(transition: &WorkingTransition) -> proc_macro2::TokenStream {
95        let WorkingTransition { to, symbol } = transition;
96        let symbol = AtomStatic::serialize_as_token_stream(symbol);
97        return quote! {
98            ::ere::nfa_static::NFATransitionStatic::__load(
99                #to,
100                #symbol,
101            )
102        }
103        .into();
104    }
105}
106impl std::fmt::Display for NFATransitionStatic {
107    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108        return write!(f, "-{}> {}", self.symbol, self.to);
109    }
110}
111
112#[derive(Debug, Clone)]
113pub struct NFAStateStatic {
114    pub(crate) transitions: &'static [NFATransitionStatic],
115    pub(crate) epsilons: &'static [EpsilonTransition],
116}
117impl NFAStateStatic {
118    pub const fn __load(
119        transitions: &'static [NFATransitionStatic],
120        epsilons: &'static [EpsilonTransition],
121    ) -> NFAStateStatic {
122        return NFAStateStatic {
123            transitions,
124            epsilons,
125        };
126    }
127    fn serialize_as_token_stream(state: &WorkingState) -> proc_macro2::TokenStream {
128        let WorkingState {
129            transitions,
130            epsilons,
131        } = state;
132        let transitions = transitions
133            .iter()
134            .map(NFATransitionStatic::serialize_as_token_stream);
135        return quote! {{
136            const transitions: &'static [::ere::nfa_static::NFATransitionStatic] = &[#(#transitions),*];
137            const epsilons: &'static [::ere::working_nfa::EpsilonTransition] = &[#(#epsilons),*];
138            ::ere::nfa_static::NFAStateStatic::__load(
139                transitions,
140                epsilons,
141            )
142        }}
143        .into();
144    }
145}
146
147/// The statically allocated representation of an NFA
148///
149/// Generic `N` represents the number of capture groups (will always be at least 1)
150#[derive(Debug)]
151pub struct NFAStatic<const N: usize> {
152    states: &'static [NFAStateStatic],
153}
154impl<const N: usize> NFAStatic<N> {
155    /// Using the classical NFA algorithm.
156    pub fn test(&self, text: &str) -> bool {
157        let mut list = vec![false; self.states.len()];
158        let mut new_list = vec![false; self.states.len()];
159        list[0] = true;
160
161        // Adds all states reachable by epsilon transitions
162        let propogate_epsilon = |list: &mut Vec<bool>, idx: usize| {
163            let mut stack: Vec<usize> = list
164                .iter()
165                .enumerate()
166                .filter_map(|(i, set)| set.then_some(i))
167                .collect();
168
169            while let Some(from) = stack.pop() {
170                for EpsilonTransition { to, special } in self.states[from].epsilons {
171                    if list[from]
172                        && !list[*to]
173                        && (match special {
174                            EpsilonType::StartAnchor => idx == 0,
175                            EpsilonType::EndAnchor => idx == text.len(),
176                            _ => true,
177                        })
178                    {
179                        stack.push(*to);
180                        list[*to] = true;
181                    }
182                }
183            }
184        };
185
186        for (i, c) in text.char_indices() {
187            propogate_epsilon(&mut list, i);
188            for (from, state) in self.states.iter().enumerate() {
189                if !list[from] {
190                    continue;
191                }
192                for NFATransitionStatic { to, symbol } in state.transitions {
193                    if list[from] && symbol.check(c) {
194                        new_list[*to] = true;
195                    }
196                }
197            }
198            list.copy_from_slice(&new_list);
199            new_list.fill(false);
200        }
201        propogate_epsilon(&mut list, text.len());
202        return *list.last().unwrap_or(&false);
203    }
204
205    /// Only intended for internal use by macros.
206    pub const fn __load(states: &'static [NFAStateStatic]) -> NFAStatic<N> {
207        return NFAStatic { states };
208    }
209}
210
211impl<const N: usize> std::fmt::Display for NFAStatic<N> {
212    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
213        for (i, state) in self.states.iter().enumerate() {
214            writeln!(f, "State {i}")?;
215            for epsilon in state.epsilons {
216                writeln!(f, "  {epsilon}")?;
217            }
218            for transition in state.transitions {
219                writeln!(f, "  {transition}")?;
220            }
221        }
222        return Ok(());
223    }
224}
225
226/// Converts a [`WorkingNFA`] into a format that, when returned by a proc macro, will
227/// create the corresponding [`NFAStatic`].
228pub(crate) fn serialize_nfa_as_token_stream(nfa: &WorkingNFA) -> proc_macro2::TokenStream {
229    let WorkingNFA { states } = nfa;
230
231    let capture_groups = nfa.num_capture_groups();
232    let state_defs = states.iter().map(NFAStateStatic::serialize_as_token_stream);
233    return quote! {{
234        const states: &'static [::ere::nfa_static::NFAStateStatic] = &[#(#state_defs),*];
235
236        ::ere::nfa_static::NFAStatic::<#capture_groups>::__load(states)
237    }};
238}