ere_core/
nfa_static.rs

1//! Implements a version of [`WorkingNFA`] that can be serialized statically into a binary.
2//!
3//! This may not be the most efficient engine to use in most cases,
4//! but is typically the most interpretable.
5
6use std::fmt::Write as _;
7
8use crate::{
9    parse_tree::{Atom, BracketExpressionTerm, CharClass},
10    working_nfa::{EpsilonTransition, EpsilonType, WorkingNFA, WorkingState, WorkingTransition},
11};
12use quote::quote;
13
14/// A statically serializable version of an [`Atom`]
15#[derive(Debug)]
16pub enum AtomStatic {
17    /// Includes normal char and escaped chars
18    NormalChar(char),
19    CharClass(CharClass),
20    /// A matching bracket expression
21    MatchingList(&'static [BracketExpressionTerm]),
22    /// A nonmatching bracket expression
23    NonmatchingList(&'static [BracketExpressionTerm]),
24}
25impl AtomStatic {
26    pub fn check(&self, c: char) -> bool {
27        return match self {
28            AtomStatic::NormalChar(a) => *a == c,
29            AtomStatic::CharClass(char_class) => char_class.check(c),
30            AtomStatic::MatchingList(arr) => arr.into_iter().any(|b| b.check(c)),
31            AtomStatic::NonmatchingList(arr) => !arr.into_iter().any(|b| b.check(c)),
32        };
33    }
34    /// Serialize as [`Atom`], deserialize as [`AtomStatic`]
35    pub(crate) fn serialize_as_token_stream(atom: &Atom) -> proc_macro2::TokenStream {
36        return match atom {
37            Atom::NormalChar(c) => quote! {
38                ere_core::nfa_static::AtomStatic::NormalChar(#c)
39            },
40            Atom::CharClass(char_class) => quote! {
41                ere_core::nfa_static::AtomStatic::CharClass(#char_class)
42            },
43            Atom::MatchingList(bracket_expression_terms) => {
44                let terms: proc_macro2::TokenStream = bracket_expression_terms
45                    .into_iter()
46                    .map(|term| quote! { #term, })
47                    .collect();
48                quote! {{
49                    const terms: &'static [ere_core::parse_tree::BracketExpressionTerm] = &[#terms];
50                    ere_core::nfa_static::AtomStatic::MatchingList(terms)
51                }}
52            }
53            Atom::NonmatchingList(bracket_expression_terms) => {
54                let terms: proc_macro2::TokenStream = bracket_expression_terms
55                    .into_iter()
56                    .map(|term| quote! { #term, })
57                    .collect();
58                quote! {{
59                    const terms: &'static [ere_core::parse_tree::BracketExpressionTerm] = &[#terms];
60                    ere_core::nfa_static::AtomStatic::NonmatchingList(terms)
61                }}
62            }
63        };
64    }
65}
66impl std::fmt::Display for AtomStatic {
67    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68        return match self {
69            AtomStatic::NormalChar(c) if crate::parse_tree::is_escapable_character(*c) => {
70                write!(f, "\\{c}")
71            }
72            AtomStatic::NormalChar(c) => f.write_char(*c),
73            AtomStatic::CharClass(c) => c.fmt(f),
74            AtomStatic::MatchingList(vec) => {
75                f.write_char('[')?;
76                for term in *vec {
77                    write!(f, "{term}")?;
78                }
79                f.write_char(']')
80            }
81            AtomStatic::NonmatchingList(vec) => {
82                f.write_str("[^")?;
83                for term in *vec {
84                    write!(f, "{term}")?;
85                }
86                f.write_char(']')
87            }
88        };
89    }
90}
91
92#[derive(Debug)]
93pub struct NFATransitionStatic {
94    pub(crate) to: usize,
95    pub(crate) symbol: AtomStatic,
96}
97impl NFATransitionStatic {
98    /// Only intended for internal use by macros.
99    pub const fn __load(to: usize, symbol: AtomStatic) -> NFATransitionStatic {
100        return NFATransitionStatic { to, symbol };
101    }
102    fn serialize_as_token_stream(transition: &WorkingTransition) -> proc_macro2::TokenStream {
103        let WorkingTransition { to, symbol } = transition;
104        let symbol = AtomStatic::serialize_as_token_stream(symbol);
105        return quote! {
106            ere_core::nfa_static::NFATransitionStatic::__load(
107                #to,
108                #symbol,
109            )
110        }
111        .into();
112    }
113}
114impl std::fmt::Display for NFATransitionStatic {
115    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
116        return write!(f, "-{}> {}", self.symbol, self.to);
117    }
118}
119
120#[derive(Debug, Clone)]
121pub struct NFAStateStatic {
122    pub(crate) transitions: &'static [NFATransitionStatic],
123    pub(crate) epsilons: &'static [EpsilonTransition],
124}
125impl NFAStateStatic {
126    pub const fn __load(
127        transitions: &'static [NFATransitionStatic],
128        epsilons: &'static [EpsilonTransition],
129    ) -> NFAStateStatic {
130        return NFAStateStatic {
131            transitions,
132            epsilons,
133        };
134    }
135    fn serialize_as_token_stream(state: &WorkingState) -> proc_macro2::TokenStream {
136        let WorkingState {
137            transitions,
138            epsilons,
139        } = state;
140        let transitions: proc_macro2::TokenStream = transitions
141            .iter()
142            .map(|t| {
143                let t = NFATransitionStatic::serialize_as_token_stream(t);
144                return quote! { #t, };
145            })
146            .collect();
147        let epsilons: proc_macro2::TokenStream = epsilons.iter().map(|e| quote! { #e, }).collect();
148        return quote! {{
149            const transitions: &'static [::ere_core::nfa_static::NFATransitionStatic] = &[#transitions];
150            const epsilons: &'static [::ere_core::working_nfa::EpsilonTransition] = &[#epsilons];
151            ::ere_core::nfa_static::NFAStateStatic::__load(
152                transitions,
153                epsilons,
154            )
155        }}
156        .into();
157    }
158}
159
160/// The statically allocated representation of an NFA
161///
162/// Generic `N` represents the number of capture groups (will always be at least 1)
163#[derive(Debug)]
164pub struct NFAStatic<const N: usize> {
165    states: &'static [NFAStateStatic],
166}
167impl<const N: usize> NFAStatic<N> {
168    /// Using the classical NFA algorithm.
169    pub fn test(&self, text: &str) -> bool {
170        let mut list = vec![false; self.states.len()];
171        let mut new_list = vec![false; self.states.len()];
172        list[0] = true;
173
174        // Adds all states reachable by epsilon transitions
175        let propogate_epsilon = |list: &mut Vec<bool>, idx: usize| {
176            let mut stack: Vec<usize> = list
177                .iter()
178                .enumerate()
179                .filter_map(|(i, set)| set.then_some(i))
180                .collect();
181
182            while let Some(from) = stack.pop() {
183                for EpsilonTransition { to, special } in self.states[from].epsilons {
184                    if list[from]
185                        && !list[*to]
186                        && (match special {
187                            EpsilonType::StartAnchor => idx == 0,
188                            EpsilonType::EndAnchor => idx == text.len(),
189                            _ => true,
190                        })
191                    {
192                        stack.push(*to);
193                        list[*to] = true;
194                    }
195                }
196            }
197        };
198
199        for (i, c) in text.char_indices() {
200            propogate_epsilon(&mut list, i);
201            for (from, state) in self.states.iter().enumerate() {
202                if !list[from] {
203                    continue;
204                }
205                for NFATransitionStatic { to, symbol } in state.transitions {
206                    if list[from] && symbol.check(c) {
207                        new_list[*to] = true;
208                    }
209                }
210            }
211            list.copy_from_slice(&new_list);
212            new_list.fill(false);
213        }
214        propogate_epsilon(&mut list, text.len());
215        return *list.last().unwrap_or(&false);
216    }
217
218    /// Only intended for internal use by macros.
219    pub const fn __load(states: &'static [NFAStateStatic]) -> NFAStatic<N> {
220        return NFAStatic { states };
221    }
222}
223
224impl<const N: usize> std::fmt::Display for NFAStatic<N> {
225    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
226        for (i, state) in self.states.iter().enumerate() {
227            writeln!(f, "State {i}")?;
228            for epsilon in state.epsilons {
229                writeln!(f, "  {epsilon}")?;
230            }
231            for transition in state.transitions {
232                writeln!(f, "  {transition}")?;
233            }
234        }
235        return Ok(());
236    }
237}
238
239/// Converts a [`WorkingNFA`] into a format that, when returned by a proc macro, will
240/// create the corresponding [`NFAStatic`].
241pub(crate) fn serialize_nfa_as_token_stream(nfa: &WorkingNFA) -> proc_macro2::TokenStream {
242    let WorkingNFA { states } = nfa;
243
244    let capture_groups = nfa.num_capture_groups();
245    let state_defs: proc_macro2::TokenStream = states
246        .iter()
247        .map(|state| {
248            let state = NFAStateStatic::serialize_as_token_stream(state);
249            return quote! { #state, };
250        })
251        .collect();
252    return quote! {{
253        const states: &'static [ere_core::nfa_static::NFAStateStatic] = &[#state_defs];
254
255        ere_core::nfa_static::NFAStatic::<#capture_groups>::__load(states)
256    }};
257}