ere_core/
nfa_static.rs

1//! Implements a version of [`crate::working_nfa::WorkingNFA`] that can be serialized statically into a binary.
2
3use crate::{
4    parse_tree::{Atom, BracketExpressionTerm, CharClass},
5    working_nfa::{EpsilonTransition, EpsilonType, WorkingNFA, WorkingTransition},
6};
7use quote::quote;
8
9/// A statically serializable version of [`crate::parse_tree::Atom`]
10#[derive(Debug)]
11pub enum AtomStatic {
12    /// Includes normal char and escaped chars
13    NormalChar(char),
14    CharClass(CharClass),
15    /// A matching bracket expression
16    MatchingList(&'static [BracketExpressionTerm]),
17    /// A nonmatching bracket expression
18    NonmatchingList(&'static [BracketExpressionTerm]),
19}
20impl AtomStatic {
21    pub fn check(&self, c: char) -> bool {
22        return match self {
23            AtomStatic::NormalChar(a) => *a == c,
24            AtomStatic::CharClass(char_class) => char_class.check(c),
25            AtomStatic::MatchingList(arr) => arr.into_iter().any(|b| b.check(c)),
26            AtomStatic::NonmatchingList(arr) => !arr.into_iter().any(|b| b.check(c)),
27        };
28    }
29    /// Serialize as [`Atom`], deserialize as [`StaticAtom`]
30    fn serialize_as_token_stream(atom: &Atom) -> proc_macro2::TokenStream {
31        return match atom {
32            Atom::NormalChar(c) => quote! {
33                ere_core::nfa_static::AtomStatic::NormalChar(#c)
34            },
35            Atom::CharClass(char_class) => quote! {
36                ere_core::nfa_static::AtomStatic::CharClass(#char_class),
37            },
38            Atom::MatchingList(bracket_expression_terms) => {
39                let terms: proc_macro2::TokenStream = bracket_expression_terms
40                    .into_iter()
41                    .map(|term| quote! { #term, })
42                    .collect();
43                quote! {{
44                    const terms: &'static [ere_core::parse_tree::BracketExpressionTerm] = &[#terms];
45                    ere_core::nfa_static::AtomStatic::MatchingList(terms)
46                }}
47            }
48            Atom::NonmatchingList(bracket_expression_terms) => {
49                let terms: proc_macro2::TokenStream = bracket_expression_terms
50                    .into_iter()
51                    .map(|term| quote! { #term, })
52                    .collect();
53                quote! {{
54                    const terms: &'static [ere_core::parse_tree::BracketExpressionTerm] = &[#terms];
55                    ere_core::nfa_static::AtomStatic::NonmatchingList(terms)
56                }}
57            }
58        };
59    }
60}
61
62#[derive(Debug)]
63pub struct NFATransitionStatic {
64    pub(crate) from: usize,
65    pub(crate) to: usize,
66    pub(crate) symbol: AtomStatic,
67}
68impl NFATransitionStatic {
69    /// Only intended for internal use by macros.
70    pub const fn __load(from: usize, to: usize, symbol: AtomStatic) -> NFATransitionStatic {
71        return NFATransitionStatic { from, to, symbol };
72    }
73    fn serialize_as_token_stream(transition: &WorkingTransition) -> proc_macro2::TokenStream {
74        let WorkingTransition { from, to, symbol } = transition;
75        let symbol = AtomStatic::serialize_as_token_stream(symbol);
76        return quote! {
77            ere_core::nfa_static::NFATransitionStatic::__load(
78                #from,
79                #to,
80                #symbol,
81            )
82        }
83        .into();
84    }
85}
86
87#[derive(Debug)]
88pub struct NFAStatic {
89    transitions: &'static [NFATransitionStatic],
90    epsilons: &'static [EpsilonTransition],
91    states: usize,
92}
93impl NFAStatic {
94    /// Using the classical NFA algorithm.
95    pub fn test(&self, text: &str) -> bool {
96        let mut list = vec![false; self.states];
97        let mut new_list = vec![false; self.states];
98        list[0] = true;
99
100        // Adds all states reachable by epsilon transitions
101        let propogate_epsilon = |list: &mut Vec<bool>, idx: usize| loop {
102            let mut has_new = false;
103            for EpsilonTransition { from, to, special } in self.epsilons {
104                if list[*from]
105                    && !list[*to]
106                    && (match special {
107                        EpsilonType::StartAnchor => idx == 0,
108                        EpsilonType::EndAnchor => idx == text.len(),
109                        _ => true,
110                    })
111                {
112                    list[*to] = true;
113                    has_new = true;
114                }
115            }
116            if !has_new {
117                break;
118            }
119        };
120
121        for (i, c) in text.char_indices() {
122            propogate_epsilon(&mut list, i);
123            for NFATransitionStatic { from, to, symbol } in self.transitions {
124                if list[*from] && symbol.check(c) {
125                    new_list[*to] = true;
126                }
127            }
128            let tmp = list;
129            list = new_list;
130            new_list = tmp;
131            new_list.fill(false);
132        }
133        propogate_epsilon(&mut list, text.len());
134        return *list.last().unwrap_or(&false);
135    }
136
137    /// Only intended for internal use by macros.
138    pub const fn __load(
139        transitions: &'static [NFATransitionStatic],
140        epsilons: &'static [EpsilonTransition],
141        states: usize,
142    ) -> NFAStatic {
143        return NFAStatic {
144            transitions,
145            epsilons,
146            states,
147        };
148    }
149
150    /// Converts a [`WorkingNFA`] into a format that, when returned by a proc macro, will
151    /// create the corresponding [`NFAStatic`].
152    ///
153    /// Warning: `nfa` should have anchors removed already.
154    pub(crate) fn serialize_as_token_stream(nfa: &WorkingNFA) -> proc_macro2::TokenStream {
155        let WorkingNFA {
156            transitions,
157            epsilons,
158            states,
159        } = nfa;
160
161        let transitions_defs: proc_macro2::TokenStream = transitions
162            .into_iter()
163            .map(|t| {
164                let t = NFATransitionStatic::serialize_as_token_stream(t);
165                return quote! { #t, };
166            })
167            .collect();
168        let epsilon_defs: proc_macro2::TokenStream =
169            epsilons.into_iter().map(|e| quote! { #e, }).collect();
170
171        return quote! {{
172            const transitions: &'static [ere_core::nfa_static::NFATransitionStatic] = &[#transitions_defs];
173            const epsilons: &'static [ere_core::working_nfa::EpsilonTransition] = &[#epsilon_defs];
174
175            ere_core::nfa_static::NFAStatic::__load(transitions, epsilons, #states)
176        }};
177    }
178}