ere_core/
lib.rs

1//! This crate provides the core functionality to the `ere` crate.
2
3use proc_macro::TokenStream;
4use quote::quote;
5extern crate proc_macro;
6
7pub mod config;
8pub mod nfa_static;
9pub mod parse_tree;
10pub mod pike_vm;
11pub mod pike_vm_u8;
12pub mod simplified_tree;
13pub mod visualization;
14pub mod working_nfa;
15pub mod working_u8_nfa;
16
/// The concrete matching engine stored inside a [`Regex`].
///
/// The const generic `N` is forwarded unchanged to whichever engine type is held.
/// Which variant gets built for a given expression is decided in `__compile_regex`.
enum RegexEngines<const N: usize> {
    /// Engine backed by [`nfa_static::NFAStatic`].
    NFA(nfa_static::NFAStatic<N>),
    /// Engine backed by [`pike_vm::PikeVM`].
    PikeVM(pike_vm::PikeVM<N>),
    /// Engine backed by [`pike_vm_u8::U8PikeVM`].
    U8PikeVM(pike_vm_u8::U8PikeVM<N>),
}
22
23/// A regular expression (specifically, a [POSIX ERE](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX-Extended_Regular_Expressions)).
24///
25/// Internally, this may contain one of several engines depending on the expression.
26///
27/// The const generic `N` represents the number of capture groups (including capture group 0 which is the entire expression).
28/// It defaults to `1` (for just capture group 0), but you will need to specify it in the type for expressions with more capture groups.
29pub struct Regex<const N: usize = 1>(RegexEngines<N>);
30impl<const N: usize> Regex<N> {
31    /// Returns whether or not the text is matched by the regular expression.
32    pub fn test(&self, text: &str) -> bool {
33        return match &self.0 {
34            RegexEngines::NFA(nfa) => nfa.test(text),
35            RegexEngines::PikeVM(pike_vm) => pike_vm.test(text),
36            RegexEngines::U8PikeVM(pike_vm) => pike_vm.test(text),
37        };
38    }
39
40    pub fn exec<'a>(&self, text: &'a str) -> Option<[Option<&'a str>; N]> {
41        return match &self.0 {
42            RegexEngines::NFA(nfa) => unimplemented!(),
43            RegexEngines::PikeVM(pike_vm) => pike_vm.exec(text),
44            RegexEngines::U8PikeVM(pike_vm) => pike_vm.exec(text),
45        };
46    }
47}
48impl<const N: usize> std::fmt::Display for Regex<N> {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        return match &self.0 {
51            RegexEngines::NFA(nfastatic) => nfastatic.fmt(f),
52            RegexEngines::PikeVM(_) => f.write_str("<Compiled VM>"),
53            RegexEngines::U8PikeVM(_) => f.write_str("<Compiled VM>"),
54        };
55    }
56}
57
58pub const fn __construct_pikevm_regex<const N: usize>(vm: pike_vm::PikeVM<N>) -> Regex<N> {
59    return Regex(RegexEngines::PikeVM(vm));
60}
61pub const fn __construct_u8pikevm_regex<const N: usize>(vm: pike_vm_u8::U8PikeVM<N>) -> Regex<N> {
62    return Regex(RegexEngines::U8PikeVM(vm));
63}
64pub const fn __construct_nfa_regex<const N: usize>(nfa: nfa_static::NFAStatic<N>) -> Regex<N> {
65    return Regex(RegexEngines::NFA(nfa));
66}
67
68pub fn __compile_regex(stream: TokenStream) -> TokenStream {
69    let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
70    let tree = simplified_tree::SimplifiedTreeNode::from(ere);
71    let nfa = working_nfa::WorkingNFA::new(&tree);
72    // println!("{}", nfa.to_tikz(true));
73
74    // Currently use a conservative check: only use u8 engines when it will only match ascii strings
75    fn is_state_ascii(state: &working_nfa::WorkingState) -> bool {
76        return state
77            .transitions
78            .iter()
79            .flat_map(|t| t.symbol.to_ranges())
80            .all(|range| range.end().is_ascii());
81    }
82    let is_ascii = nfa.states.iter().all(is_state_ascii);
83
84    if is_ascii {
85        let nfa = working_u8_nfa::U8NFA::new(&nfa);
86        let engine = pike_vm_u8::serialize_pike_vm_token_stream(&nfa);
87        return quote! { ::ere_core::__construct_u8pikevm_regex(#engine) }.into();
88    } else if true {
89        let engine = pike_vm::serialize_pike_vm_token_stream(&nfa);
90        return quote! { ::ere_core::__construct_pikevm_regex(#engine) }.into();
91    } else {
92        let engine = nfa_static::serialize_nfa_as_token_stream(&nfa);
93        return quote! { ::ere_core::__construct_nfa_regex(#engine) }.into();
94    };
95}