1use proc_macro::TokenStream;
4use quote::quote;
5extern crate proc_macro;
6
7pub mod config;
8pub mod nfa_static;
9pub mod parse_tree;
10pub mod pike_vm;
11pub mod pike_vm_u8;
12pub mod simplified_tree;
13pub mod visualization;
14pub mod working_nfa;
15pub mod working_u8_nfa;
16
/// The matching engines a [`Regex`] can wrap.
///
/// `Regex` forwards each call to whichever variant it holds. `N` is passed
/// through unchanged to the wrapped engine type.
enum RegexEngines<const N: usize> {
    /// Wraps an [`nfa_static::NFAStatic`]. `Regex::exec` is not implemented for
    /// this variant.
    NFA(nfa_static::NFAStatic<N>),
    /// Wraps a [`pike_vm::PikeVM`].
    PikeVM(pike_vm::PikeVM<N>),
    /// Wraps a [`pike_vm_u8::U8PikeVM`]. `__compile_regex` picks this engine when
    /// every transition range of the NFA ends on an ASCII character.
    U8PikeVM(pike_vm_u8::U8PikeVM<N>),
}
22
23pub struct Regex<const N: usize = 1>(RegexEngines<N>);
30impl<const N: usize> Regex<N> {
31 pub fn test(&self, text: &str) -> bool {
33 return match &self.0 {
34 RegexEngines::NFA(nfa) => nfa.test(text),
35 RegexEngines::PikeVM(pike_vm) => pike_vm.test(text),
36 RegexEngines::U8PikeVM(pike_vm) => pike_vm.test(text),
37 };
38 }
39
40 pub fn exec<'a>(&self, text: &'a str) -> Option<[Option<&'a str>; N]> {
41 return match &self.0 {
42 RegexEngines::NFA(nfa) => unimplemented!(),
43 RegexEngines::PikeVM(pike_vm) => pike_vm.exec(text),
44 RegexEngines::U8PikeVM(pike_vm) => pike_vm.exec(text),
45 };
46 }
47}
48impl<const N: usize> std::fmt::Display for Regex<N> {
49 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50 return match &self.0 {
51 RegexEngines::NFA(nfastatic) => nfastatic.fmt(f),
52 RegexEngines::PikeVM(_) => f.write_str("<Compiled VM>"),
53 RegexEngines::U8PikeVM(_) => f.write_str("<Compiled VM>"),
54 };
55 }
56}
57
58pub const fn __construct_pikevm_regex<const N: usize>(vm: pike_vm::PikeVM<N>) -> Regex<N> {
59 return Regex(RegexEngines::PikeVM(vm));
60}
61pub const fn __construct_u8pikevm_regex<const N: usize>(vm: pike_vm_u8::U8PikeVM<N>) -> Regex<N> {
62 return Regex(RegexEngines::U8PikeVM(vm));
63}
64pub const fn __construct_nfa_regex<const N: usize>(nfa: nfa_static::NFAStatic<N>) -> Regex<N> {
65 return Regex(RegexEngines::NFA(nfa));
66}
67
68pub fn __compile_regex(stream: TokenStream) -> TokenStream {
69 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
70 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
71 let nfa = working_nfa::WorkingNFA::new(&tree);
72 fn is_state_ascii(state: &working_nfa::WorkingState) -> bool {
76 return state
77 .transitions
78 .iter()
79 .flat_map(|t| t.symbol.to_ranges())
80 .all(|range| range.end().is_ascii());
81 }
82 let is_ascii = nfa.states.iter().all(is_state_ascii);
83
84 if is_ascii {
85 let nfa = working_u8_nfa::U8NFA::new(&nfa);
86 let engine = pike_vm_u8::serialize_pike_vm_token_stream(&nfa);
87 return quote! { ::ere_core::__construct_u8pikevm_regex(#engine) }.into();
88 } else if true {
89 let engine = pike_vm::serialize_pike_vm_token_stream(&nfa);
90 return quote! { ::ere_core::__construct_pikevm_regex(#engine) }.into();
91 } else {
92 let engine = nfa_static::serialize_nfa_as_token_stream(&nfa);
93 return quote! { ::ere_core::__construct_nfa_regex(#engine) }.into();
94 };
95}
96
/// Expands an attribute-style regex: `attr` holds the regular expression and
/// `input` the struct it annotates.
///
/// The struct is re-emitted unchanged, followed by an inherent `impl` providing
/// a `const ENGINE`, `test(text) -> bool` and `exec(text) -> Option<Self>`.
///
/// Requirements checked here (each violation becomes a compile error):
/// * the annotated item is a struct;
/// * it is a tuple struct (unnamed fields);
/// * it has exactly as many fields as the NFA reports capture groups.
///
/// The generated code is `impl<'a> Name<'a>`, so the struct is expected to take
/// a single lifetime parameter; this is not validated here.
///
/// Fields for capture groups the NFA reports as optional receive the
/// `Option<&str>` as-is; the others are unwrapped with `expect`.
#[cfg(feature = "unstable-attr-regex")]
pub fn __compile_regex_attr(attr: TokenStream, input: TokenStream) -> TokenStream {
    /// Builds the compile-error expansion spanning `tokens` with `message`.
    fn compile_error<T: quote::ToTokens>(tokens: T, message: impl std::fmt::Display) -> TokenStream {
        syn::parse::Error::new_spanned(tokens, message)
            .to_compile_error()
            .into()
    }

    let ere: parse_tree::ERE = syn::parse_macro_input!(attr);
    let tree = simplified_tree::SimplifiedTreeNode::from(ere);
    let nfa = working_nfa::WorkingNFA::new(&tree);

    let capture_groups = nfa.num_capture_groups();
    let optional_captures: Vec<bool> = (0..capture_groups)
        .map(|group_num| nfa.capture_group_is_optional(group_num))
        .collect();

    // Parse a copy so the original tokens can be re-emitted verbatim below.
    let item_tokens = input.clone();
    let regex_struct: syn::DeriveInput = syn::parse_macro_input!(item_tokens);
    let syn::Data::Struct(data_struct) = regex_struct.data else {
        return compile_error(
            regex_struct,
            "Attribute regexes currently only support structs.",
        );
    };
    let syn::Fields::Unnamed(fields) = data_struct.fields else {
        return compile_error(
            data_struct.fields,
            "Attribute regexes currently require unnamed structs (tuple syntax).",
        );
    };
    if fields.unnamed.len() != optional_captures.len() {
        return compile_error(
            fields.unnamed,
            format!(
                "Expected struct to have {} unnamed fields, based on number of captures in regular expression.",
                optional_captures.len()
            ),
        );
    }

    // The expansion starts with the annotated struct itself.
    let mut out: proc_macro2::TokenStream = input.into();

    // The pattern counts as ASCII when the upper bound of every symbol range on
    // every transition is an ASCII character.
    let is_ascii = nfa.states.iter().all(|state: &working_nfa::WorkingState| {
        state
            .transitions
            .iter()
            .flat_map(|transition| transition.symbol.to_ranges())
            .all(|range| range.end().is_ascii())
    });

    // One constructor argument per capture group, each followed by a comma.
    let mut struct_args = proc_macro2::TokenStream::new();
    for (group_num, &is_optional) in optional_captures.iter().enumerate() {
        let argument = if is_optional {
            quote! { result[#group_num], }
        } else {
            quote! {
                result[#group_num]
                    .expect(
                        "If you are seeing this, there is probably an internal bug in the `ere-core` crate where a capture group was mistakenly marked as non-optional. Please report the bug."
                    ),
            }
        };
        struct_args.extend(argument);
    }

    let struct_name = regex_struct.ident;

    // Only the engine's type path and its serialized value differ between the
    // ASCII and non-ASCII cases; the generated impl is otherwise the same.
    let (engine_type, engine) = if is_ascii {
        let byte_nfa = working_u8_nfa::U8NFA::new(&nfa);
        let serialized = pike_vm_u8::serialize_pike_vm_token_stream(&byte_nfa);
        (
            quote! { ::ere_core::pike_vm_u8::U8PikeVM },
            quote! { #serialized },
        )
    } else {
        let serialized = pike_vm::serialize_pike_vm_token_stream(&nfa);
        (
            quote! { ::ere_core::pike_vm::PikeVM },
            quote! { #serialized },
        )
    };

    out.extend(quote! {
        impl<'a> #struct_name<'a> {
            const ENGINE: #engine_type::<#capture_groups> = #engine;
            pub fn test(text: &str) -> bool {
                return Self::ENGINE.test(text);
            }
            pub fn exec(text: &'a str) -> ::core::option::Option<#struct_name<'a>> {
                let result: [::core::option::Option<&'a str>; #capture_groups] = Self::ENGINE.exec(text)?;
                return ::core::option::Option::<#struct_name<'a>>::Some(#struct_name(
                    #struct_args
                ));
            }
        }
    });

    out.into()
}