1use proc_macro::TokenStream;
4use quote::quote;
5extern crate proc_macro;
6
7pub mod config;
8pub mod nfa_static;
9pub mod one_pass_u8;
10pub mod parse_tree;
11pub mod pike_vm;
12pub mod pike_vm_u8;
13pub mod simplified_tree;
14pub mod visualization;
15pub mod working_nfa;
16pub mod working_u8_nfa;
17
18enum RegexEngines<const N: usize> {
19 NFA(nfa_static::NFAStatic<N>),
20 PikeVM(pike_vm::PikeVM<N>),
21 U8PikeVM(pike_vm_u8::U8PikeVM<N>),
22 U8OnePass(one_pass_u8::U8OnePass<N>),
23}
24
25pub struct Regex<const N: usize = 1>(RegexEngines<N>);
32impl<const N: usize> Regex<N> {
33 pub fn test(&self, text: &str) -> bool {
35 return match &self.0 {
36 RegexEngines::NFA(nfa) => nfa.test(text),
37 RegexEngines::PikeVM(pike_vm) => pike_vm.test(text),
38 RegexEngines::U8PikeVM(pike_vm) => pike_vm.test(text),
39 RegexEngines::U8OnePass(one_pass) => one_pass.test(text),
40 };
41 }
42
43 pub fn exec<'a>(&self, text: &'a str) -> Option<[Option<&'a str>; N]> {
44 return match &self.0 {
45 RegexEngines::NFA(nfa) => unimplemented!(),
46 RegexEngines::PikeVM(pike_vm) => pike_vm.exec(text),
47 RegexEngines::U8PikeVM(pike_vm) => pike_vm.exec(text),
48 RegexEngines::U8OnePass(one_pass) => one_pass.exec(text),
49 };
50 }
51}
52impl<const N: usize> std::fmt::Display for Regex<N> {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 return match &self.0 {
55 RegexEngines::NFA(nfastatic) => nfastatic.fmt(f),
56 RegexEngines::PikeVM(_) => f.write_str("<Compiled VM>"),
57 RegexEngines::U8PikeVM(_) => f.write_str("<Compiled VM>"),
58 RegexEngines::U8OnePass(_) => f.write_str("<Compiled VM>"),
59 };
60 }
61}
62
63pub const fn __construct_pikevm_regex<const N: usize>(vm: pike_vm::PikeVM<N>) -> Regex<N> {
64 return Regex(RegexEngines::PikeVM(vm));
65}
66pub const fn __construct_u8pikevm_regex<const N: usize>(vm: pike_vm_u8::U8PikeVM<N>) -> Regex<N> {
67 return Regex(RegexEngines::U8PikeVM(vm));
68}
69pub const fn __construct_nfa_regex<const N: usize>(nfa: nfa_static::NFAStatic<N>) -> Regex<N> {
70 return Regex(RegexEngines::NFA(nfa));
71}
72pub const fn __construct_u8onepass_regex<const N: usize>(
73 nfa: one_pass_u8::U8OnePass<N>,
74) -> Regex<N> {
75 return Regex(RegexEngines::U8OnePass(nfa));
76}
77
78pub fn __compile_regex(stream: TokenStream) -> TokenStream {
80 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
81 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
82 let nfa = working_nfa::WorkingNFA::new(&tree);
83
84 fn is_state_ascii(state: &working_nfa::WorkingState) -> bool {
86 return state
87 .transitions
88 .iter()
89 .flat_map(|t| t.symbol.to_ranges())
90 .all(|range| range.end().is_ascii());
91 }
92 let is_ascii = nfa.states.iter().all(is_state_ascii);
93
94 let u8_nfa = working_u8_nfa::U8NFA::new(&nfa);
95
96 if let Some(engine) = one_pass_u8::serialize_one_pass_token_stream(&u8_nfa) {
97 return quote! { ::ere_core::__construct_u8onepass_regex(#engine) }.into();
98 }
99
100 if is_ascii {
101 let engine = pike_vm_u8::serialize_pike_vm_token_stream(&u8_nfa);
102 return quote! { ::ere_core::__construct_u8pikevm_regex(#engine) }.into();
103 } else if true {
104 let engine = pike_vm::serialize_pike_vm_token_stream(&nfa);
105 return quote! { ::ere_core::__construct_pikevm_regex(#engine) }.into();
106 } else {
107 let engine = nfa_static::serialize_nfa_as_token_stream(&nfa);
108 return quote! { ::ere_core::__construct_nfa_regex(#engine) }.into();
109 };
110}
111
112pub fn __compile_regex_engine_pike_vm(stream: TokenStream) -> TokenStream {
115 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
116 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
117 let nfa = working_nfa::WorkingNFA::new(&tree);
118 return pike_vm::serialize_pike_vm_token_stream(&nfa).into();
119}
120
121pub fn __compile_regex_engine_pike_vm_u8(stream: TokenStream) -> TokenStream {
124 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
125 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
126 let nfa = working_nfa::WorkingNFA::new(&tree);
127 let nfa = working_u8_nfa::U8NFA::new(&nfa);
128 return pike_vm_u8::serialize_pike_vm_token_stream(&nfa).into();
129}
130
131pub fn __compile_regex_engine_one_pass_u8(stream: TokenStream) -> TokenStream {
136 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
137 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
138 let nfa = working_nfa::WorkingNFA::new(&tree);
139 let nfa = working_u8_nfa::U8NFA::new(&nfa);
140 return one_pass_u8::serialize_one_pass_token_stream(&nfa)
141 .unwrap_or(
142 syn::parse::Error::new(
143 proc_macro2::Span::call_site(),
144 "Regex was not one-pass and could not be optimized to become one pass.
145Try using a different engine.",
146 )
147 .to_compile_error(),
148 )
149 .into();
150}
151
/// Attribute form of the regex macro: `attr` holds the regular expression and
/// `input` the annotated item.
///
/// The item must be a tuple struct whose field count equals the number of
/// capture groups reported by the NFA; otherwise a spanned compile error is
/// returned. On success the original item is re-emitted, followed by an
/// `impl<'a>` block that defines an `ENGINE` constant plus `test` and `exec`
/// associated functions. The `u8` Pike VM is used when every transition range
/// ends on an ASCII character, and the general Pike VM otherwise.
#[cfg(feature = "unstable-attr-regex")]
pub fn __compile_regex_attr(attr: TokenStream, input: TokenStream) -> TokenStream {
    /// True when the upper bound of every symbol range on every transition
    /// leaving `state` is an ASCII character.
    fn is_state_ascii(state: &working_nfa::WorkingState) -> bool {
        state
            .transitions
            .iter()
            .flat_map(|t| t.symbol.to_ranges())
            .all(|range| range.end().is_ascii())
    }

    let ere: parse_tree::ERE = syn::parse_macro_input!(attr);
    let tree = simplified_tree::SimplifiedTreeNode::from(ere);
    let nfa = working_nfa::WorkingNFA::new(&tree);

    // One flag per capture group: `true` when the group is optional.
    let capture_groups = nfa.num_capture_groups();
    let optional_captures: Vec<bool> = (0..capture_groups)
        .map(|group_num| nfa.capture_group_is_optional(group_num))
        .collect();

    // Parse a copy of the item; the untouched `input` is re-emitted below.
    let input_copy = input.clone();
    let regex_struct: syn::DeriveInput = syn::parse_macro_input!(input_copy);

    let syn::Data::Struct(data_struct) = regex_struct.data else {
        let err = syn::parse::Error::new_spanned(
            regex_struct,
            "Attribute regexes currently only support structs.",
        );
        return err.to_compile_error().into();
    };
    let syn::Fields::Unnamed(fields) = data_struct.fields else {
        let err = syn::parse::Error::new_spanned(
            data_struct.fields,
            "Attribute regexes currently require unnamed structs (tuple syntax).",
        );
        return err.to_compile_error().into();
    };
    if fields.unnamed.len() != optional_captures.len() {
        let message = format!(
            "Expected struct to have {} unnamed fields, based on number of captures in regular expression.",
            optional_captures.len()
        );
        let err = syn::parse::Error::new_spanned(fields.unnamed, message);
        return err.to_compile_error().into();
    }

    let mut out: proc_macro2::TokenStream = input.into();

    let is_ascii = nfa.states.iter().all(is_state_ascii);

    // Constructor arguments for the tuple struct: optional groups are passed
    // through as `Option`, the others are unwrapped with `expect`.
    let struct_args: proc_macro2::TokenStream = optional_captures
        .iter()
        .enumerate()
        .map(|(group_num, &is_optional)| {
            if is_optional {
                quote! { result[#group_num], }
            } else {
                quote! {
                    result[#group_num]
                        .expect(
                            "If you are seeing this, there is probably an internal bug in the `ere-core` crate where a capture group was mistakenly marked as non-optional. Please report the bug."
                        ),
                }
            }
        })
        .collect();

    let struct_name = regex_struct.ident;

    // Only the engine type path and the serialized engine differ between the
    // two cases; the generated `impl` is otherwise the same.
    let (engine_type, engine) = if is_ascii {
        let u8_nfa = working_u8_nfa::U8NFA::new(&nfa);
        let serialized = pike_vm_u8::serialize_pike_vm_token_stream(&u8_nfa);
        (
            quote! { ::ere_core::pike_vm_u8::U8PikeVM },
            quote! { #serialized },
        )
    } else {
        let serialized = pike_vm::serialize_pike_vm_token_stream(&nfa);
        (
            quote! { ::ere_core::pike_vm::PikeVM },
            quote! { #serialized },
        )
    };

    out.extend(quote! {
        impl<'a> #struct_name<'a> {
            const ENGINE: #engine_type::<#capture_groups> = #engine;
            pub fn test(text: &str) -> bool {
                return Self::ENGINE.test(text);
            }
            pub fn exec(text: &'a str) -> ::core::option::Option<#struct_name<'a>> {
                let result: [::core::option::Option<&'a str>; #capture_groups] = Self::ENGINE.exec(text)?;
                return ::core::option::Option::<#struct_name<'a>>::Some(#struct_name(
                    #struct_args
                ));
            }
        }
    });

    out.into()
}