1use proc_macro::TokenStream;
4use quote::quote;
5extern crate proc_macro;
6
7pub mod config;
8pub mod fixed_offset;
9pub mod nfa_static;
10pub mod one_pass_u8;
11pub mod parse_tree;
12pub mod pike_vm;
13pub mod pike_vm_u8;
14pub mod simplified_tree;
15pub mod visualization;
16pub mod working_nfa;
17pub mod working_u8_nfa;
18
/// A compiled regular expression backed by two plain function pointers.
///
/// The const generic `N` is the length of the array produced by [`Regex::exec`].
/// The macro code in this file instantiates it with the number of capture groups
/// of the expression; it defaults to `1`.
pub struct Regex<const N: usize = 1> {
    /// Engine entry point answering only whether the text matches.
    test_fn: fn(&str) -> bool,
    /// Engine entry point producing the `N` (individually optional) captured slices.
    exec_fn: for<'a> fn(&'a str) -> Option<[Option<&'a str>; N]>,
}

impl<const N: usize> Regex<N> {
    /// Returns whatever the stored `test` engine reports for `text`.
    #[inline]
    pub fn test(&self, text: &str) -> bool {
        let matcher = self.test_fn;
        matcher(text)
    }

    /// Runs the stored `exec` engine on `text`.
    ///
    /// The result is forwarded unchanged: `None`, or `Some` array of `N` optional
    /// slices that borrow from `text`.
    #[inline]
    pub fn exec<'a>(&self, text: &'a str) -> Option<[Option<&'a str>; N]> {
        let extractor = self.exec_fn;
        extractor(text)
    }
}
40
41#[inline]
43pub const fn __construct_regex<const N: usize>(
44 fn_pair: (
45 fn(&str) -> bool,
46 for<'a> fn(&'a str) -> Option<[Option<&'a str>; N]>,
47 ),
48) -> Regex<N> {
49 return Regex {
50 test_fn: fn_pair.0,
51 exec_fn: fn_pair.1,
52 };
53}
54
55fn pick_base_engine(
59 ere: parse_tree::ERE,
60) -> (
61 proc_macro2::TokenStream,
62 simplified_tree::SimplifiedTreeNode,
63 working_nfa::WorkingNFA,
64 working_u8_nfa::U8NFA,
65 &'static str,
66) {
67 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
68 let nfa = working_nfa::WorkingNFA::new(&tree);
69
70 fn is_state_ascii(state: &working_nfa::WorkingState) -> bool {
72 return state
73 .transitions
74 .iter()
75 .flat_map(|t| t.symbol.to_ranges())
76 .all(|range| range.end().is_ascii());
77 }
78 let is_ascii = nfa.states.iter().all(is_state_ascii);
79
80 let u8_nfa = working_u8_nfa::U8NFA::new(&nfa);
81
82 const ONE_PASS_U8_DESC: &str = "This regular expression is [one-pass](https://swtch.com/~rsc/regexp/regexp3.html#:~:text=Let%27s%20define%20a%20%E2%80%9Cone%2Dpass%20regular%20expression%E2%80%9D).
83This allows us to use an efficient [`::ere::one_pass_u8`] implementation.";
84 const PIKE_VM_U8_DESC: &str =
85 "Uses a general-case [`::ere::pike_vm_u8`] implementatation over `u8`s.";
86 const PIKE_VM_DESC: &str =
87 "Uses a general-case [`::ere::pike_vm`] implementatation over `char`s.";
88
89 let (base_engine, description) =
90 if let Some(engine) = one_pass_u8::serialize_one_pass_token_stream(&u8_nfa) {
91 (engine, ONE_PASS_U8_DESC)
92 } else if is_ascii {
93 (
94 pike_vm_u8::serialize_pike_vm_token_stream(&u8_nfa),
95 PIKE_VM_U8_DESC,
96 )
97 } else {
98 (pike_vm::serialize_pike_vm_token_stream(&nfa), PIKE_VM_DESC)
99 };
100 return (base_engine, tree, nfa, u8_nfa, description);
101}
102
103fn pick_engine(ere: parse_tree::ERE) -> (proc_macro2::TokenStream, String) {
107 let (base_engine, _, _, u8_nfa, base_description) = pick_base_engine(ere);
108
109 if let Some(offsets) = fixed_offset::get_fixed_offsets(&u8_nfa) {
111 let engine = fixed_offset::serialize_fixed_offset_token_stream(
112 base_engine,
113 offsets,
114 u8_nfa.num_capture_groups(),
115 );
116 return (
117 engine,
118 format!(
119 "This regular expression's capture groups are always at fixed offsets.
120Because of this, we can skip a complex `exec` implementation, and instead simply run `test` then index into the string.
121
122### Details on the `test` implementation:
123
124{base_description}"
125 ),
126 );
127 };
128 return (base_engine, base_description.to_string());
129}
130
131pub fn __compile_regex(stream: TokenStream) -> TokenStream {
133 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
134 let (fn_pair, _) = pick_engine(ere);
135 return quote! {
136 {
137 ::ere::__construct_regex(#fn_pair)
138 }
139 }
140 .into();
141}
142
143pub fn __compile_regex_engine_pike_vm(stream: TokenStream) -> TokenStream {
145 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
146 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
147 let nfa = working_nfa::WorkingNFA::new(&tree);
148 let fn_pair = pike_vm::serialize_pike_vm_token_stream(&nfa);
149 return quote! {
150 ::ere::__construct_regex(#fn_pair)
151 }
152 .into();
153}
154
155pub fn __compile_regex_engine_pike_vm_u8(stream: TokenStream) -> TokenStream {
157 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
158 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
159 let nfa = working_nfa::WorkingNFA::new(&tree);
160 let nfa = working_u8_nfa::U8NFA::new(&nfa);
161 let fn_pair = pike_vm_u8::serialize_pike_vm_token_stream(&nfa);
162 return quote! {
163 ::ere::__construct_regex(#fn_pair)
164 }
165 .into();
166}
167
168pub fn __compile_regex_engine_one_pass_u8(stream: TokenStream) -> TokenStream {
172 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
173 let tree = simplified_tree::SimplifiedTreeNode::from(ere);
174 let nfa = working_nfa::WorkingNFA::new(&tree);
175 let nfa = working_u8_nfa::U8NFA::new(&nfa);
176 let Some(fn_pair) = one_pass_u8::serialize_one_pass_token_stream(&nfa) else {
177 return syn::parse::Error::new(
178 proc_macro2::Span::call_site(),
179 "Regex was not one-pass and could not be optimized to become one pass.
180Try using a different engine.",
181 )
182 .to_compile_error()
183 .into();
184 };
185 return quote! {
186 ::ere::__construct_regex(#fn_pair)
187 }
188 .into();
189}
190
191pub fn __compile_regex_engine_fixed_offset(stream: TokenStream) -> TokenStream {
195 let ere: parse_tree::ERE = syn::parse_macro_input!(stream);
196 let (base_engine, _, _, u8_nfa, _) = pick_base_engine(ere);
197
198 let Some(offsets) = fixed_offset::get_fixed_offsets(&u8_nfa) else {
199 return syn::parse::Error::new(
200 proc_macro2::Span::call_site(),
201 "Regex capture groups were not fixed offset. Try using a different engine.",
202 )
203 .to_compile_error()
204 .into();
205 };
206 let fn_pair = fixed_offset::serialize_fixed_offset_token_stream(
207 base_engine,
208 offsets,
209 u8_nfa.num_capture_groups(),
210 );
211 return quote! {
212 ::ere::__construct_regex(#fn_pair)
213 }
214 .into();
215}
216
/// Expands the attribute form of a regex applied to a tuple struct.
///
/// `attr` must be a string literal holding the expression; `input` is the annotated
/// item. The item is re-emitted unchanged, followed by an `impl<'a>` block providing:
///
/// - `ENGINE`: the `(test, exec)` function pair chosen by `pick_engine`;
/// - `test(text)`: forwards to the first function of `ENGINE`;
/// - `exec(text)`: runs the second function and builds the struct from the captures,
///   passing optional capture groups as `Option<&str>` and unwrapping the others
///   with `expect`.
///
/// A compile error is returned instead when the expression fails to parse, when the
/// item is not a struct, when the struct does not use unnamed (tuple) fields, or
/// when the field count differs from the expression's number of capture groups.
#[cfg(feature = "unstable-attr-regex")]
pub fn __compile_regex_attr(attr: TokenStream, input: TokenStream) -> TokenStream {
    let ere_litstr: syn::LitStr = syn::parse_macro_input!(attr);
    let ere_str = ere_litstr.value();
    let ere = match parse_tree::ERE::parse_str_syn(&ere_str, ere_litstr.span()) {
        Ok(ere) => ere,
        Err(compile_err) => return compile_err.into_compile_error().into(),
    };

    // This NFA is only inspected for capture-group metadata. `ere` is cloned because
    // `pick_engine` takes ownership of it further down.
    let tree = simplified_tree::SimplifiedTreeNode::from(ere.clone());
    let nfa = working_nfa::WorkingNFA::new(&tree);

    let capture_groups = nfa.num_capture_groups();
    let optional_captures: Vec<bool> = (0..capture_groups)
        .map(|group_num| nfa.capture_group_is_optional(group_num))
        .collect();

    // Parse a copy so the original tokens can be re-emitted verbatim afterwards.
    let input_copy = input.clone();
    let regex_struct: syn::DeriveInput = syn::parse_macro_input!(input_copy);
    let syn::Data::Struct(data_struct) = regex_struct.data else {
        return syn::parse::Error::new_spanned(
            regex_struct,
            "Attribute regexes currently only support structs.",
        )
        .to_compile_error()
        .into();
    };
    let syn::Fields::Unnamed(fields) = data_struct.fields else {
        return syn::parse::Error::new_spanned(
            data_struct.fields,
            "Attribute regexes currently require unnamed structs (tuple syntax).",
        )
        .to_compile_error()
        .into();
    };
    if fields.unnamed.len() != optional_captures.len() {
        return syn::parse::Error::new_spanned(
            fields.unnamed,
            format!(
                "Expected struct to have {} unnamed fields, based on number of captures in regular expression.",
                optional_captures.len()
            ),
        )
        .to_compile_error()
        .into();
    }
    let mut out: proc_macro2::TokenStream = input.into();

    // One constructor argument per capture group, each with a trailing comma.
    let struct_args: proc_macro2::TokenStream = optional_captures
        .iter()
        .enumerate()
        .map(|(group_num, opt)| if *opt {
            quote! { result[#group_num], }
        } else {
            quote! {
                result[#group_num]
                    .expect(
                        "If you are seeing this, there is probably an internal bug in the `ere-core` crate where a capture group was mistakenly marked as non-optional. Please report the bug."
                    ),
            }
        })
        .collect();

    let (fn_pair, description) = pick_engine(ere);
    let struct_name = regex_struct.ident;

    let ere_display_doc = format!("`{ere_str}`");
    let struct_name_link_doc = format!("[`{struct_name}`]");
    let implementation = quote! {
        impl<'a> #struct_name<'a> {
            const ENGINE: (
                fn(&str) -> bool,
                fn(&'a str) -> ::core::option::Option<[::core::option::Option<&'a str>; #capture_groups]>,
            ) = #fn_pair;
            #[doc = #ere_display_doc]
            #[doc = #description]
            #[inline]
            pub fn test(text: &str) -> bool {
                return (Self::ENGINE.0)(text);
            }
            #[doc = #struct_name_link_doc]
            #[doc = #ere_display_doc]
            #[doc = #description]
            pub fn exec(text: &'a str) -> ::core::option::Option<#struct_name<'a>> {
                let result: [::core::option::Option<&'a str>; #capture_groups] = (Self::ENGINE.1)(text)?;
                return ::core::option::Option::<#struct_name<'a>>::Some(#struct_name(
                    #struct_args
                ));
            }
        }
    };
    out.extend(implementation);

    out.into()
}