lazy_regex_proc_macros/mod.rs
1mod args;
2mod regex_code;
3
4use {
5 crate::{args::*, regex_code::*},
6 proc_macro::TokenStream,
7 quote::quote,
8 syn::{parse_macro_input, Expr},
9};
10
11// The following `process*` functions are convenience funcs
12// to reduce boilerplate in macro implementations below.
13fn process<T, F>(input: TokenStream, as_bytes: bool, f: F) -> TokenStream
14where
15 T: Into<TokenStream>,
16 F: Fn(RegexCode) -> T,
17{
18 match RegexCode::from_token_stream(input, as_bytes) {
19 Ok(r) => f(r).into(),
20 Err(e) => e.to_compile_error().into(),
21 }
22}
23
24fn process_with_value<T, F>(input: TokenStream, as_bytes: bool, f: F) -> TokenStream
25where
26 T: Into<TokenStream>,
27 F: Fn(RegexCode, Expr) -> T,
28{
29 let parsed = parse_macro_input!(input as RexValArgs);
30 match RegexCode::from_lit_str(parsed.regex_str, as_bytes) {
31 Ok(r) => f(r, parsed.value).into(),
32 Err(e) => e.to_compile_error().into(),
33 }
34}
35
36/// Return a lazy static Regex checked at compilation time and
37/// built at first use.
38///
39/// Flags can be specified as suffix:
40/// ```
41/// let case_insensitive_regex = regex!("^ab+$"i);
42/// ```
43///
44/// The macro returns a reference to a [regex::Regex]
45/// or a [regex::bytes::Regex] instance,
46/// differentiated by the `B` flag:
47/// ```
48/// let verbose = regex!(r#"_([\d\.]+)"#)
49/// .replace("This is lazy-regex_2.2", " (version $1)");
50/// assert_eq!(verbose, "This is lazy-regex (version 2.2)");
51/// ```
52#[proc_macro]
53pub fn regex(input: TokenStream) -> TokenStream {
54 process(input, false, |regex_code| regex_code.lazy_static())
55}
56
57/// Return a lazy static `regex::bytes::Regex` checked at compilation time and
58/// built at first use.
59///
60/// Flags can be specified as suffix:
61/// ```
62/// let case_insensitive_regex = bytes_regex!("^ab+$"i);
63/// assert!(case_insensitive_regex.is_match(b"abB"));
64/// ```
65#[proc_macro]
66pub fn bytes_regex(input: TokenStream) -> TokenStream {
67 process(input, true, |regex_code| regex_code.lazy_static())
68}
69
70/// Return an instance of `once_cell::sync::Lazy<regex::Regex>` or
71/// `once_cell::sync::Lazy<regex::bytes::Regex>` that
72/// you can use in a public static declaration.
73///
74/// Example:
75///
76/// ```
77/// pub static GLOBAL_REX: Lazy<Regex> = lazy_regex!("^ab+$"i);
78/// ```
79///
80/// As for other macros, the regex is checked at compilation time.
81#[proc_macro]
82pub fn lazy_regex(input: TokenStream) -> TokenStream {
83 process(input, false, |regex_code| regex_code.build)
84}
85
86/// Return an instance of `once_cell::sync::Lazy<bytes::Regex>` that
87/// you can use in a public static declaration.
88///
89/// Example:
90///
91/// ```
92/// pub static GLOBAL_REX: Lazy<bytes::Regex> = bytes_lazy_regex!("^ab+$"i);
93/// ```
94///
95/// As for other macros, the regex is checked at compilation time.
96#[proc_macro]
97pub fn bytes_lazy_regex(input: TokenStream) -> TokenStream {
98 process(input, true, |regex_code| regex_code.build)
99}
100
101/// Test whether an expression matches a lazy static
102/// regular expression (the regex is checked at compile
103/// time)
104///
105/// Example:
106/// ```
107/// let b = regex_is_match!("[ab]+", "car");
108/// assert_eq!(b, true);
109/// ```
110#[proc_macro]
111pub fn regex_is_match(input: TokenStream) -> TokenStream {
112 process_with_value(input, false, |regex_code, value| {
113 let statick = regex_code.statick();
114 quote! {{
115 #statick;
116 RE.is_match(#value)
117 }}
118 })
119}
120
121/// Test whether an expression matches a lazy static
122/// bytes::Regex regular expression (the regex is checked
123/// at compile time)
124///
125/// Example:
126/// ```
127/// let b = bytes_regex_is_match!("[ab]+", b"car");
128/// assert_eq!(b, true);
129/// ```
130#[proc_macro]
131pub fn bytes_regex_is_match(input: TokenStream) -> TokenStream {
132 process_with_value(input, true, |regex_code, value| {
133 let statick = regex_code.statick();
134 quote! {{
135 #statick;
136 RE.is_match(#value)
137 }}
138 })
139}
140
141/// Extract the leftmost match of the regex in the
142/// second argument, as a `&str`, or a `&[u8]` if the `B` flag is set.
143///
144/// Example:
145/// ```
146/// let f_word = regex_find!(r#"\bf\w+\b"#, "The fox jumps.");
147/// assert_eq!(f_word, Some("fox"));
148/// let f_word = regex_find!(r#"\bf\w+\b"#B, "The forest is silent.");
149/// assert_eq!(f_word, Some(b"forest" as &[u8]));
150/// ```
151#[proc_macro]
152pub fn regex_find(input: TokenStream) -> TokenStream {
153 process_with_value(input, false, |regex_code, value| {
154 let statick = regex_code.statick();
155 let as_method = match regex_code.regex {
156 RegexInstance::Regex(..) => quote!(as_str),
157 RegexInstance::Bytes(..) => quote!(as_bytes),
158 };
159 quote! {{
160 #statick;
161 RE.find(#value).map(|mat| mat. #as_method ())
162 }}
163 })
164}
165
166/// Extract the leftmost match of the regex in the
167/// second argument as a `&[u8]`
168///
169/// Example:
170/// ```
171/// let f_word = bytes_regex_find!(r#"\bf\w+\b"#, b"The fox jumps.");
172/// assert_eq!(f_word, Some("fox".as_bytes()));
173/// ```
174#[proc_macro]
175pub fn bytes_regex_find(input: TokenStream) -> TokenStream {
176 process_with_value(input, true, |regex_code, value| {
177 let statick = regex_code.statick();
178 let as_method = match regex_code.regex {
179 RegexInstance::Regex(..) => quote!(as_str),
180 RegexInstance::Bytes(..) => quote!(as_bytes),
181 };
182 quote! {{
183 #statick;
184 RE.find(#value).map(|mat| mat. #as_method ())
185 }}
186 })
187}
188
189/// Extract captured groups as a tuple of &str.
190///
191/// If there's no match, the macro returns `None`.
192///
193/// If an optional group has no value, the tuple
194/// will contain `""` instead.
195///
196/// Example:
197/// ```
198/// let (whole, name, version) = regex_captures!(
199/// r#"(\w+)-([0-9.]+)"#, // a literal regex
200/// "This is lazy_regex-2.0!", // any expression
201/// ).unwrap();
202/// assert_eq!(whole, "lazy_regex-2.0");
203/// assert_eq!(name, "lazy_regex");
204/// assert_eq!(version, "2.0");
205/// ```
206#[proc_macro]
207pub fn regex_captures(input: TokenStream) -> TokenStream {
208 process_with_value(input, false, |regex_code, value| {
209 let statick = regex_code.statick();
210 let n = regex_code.captures_len();
211 let groups = (0..n).map(|i| {
212 quote! {
213 caps.get(#i).map_or("", |c| c.as_str())
214 }
215 });
216 quote! {{
217 #statick;
218 RE.captures(#value)
219 .map(|caps| (
220 #(#groups),*
221 ))
222 }}
223 })
224}
225
226/// Extract captured groups as a tuple of &[u8]
227///
228/// If there's no match, the macro returns `None`.
229///
230/// If an optional group has no value, the tuple
231/// will contain `b""` instead.
232///
233/// Example:
234/// ```
235/// let (whole, name, version) = bytes_regex_captures!(
236/// r#"(\w+)-([0-9.]+)"#, // a literal regex
237/// b"This is lazy_regex-2.0!", // any expression
238/// ).unwrap();
239/// assert_eq!(whole, b"lazy_regex-2.0");
240/// assert_eq!(name, b"lazy_regex");
241/// assert_eq!(version, "2.0".as_bytes());
242/// ```
243#[proc_macro]
244pub fn bytes_regex_captures(input: TokenStream) -> TokenStream {
245 process_with_value(input, true, |regex_code, value| {
246 let statick = regex_code.statick();
247 let n = regex_code.captures_len();
248 let groups = (0..n).map(|i| {
249 quote! {
250 caps.get(#i).map_or(&b""[..], |c| c.as_bytes())
251 }
252 });
253 quote! {{
254 #statick;
255 RE.captures(#value)
256 .map(|caps| (
257 #(#groups),*
258 ))
259 }}
260 })
261}
262
263/// Returns an iterator that yields successive non-overlapping matches in the given haystack.
264/// The iterator yields values of type `regex::Captures`.
265///
266/// Example (adapted from the regex crate):
267/// ```
268/// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
269/// let mut movies = vec![];
270/// let iter = regex_captures_iter!(r"'([^']+)'\s+\(([0-9]{4})\)", hay);
271/// for (_, [title, year]) in iter.map(|c| c.extract()) {
272/// movies.push((title, year.parse::<i64>().unwrap()));
273/// }
274/// assert_eq!(movies, vec![
275/// ("Citizen Kane", 1941),
276/// ("The Wizard of Oz", 1939),
277/// ("M", 1931),
278/// ]);
279/// ```
280#[proc_macro]
281pub fn regex_captures_iter(input: TokenStream) -> TokenStream {
282 process_with_value(input, false, |regex_code, value| {
283 let statick = regex_code.statick();
284 quote! {{
285 #statick;
286 RE.captures_iter(#value)
287 }}
288 })
289}
290
291/// Returns an iterator that yields successive non-overlapping matches in the given haystack.
292#[proc_macro]
293pub fn bytes_regex_captures_iter(input: TokenStream) -> TokenStream {
294 process_with_value(input, true, |regex_code, value| {
295 let statick = regex_code.statick();
296 quote! {{
297 #statick;
298 RE.captures_iter(#value)
299 }}
300 })
301}
302
303/// common implementation of regex_replace and regex_replace_all
304fn replacen(input: TokenStream, limit: usize) -> TokenStream {
305 let parsed = parse_macro_input!(input as ReplaceArgs);
306 let ReplaceArgs { regex_str, value, replacer } = parsed;
307 let regex_code = match RegexCode::from_lit_str(regex_str, false) {
308 Ok(r) => r,
309 Err(e) => {
310 return e.to_compile_error().into();
311 }
312 };
313 let statick = regex_code.statick();
314 let stream = match replacer {
315 MaybeFun::Fun(fun) => {
316 let n = regex_code.captures_len();
317 let groups = (0..n).map(|i| {
318 quote! {
319 caps.get(#i).map_or("", |c| c.as_str())
320 }
321 });
322 quote! {{
323 #statick;
324 RE.replacen(
325 #value,
326 #limit,
327 |caps: &lazy_regex::Captures<'_>| {
328 let mut fun = #fun;
329 fun(
330 #(#groups),*
331 )
332 })
333 }}
334 }
335 MaybeFun::Expr(expr) => {
336 quote! {{
337 #statick;
338 RE.replacen(#value, #limit, #expr)
339 }}
340 }
341 };
342 stream.into()
343}
344
345/// common implementation of bytes_regex_replace and bytes_regex_replace_all
346fn bytes_replacen(input: TokenStream, limit: usize) -> TokenStream {
347 let parsed = parse_macro_input!(input as ReplaceArgs);
348 let ReplaceArgs { regex_str, value, replacer } = parsed;
349 let regex_code = match RegexCode::from_lit_str(regex_str, true) {
350 Ok(r) => r,
351 Err(e) => {
352 return e.to_compile_error().into();
353 }
354 };
355 let statick = regex_code.statick();
356 let stream = match replacer {
357 MaybeFun::Fun(fun) => {
358 let n = regex_code.captures_len();
359 let groups = (0..n).map(|i| {
360 quote! {
361 caps.get(#i).map_or(&b""[..], |c| c.as_bytes())
362 }
363 });
364 quote! {{
365 #statick;
366 RE.replacen(
367 #value,
368 #limit,
369 |caps: &lazy_regex::regex::bytes::Captures<'_>| {
370 let mut fun = #fun;
371 fun(
372 #(#groups),*
373 )
374 })
375 }}
376 }
377 MaybeFun::Expr(expr) => {
378 quote! {{
379 #statick;
380 RE.replacen(#value, #limit, #expr)
381 }}
382 }
383 };
384 stream.into()
385}
386
387/// Replaces the leftmost match in the second argument
388/// using the replacer given as third argument.
389///
390/// When the replacer is a closure, it is given one or more `&str`,
391/// the first one for the whole match and the following ones for
392/// the groups.
393/// Any optional group with no value is replaced with `""`.
394///
395/// Example:
396/// ```
397/// let text = "Fuu fuuu";
398/// let text = regex_replace!(
399/// "f(u*)"i,
400/// text,
401/// |_, suffix: &str| format!("F{}", suffix.len()),
402/// );
403/// assert_eq!(text, "F2 fuuu");
404/// ```
405#[proc_macro]
406pub fn regex_replace(input: TokenStream) -> TokenStream {
407 replacen(input, 1)
408}
409
410/// Replaces the leftmost match in the second argument
411/// using the replacer given as third argument.
412///
413/// When the replacer is a closure, it is given one or more `&str`,
414/// the first one for the whole match and the following ones for
415/// the groups.
416/// Any optional group with no value is replaced with `b""`.
417///
418/// Example:
419/// ```
420/// println!("{:?}", "ck ck".as_bytes());
421/// let text = b"Fuu fuuu";
422/// let text = bytes_regex_replace!(
423/// "f(u*)"i,
424/// text,
425/// b"ck",
426/// );
427/// assert_eq!(text, "ck fuuu".as_bytes());
428/// ```
429#[proc_macro]
430pub fn bytes_regex_replace(input: TokenStream) -> TokenStream {
431 bytes_replacen(input, 1)
432}
433
434/// Replaces all non-overlapping matches in the second argument
435/// using the replacer given as third argument.
436///
437/// When the replacer is a closure, it is given one or more `&str`,
438/// the first one for the whole match and the following ones for
439/// the groups.
440/// Any optional group with no value is replaced with `""`.
441///
442/// Example:
443/// ```
444/// let text = "Foo fuu";
445/// let text = regex_replace_all!(
446/// r#"\bf(?P<suffix>\w+)"#i,
447/// text,
448/// |_, suffix| format!("F<{}>", suffix),
449/// );
450/// assert_eq!(text, "F<oo> F<uu>");
451/// ```
452#[proc_macro]
453pub fn regex_replace_all(input: TokenStream) -> TokenStream {
454 replacen(input, 0)
455}
456
457/// Replaces all non-overlapping matches in the second argument
458/// using the replacer given as third argument.
459///
460/// When the replacer is a closure, it is given one or more `&str`,
461/// the first one for the whole match and the following ones for
462/// the groups.
463/// Any optional group with no value is replaced with `""`.
464///
465/// Example:
466/// ```
467/// let text = b"Foo fuu";
468/// let text = bytes_regex_replace_all!(
469/// r#"\bf(?P<suffix>\w+)"#i,
470/// text,
471/// b"H",
472/// );
473/// assert_eq!(text, "H H".as_bytes());
474/// ```
475#[proc_macro]
476pub fn bytes_regex_replace_all(input: TokenStream) -> TokenStream {
477 bytes_replacen(input, 0)
478}
479
480/// Return an Option<T>, with T being the type returned by the block or expression
481/// given as third argument.
482///
483/// If the regex matches, executes the expression and return it as Some.
484/// Return None if the regex doesn't match.
485///
486/// ```
487/// let grey = regex_if!(r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#, "grey(22)", {
488/// level.parse().unwrap()
489/// });
490/// assert_eq!(grey, Some(22));
491/// ```
492#[proc_macro]
493pub fn regex_if(input: TokenStream) -> TokenStream {
494 let RexIfArgs {
495 regex_str,
496 value,
497 then,
498 } = parse_macro_input!(input as RexIfArgs);
499 let regex_code = match RegexCode::from_lit_str(regex_str, false) {
500 Ok(r) => r,
501 Err(e) => {
502 return e.to_compile_error().into();
503 }
504 };
505 let statick = regex_code.statick();
506 let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
507 let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
508 quote! {
509 let #var_name: &str = caps.get(#idx).map_or("", |c| c.as_str());
510 }
511 });
512 quote! {{
513 #statick;
514 match RE.captures(#value) {
515 Some(caps) => {
516 #(#assigns);*
517 Some(#then)
518 }
519 None => None,
520 }
521 }}.into()
522}
523
524#[proc_macro]
525pub fn bytes_regex_if(input: TokenStream) -> TokenStream {
526 let RexIfArgs {
527 regex_str,
528 value,
529 then,
530 } = parse_macro_input!(input as RexIfArgs);
531 let regex_code = match RegexCode::from_lit_str(regex_str, true) {
532 Ok(r) => r,
533 Err(e) => {
534 return e.to_compile_error().into();
535 }
536 };
537 let statick = regex_code.statick();
538 let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
539 let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
540 quote! {
541 let #var_name: &[u8] = caps.get(#idx).map_or(&b""[..], |c| c.as_bytes());
542 }
543 });
544 quote! {{
545 #statick;
546 match RE.captures(#value) {
547 Some(caps) => {
548 #(#assigns);*
549 Some(#then)
550 }
551 None => None,
552 }
553 }}.into()
554}
555
556/// Define a set of lazy static statically compiled regexes, with a block
557/// or expression for each one. The first matching expression is computed
558/// with the named capture groups declaring `&str` variables available for this
559/// computation.
560/// If no regex matches, return `None`.
561///
562/// Example:
563/// ```
564/// #[derive(Debug, PartialEq)]
565/// enum Color {
566/// Grey(u8),
567/// Pink,
568/// Rgb(u8, u8, u8),
569/// }
570///
571/// let input = "rgb(1, 2, 3)";
572/// let color = regex_switch!(input,
573/// r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#i => {
574/// Color::Grey(level.parse()?)
575/// }
576/// "^pink"i => Color::Pink,
577/// r#"^rgb\((?<r>\d+),\s*(?<g>\d+),\s*(?<b>\d+),?\)$"#i => Color::Rgb (
578/// r.parse()?,
579/// g.parse()?,
580/// b.parse()?,
581/// ),
582/// );
583/// assert_eq!(color, Some(Color::Rgb(1, 2, 3)));
584///
585/// ```
586#[proc_macro]
587pub fn regex_switch(input: TokenStream) -> TokenStream {
588 let RexSwitchArgs {
589 value,
590 arms,
591 } = parse_macro_input!(input as RexSwitchArgs);
592 let mut q_arms = Vec::new();
593 for RexSwitchArmArgs { regex_str, then } in arms.into_iter() {
594 let regex_code = match RegexCode::from_lit_str(regex_str, false) {
595 Ok(r) => r,
596 Err(e) => {
597 return e.to_compile_error().into();
598 }
599 };
600 let statick = regex_code.statick();
601 let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
602 let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
603 quote! {
604 let #var_name: &str = caps.get(#idx).map_or("", |c| c.as_str());
605 }
606 });
607 q_arms.push(
608 quote! {{
609 #statick;
610 if let Some(caps) = RE.captures(#value) {
611 #(#assigns);*
612 let output = Some(#then);
613 break 'switch output;
614 }
615 }}
616 );
617 }
618 quote! {{
619 'switch: {
620 #(#q_arms)*
621 None
622 }
623 }}.into()
624}
625
626/// Define a set of lazy static statically compiled regexes, with a block
627/// or expression for each one. The first matching expression is computed
628/// with the named capture groups declaring `&str` variables available for this
629/// computation.
630/// If no regex matches, return `None`.
631///
632/// Example:
633/// ```
634/// #[derive(Debug, PartialEq)]
635/// enum Color {
636/// Grey(u8),
637/// Pink,
638/// Rgb(u8, u8, u8),
639/// }
640///
641/// let input = "rgb(1, 2, 3)";
642/// let color = regex_switch!(input,
643/// r#"^gr(a|e)y\((?<level>\d{1,2})\)$"#i => {
644/// Color::Grey(level.parse()?)
645/// }
646/// "^pink"i => Color::Pink,
647/// r#"^rgb\((?<r>\d+),\s*(?<g>\d+),\s*(?<b>\d+),?\)$"#i => Color::Rgb (
648/// r.parse()?,
649/// g.parse()?,
650/// b.parse()?,
651/// ),
652/// );
653/// assert_eq!(color, Some(Color::Rgb(1, 2, 3)));
654///
655/// ```
656#[proc_macro]
657pub fn bytes_regex_switch(input: TokenStream) -> TokenStream {
658 let RexSwitchArgs {
659 value,
660 arms,
661 } = parse_macro_input!(input as RexSwitchArgs);
662 let mut q_arms = Vec::new();
663 for RexSwitchArmArgs { regex_str, then } in arms.into_iter() {
664 let regex_code = match RegexCode::from_lit_str(regex_str, true) {
665 Ok(r) => r,
666 Err(e) => {
667 return e.to_compile_error().into();
668 }
669 };
670 let statick = regex_code.statick();
671 let assigns = regex_code.named_groups().into_iter().map(|(idx, name)| {
672 let var_name = syn::Ident::new(name, proc_macro2::Span::call_site());
673 quote! {
674 let #var_name: &[u8] = caps.get(#idx).map_or(&b""[..], |c| c.as_bytes());
675 }
676 });
677 q_arms.push(
678 quote! {{
679 #statick;
680 if let Some(caps) = RE.captures(#value) {
681 #(#assigns);*
682 let output = Some(#then);
683 break 'switch output;
684 }
685 }}
686 );
687 }
688 quote! {{
689 'switch: {
690 #(#q_arms)*
691 None
692 }
693 }}.into()
694}