ctreg/
lib.rs

1/*!
2`ctreg` (pronounced cuh-tredge, in the style of Cthulhu) is a macro providing
3static typing to your regular expressions, allowing syntax errors to be
4detected at compile time and capture groups to be matched infallibly.
5
6```
7use ctreg::regex;
8
9// Create a regular expression with the macro. This regular expression is
10// analyzed at compile time and its normalized representation is emitted as the
11// `HelloWorld` type.
12regex! { pub HelloWorld = "(?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!" }
13
14// Create an instance of the regular expression.
15let regex = HelloWorld::new();
16
17// Use `is_match` to test if there was a match
18assert!(regex.is_match("Hello, World!"));
19assert!(regex.is_match("Goodbye!"));
20assert!(!regex.is_match("Nothing to see here."));
21
22// Use `find` to find the location of a match
23let cap = regex.find("abc Greetings, Rustacean! 123").unwrap();
24assert_eq!(cap.content, "Greetings, Rustacean!");
25assert_eq!(cap.start, 4);
26assert_eq!(cap.end, 25);
27
28assert!(regex.find("Nothing to see here.").is_none());
29
30// Use `captures` to find all of the named capture groups of a match (`greeting`
31// and `target`, in this case). Capture groups are emitted at compile time and
32// evaluated infallibly.
33let groups = regex.captures("ah, Bonjour, reader!").unwrap();
34assert_eq!(groups.greeting.content, "Bonjour");
35assert_eq!(groups.target.unwrap().content, "reader");
36
37let groups = regex.captures("This is goodbye!").unwrap();
38assert_eq!(groups.greeting.content, "goodbye");
39assert!(groups.target.is_none());
40
41assert!(regex.captures("nothing to see here.").is_none());
42```
43
44# Syntax Checking
45
If the regular expression contains a syntax error, it is reported as a compile
error rather than a runtime panic.
48```compile_fail
49use ctreg::regex;
50
51regex! { HelloWorld = "(?<greeting>Mismatched Parenthesis" };
52
53let regex = HelloWorld::new();
54```
55*/
56
57#[doc(hidden)]
58pub mod ඞ {
59    pub use ::regex_automata;
60    pub use ::regex_syntax;
61}
62
63#[doc(hidden)]
64pub use ctreg_macro::regex_impl;
65
/**
Create a type representing a regular expression. See the [module docs][crate]
for an example.

The macro accepts exactly two forms: `regex! { Type = "pattern" }` and
`regex! { pub Type = "pattern" }`. In both cases the tokens are forwarded
unchanged to `regex_impl`, which is re-exported from `ctreg_macro`.

This macro creates a type, called `$Type`, representing the given `$regex`. The
regular expression is analyzed at compile time, and the `$Type` is emitted
containing its normalized representation, with a regex-like API for searching
and matching capture groups. See the [`demo::HelloWorld`] type for an example
of the methods it generates.

Additionally, it creates a type called `${Type}Captures`, which contains a
[`Capture`] field for each named capture group in the regular expression. See
the [`demo::HelloWorldCaptures`] type for an example of this. The
[`captures`][demo::HelloWorld::captures] method performs a capturing search,
which returns this type. This search is evaluated infallibly: all groups that
are unconditionally present in the regular expression are also present in the
captures type. Any groups that are optional or part of an alternation appear as
an `Option<Capture>`. Named capture groups cannot be part of repetitions, since
there isn't a sensible thing to capture.

To keep the API and output types simple, anonymous capture groups are not
present in the capture groups, and are treated identically to non-capturing
groups.

If the regex has no named capture groups, no `captures` method or `Captures`
type is generated.

Because it is not currently possible to create a regular expression in a
`const` context, this macro operates by creating a type instead of an object;
this type's constructor builds a regex at runtime using the post-parse
[normalized form](https://docs.rs/regex-syntax/latest/regex_syntax/hir/struct.Hir.html)
of the expression. In the spirit of zero-cost abstraction, we currently ask the
caller to use their own `OnceLock` or whatever other abstraction is appropriate
to manage the creation and lifespan of this object. This may change in the
future.
*/
#[macro_export]
macro_rules! regex {
    // No visibility keyword: the tokens are forwarded to `regex_impl` as-is.
    ($Type:ident = $regex:literal) => {
        $crate::regex_impl! { $Type = $regex }
    };

    // A leading `pub` keyword is matched literally and forwarded. Restricted
    // visibilities such as `pub(crate)` match neither arm.
    (pub $Type:ident = $regex:literal) => {
        $crate::regex_impl! { pub $Type = $regex }
    };
}
112
/**
Represents a single match of a regex in a haystack. It contains `start` and
`end`, which are byte offsets of the location of the match, as well as the
actual `content` of the match.

This type is used by [`find`](demo::HelloWorld::find) to indicate the overall
location of the match, and by [`captures`](demo::HelloWorld::captures), which
returns a separate [`Capture`] for each named capture group that matched.

This type is equivalent to the [`Match`
](https://docs.rs/regex/latest/regex/struct.Match.html) type from the `regex`
crate.

Captures can be compared and hashed: two captures are equal when their
`start`, `end` and `content` are all equal.
 */
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Capture<'a> {
    /// Byte offset in the haystack at which the match begins.
    pub start: usize,
    /// Byte offset in the haystack just past the last byte of the match.
    pub end: usize,
    /// The matched text, i.e. the `start..end` slice of the haystack.
    pub content: &'a str,
}
132
133/**
134Demo module, showing the types created by the [`regex`] macro.
135
136This module contains the output of:
137
138```
139# use ctreg::regex;
140regex! { pub HelloWorld = "(?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!" }
141```
142
143It is intended to provide a comprehensive demonstration of the types and
144methods generated by [`regex`].
145*/
146#[cfg(feature = "demo")]
147pub mod demo {
148    use super::Capture;
149
    /**
    Example regular expression object.

    This type is the output of the [`regex`] macro, compiling the regular expression:

    ```text
    (?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!
    ```

    It matches strings like `"Hello, World!"` and `"Goodbye!"`. It is provided
    here as an example of the type and methods created by [`regex`].

    See also the [`HelloWorldCaptures`] type, which is the generated type for
    getting capture groups.
    */
    #[derive(Debug, Clone)]
    pub struct HelloWorld {
        // The compiled regex that every search method on this type delegates to.
        regex: ::regex_automata::meta::Regex,
    }
169
170    impl HelloWorld {
        /**
        Construct a new instance of this regular expression object.

        The pattern's syntax tree (HIR) is rebuilt node by node and then
        compiled with the `regex_automata` meta regex builder, so no pattern
        string is parsed here.

        # Panics

        Panics if `build_from_hir` returns an error for the tree built here.
        */
        #[inline]
        #[must_use]
        pub fn new() -> Self {
            // Top level: `greeting` group, optional `, target` part, literal `!`.
            let hir: ::regex_syntax::hir::Hir = ::regex_syntax::hir::Hir::concat(Vec::from([
                // `(?<greeting>...)`: capture group 1.
                ::regex_syntax::hir::Hir::capture(::regex_syntax::hir::Capture {
                    index: 1u32,
                    name: Some(From::from("greeting")),
                    // `[...]+`: one or more repetitions, greedy.
                    sub: Box::new(::regex_syntax::hir::Hir::repetition(
                        ::regex_syntax::hir::Repetition {
                            min: 1u32,
                            max: None,
                            greedy: true,
                            sub: Box::new(::regex_syntax::hir::Hir::class(
                                ::regex_syntax::hir::Class::Unicode(
                                    // `[a-zA-Z0-9-_.]` as sorted ranges; `-` and `.`
                                    // are adjacent code points, so they share a range.
                                    ::regex_syntax::hir::ClassUnicode::new([
                                        ::regex_syntax::hir::ClassUnicodeRange::new('-', '.'),
                                        ::regex_syntax::hir::ClassUnicodeRange::new('0', '9'),
                                        ::regex_syntax::hir::ClassUnicodeRange::new('A', 'Z'),
                                        ::regex_syntax::hir::ClassUnicodeRange::new('_', '_'),
                                        ::regex_syntax::hir::ClassUnicodeRange::new('a', 'z'),
                                    ]),
                                ),
                            )),
                        },
                    )),
                }),
                // `(, (?<target>...))?`: zero or one repetition. The anonymous
                // group itself has no capture node; only its contents appear.
                ::regex_syntax::hir::Hir::repetition(::regex_syntax::hir::Repetition {
                    min: 0u32,
                    max: Some(1u32),
                    greedy: true,
                    sub: Box::new(::regex_syntax::hir::Hir::concat(Vec::from([
                        ::regex_syntax::hir::Hir::literal(*b", "),
                        // `(?<target>...)`: capture group 2, same class as `greeting`.
                        ::regex_syntax::hir::Hir::capture(::regex_syntax::hir::Capture {
                            index: 2u32,
                            name: Some(From::from("target")),
                            sub: Box::new(::regex_syntax::hir::Hir::repetition(
                                ::regex_syntax::hir::Repetition {
                                    min: 1u32,
                                    max: None,
                                    greedy: true,
                                    sub: Box::new(::regex_syntax::hir::Hir::class(
                                        ::regex_syntax::hir::Class::Unicode(
                                            ::regex_syntax::hir::ClassUnicode::new([
                                                ::regex_syntax::hir::ClassUnicodeRange::new(
                                                    '-', '.',
                                                ),
                                                ::regex_syntax::hir::ClassUnicodeRange::new(
                                                    '0', '9',
                                                ),
                                                ::regex_syntax::hir::ClassUnicodeRange::new(
                                                    'A', 'Z',
                                                ),
                                                ::regex_syntax::hir::ClassUnicodeRange::new(
                                                    '_', '_',
                                                ),
                                                ::regex_syntax::hir::ClassUnicodeRange::new(
                                                    'a', 'z',
                                                ),
                                            ]),
                                        ),
                                    )),
                                },
                            )),
                        }),
                    ]))),
                }),
                // Trailing literal `!`.
                ::regex_syntax::hir::Hir::literal(*b"!"),
            ]));
            // Compile the tree directly; no pattern string is parsed here.
            let regex = ::regex_automata::meta::Regex::builder()
                .build_from_hir(&hir)
                .expect("regex compilation failed, despite compile-time verification");
            Self { regex }
        }
247
248        /**
249        Test if this regular expression matches the `haystack` string, without
250        getting any information about the location of the match.
251
252        Prefer this method if you only care *that* there was a match, as it
253        might be faster than [`find`][HelloWorld::find] or
254        [`captures`][HelloWorld::captures].
255        */
256        #[inline]
257        #[must_use]
258        pub fn is_match(&self, haystack: &str) -> bool {
259            self.regex.is_match(haystack)
260        }
261
262        /**
263        Find the first match of this regex in the `haystack`, and return it as a
264        [`Capture`].
265
266        Prefer this method if you only care about the overall location of a match
267        in the haystack, without regard for the specific capture groups.
268        */
269        #[inline]
270        #[must_use]
271        pub fn find<'i>(&self, haystack: &'i str) -> Option<Capture<'i>> {
272            let capture = self.regex.find(haystack)?;
273            let span = capture.span();
274            let start = span.start;
275            let end = span.end;
276            let content = unsafe { haystack.get_unchecked(start..end) };
277            Some(Capture {
278                start,
279                end,
280                content,
281            })
282        }
283    }
284
285    impl Default for HelloWorld {
286        fn default() -> Self {
287            Self::new()
288        }
289    }
290
291    impl HelloWorld {
292        /**
293        Search for the first match of this regex in the `haystack`, and return
294        an object containing all of the named capture groups that were found.
295        */
296        #[inline]
297        #[must_use]
298        pub fn captures<'i>(&self, haystack: &'i str) -> Option<HelloWorldCaptures<'i>> {
299            let mut slots = [None; (2usize + 1) * 2];
300            let _ = self
301                .regex
302                .search_slots(&::regex_automata::Input::new(haystack), &mut slots)?;
303
304            Some(HelloWorldCaptures {
305                #[allow(clippy::blocks_in_conditions)]
306                greeting: match {
307                    let slot_start = slots[2usize];
308                    let slot_end = slots[3usize];
309                    match slot_start {
310                        None => None,
311                        Some(start) => {
312                            let start = start.get();
313                            let end = unsafe { slot_end.unwrap_unchecked() }.get();
314                            let content = unsafe { haystack.get_unchecked(start..end) };
315                            Some(Capture {
316                                start,
317                                end,
318                                content,
319                            })
320                        }
321                    }
322                } {
323                    Some(capture) => capture,
324                    None => unsafe { core::hint::unreachable_unchecked() },
325                },
326                target: {
327                    let slot_start = slots[4usize];
328                    let slot_end = slots[5usize];
329                    match slot_start {
330                        None => None,
331                        Some(start) => {
332                            let start = start.get();
333                            let end = unsafe { slot_end.unwrap_unchecked() }.get();
334                            let content = unsafe { haystack.get_unchecked(start..end) };
335                            Some(Capture {
336                                start,
337                                end,
338                                content,
339                            })
340                        }
341                    }
342                },
343            })
344        }
345    }
346
    /**
    Example captures object.

    This type is the output of the [`regex`] macro for the capture groups
    returned by the [`HelloWorld`] expression.
    */
    #[derive(Debug, Clone, Copy)]
    pub struct HelloWorldCaptures<'a> {
        /**
        The greeting is a non-optional [`Capture`], because there will always
        be a greeting when the expression matches.
        */
        pub greeting: Capture<'a>,

        /**
        The target is an optional [`Capture`], because the group is inside an
        `()?` optional group, so it may not be present even if the expression
        matched. Optional groups are also created by alternations.
        */
        pub target: Option<Capture<'a>>,
    }
368}