ctreg/lib.rs
1/*!
2`ctreg` (pronounced cuh-tredge, in the style of Cthulhu) is a macro providing
3static typing to your regular expressions, allowing syntax errors to be
4detected at compile time and capture groups to be matched infallibly.
5
6```
7use ctreg::regex;
8
9// Create a regular expression with the macro. This regular expression is
10// analyzed at compile time and its normalized representation is emitted as the
11// `HelloWorld` type.
12regex! { pub HelloWorld = "(?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!" }
13
14// Create an instance of the regular expression.
15let regex = HelloWorld::new();
16
17// Use `is_match` to test if there was a match
18assert!(regex.is_match("Hello, World!"));
19assert!(regex.is_match("Goodbye!"));
20assert!(!regex.is_match("Nothing to see here."));
21
22// Use `find` to find the location of a match
23let cap = regex.find("abc Greetings, Rustacean! 123").unwrap();
24assert_eq!(cap.content, "Greetings, Rustacean!");
25assert_eq!(cap.start, 4);
26assert_eq!(cap.end, 25);
27
28assert!(regex.find("Nothing to see here.").is_none());
29
30// Use `captures` to find all of the named capture groups of a match (`greeting`
31// and `target`, in this case). Capture groups are emitted at compile time and
32// evaluated infallibly.
33let groups = regex.captures("ah, Bonjour, reader!").unwrap();
34assert_eq!(groups.greeting.content, "Bonjour");
35assert_eq!(groups.target.unwrap().content, "reader");
36
37let groups = regex.captures("This is goodbye!").unwrap();
38assert_eq!(groups.greeting.content, "goodbye");
39assert!(groups.target.is_none());
40
41assert!(regex.captures("nothing to see here.").is_none());
42```
43
44# Syntax Checking
45
If the regular expression contains a syntax error, it is reported as a
compile error rather than a runtime panic.
48```compile_fail
49use ctreg::regex;
50
51regex! { HelloWorld = "(?<greeting>Mismatched Parenthesis" };
52
53let regex = HelloWorld::new();
54```
55*/
56
57#[doc(hidden)]
58pub mod ඞ {
59 pub use ::regex_automata;
60 pub use ::regex_syntax;
61}
62
63#[doc(hidden)]
64pub use ctreg_macro::regex_impl;
65
/**
Define a type that represents a regular expression. The [module docs][crate]
walk through a complete example.

Given a `$Type` name and a `$regex` string literal, this macro emits a type
called `$Type`. The regular expression is analyzed at compile time, and
`$Type` carries its normalized representation together with a regex-like API
for searching and for matching capture groups. The [`demo::HelloWorld`] type
shows the methods that are generated.

A second type, called `${Type}Captures`, is emitted alongside it. It contains
one [`Capture`] field for each named capture group in the regular expression;
see [`demo::HelloWorldCaptures`] for an example. This is the type returned by
the capturing search performed by the
[`captures`][demo::HelloWorld::captures] method. That search is evaluated
infallibly: every group that is unconditionally present in the regular
expression is also present in the captures type, while any group that is
optional or part of an alternation appears as an `Option<Capture>`. Named
capture groups cannot be part of repetitions, since there isn't a sensible
thing to capture.

To keep the API and output types simple, anonymous capture groups do not
appear in the captures type; they are treated identically to non-capturing
groups.

If the regex has no named capture groups, no `captures` method or `Captures`
type is generated.

Because it is not currently possible to create a regular expression in a
`const` context, this macro creates a type instead of an object. The type's
constructor builds the regex at runtime from the post-parse
[normalized form](https://docs.rs/regex-syntax/latest/regex_syntax/hir/struct.Hir.html)
of the expression. In the spirit of 0-cost abstraction, the caller is
currently asked to use their own `OnceLock`, or whatever other abstraction is
appropriate, to manage the creation and lifespan of this object. This may
change in the future.
*/
#[macro_export]
macro_rules! regex {
    // Public type: the leading `pub` is forwarded to the implementation macro.
    (pub $Type:ident = $regex:literal) => {
        $crate::regex_impl! { pub $Type = $regex }
    };

    // Type with default (private) visibility.
    ($Type:ident = $regex:literal) => {
        $crate::regex_impl! { $Type = $regex }
    };
}
112
/**
Represents a single match of a regex in a haystack. It contains `start` and
`end`, which are byte offsets of the location of the match, as well as the
actual `content` of the match.

This type is used by [`find`](demo::HelloWorld::find) to indicate the overall
location of the match, and by [`captures`](demo::HelloWorld::captures), which
returns a separate [`Capture`] for each named capture group that matched.

This type is equivalent to the [`Match`
](https://docs.rs/regex/latest/regex/struct.Match.html) type from the `regex`
crate. Like that type, it can be compared for equality; two captures are
equal when their offsets and their content are equal. It can also be hashed.
 */
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Capture<'a> {
    /** Byte offset in the haystack at which the match begins. */
    pub start: usize,

    /** Byte offset in the haystack just past the last byte of the match. */
    pub end: usize,

    /** The matched text, borrowed from the haystack. */
    pub content: &'a str,
}
132
/**
Demo module, showing the types created by the [`regex`] macro.

The items in this module correspond to what the macro generates for:

```
# use ctreg::regex;
regex! { pub HelloWorld = "(?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!" }
```

It is intended to provide a comprehensive demonstration of the types and
methods generated by [`regex`].
*/
#[cfg(feature = "demo")]
pub mod demo {
    use super::Capture;

    /**
    Example regular expression object.

    This type shows what the [`regex`] macro produces when compiling the
    regular expression:

    ```text
    (?<greeting>[a-zA-Z0-9-_.]+)(, (?<target>[a-zA-Z0-9-_.]+))?!
    ```

    It matches strings like `"Hello, World!"` and `"Goodbye!"`. It is provided
    here as an example of the type and methods created by [`regex`].

    See also the [`HelloWorldCaptures`] type, which is the generated type for
    getting capture groups.
    */
    #[derive(Debug, Clone)]
    pub struct HelloWorld {
        regex: ::regex_automata::meta::Regex,
    }

    impl HelloWorld {
        /**
        Construct a new instance of this regular expression object.

        The syntax tree of the expression is assembled directly (no pattern
        string is parsed here) and then handed to the regex engine to build
        the matcher.

        # Panics

        Panics if the regex engine fails to build a matcher from the syntax
        tree.
        */
        #[inline]
        #[must_use]
        pub fn new() -> Self {
            use ::regex_syntax::hir;

            // The character class shared by both named groups, spelled out
            // as its individual ranges.
            let name_class = || {
                hir::Hir::class(hir::Class::Unicode(hir::ClassUnicode::new([
                    hir::ClassUnicodeRange::new('-', '.'),
                    hir::ClassUnicodeRange::new('0', '9'),
                    hir::ClassUnicodeRange::new('A', 'Z'),
                    hir::ClassUnicodeRange::new('_', '_'),
                    hir::ClassUnicodeRange::new('a', 'z'),
                ])))
            };

            // A named capture group wrapping one or more (greedy) characters
            // of that class.
            let named_group = |index: u32, name: &str| {
                hir::Hir::capture(hir::Capture {
                    index,
                    name: Some(From::from(name)),
                    sub: Box::new(hir::Hir::repetition(hir::Repetition {
                        min: 1u32,
                        max: None,
                        greedy: true,
                        sub: Box::new(name_class()),
                    })),
                })
            };

            // The optional `, <target>` tail: zero or one occurrence.
            let optional_target = hir::Hir::repetition(hir::Repetition {
                min: 0u32,
                max: Some(1u32),
                greedy: true,
                sub: Box::new(hir::Hir::concat(Vec::from([
                    hir::Hir::literal(*b", "),
                    named_group(2u32, "target"),
                ]))),
            });

            let pattern = hir::Hir::concat(Vec::from([
                named_group(1u32, "greeting"),
                optional_target,
                hir::Hir::literal(*b"!"),
            ]));

            Self {
                regex: ::regex_automata::meta::Regex::builder()
                    .build_from_hir(&pattern)
                    .expect("regex compilation failed, despite compile-time verification"),
            }
        }

        /**
        Test whether this regular expression matches anywhere in the
        `haystack` string, without reporting where the match is.

        Prefer this method if you only care *that* there was a match, as it
        might be faster than [`find`][HelloWorld::find] or
        [`captures`][HelloWorld::captures].
        */
        #[inline]
        #[must_use]
        pub fn is_match(&self, haystack: &str) -> bool {
            let Self { regex } = self;
            regex.is_match(haystack)
        }

        /**
        Find the first match of this regex in the `haystack`, and return it as
        a [`Capture`]. Returns `None` if there is no match.

        Prefer this method if you only care about the overall location of a
        match in the haystack, without regard for the specific capture groups.
        */
        #[inline]
        #[must_use]
        pub fn find<'i>(&self, haystack: &'i str) -> Option<Capture<'i>> {
            self.regex.find(haystack).map(|found| {
                let span = found.span();
                let (start, end) = (span.start, span.end);
                // SAFETY: the span was reported by the regex engine for this
                // same `haystack`; this code relies on such spans being in
                // bounds and falling on character boundaries.
                let content = unsafe { haystack.get_unchecked(start..end) };
                Capture {
                    start,
                    end,
                    content,
                }
            })
        }

        /**
        Search for the first match of this regex in the `haystack`, and return
        an object containing all of the named capture groups that were found.
        Returns `None` if there is no match.
        */
        #[inline]
        #[must_use]
        pub fn captures<'i>(&self, haystack: &'i str) -> Option<HelloWorldCaptures<'i>> {
            // Two slots (start and end offset) for each of three groups; the
            // first pair is not read below, the other two pairs belong to the
            // named groups.
            let mut slots = [None; (2usize + 1) * 2];
            let input = ::regex_automata::Input::new(haystack);
            let _pattern = self.regex.search_slots(&input, &mut slots)?;

            let [_, _, greeting_start, greeting_end, target_start, target_end] = slots;

            // Builds the `Capture` for a pair of byte offsets in `haystack`.
            let capture_at = |start: usize, end: usize| -> Capture<'i> {
                // SAFETY: the offsets were written by the regex engine while
                // searching this same `haystack`; this code relies on them
                // being in bounds and falling on character boundaries.
                let content = unsafe { haystack.get_unchecked(start..end) };
                Capture {
                    start,
                    end,
                    content,
                }
            };

            // SAFETY: the `greeting` group is not optional, so there will
            // always be a greeting when the expression matches; both of its
            // slots are relied upon to be set after a successful search.
            let greeting = capture_at(
                unsafe { greeting_start.unwrap_unchecked() }.get(),
                unsafe { greeting_end.unwrap_unchecked() }.get(),
            );

            let target = target_start.map(|start| {
                // SAFETY: relies on the engine setting the end slot of a
                // group whenever it sets that group's start slot.
                capture_at(start.get(), unsafe { target_end.unwrap_unchecked() }.get())
            });

            Some(HelloWorldCaptures { greeting, target })
        }
    }

    impl Default for HelloWorld {
        /** Equivalent to [`HelloWorld::new`]. */
        fn default() -> Self {
            HelloWorld::new()
        }
    }

    /**
    Example captures object.

    This type shows what the [`regex`] macro produces for the capture groups
    returned by the [`HelloWorld`] expression.
    */
    #[derive(Debug, Clone, Copy)]
    pub struct HelloWorldCaptures<'a> {
        /**
        The greeting is a non-optional [`Capture`], because there will always
        be a greeting when the expression matches.
        */
        pub greeting: Capture<'a>,

        /**
        The target is an optional [`Capture`], because the group is inside an
        `()?` optional group, so it may not be present even if the expression
        matched. Optional groups are also created by alternations.
        */
        pub target: Option<Capture<'a>>,
    }
}