fancy_regex/lib.rs
1// Copyright 2026 The Fancy Regex Authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#![doc = include_str!("../docs/main.md")]
22#![doc = include_str!("../docs/features.md")]
23#![doc = include_str!("../docs/syntax.md")]
24#![doc = include_str!("../docs/subroutines/1_intro.md")]
25#![doc = include_str!("../docs/subroutines/2_flags.md")]
26#![doc = include_str!("../docs/subroutines/3_left_recursion.md")]
27#![doc = include_str!("../docs/subroutines/4_recursion.md")]
28#![doc = include_str!("../docs/absent.md")]
29#![deny(missing_docs)]
30#![deny(missing_debug_implementations)]
31#![cfg_attr(not(feature = "std"), no_std)]
32
33extern crate alloc;
34
35use alloc::borrow::Cow;
36use alloc::boxed::Box;
37use alloc::string::{String, ToString};
38use alloc::sync::Arc;
39use alloc::vec;
40use alloc::vec::Vec;
41
42use core::convert::TryFrom;
43use core::fmt;
44use core::fmt::{Debug, Formatter};
45use core::ops::{Index, Range};
46use core::str::FromStr;
47use regex_automata::meta::Regex as RaRegex;
48use regex_automata::util::captures::Captures as RaCaptures;
49use regex_automata::util::syntax::Config as SyntaxConfig;
50use regex_automata::Input as RaInput;
51
52mod analyze;
53mod compile;
54mod error;
55mod expand;
56mod optimize;
57mod parse;
58mod parse_flags;
59mod replacer;
60mod vm;
61
62use crate::analyze::can_compile_as_anchored;
63use crate::analyze::{analyze, AnalyzeContext};
64use crate::compile::{compile, CompileOptions};
65use crate::optimize::optimize;
66use crate::parse::{ExprTree, NamedGroups, Parser};
67use crate::parse_flags::*;
68use crate::vm::{Prog, OPTION_FIND_NOT_EMPTY, OPTION_SKIPPED_EMPTY_MATCH};
69
70pub use crate::error::{CompileError, Error, ParseError, Result, RuntimeError};
71pub use crate::expand::Expander;
72pub use crate::replacer::{NoExpand, Replacer, ReplacerRef};
73
// Fixed limit of 64, by its name a cap on recursion depth.
// NOTE(review): no use of this constant is visible in this part of the file —
// confirm which recursion it bounds.
const MAX_RECURSION: usize = 64;
75
76// the public API
77
/// A builder for a `Regex` to allow configuring options.
///
/// Stores one pattern together with a [`RegexOptionsBuilder`]; the option
/// setters on this type forward to that inner builder.
#[derive(Debug)]
pub struct RegexBuilder {
    /// The pattern that `build` compiles.
    pattern: String,
    /// The options applied when the pattern is compiled.
    options: RegexOptionsBuilder,
}
84
/// A builder for `Regex` options that is not tied to a single pattern.
///
/// Unlike [`RegexBuilder`], the pattern is passed to `build`, so one configured
/// set of options can be used to compile several patterns.
#[derive(Debug)]
pub struct RegexOptionsBuilder {
    /// The options accumulated so far.
    options: RegexOptions,
}
90
/// A compiled regular expression.
#[derive(Clone)]
pub struct Regex {
    /// The compiled form of the pattern (delegated or VM-backed).
    inner: RegexImpl,
    /// The named groups taken from the parsed expression tree.
    named_groups: Arc<NamedGroups>,
}
97
// Separate enum because we don't want to expose any of this
#[derive(Clone)]
enum RegexImpl {
    // Used when analysis reports the pattern as not "hard": matching is
    // delegated entirely to regex-automata.
    // Do we want to box this? It's pretty big...
    Wrap {
        /// The regex-automata regex that performs the matching
        inner: RaRegex,
        /// The original pattern which the regex was constructed from
        pattern: String,
        /// Some optimizations avoid the VM, but need to use an extra capture group to represent the match boundaries
        explicit_capture_group_0: bool,
        /// The actual pattern passed to regex-automata for delegation
        delegated_pattern: String,
    },
    // Used when analysis reports the pattern as "hard": matching runs the
    // compiled program through the VM.
    Fancy {
        /// The compiled program executed by `vm::run`
        prog: Arc<Prog>,
        /// Taken from the analysis result's `end_group()` at construction time
        n_groups: usize,
        /// The original pattern which the regex was constructed from
        pattern: String,
        /// Runtime options (backtrack limit, find-not-empty) handed to the VM
        options: HardRegexRuntimeOptions,
    },
}
119
/// A single match of a regex or group in an input text
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Match<'t> {
    /// The text that was searched.
    text: &'t str,
    /// Offset into `text` at which the match starts.
    start: usize,
    /// Offset into `text` at which the match ends.
    end: usize,
}
127
/// An iterator over all non-overlapping matches for a particular string.
///
/// The iterator yields a `Result<Match>`. The iterator stops when no more
/// matches can be found.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
#[derive(Debug)]
pub struct Matches<'r, 't> {
    /// The regex used for every search.
    re: &'r Regex,
    /// The text being searched.
    text: &'t str,
    /// Position at which the next search starts. Set to `text.len() + 1`
    /// after an error so that the iterator yields nothing further.
    last_end: usize,
    /// End offset of the most recently yielded match, if any.
    last_match: Option<usize>,
    /// Whether the previous match was empty and ended exactly where its search
    /// started; forwarded to the next search as `OPTION_SKIPPED_EMPTY_MATCH`.
    last_skipped_empty: bool,
}
143
144impl<'r, 't> Matches<'r, 't> {
145 /// Return the text being searched.
146 pub fn text(&self) -> &'t str {
147 self.text
148 }
149
150 /// Return the underlying regex.
151 pub fn regex(&self) -> &'r Regex {
152 self.re
153 }
154
155 /// Adapted from the `regex` crate. Calls `find_from_pos`/`captures_from_pos` repeatedly.
156 /// Ignores empty matches immediately after a match.
157 /// Also passes a flag when skipping an empty match, so that \G wouldn't match at the new start position.
158 fn next_with<F, R>(&mut self, mut search: F) -> Option<Result<R>>
159 where
160 F: FnMut(&Regex, usize, u32) -> Result<Option<(R, Match<'t>)>>,
161 {
162 if self.last_end > self.text.len() {
163 return None;
164 }
165
166 let option_flags = if self.last_skipped_empty {
167 OPTION_SKIPPED_EMPTY_MATCH
168 } else {
169 0
170 };
171
172 let pos = self.last_end;
173 let (result, mat) = match search(self.re, pos, option_flags) {
174 Err(error) => {
175 // Stop on first error: If an error is encountered, return it, and set the "last match position"
176 // to the string length, so that the next next() call will return None, to prevent an infinite loop.
177 self.last_end = self.text.len() + 1;
178 return Some(Err(error));
179 }
180 Ok(None) => return None,
181 Ok(Some(pair)) => pair,
182 };
183
184 if mat.start == mat.end {
185 // This is an empty match. To ensure we make progress, start
186 // the next search at the smallest possible starting position
187 // of the next match following this one.
188 self.last_end = next_utf8(self.text, mat.end);
189 // Only set OPTION_SKIPPED_EMPTY_MATCH on the next call if this was a
190 // truly zero-length match (the VM consumed no bytes from `pos`).
191 // This means that \K won't prevent \G from matching.
192 self.last_skipped_empty = mat.end == pos;
193 // Don't accept empty matches immediately following a match.
194 // Just move on to the next match.
195 if Some(mat.end) == self.last_match {
196 return self.next_with(search);
197 }
198 } else {
199 self.last_end = mat.end;
200 self.last_skipped_empty = false;
201 }
202
203 self.last_match = Some(mat.end);
204
205 Some(Ok(result))
206 }
207}
208
209impl<'r, 't> Iterator for Matches<'r, 't> {
210 type Item = Result<Match<'t>>;
211
212 fn next(&mut self) -> Option<Self::Item> {
213 let text = self.text;
214 self.next_with(move |re, pos, flags| {
215 re.find_from_pos_with_option_flags(text, pos, flags)
216 .map(|opt| opt.map(|m| (m, m)))
217 })
218 }
219}
220
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
/// The iterator stops when no more matches can be found.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
///
/// Wraps a [`Matches`] iterator, whose position bookkeeping it reuses.
#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(Matches<'r, 't>);
230
231impl<'r, 't> CaptureMatches<'r, 't> {
232 /// Return the text being searched.
233 pub fn text(&self) -> &'t str {
234 self.0.text
235 }
236
237 /// Return the underlying regex.
238 pub fn regex(&self) -> &'r Regex {
239 self.0.re
240 }
241}
242
243impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
244 type Item = Result<Captures<'t>>;
245
246 fn next(&mut self) -> Option<Self::Item> {
247 let text = self.0.text;
248 self.0.next_with(move |re, pos, flags| {
249 let captures = re.captures_from_pos_with_option_flags(text, pos, flags)?;
250 Ok(captures.map(|c| {
251 let mat = c
252 .get(0)
253 .expect("`Captures` is expected to have entire match at 0th position");
254 (c, mat)
255 }))
256 })
257 }
258}
259
/// A set of capture groups found for a regex.
#[derive(Debug)]
pub struct Captures<'t> {
    /// The capture data, in the form matching how the regex was executed.
    inner: CapturesImpl<'t>,
    /// The named groups, shared via `Arc`.
    named_groups: Arc<NamedGroups>,
}
266
// Internal representation of captures, with one variant per execution path.
#[derive(Debug)]
enum CapturesImpl<'t> {
    // Captures stored in regex-automata's own capture type.
    Wrap {
        /// The text that was searched.
        text: &'t str,
        /// The capture locations, stored as regex-automata `Captures`.
        locations: RaCaptures,
        /// Some optimizations avoid the VM but need an extra capture group to represent the match boundaries.
        /// Therefore what is actually capture group 1 should be treated as capture group 0, and all other
        /// capture groups should have their index reduced by one as well to line up with what the pattern specifies.
        explicit_capture_group_0: bool,
    },
    // Captures stored as a flat list of offsets.
    Fancy {
        /// The text that was searched.
        text: &'t str,
        /// NOTE(review): presumably the VM's save slots, with slots 0 and 1
        /// holding the start and end of the overall match — confirm against the VM.
        saves: Vec<usize>,
    },
}
282
/// Iterator for captured groups in order in which they appear in the regex.
#[derive(Debug)]
pub struct SubCaptureMatches<'c, 't> {
    /// The captures being iterated.
    caps: &'c Captures<'t>,
    /// NOTE(review): presumably the index of the next group to yield — the
    /// `Iterator` impl is not visible in this part of the file; confirm there.
    i: usize,
}
289
/// An iterator over all substrings delimited by a regex.
///
/// This iterator yields `Result<&'h str>`, where each item is a substring of the
/// target string that is delimited by matches of the regular expression. It stops when there
/// are no more substrings to yield.
///
/// `'r` is the lifetime of the compiled regular expression, and `'h` is the
/// lifetime of the target string being split.
///
/// This iterator can be created by the [`Regex::split`] method.
#[derive(Debug)]
pub struct Split<'r, 'h> {
    /// Iterator over the delimiter matches.
    matches: Matches<'r, 'h>,
    /// Offset in `target` at which the next yielded substring begins. Set to
    /// `target.len() + 1` once the final substring has been returned.
    next_start: usize,
    /// The string being split.
    target: &'h str,
}
306
307impl<'r, 'h> Iterator for Split<'r, 'h> {
308 type Item = Result<&'h str>;
309
310 /// Returns the next substring that results from splitting the target string by the regex.
311 ///
312 /// If no more matches are found, returns the remaining part of the string,
313 /// or `None` if all substrings have been yielded.
314 fn next(&mut self) -> Option<Result<&'h str>> {
315 match self.matches.next() {
316 None => {
317 let len = self.target.len();
318 if self.next_start > len {
319 // No more substrings to return
320 None
321 } else {
322 // Return the last part of the target string
323 // Next call will return None
324 let part = &self.target[self.next_start..len];
325 self.next_start = len + 1;
326 Some(Ok(part))
327 }
328 }
329 // Return the next substring
330 Some(Ok(m)) => {
331 let part = &self.target[self.next_start..m.start()];
332 self.next_start = m.end();
333 Some(Ok(part))
334 }
335 Some(Err(e)) => Some(Err(e)),
336 }
337 }
338}
339
// Marker impl: declares that `Split` keeps returning `None` once it has returned `None`.
impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
341
/// An iterator over at most `N` substrings delimited by a regex.
///
/// This iterator yields `Result<&'h str>`, where each item is a substring of the
/// target that is delimited by matches of the regular expression. It stops either when
/// there are no more substrings to yield, or after `N` substrings have been yielded.
///
/// The `N`th substring is the remaining part of the target.
///
/// `'r` is the lifetime of the compiled regular expression, and `'h` is the
/// lifetime of the target string being split.
///
/// This iterator can be created by the [`Regex::splitn`] method.
#[derive(Debug)]
pub struct SplitN<'r, 'h> {
    /// The underlying unlimited split iterator.
    splits: Split<'r, 'h>,
    /// How many more substrings may still be yielded.
    limit: usize,
}
359
360impl<'r, 'h> Iterator for SplitN<'r, 'h> {
361 type Item = Result<&'h str>;
362
363 /// Returns the next substring resulting from splitting the target by the regex,
364 /// limited to `N` splits.
365 ///
366 /// Returns `None` if no more matches are found or if the limit is reached after yielding
367 /// the remaining part of the target.
368 fn next(&mut self) -> Option<Result<&'h str>> {
369 if self.limit == 0 {
370 // Limit reached. No more substrings available.
371 return None;
372 }
373
374 // Decrement the limit for each split.
375 self.limit -= 1;
376 if self.limit > 0 {
377 return self.splits.next();
378 }
379
380 // Nth split
381 let len = self.splits.target.len();
382 if self.splits.next_start > len {
383 // No more substrings available.
384 None
385 } else {
386 // Return the remaining part of the target
387 let start = self.splits.next_start;
388 self.splits.next_start = len + 1;
389 Some(Ok(&self.splits.target[start..len]))
390 }
391 }
392
393 fn size_hint(&self) -> (usize, Option<usize>) {
394 (0, Some(self.limit))
395 }
396}
397
// Marker impl: declares that `SplitN` keeps returning `None` once it has returned `None`.
impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
399
// The full set of options used when compiling a regex.
#[derive(Clone, Debug, Default)]
struct RegexOptions {
    // Syntax settings; `compute_flags` reads the case-insensitive, multi-line,
    // ignore-whitespace, dot-matches-new-line, unicode and CRLF switches from it.
    syntaxc: SyntaxConfig,
    // Size limit for delegated regexes; `None` until set through the builder.
    delegate_size_limit: Option<usize>,
    // DFA cache size limit for delegated regexes; `None` until set through the builder.
    delegate_dfa_size_limit: Option<usize>,
    // Translated to `FLAG_ONIGURUMA_MODE` by `compute_flags`.
    oniguruma_mode: bool,
    // Translated to `FLAG_IGNORE_NUMBERED_GROUPS_WHEN_NAMED_GROUPS_EXIST` by `compute_flags`.
    ignore_numbered_groups_when_named_groups_exist: bool,
    // Runtime options copied into VM-backed regexes.
    hard_regex_runtime_options: HardRegexRuntimeOptions,
}
409
// Options consulted when a VM-backed ("fancy") regex is executed.
#[derive(Copy, Clone, Debug)]
struct HardRegexRuntimeOptions {
    // Backtracking limit; the `Default` impl sets it to 1_000_000.
    backtrack_limit: usize,
    // When set, `OPTION_FIND_NOT_EMPTY` is added to the flags passed to the VM.
    find_not_empty: bool,
}
415
416impl RegexOptions {
417 fn get_flag_value(flag_value: bool, enum_value: u32) -> u32 {
418 if flag_value {
419 enum_value
420 } else {
421 0
422 }
423 }
424
425 fn compute_flags(&self) -> u32 {
426 let insensitive = Self::get_flag_value(self.syntaxc.get_case_insensitive(), FLAG_CASEI);
427 let multiline = Self::get_flag_value(self.syntaxc.get_multi_line(), FLAG_MULTI);
428 let whitespace =
429 Self::get_flag_value(self.syntaxc.get_ignore_whitespace(), FLAG_IGNORE_SPACE);
430 let dotnl = Self::get_flag_value(self.syntaxc.get_dot_matches_new_line(), FLAG_DOTNL);
431 let unicode = Self::get_flag_value(self.syntaxc.get_unicode(), FLAG_UNICODE);
432 let oniguruma_mode = Self::get_flag_value(self.oniguruma_mode, FLAG_ONIGURUMA_MODE);
433 let crlf = Self::get_flag_value(self.syntaxc.get_crlf(), FLAG_CRLF);
434 let named_groups_only = Self::get_flag_value(
435 self.ignore_numbered_groups_when_named_groups_exist,
436 FLAG_IGNORE_NUMBERED_GROUPS_WHEN_NAMED_GROUPS_EXIST,
437 );
438
439 insensitive
440 | multiline
441 | whitespace
442 | dotnl
443 | unicode
444 | oniguruma_mode
445 | crlf
446 | named_groups_only
447 }
448}
449
450impl Default for HardRegexRuntimeOptions {
451 fn default() -> Self {
452 HardRegexRuntimeOptions {
453 backtrack_limit: 1_000_000,
454 find_not_empty: false,
455 }
456 }
457}
458
459impl Default for RegexOptionsBuilder {
460 fn default() -> Self {
461 Self::new()
462 }
463}
464
465impl RegexOptionsBuilder {
466 /// Create a new regex options builder.
467 pub fn new() -> Self {
468 RegexOptionsBuilder {
469 options: RegexOptions::default(),
470 }
471 }
472
473 /// Build a `Regex` from the given pattern.
474 ///
475 /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
476 pub fn build(&self, pattern: String) -> Result<Regex> {
477 Regex::new_options(pattern, &self.options)
478 }
479
480 fn set_config(&mut self, func: impl Fn(SyntaxConfig) -> SyntaxConfig) -> &mut Self {
481 self.options.syntaxc = func(self.options.syntaxc);
482 self
483 }
484
485 /// Override default case insensitive
486 /// this is to enable/disable casing via builder instead of a flag within
487 /// the raw string pattern which will be parsed
488 ///
489 /// Default is false
490 pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
491 self.set_config(|x| x.case_insensitive(yes))
492 }
493
494 /// Enable multi-line regex
495 pub fn multi_line(&mut self, yes: bool) -> &mut Self {
496 self.set_config(|x| x.multi_line(yes))
497 }
498
499 /// Allow ignore whitespace
500 pub fn ignore_whitespace(&mut self, yes: bool) -> &mut Self {
501 self.set_config(|x| x.ignore_whitespace(yes))
502 }
503
504 /// Enable or disable the "dot matches any character" flag.
505 /// When this is enabled, `.` will match any character. When it's disabled, then `.` will match any character
506 /// except for a new line character.
507 pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
508 self.set_config(|x| x.dot_matches_new_line(yes))
509 }
510
511 /// Enable or disable the CRLF mode flag (`R`).
512 ///
513 /// When enabled, `\r\n` is treated as a single line ending for the purposes of
514 /// `^` and `$` in multi-line mode, instead of treating `\r` and `\n` as separate
515 /// line endings.
516 ///
517 /// By default, this is disabled. It may be selectively enabled in the regular
518 /// expression by using the `R` flag, e.g. `(?mR)` or `(?Rm)`.
519 pub fn crlf(&mut self, yes: bool) -> &mut Self {
520 self.set_config(|x| x.crlf(yes))
521 }
522
523 /// Enable verbose mode in the regular expression.
524 ///
525 /// The same as ignore_whitespace
526 ///
527 /// When enabled, verbose mode permits insigificant whitespace in many
528 /// places in the regular expression, as well as comments. Comments are
529 /// started using `#` and continue until the end of the line.
530 ///
531 /// By default, this is disabled. It may be selectively enabled in the
532 /// regular expression by using the `x` flag regardless of this setting.
533 pub fn verbose_mode(&mut self, yes: bool) -> &mut Self {
534 self.set_config(|x| x.ignore_whitespace(yes))
535 }
536
537 /// Enable or disable the Unicode flag (`u`) by default.
538 ///
539 /// By default this is **enabled**. It may alternatively be selectively
540 /// disabled in the regular expression itself via the `u` flag.
541 ///
542 /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
543 /// default), a regular expression will fail to parse if Unicode mode is
544 /// disabled and a sub-expression could possibly match invalid UTF-8.
545 ///
546 /// **WARNING**: Unicode mode can greatly increase the size of the compiled
547 /// DFA, which can noticeably impact both memory usage and compilation
548 /// time. This is especially noticeable if your regex contains character
549 /// classes like `\w` that are impacted by whether Unicode is enabled or
550 /// not. If Unicode is not necessary, you are encouraged to disable it.
551 pub fn unicode_mode(&mut self, yes: bool) -> &mut Self {
552 self.set_config(|x| x.unicode(yes))
553 }
554
555 /// Limit for how many times backtracking should be attempted for fancy regexes (where
556 /// backtracking is used). If this limit is exceeded, execution returns an error with
557 /// [`Error::BacktrackLimitExceeded`](enum.Error.html#variant.BacktrackLimitExceeded).
558 /// This is for preventing a regex with catastrophic backtracking to run for too long.
559 ///
560 /// Default is `1_000_000` (1 million).
561 pub fn backtrack_limit(&mut self, limit: usize) -> &mut Self {
562 self.options.hard_regex_runtime_options.backtrack_limit = limit;
563 self
564 }
565
566 /// Set the approximate size limit of the compiled regular expression.
567 ///
568 /// This option is forwarded from the wrapped `regex` crate. Note that depending on the used
569 /// regex features there may be multiple delegated sub-regexes fed to the `regex` crate. As
570 /// such the actual limit is closer to `<number of delegated regexes> * delegate_size_limit`.
571 pub fn delegate_size_limit(&mut self, limit: usize) -> &mut Self {
572 self.options.delegate_size_limit = Some(limit);
573 self
574 }
575
576 /// Set the approximate size of the cache used by the DFA.
577 ///
578 /// This option is forwarded from the wrapped `regex` crate. Note that depending on the used
579 /// regex features there may be multiple delegated sub-regexes fed to the `regex` crate. As
580 /// such the actual limit is closer to `<number of delegated regexes> *
581 /// delegate_dfa_size_limit`.
582 pub fn delegate_dfa_size_limit(&mut self, limit: usize) -> &mut Self {
583 self.options.delegate_dfa_size_limit = Some(limit);
584 self
585 }
586
587 /// Require that matches are non-empty (i.e. match at least one character).
588 ///
589 /// When this is enabled, any match attempt that would result in a zero-length match is
590 /// rejected.
591 ///
592 /// Default is `false`.
593 ///
594 /// N.B. When `find_not_empty` is set and analysis determines the pattern will only ever
595 /// produce an empty match, compiling the regex will return
596 /// `CompileError::PatternCanNeverMatch` instead of silently constructing a regex that can never
597 /// return a result. This catches the user error at compile time rather than allowing the
598 /// combination to execute pointlessly at runtime.
599 pub fn find_not_empty(&mut self, yes: bool) -> &mut Self {
600 self.options.hard_regex_runtime_options.find_not_empty = yes;
601 self
602 }
603
604 /// Treat unnamed capture groups as non-capturing when named groups exist.
605 /// Prevents accessing capture groups by number from within the pattern
606 /// (backrefs, subroutine calls) when named groups are present.
607 pub fn ignore_numbered_groups_when_named_groups_exist(&mut self, yes: bool) -> &mut Self {
608 self.options.ignore_numbered_groups_when_named_groups_exist = yes;
609 self
610 }
611
612 /// Attempts to better match [Oniguruma](https://github.com/kkos/oniguruma)'s default behavior
613 ///
614 /// Currently this amounts to changing behavior with:
615 ///
616 /// # Left and right word bounds
617 ///
618 /// `fancy-regex` follows the default of other regex engines such as the `regex` crate itself
619 /// where `\<` and `\>` correspond to a _left_ and _right_ word-bound respectively. This
620 /// differs from Oniguruma's defaults which treat them as matching the literals `<` and `>`.
621 /// When this option is set using `\<` and `\>` in the pattern will match the literals
622 /// `<` and `>` instead of word bounds.
623 ///
624 /// # Repetition/Quantifiers on empty groups
625 ///
626 /// `fancy-regex` would normally reject patterns like `(?:)+` because the `+` has nothing
627 /// to target. In Oniguruma mode, the empty repeat is silently dropped at parse time.
628 ///
629 /// ## Example
630 ///
631 /// ```
632 /// use fancy_regex::{Regex, RegexBuilder};
633 ///
634 /// let haystack = "turbo::<Fish>";
635 /// let regex = r"\<\w*\>";
636 ///
637 /// // By default `\<` and `\>` will match the start and end of a word boundary
638 /// let word_bounds_regex = Regex::new(regex).unwrap();
639 /// let word_bounds = word_bounds_regex.find(haystack).unwrap().unwrap();
640 /// assert_eq!(word_bounds.as_str(), "turbo");
641 ///
642 /// // With the option set they instead match the literal `<` and `>` characters
643 /// let literals_regex = RegexBuilder::new(regex).oniguruma_mode(true).build().unwrap();
644 /// let literals = literals_regex.find(haystack).unwrap().unwrap();
645 /// assert_eq!(literals.as_str(), "<Fish>");
646 /// ```
647 pub fn oniguruma_mode(&mut self, yes: bool) -> &mut Self {
648 self.options.oniguruma_mode = yes;
649 self
650 }
651}
652
653impl RegexBuilder {
654 /// Create a new regex builder.
655 pub fn new(pattern: &str) -> Self {
656 RegexBuilder {
657 pattern: pattern.to_string(),
658 options: RegexOptionsBuilder::new(),
659 }
660 }
661
662 /// Build a `Regex` from the given pattern.
663 ///
664 /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
665 pub fn build(&self) -> Result<Regex> {
666 self.options.build(self.pattern.clone())
667 }
668
669 /// Change the pattern to build. Useful when building multiple regexes from
670 /// many patterns.
671 pub fn pattern(&mut self, pattern: String) -> &mut Self {
672 self.pattern = pattern;
673 self
674 }
675
676 /// See [`RegexOptionsBuilder::case_insensitive`]
677 pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
678 self.options.case_insensitive(yes);
679 self
680 }
681
682 /// See [`RegexOptionsBuilder::multi_line`]
683 pub fn multi_line(&mut self, yes: bool) -> &mut Self {
684 self.options.multi_line(yes);
685 self
686 }
687
688 /// See [`RegexOptionsBuilder::ignore_whitespace`]
689 pub fn ignore_whitespace(&mut self, yes: bool) -> &mut Self {
690 self.options.ignore_whitespace(yes);
691 self
692 }
693
694 /// See [`RegexOptionsBuilder::dot_matches_new_line`]
695 pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
696 self.options.dot_matches_new_line(yes);
697 self
698 }
699
700 /// See [`RegexOptionsBuilder::verbose_mode`]
701 pub fn verbose_mode(&mut self, yes: bool) -> &mut Self {
702 self.options.ignore_whitespace(yes);
703 self
704 }
705
706 /// See [`RegexOptionsBuilder::unicode_mode`]
707 pub fn unicode_mode(&mut self, yes: bool) -> &mut Self {
708 self.options.unicode_mode(yes);
709 self
710 }
711
712 /// See [`RegexOptionsBuilder::backtrack_limit`]
713 pub fn backtrack_limit(&mut self, limit: usize) -> &mut Self {
714 self.options.backtrack_limit(limit);
715 self
716 }
717
718 /// See [`RegexOptionsBuilder::delegate_size_limit`]
719 pub fn delegate_size_limit(&mut self, limit: usize) -> &mut Self {
720 self.options.delegate_size_limit(limit);
721 self
722 }
723
724 /// See [`RegexOptionsBuilder::delegate_dfa_size_limit`]
725 pub fn delegate_dfa_size_limit(&mut self, limit: usize) -> &mut Self {
726 self.options.delegate_dfa_size_limit(limit);
727 self
728 }
729
730 /// See [`RegexOptionsBuilder::oniguruma_mode`]
731 pub fn oniguruma_mode(&mut self, yes: bool) -> &mut Self {
732 self.options.oniguruma_mode(yes);
733 self
734 }
735
736 /// See [`RegexOptionsBuilder::crlf`]
737 pub fn crlf(&mut self, yes: bool) -> &mut Self {
738 self.options.crlf(yes);
739 self
740 }
741
742 /// See [`RegexOptionsBuilder::find_not_empty`]
743 pub fn find_not_empty(&mut self, yes: bool) -> &mut Self {
744 self.options.find_not_empty(yes);
745 self
746 }
747
748 /// See [`RegexOptionsBuilder::ignore_numbered_groups_when_named_groups_exist`]
749 pub fn ignore_numbered_groups_when_named_groups_exist(&mut self, yes: bool) -> &mut Self {
750 self.options
751 .ignore_numbered_groups_when_named_groups_exist(yes);
752 self
753 }
754}
755
756impl fmt::Debug for Regex {
757 /// Shows the original regular expression.
758 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
759 write!(f, "{}", self.as_str())
760 }
761}
762
763impl fmt::Display for Regex {
764 /// Shows the original regular expression
765 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
766 write!(f, "{}", self.as_str())
767 }
768}
769
770impl FromStr for Regex {
771 type Err = Error;
772
773 /// Attempts to parse a string into a regular expression
774 fn from_str(s: &str) -> Result<Regex> {
775 Regex::new(s)
776 }
777}
778
779impl Regex {
780 /// Parse and compile a regex with default options, see `RegexBuilder`.
781 ///
782 /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
783 pub fn new(re: &str) -> Result<Regex> {
784 Self::new_options(re.to_string(), &RegexOptions::default())
785 }
786
787 fn new_options(pattern: String, options: &RegexOptions) -> Result<Regex> {
788 let mut tree = Expr::parse_tree_with_flags(&pattern, options.compute_flags())?;
789
790 let find_not_empty = options.hard_regex_runtime_options.find_not_empty;
791
792 let requires_capture_group_fixup = if find_not_empty {
793 // if the find_not_empty flag is set, we skip optimizations
794 // partially because we have to go though the VM anyway
795 // partially because having the last instruction of the expression not have
796 // ix be at the end of capture group 0 ruins our empty match checking logic.
797 false
798 } else {
799 // try to optimize the expression tree so that a hard pattern could become easy
800 // with a fixup of the capture groups
801 optimize(&mut tree)
802 };
803 let info = analyze(
804 &tree,
805 AnalyzeContext {
806 explicit_capture_group_0: requires_capture_group_fixup,
807 find_not_empty,
808 },
809 )?;
810
811 if find_not_empty && info.const_size && info.min_size == 0 {
812 return Err(CompileError::PatternCanNeverMatch.into());
813 }
814
815 if !info.hard {
816 // easy case, wrap regex
817
818 // we do our own to_str because escapes are different
819 // NOTE: there is a good opportunity here to use Hir to avoid regex-automata re-parsing it
820 let mut re_cooked = String::new();
821 tree.expr.to_str(&mut re_cooked, 0);
822 let inner = compile::compile_inner(&re_cooked, options)?;
823 return Ok(Regex {
824 inner: RegexImpl::Wrap {
825 inner,
826 pattern,
827 explicit_capture_group_0: requires_capture_group_fixup,
828 delegated_pattern: re_cooked,
829 },
830 named_groups: Arc::new(tree.named_groups),
831 });
832 }
833
834 let prog = compile(
835 &info,
836 CompileOptions {
837 anchored: can_compile_as_anchored(&tree.expr),
838 contains_subroutines: tree.contains_subroutines,
839 },
840 )?;
841 Ok(Regex {
842 inner: RegexImpl::Fancy {
843 prog: Arc::new(prog),
844 n_groups: info.end_group(),
845 options: options.hard_regex_runtime_options,
846 pattern,
847 },
848 named_groups: Arc::new(tree.named_groups),
849 })
850 }
851
852 /// Returns the original string of this regex.
853 pub fn as_str(&self) -> &str {
854 match &self.inner {
855 RegexImpl::Wrap { pattern, .. } => pattern,
856 RegexImpl::Fancy { pattern, .. } => pattern,
857 }
858 }
859
860 /// Check if the regex matches the input text.
861 ///
862 /// # Example
863 ///
864 /// Test if some text contains the same word twice:
865 ///
866 /// ```rust
867 /// # use fancy_regex::Regex;
868 ///
869 /// let re = Regex::new(r"(\w+) \1").unwrap();
870 /// assert!(re.is_match("mirror mirror on the wall").unwrap());
871 /// ```
872 pub fn is_match(&self, text: &str) -> Result<bool> {
873 match &self.inner {
874 RegexImpl::Wrap { inner, .. } => Ok(inner.is_match(text)),
875 RegexImpl::Fancy { .. } => self.find(text).map(|m| m.is_some()),
876 }
877 }
878
879 /// Returns an iterator for each successive non-overlapping match in `text`.
880 ///
881 /// If you have capturing groups in your regex that you want to extract, use the [Regex::captures_iter()]
882 /// method.
883 ///
884 /// # Example
885 ///
886 /// Find all words followed by an exclamation point:
887 ///
888 /// ```rust
889 /// # use fancy_regex::Regex;
890 ///
891 /// let re = Regex::new(r"\w+(?=!)").unwrap();
892 /// let mut matches = re.find_iter("so fancy! even with! iterators!");
893 /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "fancy");
894 /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "with");
895 /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "iterators");
896 /// assert!(matches.next().is_none());
897 /// ```
898 pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
899 Matches {
900 re: self,
901 text,
902 last_end: 0,
903 last_match: None,
904 last_skipped_empty: false,
905 }
906 }
907
908 /// Find the first match in the input text.
909 ///
910 /// If you have capturing groups in your regex that you want to extract, use the [Regex::captures()]
911 /// method.
912 ///
913 /// # Example
914 ///
915 /// Find a word that is followed by an exclamation point:
916 ///
917 /// ```rust
918 /// # use fancy_regex::Regex;
919 ///
920 /// let re = Regex::new(r"\w+(?=!)").unwrap();
921 /// assert_eq!(re.find("so fancy!").unwrap().unwrap().as_str(), "fancy");
922 /// ```
923 pub fn find<'t>(&self, text: &'t str) -> Result<Option<Match<'t>>> {
924 self.find_from_pos(text, 0)
925 }
926
927 /// Returns the first match in `text`, starting from the specified byte position `pos`.
928 ///
929 /// # Examples
930 ///
931 /// Finding match starting at a position:
932 ///
933 /// ```
934 /// # use fancy_regex::Regex;
935 /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
936 /// let text = "1 test 123\n2 foo";
937 /// let mat = re.find_from_pos(text, 7).unwrap().unwrap();
938 ///
939 /// assert_eq!(mat.start(), 11);
940 /// assert_eq!(mat.end(), 12);
941 /// ```
942 ///
943 /// Note that in some cases this is not the same as using the `find`
944 /// method and passing a slice of the string, see [Regex::captures_from_pos()] for details.
945 pub fn find_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Match<'t>>> {
946 self.find_from_pos_with_option_flags(text, pos, 0)
947 }
948
949 fn find_from_pos_with_option_flags<'t>(
950 &self,
951 text: &'t str,
952 pos: usize,
953 option_flags: u32,
954 ) -> Result<Option<Match<'t>>> {
955 if pos > text.len() {
956 return Ok(None);
957 }
958 match &self.inner {
959 RegexImpl::Wrap {
960 inner,
961 explicit_capture_group_0,
962 ..
963 } => {
964 let result = if !*explicit_capture_group_0 {
965 inner
966 .search(&RaInput::new(text).span(pos..text.len()))
967 .map(|m| Match::new(text, m.start(), m.end()))
968 } else {
969 let mut locations = inner.create_captures();
970 inner.captures(RaInput::new(text).span(pos..text.len()), &mut locations);
971 locations
972 .get_group(1)
973 .map(|group1| Match::new(text, group1.start, group1.end))
974 };
975 Ok(result)
976 }
977 RegexImpl::Fancy { prog, options, .. } => {
978 let option_flags = option_flags
979 | if options.find_not_empty {
980 OPTION_FIND_NOT_EMPTY
981 } else {
982 0
983 };
984 let result = vm::run(prog, text, pos, option_flags, options)?;
985 Ok(result.map(|saves| Match::new(text, saves[0], saves[1])))
986 }
987 }
988 }
989
990 /// Returns an iterator over all the non-overlapping capture groups matched in `text`.
991 ///
992 /// # Examples
993 ///
994 /// Finding all matches and capturing parts of each:
995 ///
996 /// ```rust
997 /// # use fancy_regex::Regex;
998 ///
999 /// let re = Regex::new(r"(\d{4})-(\d{2})").unwrap();
1000 /// let text = "It was between 2018-04 and 2020-01";
1001 /// let mut all_captures = re.captures_iter(text);
1002 ///
1003 /// let first = all_captures.next().unwrap().unwrap();
1004 /// assert_eq!(first.get(1).unwrap().as_str(), "2018");
1005 /// assert_eq!(first.get(2).unwrap().as_str(), "04");
1006 /// assert_eq!(first.get(0).unwrap().as_str(), "2018-04");
1007 ///
1008 /// let second = all_captures.next().unwrap().unwrap();
1009 /// assert_eq!(second.get(1).unwrap().as_str(), "2020");
1010 /// assert_eq!(second.get(2).unwrap().as_str(), "01");
1011 /// assert_eq!(second.get(0).unwrap().as_str(), "2020-01");
1012 ///
1013 /// assert!(all_captures.next().is_none());
1014 /// ```
1015 pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CaptureMatches<'r, 't> {
1016 CaptureMatches(self.find_iter(text))
1017 }
1018
1019 /// Returns the capture groups for the first match in `text`.
1020 ///
1021 /// If no match is found, then `Ok(None)` is returned.
1022 ///
1023 /// # Examples
1024 ///
1025 /// Finding matches and capturing parts of the match:
1026 ///
1027 /// ```rust
1028 /// # use fancy_regex::Regex;
1029 ///
1030 /// let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
1031 /// let text = "The date was 2018-04-07";
1032 /// let captures = re.captures(text).unwrap().unwrap();
1033 ///
1034 /// assert_eq!(captures.get(1).unwrap().as_str(), "2018");
1035 /// assert_eq!(captures.get(2).unwrap().as_str(), "04");
1036 /// assert_eq!(captures.get(3).unwrap().as_str(), "07");
1037 /// assert_eq!(captures.get(0).unwrap().as_str(), "2018-04-07");
1038 /// ```
1039 pub fn captures<'t>(&self, text: &'t str) -> Result<Option<Captures<'t>>> {
1040 self.captures_from_pos(text, 0)
1041 }
1042
1043 /// Returns the capture groups for the first match in `text`, starting from
1044 /// the specified byte position `pos`.
1045 ///
1046 /// # Examples
1047 ///
1048 /// Finding captures starting at a position:
1049 ///
1050 /// ```
1051 /// # use fancy_regex::Regex;
1052 /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
1053 /// let text = "1 test 123\n2 foo";
1054 /// let captures = re.captures_from_pos(text, 7).unwrap().unwrap();
1055 ///
1056 /// let group = captures.get(1).unwrap();
1057 /// assert_eq!(group.as_str(), "2");
1058 /// assert_eq!(group.start(), 11);
1059 /// assert_eq!(group.end(), 12);
1060 /// ```
1061 ///
1062 /// Note that in some cases this is not the same as using the `captures`
1063 /// method and passing a slice of the string, see the capture that we get
1064 /// when we do this:
1065 ///
1066 /// ```
1067 /// # use fancy_regex::Regex;
1068 /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
1069 /// let text = "1 test 123\n2 foo";
1070 /// let captures = re.captures(&text[7..]).unwrap().unwrap();
1071 /// assert_eq!(captures.get(1).unwrap().as_str(), "123");
1072 /// ```
1073 ///
1074 /// This matched the number "123" because it's at the beginning of the text
1075 /// of the string slice.
1076 ///
1077 pub fn captures_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Captures<'t>>> {
1078 self.captures_from_pos_with_option_flags(text, pos, 0)
1079 }
1080
1081 fn captures_from_pos_with_option_flags<'t>(
1082 &self,
1083 text: &'t str,
1084 pos: usize,
1085 option_flags: u32,
1086 ) -> Result<Option<Captures<'t>>> {
1087 if pos > text.len() {
1088 return Ok(None);
1089 }
1090 let named_groups = self.named_groups.clone();
1091 match &self.inner {
1092 RegexImpl::Wrap {
1093 inner,
1094 explicit_capture_group_0,
1095 ..
1096 } => {
1097 // find_not_empty patterns are always compiled as Fancy, so find_not_empty is
1098 // always false here.
1099 let explicit = *explicit_capture_group_0;
1100 let mut locations = inner.create_captures();
1101 inner.captures(RaInput::new(text).span(pos..text.len()), &mut locations);
1102 Ok(locations.is_match().then_some(Captures {
1103 inner: CapturesImpl::Wrap {
1104 text,
1105 locations,
1106 explicit_capture_group_0: explicit,
1107 },
1108 named_groups,
1109 }))
1110 }
1111 RegexImpl::Fancy {
1112 prog,
1113 n_groups,
1114 options,
1115 ..
1116 } => {
1117 let option_flags = option_flags
1118 | if options.find_not_empty {
1119 OPTION_FIND_NOT_EMPTY
1120 } else {
1121 0
1122 };
1123 let result = vm::run(prog, text, pos, option_flags, options)?;
1124 Ok(result.map(|mut saves| {
1125 saves.truncate(n_groups * 2);
1126 Captures {
1127 inner: CapturesImpl::Fancy { text, saves },
1128 named_groups,
1129 }
1130 }))
1131 }
1132 }
1133 }
1134
1135 /// Returns the number of captures, including the implicit capture of the entire expression.
1136 pub fn captures_len(&self) -> usize {
1137 match &self.inner {
1138 RegexImpl::Wrap {
1139 inner,
1140 explicit_capture_group_0,
1141 ..
1142 } => inner.captures_len() - if *explicit_capture_group_0 { 1 } else { 0 },
1143 RegexImpl::Fancy { n_groups, .. } => *n_groups,
1144 }
1145 }
1146
1147 /// Returns an iterator over the capture names.
1148 pub fn capture_names(&self) -> CaptureNames<'_> {
1149 let mut names = Vec::new();
1150 names.resize(self.captures_len(), None);
1151 for (name, &i) in self.named_groups.iter() {
1152 names[i] = Some(name.as_str());
1153 }
1154 CaptureNames(names.into_iter())
1155 }
1156
1157 // for debugging only
1158 #[doc(hidden)]
1159 pub fn debug_print(&self, writer: &mut Formatter<'_>) -> fmt::Result {
1160 match &self.inner {
1161 RegexImpl::Wrap {
1162 delegated_pattern,
1163 explicit_capture_group_0,
1164 ..
1165 } => {
1166 write!(
1167 writer,
1168 "wrapped Regex {:?}, explicit_capture_group_0: {:}",
1169 delegated_pattern, *explicit_capture_group_0
1170 )
1171 }
1172 RegexImpl::Fancy { prog, .. } => prog.debug_print(writer),
1173 }
1174 }
1175
1176 /// Replaces the leftmost-first match with the replacement provided.
1177 /// The replacement can be a regular string (where `$N` and `$name` are
1178 /// expanded to match capture groups) or a function that takes the matches'
1179 /// `Captures` and returns the replaced string.
1180 ///
1181 /// If no match is found, then a copy of the string is returned unchanged.
1182 ///
1183 /// # Replacement string syntax
1184 ///
/// All instances of `$name` in the replacement text are replaced with the
/// corresponding capture group `name`.
1187 ///
1188 /// `name` may be an integer corresponding to the index of the
1189 /// capture group (counted by order of opening parenthesis where `0` is the
1190 /// entire match) or it can be a name (consisting of letters, digits or
1191 /// underscores) corresponding to a named capture group.
1192 ///
1193 /// If `name` isn't a valid capture group (whether the name doesn't exist
1194 /// or isn't a valid index), then it is replaced with the empty string.
1195 ///
1196 /// The longest possible name is used. e.g., `$1a` looks up the capture
1197 /// group named `1a` and not the capture group at index `1`. To exert more
1198 /// precise control over the name, use braces, e.g., `${1}a`.
1199 ///
1200 /// To write a literal `$` use `$$`.
1201 ///
1202 /// # Examples
1203 ///
1204 /// Note that this function is polymorphic with respect to the replacement.
1205 /// In typical usage, this can just be a normal string:
1206 ///
1207 /// ```rust
1208 /// # use fancy_regex::Regex;
1209 /// let re = Regex::new("[^01]+").unwrap();
1210 /// assert_eq!(re.replace("1078910", ""), "1010");
1211 /// ```
1212 ///
1213 /// But anything satisfying the `Replacer` trait will work. For example,
1214 /// a closure of type `|&Captures| -> String` provides direct access to the
1215 /// captures corresponding to a match. This allows one to access
1216 /// capturing group matches easily:
1217 ///
1218 /// ```rust
1219 /// # use fancy_regex::{Regex, Captures};
1220 /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
1221 /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
1222 /// format!("{} {}", &caps[2], &caps[1])
1223 /// });
1224 /// assert_eq!(result, "Bruce Springsteen");
1225 /// ```
1226 ///
1227 /// But this is a bit cumbersome to use all the time. Instead, a simple
1228 /// syntax is supported that expands `$name` into the corresponding capture
1229 /// group. Here's the last example, but using this expansion technique
1230 /// with named capture groups:
1231 ///
1232 /// ```rust
1233 /// # use fancy_regex::Regex;
1234 /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
1235 /// let result = re.replace("Springsteen, Bruce", "$first $last");
1236 /// assert_eq!(result, "Bruce Springsteen");
1237 /// ```
1238 ///
1239 /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
1240 /// would produce the same result. To write a literal `$` use `$$`.
1241 ///
1242 /// Sometimes the replacement string requires use of curly braces to
1243 /// delineate a capture group replacement and surrounding literal text.
1244 /// For example, if we wanted to join two words together with an
1245 /// underscore:
1246 ///
1247 /// ```rust
1248 /// # use fancy_regex::Regex;
1249 /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
1250 /// let result = re.replace("deep fried", "${first}_$second");
1251 /// assert_eq!(result, "deep_fried");
1252 /// ```
1253 ///
1254 /// Without the curly braces, the capture group name `first_` would be
1255 /// used, and since it doesn't exist, it would be replaced with the empty
1256 /// string.
1257 ///
/// Finally, sometimes you just want to replace a literal string with no
/// regard for capturing group expansion. This can be done by wrapping a
/// string with `NoExpand`:
1261 ///
1262 /// ```rust
1263 /// # use fancy_regex::Regex;
1264 /// use fancy_regex::NoExpand;
1265 ///
1266 /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
1267 /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
1268 /// assert_eq!(result, "$2 $last");
1269 /// ```
1270 pub fn replace<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
1271 self.replacen(text, 1, rep)
1272 }
1273
1274 /// Replaces all non-overlapping matches in `text` with the replacement
1275 /// provided. This is the same as calling `replacen` with `limit` set to
1276 /// `0`.
1277 ///
1278 /// See the documentation for `replace` for details on how to access
1279 /// capturing group matches in the replacement string.
1280 pub fn replace_all<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
1281 self.replacen(text, 0, rep)
1282 }
1283
1284 /// Replaces at most `limit` non-overlapping matches in `text` with the
1285 /// replacement provided. If `limit` is 0, then all non-overlapping matches
1286 /// are replaced.
1287 ///
1288 /// Will panic if any errors are encountered. Use `try_replacen`, which this
1289 /// function unwraps, if you want to handle errors.
1290 ///
1291 /// See the documentation for `replace` for details on how to access
1292 /// capturing group matches in the replacement string.
1293 ///
1294 pub fn replacen<'t, R: Replacer>(&self, text: &'t str, limit: usize, rep: R) -> Cow<'t, str> {
1295 self.try_replacen(text, limit, rep).unwrap()
1296 }
1297
1298 /// Replaces at most `limit` non-overlapping matches in `text` with the
1299 /// replacement provided. If `limit` is 0, then all non-overlapping matches
1300 /// are replaced.
1301 ///
1302 /// Propagates any errors encountered, such as `RuntimeError::BacktrackLimitExceeded`.
1303 ///
1304 /// See the documentation for `replace` for details on how to access
1305 /// capturing group matches in the replacement string.
1306 pub fn try_replacen<'t, R: Replacer>(
1307 &self,
1308 text: &'t str,
1309 limit: usize,
1310 mut rep: R,
1311 ) -> Result<Cow<'t, str>> {
1312 // If we know that the replacement doesn't have any capture expansions,
1313 // then we can fast path. The fast path can make a tremendous
1314 // difference:
1315 //
1316 // 1) We use `find_iter` instead of `captures_iter`. Not asking for
1317 // captures generally makes the regex engines faster.
1318 // 2) We don't need to look up all of the capture groups and do
1319 // replacements inside the replacement string. We just push it
1320 // at each match and be done with it.
1321 if let Some(rep) = rep.no_expansion() {
1322 let mut it = self.find_iter(text).enumerate().peekable();
1323 if it.peek().is_none() {
1324 return Ok(Cow::Borrowed(text));
1325 }
1326 let mut new = String::with_capacity(text.len());
1327 let mut last_match = 0;
1328 for (i, m) in it {
1329 let m = m?;
1330
1331 if limit > 0 && i >= limit {
1332 break;
1333 }
1334 new.push_str(&text[last_match..m.start()]);
1335 new.push_str(&rep);
1336 last_match = m.end();
1337 }
1338 new.push_str(&text[last_match..]);
1339 return Ok(Cow::Owned(new));
1340 }
1341
1342 // The slower path, which we use if the replacement needs access to
1343 // capture groups.
1344 let mut it = self.captures_iter(text).enumerate().peekable();
1345 if it.peek().is_none() {
1346 return Ok(Cow::Borrowed(text));
1347 }
1348 let mut new = String::with_capacity(text.len());
1349 let mut last_match = 0;
1350 for (i, cap) in it {
1351 let cap = cap?;
1352
1353 if limit > 0 && i >= limit {
1354 break;
1355 }
1356 // unwrap on 0 is OK because captures only reports matches
1357 let m = cap.get(0).unwrap();
1358 new.push_str(&text[last_match..m.start()]);
1359 rep.replace_append(&cap, &mut new);
1360 last_match = m.end();
1361 }
1362 new.push_str(&text[last_match..]);
1363 Ok(Cow::Owned(new))
1364 }
1365
1366 /// Splits the string by matches of the regex.
1367 ///
1368 /// Returns an iterator over the substrings of the target string
1369 /// that *aren't* matched by the regex.
1370 ///
1371 /// # Example
1372 ///
1373 /// To split a string delimited by arbitrary amounts of spaces or tabs:
1374 ///
1375 /// ```rust
1376 /// # use fancy_regex::Regex;
1377 /// let re = Regex::new(r"[ \t]+").unwrap();
1378 /// let target = "a b \t c\td e";
1379 /// let fields: Vec<&str> = re.split(target).map(|x| x.unwrap()).collect();
1380 /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
1381 /// ```
1382 pub fn split<'r, 'h>(&'r self, target: &'h str) -> Split<'r, 'h> {
1383 Split {
1384 matches: self.find_iter(target),
1385 next_start: 0,
1386 target,
1387 }
1388 }
1389
1390 /// Splits the string by matches of the regex at most `limit` times.
1391 ///
1392 /// Returns an iterator over the substrings of the target string
1393 /// that *aren't* matched by the regex.
1394 ///
/// The last (`limit`th) substring is the remaining part of the target.
1396 ///
1397 /// # Example
1398 ///
1399 /// To split a string delimited by arbitrary amounts of spaces or tabs
1400 /// 3 times:
1401 ///
1402 /// ```rust
1403 /// # use fancy_regex::Regex;
1404 /// let re = Regex::new(r"[ \t]+").unwrap();
1405 /// let target = "a b \t c\td e";
1406 /// let fields: Vec<&str> = re.splitn(target, 3).map(|x| x.unwrap()).collect();
1407 /// assert_eq!(fields, vec!["a", "b", "c\td e"]);
1408 /// ```
1409 pub fn splitn<'r, 'h>(&'r self, target: &'h str, limit: usize) -> SplitN<'r, 'h> {
1410 SplitN {
1411 splits: self.split(target),
1412 limit,
1413 }
1414 }
1415}
1416
1417impl TryFrom<&str> for Regex {
1418 type Error = Error;
1419
1420 /// Attempts to parse a string into a regular expression
1421 fn try_from(s: &str) -> Result<Self> {
1422 Self::new(s)
1423 }
1424}
1425
1426impl TryFrom<String> for Regex {
1427 type Error = Error;
1428
1429 /// Attempts to parse a string into a regular expression
1430 fn try_from(s: String) -> Result<Self> {
1431 Self::new(&s)
1432 }
1433}
1434
1435impl<'t> Match<'t> {
1436 /// Returns the starting byte offset of the match in the text.
1437 #[inline]
1438 pub fn start(&self) -> usize {
1439 self.start
1440 }
1441
1442 /// Returns the ending byte offset of the match in the text.
1443 #[inline]
1444 pub fn end(&self) -> usize {
1445 self.end
1446 }
1447
1448 /// Returns the range over the starting and ending byte offsets of the match in text.
1449 #[inline]
1450 pub fn range(&self) -> Range<usize> {
1451 self.start..self.end
1452 }
1453
1454 /// Returns the matched text.
1455 #[inline]
1456 pub fn as_str(&self) -> &'t str {
1457 &self.text[self.start..self.end]
1458 }
1459
1460 /// Creates a new match from the given text and byte offsets.
1461 fn new(text: &'t str, start: usize, end: usize) -> Match<'t> {
1462 Match { text, start, end }
1463 }
1464}
1465
1466impl<'t> From<Match<'t>> for &'t str {
1467 fn from(m: Match<'t>) -> &'t str {
1468 m.as_str()
1469 }
1470}
1471
1472impl<'t> From<Match<'t>> for Range<usize> {
1473 fn from(m: Match<'t>) -> Range<usize> {
1474 m.range()
1475 }
1476}
1477
1478#[allow(clippy::len_without_is_empty)] // follow regex's API
1479impl<'t> Captures<'t> {
1480 /// Get the capture group by its index in the regex.
1481 ///
1482 /// If there is no match for that group or the index does not correspond to a group, `None` is
1483 /// returned. The index 0 returns the whole match.
1484 pub fn get(&self, i: usize) -> Option<Match<'t>> {
1485 match &self.inner {
1486 CapturesImpl::Wrap {
1487 text,
1488 locations,
1489 explicit_capture_group_0,
1490 } => locations
1491 .get_group(i + if *explicit_capture_group_0 { 1 } else { 0 })
1492 .map(|span| Match {
1493 text,
1494 start: span.start,
1495 end: span.end,
1496 }),
1497 CapturesImpl::Fancy { text, saves } => {
1498 let slot = i * 2;
1499 if slot >= saves.len() {
1500 return None;
1501 }
1502 let lo = saves[slot];
1503 if lo == usize::MAX {
1504 return None;
1505 }
1506 let hi = saves[slot + 1];
1507 Some(Match {
1508 text,
1509 start: lo,
1510 end: hi,
1511 })
1512 }
1513 }
1514 }
1515
1516 /// Returns the match for a named capture group. Returns `None` the capture
1517 /// group did not match or if there is no group with the given name.
1518 pub fn name(&self, name: &str) -> Option<Match<'t>> {
1519 self.named_groups.get(name).and_then(|i| self.get(*i))
1520 }
1521
1522 /// Expands all instances of `$group` in `replacement` to the corresponding
1523 /// capture group `name`, and writes them to the `dst` buffer given.
1524 ///
1525 /// `group` may be an integer corresponding to the index of the
1526 /// capture group (counted by order of opening parenthesis where `\0` is the
1527 /// entire match) or it can be a name (consisting of letters, digits or
1528 /// underscores) corresponding to a named capture group.
1529 ///
1530 /// If `group` isn't a valid capture group (whether the name doesn't exist
1531 /// or isn't a valid index), then it is replaced with the empty string.
1532 ///
1533 /// The longest possible name is used. e.g., `$1a` looks up the capture
1534 /// group named `1a` and not the capture group at index `1`. To exert more
1535 /// precise control over the name, use braces, e.g., `${1}a`.
1536 ///
1537 /// To write a literal `$`, use `$$`.
1538 ///
1539 /// For more control over expansion, see [`Expander`].
1540 ///
1541 /// [`Expander`]: expand/struct.Expander.html
1542 pub fn expand(&self, replacement: &str, dst: &mut String) {
1543 Expander::default().append_expansion(dst, replacement, self);
1544 }
1545
1546 /// Iterate over the captured groups in order in which they appeared in the regex. The first
1547 /// capture corresponds to the whole match.
1548 pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
1549 SubCaptureMatches { caps: self, i: 0 }
1550 }
1551
1552 /// How many groups were captured. This is always at least 1 because group 0 returns the whole
1553 /// match.
1554 pub fn len(&self) -> usize {
1555 match &self.inner {
1556 CapturesImpl::Wrap {
1557 locations,
1558 explicit_capture_group_0,
1559 ..
1560 } => locations.group_len() - if *explicit_capture_group_0 { 1 } else { 0 },
1561 CapturesImpl::Fancy { saves, .. } => saves.len() / 2,
1562 }
1563 }
1564}
1565
1566/// Get a group by index.
1567///
1568/// `'t` is the lifetime of the matched text.
1569///
1570/// The text can't outlive the `Captures` object if this method is
1571/// used, because of how `Index` is defined (normally `a[i]` is part
1572/// of `a` and can't outlive it); to do that, use `get()` instead.
1573///
1574/// # Panics
1575///
1576/// If there is no group at the given index.
1577impl<'t> Index<usize> for Captures<'t> {
1578 type Output = str;
1579
1580 fn index(&self, i: usize) -> &str {
1581 self.get(i)
1582 .map(|m| m.as_str())
1583 .unwrap_or_else(|| panic!("no group at index '{}'", i))
1584 }
1585}
1586
1587/// Get a group by name.
1588///
1589/// `'t` is the lifetime of the matched text and `'i` is the lifetime
1590/// of the group name (the index).
1591///
1592/// The text can't outlive the `Captures` object if this method is
1593/// used, because of how `Index` is defined (normally `a[i]` is part
1594/// of `a` and can't outlive it); to do that, use `name` instead.
1595///
1596/// # Panics
1597///
1598/// If there is no group named by the given value.
1599impl<'t, 'i> Index<&'i str> for Captures<'t> {
1600 type Output = str;
1601
1602 fn index<'a>(&'a self, name: &'i str) -> &'a str {
1603 self.name(name)
1604 .map(|m| m.as_str())
1605 .unwrap_or_else(|| panic!("no group named '{}'", name))
1606 }
1607}
1608
1609impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
1610 type Item = Option<Match<'t>>;
1611
1612 fn next(&mut self) -> Option<Option<Match<'t>>> {
1613 if self.i < self.caps.len() {
1614 let result = self.caps.get(self.i);
1615 self.i += 1;
1616 Some(result)
1617 } else {
1618 None
1619 }
1620 }
1621}
1622
1623// TODO: might be nice to implement ExactSizeIterator etc for SubCaptures
1624
/// Regular expression AST. This is public for now but may change.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Expr {
    /// An empty expression, e.g. the last branch in `(a|b|)`
    Empty,
    /// Any character, regex `.`
    Any {
        /// Whether it also matches newlines or not
        newline: bool,
        /// Whether CRLF mode is enabled (`\r` also counts as a newline, so dot
        /// excludes both `\r` and `\n`)
        crlf: bool,
    },
    /// An assertion
    Assertion(Assertion),
    /// General newline sequence, `\R`.
    ///
    /// Matches `\r\n` or any single newline character (`\n`, `\v`, `\f`, `\r`).
    /// In Unicode mode, also matches U+0085, U+2028 and U+2029.
    GeneralNewline {
        /// Whether Unicode mode is enabled
        unicode: bool,
    },
    /// The string as a literal, e.g. `a`
    Literal {
        /// The string to match
        val: String,
        /// Whether match is case-insensitive or not
        casei: bool,
    },
    /// Concatenation of multiple expressions, must match in order, e.g. `a.` is a concatenation of
    /// the literal `a` and `.` for any character
    Concat(Vec<Expr>),
    /// Alternative of multiple expressions, one of them must match, e.g. `a|b` is an alternative
    /// where either the literal `a` or `b` must match
    Alt(Vec<Expr>),
    /// Capturing group of expression, e.g. `(a.)` matches `a` and any character and "captures"
    /// (remembers) the match
    Group(Arc<Expr>),
    /// Look-around (e.g. positive/negative look-ahead or look-behind) with an expression, e.g.
    /// `(?=a)` means the next character must be `a` (but the match is not consumed).
    ///
    /// The first field is the expression, the second the kind of look-around.
    LookAround(Box<Expr>, LookAround),
    /// Repeat of an expression, e.g. `a*` or `a+` or `a{1,3}`
    Repeat {
        /// The expression that is being repeated
        child: Box<Expr>,
        /// The minimum number of repetitions
        lo: usize,
        /// The maximum number of repetitions (or `usize::MAX`)
        hi: usize,
        /// Greedy means as much as possible is matched, e.g. `.*b` would match all of `abab`.
        /// Non-greedy means as little as possible, e.g. `.*?b` would match only `ab` in `abab`.
        greedy: bool,
    },
    /// Delegate a regex to the regex crate. This is used as a simplification so that we don't have
    /// to represent all the expressions in the AST, e.g. character classes.
    ///
    /// **Constraint**: All Delegate expressions must match exactly 1 character. This ensures
    /// consistent analysis and compilation behavior. For zero-width or multi-character patterns,
    /// use the appropriate Expr variants instead (e.g., Assertion, Repeat, Concat).
    Delegate {
        /// The regex
        inner: String,
        /// Whether the matching is case-insensitive or not
        casei: bool,
    },
    /// Back reference to a capture group, e.g. `\1` in `(abc|def)\1` references the captured group
    /// and the whole regex matches either `abcabc` or `defdef`.
    Backref {
        /// The capture group number being referenced
        group: usize,
        /// Whether the matching is case-insensitive or not
        casei: bool,
    },
    /// Back reference to a capture group at the given specified relative recursion level.
    BackrefWithRelativeRecursionLevel {
        /// The capture group number being referenced
        group: usize,
        /// Relative recursion level
        relative_level: isize,
        /// Whether the matching is case-insensitive or not
        casei: bool,
    },
    /// Atomic non-capturing group, e.g. `(?>ab|a)` in text that contains `ab` will match `ab` and
    /// never backtrack and try `a`, even if matching fails after the atomic group.
    AtomicGroup(Box<Expr>),
    /// Keep matched text so far out of overall match
    KeepOut,
    /// Anchor to match at the position where the previous match ended
    ContinueFromPreviousMatchEnd,
    /// Conditional expression based on whether the numbered capture group matched or not.
    ///
    /// The optional `relative_recursion_level` qualifies which recursion level's capture is
    /// tested (Oniguruma `(?(name+N)...)` syntax).
    BackrefExistsCondition {
        /// The resolved capture group number
        group: usize,
        /// Optional relative recursion level (e.g. `+0`, `-1`)
        relative_recursion_level: Option<isize>,
    },
    /// If/Then/Else Condition. If there is no Then/Else, these will just be empty expressions.
    Conditional {
        /// The conditional expression to evaluate
        condition: Box<Expr>,
        /// What to execute if the condition is true
        true_branch: Box<Expr>,
        /// What to execute if the condition is false
        false_branch: Box<Expr>,
    },
    /// Subroutine call to the specified group number
    SubroutineCall(usize),
    /// Backtracking control verb
    BacktrackingControlVerb(BacktrackingControlVerb),
    /// Match while the given expression is absent from the haystack
    Absent(Absent),
    /// DEFINE group - defines capture groups for subroutines without matching anything.
    ///
    /// The expressions inside are parsed and assigned group numbers, but no VM instructions
    /// are generated for the DEFINE block itself.
    DefineGroup {
        /// The expressions/groups being defined
        definitions: Box<Expr>,
    },
    /// Abstract Syntax Tree node - will be resolved into an Expr before analysis.
    ///
    /// The second field is the position in the pattern where the node was parsed from.
    AstNode(AstNode, usize),
}
1749
/// Target of a backreference or subroutine call, i.e. the way a pattern refers to a
/// capture group: by number, by name or by relative position.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum CaptureGroupTarget {
    /// Direct numbered reference
    ByNumber(usize),

    /// Named reference
    ByName(String),

    /// Relative reference (e.g., -1, -2, etc.)
    Relative(isize),
}
1762
/// Abstract Syntax Tree node - will be resolved into an Expr before analysis.
///
/// Group references in these nodes are still unresolved [CaptureGroupTarget]s.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum AstNode {
    /// Group with optional name - name is only present if explicitly specified in pattern
    AstGroup {
        /// Optional name of the capture group, present only when explicitly named in the pattern
        name: Option<String>,
        /// The inner expression of the group
        inner: Box<Expr>,
    },
    /// Backreference
    Backref {
        /// The target capture group being referenced
        target: CaptureGroupTarget,
        /// Whether the matching is case-insensitive or not
        // TODO: move out of Backref and prefer a Flags AstNode. The resolver can then track the flags and set casei on the resolved Expr accordingly
        casei: bool,
        /// Optional relative recursion level for the backreference
        relative_recursion_level: Option<isize>,
    },
    /// Subroutine call to a not yet resolved target group
    SubroutineCall(CaptureGroupTarget),
    /// Backreference exists condition `(?(name)...)` or `(?(1)...)` - unresolved target.
    ///
    /// The optional `relative_recursion_level` corresponds to the Oniguruma `+N`/`-N` suffix
    /// (e.g. `(?(name+0)...)`) which qualifies which recursion level's capture is tested.
    BackrefExistsCondition {
        /// The target capture group being tested for existence
        target: CaptureGroupTarget,
        /// Optional relative recursion level qualifier (e.g. `+0`, `-1`)
        relative_recursion_level: Option<isize>,
    },
}
1795
/// Type of look-around assertion as used for a look-around expression
/// (the second field of `Expr::LookAround`).
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum LookAround {
    /// Positive look-ahead assertion, e.g. `(?=a)`
    LookAhead,
    /// Negative look-ahead assertion, e.g. `(?!a)`
    LookAheadNeg,
    /// Positive look-behind assertion, e.g. `(?<=a)`
    LookBehind,
    /// Negative look-behind assertion, e.g. `(?<!a)`
    LookBehindNeg,
}
1808
/// Type of absent operator as used for Oniguruma's absent functionality.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Absent {
    /// Absent repeater `(?~absent)` - works like `\O*` (match any character including newline, repeated)
    /// but is limited by the range that does not include the string match with `absent`.
    ///
    /// This is a written abbreviation of `(?~|absent|\O*)`.
    Repeater(Box<Expr>),
    /// Absent expression `(?~|absent|exp)` - works like `exp`, but is limited by the range
    /// that does not include the string match with `absent`.
    Expression {
        /// The expression to avoid matching
        absent: Box<Expr>,
        /// The expression to match
        exp: Box<Expr>,
    },
    /// Absent stopper `(?~|absent)` - after this operator, haystack range is limited
    /// up to the point where `absent` matches.
    Stopper(Box<Expr>),
    /// Range clear `(?~|)` - clears the effects caused by absent stoppers.
    Clear,
}
1830
/// Type of backtracking control verb which affects how backtracking will behave.
///
/// See <https://www.regular-expressions.info/verb.html>
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum BacktrackingControlVerb {
    /// Fail this branch immediately
    Fail,
    /// Treat match so far as successful overall match
    Accept,
    /// Abort the entire match on failure
    Commit,
    /// Restart the entire match attempt at the current position
    Skip,
    /// Prune all backtracking states and restart the entire match attempt at the next position
    Prune,
}
1846
/// An iterator over capture names in a [Regex]. The iterator
/// returns the name of each group, or [None] if the group has
/// no name. Because capture group 0 cannot have a name, the
/// first item returned is always [None].
///
/// The names are borrowed for the lifetime `'r`; the iterator itself wraps an owned
/// `vec::IntoIter` of those borrowed names.
pub struct CaptureNames<'r>(vec::IntoIter<Option<&'r str>>);
1852
1853impl Debug for CaptureNames<'_> {
1854 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1855 f.write_str("<CaptureNames>")
1856 }
1857}
1858
1859impl<'r> Iterator for CaptureNames<'r> {
1860 type Item = Option<&'r str>;
1861
1862 fn next(&mut self) -> Option<Self::Item> {
1863 self.0.next()
1864 }
1865}
1866
// Appends the decimal representation of `x` to `s`.
//
// Hand-rolled rather than going through the formatting machinery: a single-digit
// value (the common case) costs one division and one push.
fn push_usize(s: &mut String, x: usize) {
    let leading = x / 10;
    if leading != 0 {
        // emit the more significant digits first
        push_usize(s, leading);
    }
    s.push(char::from(b'0' + (x % 10) as u8));
}
1877
// Returns `true` when `c` is one of the characters that gets a backslash prefix when quoting.
fn is_special(c: char) -> bool {
    // every character that must be escaped, spelled out once
    const SPECIAL: &str = r"\.+*?()|[]{}^$#";
    SPECIAL.contains(c)
}
1884
1885fn push_quoted(buf: &mut String, s: &str) {
1886 for c in s.chars() {
1887 if is_special(c) {
1888 buf.push('\\');
1889 }
1890 buf.push(c);
1891 }
1892}
1893
1894/// Escapes special characters in `text` with '\\'. Returns a string which, when interpreted
1895/// as a regex, matches exactly `text`.
1896pub fn escape(text: &str) -> Cow<'_, str> {
1897 // Using bytes() is OK because all special characters are single bytes.
1898 match text.bytes().filter(|&b| is_special(b as char)).count() {
1899 0 => Cow::Borrowed(text),
1900 n => {
1901 // The capacity calculation is exact because '\\' is a single byte.
1902 let mut buf = String::with_capacity(text.len() + n);
1903 push_quoted(&mut buf, text);
1904 Cow::Owned(buf)
1905 }
1906 }
1907}
1908
/// Type of assertions
///
/// Assertions match a position in the input without consuming any characters.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Assertion {
    /// Start of input text
    StartText,
    /// End of input text
    EndText,
    /// End of input text, or before any trailing newlines at the end (Oniguruma's `\Z`)
    EndTextIgnoreTrailingNewlines {
        /// Whether CRLF mode is enabled.
        /// If `true`, trailing `\r\n` pairs (in addition to bare `\n`) are also ignored.
        crlf: bool,
    },
    /// Start of a line
    StartLine {
        /// CRLF mode.
        /// If true, this assertion matches at the starting position of the input text, or at
        /// the position immediately following either a `\r` or `\n` character, but never after
        /// a `\r` when a `\n` follows.
        crlf: bool,
    },
    /// End of a line
    EndLine {
        /// CRLF mode.
        /// If true, this assertion matches at the ending position of the input text, or at
        /// the position immediately preceding either a `\r` or `\n` character, but never
        /// before a `\n` when a `\r` precedes it.
        crlf: bool,
    },
    /// Left word boundary
    LeftWordBoundary,
    /// Left word half boundary
    LeftWordHalfBoundary,
    /// Right word boundary
    RightWordBoundary,
    /// Right word half boundary
    RightWordHalfBoundary,
    /// Both word boundaries
    WordBoundary,
    /// Not word boundary
    NotWordBoundary,
}
1949
1950impl Assertion {
1951 pub(crate) fn is_hard(&self) -> bool {
1952 use Assertion::*;
1953 matches!(
1954 self,
1955 // these will make regex-automata use PikeVM
1956 LeftWordBoundary
1957 | LeftWordHalfBoundary
1958 | RightWordBoundary
1959 | RightWordHalfBoundary
1960 | WordBoundary
1961 | NotWordBoundary
1962 | EndTextIgnoreTrailingNewlines { .. }
1963 )
1964 }
1965}
1966
/// An iterator over the immediate children of an [`Expr`].
///
/// This iterator yields references to child expressions but does not recurse into them.
/// Each variant holds the children that have not been yielded yet.
#[derive(Debug)]
pub enum ExprChildrenIter<'a> {
    /// No children (leaf node)
    Empty,
    /// A single child (Group, LookAround, AtomicGroup, Repeat, DefineGroup, and the
    /// absent repeater/stopper); becomes `None` once the child has been yielded
    Single(Option<&'a Expr>),
    /// Multiple children in a Vec (Concat, Alt)
    Vec(alloc::slice::Iter<'a, Expr>),
    /// Up to three children, yielded in field order (Conditional uses all three slots;
    /// an absent expression fills only the first two)
    Triple {
        /// First child
        first: Option<&'a Expr>,
        /// Second child
        second: Option<&'a Expr>,
        /// Third child
        third: Option<&'a Expr>,
    },
}
1988
/// An iterator over the immediate children of an [`Expr`] for mutable access.
///
/// This iterator yields mutable references to child expressions but does not recurse into them.
/// Each variant holds the children that have not been yielded yet.
#[derive(Debug)]
pub enum ExprChildrenIterMut<'a> {
    /// No children (leaf node)
    Empty,
    /// A single child (Group, LookAround, AtomicGroup, Repeat, DefineGroup, and the
    /// absent repeater/stopper); becomes `None` once the child has been yielded
    Single(Option<&'a mut Expr>),
    /// Multiple children in a Vec (Concat, Alt)
    Vec(alloc::slice::IterMut<'a, Expr>),
    /// Up to three children, yielded in field order (Conditional uses all three slots;
    /// an absent expression fills only the first two)
    Triple {
        /// First child
        first: Option<&'a mut Expr>,
        /// Second child
        second: Option<&'a mut Expr>,
        /// Third child
        third: Option<&'a mut Expr>,
    },
}
2010
2011impl<'a> Iterator for ExprChildrenIter<'a> {
2012 type Item = &'a Expr;
2013
2014 fn next(&mut self) -> Option<Self::Item> {
2015 match self {
2016 ExprChildrenIter::Empty => None,
2017 ExprChildrenIter::Single(ref mut child) => child.take(),
2018 ExprChildrenIter::Vec(ref mut iter) => iter.next(),
2019 ExprChildrenIter::Triple {
2020 ref mut first,
2021 ref mut second,
2022 ref mut third,
2023 } => first
2024 .take()
2025 .or_else(|| second.take())
2026 .or_else(|| third.take()),
2027 }
2028 }
2029}
2030
2031impl<'a> Iterator for ExprChildrenIterMut<'a> {
2032 type Item = &'a mut Expr;
2033
2034 fn next(&mut self) -> Option<Self::Item> {
2035 match self {
2036 ExprChildrenIterMut::Empty => None,
2037 ExprChildrenIterMut::Single(ref mut child) => child.take(),
2038 ExprChildrenIterMut::Vec(ref mut iter) => iter.next(),
2039 ExprChildrenIterMut::Triple {
2040 ref mut first,
2041 ref mut second,
2042 ref mut third,
2043 } => first
2044 .take()
2045 .or_else(|| second.take())
2046 .or_else(|| third.take()),
2047 }
2048 }
2049}
2050
// Builds the children iterator for an `Expr`, shared between the immutable and the
// mutable flavour so that the variant-to-iterator mapping is written only once.
//
// Arguments:
// - `$self`: the expression to inspect (`&Expr` or `&mut Expr`)
// - `$iter`: the iterator enum to construct (`ExprChildrenIter` / `ExprChildrenIterMut`)
// - `$vec_method`: method called on the child vector (`iter` / `iter_mut`)
// - `$single_method`: method called on a boxed child to get a reference (`as_ref` / `as_mut`)
// - `$group_method`: associated function of `Arc` applied to a group's child
//   (`as_ref` / `make_mut`)
macro_rules! children_iter_match {
    ($self:expr, $iter:ident, $vec_method:ident, $single_method:ident, $group_method:ident) => {
        match $self {
            Expr::Concat(children) | Expr::Alt(children) => $iter::Vec(children.$vec_method()),
            // The group child is accessed through `Arc`, hence the separate method argument.
            Expr::Group(child) => $iter::Single(Some(Arc::$group_method(child))),
            Expr::Absent(Absent::Repeater(child))
            | Expr::Absent(Absent::Stopper(child))
            | Expr::LookAround(child, _)
            | Expr::AtomicGroup(child)
            | Expr::Repeat { child, .. } => $iter::Single(Some(child.$single_method())),
            Expr::Conditional {
                condition,
                true_branch,
                false_branch,
            } => $iter::Triple {
                first: Some(condition.$single_method()),
                second: Some(true_branch.$single_method()),
                third: Some(false_branch.$single_method()),
            },
            // Only two children: the third slot of the triple stays empty.
            Expr::Absent(Absent::Expression { absent, exp }) => $iter::Triple {
                first: Some(absent.$single_method()),
                second: Some(exp.$single_method()),
                third: None,
            },
            Expr::DefineGroup { definitions } => $iter::Single(Some(definitions.$single_method())),
            // Everything `is_leaf_node` accepts has no children to iterate.
            _ if $self.is_leaf_node() => $iter::Empty,
            // Reached only by a variant that is neither handled above nor listed as a leaf.
            _ => unimplemented!(),
        }
    };
}
impl Expr {
    /// Parse the regex and return an expression (AST) and a bit set with the indexes of groups
    /// that are referenced by backrefs.
    ///
    /// Delegates to `Parser::parse`; any parse error is returned unchanged.
    pub fn parse_tree(re: &str) -> Result<ExprTree> {
        Parser::parse(re)
    }

    /// Parse the regex with the given parser flags and return an expression (AST).
    ///
    /// `flags` is a bit set that is handed to `Parser::parse_with_flags` as-is.
    /// NOTE(review): presumably built by OR-ing the `FLAG_*` constants from `parse_flags` —
    /// confirm against the parser.
    pub fn parse_tree_with_flags(re: &str, flags: u32) -> Result<ExprTree> {
        Parser::parse_with_flags(re, flags)
    }

    /// Returns `true` if this expression is a leaf node (has no children).
    ///
    /// Leaf nodes include literals, assertions, backreferences, and other atomic expressions.
    /// Non-leaf nodes include groups, concatenations, alternations, and repetitions.
    pub fn is_leaf_node(&self) -> bool {
        matches!(
            self,
            Expr::Empty
                | Expr::Any { .. }
                | Expr::Assertion(_)
                | Expr::GeneralNewline { .. }
                | Expr::Literal { .. }
                | Expr::Delegate { .. }
                | Expr::Backref { .. }
                | Expr::BackrefWithRelativeRecursionLevel { .. }
                | Expr::KeepOut
                | Expr::ContinueFromPreviousMatchEnd
                | Expr::BackrefExistsCondition { .. }
                | Expr::BacktrackingControlVerb(_)
                | Expr::SubroutineCall(_)
                | Expr::Absent(Absent::Clear)
                // An unresolved AstNode has no separate child Expr to iterate; the resolver
                // should have replaced it before analysis, so treat it as a leaf so that
                // collection/iteration doesn't panic, and let the analyzer emit the error.
                | Expr::AstNode(..),
        )
    }

    /// Returns `true` if any descendant of this expression (not including itself)
    /// satisfies the given predicate.
    ///
    /// This performs an iterative depth-first search using [`children_iter`](Self::children_iter)
    /// and stops at the first descendant for which `predicate` returns `true`.
    pub fn has_descendant(&self, predicate: impl Fn(&Expr) -> bool) -> bool {
        // Explicit stack instead of recursion, seeded with the immediate children so that
        // `self` itself is never tested.
        let mut stack: Vec<&Expr> = self.children_iter().collect();
        while let Some(expr) = stack.pop() {
            if predicate(expr) {
                return true;
            }
            stack.extend(expr.children_iter());
        }
        false
    }

    /// Returns an iterator over the immediate children of this expression.
    ///
    /// For leaf nodes, this returns an empty iterator. For non-leaf nodes, it returns
    /// references to their immediate children (non-recursive).
    pub fn children_iter(&self) -> ExprChildrenIter<'_> {
        children_iter_match!(self, ExprChildrenIter, iter, as_ref, as_ref)
    }

    /// Returns an iterator over the immediate children of this expression for mutable access.
    ///
    /// For leaf nodes, this returns an empty iterator. For non-leaf nodes, it returns
    /// mutable references to their immediate children (non-recursive).
    ///
    /// The child of a `Group` is obtained through `Arc::make_mut`.
    pub fn children_iter_mut(&mut self) -> ExprChildrenIterMut<'_> {
        children_iter_match!(self, ExprChildrenIterMut, iter_mut, as_mut, make_mut)
    }

    /// Convert expression to a regex string in the regex crate's syntax.
    ///
    /// The text is appended to `buf`. `precedence` describes the context the expression is
    /// printed in and decides whether it must be wrapped in a non-capturing group `(?:...)`:
    /// an alternation is wrapped when `precedence > 0`, a concatenation when
    /// `precedence > 1`, and a repetition when `precedence > 2`. Top-level callers pass `0`.
    ///
    /// # Panics
    ///
    /// Panics for expressions that are hard, i.e. can not be handled by the regex crate.
    pub fn to_str(&self, buf: &mut String, precedence: u8) {
        match *self {
            Expr::Empty => (),
            Expr::Any { newline, crlf } => buf.push_str(match (newline, crlf) {
                (true, _) => "(?s:.)",
                (false, true) => "(?R-s:.)",
                (false, false) => ".",
            }),
            // NOTE(review): a literal is emitted without grouping regardless of `precedence`,
            // so for a multi-character, case-sensitive literal directly under a repetition the
            // quantifier would bind to its last character only — confirm callers never build
            // that shape.
            Expr::Literal { ref val, casei } => {
                if casei {
                    buf.push_str("(?i:");
                }
                push_quoted(buf, val);
                if casei {
                    buf.push(')');
                }
            }
            Expr::Assertion(Assertion::StartText) => buf.push('^'),
            Expr::Assertion(Assertion::EndText) => buf.push('$'),
            Expr::Assertion(Assertion::StartLine { crlf: false }) => buf.push_str("(?m:^)"),
            Expr::Assertion(Assertion::EndLine { crlf: false }) => buf.push_str("(?m:$)"),
            Expr::Assertion(Assertion::StartLine { crlf: true }) => buf.push_str("(?Rm:^)"),
            Expr::Assertion(Assertion::EndLine { crlf: true }) => buf.push_str("(?Rm:$)"),
            Expr::Concat(ref children) => {
                if precedence > 1 {
                    buf.push_str("(?:");
                }
                for child in children {
                    child.to_str(buf, 2);
                }
                if precedence > 1 {
                    buf.push(')')
                }
            }
            Expr::Alt(_) => {
                if precedence > 0 {
                    buf.push_str("(?:");
                }
                // Branches are separated by `|`; nothing is emitted for an empty alternation.
                let mut children = self.children_iter();
                if let Some(first) = children.next() {
                    first.to_str(buf, 1);
                    for child in children {
                        buf.push('|');
                        child.to_str(buf, 1);
                    }
                }
                if precedence > 0 {
                    buf.push(')');
                }
            }
            Expr::Group(ref child) => {
                // The capture parentheses already delimit the child, so it restarts at 0.
                buf.push('(');
                child.to_str(buf, 0);
                buf.push(')');
            }
            Expr::Repeat {
                ref child,
                lo,
                hi,
                greedy,
            } => {
                if precedence > 2 {
                    buf.push_str("(?:");
                }
                child.to_str(buf, 3);
                // `usize::MAX` as the upper bound stands for "unbounded".
                match (lo, hi) {
                    (0, 1) => buf.push('?'),
                    (0, usize::MAX) => buf.push('*'),
                    (1, usize::MAX) => buf.push('+'),
                    (lo, hi) => {
                        // General form: `{lo}`, `{lo,hi}` or `{lo,}`.
                        buf.push('{');
                        push_usize(buf, lo);
                        if lo != hi {
                            buf.push(',');
                            if hi != usize::MAX {
                                push_usize(buf, hi);
                            }
                        }
                        buf.push('}');
                    }
                }
                // A trailing `?` makes the quantifier lazy.
                if !greedy {
                    buf.push('?');
                }
                if precedence > 2 {
                    buf.push(')');
                }
            }
            Expr::Delegate {
                ref inner, casei, ..
            } => {
                // at the moment, delegate nodes are just atoms
                if casei {
                    buf.push_str("(?i:");
                }
                buf.push_str(inner);
                if casei {
                    buf.push(')');
                }
            }
            Expr::DefineGroup { .. } => {
                // DEFINE groups match nothing - output empty string for delegation
            }
            _ => panic!("attempting to format hard expr {:?}", self),
        }
    }
}
2265
// Returns the byte index at which the codepoint ending just before `ix` starts.
//
// precondition: ix > 0
fn prev_codepoint_ix(s: &str, mut ix: usize) -> usize {
    let bytes = s.as_bytes();
    ix -= 1;
    // UTF-8 continuation bytes have the form 0b10xx_xxxx; step back over them until
    // a leading byte (ASCII or 0xc0 and above) is reached.
    while bytes[ix] & 0xc0 == 0x80 {
        ix -= 1;
    }
    ix
}
2278
// Length in bytes of a UTF-8 sequence, judged from its first byte `b` alone.
fn codepoint_len(b: u8) -> usize {
    match b {
        0x00..=0x7f => 1,
        0x80..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}
2287
2288/// Returns the smallest possible index of the next valid UTF-8 sequence
2289/// starting after `i`.
2290/// Adapted from a function with the same name in the `regex` crate.
2291pub(crate) fn next_utf8(text: &str, i: usize) -> usize {
2292 let b = match text.as_bytes().get(i) {
2293 None => return i + 1,
2294 Some(&b) => b,
2295 };
2296 i + codepoint_len(b)
2297}
2298
2299// If this returns false, then there is no possible backref in the re
2300
2301// Both potential implementations are turned off, because we currently
2302// always need to do a deeper analysis because of 1-character
2303// look-behind. If we could call a find_from_pos method of regex::Regex,
2304// it would make sense to bring this back.
2305/*
2306pub fn detect_possible_backref(re: &str) -> bool {
2307 let mut last = b'\x00';
2308 for b in re.as_bytes() {
2309 if b'0' <= *b && *b <= b'9' && last == b'\\' { return true; }
2310 last = *b;
2311 }
2312 false
2313}
2314
2315pub fn detect_possible_backref(re: &str) -> bool {
2316 let mut bytes = re.as_bytes();
2317 loop {
2318 match memchr::memchr(b'\\', &bytes[..bytes.len() - 1]) {
2319 Some(i) => {
2320 bytes = &bytes[i + 1..];
2321 let c = bytes[0];
2322 if b'0' <= c && c <= b'9' { return true; }
2323 }
2324 None => return false
2325 }
2326 }
2327}
2328*/
2329
/// The internal module only exists so that the toy example can access internals for debugging and
/// experimenting.
///
/// It consists solely of re-exports and is hidden from the generated documentation.
#[doc(hidden)]
pub mod internal {
    // Re-exports from the `analyze` module.
    pub use crate::analyze::{analyze, can_compile_as_anchored, AnalyzeContext, Info};
    // Re-exports from the `compile` module.
    pub use crate::compile::{compile, CompileOptions};
    // Re-export from the `optimize` module.
    pub use crate::optimize::optimize;
    // Parser flag constants from the `parse_flags` module.
    pub use crate::parse_flags::{
        FLAG_CASEI, FLAG_CRLF, FLAG_DOTNL, FLAG_IGNORE_NUMBERED_GROUPS_WHEN_NAMED_GROUPS_EXIST,
        FLAG_IGNORE_SPACE, FLAG_MULTI, FLAG_ONIGURUMA_MODE, FLAG_UNICODE,
    };
    // Re-exports from the `vm` module.
    pub use crate::vm::{run_default, run_trace, Insn, Prog};
}
2343
// Unit tests for `Expr::to_str`, `escape`, the choice of regex implementation
// (wrapped vs. VM), `Expr::is_leaf_node` and `Expr::children_iter`.
#[cfg(test)]
mod tests {
    use alloc::borrow::Cow;
    use alloc::boxed::Box;
    use alloc::string::{String, ToString};
    use alloc::sync::Arc;
    use alloc::vec::Vec;
    use alloc::{format, vec};

    use crate::parse::{make_group, make_literal};
    use crate::{Absent, Expr, Regex, RegexImpl};

    //use detect_possible_backref;

    // tests for to_str

    // Renders `e` at top-level precedence (0) into a fresh string.
    fn to_str(e: Expr) -> String {
        let mut s = String::new();
        e.to_str(&mut s, 0);
        s
    }

    // An alternation inside a concatenation must be wrapped in a non-capturing group.
    #[test]
    fn to_str_concat_alt() {
        let e = Expr::Concat(vec![
            Expr::Alt(vec![make_literal("a"), make_literal("b")]),
            make_literal("c"),
        ]);
        assert_eq!(to_str(e), "(?:a|b)c");
    }

    // A concatenation under a repetition must be wrapped in a non-capturing group.
    #[test]
    fn to_str_rep_concat() {
        let e = Expr::Repeat {
            child: Box::new(Expr::Concat(vec![make_literal("a"), make_literal("b")])),
            lo: 2,
            hi: 3,
            greedy: true,
        };
        assert_eq!(to_str(e), "(?:ab){2,3}");
    }

    // A capture group already delimits its alternation; no extra wrapping is expected.
    #[test]
    fn to_str_group_alt() {
        let e = Expr::Group(Arc::new(Expr::Alt(vec![
            make_literal("a"),
            make_literal("b"),
        ])));
        assert_eq!(to_str(e), "(a|b)");
    }

    // `as_str` and the `Debug` output both return the original pattern text.
    #[test]
    fn as_str_debug() {
        let s = r"(a+)b\1";
        let regex = Regex::new(s).unwrap();
        assert_eq!(s, regex.as_str());
        assert_eq!(s, format!("{:?}", regex));
    }

    // The `Display` output is the original pattern text.
    #[test]
    fn display() {
        let s = r"(a+)b\1";
        let regex = Regex::new(s).unwrap();
        assert_eq!(s, format!("{}", regex));
    }

    // Parsing through `str::parse` keeps the original pattern text.
    #[test]
    fn from_str() {
        let s = r"(a+)b\1";
        let regex = s.parse::<Regex>().unwrap();
        assert_eq!(regex.as_str(), s);
    }

    // Every quantifier shape, in greedy and lazy form.
    #[test]
    fn to_str_repeat() {
        fn repeat(lo: usize, hi: usize, greedy: bool) -> Expr {
            Expr::Repeat {
                child: Box::new(make_literal("a")),
                lo,
                hi,
                greedy,
            }
        }

        assert_eq!(to_str(repeat(2, 2, true)), "a{2}");
        assert_eq!(to_str(repeat(2, 2, false)), "a{2}?");
        assert_eq!(to_str(repeat(2, 3, true)), "a{2,3}");
        assert_eq!(to_str(repeat(2, 3, false)), "a{2,3}?");
        assert_eq!(to_str(repeat(2, usize::MAX, true)), "a{2,}");
        assert_eq!(to_str(repeat(2, usize::MAX, false)), "a{2,}?");
        assert_eq!(to_str(repeat(0, 1, true)), "a?");
        assert_eq!(to_str(repeat(0, 1, false)), "a??");
        assert_eq!(to_str(repeat(0, usize::MAX, true)), "a*");
        assert_eq!(to_str(repeat(0, usize::MAX, false)), "a*?");
        assert_eq!(to_str(repeat(1, usize::MAX, true)), "a+");
        assert_eq!(to_str(repeat(1, usize::MAX, false)), "a+?");
    }

    #[test]
    fn escape() {
        // Check that strings that need no quoting are borrowed, and that non-special punctuation
        // is not quoted.
        match crate::escape("@foo") {
            Cow::Borrowed(s) => assert_eq!(s, "@foo"),
            _ => panic!("Value should be borrowed."),
        }

        // Check typical usage.
        assert_eq!(crate::escape("fo*o").into_owned(), "fo\\*o");

        // Check that multibyte characters are handled correctly.
        assert_eq!(crate::escape("fø*ø").into_owned(), "fø\\*ø");
    }

    // A trailing positive look-ahead on an otherwise easy pattern is expected to be
    // wrapped, with capture group 0 made explicit.
    #[test]
    fn trailing_positive_lookahead_wrap_capture_group_fixup() {
        let s = r"a+(?=c)";
        let regex = s.parse::<Regex>().unwrap();
        assert!(matches!(regex.inner,
            RegexImpl::Wrap { explicit_capture_group_0: true, .. }),
            "trailing positive lookahead for an otherwise easy pattern should avoid going through the VM");
        assert_eq!(s, regex.as_str());
        assert_eq!(s, format!("{:?}", regex));
    }

    // A pattern without fancy features is expected to be wrapped with implicit group 0.
    #[test]
    fn easy_regex() {
        let s = r"(a+)b";
        let regex = s.parse::<Regex>().unwrap();
        assert!(
            matches!(regex.inner, RegexImpl::Wrap { explicit_capture_group_0: false, .. }),
            "easy pattern should avoid going through the VM, and capture group 0 should be implicit"
        );

        assert_eq!(s, regex.as_str());
        assert_eq!(s, format!("{:?}", regex));
    }

    // An atomic group makes the pattern hard, so the `Fancy` implementation is expected.
    #[test]
    fn hard_regex() {
        let s = r"(a+)(?>c)";
        let regex = s.parse::<Regex>().unwrap();
        assert!(
            matches!(regex.inner, RegexImpl::Fancy { .. }),
            "hard regex should be compiled into a VM"
        );
        assert_eq!(s, regex.as_str());
        assert_eq!(s, format!("{:?}", regex));
    }

    /*
    #[test]
    fn detect_backref() {
        assert_eq!(detect_possible_backref("a0a1a2"), false);
        assert_eq!(detect_possible_backref("a0a1\\a2"), false);
        assert_eq!(detect_possible_backref("a0a\\1a2"), true);
        assert_eq!(detect_possible_backref("a0a1a2\\"), false);
    }
    */

    #[test]
    fn test_is_leaf_node_leaf_nodes() {
        // Leaf node variants (`GeneralNewline` and `AstNode` are not exercised here)
        assert!(Expr::Empty.is_leaf_node());
        assert!(Expr::Any {
            newline: false,
            crlf: false
        }
        .is_leaf_node());
        assert!(Expr::Any {
            newline: true,
            crlf: false
        }
        .is_leaf_node());
        assert!(Expr::Assertion(crate::Assertion::StartText).is_leaf_node());
        assert!(Expr::Literal {
            val: "test".to_string(),
            casei: false
        }
        .is_leaf_node());
        assert!(Expr::Delegate {
            inner: "[0-9]".to_string(),
            casei: false
        }
        .is_leaf_node());
        assert!(Expr::Backref {
            group: 1,
            casei: false
        }
        .is_leaf_node());
        assert!(Expr::BackrefWithRelativeRecursionLevel {
            group: 1,
            relative_level: -1,
            casei: false
        }
        .is_leaf_node());
        assert!(Expr::KeepOut.is_leaf_node());
        assert!(Expr::ContinueFromPreviousMatchEnd.is_leaf_node());
        assert!(Expr::BackrefExistsCondition {
            group: 1,
            relative_recursion_level: None
        }
        .is_leaf_node());
        assert!(Expr::BacktrackingControlVerb(crate::BacktrackingControlVerb::Fail).is_leaf_node());
        assert!(Expr::SubroutineCall(1).is_leaf_node());

        assert!(Expr::Absent(Absent::Clear).is_leaf_node());
    }

    #[test]
    fn test_is_leaf_node_non_leaf_nodes() {
        // Non-leaf node variants (`DefineGroup` is not exercised here)
        assert!(!Expr::Concat(vec![make_literal("a")]).is_leaf_node());
        assert!(!Expr::Alt(vec![make_literal("a"), make_literal("b")]).is_leaf_node());
        assert!(!make_group(make_literal("a")).is_leaf_node());
        assert!(
            !Expr::LookAround(Box::new(make_literal("a")), crate::LookAround::LookAhead)
                .is_leaf_node()
        );
        assert!(!Expr::Repeat {
            child: Box::new(make_literal("a")),
            lo: 0,
            hi: 1,
            greedy: true
        }
        .is_leaf_node());
        assert!(!Expr::AtomicGroup(Box::new(make_literal("a"))).is_leaf_node());
        assert!(!Expr::Conditional {
            condition: Box::new(Expr::BackrefExistsCondition {
                group: 1,
                relative_recursion_level: None
            }),
            true_branch: Box::new(make_literal("a")),
            false_branch: Box::new(Expr::Empty)
        }
        .is_leaf_node());

        assert!(!Expr::Absent(Absent::Repeater(Box::new(make_literal("a")))).is_leaf_node());
        assert!(!Expr::Absent(Absent::Expression {
            absent: Box::new(make_literal("/*")),
            exp: Box::new(Expr::Repeat {
                child: Box::new(Expr::Any {
                    newline: true,
                    crlf: false
                }),
                lo: 0,
                hi: usize::MAX,
                greedy: true
            })
        })
        .is_leaf_node());
        assert!(!Expr::Absent(Absent::Stopper(Box::new(make_literal("/*")))).is_leaf_node());
    }

    #[test]
    fn test_children_iter_empty() {
        // Leaf nodes should return empty iterator
        let expr = Expr::Empty;
        let mut iter = expr.children_iter();
        assert!(iter.next().is_none());

        let expr = make_literal("test");
        let mut iter = expr.children_iter();
        assert!(iter.next().is_none());
    }

    #[test]
    fn test_children_iter_single() {
        // Single-child nodes should yield exactly one child; Group and Repeat are
        // exercised here.
        let child = make_literal("a");
        let expr = make_group(child.clone());
        let children: Vec<_> = expr.children_iter().collect();
        assert_eq!(children.len(), 1);

        let expr = Expr::Repeat {
            child: Box::new(child.clone()),
            lo: 0,
            hi: 1,
            greedy: true,
        };
        let children: Vec<_> = expr.children_iter().collect();
        assert_eq!(children.len(), 1);
    }

    #[test]
    fn test_children_iter_vec() {
        // Concat and Alt should return all children
        let children_vec = vec![make_literal("a"), make_literal("b"), make_literal("c")];
        let expr = Expr::Concat(children_vec.clone());
        let children: Vec<_> = expr.children_iter().collect();
        assert_eq!(children.len(), 3);

        let expr = Expr::Alt(children_vec);
        let children: Vec<_> = expr.children_iter().collect();
        assert_eq!(children.len(), 3);
    }

    #[test]
    fn test_children_iter_triple() {
        // Conditional should return three children
        let expr = Expr::Conditional {
            condition: Box::new(Expr::BackrefExistsCondition {
                group: 1,
                relative_recursion_level: None,
            }),
            true_branch: Box::new(make_literal("a")),
            false_branch: Box::new(make_literal("b")),
        };
        let children: Vec<_> = expr.children_iter().collect();
        assert_eq!(children.len(), 3);

        // Absent expression should return two children
        let expr = Expr::Absent(Absent::Expression {
            absent: Box::new(make_literal("/*")),
            exp: Box::new(Expr::Repeat {
                child: Box::new(Expr::Any {
                    newline: true,
                    crlf: false,
                }),
                lo: 0,
                hi: usize::MAX,
                greedy: true,
            }),
        });
        let children: Vec<_> = expr.children_iter().collect();
        assert_eq!(children.len(), 2);
    }
}