Skip to main content

fancy_regex/
lib.rs

1// Copyright 2026 The Fancy Regex Authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#![doc = include_str!("../docs/main.md")]
22#![doc = include_str!("../docs/features.md")]
23#![doc = include_str!("../docs/syntax.md")]
24#![doc = include_str!("../docs/subroutines/1_intro.md")]
25#![doc = include_str!("../docs/subroutines/2_flags.md")]
26#![doc = include_str!("../docs/subroutines/3_left_recursion.md")]
27#![doc = include_str!("../docs/subroutines/4_recursion.md")]
28#![doc = include_str!("../docs/absent.md")]
29#![deny(missing_docs)]
30#![deny(missing_debug_implementations)]
31#![cfg_attr(not(feature = "std"), no_std)]
32
33extern crate alloc;
34
35use alloc::borrow::Cow;
36use alloc::boxed::Box;
37use alloc::string::{String, ToString};
38use alloc::sync::Arc;
39use alloc::vec;
40use alloc::vec::Vec;
41
42use core::convert::TryFrom;
43use core::fmt;
44use core::fmt::{Debug, Formatter};
45use core::ops::{Index, Range};
46use core::str::FromStr;
47use regex_automata::meta::Regex as RaRegex;
48use regex_automata::util::captures::Captures as RaCaptures;
49use regex_automata::util::syntax::Config as SyntaxConfig;
50use regex_automata::Input as RaInput;
51
52mod analyze;
53mod compile;
54mod error;
55mod expand;
56mod optimize;
57mod parse;
58mod parse_flags;
59mod replacer;
60mod vm;
61
62use crate::analyze::can_compile_as_anchored;
63use crate::analyze::{analyze, AnalyzeContext};
64use crate::compile::{compile, CompileOptions};
65use crate::optimize::optimize;
66use crate::parse::{ExprTree, NamedGroups, Parser};
67use crate::parse_flags::*;
68use crate::vm::{Prog, OPTION_FIND_NOT_EMPTY, OPTION_SKIPPED_EMPTY_MATCH};
69
70pub use crate::error::{CompileError, Error, ParseError, Result, RuntimeError};
71pub use crate::expand::Expander;
72pub use crate::replacer::{NoExpand, Replacer, ReplacerRef};
73
// NOTE(review): not referenced in the visible part of this file; presumably it caps
// a recursion depth somewhere further down — confirm at the use site.
const MAX_RECURSION: usize = 64;
75
76// the public API
77
/// A builder for a `Regex` to allow configuring options.
///
/// Stores the pattern together with a [`RegexOptionsBuilder`]; building hands
/// a copy of the stored pattern to that options builder.
#[derive(Debug)]
pub struct RegexBuilder {
    /// The pattern that `build` compiles.
    pattern: String,
    /// The options applied when building.
    options: RegexOptionsBuilder,
}
84
/// A builder for the options of a `Regex`, independent of any particular pattern.
///
/// Unlike [`RegexBuilder`], the pattern is passed to `build`, so one configured
/// set of options can be used to compile several patterns.
#[derive(Debug)]
pub struct RegexOptionsBuilder {
    /// The options accumulated so far.
    options: RegexOptions,
}
90
/// A compiled regular expression.
#[derive(Clone)]
pub struct Regex {
    /// The compiled form: either a wrapped regex-automata regex or a VM program.
    inner: RegexImpl,
    /// The named groups of the parsed pattern, shared behind an `Arc`.
    named_groups: Arc<NamedGroups>,
}
97
// Separate enum because we don't want to expose any of this
#[derive(Clone)]
enum RegexImpl {
    // Do we want to box this? It's pretty big...
    /// Built when analysis reports the pattern as not "hard": matching is
    /// delegated to a regex-automata regex.
    Wrap {
        /// The regex-automata regex compiled from `delegated_pattern`
        inner: RaRegex,
        /// The original pattern which the regex was constructed from
        pattern: String,
        /// Some optimizations avoid the VM, but need to use an extra capture group to represent the match boundaries
        explicit_capture_group_0: bool,
        /// The actual pattern passed to regex-automata for delegation
        delegated_pattern: String,
    },
    /// Built when analysis reports the pattern as "hard": holds a compiled VM program.
    Fancy {
        /// The compiled program, shared behind an `Arc`
        prog: Arc<Prog>,
        /// Taken from the analysis result's `end_group()` when the regex is built
        n_groups: usize,
        /// The original pattern which the regex was constructed from
        pattern: String,
        /// The runtime options (backtrack limit, find-not-empty) copied from the build options
        options: HardRegexRuntimeOptions,
    },
}
119
/// A single match of a regex or group in an input text
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Match<'t> {
    /// The input text the offsets below refer to.
    text: &'t str,
    /// Offset at which the match starts.
    start: usize,
    /// Offset at which the match ends; equal to `start` for an empty match.
    end: usize,
}
127
/// An iterator over all non-overlapping matches for a particular string.
///
/// The iterator yields a `Result<Match>`. The iterator stops when no more
/// matches can be found, or after the first error has been yielded.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
#[derive(Debug)]
pub struct Matches<'r, 't> {
    /// The regex being searched with.
    re: &'r Regex,
    /// The text being searched.
    text: &'t str,
    /// Position in `text` at which the next search starts. It is set to
    /// `text.len() + 1` after an error so that iteration ends.
    last_end: usize,
    /// End position of the most recently yielded match, if any.
    last_match: Option<usize>,
    /// Whether the previous search produced an empty match exactly at its start
    /// position; forwarded to the next search as `OPTION_SKIPPED_EMPTY_MATCH`.
    last_skipped_empty: bool,
}
143
144impl<'r, 't> Matches<'r, 't> {
145    /// Return the text being searched.
146    pub fn text(&self) -> &'t str {
147        self.text
148    }
149
150    /// Return the underlying regex.
151    pub fn regex(&self) -> &'r Regex {
152        self.re
153    }
154
155    /// Adapted from the `regex` crate. Calls `find_from_pos`/`captures_from_pos` repeatedly.
156    /// Ignores empty matches immediately after a match.
157    /// Also passes a flag when skipping an empty match, so that \G wouldn't match at the new start position.
158    fn next_with<F, R>(&mut self, mut search: F) -> Option<Result<R>>
159    where
160        F: FnMut(&Regex, usize, u32) -> Result<Option<(R, Match<'t>)>>,
161    {
162        if self.last_end > self.text.len() {
163            return None;
164        }
165
166        let option_flags = if self.last_skipped_empty {
167            OPTION_SKIPPED_EMPTY_MATCH
168        } else {
169            0
170        };
171
172        let pos = self.last_end;
173        let (result, mat) = match search(self.re, pos, option_flags) {
174            Err(error) => {
175                // Stop on first error: If an error is encountered, return it, and set the "last match position"
176                // to the string length, so that the next next() call will return None, to prevent an infinite loop.
177                self.last_end = self.text.len() + 1;
178                return Some(Err(error));
179            }
180            Ok(None) => return None,
181            Ok(Some(pair)) => pair,
182        };
183
184        if mat.start == mat.end {
185            // This is an empty match. To ensure we make progress, start
186            // the next search at the smallest possible starting position
187            // of the next match following this one.
188            self.last_end = next_utf8(self.text, mat.end);
189            // Only set OPTION_SKIPPED_EMPTY_MATCH on the next call if this was a
190            // truly zero-length match (the VM consumed no bytes from `pos`).
191            // This means that \K won't prevent \G from matching.
192            self.last_skipped_empty = mat.end == pos;
193            // Don't accept empty matches immediately following a match.
194            // Just move on to the next match.
195            if Some(mat.end) == self.last_match {
196                return self.next_with(search);
197            }
198        } else {
199            self.last_end = mat.end;
200            self.last_skipped_empty = false;
201        }
202
203        self.last_match = Some(mat.end);
204
205        Some(Ok(result))
206    }
207}
208
209impl<'r, 't> Iterator for Matches<'r, 't> {
210    type Item = Result<Match<'t>>;
211
212    fn next(&mut self) -> Option<Self::Item> {
213        let text = self.text;
214        self.next_with(move |re, pos, flags| {
215            re.find_from_pos_with_option_flags(text, pos, flags)
216                .map(|opt| opt.map(|m| (m, m)))
217        })
218    }
219}
220
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
/// The iterator stops when no more matches can be found.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
// Thin wrapper: the position bookkeeping lives in the wrapped `Matches`.
#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(Matches<'r, 't>);
230
231impl<'r, 't> CaptureMatches<'r, 't> {
232    /// Return the text being searched.
233    pub fn text(&self) -> &'t str {
234        self.0.text
235    }
236
237    /// Return the underlying regex.
238    pub fn regex(&self) -> &'r Regex {
239        self.0.re
240    }
241}
242
243impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
244    type Item = Result<Captures<'t>>;
245
246    fn next(&mut self) -> Option<Self::Item> {
247        let text = self.0.text;
248        self.0.next_with(move |re, pos, flags| {
249            let captures = re.captures_from_pos_with_option_flags(text, pos, flags)?;
250            Ok(captures.map(|c| {
251                let mat = c
252                    .get(0)
253                    .expect("`Captures` is expected to have entire match at 0th position");
254                (c, mat)
255            }))
256        })
257    }
258}
259
/// A set of capture groups found for a regex.
#[derive(Debug)]
pub struct Captures<'t> {
    /// The capture data, in one of the shapes of `CapturesImpl`.
    inner: CapturesImpl<'t>,
    /// Named-group table, shared behind an `Arc`.
    /// NOTE(review): presumably the same table as the originating `Regex` — confirm where `Captures` is built.
    named_groups: Arc<NamedGroups>,
}
266
/// The capture data behind a `Captures` value.
#[derive(Debug)]
enum CapturesImpl<'t> {
    /// Capture locations held in a regex-automata `Captures` value.
    Wrap {
        /// The searched text.
        text: &'t str,
        /// The group locations as reported by regex-automata.
        locations: RaCaptures,
        /// Some optimizations avoid the VM but need an extra capture group to represent the match boundaries.
        /// Therefore what is actually capture group 1 should be treated as capture group 0, and all other
        /// capture groups should have their index reduced by one as well to line up with what the pattern specifies.
        explicit_capture_group_0: bool,
    },
    /// NOTE(review): presumably produced by the VM for patterns that are not delegated — confirm.
    Fancy {
        /// The searched text.
        text: &'t str,
        /// NOTE(review): presumably the start/end offsets saved per group by the VM — confirm.
        saves: Vec<usize>,
    },
}
282
/// Iterator for captured groups in order in which they appear in the regex.
#[derive(Debug)]
pub struct SubCaptureMatches<'c, 't> {
    /// The captures being iterated over.
    caps: &'c Captures<'t>,
    /// NOTE(review): presumably the index of the next group to yield — confirm in the `Iterator` impl.
    i: usize,
}
289
/// An iterator over all substrings delimited by a regex.
///
/// This iterator yields `Result<&'h str>`, where each item is a substring of the
/// target string that is delimited by matches of the regular expression. It stops when there
/// are no more substrings to yield.
///
/// `'r` is the lifetime of the compiled regular expression, and `'h` is the
/// lifetime of the target string being split.
///
/// This iterator can be created by the [`Regex::split`] method.
#[derive(Debug)]
pub struct Split<'r, 'h> {
    /// Iterator over the delimiter matches.
    matches: Matches<'r, 'h>,
    /// Offset in `target` where the next substring starts. It is set to
    /// `target.len() + 1` once the final substring has been yielded.
    next_start: usize,
    /// The string being split.
    target: &'h str,
}
306
307impl<'r, 'h> Iterator for Split<'r, 'h> {
308    type Item = Result<&'h str>;
309
310    /// Returns the next substring that results from splitting the target string by the regex.
311    ///
312    /// If no more matches are found, returns the remaining part of the string,
313    /// or `None` if all substrings have been yielded.
314    fn next(&mut self) -> Option<Result<&'h str>> {
315        match self.matches.next() {
316            None => {
317                let len = self.target.len();
318                if self.next_start > len {
319                    // No more substrings to return
320                    None
321                } else {
322                    // Return the last part of the target string
323                    // Next call will return None
324                    let part = &self.target[self.next_start..len];
325                    self.next_start = len + 1;
326                    Some(Ok(part))
327                }
328            }
329            // Return the next substring
330            Some(Ok(m)) => {
331                let part = &self.target[self.next_start..m.start()];
332                self.next_start = m.end();
333                Some(Ok(part))
334            }
335            Some(Err(e)) => Some(Err(e)),
336        }
337    }
338}
339
340impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
341
/// An iterator over at most `N` substrings delimited by a regex.
///
/// This iterator yields `Result<&'h str>`, where each item is a substring of the
/// target that is delimited by matches of the regular expression. It stops either when
/// there are no more substrings to yield, or after `N` substrings have been yielded.
///
/// The `N`th substring is the remaining part of the target.
///
/// `'r` is the lifetime of the compiled regular expression, and `'h` is the
/// lifetime of the target string being split.
///
/// This iterator can be created by the [`Regex::splitn`] method.
#[derive(Debug)]
pub struct SplitN<'r, 'h> {
    /// The unbounded split iterator that produces all but the final substring.
    splits: Split<'r, 'h>,
    /// How many more items may be yielded; decremented on every call to `next`
    /// until it reaches zero.
    limit: usize,
}
359
360impl<'r, 'h> Iterator for SplitN<'r, 'h> {
361    type Item = Result<&'h str>;
362
363    /// Returns the next substring resulting from splitting the target by the regex,
364    /// limited to `N` splits.
365    ///
366    /// Returns `None` if no more matches are found or if the limit is reached after yielding
367    /// the remaining part of the target.
368    fn next(&mut self) -> Option<Result<&'h str>> {
369        if self.limit == 0 {
370            // Limit reached. No more substrings available.
371            return None;
372        }
373
374        // Decrement the limit for each split.
375        self.limit -= 1;
376        if self.limit > 0 {
377            return self.splits.next();
378        }
379
380        // Nth split
381        let len = self.splits.target.len();
382        if self.splits.next_start > len {
383            // No more substrings available.
384            None
385        } else {
386            // Return the remaining part of the target
387            let start = self.splits.next_start;
388            self.splits.next_start = len + 1;
389            Some(Ok(&self.splits.target[start..len]))
390        }
391    }
392
393    fn size_hint(&self) -> (usize, Option<usize>) {
394        (0, Some(self.limit))
395    }
396}
397
398impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
399
/// The full set of options a `Regex` is built with; filled in through `RegexOptionsBuilder`.
#[derive(Clone, Debug, Default)]
struct RegexOptions {
    /// Syntax flags (case-insensitive, multi-line, ignore-whitespace, dot-matches-newline,
    /// unicode, CRLF) kept in a regex-automata syntax config.
    syntaxc: SyntaxConfig,
    /// Optional size limit set via `RegexOptionsBuilder::delegate_size_limit`.
    delegate_size_limit: Option<usize>,
    /// Optional DFA size limit set via `RegexOptionsBuilder::delegate_dfa_size_limit`.
    delegate_dfa_size_limit: Option<usize>,
    /// Whether Oniguruma-compatible behaviour was requested.
    oniguruma_mode: bool,
    /// Whether unnamed groups are treated as non-capturing when named groups exist.
    ignore_numbered_groups_when_named_groups_exist: bool,
    /// Options stored with regexes that are compiled to a VM program.
    hard_regex_runtime_options: HardRegexRuntimeOptions,
}
409
/// Options stored in `RegexImpl::Fancy` alongside the compiled program.
#[derive(Copy, Clone, Debug)]
struct HardRegexRuntimeOptions {
    /// Backtracking limit; the default is 1_000_000.
    backtrack_limit: usize,
    /// Whether matches are required to be non-empty; the default is `false`.
    find_not_empty: bool,
}
415
416impl RegexOptions {
417    fn get_flag_value(flag_value: bool, enum_value: u32) -> u32 {
418        if flag_value {
419            enum_value
420        } else {
421            0
422        }
423    }
424
425    fn compute_flags(&self) -> u32 {
426        let insensitive = Self::get_flag_value(self.syntaxc.get_case_insensitive(), FLAG_CASEI);
427        let multiline = Self::get_flag_value(self.syntaxc.get_multi_line(), FLAG_MULTI);
428        let whitespace =
429            Self::get_flag_value(self.syntaxc.get_ignore_whitespace(), FLAG_IGNORE_SPACE);
430        let dotnl = Self::get_flag_value(self.syntaxc.get_dot_matches_new_line(), FLAG_DOTNL);
431        let unicode = Self::get_flag_value(self.syntaxc.get_unicode(), FLAG_UNICODE);
432        let oniguruma_mode = Self::get_flag_value(self.oniguruma_mode, FLAG_ONIGURUMA_MODE);
433        let crlf = Self::get_flag_value(self.syntaxc.get_crlf(), FLAG_CRLF);
434        let named_groups_only = Self::get_flag_value(
435            self.ignore_numbered_groups_when_named_groups_exist,
436            FLAG_IGNORE_NUMBERED_GROUPS_WHEN_NAMED_GROUPS_EXIST,
437        );
438
439        insensitive
440            | multiline
441            | whitespace
442            | dotnl
443            | unicode
444            | oniguruma_mode
445            | crlf
446            | named_groups_only
447    }
448}
449
450impl Default for HardRegexRuntimeOptions {
451    fn default() -> Self {
452        HardRegexRuntimeOptions {
453            backtrack_limit: 1_000_000,
454            find_not_empty: false,
455        }
456    }
457}
458
459impl Default for RegexOptionsBuilder {
460    fn default() -> Self {
461        Self::new()
462    }
463}
464
465impl RegexOptionsBuilder {
466    /// Create a new regex options builder.
467    pub fn new() -> Self {
468        RegexOptionsBuilder {
469            options: RegexOptions::default(),
470        }
471    }
472
473    /// Build a `Regex` from the given pattern.
474    ///
475    /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
476    pub fn build(&self, pattern: String) -> Result<Regex> {
477        Regex::new_options(pattern, &self.options)
478    }
479
480    fn set_config(&mut self, func: impl Fn(SyntaxConfig) -> SyntaxConfig) -> &mut Self {
481        self.options.syntaxc = func(self.options.syntaxc);
482        self
483    }
484
485    /// Override default case insensitive
486    /// this is to enable/disable casing via builder instead of a flag within
487    /// the raw string pattern which will be parsed
488    ///
489    /// Default is false
490    pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
491        self.set_config(|x| x.case_insensitive(yes))
492    }
493
494    /// Enable multi-line regex
495    pub fn multi_line(&mut self, yes: bool) -> &mut Self {
496        self.set_config(|x| x.multi_line(yes))
497    }
498
499    /// Allow ignore whitespace
500    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut Self {
501        self.set_config(|x| x.ignore_whitespace(yes))
502    }
503
504    /// Enable or disable the "dot matches any character" flag.
505    /// When this is enabled, `.` will match any character. When it's disabled, then `.` will match any character
506    /// except for a new line character.
507    pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
508        self.set_config(|x| x.dot_matches_new_line(yes))
509    }
510
511    /// Enable or disable the CRLF mode flag (`R`).
512    ///
513    /// When enabled, `\r\n` is treated as a single line ending for the purposes of
514    /// `^` and `$` in multi-line mode, instead of treating `\r` and `\n` as separate
515    /// line endings.
516    ///
517    /// By default, this is disabled. It may be selectively enabled in the regular
518    /// expression by using the `R` flag, e.g. `(?mR)` or `(?Rm)`.
519    pub fn crlf(&mut self, yes: bool) -> &mut Self {
520        self.set_config(|x| x.crlf(yes))
521    }
522
523    /// Enable verbose mode in the regular expression.
524    ///
525    /// The same as ignore_whitespace
526    ///
527    /// When enabled, verbose mode permits insigificant whitespace in many
528    /// places in the regular expression, as well as comments. Comments are
529    /// started using `#` and continue until the end of the line.
530    ///
531    /// By default, this is disabled. It may be selectively enabled in the
532    /// regular expression by using the `x` flag regardless of this setting.
533    pub fn verbose_mode(&mut self, yes: bool) -> &mut Self {
534        self.set_config(|x| x.ignore_whitespace(yes))
535    }
536
537    /// Enable or disable the Unicode flag (`u`) by default.
538    ///
539    /// By default this is **enabled**. It may alternatively be selectively
540    /// disabled in the regular expression itself via the `u` flag.
541    ///
542    /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
543    /// default), a regular expression will fail to parse if Unicode mode is
544    /// disabled and a sub-expression could possibly match invalid UTF-8.
545    ///
546    /// **WARNING**: Unicode mode can greatly increase the size of the compiled
547    /// DFA, which can noticeably impact both memory usage and compilation
548    /// time. This is especially noticeable if your regex contains character
549    /// classes like `\w` that are impacted by whether Unicode is enabled or
550    /// not. If Unicode is not necessary, you are encouraged to disable it.
551    pub fn unicode_mode(&mut self, yes: bool) -> &mut Self {
552        self.set_config(|x| x.unicode(yes))
553    }
554
555    /// Limit for how many times backtracking should be attempted for fancy regexes (where
556    /// backtracking is used). If this limit is exceeded, execution returns an error with
557    /// [`Error::BacktrackLimitExceeded`](enum.Error.html#variant.BacktrackLimitExceeded).
558    /// This is for preventing a regex with catastrophic backtracking to run for too long.
559    ///
560    /// Default is `1_000_000` (1 million).
561    pub fn backtrack_limit(&mut self, limit: usize) -> &mut Self {
562        self.options.hard_regex_runtime_options.backtrack_limit = limit;
563        self
564    }
565
566    /// Set the approximate size limit of the compiled regular expression.
567    ///
568    /// This option is forwarded from the wrapped `regex` crate. Note that depending on the used
569    /// regex features there may be multiple delegated sub-regexes fed to the `regex` crate. As
570    /// such the actual limit is closer to `<number of delegated regexes> * delegate_size_limit`.
571    pub fn delegate_size_limit(&mut self, limit: usize) -> &mut Self {
572        self.options.delegate_size_limit = Some(limit);
573        self
574    }
575
576    /// Set the approximate size of the cache used by the DFA.
577    ///
578    /// This option is forwarded from the wrapped `regex` crate. Note that depending on the used
579    /// regex features there may be multiple delegated sub-regexes fed to the `regex` crate. As
580    /// such the actual limit is closer to `<number of delegated regexes> *
581    /// delegate_dfa_size_limit`.
582    pub fn delegate_dfa_size_limit(&mut self, limit: usize) -> &mut Self {
583        self.options.delegate_dfa_size_limit = Some(limit);
584        self
585    }
586
587    /// Require that matches are non-empty (i.e. match at least one character).
588    ///
589    /// When this is enabled, any match attempt that would result in a zero-length match is
590    /// rejected.
591    ///
592    /// Default is `false`.
593    ///
594    /// N.B. When `find_not_empty` is set and analysis determines the pattern will only ever
595    /// produce an empty match, compiling the regex will return
596    /// `CompileError::PatternCanNeverMatch` instead of silently constructing a regex that can never
597    /// return a result. This catches the user error at compile time rather than allowing the
598    /// combination to execute pointlessly at runtime.
599    pub fn find_not_empty(&mut self, yes: bool) -> &mut Self {
600        self.options.hard_regex_runtime_options.find_not_empty = yes;
601        self
602    }
603
604    /// Treat unnamed capture groups as non-capturing when named groups exist.
605    /// Prevents accessing capture groups by number from within the pattern
606    /// (backrefs, subroutine calls) when named groups are present.
607    pub fn ignore_numbered_groups_when_named_groups_exist(&mut self, yes: bool) -> &mut Self {
608        self.options.ignore_numbered_groups_when_named_groups_exist = yes;
609        self
610    }
611
612    /// Attempts to better match [Oniguruma](https://github.com/kkos/oniguruma)'s default behavior
613    ///
614    /// Currently this amounts to changing behavior with:
615    ///
616    /// # Left and right word bounds
617    ///
618    /// `fancy-regex` follows the default of other regex engines such as the `regex` crate itself
619    /// where `\<` and `\>` correspond to a _left_ and _right_ word-bound respectively. This
620    /// differs from Oniguruma's defaults which treat them as matching the literals `<` and `>`.
621    /// When this option is set using `\<` and `\>` in the pattern will match the literals
622    /// `<` and `>` instead of word bounds.
623    ///
624    /// # Repetition/Quantifiers on empty groups
625    ///
626    /// `fancy-regex` would normally reject patterns like `(?:)+` because the `+` has nothing
627    /// to target. In Oniguruma mode, the empty repeat is silently dropped at parse time.
628    ///
629    /// ## Example
630    ///
631    /// ```
632    /// use fancy_regex::{Regex, RegexBuilder};
633    ///
634    /// let haystack = "turbo::<Fish>";
635    /// let regex = r"\<\w*\>";
636    ///
637    /// // By default `\<` and `\>` will match the start and end of a word boundary
638    /// let word_bounds_regex = Regex::new(regex).unwrap();
639    /// let word_bounds = word_bounds_regex.find(haystack).unwrap().unwrap();
640    /// assert_eq!(word_bounds.as_str(), "turbo");
641    ///
642    /// // With the option set they instead match the literal `<` and `>` characters
643    /// let literals_regex = RegexBuilder::new(regex).oniguruma_mode(true).build().unwrap();
644    /// let literals = literals_regex.find(haystack).unwrap().unwrap();
645    /// assert_eq!(literals.as_str(), "<Fish>");
646    /// ```
647    pub fn oniguruma_mode(&mut self, yes: bool) -> &mut Self {
648        self.options.oniguruma_mode = yes;
649        self
650    }
651}
652
653impl RegexBuilder {
654    /// Create a new regex builder.
655    pub fn new(pattern: &str) -> Self {
656        RegexBuilder {
657            pattern: pattern.to_string(),
658            options: RegexOptionsBuilder::new(),
659        }
660    }
661
662    /// Build a `Regex` from the given pattern.
663    ///
664    /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
665    pub fn build(&self) -> Result<Regex> {
666        self.options.build(self.pattern.clone())
667    }
668
669    /// Change the pattern to build. Useful when building multiple regexes from
670    /// many patterns.
671    pub fn pattern(&mut self, pattern: String) -> &mut Self {
672        self.pattern = pattern;
673        self
674    }
675
676    /// See [`RegexOptionsBuilder::case_insensitive`]
677    pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
678        self.options.case_insensitive(yes);
679        self
680    }
681
682    /// See [`RegexOptionsBuilder::multi_line`]
683    pub fn multi_line(&mut self, yes: bool) -> &mut Self {
684        self.options.multi_line(yes);
685        self
686    }
687
688    /// See [`RegexOptionsBuilder::ignore_whitespace`]
689    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut Self {
690        self.options.ignore_whitespace(yes);
691        self
692    }
693
694    /// See [`RegexOptionsBuilder::dot_matches_new_line`]
695    pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
696        self.options.dot_matches_new_line(yes);
697        self
698    }
699
700    /// See [`RegexOptionsBuilder::verbose_mode`]
701    pub fn verbose_mode(&mut self, yes: bool) -> &mut Self {
702        self.options.ignore_whitespace(yes);
703        self
704    }
705
706    /// See [`RegexOptionsBuilder::unicode_mode`]
707    pub fn unicode_mode(&mut self, yes: bool) -> &mut Self {
708        self.options.unicode_mode(yes);
709        self
710    }
711
712    /// See [`RegexOptionsBuilder::backtrack_limit`]
713    pub fn backtrack_limit(&mut self, limit: usize) -> &mut Self {
714        self.options.backtrack_limit(limit);
715        self
716    }
717
718    /// See [`RegexOptionsBuilder::delegate_size_limit`]
719    pub fn delegate_size_limit(&mut self, limit: usize) -> &mut Self {
720        self.options.delegate_size_limit(limit);
721        self
722    }
723
724    /// See [`RegexOptionsBuilder::delegate_dfa_size_limit`]
725    pub fn delegate_dfa_size_limit(&mut self, limit: usize) -> &mut Self {
726        self.options.delegate_dfa_size_limit(limit);
727        self
728    }
729
730    /// See [`RegexOptionsBuilder::oniguruma_mode`]
731    pub fn oniguruma_mode(&mut self, yes: bool) -> &mut Self {
732        self.options.oniguruma_mode(yes);
733        self
734    }
735
736    /// See [`RegexOptionsBuilder::crlf`]
737    pub fn crlf(&mut self, yes: bool) -> &mut Self {
738        self.options.crlf(yes);
739        self
740    }
741
742    /// See [`RegexOptionsBuilder::find_not_empty`]
743    pub fn find_not_empty(&mut self, yes: bool) -> &mut Self {
744        self.options.find_not_empty(yes);
745        self
746    }
747
748    /// See [`RegexOptionsBuilder::ignore_numbered_groups_when_named_groups_exist`]
749    pub fn ignore_numbered_groups_when_named_groups_exist(&mut self, yes: bool) -> &mut Self {
750        self.options
751            .ignore_numbered_groups_when_named_groups_exist(yes);
752        self
753    }
754}
755
756impl fmt::Debug for Regex {
757    /// Shows the original regular expression.
758    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
759        write!(f, "{}", self.as_str())
760    }
761}
762
763impl fmt::Display for Regex {
764    /// Shows the original regular expression
765    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
766        write!(f, "{}", self.as_str())
767    }
768}
769
770impl FromStr for Regex {
771    type Err = Error;
772
773    /// Attempts to parse a string into a regular expression
774    fn from_str(s: &str) -> Result<Regex> {
775        Regex::new(s)
776    }
777}
778
779impl Regex {
780    /// Parse and compile a regex with default options, see `RegexBuilder`.
781    ///
782    /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
783    pub fn new(re: &str) -> Result<Regex> {
784        Self::new_options(re.to_string(), &RegexOptions::default())
785    }
786
787    fn new_options(pattern: String, options: &RegexOptions) -> Result<Regex> {
788        let mut tree = Expr::parse_tree_with_flags(&pattern, options.compute_flags())?;
789
790        let find_not_empty = options.hard_regex_runtime_options.find_not_empty;
791
792        let requires_capture_group_fixup = if find_not_empty {
793            // if the find_not_empty flag is set, we skip optimizations
794            // partially because we have to go though the VM anyway
795            // partially because having the last instruction of the expression not have
796            // ix be at the end of capture group 0 ruins our empty match checking logic.
797            false
798        } else {
799            // try to optimize the expression tree so that a hard pattern could become easy
800            // with a fixup of the capture groups
801            optimize(&mut tree)
802        };
803        let info = analyze(
804            &tree,
805            AnalyzeContext {
806                explicit_capture_group_0: requires_capture_group_fixup,
807                find_not_empty,
808            },
809        )?;
810
811        if find_not_empty && info.const_size && info.min_size == 0 {
812            return Err(CompileError::PatternCanNeverMatch.into());
813        }
814
815        if !info.hard {
816            // easy case, wrap regex
817
818            // we do our own to_str because escapes are different
819            // NOTE: there is a good opportunity here to use Hir to avoid regex-automata re-parsing it
820            let mut re_cooked = String::new();
821            tree.expr.to_str(&mut re_cooked, 0);
822            let inner = compile::compile_inner(&re_cooked, options)?;
823            return Ok(Regex {
824                inner: RegexImpl::Wrap {
825                    inner,
826                    pattern,
827                    explicit_capture_group_0: requires_capture_group_fixup,
828                    delegated_pattern: re_cooked,
829                },
830                named_groups: Arc::new(tree.named_groups),
831            });
832        }
833
834        let prog = compile(
835            &info,
836            CompileOptions {
837                anchored: can_compile_as_anchored(&tree.expr),
838                contains_subroutines: tree.contains_subroutines,
839            },
840        )?;
841        Ok(Regex {
842            inner: RegexImpl::Fancy {
843                prog: Arc::new(prog),
844                n_groups: info.end_group(),
845                options: options.hard_regex_runtime_options,
846                pattern,
847            },
848            named_groups: Arc::new(tree.named_groups),
849        })
850    }
851
852    /// Returns the original string of this regex.
853    pub fn as_str(&self) -> &str {
854        match &self.inner {
855            RegexImpl::Wrap { pattern, .. } => pattern,
856            RegexImpl::Fancy { pattern, .. } => pattern,
857        }
858    }
859
860    /// Check if the regex matches the input text.
861    ///
862    /// # Example
863    ///
864    /// Test if some text contains the same word twice:
865    ///
866    /// ```rust
867    /// # use fancy_regex::Regex;
868    ///
869    /// let re = Regex::new(r"(\w+) \1").unwrap();
870    /// assert!(re.is_match("mirror mirror on the wall").unwrap());
871    /// ```
872    pub fn is_match(&self, text: &str) -> Result<bool> {
873        match &self.inner {
874            RegexImpl::Wrap { inner, .. } => Ok(inner.is_match(text)),
875            RegexImpl::Fancy { .. } => self.find(text).map(|m| m.is_some()),
876        }
877    }
878
879    /// Returns an iterator for each successive non-overlapping match in `text`.
880    ///
881    /// If you have capturing groups in your regex that you want to extract, use the [Regex::captures_iter()]
882    /// method.
883    ///
884    /// # Example
885    ///
886    /// Find all words followed by an exclamation point:
887    ///
888    /// ```rust
889    /// # use fancy_regex::Regex;
890    ///
891    /// let re = Regex::new(r"\w+(?=!)").unwrap();
892    /// let mut matches = re.find_iter("so fancy! even with! iterators!");
893    /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "fancy");
894    /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "with");
895    /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "iterators");
896    /// assert!(matches.next().is_none());
897    /// ```
898    pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
899        Matches {
900            re: self,
901            text,
902            last_end: 0,
903            last_match: None,
904            last_skipped_empty: false,
905        }
906    }
907
908    /// Find the first match in the input text.
909    ///
910    /// If you have capturing groups in your regex that you want to extract, use the [Regex::captures()]
911    /// method.
912    ///
913    /// # Example
914    ///
915    /// Find a word that is followed by an exclamation point:
916    ///
917    /// ```rust
918    /// # use fancy_regex::Regex;
919    ///
920    /// let re = Regex::new(r"\w+(?=!)").unwrap();
921    /// assert_eq!(re.find("so fancy!").unwrap().unwrap().as_str(), "fancy");
922    /// ```
923    pub fn find<'t>(&self, text: &'t str) -> Result<Option<Match<'t>>> {
924        self.find_from_pos(text, 0)
925    }
926
927    /// Returns the first match in `text`, starting from the specified byte position `pos`.
928    ///
929    /// # Examples
930    ///
931    /// Finding match starting at a position:
932    ///
933    /// ```
934    /// # use fancy_regex::Regex;
935    /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
936    /// let text = "1 test 123\n2 foo";
937    /// let mat = re.find_from_pos(text, 7).unwrap().unwrap();
938    ///
939    /// assert_eq!(mat.start(), 11);
940    /// assert_eq!(mat.end(), 12);
941    /// ```
942    ///
943    /// Note that in some cases this is not the same as using the `find`
944    /// method and passing a slice of the string, see [Regex::captures_from_pos()] for details.
945    pub fn find_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Match<'t>>> {
946        self.find_from_pos_with_option_flags(text, pos, 0)
947    }
948
949    fn find_from_pos_with_option_flags<'t>(
950        &self,
951        text: &'t str,
952        pos: usize,
953        option_flags: u32,
954    ) -> Result<Option<Match<'t>>> {
955        if pos > text.len() {
956            return Ok(None);
957        }
958        match &self.inner {
959            RegexImpl::Wrap {
960                inner,
961                explicit_capture_group_0,
962                ..
963            } => {
964                let result = if !*explicit_capture_group_0 {
965                    inner
966                        .search(&RaInput::new(text).span(pos..text.len()))
967                        .map(|m| Match::new(text, m.start(), m.end()))
968                } else {
969                    let mut locations = inner.create_captures();
970                    inner.captures(RaInput::new(text).span(pos..text.len()), &mut locations);
971                    locations
972                        .get_group(1)
973                        .map(|group1| Match::new(text, group1.start, group1.end))
974                };
975                Ok(result)
976            }
977            RegexImpl::Fancy { prog, options, .. } => {
978                let option_flags = option_flags
979                    | if options.find_not_empty {
980                        OPTION_FIND_NOT_EMPTY
981                    } else {
982                        0
983                    };
984                let result = vm::run(prog, text, pos, option_flags, options)?;
985                Ok(result.map(|saves| Match::new(text, saves[0], saves[1])))
986            }
987        }
988    }
989
990    /// Returns an iterator over all the non-overlapping capture groups matched in `text`.
991    ///
992    /// # Examples
993    ///
994    /// Finding all matches and capturing parts of each:
995    ///
996    /// ```rust
997    /// # use fancy_regex::Regex;
998    ///
999    /// let re = Regex::new(r"(\d{4})-(\d{2})").unwrap();
1000    /// let text = "It was between 2018-04 and 2020-01";
1001    /// let mut all_captures = re.captures_iter(text);
1002    ///
1003    /// let first = all_captures.next().unwrap().unwrap();
1004    /// assert_eq!(first.get(1).unwrap().as_str(), "2018");
1005    /// assert_eq!(first.get(2).unwrap().as_str(), "04");
1006    /// assert_eq!(first.get(0).unwrap().as_str(), "2018-04");
1007    ///
1008    /// let second = all_captures.next().unwrap().unwrap();
1009    /// assert_eq!(second.get(1).unwrap().as_str(), "2020");
1010    /// assert_eq!(second.get(2).unwrap().as_str(), "01");
1011    /// assert_eq!(second.get(0).unwrap().as_str(), "2020-01");
1012    ///
1013    /// assert!(all_captures.next().is_none());
1014    /// ```
1015    pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CaptureMatches<'r, 't> {
1016        CaptureMatches(self.find_iter(text))
1017    }
1018
1019    /// Returns the capture groups for the first match in `text`.
1020    ///
1021    /// If no match is found, then `Ok(None)` is returned.
1022    ///
1023    /// # Examples
1024    ///
1025    /// Finding matches and capturing parts of the match:
1026    ///
1027    /// ```rust
1028    /// # use fancy_regex::Regex;
1029    ///
1030    /// let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
1031    /// let text = "The date was 2018-04-07";
1032    /// let captures = re.captures(text).unwrap().unwrap();
1033    ///
1034    /// assert_eq!(captures.get(1).unwrap().as_str(), "2018");
1035    /// assert_eq!(captures.get(2).unwrap().as_str(), "04");
1036    /// assert_eq!(captures.get(3).unwrap().as_str(), "07");
1037    /// assert_eq!(captures.get(0).unwrap().as_str(), "2018-04-07");
1038    /// ```
1039    pub fn captures<'t>(&self, text: &'t str) -> Result<Option<Captures<'t>>> {
1040        self.captures_from_pos(text, 0)
1041    }
1042
1043    /// Returns the capture groups for the first match in `text`, starting from
1044    /// the specified byte position `pos`.
1045    ///
1046    /// # Examples
1047    ///
1048    /// Finding captures starting at a position:
1049    ///
1050    /// ```
1051    /// # use fancy_regex::Regex;
1052    /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
1053    /// let text = "1 test 123\n2 foo";
1054    /// let captures = re.captures_from_pos(text, 7).unwrap().unwrap();
1055    ///
1056    /// let group = captures.get(1).unwrap();
1057    /// assert_eq!(group.as_str(), "2");
1058    /// assert_eq!(group.start(), 11);
1059    /// assert_eq!(group.end(), 12);
1060    /// ```
1061    ///
1062    /// Note that in some cases this is not the same as using the `captures`
1063    /// method and passing a slice of the string, see the capture that we get
1064    /// when we do this:
1065    ///
1066    /// ```
1067    /// # use fancy_regex::Regex;
1068    /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
1069    /// let text = "1 test 123\n2 foo";
1070    /// let captures = re.captures(&text[7..]).unwrap().unwrap();
1071    /// assert_eq!(captures.get(1).unwrap().as_str(), "123");
1072    /// ```
1073    ///
1074    /// This matched the number "123" because it's at the beginning of the text
1075    /// of the string slice.
1076    ///
1077    pub fn captures_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Captures<'t>>> {
1078        self.captures_from_pos_with_option_flags(text, pos, 0)
1079    }
1080
1081    fn captures_from_pos_with_option_flags<'t>(
1082        &self,
1083        text: &'t str,
1084        pos: usize,
1085        option_flags: u32,
1086    ) -> Result<Option<Captures<'t>>> {
1087        if pos > text.len() {
1088            return Ok(None);
1089        }
1090        let named_groups = self.named_groups.clone();
1091        match &self.inner {
1092            RegexImpl::Wrap {
1093                inner,
1094                explicit_capture_group_0,
1095                ..
1096            } => {
1097                // find_not_empty patterns are always compiled as Fancy, so find_not_empty is
1098                // always false here.
1099                let explicit = *explicit_capture_group_0;
1100                let mut locations = inner.create_captures();
1101                inner.captures(RaInput::new(text).span(pos..text.len()), &mut locations);
1102                Ok(locations.is_match().then_some(Captures {
1103                    inner: CapturesImpl::Wrap {
1104                        text,
1105                        locations,
1106                        explicit_capture_group_0: explicit,
1107                    },
1108                    named_groups,
1109                }))
1110            }
1111            RegexImpl::Fancy {
1112                prog,
1113                n_groups,
1114                options,
1115                ..
1116            } => {
1117                let option_flags = option_flags
1118                    | if options.find_not_empty {
1119                        OPTION_FIND_NOT_EMPTY
1120                    } else {
1121                        0
1122                    };
1123                let result = vm::run(prog, text, pos, option_flags, options)?;
1124                Ok(result.map(|mut saves| {
1125                    saves.truncate(n_groups * 2);
1126                    Captures {
1127                        inner: CapturesImpl::Fancy { text, saves },
1128                        named_groups,
1129                    }
1130                }))
1131            }
1132        }
1133    }
1134
1135    /// Returns the number of captures, including the implicit capture of the entire expression.
1136    pub fn captures_len(&self) -> usize {
1137        match &self.inner {
1138            RegexImpl::Wrap {
1139                inner,
1140                explicit_capture_group_0,
1141                ..
1142            } => inner.captures_len() - if *explicit_capture_group_0 { 1 } else { 0 },
1143            RegexImpl::Fancy { n_groups, .. } => *n_groups,
1144        }
1145    }
1146
1147    /// Returns an iterator over the capture names.
1148    pub fn capture_names(&self) -> CaptureNames<'_> {
1149        let mut names = Vec::new();
1150        names.resize(self.captures_len(), None);
1151        for (name, &i) in self.named_groups.iter() {
1152            names[i] = Some(name.as_str());
1153        }
1154        CaptureNames(names.into_iter())
1155    }
1156
1157    // for debugging only
1158    #[doc(hidden)]
1159    pub fn debug_print(&self, writer: &mut Formatter<'_>) -> fmt::Result {
1160        match &self.inner {
1161            RegexImpl::Wrap {
1162                delegated_pattern,
1163                explicit_capture_group_0,
1164                ..
1165            } => {
1166                write!(
1167                    writer,
1168                    "wrapped Regex {:?}, explicit_capture_group_0: {:}",
1169                    delegated_pattern, *explicit_capture_group_0
1170                )
1171            }
1172            RegexImpl::Fancy { prog, .. } => prog.debug_print(writer),
1173        }
1174    }
1175
1176    /// Replaces the leftmost-first match with the replacement provided.
1177    /// The replacement can be a regular string (where `$N` and `$name` are
1178    /// expanded to match capture groups) or a function that takes the matches'
1179    /// `Captures` and returns the replaced string.
1180    ///
1181    /// If no match is found, then a copy of the string is returned unchanged.
1182    ///
1183    /// # Replacement string syntax
1184    ///
/// All instances of `$name` in the replacement text are replaced with the
/// corresponding capture group `name`.
1187    ///
1188    /// `name` may be an integer corresponding to the index of the
1189    /// capture group (counted by order of opening parenthesis where `0` is the
1190    /// entire match) or it can be a name (consisting of letters, digits or
1191    /// underscores) corresponding to a named capture group.
1192    ///
1193    /// If `name` isn't a valid capture group (whether the name doesn't exist
1194    /// or isn't a valid index), then it is replaced with the empty string.
1195    ///
1196    /// The longest possible name is used. e.g., `$1a` looks up the capture
1197    /// group named `1a` and not the capture group at index `1`. To exert more
1198    /// precise control over the name, use braces, e.g., `${1}a`.
1199    ///
1200    /// To write a literal `$` use `$$`.
1201    ///
1202    /// # Examples
1203    ///
1204    /// Note that this function is polymorphic with respect to the replacement.
1205    /// In typical usage, this can just be a normal string:
1206    ///
1207    /// ```rust
1208    /// # use fancy_regex::Regex;
1209    /// let re = Regex::new("[^01]+").unwrap();
1210    /// assert_eq!(re.replace("1078910", ""), "1010");
1211    /// ```
1212    ///
1213    /// But anything satisfying the `Replacer` trait will work. For example,
1214    /// a closure of type `|&Captures| -> String` provides direct access to the
1215    /// captures corresponding to a match. This allows one to access
1216    /// capturing group matches easily:
1217    ///
1218    /// ```rust
1219    /// # use fancy_regex::{Regex, Captures};
1220    /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
1221    /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
1222    ///     format!("{} {}", &caps[2], &caps[1])
1223    /// });
1224    /// assert_eq!(result, "Bruce Springsteen");
1225    /// ```
1226    ///
1227    /// But this is a bit cumbersome to use all the time. Instead, a simple
1228    /// syntax is supported that expands `$name` into the corresponding capture
1229    /// group. Here's the last example, but using this expansion technique
1230    /// with named capture groups:
1231    ///
1232    /// ```rust
1233    /// # use fancy_regex::Regex;
1234    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
1235    /// let result = re.replace("Springsteen, Bruce", "$first $last");
1236    /// assert_eq!(result, "Bruce Springsteen");
1237    /// ```
1238    ///
1239    /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
1240    /// would produce the same result. To write a literal `$` use `$$`.
1241    ///
1242    /// Sometimes the replacement string requires use of curly braces to
1243    /// delineate a capture group replacement and surrounding literal text.
1244    /// For example, if we wanted to join two words together with an
1245    /// underscore:
1246    ///
1247    /// ```rust
1248    /// # use fancy_regex::Regex;
1249    /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
1250    /// let result = re.replace("deep fried", "${first}_$second");
1251    /// assert_eq!(result, "deep_fried");
1252    /// ```
1253    ///
1254    /// Without the curly braces, the capture group name `first_` would be
1255    /// used, and since it doesn't exist, it would be replaced with the empty
1256    /// string.
1257    ///
1258    /// Finally, sometimes you just want to replace a literal string with no
1259    /// regard for capturing group expansion. This can be done by wrapping a
1260    /// byte string with `NoExpand`:
1261    ///
1262    /// ```rust
1263    /// # use fancy_regex::Regex;
1264    /// use fancy_regex::NoExpand;
1265    ///
1266    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
1267    /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
1268    /// assert_eq!(result, "$2 $last");
1269    /// ```
1270    pub fn replace<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
1271        self.replacen(text, 1, rep)
1272    }
1273
1274    /// Replaces all non-overlapping matches in `text` with the replacement
1275    /// provided. This is the same as calling `replacen` with `limit` set to
1276    /// `0`.
1277    ///
1278    /// See the documentation for `replace` for details on how to access
1279    /// capturing group matches in the replacement string.
1280    pub fn replace_all<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
1281        self.replacen(text, 0, rep)
1282    }
1283
1284    /// Replaces at most `limit` non-overlapping matches in `text` with the
1285    /// replacement provided. If `limit` is 0, then all non-overlapping matches
1286    /// are replaced.
1287    ///
1288    /// Will panic if any errors are encountered. Use `try_replacen`, which this
1289    /// function unwraps, if you want to handle errors.
1290    ///
1291    /// See the documentation for `replace` for details on how to access
1292    /// capturing group matches in the replacement string.
1293    ///
1294    pub fn replacen<'t, R: Replacer>(&self, text: &'t str, limit: usize, rep: R) -> Cow<'t, str> {
1295        self.try_replacen(text, limit, rep).unwrap()
1296    }
1297
1298    /// Replaces at most `limit` non-overlapping matches in `text` with the
1299    /// replacement provided. If `limit` is 0, then all non-overlapping matches
1300    /// are replaced.
1301    ///
1302    /// Propagates any errors encountered, such as `RuntimeError::BacktrackLimitExceeded`.
1303    ///
1304    /// See the documentation for `replace` for details on how to access
1305    /// capturing group matches in the replacement string.
1306    pub fn try_replacen<'t, R: Replacer>(
1307        &self,
1308        text: &'t str,
1309        limit: usize,
1310        mut rep: R,
1311    ) -> Result<Cow<'t, str>> {
1312        // If we know that the replacement doesn't have any capture expansions,
1313        // then we can fast path. The fast path can make a tremendous
1314        // difference:
1315        //
1316        //   1) We use `find_iter` instead of `captures_iter`. Not asking for
1317        //      captures generally makes the regex engines faster.
1318        //   2) We don't need to look up all of the capture groups and do
1319        //      replacements inside the replacement string. We just push it
1320        //      at each match and be done with it.
1321        if let Some(rep) = rep.no_expansion() {
1322            let mut it = self.find_iter(text).enumerate().peekable();
1323            if it.peek().is_none() {
1324                return Ok(Cow::Borrowed(text));
1325            }
1326            let mut new = String::with_capacity(text.len());
1327            let mut last_match = 0;
1328            for (i, m) in it {
1329                let m = m?;
1330
1331                if limit > 0 && i >= limit {
1332                    break;
1333                }
1334                new.push_str(&text[last_match..m.start()]);
1335                new.push_str(&rep);
1336                last_match = m.end();
1337            }
1338            new.push_str(&text[last_match..]);
1339            return Ok(Cow::Owned(new));
1340        }
1341
1342        // The slower path, which we use if the replacement needs access to
1343        // capture groups.
1344        let mut it = self.captures_iter(text).enumerate().peekable();
1345        if it.peek().is_none() {
1346            return Ok(Cow::Borrowed(text));
1347        }
1348        let mut new = String::with_capacity(text.len());
1349        let mut last_match = 0;
1350        for (i, cap) in it {
1351            let cap = cap?;
1352
1353            if limit > 0 && i >= limit {
1354                break;
1355            }
1356            // unwrap on 0 is OK because captures only reports matches
1357            let m = cap.get(0).unwrap();
1358            new.push_str(&text[last_match..m.start()]);
1359            rep.replace_append(&cap, &mut new);
1360            last_match = m.end();
1361        }
1362        new.push_str(&text[last_match..]);
1363        Ok(Cow::Owned(new))
1364    }
1365
1366    /// Splits the string by matches of the regex.
1367    ///
1368    /// Returns an iterator over the substrings of the target string
1369    ///  that *aren't* matched by the regex.
1370    ///
1371    /// # Example
1372    ///
1373    /// To split a string delimited by arbitrary amounts of spaces or tabs:
1374    ///
1375    /// ```rust
1376    /// # use fancy_regex::Regex;
1377    /// let re = Regex::new(r"[ \t]+").unwrap();
1378    /// let target = "a b \t  c\td    e";
1379    /// let fields: Vec<&str> = re.split(target).map(|x| x.unwrap()).collect();
1380    /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
1381    /// ```
1382    pub fn split<'r, 'h>(&'r self, target: &'h str) -> Split<'r, 'h> {
1383        Split {
1384            matches: self.find_iter(target),
1385            next_start: 0,
1386            target,
1387        }
1388    }
1389
1390    /// Splits the string by matches of the regex at most `limit` times.
1391    ///
1392    /// Returns an iterator over the substrings of the target string
1393    /// that *aren't* matched by the regex.
1394    ///
1395    /// The `N`th substring is the remaining part of the target.
1396    ///
1397    /// # Example
1398    ///
1399    /// To split a string delimited by arbitrary amounts of spaces or tabs
1400    /// 3 times:
1401    ///
1402    /// ```rust
1403    /// # use fancy_regex::Regex;
1404    /// let re = Regex::new(r"[ \t]+").unwrap();
1405    /// let target = "a b \t  c\td    e";
1406    /// let fields: Vec<&str> = re.splitn(target, 3).map(|x| x.unwrap()).collect();
1407    /// assert_eq!(fields, vec!["a", "b", "c\td    e"]);
1408    /// ```
1409    pub fn splitn<'r, 'h>(&'r self, target: &'h str, limit: usize) -> SplitN<'r, 'h> {
1410        SplitN {
1411            splits: self.split(target),
1412            limit,
1413        }
1414    }
1415}
1416
1417impl TryFrom<&str> for Regex {
1418    type Error = Error;
1419
1420    /// Attempts to parse a string into a regular expression
1421    fn try_from(s: &str) -> Result<Self> {
1422        Self::new(s)
1423    }
1424}
1425
1426impl TryFrom<String> for Regex {
1427    type Error = Error;
1428
1429    /// Attempts to parse a string into a regular expression
1430    fn try_from(s: String) -> Result<Self> {
1431        Self::new(&s)
1432    }
1433}
1434
1435impl<'t> Match<'t> {
1436    /// Returns the starting byte offset of the match in the text.
1437    #[inline]
1438    pub fn start(&self) -> usize {
1439        self.start
1440    }
1441
1442    /// Returns the ending byte offset of the match in the text.
1443    #[inline]
1444    pub fn end(&self) -> usize {
1445        self.end
1446    }
1447
1448    /// Returns the range over the starting and ending byte offsets of the match in text.
1449    #[inline]
1450    pub fn range(&self) -> Range<usize> {
1451        self.start..self.end
1452    }
1453
1454    /// Returns the matched text.
1455    #[inline]
1456    pub fn as_str(&self) -> &'t str {
1457        &self.text[self.start..self.end]
1458    }
1459
1460    /// Creates a new match from the given text and byte offsets.
1461    fn new(text: &'t str, start: usize, end: usize) -> Match<'t> {
1462        Match { text, start, end }
1463    }
1464}
1465
1466impl<'t> From<Match<'t>> for &'t str {
1467    fn from(m: Match<'t>) -> &'t str {
1468        m.as_str()
1469    }
1470}
1471
1472impl<'t> From<Match<'t>> for Range<usize> {
1473    fn from(m: Match<'t>) -> Range<usize> {
1474        m.range()
1475    }
1476}
1477
1478#[allow(clippy::len_without_is_empty)] // follow regex's API
1479impl<'t> Captures<'t> {
1480    /// Get the capture group by its index in the regex.
1481    ///
1482    /// If there is no match for that group or the index does not correspond to a group, `None` is
1483    /// returned. The index 0 returns the whole match.
1484    pub fn get(&self, i: usize) -> Option<Match<'t>> {
1485        match &self.inner {
1486            CapturesImpl::Wrap {
1487                text,
1488                locations,
1489                explicit_capture_group_0,
1490            } => locations
1491                .get_group(i + if *explicit_capture_group_0 { 1 } else { 0 })
1492                .map(|span| Match {
1493                    text,
1494                    start: span.start,
1495                    end: span.end,
1496                }),
1497            CapturesImpl::Fancy { text, saves } => {
1498                let slot = i * 2;
1499                if slot >= saves.len() {
1500                    return None;
1501                }
1502                let lo = saves[slot];
1503                if lo == usize::MAX {
1504                    return None;
1505                }
1506                let hi = saves[slot + 1];
1507                Some(Match {
1508                    text,
1509                    start: lo,
1510                    end: hi,
1511                })
1512            }
1513        }
1514    }
1515
1516    /// Returns the match for a named capture group.  Returns `None` the capture
1517    /// group did not match or if there is no group with the given name.
1518    pub fn name(&self, name: &str) -> Option<Match<'t>> {
1519        self.named_groups.get(name).and_then(|i| self.get(*i))
1520    }
1521
1522    /// Expands all instances of `$group` in `replacement` to the corresponding
1523    /// capture group `name`, and writes them to the `dst` buffer given.
1524    ///
1525    /// `group` may be an integer corresponding to the index of the
1526    /// capture group (counted by order of opening parenthesis where `\0` is the
1527    /// entire match) or it can be a name (consisting of letters, digits or
1528    /// underscores) corresponding to a named capture group.
1529    ///
1530    /// If `group` isn't a valid capture group (whether the name doesn't exist
1531    /// or isn't a valid index), then it is replaced with the empty string.
1532    ///
1533    /// The longest possible name is used. e.g., `$1a` looks up the capture
1534    /// group named `1a` and not the capture group at index `1`. To exert more
1535    /// precise control over the name, use braces, e.g., `${1}a`.
1536    ///
1537    /// To write a literal `$`, use `$$`.
1538    ///
1539    /// For more control over expansion, see [`Expander`].
1540    ///
1541    /// [`Expander`]: expand/struct.Expander.html
1542    pub fn expand(&self, replacement: &str, dst: &mut String) {
1543        Expander::default().append_expansion(dst, replacement, self);
1544    }
1545
1546    /// Iterate over the captured groups in order in which they appeared in the regex. The first
1547    /// capture corresponds to the whole match.
1548    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
1549        SubCaptureMatches { caps: self, i: 0 }
1550    }
1551
1552    /// How many groups were captured. This is always at least 1 because group 0 returns the whole
1553    /// match.
1554    pub fn len(&self) -> usize {
1555        match &self.inner {
1556            CapturesImpl::Wrap {
1557                locations,
1558                explicit_capture_group_0,
1559                ..
1560            } => locations.group_len() - if *explicit_capture_group_0 { 1 } else { 0 },
1561            CapturesImpl::Fancy { saves, .. } => saves.len() / 2,
1562        }
1563    }
1564}
1565
1566/// Get a group by index.
1567///
1568/// `'t` is the lifetime of the matched text.
1569///
1570/// The text can't outlive the `Captures` object if this method is
1571/// used, because of how `Index` is defined (normally `a[i]` is part
1572/// of `a` and can't outlive it); to do that, use `get()` instead.
1573///
1574/// # Panics
1575///
1576/// If there is no group at the given index.
1577impl<'t> Index<usize> for Captures<'t> {
1578    type Output = str;
1579
1580    fn index(&self, i: usize) -> &str {
1581        self.get(i)
1582            .map(|m| m.as_str())
1583            .unwrap_or_else(|| panic!("no group at index '{}'", i))
1584    }
1585}
1586
1587/// Get a group by name.
1588///
1589/// `'t` is the lifetime of the matched text and `'i` is the lifetime
1590/// of the group name (the index).
1591///
1592/// The text can't outlive the `Captures` object if this method is
1593/// used, because of how `Index` is defined (normally `a[i]` is part
1594/// of `a` and can't outlive it); to do that, use `name` instead.
1595///
1596/// # Panics
1597///
1598/// If there is no group named by the given value.
1599impl<'t, 'i> Index<&'i str> for Captures<'t> {
1600    type Output = str;
1601
1602    fn index<'a>(&'a self, name: &'i str) -> &'a str {
1603        self.name(name)
1604            .map(|m| m.as_str())
1605            .unwrap_or_else(|| panic!("no group named '{}'", name))
1606    }
1607}
1608
1609impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
1610    type Item = Option<Match<'t>>;
1611
1612    fn next(&mut self) -> Option<Option<Match<'t>>> {
1613        if self.i < self.caps.len() {
1614            let result = self.caps.get(self.i);
1615            self.i += 1;
1616            Some(result)
1617        } else {
1618            None
1619        }
1620    }
1621}
1622
1623// TODO: might be nice to implement ExactSizeIterator etc for SubCaptures
1624
/// Regular expression AST. This is public for now but may change.
///
/// Only some variants can be rendered back into `regex`-crate syntax by
/// [`Expr::to_str`]; passing any other ("hard") variant to it panics.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Expr {
    /// An empty expression, e.g. the last branch in `(a|b|)`.
    Empty,
    /// Any character, regex `.`.
    Any {
        /// Whether it also matches newlines or not.
        newline: bool,
        /// Whether CRLF mode is enabled (`\r` also counts as a newline, so dot
        /// excludes both `\r` and `\n`). When rendered by [`Expr::to_str`] this
        /// flag only has an effect if `newline` is `false`.
        crlf: bool,
    },
    /// An assertion.
    Assertion(Assertion),
    /// General newline sequence, `\R`.
    /// Matches `\r\n` or any single newline character (`\n`, `\v`, `\f`, `\r`).
    /// In Unicode mode, also matches U+0085, U+2028 and U+2029.
    GeneralNewline {
        /// Whether Unicode mode is enabled.
        unicode: bool,
    },
    /// The string as a literal, e.g. `a`.
    Literal {
        /// The string to match.
        val: String,
        /// Whether the match is case-insensitive or not.
        casei: bool,
    },
    /// Concatenation of multiple expressions that must match in order, e.g. `a.` is a
    /// concatenation of the literal `a` and `.` for any character.
    Concat(Vec<Expr>),
    /// Alternative of multiple expressions, one of which must match, e.g. `a|b` is an
    /// alternative where either the literal `a` or `b` must match.
    Alt(Vec<Expr>),
    /// Capturing group of an expression, e.g. `(a.)` matches `a` and any character and
    /// "captures" (remembers) the match.
    Group(Arc<Expr>),
    /// Look-around (e.g. positive/negative look-ahead or look-behind) with an expression, e.g.
    /// `(?=a)` means the next character must be `a` (but the match is not consumed).
    LookAround(Box<Expr>, LookAround),
    /// Repeat of an expression, e.g. `a*` or `a+` or `a{1,3}`.
    Repeat {
        /// The expression that is being repeated.
        child: Box<Expr>,
        /// The minimum number of repetitions.
        lo: usize,
        /// The maximum number of repetitions, or `usize::MAX` for no upper bound.
        hi: usize,
        /// Greedy means as much as possible is matched, e.g. `.*b` would match all of `abab`.
        /// Non-greedy means as little as possible, e.g. `.*?b` would match only `ab` in `abab`.
        greedy: bool,
    },
    /// Delegate a regex to the regex crate. This is used as a simplification so that we don't have
    /// to represent all the expressions in the AST, e.g. character classes.
    ///
    /// **Constraint**: All Delegate expressions must match exactly 1 character. This ensures
    /// consistent analysis and compilation behavior. For zero-width or multi-character patterns,
    /// use the appropriate Expr variants instead (e.g., Assertion, Repeat, Concat).
    Delegate {
        /// The regex, in the regex crate's syntax.
        inner: String,
        /// Whether the matching is case-insensitive or not.
        casei: bool,
    },
    /// Back reference to a capture group, e.g. `\1` in `(abc|def)\1` references the captured group
    /// and the whole regex matches either `abcabc` or `defdef`.
    Backref {
        /// The capture group number being referenced.
        group: usize,
        /// Whether the matching is case-insensitive or not.
        casei: bool,
    },
    /// Back reference to a capture group at the specified relative recursion level.
    BackrefWithRelativeRecursionLevel {
        /// The capture group number being referenced.
        group: usize,
        /// Relative recursion level.
        relative_level: isize,
        /// Whether the matching is case-insensitive or not.
        casei: bool,
    },
    /// Atomic non-capturing group, e.g. `(?>ab|a)` in text that contains `ab` will match `ab` and
    /// never backtrack and try `a`, even if matching fails after the atomic group.
    AtomicGroup(Box<Expr>),
    /// Keep the text matched so far out of the overall match.
    KeepOut,
    /// Anchor to match at the position where the previous match ended.
    ContinueFromPreviousMatchEnd,
    /// Conditional expression based on whether the numbered capture group matched or not.
    /// The optional `relative_recursion_level` qualifies which recursion level's capture is
    /// tested (Oniguruma `(?(name+N)...)` syntax).
    BackrefExistsCondition {
        /// The resolved capture group number.
        group: usize,
        /// Optional relative recursion level (e.g. `+0`, `-1`).
        relative_recursion_level: Option<isize>,
    },
    /// If/Then/Else condition. If there is no Then/Else, these will just be empty expressions.
    Conditional {
        /// The conditional expression to evaluate.
        condition: Box<Expr>,
        /// What to execute if the condition is true.
        true_branch: Box<Expr>,
        /// What to execute if the condition is false.
        false_branch: Box<Expr>,
    },
    /// Subroutine call to the specified group number.
    SubroutineCall(usize),
    /// Backtracking control verb.
    BacktrackingControlVerb(BacktrackingControlVerb),
    /// Match while the given expression is absent from the haystack.
    Absent(Absent),
    /// DEFINE group - defines capture groups for subroutines without matching anything.
    /// The expressions inside are parsed and assigned group numbers, but no VM instructions
    /// are generated for the DEFINE block itself.
    DefineGroup {
        /// The expressions/groups being defined.
        definitions: Box<Expr>,
    },
    /// Abstract Syntax Tree node - will be resolved into an Expr before analysis.
    /// The second field is the position in the pattern where the node was parsed from.
    AstNode(AstNode, usize),
}
1749
/// Target of a backreference or subroutine call, as written in the pattern
/// (see the `target` fields of [`AstNode`]).
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum CaptureGroupTarget {
    /// Direct numbered reference.
    ByNumber(usize),

    /// Named reference.
    ByName(String),

    /// Relative reference (e.g., -1, -2, etc.).
    Relative(isize),
}
1762
/// Abstract Syntax Tree node - will be resolved into an [`Expr`] before analysis.
///
/// Unlike the corresponding [`Expr`] variants, the nodes here still refer to capture
/// groups through an unresolved [`CaptureGroupTarget`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum AstNode {
    /// Group with optional name - the name is only present if explicitly specified in the pattern.
    AstGroup {
        /// Optional name of the capture group, present only when explicitly named in the pattern.
        name: Option<String>,
        /// The inner expression of the group.
        inner: Box<Expr>,
    },
    /// Backreference.
    Backref {
        /// The target capture group being referenced.
        target: CaptureGroupTarget,
        /// Whether the matching is case-insensitive or not.
        // TODO: move out of Backref and prefer a Flags AstNode. The resolver can then track the
        // flags and set casei on the resolved Expr accordingly.
        casei: bool,
        /// Optional relative recursion level for the backreference.
        relative_recursion_level: Option<isize>,
    },
    /// Subroutine call.
    SubroutineCall(CaptureGroupTarget),
    /// Backreference exists condition `(?(name)...)` or `(?(1)...)` - unresolved target.
    /// The optional `relative_recursion_level` corresponds to the Oniguruma `+N`/`-N` suffix
    /// (e.g. `(?(name+0)...)`) which qualifies which recursion level's capture is tested.
    BackrefExistsCondition {
        /// The target capture group being tested for existence.
        target: CaptureGroupTarget,
        /// Optional relative recursion level qualifier (e.g. `+0`, `-1`).
        relative_recursion_level: Option<isize>,
    },
}
1795
/// Type of look-around assertion, stored as the second field of [`Expr::LookAround`].
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum LookAround {
    /// Look-ahead assertion, e.g. `(?=a)`.
    LookAhead,
    /// Negative look-ahead assertion, e.g. `(?!a)`.
    LookAheadNeg,
    /// Look-behind assertion, e.g. `(?<=a)`.
    LookBehind,
    /// Negative look-behind assertion, e.g. `(?<!a)`.
    LookBehindNeg,
}
1808
/// Type of absent operator as used for Oniguruma's absent functionality.
/// Wrapped by [`Expr::Absent`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Absent {
    /// Absent repeater `(?~absent)` - works like `\O*` (match any character including newline,
    /// repeated) but is limited by the range that does not include the string match with
    /// `absent`. This is a written abbreviation of `(?~|absent|\O*)`.
    Repeater(Box<Expr>),
    /// Absent expression `(?~|absent|exp)` - works like `exp`, but is limited by the range
    /// that does not include the string match with `absent`.
    Expression {
        /// The expression to avoid matching.
        absent: Box<Expr>,
        /// The expression to match.
        exp: Box<Expr>,
    },
    /// Absent stopper `(?~|absent)` - after this operator, the haystack range is limited
    /// up to the point where `absent` matches.
    Stopper(Box<Expr>),
    /// Range clear `(?~|)` - clears the effects caused by absent stoppers.
    Clear,
}
1830
/// Type of backtracking control verb which affects how backtracking will behave.
/// Wrapped by [`Expr::BacktrackingControlVerb`].
/// See <https://www.regular-expressions.info/verb.html>
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum BacktrackingControlVerb {
    /// Fail this branch immediately.
    Fail,
    /// Treat the match so far as a successful overall match.
    Accept,
    /// Abort the entire match on failure.
    Commit,
    /// Restart the entire match attempt at the current position.
    Skip,
    /// Prune all backtracking states and restart the entire match attempt at the next position.
    Prune,
}
1846
/// An iterator over capture names in a [Regex].  The iterator
/// returns the name of each group, or [None] if the group has
/// no name.  Because capture group 0 cannot have a name, the
/// first item returned is always [None].
pub struct CaptureNames<'r>(vec::IntoIter<Option<&'r str>>);

impl Debug for CaptureNames<'_> {
    /// Writes the fixed placeholder `<CaptureNames>`; the remaining names are
    /// deliberately not printed.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("<CaptureNames>")
    }
}

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    /// Returns the next group's name (`None` for an unnamed group).
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }

    /// Forwards the exact remaining length of the underlying vector iterator
    /// so that `collect`/`extend` can allocate once.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
1866
// Hand-rolled decimal formatting: appends `x` in base 10 to `s`. Kept instead of
// the generic formatting machinery because the common 1-digit case is very cheap.
fn push_usize(s: &mut String, x: usize) {
    let higher_digits = x / 10;
    let last_digit = (x % 10) as u8;
    // Emit the more significant digits first so the output reads left to right.
    if higher_digits != 0 {
        push_usize(s, higher_digits);
    }
    s.push((b'0' + last_digit) as char);
}
1877
/// Returns `true` if `c` is one of the characters that `push_quoted` escapes
/// with a backslash.
fn is_special(c: char) -> bool {
    const SPECIAL_CHARS: [char; 15] = [
        '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^', '$', '#',
    ];
    SPECIAL_CHARS.contains(&c)
}
1884
1885fn push_quoted(buf: &mut String, s: &str) {
1886    for c in s.chars() {
1887        if is_special(c) {
1888            buf.push('\\');
1889        }
1890        buf.push(c);
1891    }
1892}
1893
1894/// Escapes special characters in `text` with '\\'.  Returns a string which, when interpreted
1895/// as a regex, matches exactly `text`.
1896pub fn escape(text: &str) -> Cow<'_, str> {
1897    // Using bytes() is OK because all special characters are single bytes.
1898    match text.bytes().filter(|&b| is_special(b as char)).count() {
1899        0 => Cow::Borrowed(text),
1900        n => {
1901            // The capacity calculation is exact because '\\' is a single byte.
1902            let mut buf = String::with_capacity(text.len() + n);
1903            push_quoted(&mut buf, text);
1904            Cow::Owned(buf)
1905        }
1906    }
1907}
1908
/// Type of assertions, wrapped by [`Expr::Assertion`].
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Assertion {
    /// Start of input text.
    StartText,
    /// End of input text.
    EndText,
    /// End of input text, or before any trailing newlines at the end (Oniguruma's `\Z`).
    EndTextIgnoreTrailingNewlines {
        /// Whether CRLF mode is enabled.
        /// If `true`, trailing `\r\n` pairs (in addition to bare `\n`) are also ignored.
        crlf: bool,
    },
    /// Start of a line.
    StartLine {
        /// CRLF mode.
        /// If true, this assertion matches at the starting position of the input text, or at the
        /// position immediately following either a `\r` or `\n` character, but never after a
        /// `\r` when a `\n` follows.
        crlf: bool,
    },
    /// End of a line.
    EndLine {
        /// CRLF mode.
        /// If true, this assertion matches at the ending position of the input text, or at the
        /// position immediately preceding either a `\r` or `\n` character, but never between a
        /// `\r` and the `\n` that directly follows it.
        crlf: bool,
    },
    /// Left word boundary.
    LeftWordBoundary,
    /// Left word half boundary.
    LeftWordHalfBoundary,
    /// Right word boundary.
    RightWordBoundary,
    /// Right word half boundary.
    RightWordHalfBoundary,
    /// Both word boundaries.
    WordBoundary,
    /// Not word boundary.
    NotWordBoundary,
}
1949
1950impl Assertion {
1951    pub(crate) fn is_hard(&self) -> bool {
1952        use Assertion::*;
1953        matches!(
1954            self,
1955            // these will make regex-automata use PikeVM
1956            LeftWordBoundary
1957                | LeftWordHalfBoundary
1958                | RightWordBoundary
1959                | RightWordHalfBoundary
1960                | WordBoundary
1961                | NotWordBoundary
1962                | EndTextIgnoreTrailingNewlines { .. }
1963        )
1964    }
1965}
1966
/// An iterator over the immediate children of an [`Expr`].
///
/// This iterator yields references to child expressions but does not recurse into them.
/// It is created by [`Expr::children_iter`].
#[derive(Debug)]
pub enum ExprChildrenIter<'a> {
    /// No children (leaf node)
    Empty,
    /// A single child (Group, LookAround, AtomicGroup, Repeat, DefineGroup, and the
    /// Repeater/Stopper forms of Absent). Becomes `None` once the child has been yielded.
    Single(Option<&'a Expr>),
    /// Multiple children in a Vec (Concat, Alt)
    Vec(alloc::slice::Iter<'a, Expr>),
    /// Up to three children, yielded in order: all three for Conditional; only `first` and
    /// `second` for the Expression form of Absent (`third` starts out as `None`).
    Triple {
        /// First child
        first: Option<&'a Expr>,
        /// Second child
        second: Option<&'a Expr>,
        /// Third child
        third: Option<&'a Expr>,
    },
}
1988
/// An iterator over the immediate children of an [`Expr`] for mutable access.
///
/// This iterator yields mutable references to child expressions but does not recurse into them.
/// It is created by [`Expr::children_iter_mut`].
#[derive(Debug)]
pub enum ExprChildrenIterMut<'a> {
    /// No children (leaf node)
    Empty,
    /// A single child (Group, LookAround, AtomicGroup, Repeat, DefineGroup, and the
    /// Repeater/Stopper forms of Absent). Becomes `None` once the child has been yielded.
    Single(Option<&'a mut Expr>),
    /// Multiple children in a Vec (Concat, Alt)
    Vec(alloc::slice::IterMut<'a, Expr>),
    /// Up to three children, yielded in order: all three for Conditional; only `first` and
    /// `second` for the Expression form of Absent (`third` starts out as `None`).
    Triple {
        /// First child
        first: Option<&'a mut Expr>,
        /// Second child
        second: Option<&'a mut Expr>,
        /// Third child
        third: Option<&'a mut Expr>,
    },
}
2010
2011impl<'a> Iterator for ExprChildrenIter<'a> {
2012    type Item = &'a Expr;
2013
2014    fn next(&mut self) -> Option<Self::Item> {
2015        match self {
2016            ExprChildrenIter::Empty => None,
2017            ExprChildrenIter::Single(ref mut child) => child.take(),
2018            ExprChildrenIter::Vec(ref mut iter) => iter.next(),
2019            ExprChildrenIter::Triple {
2020                ref mut first,
2021                ref mut second,
2022                ref mut third,
2023            } => first
2024                .take()
2025                .or_else(|| second.take())
2026                .or_else(|| third.take()),
2027        }
2028    }
2029}
2030
2031impl<'a> Iterator for ExprChildrenIterMut<'a> {
2032    type Item = &'a mut Expr;
2033
2034    fn next(&mut self) -> Option<Self::Item> {
2035        match self {
2036            ExprChildrenIterMut::Empty => None,
2037            ExprChildrenIterMut::Single(ref mut child) => child.take(),
2038            ExprChildrenIterMut::Vec(ref mut iter) => iter.next(),
2039            ExprChildrenIterMut::Triple {
2040                ref mut first,
2041                ref mut second,
2042                ref mut third,
2043            } => first
2044                .take()
2045                .or_else(|| second.take())
2046                .or_else(|| third.take()),
2047        }
2048    }
2049}
2050
// Shared body of `Expr::children_iter` and `Expr::children_iter_mut`: maps an expression
// to the iterator variant that yields its immediate children.
//
// Parameters:
// - `$self`: the expression to inspect
// - `$iter`: the iterator enum to build (`ExprChildrenIter` or `ExprChildrenIterMut`)
// - `$vec_method`: method called on a `Vec<Expr>` child list (`iter` / `iter_mut`)
// - `$single_method`: method called on a `Box<Expr>` child (`as_ref` / `as_mut`)
// - `$group_method`: associated function of `Arc` applied to a `Group` body
//   (`as_ref` / `make_mut`; per the std docs `Arc::make_mut` clones the inner value first
//   if the `Arc` is shared)
macro_rules! children_iter_match {
    ($self:expr, $iter:ident, $vec_method:ident, $single_method:ident, $group_method:ident) => {
        match $self {
            Expr::Concat(children) | Expr::Alt(children) => $iter::Vec(children.$vec_method()),
            Expr::Group(child) => $iter::Single(Some(Arc::$group_method(child))),
            Expr::Absent(Absent::Repeater(child))
            | Expr::Absent(Absent::Stopper(child))
            | Expr::LookAround(child, _)
            | Expr::AtomicGroup(child)
            | Expr::Repeat { child, .. } => $iter::Single(Some(child.$single_method())),
            Expr::Conditional {
                condition,
                true_branch,
                false_branch,
            } => $iter::Triple {
                first: Some(condition.$single_method()),
                second: Some(true_branch.$single_method()),
                third: Some(false_branch.$single_method()),
            },
            // Only two children here, so the third slot starts out empty.
            Expr::Absent(Absent::Expression { absent, exp }) => $iter::Triple {
                first: Some(absent.$single_method()),
                second: Some(exp.$single_method()),
                third: None,
            },
            Expr::DefineGroup { definitions } => $iter::Single(Some(definitions.$single_method())),
            _ if $self.is_leaf_node() => $iter::Empty,
            // Reached only by a variant that has no arm above and is not reported
            // as a leaf by `is_leaf_node`.
            _ => unimplemented!(),
        }
    };
}
2081impl Expr {
2082    /// Parse the regex and return an expression (AST) and a bit set with the indexes of groups
2083    /// that are referenced by backrefs.
2084    pub fn parse_tree(re: &str) -> Result<ExprTree> {
2085        Parser::parse(re)
2086    }
2087
2088    /// Parse the regex and return an expression (AST)
2089    /// Flags should be bit based based on flags
2090    pub fn parse_tree_with_flags(re: &str, flags: u32) -> Result<ExprTree> {
2091        Parser::parse_with_flags(re, flags)
2092    }
2093
2094    /// Returns `true` if this expression is a leaf node (has no children).
2095    ///
2096    /// Leaf nodes include literals, assertions, backreferences, and other atomic expressions.
2097    /// Non-leaf nodes include groups, concatenations, alternations, and repetitions.
2098    pub fn is_leaf_node(&self) -> bool {
2099        matches!(
2100            self,
2101            Expr::Empty
2102                | Expr::Any { .. }
2103                | Expr::Assertion(_)
2104                | Expr::GeneralNewline { .. }
2105                | Expr::Literal { .. }
2106                | Expr::Delegate { .. }
2107                | Expr::Backref { .. }
2108                | Expr::BackrefWithRelativeRecursionLevel { .. }
2109                | Expr::KeepOut
2110                | Expr::ContinueFromPreviousMatchEnd
2111                | Expr::BackrefExistsCondition { .. }
2112                | Expr::BacktrackingControlVerb(_)
2113                |             Expr::SubroutineCall(_)
2114                | Expr::Absent(Absent::Clear)
2115                // An unresolved AstNode has no separate child Expr to iterate; the resolver
2116                // should have replaced it before analysis, so treat it as a leaf so that
2117                // collection/iteration doesn't panic, and let the analyzer emit the error.
2118                | Expr::AstNode(..),
2119        )
2120    }
2121
2122    /// Returns `true` if any descendant of this expression (not including itself)
2123    /// satisfies the given predicate.
2124    ///
2125    /// This performs an iterative depth-first search using [`children_iter`](Self::children_iter).
2126    pub fn has_descendant(&self, predicate: impl Fn(&Expr) -> bool) -> bool {
2127        let mut stack: Vec<&Expr> = self.children_iter().collect();
2128        while let Some(expr) = stack.pop() {
2129            if predicate(expr) {
2130                return true;
2131            }
2132            stack.extend(expr.children_iter());
2133        }
2134        false
2135    }
2136
2137    /// Returns an iterator over the immediate children of this expression.
2138    ///
2139    /// For leaf nodes, this returns an empty iterator. For non-leaf nodes, it returns
2140    /// references to their immediate children (non-recursive).
2141    pub fn children_iter(&self) -> ExprChildrenIter<'_> {
2142        children_iter_match!(self, ExprChildrenIter, iter, as_ref, as_ref)
2143    }
2144
2145    /// Returns an iterator over the immediate children of this expression for mutable access.
2146    ///
2147    /// For leaf nodes, this returns an empty iterator. For non-leaf nodes, it returns
2148    /// mutable references to their immediate children (non-recursive).
2149    pub fn children_iter_mut(&mut self) -> ExprChildrenIterMut<'_> {
2150        children_iter_match!(self, ExprChildrenIterMut, iter_mut, as_mut, make_mut)
2151    }
2152
2153    /// Convert expression to a regex string in the regex crate's syntax.
2154    ///
2155    /// # Panics
2156    ///
2157    /// Panics for expressions that are hard, i.e. can not be handled by the regex crate.
2158    pub fn to_str(&self, buf: &mut String, precedence: u8) {
2159        match *self {
2160            Expr::Empty => (),
2161            Expr::Any { newline, crlf } => buf.push_str(match (newline, crlf) {
2162                (true, _) => "(?s:.)",
2163                (false, true) => "(?R-s:.)",
2164                (false, false) => ".",
2165            }),
2166            Expr::Literal { ref val, casei } => {
2167                if casei {
2168                    buf.push_str("(?i:");
2169                }
2170                push_quoted(buf, val);
2171                if casei {
2172                    buf.push(')');
2173                }
2174            }
2175            Expr::Assertion(Assertion::StartText) => buf.push('^'),
2176            Expr::Assertion(Assertion::EndText) => buf.push('$'),
2177            Expr::Assertion(Assertion::StartLine { crlf: false }) => buf.push_str("(?m:^)"),
2178            Expr::Assertion(Assertion::EndLine { crlf: false }) => buf.push_str("(?m:$)"),
2179            Expr::Assertion(Assertion::StartLine { crlf: true }) => buf.push_str("(?Rm:^)"),
2180            Expr::Assertion(Assertion::EndLine { crlf: true }) => buf.push_str("(?Rm:$)"),
2181            Expr::Concat(ref children) => {
2182                if precedence > 1 {
2183                    buf.push_str("(?:");
2184                }
2185                for child in children {
2186                    child.to_str(buf, 2);
2187                }
2188                if precedence > 1 {
2189                    buf.push(')')
2190                }
2191            }
2192            Expr::Alt(_) => {
2193                if precedence > 0 {
2194                    buf.push_str("(?:");
2195                }
2196                let mut children = self.children_iter();
2197                if let Some(first) = children.next() {
2198                    first.to_str(buf, 1);
2199                    for child in children {
2200                        buf.push('|');
2201                        child.to_str(buf, 1);
2202                    }
2203                }
2204                if precedence > 0 {
2205                    buf.push(')');
2206                }
2207            }
2208            Expr::Group(ref child) => {
2209                buf.push('(');
2210                child.to_str(buf, 0);
2211                buf.push(')');
2212            }
2213            Expr::Repeat {
2214                ref child,
2215                lo,
2216                hi,
2217                greedy,
2218            } => {
2219                if precedence > 2 {
2220                    buf.push_str("(?:");
2221                }
2222                child.to_str(buf, 3);
2223                match (lo, hi) {
2224                    (0, 1) => buf.push('?'),
2225                    (0, usize::MAX) => buf.push('*'),
2226                    (1, usize::MAX) => buf.push('+'),
2227                    (lo, hi) => {
2228                        buf.push('{');
2229                        push_usize(buf, lo);
2230                        if lo != hi {
2231                            buf.push(',');
2232                            if hi != usize::MAX {
2233                                push_usize(buf, hi);
2234                            }
2235                        }
2236                        buf.push('}');
2237                    }
2238                }
2239                if !greedy {
2240                    buf.push('?');
2241                }
2242                if precedence > 2 {
2243                    buf.push(')');
2244                }
2245            }
2246            Expr::Delegate {
2247                ref inner, casei, ..
2248            } => {
2249                // at the moment, delegate nodes are just atoms
2250                if casei {
2251                    buf.push_str("(?i:");
2252                }
2253                buf.push_str(inner);
2254                if casei {
2255                    buf.push(')');
2256                }
2257            }
2258            Expr::DefineGroup { .. } => {
2259                // DEFINE groups match nothing - output empty string for delegation
2260            }
2261            _ => panic!("attempting to format hard expr {:?}", self),
2262        }
2263    }
2264}
2265
// Returns the byte index at which the codepoint ending just before `ix` starts.
// precondition: ix > 0
fn prev_codepoint_ix(s: &str, mut ix: usize) -> usize {
    let bytes = s.as_bytes();
    ix -= 1;
    // Bytes in 0x80..0xc0 are the ones below -0x40 when viewed as i8; step back
    // over them until a byte from 0..0x80 or 0xc0.. is reached.
    while (bytes[ix] as i8) < -0x40 {
        ix -= 1;
    }
    ix
}
2278
/// Maps a byte to a codepoint length in bytes, based purely on its value:
/// below 0x80 → 1, below 0xe0 → 2, below 0xf0 → 3, anything else → 4.
fn codepoint_len(b: u8) -> usize {
    match b {
        0x00..=0x7f => 1,
        0x80..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}
2287
2288/// Returns the smallest possible index of the next valid UTF-8 sequence
2289/// starting after `i`.
2290/// Adapted from a function with the same name in the `regex` crate.
2291pub(crate) fn next_utf8(text: &str, i: usize) -> usize {
2292    let b = match text.as_bytes().get(i) {
2293        None => return i + 1,
2294        Some(&b) => b,
2295    };
2296    i + codepoint_len(b)
2297}
2298
2299// If this returns false, then there is no possible backref in the re
2300
2301// Both potential implementations are turned off, because we currently
2302// always need to do a deeper analysis because of 1-character
2303// look-behind. If we could call a find_from_pos method of regex::Regex,
2304// it would make sense to bring this back.
2305/*
2306pub fn detect_possible_backref(re: &str) -> bool {
2307    let mut last = b'\x00';
2308    for b in re.as_bytes() {
2309        if b'0' <= *b && *b <= b'9' && last == b'\\' { return true; }
2310        last = *b;
2311    }
2312    false
2313}
2314
2315pub fn detect_possible_backref(re: &str) -> bool {
2316    let mut bytes = re.as_bytes();
2317    loop {
2318        match memchr::memchr(b'\\', &bytes[..bytes.len() - 1]) {
2319            Some(i) => {
2320                bytes = &bytes[i + 1..];
2321                let c = bytes[0];
2322                if b'0' <= c && c <= b'9' { return true; }
2323            }
2324            None => return false
2325        }
2326    }
2327}
2328*/
2329
2330/// The internal module only exists so that the toy example can access internals for debugging and
2331/// experimenting.
2332#[doc(hidden)]
2333pub mod internal {
2334    pub use crate::analyze::{analyze, can_compile_as_anchored, AnalyzeContext, Info};
2335    pub use crate::compile::{compile, CompileOptions};
2336    pub use crate::optimize::optimize;
2337    pub use crate::parse_flags::{
2338        FLAG_CASEI, FLAG_CRLF, FLAG_DOTNL, FLAG_IGNORE_NUMBERED_GROUPS_WHEN_NAMED_GROUPS_EXIST,
2339        FLAG_IGNORE_SPACE, FLAG_MULTI, FLAG_ONIGURUMA_MODE, FLAG_UNICODE,
2340    };
2341    pub use crate::vm::{run_default, run_trace, Insn, Prog};
2342}
2343
2344#[cfg(test)]
2345mod tests {
2346    use alloc::borrow::Cow;
2347    use alloc::boxed::Box;
2348    use alloc::string::{String, ToString};
2349    use alloc::sync::Arc;
2350    use alloc::vec::Vec;
2351    use alloc::{format, vec};
2352
2353    use crate::parse::{make_group, make_literal};
2354    use crate::{Absent, Expr, Regex, RegexImpl};
2355
2356    //use detect_possible_backref;
2357
2358    // tests for to_str
2359
2360    fn to_str(e: Expr) -> String {
2361        let mut s = String::new();
2362        e.to_str(&mut s, 0);
2363        s
2364    }
2365
2366    #[test]
2367    fn to_str_concat_alt() {
2368        let e = Expr::Concat(vec![
2369            Expr::Alt(vec![make_literal("a"), make_literal("b")]),
2370            make_literal("c"),
2371        ]);
2372        assert_eq!(to_str(e), "(?:a|b)c");
2373    }
2374
2375    #[test]
2376    fn to_str_rep_concat() {
2377        let e = Expr::Repeat {
2378            child: Box::new(Expr::Concat(vec![make_literal("a"), make_literal("b")])),
2379            lo: 2,
2380            hi: 3,
2381            greedy: true,
2382        };
2383        assert_eq!(to_str(e), "(?:ab){2,3}");
2384    }
2385
2386    #[test]
2387    fn to_str_group_alt() {
2388        let e = Expr::Group(Arc::new(Expr::Alt(vec![
2389            make_literal("a"),
2390            make_literal("b"),
2391        ])));
2392        assert_eq!(to_str(e), "(a|b)");
2393    }
2394
2395    #[test]
2396    fn as_str_debug() {
2397        let s = r"(a+)b\1";
2398        let regex = Regex::new(s).unwrap();
2399        assert_eq!(s, regex.as_str());
2400        assert_eq!(s, format!("{:?}", regex));
2401    }
2402
2403    #[test]
2404    fn display() {
2405        let s = r"(a+)b\1";
2406        let regex = Regex::new(s).unwrap();
2407        assert_eq!(s, format!("{}", regex));
2408    }
2409
2410    #[test]
2411    fn from_str() {
2412        let s = r"(a+)b\1";
2413        let regex = s.parse::<Regex>().unwrap();
2414        assert_eq!(regex.as_str(), s);
2415    }
2416
2417    #[test]
2418    fn to_str_repeat() {
2419        fn repeat(lo: usize, hi: usize, greedy: bool) -> Expr {
2420            Expr::Repeat {
2421                child: Box::new(make_literal("a")),
2422                lo,
2423                hi,
2424                greedy,
2425            }
2426        }
2427
2428        assert_eq!(to_str(repeat(2, 2, true)), "a{2}");
2429        assert_eq!(to_str(repeat(2, 2, false)), "a{2}?");
2430        assert_eq!(to_str(repeat(2, 3, true)), "a{2,3}");
2431        assert_eq!(to_str(repeat(2, 3, false)), "a{2,3}?");
2432        assert_eq!(to_str(repeat(2, usize::MAX, true)), "a{2,}");
2433        assert_eq!(to_str(repeat(2, usize::MAX, false)), "a{2,}?");
2434        assert_eq!(to_str(repeat(0, 1, true)), "a?");
2435        assert_eq!(to_str(repeat(0, 1, false)), "a??");
2436        assert_eq!(to_str(repeat(0, usize::MAX, true)), "a*");
2437        assert_eq!(to_str(repeat(0, usize::MAX, false)), "a*?");
2438        assert_eq!(to_str(repeat(1, usize::MAX, true)), "a+");
2439        assert_eq!(to_str(repeat(1, usize::MAX, false)), "a+?");
2440    }
2441
2442    #[test]
2443    fn escape() {
2444        // Check that strings that need no quoting are borrowed, and that non-special punctuation
2445        // is not quoted.
2446        match crate::escape("@foo") {
2447            Cow::Borrowed(s) => assert_eq!(s, "@foo"),
2448            _ => panic!("Value should be borrowed."),
2449        }
2450
2451        // Check typical usage.
2452        assert_eq!(crate::escape("fo*o").into_owned(), "fo\\*o");
2453
2454        // Check that multibyte characters are handled correctly.
2455        assert_eq!(crate::escape("fø*ø").into_owned(), "fø\\*ø");
2456    }
2457
2458    #[test]
2459    fn trailing_positive_lookahead_wrap_capture_group_fixup() {
2460        let s = r"a+(?=c)";
2461        let regex = s.parse::<Regex>().unwrap();
2462        assert!(matches!(regex.inner,
2463            RegexImpl::Wrap { explicit_capture_group_0: true, .. }),
2464            "trailing positive lookahead for an otherwise easy pattern should avoid going through the VM");
2465        assert_eq!(s, regex.as_str());
2466        assert_eq!(s, format!("{:?}", regex));
2467    }
2468
2469    #[test]
2470    fn easy_regex() {
2471        let s = r"(a+)b";
2472        let regex = s.parse::<Regex>().unwrap();
2473        assert!(
2474            matches!(regex.inner, RegexImpl::Wrap { explicit_capture_group_0: false, .. }),
2475            "easy pattern should avoid going through the VM, and capture group 0 should be implicit"
2476        );
2477
2478        assert_eq!(s, regex.as_str());
2479        assert_eq!(s, format!("{:?}", regex));
2480    }
2481
2482    #[test]
2483    fn hard_regex() {
2484        let s = r"(a+)(?>c)";
2485        let regex = s.parse::<Regex>().unwrap();
2486        assert!(
2487            matches!(regex.inner, RegexImpl::Fancy { .. }),
2488            "hard regex should be compiled into a VM"
2489        );
2490        assert_eq!(s, regex.as_str());
2491        assert_eq!(s, format!("{:?}", regex));
2492    }
2493
2494    /*
2495    #[test]
2496    fn detect_backref() {
2497        assert_eq!(detect_possible_backref("a0a1a2"), false);
2498        assert_eq!(detect_possible_backref("a0a1\\a2"), false);
2499        assert_eq!(detect_possible_backref("a0a\\1a2"), true);
2500        assert_eq!(detect_possible_backref("a0a1a2\\"), false);
2501    }
2502    */
2503
2504    #[test]
2505    fn test_is_leaf_node_leaf_nodes() {
2506        // Test all leaf node variants
2507        assert!(Expr::Empty.is_leaf_node());
2508        assert!(Expr::Any {
2509            newline: false,
2510            crlf: false
2511        }
2512        .is_leaf_node());
2513        assert!(Expr::Any {
2514            newline: true,
2515            crlf: false
2516        }
2517        .is_leaf_node());
2518        assert!(Expr::Assertion(crate::Assertion::StartText).is_leaf_node());
2519        assert!(Expr::Literal {
2520            val: "test".to_string(),
2521            casei: false
2522        }
2523        .is_leaf_node());
2524        assert!(Expr::Delegate {
2525            inner: "[0-9]".to_string(),
2526            casei: false
2527        }
2528        .is_leaf_node());
2529        assert!(Expr::Backref {
2530            group: 1,
2531            casei: false
2532        }
2533        .is_leaf_node());
2534        assert!(Expr::BackrefWithRelativeRecursionLevel {
2535            group: 1,
2536            relative_level: -1,
2537            casei: false
2538        }
2539        .is_leaf_node());
2540        assert!(Expr::KeepOut.is_leaf_node());
2541        assert!(Expr::ContinueFromPreviousMatchEnd.is_leaf_node());
2542        assert!(Expr::BackrefExistsCondition {
2543            group: 1,
2544            relative_recursion_level: None
2545        }
2546        .is_leaf_node());
2547        assert!(Expr::BacktrackingControlVerb(crate::BacktrackingControlVerb::Fail).is_leaf_node());
2548        assert!(Expr::SubroutineCall(1).is_leaf_node());
2549
2550        assert!(Expr::Absent(Absent::Clear).is_leaf_node());
2551    }
2552
2553    #[test]
2554    fn test_is_leaf_node_non_leaf_nodes() {
2555        // Test all non-leaf node variants
2556        assert!(!Expr::Concat(vec![make_literal("a")]).is_leaf_node());
2557        assert!(!Expr::Alt(vec![make_literal("a"), make_literal("b")]).is_leaf_node());
2558        assert!(!make_group(make_literal("a")).is_leaf_node());
2559        assert!(
2560            !Expr::LookAround(Box::new(make_literal("a")), crate::LookAround::LookAhead)
2561                .is_leaf_node()
2562        );
2563        assert!(!Expr::Repeat {
2564            child: Box::new(make_literal("a")),
2565            lo: 0,
2566            hi: 1,
2567            greedy: true
2568        }
2569        .is_leaf_node());
2570        assert!(!Expr::AtomicGroup(Box::new(make_literal("a"))).is_leaf_node());
2571        assert!(!Expr::Conditional {
2572            condition: Box::new(Expr::BackrefExistsCondition {
2573                group: 1,
2574                relative_recursion_level: None
2575            }),
2576            true_branch: Box::new(make_literal("a")),
2577            false_branch: Box::new(Expr::Empty)
2578        }
2579        .is_leaf_node());
2580
2581        assert!(!Expr::Absent(Absent::Repeater(Box::new(make_literal("a")))).is_leaf_node());
2582        assert!(!Expr::Absent(Absent::Expression {
2583            absent: Box::new(make_literal("/*")),
2584            exp: Box::new(Expr::Repeat {
2585                child: Box::new(Expr::Any {
2586                    newline: true,
2587                    crlf: false
2588                }),
2589                lo: 0,
2590                hi: usize::MAX,
2591                greedy: true
2592            })
2593        })
2594        .is_leaf_node());
2595        assert!(!Expr::Absent(Absent::Stopper(Box::new(make_literal("/*")))).is_leaf_node());
2596    }
2597
2598    #[test]
2599    fn test_children_iter_empty() {
2600        // Leaf nodes should return empty iterator
2601        let expr = Expr::Empty;
2602        let mut iter = expr.children_iter();
2603        assert!(iter.next().is_none());
2604
2605        let expr = make_literal("test");
2606        let mut iter = expr.children_iter();
2607        assert!(iter.next().is_none());
2608    }
2609
2610    #[test]
2611    fn test_children_iter_single() {
2612        // Group, LookAround, AtomicGroup, Repeat should return single child
2613        let child = make_literal("a");
2614        let expr = make_group(child.clone());
2615        let children: Vec<_> = expr.children_iter().collect();
2616        assert_eq!(children.len(), 1);
2617
2618        let expr = Expr::Repeat {
2619            child: Box::new(child.clone()),
2620            lo: 0,
2621            hi: 1,
2622            greedy: true,
2623        };
2624        let children: Vec<_> = expr.children_iter().collect();
2625        assert_eq!(children.len(), 1);
2626    }
2627
2628    #[test]
2629    fn test_children_iter_vec() {
2630        // Concat and Alt should return all children
2631        let children_vec = vec![make_literal("a"), make_literal("b"), make_literal("c")];
2632        let expr = Expr::Concat(children_vec.clone());
2633        let children: Vec<_> = expr.children_iter().collect();
2634        assert_eq!(children.len(), 3);
2635
2636        let expr = Expr::Alt(children_vec);
2637        let children: Vec<_> = expr.children_iter().collect();
2638        assert_eq!(children.len(), 3);
2639    }
2640
2641    #[test]
2642    fn test_children_iter_triple() {
2643        // Conditional should return three children
2644        let expr = Expr::Conditional {
2645            condition: Box::new(Expr::BackrefExistsCondition {
2646                group: 1,
2647                relative_recursion_level: None,
2648            }),
2649            true_branch: Box::new(make_literal("a")),
2650            false_branch: Box::new(make_literal("b")),
2651        };
2652        let children: Vec<_> = expr.children_iter().collect();
2653        assert_eq!(children.len(), 3);
2654
2655        // Absent expression should return two children
2656        let expr = Expr::Absent(Absent::Expression {
2657            absent: Box::new(make_literal("/*")),
2658            exp: Box::new(Expr::Repeat {
2659                child: Box::new(Expr::Any {
2660                    newline: true,
2661                    crlf: false,
2662                }),
2663                lo: 0,
2664                hi: usize::MAX,
2665                greedy: true,
2666            }),
2667        });
2668        let children: Vec<_> = expr.children_iter().collect();
2669        assert_eq!(children.len(), 2);
2670    }
2671}