Skip to main content

rexile/
lib.rs

1//! # ReXile 🦎
2//!
3//! **A blazing-fast regex engine with 10-100x faster compilation speed**
4//!
5//! ReXile is a lightweight regex alternative optimized for fast compilation while maintaining
6//! competitive matching performance.
7
8#![allow(dead_code)]
9#![allow(clippy::clone_on_copy)]
10#![allow(clippy::match_like_matches_macro)]
11#![allow(clippy::type_complexity)]
12#![allow(clippy::upper_case_acronyms)]
13#![allow(clippy::redundant_closure)]
14#![allow(clippy::len_without_is_empty)]
15#![allow(clippy::too_many_arguments)]
16#![allow(clippy::comparison_chain)]
17#![allow(clippy::manual_range_contains)]
18#![allow(clippy::large_enum_variant)]
19#![allow(clippy::manual_strip)]
20#![allow(clippy::needless_range_loop)]
21#![allow(clippy::string_slice)]
22#![allow(clippy::needless_late_init)]
23#![allow(clippy::manual_is_ascii_check)]
24#![allow(clippy::sliced_string_as_bytes)]
25//!
26//! ## Quick Start
27//!
28//! ```rust
29//! use rexile::Pattern;
30//!
31//! // Literal matching with SIMD acceleration
32//! let pattern = Pattern::new("hello").unwrap();
33//! assert!(pattern.is_match("hello world"));
34//!
35//! // Digit matching (1.4-1.9x faster than regex!)
36//! let digits = Pattern::new(r"\d+").unwrap();
37//! let matches = digits.find_all("Order #12345 costs $67.89");
38//! assert_eq!(matches, vec![(7, 12), (20, 22), (23, 25)]);
39//!
40//! // Dot wildcard with backtracking
41//! let quoted = Pattern::new(r#""[^"]+""#).unwrap();
42//! assert!(quoted.is_match(r#"say "hello world""#));
43//! ```
44//!
45//! ## Performance Highlights
46//!
47//! **Compilation Speed** (vs regex crate):
48//! **Compilation Speed** (vs regex crate):
49//! - Pattern `[a-zA-Z_]\w*`: **104.7x faster** compilation
50//! - Pattern `\d+`: **46.5x faster** compilation
51//! - Average: **10-100x faster compilation**
52//!
53//! **Memory Usage**:
54//! - Compilation: **15x less memory** (128 KB vs 1920 KB)
55//! - Compilation time: **10-100x faster** on average
56//! - Peak memory: **5x less** in stress tests
57//!
58//! ## Fast Path Optimizations
59//!
60//! ReXile uses **10 specialized fast paths** for common patterns:
61//!
62//! | Pattern | Fast Path | Performance |
63//! |---------|-----------|-------------|
64//! | `\d+` | DigitRun | 1.4-1.9x faster |
65//! | `"[^"]+"` | QuotedString | 2.44x faster |
66//! | `[a-zA-Z_]\w*` | IdentifierRun | 104.7x faster compilation |
67//! | `\w+` | WordRun | Competitive |
68//! | `foo\|bar\|baz` | Alternation (aho-corasick) | 2x slower (acceptable) |
69//!
70//! ## Supported Features
71//!
72//! - ✅ Literal searches with SIMD acceleration
73//! - ✅ Multi-pattern matching (alternations)
74//! - ✅ Character classes with negation (`[a-z]`, `[^abc]`)
75//! - ✅ Quantifiers (`*`, `+`, `?`, `{n}`, `{n,m}`)
76//! - ✅ Range quantifiers (`{n}`, `{n,}`, `{n,m}`)
77//! - ✅ Case-insensitive flag (`(?i)`)
78//! - ✅ Escape sequences (`\d`, `\w`, `\s`, etc.)
79//! - ✅ Sequences and groups
80//! - ✅ Word boundaries (`\b`, `\B`)
81//! - ✅ Anchoring (`^`, `$`)
82//!
83//! ## Use Cases
84//!
85//! ReXile is production-ready for:
86//!
87//! - ✅ **Parsers & lexers** - 10-100x faster compilation, instant startup
88//! - ✅ **Rule engines** - Original use case (GRL parsing)
89//! - ✅ **Log processing** - Fast keyword extraction
90//! - ✅ **Dynamic patterns** - Applications that compile patterns at runtime
91//! - ✅ **Memory-constrained environments** - 15x less compilation memory
92//! - ✅ **Low-latency applications** - Predictable performance
93//!
94//! ## Cached API
95//!
96//! For patterns used repeatedly in hot loops:
97//!
98//! ```rust
99//! use rexile;
100//!
101//! // Automatically cached - compile once, reuse forever
102//! assert!(rexile::is_match("test", "this is a test").unwrap());
103//! assert_eq!(rexile::find("world", "hello world").unwrap(), Some((6, 11)));
104//! ```
105//!
106//! ## Architecture
107//!
108//! ```text
109//! Pattern → Parser → AST → Fast Path Detection → Specialized Matcher
110//!                                                        ↓
111//!                                     DigitRun (memchr SIMD)
112//!                                     IdentifierRun (direct bytes)
113//!                                     QuotedString (memchr + validation)
114//!                                     Alternation (aho-corasick)
115//!                                     ... 6 more fast paths
116//! ```
117//!
118//! **Dependencies:** Only `memchr` and `aho-corasick` for SIMD primitives
119//!
120//! ## When to Use ReXile vs regex
121//!
122//! **Choose ReXile for:**
123//! - Digit extraction (`\d+`) - 3.57x faster
124//! - Quoted strings (`"[^"]+"`) - 2.44x faster
125//! - Identifiers (`[a-zA-Z_]\w*`) - Much faster
126//! - Dynamic pattern compilation - 21x faster
127//! - Memory-constrained environments - 15x less memory
128//!
129//! **Choose regex crate for:**
130//! - Complex alternations (ReXile 2x slower)
131//! - Unicode properties (`\p{L}` - not yet supported)
132//! - Advanced features (lookahead, backreferences - not yet supported)
133//!
134//! ## License
135//!
136//! Licensed under either of MIT or Apache-2.0 at your option.
137
138// Module organization
139mod advanced; // Advanced features: captures, lookaround
140mod engine; // Matching engines: NFA, DFA, Lazy DFA
141pub mod optimization; // Fast paths and optimizations
142mod parser; // Pattern parsing: escape, charclass, quantifier, etc.
143
144// External dependencies
145use aho_corasick::AhoCorasick;
146use memchr::memmem;
147use std::collections::HashMap;
148use std::sync::{Mutex, OnceLock};
149
150// Internal imports using new module structure
151use advanced::{Lookaround, LookaroundType};
152use engine::DFA;
153use parser::{
154    is_sequence_pattern, parse_escape, parse_quantified_pattern, parse_sequence,
155    starts_with_escape, BoundaryType, CharClass, Flags, Group, QuantifiedPattern, Sequence,
156};
157
158// Re-export public types
159pub use advanced::{CaptureGroup, Captures};
160pub use optimization::{literal, prefilter};
161
162/// Main ReXile pattern type
163#[derive(Debug, Clone)]
164pub struct Pattern {
165    matcher: Matcher,
166    prefilter: Option<(
167        optimization::prefilter::Prefilter,
168        optimization::literal::LiteralKind,
169    )>,
170    fast_path: Option<optimization::fast_path::FastPath>, // JIT-style fast path
171    #[allow(dead_code)]
172    flags: Flags,                  // Regex flags: (?i), (?m), (?s)
173}
174
175/// Type alias for convenience
176pub type ReXile = Pattern;
177
178// Helper functions for safe Unicode string slicing
179#[inline]
180fn safe_slice(text: &str, start: usize) -> Option<&str> {
181    text.get(start..)
182}
183
184#[inline]
185fn safe_slice_range(text: &str, start: usize, end: usize) -> Option<&str> {
186    text.get(start..end)
187}
188
189/// Get all valid char boundary positions in a string slice from start_pos to end
190#[inline]
191#[allow(dead_code)]
192fn char_boundaries(text: &str, start_pos: usize) -> impl Iterator<Item = usize> + '_ {
193    (start_pos..=text.len()).filter(|&i| text.is_char_boundary(i))
194}
195
196impl Pattern {
197    pub fn new(pattern: &str) -> Result<Self, PatternError> {
198        // Parse inline flags like (?i), (?m), (?s) at the start of the pattern
199        let (flags, effective_pattern) =
200            if let Some((parsed_flags, rest)) = Flags::parse_from_pattern(pattern) {
201                (parsed_flags, rest)
202            } else {
203                (Flags::new(), pattern)
204            };
205
206        // Check for anchors
207        let has_start_anchor = effective_pattern.starts_with('^');
208        let has_end_anchor =
209            effective_pattern.ends_with('$') && !effective_pattern.ends_with("\\$");
210
211        // Strip anchors to get inner pattern
212        let inner_pattern = {
213            let mut p = effective_pattern;
214            if has_start_anchor {
215                p = p.strip_prefix('^').unwrap_or(p);
216            }
217            if has_end_anchor {
218                p = p.strip_suffix('$').unwrap_or(p);
219            }
220            p
221        };
222
223        // Check for capture groups, but exclude special patterns like (?:...), (?=...), (?!...), etc.
224        let has_captures = inner_pattern.contains('(')
225            && !inner_pattern.contains("(?:")
226            && !inner_pattern.contains("(?=")
227            && !inner_pattern.contains("(?!")
228            && !inner_pattern.contains("(?<=")
229            && !inner_pattern.contains("(?<!");
230
231        // Parse the inner pattern (without anchors)
232        let inner_ast = if has_captures {
233            parse_pattern_with_captures_with_flags(inner_pattern, &flags)?
234        } else {
235            parse_pattern_with_flags(inner_pattern, &flags)?
236        };
237
238        // Wrap with anchor constraints if needed
239        let ast = if has_start_anchor || has_end_anchor {
240            Ast::AnchoredPattern {
241                inner: Box::new(inner_ast),
242                start: has_start_anchor,
243                end: has_end_anchor,
244            }
245        } else {
246            inner_ast
247        };
248        let mut matcher = compile_ast(&ast)?;
249
250        // Apply flags to matcher (avoid double-wrapping if AST already wrapped)
251        if flags.case_insensitive && !matches!(matcher, Matcher::CaseInsensitive(_)) {
252            matcher = Matcher::CaseInsensitive(Box::new(matcher));
253        }
254
255        // Try to detect fast path first (JIT-style optimization)
256        // Note: fast path doesn't support flags yet, so skip if flags are set
257        let fast_path = if flags.any_set() {
258            None
259        } else {
260            // First check if we can compile a CaptureDFA for patterns with captures
261            if let Matcher::PatternWithCaptures { ref elements, .. } = matcher {
262                // Try to compile DFA
263                if let Some(dfa) = engine::capture_dfa::compile_capture_pattern(elements) {
264                    // Successfully compiled DFA - use it as fast path
265                    Some(optimization::fast_path::FastPath::CaptureDFA(
266                        std::sync::Arc::new(dfa),
267                    ))
268                } else {
269                    // DFA compilation failed - fall back to normal fast path detection
270                    optimization::fast_path::detect_fast_path(effective_pattern)
271                }
272            } else {
273                // Not a capture pattern - use normal fast path detection
274                optimization::fast_path::detect_fast_path(effective_pattern)
275            }
276        };
277
278        // Extract literals and create prefilter
279        let literals = optimization::literal::extract_from_pattern(effective_pattern);
280
281        // Only use prefilter for Prefix literals and patterns without groups
282        // Groups can cause incorrect literal extraction that breaks leftmost-first semantics
283        // Inner literals require expensive bounded verification
284        // Also disable prefilter when flags are set (case-insensitive, multiline, etc.)
285        let has_groups = effective_pattern.contains("(?:")
286            || (effective_pattern.contains('(') && !effective_pattern.contains("(?"));
287        let prefilter = if !literals.is_empty()
288            && literals.kind == optimization::literal::LiteralKind::Prefix
289            && !has_groups
290            && !flags.any_set()
291        {
292            let pf = optimization::prefilter::Prefilter::from_literals(&literals);
293            if pf.is_available() {
294                Some((pf, literals.kind))
295            } else {
296                None
297            }
298        } else {
299            None
300        };
301
302        Ok(Pattern {
303            matcher,
304            prefilter,
305            fast_path,
306            flags,
307        })
308    }
309
310    pub fn is_match(&self, text: &str) -> bool {
311        // Fast path for common patterns (JIT-style)
312        if let Some(ref fp) = self.fast_path {
313            return fp.find(text).is_some();
314        }
315
316        // Use prefilter if available for faster scanning
317        if let Some((ref prefilter, literal_kind)) = self.prefilter {
318            return self.is_match_with_prefilter(text, prefilter, literal_kind);
319        }
320
321        // No prefilter: use matcher's is_match directly
322        self.matcher.is_match(text)
323    }
324
325    /// Match with prefilter using bounded verification strategy
326    fn is_match_with_prefilter(
327        &self,
328        text: &str,
329        prefilter: &prefilter::Prefilter,
330        literal_kind: literal::LiteralKind,
331    ) -> bool {
332        let bytes = text.as_bytes();
333
334        // Determine lookback window based on literal kind
335        let max_lookback = match literal_kind {
336            literal::LiteralKind::Prefix => 10, // Prefix: small window (e.g., https?)
337            literal::LiteralKind::Inner => 30,  // Inner: medium window (e.g., \w+@)
338            literal::LiteralKind::Suffix => 50, // Suffix: larger window
339            literal::LiteralKind::None => return self.matcher.is_match(text),
340        };
341
342        for candidate_pos in prefilter.candidates(bytes) {
343            let lookback = candidate_pos.min(max_lookback);
344
345            for offset in 0..=lookback {
346                let start_pos = candidate_pos - offset;
347                if self
348                    .matcher
349                    .is_match(safe_slice(text, start_pos).unwrap_or(""))
350                {
351                    return true;
352                }
353            }
354        }
355
356        false
357    }
358
359    pub fn find(&self, text: &str) -> Option<(usize, usize)> {
360        // Fast path for common patterns (JIT-style)
361        if let Some(ref fp) = self.fast_path {
362            return fp.find(text);
363        }
364
365        // Use prefilter if available for faster scanning
366        if let Some((ref prefilter, literal_kind)) = self.prefilter {
367            return self.find_with_prefilter(text, prefilter, literal_kind);
368        }
369
370        // No prefilter: use matcher's find directly
371        self.matcher.find(text)
372    }
373
374    /// Find with prefilter using bounded verification strategy
375    fn find_with_prefilter(
376        &self,
377        text: &str,
378        prefilter: &prefilter::Prefilter,
379        literal_kind: literal::LiteralKind,
380    ) -> Option<(usize, usize)> {
381        let bytes = text.as_bytes();
382        let mut earliest_match: Option<(usize, usize)> = None;
383
384        // Determine lookback window based on literal kind
385        let max_lookback = match literal_kind {
386            literal::LiteralKind::Prefix => 10,
387            literal::LiteralKind::Inner => 30,
388            literal::LiteralKind::Suffix => 50,
389            literal::LiteralKind::None => return self.matcher.find(text),
390        };
391
392        // For each candidate position found by prefilter
393        for candidate_pos in prefilter.candidates(bytes) {
394            // If we already found a match before this candidate, return it
395            if let Some((start, _)) = earliest_match {
396                if start < candidate_pos {
397                    return earliest_match;
398                }
399            }
400
401            let lookback = candidate_pos.min(max_lookback);
402
403            for offset in 0..=lookback {
404                let start_pos = candidate_pos - offset;
405
406                // Try to find match from this position
407                if let Some((match_start, match_end)) =
408                    self.matcher.find(safe_slice(text, start_pos).unwrap_or(""))
409                {
410                    let abs_start = start_pos + match_start;
411                    let abs_end = start_pos + match_end;
412
413                    // Update earliest match if this is earlier
414                    if earliest_match.is_none() || abs_start < earliest_match.unwrap().0 {
415                        earliest_match = Some((abs_start, abs_end));
416                    }
417                    break;
418                }
419            }
420        }
421
422        earliest_match
423    }
424
425    pub fn find_all(&self, text: &str) -> Vec<(usize, usize)> {
426        // Fast path for common patterns (JIT-style)
427        if let Some(ref fp) = self.fast_path {
428            return fp.find_all(text);
429        }
430
431        // OPTIMIZED: Fast path for Literal using memchr's find_iter
432        match &self.matcher {
433            Matcher::Literal(lit) => {
434                // Use memmem::find_iter for direct SIMD iteration
435                memmem::find_iter(text.as_bytes(), lit.as_bytes())
436                    .map(|pos| (pos, pos + lit.len()))
437                    .collect()
438            }
439            Matcher::MultiLiteral(ac) => {
440                // AhoCorasick already has find_iter
441                ac.find_iter(text)
442                    .map(|mat| (mat.start(), mat.end()))
443                    .collect()
444            }
445            Matcher::Sequence(seq) => {
446                // OPTIMIZED: Use specialized sequence iterator with cached Finder
447                seq.find_all(text)
448            }
449            _ => {
450                // Complex patterns: use general iterator
451                self.find_iter(text).map(|m| (m.start(), m.end())).collect()
452            }
453        }
454    }
455
456    /// Create an iterator over all matches
457    pub fn find_iter<'a>(&'a self, text: &'a str) -> FindIter<'a> {
458        FindIter {
459            matcher: &self.matcher,
460            fast_path: &self.fast_path,
461            text,
462            pos: 0,
463        }
464    }
465
466    /// Capture groups from the first match
467    ///
468    /// Returns a `Captures` object if the pattern matches, containing the full match
469    /// and any captured groups. Returns None if no match is found.
470    ///
471    /// # Example
472    /// ```
473    /// use rexile::Pattern;
474    ///
475    /// let pattern = Pattern::new(r"(\w+)@(\w+)\.(\w+)").unwrap();
476    /// if let Some(caps) = pattern.captures("email: test@example.com") {
477    ///     println!("Full: {}", &caps[0]);    // test@example.com
478    ///     println!("User: {}", &caps[1]);    // test
479    ///     println!("Domain: {}", &caps[2]);  // example
480    ///     println!("TLD: {}", &caps[3]);     // com
481    /// }
482    /// ```
483    pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
484        // Check if this is a PatternWithCaptures matcher
485        if let Matcher::PatternWithCaptures {
486            elements,
487            total_groups,
488        } = &self.matcher
489        {
490            // Try matching with backtracking at any position
491            for start_pos in 0..=text.len() {
492                if let Some((end_pos, capture_list)) =
493                    Matcher::match_elements_with_backtrack_and_captures(text, start_pos, elements)
494                {
495                    if end_pos > start_pos || elements.is_empty() {
496                        // Create Captures with full match and capture groups
497                        let mut caps = Captures::new(text, (start_pos, end_pos), *total_groups);
498
499                        // Add each capture group
500                        for (group_num, cap_start, cap_end) in capture_list {
501                            caps.set(group_num, cap_start, cap_end);
502                        }
503
504                        return Some(caps);
505                    }
506                }
507            }
508            None
509        } else if let Matcher::Capture(inner_matcher, group_index) = &self.matcher {
510            // Single capture group - get total groups from inner matcher
511            let total_groups =
512                if let Matcher::PatternWithCaptures { total_groups, .. } = **inner_matcher {
513                    total_groups
514                } else {
515                    *group_index // If inner is not PatternWithCaptures, just use group_index
516                };
517
518            if let Some((start, end)) = inner_matcher.find(text) {
519                let mut caps = Captures::new(text, (start, end), total_groups);
520
521                // Record the main capture
522                caps.set(*group_index, start, end);
523
524                // Extract all nested captures recursively
525                let nested = inner_matcher.extract_nested_captures(text, start);
526                for (group_num, cap_start, cap_end) in nested {
527                    caps.set(group_num, cap_start, cap_end);
528                }
529
530                Some(caps)
531            } else {
532                None
533            }
534        } else if let Matcher::AnchoredPattern { inner, start, end } = &self.matcher {
535            // Handle anchored patterns with captures
536            // Delegate to inner matcher's captures logic, but with anchor constraints
537            if let Matcher::PatternWithCaptures {
538                elements,
539                total_groups,
540            } = inner.as_ref()
541            {
542                // For anchored captures, we need to respect anchor constraints
543                let check_anchor = |match_start: usize, match_end: usize| -> bool {
544                    let start_ok = !*start || match_start == 0;
545                    let end_ok = !*end || match_end == text.len();
546                    start_ok && end_ok
547                };
548
549                // Try matching with backtracking at any position
550                for start_pos in 0..=text.len() {
551                    // For start anchor, only try position 0
552                    if *start && start_pos != 0 {
553                        continue;
554                    }
555
556                    if let Some((end_pos, capture_list)) =
557                        Matcher::match_elements_with_backtrack_and_captures(
558                            text, start_pos, elements,
559                        )
560                    {
561                        if (end_pos > start_pos || elements.is_empty())
562                            && check_anchor(start_pos, end_pos)
563                        {
564                            // Create Captures with full match and capture groups
565                            let mut caps = Captures::new(text, (start_pos, end_pos), *total_groups);
566
567                            // Add each capture group
568                            for (group_num, cap_start, cap_end) in capture_list {
569                                caps.set(group_num, cap_start, cap_end);
570                            }
571
572                            return Some(caps);
573                        }
574                    }
575                }
576                None
577            } else {
578                // Simple pattern without captures - just return full match with anchor check
579                self.find(text).map(|(match_start, match_end)| {
580                    Captures::new(text, (match_start, match_end), 0)
581                })
582            }
583        } else {
584            // Simple pattern without explicit captures - just return full match
585            self.find(text)
586                .map(|(start, end)| Captures::new(text, (start, end), 0))
587        }
588    }
589
590    /// Iterate over all captures in the text
591    ///
592    /// Returns an iterator that yields `Captures` for each match found.
593    ///
594    /// # Example
595    /// ```
596    /// use rexile::Pattern;
597    ///
598    /// let pattern = Pattern::new(r"(\w+)=(\d+)").unwrap();
599    /// for caps in pattern.captures_iter("a=1 b=2 c=3") {
600    ///     println!("{} = {}", &caps[1], &caps[2]);
601    /// }
602    /// ```
603    pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CapturesIter<'r, 't> {
604        CapturesIter {
605            pattern: self,
606            text,
607            pos: 0,
608        }
609    }
610
611    /// Replace the first match with a replacement string
612    ///
613    /// Supports capture group references using $1, $2, etc.
614    ///
615    /// # Example
616    /// ```
617    /// use rexile::Pattern;
618    ///
619    /// let pattern = Pattern::new(r"(\w+)").unwrap();
620    /// let result = pattern.replace("hello world", "goodbye");
621    /// assert_eq!(result, "goodbye world");
622    ///
623    /// // With captures
624    /// let pattern = Pattern::new(r"(\w+)=(\d+)").unwrap();
625    /// let result = pattern.replace("a=1 b=2", "$1:[$2]");
626    /// assert_eq!(result, "a:[1] b=2");
627    /// ```
628    pub fn replace(&self, text: &str, replacement: &str) -> String {
629        // Check if replacement contains capture references like $1, $2
630        let has_captures = replacement.contains('$');
631
632        if !has_captures {
633            // Simple literal replacement (fast path)
634            if let Some((start, end)) = self.find(text) {
635                let mut result = String::new();
636                result.push_str(&text[..start]);
637                result.push_str(replacement);
638                result.push_str(&text[end..]);
639                result
640            } else {
641                // No match, return original text
642                text.to_string()
643            }
644        } else {
645            // Replacement with capture groups
646            if let Some(caps) = self.captures(text) {
647                let match_start = caps.pos(0).unwrap().0;
648                let match_end = caps.pos(0).unwrap().1;
649
650                let mut result = String::new();
651                result.push_str(&text[..match_start]);
652
653                // Process replacement string with $1, $2, etc.
654                let mut chars = replacement.chars().peekable();
655                while let Some(ch) = chars.next() {
656                    if ch == '$' {
657                        // Check if next char is a digit
658                        if let Some(&next_ch) = chars.peek() {
659                            if next_ch.is_ascii_digit() {
660                                chars.next(); // consume the digit
661                                let group_num = next_ch.to_digit(10).unwrap() as usize;
662
663                                // Insert the captured group
664                                if let Some(group_text) = caps.get(group_num) {
665                                    result.push_str(group_text);
666                                }
667                            } else {
668                                result.push('$');
669                            }
670                        } else {
671                            result.push('$');
672                        }
673                    } else {
674                        result.push(ch);
675                    }
676                }
677
678                result.push_str(&text[match_end..]);
679                result
680            } else {
681                // No match, return original text
682                text.to_string()
683            }
684        }
685    }
686
687    /// Replace all matches with a replacement string
688    ///
689    /// Supports capture group references using $1, $2, etc.
690    ///
691    /// # Example
692    /// ```
693    /// use rexile::Pattern;
694    ///
695    /// let pattern = Pattern::new(r"(\w+)=(\d+)").unwrap();
696    /// let result = pattern.replace_all("a=1 b=2", "$1:[$2]");
697    /// assert_eq!(result, "a:[1] b:[2]");
698    /// ```
699    pub fn replace_all(&self, text: &str, replacement: &str) -> String {
700        // Check if replacement contains capture references like $1, $2
701        let has_captures = replacement.contains('$');
702
703        if !has_captures {
704            // Simple literal replacement (fast path)
705            let mut result = String::new();
706            let mut last_end = 0;
707
708            for (start, end) in self.find_all(text) {
709                result.push_str(&text[last_end..start]);
710                result.push_str(replacement);
711                last_end = end;
712            }
713            result.push_str(&text[last_end..]);
714            return result;
715        }
716
717        // Replacement with capture groups
718        let mut result = String::new();
719        let mut last_end = 0;
720
721        for caps in self.captures_iter(text) {
722            let _full_match = caps.get(0).unwrap();
723            let match_start = caps.pos(0).unwrap().0;
724            let match_end = caps.pos(0).unwrap().1;
725
726            // Add text before this match
727            result.push_str(&text[last_end..match_start]);
728
729            // Process replacement string with $1, $2, etc.
730            let mut chars = replacement.chars().peekable();
731            while let Some(ch) = chars.next() {
732                if ch == '$' {
733                    // Check if next char is a digit
734                    if let Some(&next_ch) = chars.peek() {
735                        if next_ch.is_ascii_digit() {
736                            chars.next(); // consume the digit
737                            let group_num = next_ch.to_digit(10).unwrap() as usize;
738
739                            // Insert the captured group
740                            if let Some(group_text) = caps.get(group_num) {
741                                result.push_str(group_text);
742                            }
743                            // If group doesn't exist, just skip (don't insert anything)
744                        } else {
745                            // $ not followed by digit, insert literal $
746                            result.push('$');
747                        }
748                    } else {
749                        // $ at end of string
750                        result.push('$');
751                    }
752                } else {
753                    result.push(ch);
754                }
755            }
756
757            last_end = match_end;
758        }
759
760        // Add remaining text
761        result.push_str(&text[last_end..]);
762        result
763    }
764
765    /// Split text by matches of this pattern
766    ///
767    /// # Example
768    /// ```
769    /// use rexile::Pattern;
770    ///
771    /// let pattern = Pattern::new(r"\s+").unwrap();
772    /// let parts: Vec<_> = pattern.split("a  b   c").collect();
773    /// assert_eq!(parts, vec!["a", "b", "c"]);
774    /// ```
775    pub fn split<'r, 't>(&'r self, text: &'t str) -> SplitIter<'r, 't> {
776        SplitIter {
777            pattern: self,
778            text,
779            pos: 0,
780            finished: false,
781        }
782    }
783}
784
785/// A single match in the haystack.
786///
787/// This is similar to `regex::Match` and provides access to
788/// the matched text and its position.
789#[derive(Debug, Clone, Copy, PartialEq, Eq)]
790pub struct Match<'t> {
791    text: &'t str,
792    start: usize,
793    end: usize,
794}
795
796impl<'t> Match<'t> {
797    /// Create a new Match
798    #[inline]
799    pub fn new(text: &'t str, start: usize, end: usize) -> Self {
800        Self { text, start, end }
801    }
802
803    /// Returns the starting byte offset of the match.
804    #[inline]
805    pub fn start(&self) -> usize {
806        self.start
807    }
808
809    /// Returns the ending byte offset of the match.
810    #[inline]
811    pub fn end(&self) -> usize {
812        self.end
813    }
814
815    /// Returns the matched text.
816    #[inline]
817    pub fn as_str(&self) -> &'t str {
818        &self.text[self.start..self.end]
819    }
820
821    /// Returns the range of byte offsets spanned by this match.
822    #[inline]
823    pub fn range(&self) -> std::ops::Range<usize> {
824        self.start..self.end
825    }
826
827    /// Returns the length of the match in bytes.
828    #[inline]
829    pub fn len(&self) -> usize {
830        self.end - self.start
831    }
832
833    /// Returns true if this is an empty match.
834    #[inline]
835    pub fn is_empty(&self) -> bool {
836        self.start == self.end
837    }
838}
839
840/// Iterator over pattern matches
841pub struct FindIter<'a> {
842    matcher: &'a Matcher,
843    fast_path: &'a Option<optimization::fast_path::FastPath>,
844    text: &'a str,
845    pos: usize,
846}
847
848impl<'a> Iterator for FindIter<'a> {
849    type Item = Match<'a>;
850
851    fn next(&mut self) -> Option<Self::Item> {
852        // TRUE LAZY EVALUATION: Find one match at a time
853        if self.pos >= self.text.len() {
854            return None;
855        }
856
857        // Use fast path if available - find_at() finds ONE match from position
858        if let Some(ref fast_path) = self.fast_path {
859            if let Some((start, end)) = fast_path.find_at(self.text, self.pos) {
860                // Move position past this match
861                self.pos = end.max(self.pos + 1);
862                return Some(Match::new(self.text, start, end));
863            } else {
864                // No more matches
865                return None;
866            }
867        }
868
869        // Fallback: normal matcher iteration
870        let remaining = &self.text[self.pos..];
871        if let Some((rel_start, rel_end)) = self.matcher.find(remaining) {
872            let abs_start = self.pos + rel_start;
873            let abs_end = self.pos + rel_end;
874
875            // Move position past this match to avoid infinite loop
876            self.pos = abs_end.max(self.pos + 1);
877
878            Some(Match::new(self.text, abs_start, abs_end))
879        } else {
880            None
881        }
882    }
883}
884
885/// Iterator over captures for each match
886pub struct CapturesIter<'r, 't> {
887    pattern: &'r Pattern,
888    text: &'t str,
889    pos: usize,
890}
891
892impl<'r, 't> Iterator for CapturesIter<'r, 't> {
893    type Item = Captures<'t>;
894
895    fn next(&mut self) -> Option<Self::Item> {
896        if self.pos >= self.text.len() {
897            return None;
898        }
899
900        // Check if this is a PatternWithCaptures matcher
901        if let Matcher::PatternWithCaptures {
902            elements,
903            total_groups,
904        } = &self.pattern.matcher
905        {
906            // Find next match starting from current position and extract capture positions
907            let remaining = &self.text[self.pos..];
908
909            // Iterate over char boundaries, not arbitrary byte positions
910            let char_indices: Vec<usize> = remaining.char_indices().map(|(i, _)| i).collect();
911            let search_positions: Vec<usize> = if char_indices.is_empty() {
912                vec![0]
913            } else {
914                char_indices
915                    .into_iter()
916                    .chain(std::iter::once(remaining.len()))
917                    .collect()
918            };
919
920            for &start_offset in &search_positions {
921                if start_offset >= remaining.len() {
922                    break;
923                }
924
925                let mut pos = start_offset;
926                let mut capture_positions: Vec<(usize, usize)> = Vec::new();
927                let mut all_matched = true;
928
929                for element in elements {
930                    let (matcher, group_num_opt) = match element {
931                        CompiledCaptureElement::Capture(m, num) => (m, Some(*num)),
932                        CompiledCaptureElement::NonCapture(m) => (m, None),
933                    };
934
935                    if let Some((rel_start, rel_end)) = matcher.find(&remaining[pos..]) {
936                        if rel_start != 0 {
937                            // Element must match at current position
938                            all_matched = false;
939                            break;
940                        }
941
942                        let abs_start = pos;
943                        let abs_end = pos + rel_end;
944
945                        // If this is a capture group, record its position
946                        if let Some(group_num) = group_num_opt {
947                            // Ensure we have enough space
948                            while capture_positions.len() < group_num {
949                                capture_positions.push((0, 0));
950                            }
951                            capture_positions[group_num - 1] = (abs_start, abs_end);
952                        }
953
954                        pos = abs_end;
955                    } else {
956                        all_matched = false;
957                        break;
958                    }
959                }
960
961                if all_matched {
962                    // Convert relative positions to absolute positions
963                    let abs_start = self.pos + start_offset;
964                    let abs_end = self.pos + pos;
965
966                    // Move position past this match
967                    self.pos = abs_end.max(self.pos + 1);
968
969                    // Create Captures with full match and capture groups
970                    let mut caps = Captures::new(self.text, (abs_start, abs_end), *total_groups);
971
972                    // Add each capture group using the set method
973                    for (i, &(start, end)) in capture_positions.iter().enumerate() {
974                        caps.set(i + 1, self.pos - pos + start, self.pos - pos + end);
975                    }
976
977                    return Some(caps);
978                }
979            }
980            None
981        } else {
982            // Simple pattern without explicit captures
983            let remaining = &self.text[self.pos..];
984            if let Some((rel_start, rel_end)) = self.pattern.matcher.find(remaining) {
985                let abs_start = self.pos + rel_start;
986                let abs_end = self.pos + rel_end;
987
988                // Move position past this match
989                self.pos = abs_end.max(self.pos + 1);
990
991                // Create captures for this match
992                Some(Captures::new(self.text, (abs_start, abs_end), 0))
993            } else {
994                None
995            }
996        }
997    }
998}
999
1000/// Iterator over text split by pattern matches
1001pub struct SplitIter<'r, 't> {
1002    pattern: &'r Pattern,
1003    text: &'t str,
1004    pos: usize,
1005    finished: bool,
1006}
1007
1008impl<'r, 't> Iterator for SplitIter<'r, 't> {
1009    type Item = &'t str;
1010
1011    fn next(&mut self) -> Option<Self::Item> {
1012        if self.finished {
1013            return None;
1014        }
1015
1016        // Find next match starting from current position
1017        let remaining = &self.text[self.pos..];
1018        if let Some((rel_start, rel_end)) = self.pattern.matcher.find(remaining) {
1019            let abs_start = self.pos + rel_start;
1020            let abs_end = self.pos + rel_end;
1021
1022            // Return text before the match
1023            let result = &self.text[self.pos..abs_start];
1024            self.pos = abs_end;
1025
1026            Some(result)
1027        } else {
1028            // No more matches, return remaining text
1029            self.finished = true;
1030            Some(&self.text[self.pos..])
1031        }
1032    }
1033}
1034
1035static CACHE: OnceLock<Mutex<HashMap<String, Pattern>>> = OnceLock::new();
1036
1037fn get_cache() -> &'static Mutex<HashMap<String, Pattern>> {
1038    CACHE.get_or_init(|| Mutex::new(HashMap::new()))
1039}
1040
1041pub fn get_pattern(pattern: &str) -> Result<Pattern, PatternError> {
1042    let mut cache = get_cache().lock().unwrap();
1043    if let Some(p) = cache.get(pattern) {
1044        return Ok(p.clone());
1045    }
1046    let compiled = Pattern::new(pattern)?;
1047    cache.insert(pattern.to_string(), compiled.clone());
1048    Ok(compiled)
1049}
1050
1051pub fn is_match(pattern: &str, text: &str) -> Result<bool, PatternError> {
1052    Ok(get_pattern(pattern)?.is_match(text))
1053}
1054
1055pub fn find(pattern: &str, text: &str) -> Result<Option<(usize, usize)>, PatternError> {
1056    Ok(get_pattern(pattern)?.find(text))
1057}
1058
1059#[derive(Debug, Clone, PartialEq, Eq)]
1060pub enum PatternError {
1061    ParseError(String),
1062    UnsupportedFeature(String),
1063}
1064
1065impl std::fmt::Display for PatternError {
1066    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1067        match self {
1068            PatternError::ParseError(msg) => write!(f, "Parse error: {}", msg),
1069            PatternError::UnsupportedFeature(msg) => write!(f, "Unsupported: {}", msg),
1070        }
1071    }
1072}
1073
1074impl std::error::Error for PatternError {}
1075
1076#[derive(Debug, Clone, PartialEq)]
1077enum Ast {
1078    Literal(String),
1079    Dot,    // Matches any character except newline
1080    DotAll, // Matches any character INCLUDING newline (for (?s) flag)
1081    Alternation(Vec<String>),
1082    Anchored {
1083        literal: String,
1084        start: bool,
1085        end: bool,
1086    },
1087    AnchoredGroup {
1088        group: Group,
1089        start: bool,
1090        end: bool,
1091    },
1092    AnchoredPattern {
1093        inner: Box<Ast>,
1094        start: bool,
1095        end: bool,
1096    },
1097    CharClass(CharClass),
1098    Quantified(QuantifiedPattern),
1099    Sequence(Sequence),
1100    SequenceWithFlags(Sequence, Flags), // Sequence with flags applied
1101    Group(Group),
1102    Boundary(BoundaryType),   // Phase 6: Word boundary support
1103    Lookaround(Lookaround),   // Phase 7: Lookahead/lookbehind
1104    Capture(Box<Ast>, usize), // Phase 8: Capture group (pattern, group_index)
1105    QuantifiedCapture(Box<Ast>, parser::quantifier::Quantifier), // Capture with quantifier: (foo)+
1106    CombinedWithLookaround {
1107        prefix: Box<Ast>,
1108        lookaround: Lookaround,
1109    }, // Phase 7.2: foo(?=bar) - prefix with lookahead
1110    LookbehindWithSuffix {
1111        lookbehind: Lookaround,
1112        suffix: Box<Ast>,
1113    }, // Phase 7.3: (?<=foo)bar - lookbehind with suffix
1114    PatternWithCaptures {
1115        elements: Vec<CaptureElement>,
1116        total_groups: usize,
1117    }, // Phase 8.1: Hello (\w+)
1118    AlternationWithCaptures {
1119        branches: Vec<Ast>,
1120        total_groups: usize,
1121    }, // Alternation where branches may contain captures: (a)|(b) or (?:(a)|(b))
1122    Backreference(usize),     // Phase 9: Backreference to capture group (\1, \2, etc.)
1123    CaseInsensitive(Box<Ast>), // Wrap AST with case-insensitive matching
1124}
1125
1126/// Parse patterns that contain groups combined with other elements
1127/// Handles: ^(hello), (foo)(bar), prefix(foo|bar), (foo|bar)suffix, (http|https)://
1128fn parse_pattern_with_groups(pattern: &str) -> Result<Ast, PatternError> {
1129    // Case 1: Multiple consecutive groups: (foo)(bar) - CHECK FIRST!
1130    if pattern.matches('(').count() > 1 && !pattern.contains('|') {
1131        let mut combined_literals = Vec::new();
1132        let mut pos = 0;
1133        let mut all_parsed = true;
1134
1135        while pos < pattern.len() && pattern[pos..].starts_with('(') {
1136            match parser::group::parse_group(&pattern[pos..]) {
1137                Ok((group, bytes_consumed)) => {
1138                    // Extract literals from this group
1139                    match &group.content {
1140                        parser::group::GroupContent::Single(s) => {
1141                            combined_literals.push(s.clone());
1142                        }
1143                        parser::group::GroupContent::Sequence(seq) => {
1144                            // Try to extract literal from sequence of chars
1145                            let mut literal = String::new();
1146                            let mut is_simple = true;
1147
1148                            for elem in &seq.elements {
1149                                match elem {
1150                                    crate::parser::sequence::SequenceElement::Char(ch) => {
1151                                        literal.push(*ch);
1152                                    }
1153                                    crate::parser::sequence::SequenceElement::Literal(lit) => {
1154                                        literal.push_str(lit);
1155                                    }
1156                                    _ => {
1157                                        // Not a simple literal sequence
1158                                        is_simple = false;
1159                                        break;
1160                                    }
1161                                }
1162                            }
1163
1164                            if is_simple {
1165                                combined_literals.push(literal);
1166                            } else {
1167                                all_parsed = false;
1168                                break;
1169                            }
1170                        }
1171                        parser::group::GroupContent::Alternation(_)
1172                        | parser::group::GroupContent::ParsedAlternation(_) => {
1173                            // Can't easily combine alternations
1174                            all_parsed = false;
1175                            break;
1176                        }
1177                    }
1178                    pos += bytes_consumed;
1179                }
1180                Err(_) => {
1181                    all_parsed = false;
1182                    break;
1183                }
1184            }
1185        }
1186
1187        if all_parsed && pos == pattern.len() && !combined_literals.is_empty() {
1188            // All groups parsed successfully - build as sequence
1189            // Create a sequence of literal elements for consecutive matching
1190            use crate::parser::sequence::{Sequence, SequenceElement};
1191
1192            let mut elements = Vec::new();
1193            for literal in combined_literals {
1194                // Each literal becomes a sequence element
1195                elements.push(SequenceElement::Literal(literal));
1196            }
1197
1198            let seq = Sequence::new(elements);
1199            return Ok(Ast::Sequence(seq));
1200        }
1201    }
1202
1203    // Case 2: Anchor + Group: ^(hello) or (world)$
1204    if pattern.starts_with("^(") || pattern.ends_with(")$") {
1205        let has_start = pattern.starts_with('^');
1206        let has_end = pattern.ends_with('$');
1207
1208        // Strip anchors properly - need to handle chaining correctly
1209        let mut inner = pattern;
1210        if has_start {
1211            inner = &inner[1..]; // Remove '^'
1212        }
1213        if has_end {
1214            inner = &inner[..inner.len() - 1]; // Remove '$'
1215        }
1216
1217        if inner.starts_with('(') {
1218            if let Ok((group, bytes_consumed)) = parser::group::parse_group(inner) {
1219                if bytes_consumed == inner.len() {
1220                    // Extract the actual pattern from group for anchored matching
1221                    let group_literal = match &group.content {
1222                        parser::group::GroupContent::Single(s) => Some(s.clone()),
1223                        parser::group::GroupContent::Sequence(seq) => {
1224                            // Try to extract literal from sequence of chars
1225                            let mut literal = String::new();
1226                            let mut is_simple = true;
1227
1228                            for elem in &seq.elements {
1229                                match elem {
1230                                    crate::parser::sequence::SequenceElement::Char(ch) => {
1231                                        literal.push(*ch);
1232                                    }
1233                                    crate::parser::sequence::SequenceElement::Literal(lit) => {
1234                                        literal.push_str(lit);
1235                                    }
1236                                    _ => {
1237                                        // Not a simple literal - can't anchor
1238                                        is_simple = false;
1239                                        break;
1240                                    }
1241                                }
1242                            }
1243
1244                            if is_simple {
1245                                Some(literal)
1246                            } else {
1247                                None
1248                            }
1249                        }
1250                        parser::group::GroupContent::Alternation(_)
1251                        | parser::group::GroupContent::ParsedAlternation(_) => {
1252                            // For alternation like ^(foo|bar), can't use simple Anchored
1253                            None
1254                        }
1255                    };
1256
1257                    if let Some(lit) = group_literal {
1258                        return Ok(Ast::Anchored {
1259                            literal: lit,
1260                            start: has_start,
1261                            end: has_end,
1262                        });
1263                    } else {
1264                        // Complex group - use AnchoredGroup
1265                        return Ok(Ast::AnchoredGroup {
1266                            group,
1267                            start: has_start,
1268                            end: has_end,
1269                        });
1270                    }
1271                }
1272            }
1273        }
1274    }
1275
1276    // Case 3: Just a single group
1277    if pattern.starts_with('(') {
1278        if let Ok((group, bytes_consumed)) = parser::group::parse_group(pattern) {
1279            if bytes_consumed == pattern.len() {
1280                return Ok(Ast::Group(group));
1281            }
1282
1283            // Case 4: Group with suffix: (foo|bar)suffix, (http|https)://
1284            if bytes_consumed < pattern.len() {
1285                let suffix = &pattern[bytes_consumed..];
1286                // Build a combined pattern
1287                // For alternation groups, expand: (a|b)c -> ac|bc
1288                match &group.content {
1289                    parser::group::GroupContent::Alternation(parts) => {
1290                        let expanded: Vec<String> =
1291                            parts.iter().map(|p| format!("{}{}", p, suffix)).collect();
1292                        return Ok(Ast::Alternation(expanded));
1293                    }
1294                    parser::group::GroupContent::Sequence(seq) => {
1295                        // Group with sequence + suffix: (\w+)@ or (\d+).
1296                        // Need to append suffix to the sequence
1297                        use crate::parser::sequence::{Sequence, SequenceElement};
1298
1299                        let mut new_elements = seq.elements.clone();
1300                        // Add suffix as literal elements
1301                        for ch in suffix.chars() {
1302                            new_elements.push(SequenceElement::Char(ch));
1303                        }
1304
1305                        let combined_seq = Sequence::new(new_elements);
1306                        return Ok(Ast::Sequence(combined_seq));
1307                    }
1308                    parser::group::GroupContent::Single(s) => {
1309                        // Simple literal + suffix
1310                        let combined = format!("{}{}", s, suffix);
1311                        return Ok(Ast::Literal(combined));
1312                    }
1313                    parser::group::GroupContent::ParsedAlternation(_) => {
1314                        // Complex alternation with suffix - fall through
1315                    }
1316                }
1317            }
1318        }
1319    }
1320
1321    // Case 5: Prefix + Group: prefix(foo|bar) - but NOT ^(hello) or $(hello)
1322    if let Some(group_start) = pattern.find('(') {
1323        if group_start > 0 {
1324            let prefix = &pattern[..group_start];
1325            // Skip if prefix is just an anchor
1326            if prefix != "^" && prefix != "$" {
1327                let group_part = &pattern[group_start..];
1328
1329                if let Ok((group, bytes_consumed)) = parser::group::parse_group(group_part) {
1330                    if bytes_consumed == group_part.len() {
1331                        // prefix + group
1332                        match &group.content {
1333                            parser::group::GroupContent::Alternation(parts) => {
1334                                let expanded: Vec<String> =
1335                                    parts.iter().map(|p| format!("{}{}", prefix, p)).collect();
1336                                return Ok(Ast::Alternation(expanded));
1337                            }
1338                            _ => {
1339                                // Single pattern with prefix
1340                                return Ok(Ast::Group(group));
1341                            }
1342                        }
1343                    }
1344                }
1345            }
1346        }
1347    }
1348
1349    Err(PatternError::ParseError(
1350        "Complex group pattern not fully supported".to_string(),
1351    ))
1352}
1353
1354fn parse_pattern(pattern: &str) -> Result<Ast, PatternError> {
1355    parse_pattern_with_depth(pattern, 0)
1356}
1357
1358const MAX_RECURSION_DEPTH: usize = 100;
1359
1360fn parse_pattern_with_depth(pattern: &str, depth: usize) -> Result<Ast, PatternError> {
1361    if depth > MAX_RECURSION_DEPTH {
1362        return Err(PatternError::ParseError(
1363            "Pattern too complex: recursion depth exceeded".to_string(),
1364        ));
1365    }
1366
1367    if pattern.is_empty() {
1368        return Ok(Ast::Literal(String::new()));
1369    }
1370
1371    // Phase 7: Check for lookaround assertions (?=...), (?!...), (?<=...), (?<!...)
1372    if pattern.starts_with("(?=")
1373        || pattern.starts_with("(?!")
1374        || pattern.starts_with("(?<=")
1375        || pattern.starts_with("(?<!")
1376    {
1377        return parse_lookaround(pattern, depth);
1378    }
1379
1380    // Phase 7.2: Check for combined patterns with lookaround: foo(?=bar), \d+(?!x)
1381    if pattern.contains("(?=")
1382        || pattern.contains("(?!")
1383        || pattern.contains("(?<=")
1384        || pattern.contains("(?<!")
1385    {
1386        // Try to parse as combined pattern with lookaround
1387        if let Ok(ast) = parse_combined_with_lookaround(pattern, depth) {
1388            return Ok(ast);
1389        }
1390    }
1391
1392    // Phase 8: Check for capture groups (...) - but not (?:...) which is handled by group parser
1393    // Simple heuristic: if starts with ( but not (? or (?:, might be capture group
1394    if pattern.starts_with('(') && !pattern.starts_with("(?") {
1395        // Check if this is a simple capture group pattern (no nested captures inside)
1396        if let Some(close_idx) = find_matching_paren(pattern, 0) {
1397            if close_idx == pattern.len() - 1 {
1398                // Entire pattern is a capture group: (pattern)
1399                let inner = &pattern[1..close_idx];
1400
1401                // If inner contains captures, let parse_pattern_with_captures handle it
1402                if !contains_unescaped_paren(inner) || inner.starts_with("(?") {
1403                    // Simple capture with no nesting
1404                    let inner_ast = parse_pattern_with_depth(inner, depth + 1)?;
1405                    return Ok(Ast::Capture(Box::new(inner_ast), 1)); // Group 1
1406                }
1407                // Else: fall through to parse_pattern_with_captures below
1408            }
1409        }
1410    }
1411
1412    // Phase 8.1: Check for patterns with embedded captures: Hello (\w+), (\w+)=(\d+)
1413    // Phase 8.2: Also handles non-capturing groups: (?:Hello) (\w+)
1414    // But skip patterns starting with anchors - they need special handling below
1415    // Also skip quantified groups like (test)?, (foo)+, (bar)* - they're handled as quantified patterns
1416    let is_quantified_group = pattern.starts_with('(')
1417        && if let Some(close_idx) = find_matching_paren(pattern, 0) {
1418            close_idx == pattern.len() - 2
1419                && (pattern.ends_with('?') || pattern.ends_with('*') || pattern.ends_with('+'))
1420        } else {
1421            false
1422        };
1423
1424    let is_bounded_quantified_group = pattern.starts_with('(')
1425        && if let Some(close_idx) = find_matching_paren(pattern, 0) {
1426            close_idx < pattern.len() - 1 && pattern[close_idx + 1..].starts_with('{')
1427        } else {
1428            false
1429        };
1430
1431    if contains_unescaped_paren(pattern)
1432        && !pattern.starts_with('^')
1433        && !pattern.ends_with('$')
1434        && !is_quantified_group
1435        && !is_bounded_quantified_group
1436        && !pattern.contains("(?=")
1437        && !pattern.contains("(?!")
1438        && !pattern.contains("(?<=")
1439        && !pattern.contains("(?<!")
1440    {
1441        // Try to parse as pattern with captures (including non-capturing groups)
1442        if let Ok(ast) = parse_pattern_with_captures(pattern) {
1443            return Ok(ast);
1444        }
1445    }
1446
1447    // Special handling for patterns with groups and other elements
1448    // e.g., ^(hello), (foo)(bar), prefix(foo|bar), (foo|bar)suffix
1449    if contains_unescaped_paren(pattern) {
1450        // Try to parse as complex pattern with groups
1451        if let Ok(ast) = parse_pattern_with_groups(pattern) {
1452            return Ok(ast);
1453        }
1454    }
1455
1456    // Check for anchors (before sequences)
1457    let has_start_anchor = pattern.starts_with('^');
1458    let has_end_anchor = pattern.ends_with('$');
1459
1460    if has_start_anchor || has_end_anchor {
1461        // Strip anchors properly - don't fall back to original pattern
1462        let mut literal = pattern;
1463        if has_start_anchor {
1464            literal = literal.strip_prefix('^').unwrap();
1465        }
1466        if has_end_anchor {
1467            literal = literal.strip_suffix('$').unwrap();
1468        }
1469
1470        // Don't treat anchored patterns as sequences
1471        return Ok(Ast::Anchored {
1472            literal: literal.to_string(),
1473            start: has_start_anchor,
1474            end: has_end_anchor,
1475        });
1476    }
1477
1478    // Check for alternation (|)
1479    if pattern.contains('|') && !pattern.contains('[') {
1480        let parts: Vec<String> = pattern.split('|').map(|s| s.to_string()).collect();
1481        return Ok(Ast::Alternation(parts));
1482    }
1483
1484    // Check for sequence pattern (most complex)
1485    if is_sequence_pattern(pattern) {
1486        match parse_sequence(pattern) {
1487            Ok(seq) => return Ok(Ast::Sequence(seq)),
1488            Err(_) => {
1489                // Fall through to other parsers
1490            }
1491        }
1492    }
1493
1494    // Check for escape sequences: \d, \w, \s, \b, \B, \., etc.
1495    if starts_with_escape(pattern) {
1496        match parse_escape(pattern) {
1497            Ok((seq, bytes_consumed)) => {
1498                // If it's the whole pattern
1499                if bytes_consumed == pattern.len() {
1500                    // Check for boundary first (since it doesn't convert to CharClass)
1501                    if let Some(boundary_type) = seq.to_boundary() {
1502                        return Ok(Ast::Boundary(boundary_type));
1503                    }
1504                    // Convert to CharClass if possible
1505                    if let Some(cc) = seq.to_char_class() {
1506                        return Ok(Ast::CharClass(cc));
1507                    }
1508                    // Or to literal char
1509                    if let Some(ch) = seq.to_char() {
1510                        return Ok(Ast::Literal(ch.to_string()));
1511                    }
1512                }
1513                // Otherwise, check for quantifier after escape
1514                let remaining = &pattern[bytes_consumed..];
1515                if !remaining.is_empty() {
1516                    if let Some(q_char) = remaining.chars().next() {
1517                        if q_char == '*' || q_char == '+' || q_char == '?' || q_char == '{' {
1518                            // This is an escape with quantifier: \d+, \w*, \d{4}, etc.
1519                            if let Ok(qp) = parse_quantified_pattern(pattern) {
1520                                return Ok(Ast::Quantified(qp));
1521                            }
1522                        }
1523                    }
1524                }
1525            }
1526            Err(e) => return Err(PatternError::ParseError(e)),
1527        }
1528    }
1529
1530    // Check for quantified patterns: a+, [0-9]*, \d+, etc.
1531    let has_quantifier = pattern.ends_with('*')
1532        || pattern.ends_with('+')
1533        || pattern.ends_with('?')
1534        || (pattern.contains('{') && pattern.ends_with('}'));
1535
1536    if has_quantifier {
1537        // Try to parse as quantified pattern
1538        match parse_quantified_pattern(pattern) {
1539            Ok(qp) => return Ok(Ast::Quantified(qp)),
1540            Err(_) => {
1541                // Fall through to other parsers
1542            }
1543        }
1544    }
1545
1546    // Check for character class [...]
1547    if pattern.starts_with('[') && pattern.contains(']') {
1548        let end_idx = pattern.find(']').unwrap();
1549        if end_idx == pattern.len() - 1 {
1550            // Pure character class pattern: [a-z]
1551            let class_content = &pattern[1..end_idx];
1552            let char_class = CharClass::parse(class_content).map_err(PatternError::ParseError)?;
1553            return Ok(Ast::CharClass(char_class));
1554        }
1555        // Character class with quantifier is handled above
1556    }
1557
1558    // Check for single dot wildcard
1559    if pattern == "." {
1560        return Ok(Ast::Dot);
1561    }
1562
1563    // Check if pattern contains dots - needs sequence parsing
1564    if pattern.contains('.') {
1565        // Pattern like "a.c" needs to be parsed as sequence with dot wildcard
1566        use crate::parser::sequence::{Sequence, SequenceElement};
1567        let mut elements = Vec::new();
1568
1569        for ch in pattern.chars() {
1570            if ch == '.' {
1571                elements.push(SequenceElement::Dot);
1572            } else {
1573                elements.push(SequenceElement::Char(ch));
1574            }
1575        }
1576
1577        return Ok(Ast::Sequence(Sequence::new(elements)));
1578    }
1579
1580    // Default: treat as literal
1581    Ok(Ast::Literal(pattern.to_string()))
1582}
1583
1584/// Parse pattern with flags applied
1585/// This handles (?i) case-insensitive, (?m) multiline, (?s) dotall flags
1586fn parse_pattern_with_flags(pattern: &str, flags: &Flags) -> Result<Ast, PatternError> {
1587    // If dotall flag is set, we need to handle . differently
1588    // If case_insensitive is set, wrap result in CaseInsensitive
1589
1590    if flags.dot_matches_newline {
1591        // Parse the pattern with dot matching newlines
1592        let ast = parse_pattern_dotall(pattern, flags)?;
1593        if flags.case_insensitive {
1594            return Ok(Ast::CaseInsensitive(Box::new(ast)));
1595        }
1596        return Ok(ast);
1597    }
1598
1599    // Parse normally
1600    let ast = parse_pattern(pattern)?;
1601    if flags.case_insensitive {
1602        return Ok(Ast::CaseInsensitive(Box::new(ast)));
1603    }
1604    Ok(ast)
1605}
1606
1607/// Parse pattern with DOTALL mode: . matches newlines
1608fn parse_pattern_dotall(pattern: &str, flags: &Flags) -> Result<Ast, PatternError> {
1609    if pattern.is_empty() {
1610        return Ok(Ast::Literal(String::new()));
1611    }
1612
1613    // Check for single dot wildcard
1614    if pattern == "." {
1615        return Ok(Ast::DotAll);
1616    }
1617
1618    // Check if pattern contains dots - needs sequence parsing with DotAll
1619    if pattern.contains('.') {
1620        // Check if this is a sequence pattern
1621        if is_sequence_pattern(pattern) {
1622            // Parse the sequence and apply DOTALL flag
1623            match parse_sequence(pattern) {
1624                Ok(seq) => return Ok(Ast::SequenceWithFlags(seq, *flags)),
1625                Err(_) => {
1626                    // Fall through to other parsers
1627                }
1628            }
1629        }
1630
1631        // Pattern like "a.c" needs to be parsed as sequence with dot wildcard
1632        use crate::parser::sequence::{Sequence, SequenceElement};
1633        let mut elements = Vec::new();
1634
1635        for ch in pattern.chars() {
1636            if ch == '.' {
1637                // Use DotAll element (will be handled by SequenceWithFlags)
1638                elements.push(SequenceElement::Dot);
1639            } else {
1640                elements.push(SequenceElement::Char(ch));
1641            }
1642        }
1643
1644        return Ok(Ast::SequenceWithFlags(Sequence::new(elements), *flags));
1645    }
1646
1647    // For non-dot patterns, delegate to normal parsing
1648    parse_pattern(pattern)
1649}
1650
1651/// Parse patterns with captures and flags
1652fn parse_pattern_with_captures_with_flags(
1653    pattern: &str,
1654    flags: &Flags,
1655) -> Result<Ast, PatternError> {
1656    // For now, parse normally and wrap if case-insensitive
1657    // TODO: proper flags handling for captures
1658    let ast = parse_pattern_with_captures(pattern)?;
1659
1660    if flags.case_insensitive {
1661        return Ok(Ast::CaseInsensitive(Box::new(ast)));
1662    }
1663
1664    // If DOTALL flag is set and the pattern has sequences, we need special handling
1665    // For now, return as-is - full support requires more work
1666    Ok(ast)
1667}
1668
1669#[derive(Debug, Clone)]
1670enum Matcher {
1671    Literal(String),
1672    MultiLiteral(AhoCorasick),
1673    AnchoredLiteral {
1674        literal: String,
1675        start: bool,
1676        end: bool,
1677    },
1678    AnchoredGroup {
1679        group: Group,
1680        start: bool,
1681        end: bool,
1682    },
1683    AnchoredPattern {
1684        inner: Box<Matcher>,
1685        start: bool,
1686        end: bool,
1687    },
1688    CharClass(CharClass),
1689    Quantified(QuantifiedPattern),
1690    Sequence(Sequence),
1691    SequenceWithFlags(Sequence, Flags), // Sequence with flags (e.g., DOTALL)
1692    Group(Group),
1693    DigitRun,                                  // Specialized fast path for \d+ pattern
1694    WordRun,                                   // Specialized fast path for \w+ pattern
1695    Boundary(BoundaryType),                    // Phase 6: Word boundary matcher
1696    Lookaround(Box<Lookaround>, Box<Matcher>), // Phase 7: Lookaround with compiled inner matcher
1697    Capture(Box<Matcher>, usize), // Phase 8: Capture matcher (inner pattern, group_index)
1698    QuantifiedCapture(Box<Matcher>, parser::quantifier::Quantifier), // Quantified capture: (foo)+
1699    CombinedWithLookaround {
1700        prefix: Box<Matcher>,
1701        lookaround: Box<Lookaround>,
1702        lookaround_matcher: Box<Matcher>,
1703    }, // Phase 7.2: foo(?=bar) - prefix with lookahead
1704    LookbehindWithSuffix {
1705        lookbehind: Box<Lookaround>,
1706        lookbehind_matcher: Box<Matcher>,
1707        suffix: Box<Matcher>,
1708    }, // Phase 7.3: (?<=foo)bar - lookbehind with suffix
1709    PatternWithCaptures {
1710        elements: Vec<CompiledCaptureElement>,
1711        total_groups: usize,
1712    }, // Phase 8.1
1713    AlternationWithCaptures {
1714        branches: Vec<Matcher>,
1715        #[allow(dead_code)]
1716        total_groups: usize,
1717    }, // Alternation where branches may contain captures: (a)|(b) or (?:(a)|(b))
1718    Backreference(usize),         // Phase 9: Backreference to capture group
1719    DFA(DFA),                     // Phase 9.2: DFA-optimized sequence matcher
1720    LazyDFA(engine::lazy_dfa::LazyDFA), // Phase 9.3: Lazy DFA for complex patterns
1721    CaseInsensitive(Box<Matcher>), // Case-insensitive wrapper for (?i)
1722}
1723
1724/// Compiled capture element
1725#[derive(Debug, Clone)]
1726enum CompiledCaptureElement {
1727    Capture(Matcher, usize), // Compiled matcher, group number
1728    NonCapture(Matcher),     // Compiled matcher (non-capturing)
1729}
1730
1731impl Matcher {
1732    fn is_match(&self, text: &str) -> bool {
1733        match self {
1734            Matcher::Literal(lit) => memmem::find(text.as_bytes(), lit.as_bytes()).is_some(),
1735            Matcher::MultiLiteral(ac) => ac.is_match(text),
1736            Matcher::AnchoredLiteral {
1737                literal,
1738                start,
1739                end,
1740            } => match (start, end) {
1741                (true, true) => text == literal,
1742                (true, false) => text.starts_with(literal),
1743                (false, true) => text.ends_with(literal),
1744                _ => unreachable!(),
1745            },
1746            Matcher::AnchoredGroup { group, start, end } => {
1747                // Check if group matches with anchor constraints
1748                match (start, end) {
1749                    (true, true) => {
1750                        // Must match entire text
1751                        group
1752                            .match_at(text, 0)
1753                            .map(|len| len == text.len())
1754                            .unwrap_or(false)
1755                    }
1756                    (true, false) => {
1757                        // Must match at start
1758                        group.match_at(text, 0).is_some()
1759                    }
1760                    (false, true) => {
1761                        // Must match at end
1762                        if let Some((_start_pos, end_pos)) = group.find(text) {
1763                            end_pos == text.len()
1764                        } else {
1765                            false
1766                        }
1767                    }
1768                    _ => unreachable!(),
1769                }
1770            }
1771            Matcher::AnchoredPattern { inner, start, end } => {
1772                // Check if inner pattern matches with anchor constraints
1773                match (start, end) {
1774                    (true, true) => {
1775                        // Must match entire text
1776                        if let Some((match_start, match_end)) = inner.find(text) {
1777                            match_start == 0 && match_end == text.len()
1778                        } else {
1779                            false
1780                        }
1781                    }
1782                    (true, false) => {
1783                        // Must match at start
1784                        if let Some((match_start, _)) = inner.find(text) {
1785                            match_start == 0
1786                        } else {
1787                            false
1788                        }
1789                    }
1790                    (false, true) => {
1791                        // Must match at end
1792                        if let Some((_, match_end)) = inner.find(text) {
1793                            match_end == text.len()
1794                        } else {
1795                            false
1796                        }
1797                    }
1798                    _ => unreachable!(),
1799                }
1800            }
1801            Matcher::CharClass(cc) => {
1802                // OPTIMIZED: Use SIMD-friendly find_first for ASCII text
1803                cc.find_first(text).is_some()
1804            }
1805            Matcher::Quantified(qp) => {
1806                if let crate::parser::quantifier::QuantifiedElement::CharClass(cc) = &qp.element {
1807                    if let Some(bitmap) = cc.get_ascii_bitmap() {
1808                        let negated = cc.negated;
1809                        let min = qp.quantifier.min_matches();
1810                        let bytes = text.as_bytes();
1811
1812                        if min <= 1 {
1813                            // For +, *, ?, {0,N}, {1,N} - just find one matching byte
1814                            for &byte in bytes {
1815                                if byte < 128 {
1816                                    let idx = byte as usize;
1817                                    let bit_set = (bitmap[idx / 64] & (1u64 << (idx % 64))) != 0;
1818                                    if bit_set != negated {
1819                                        return true;
1820                                    }
1821                                }
1822                            }
1823                            return false;
1824                        } else {
1825                            // For {N}, {N,}, {N,M} where N >= 2 - find N consecutive matching bytes
1826                            let mut run = 0usize;
1827                            for &byte in bytes {
1828                                if byte < 128 {
1829                                    let idx = byte as usize;
1830                                    let bit_set = (bitmap[idx / 64] & (1u64 << (idx % 64))) != 0;
1831                                    if bit_set != negated {
1832                                        run += 1;
1833                                        if run >= min {
1834                                            return true;
1835                                        }
1836                                        continue;
1837                                    }
1838                                }
1839                                run = 0;
1840                            }
1841                            return false;
1842                        }
1843                    }
1844                }
1845                qp.is_match(text)
1846            }
1847            Matcher::Sequence(seq) => seq.is_match(text), // NEW: Early termination
1848            Matcher::Group(group) => group.is_match(text), // NEW: Early termination
1849            Matcher::DigitRun => Self::digit_run_is_match(text), // NEW: Specialized digit fast path
1850            Matcher::WordRun => Self::word_run_is_match(text), // NEW: Specialized word fast path
1851            Matcher::Boundary(boundary_type) => boundary_type.find_first(text).is_some(),
1852            Matcher::Lookaround(lookaround, inner_matcher) => {
1853                // Lookaround assertions are zero-width, check if they match at any position
1854                for pos in 0..=text.len() {
1855                    if lookaround.matches_at(text, pos, inner_matcher) {
1856                        return true;
1857                    }
1858                }
1859                false
1860            }
1861            Matcher::Capture(inner_matcher, _group_index) => {
1862                // Capture groups don't affect matching, just check inner pattern
1863                inner_matcher.is_match(text)
1864            }
1865            Matcher::QuantifiedCapture(inner_matcher, quantifier) => {
1866                // Quantified capture - match inner pattern with quantifier semantics
1867                Self::quantified_is_match(text, inner_matcher, quantifier)
1868            }
1869            Matcher::CombinedWithLookaround {
1870                prefix,
1871                lookaround,
1872                lookaround_matcher,
1873            } => {
1874                // Need to find where prefix matches, then check lookaround at that position
1875                if let Some((_start, end)) = prefix.find(text) {
1876                    // Check if lookaround succeeds at the end position of the prefix match
1877                    lookaround.matches_at(text, end, lookaround_matcher)
1878                } else {
1879                    false
1880                }
1881            }
1882            Matcher::LookbehindWithSuffix {
1883                lookbehind,
1884                lookbehind_matcher,
1885                suffix,
1886            } => {
1887                // Match suffix anywhere in text, then check if lookbehind matches at that position
1888                // Try to find suffix match
1889                if let Some((start, _end)) = suffix.find(text) {
1890                    // Check if lookbehind succeeds at the start position of the suffix match
1891                    lookbehind.matches_at(text, start, lookbehind_matcher)
1892                } else {
1893                    false
1894                }
1895            }
1896            Matcher::PatternWithCaptures { .. } => {
1897                // OPTIMIZATION: Delegate to find() for simplicity
1898                self.find(text).is_some()
1899            }
1900            Matcher::Backreference(_) => {
1901                // Backreferences cannot be matched without context
1902                // They need access to captured groups, which is_match doesn't have
1903                // Return false - backreferences only work in captures() method
1904                false
1905            }
1906            Matcher::DFA(dfa) => {
1907                // DFA-optimized sequence matching
1908                dfa.is_match(text)
1909            }
1910            Matcher::LazyDFA(lazy_dfa) => {
1911                // Lazy DFA is mutable, clone for is_match
1912                let mut dfa = lazy_dfa.clone();
1913                dfa.find(text).is_some()
1914            }
1915            Matcher::SequenceWithFlags(seq, flags) => {
1916                // Sequence matching with flags (e.g., DOTALL)
1917                seq.is_match_with_flags(text, flags)
1918            }
1919            Matcher::AlternationWithCaptures { branches, .. } => {
1920                // Try each branch - return true if ANY branch matches
1921                for branch in branches {
1922                    if branch.is_match(text) {
1923                        return true;
1924                    }
1925                }
1926                false
1927            }
1928            Matcher::CaseInsensitive(inner) => {
1929                // Fast path: literal case-insensitive search without allocation
1930                // Only for ASCII needle + ASCII text (XOR trick only works for A-Z/a-z)
1931                if let Matcher::Literal(needle) = inner.as_ref() {
1932                    let needle_bytes = needle.as_bytes();
1933                    let text_bytes = text.as_bytes();
1934                    let needle_is_ascii = needle_bytes.iter().all(|&b| b < 128);
1935                    if needle_is_ascii
1936                        && !needle_bytes.is_empty()
1937                        && needle_bytes.len() <= text_bytes.len()
1938                    {
1939                        let first_lower = needle_bytes[0];
1940                        let first_upper = if first_lower >= b'a' && first_lower <= b'z' {
1941                            first_lower - 32
1942                        } else {
1943                            first_lower
1944                        };
1945                        for i in 0..=(text_bytes.len() - needle_bytes.len()) {
1946                            let b = text_bytes[i];
1947                            if b == first_lower || b == first_upper {
1948                                let mut matched = true;
1949                                for j in 1..needle_bytes.len() {
1950                                    let tb = text_bytes[i + j];
1951                                    let nb = needle_bytes[j];
1952                                    // Skip non-ASCII bytes in text
1953                                    if tb >= 128 || nb >= 128 {
1954                                        matched = false;
1955                                        break;
1956                                    }
1957                                    if tb != nb && (tb ^ 32) != nb {
1958                                        matched = false;
1959                                        break;
1960                                    }
1961                                }
1962                                if matched {
1963                                    return true;
1964                                }
1965                            }
1966                        }
1967                        return false;
1968                    }
1969                }
1970                // Fast path: alternation of literals
1971                if let Matcher::MultiLiteral(ac) = inner.as_ref() {
1972                    let bytes = text.as_bytes();
1973                    let len = bytes.len();
1974                    if len <= 256 {
1975                        let mut buf = [0u8; 256];
1976                        let mut all_ascii = true;
1977                        for i in 0..len {
1978                            let b = bytes[i];
1979                            if b >= 128 {
1980                                all_ascii = false;
1981                                break;
1982                            }
1983                            buf[i] = if b >= b'A' && b <= b'Z' { b + 32 } else { b };
1984                        }
1985                        if all_ascii {
1986                            let lower = unsafe { std::str::from_utf8_unchecked(&buf[..len]) };
1987                            return ac.is_match(lower);
1988                        }
1989                    }
1990                }
1991                // General case: lowercase text then match
1992                let bytes = text.as_bytes();
1993                let len = bytes.len();
1994                if len <= 256 {
1995                    let mut buf = [0u8; 256];
1996                    let mut all_ascii = true;
1997                    for i in 0..len {
1998                        let b = bytes[i];
1999                        if b >= 128 {
2000                            all_ascii = false;
2001                            break;
2002                        }
2003                        buf[i] = if b >= b'A' && b <= b'Z' { b + 32 } else { b };
2004                    }
2005                    if all_ascii {
2006                        let lower = unsafe { std::str::from_utf8_unchecked(&buf[..len]) };
2007                        return inner.is_match(lower);
2008                    }
2009                }
2010                let lower_text = text.to_lowercase();
2011                inner.is_match(&lower_text)
2012            }
2013        }
2014    }
2015
2016    /// Recursively extract all nested captures from a matched pattern
2017    /// Returns Vec<(group_num, start, end)> for all capture groups found
2018    fn extract_nested_captures(&self, text: &str, start_pos: usize) -> Vec<(usize, usize, usize)> {
2019        let mut captures = Vec::new();
2020
2021        match self {
2022            Matcher::PatternWithCaptures { elements, .. } => {
2023                let mut pos = start_pos;
2024
2025                for element in elements {
2026                    match element {
2027                        CompiledCaptureElement::Capture(inner_matcher, group_num) => {
2028                            if let Some((rel_start, rel_end)) =
2029                                inner_matcher.find(safe_slice(text, pos).unwrap_or(""))
2030                            {
2031                                if rel_start == 0 {
2032                                    let abs_start = pos;
2033                                    let abs_end = pos + rel_end;
2034
2035                                    // Record this capture
2036                                    captures.push((*group_num, abs_start, abs_end));
2037
2038                                    // Recursively extract nested captures
2039                                    let nested =
2040                                        inner_matcher.extract_nested_captures(text, abs_start);
2041                                    captures.extend(nested);
2042
2043                                    pos = abs_end;
2044                                } else {
2045                                    break;
2046                                }
2047                            } else {
2048                                break;
2049                            }
2050                        }
2051                        CompiledCaptureElement::NonCapture(inner_matcher) => {
2052                            if let Some((rel_start, rel_end)) =
2053                                inner_matcher.find(safe_slice(text, pos).unwrap_or(""))
2054                            {
2055                                if rel_start == 0 {
2056                                    let abs_start = pos;
2057
2058                                    // Even for non-capturing, extract nested captures
2059                                    let nested =
2060                                        inner_matcher.extract_nested_captures(text, abs_start);
2061                                    captures.extend(nested);
2062
2063                                    pos += rel_end;
2064                                } else {
2065                                    break;
2066                                }
2067                            } else {
2068                                break;
2069                            }
2070                        }
2071                    }
2072                }
2073            }
2074            Matcher::Capture(inner_matcher, group_num) => {
2075                // This is a single capture - record it and check for nested
2076                if let Some((rel_start, rel_end)) =
2077                    inner_matcher.find(safe_slice(text, start_pos).unwrap_or(""))
2078                {
2079                    let abs_start = start_pos + rel_start;
2080                    let abs_end = start_pos + rel_end;
2081
2082                    // Record this capture
2083                    captures.push((*group_num, abs_start, abs_end));
2084
2085                    // Recursively extract nested captures
2086                    let nested = inner_matcher.extract_nested_captures(text, abs_start);
2087                    captures.extend(nested);
2088                }
2089            }
2090            Matcher::AlternationWithCaptures { branches, .. } => {
2091                // Try each branch to find which one matched
2092                for branch in branches {
2093                    if let Some((rel_start, _rel_end)) =
2094                        branch.find(safe_slice(text, start_pos).unwrap_or(""))
2095                    {
2096                        if rel_start == 0 {
2097                            let abs_start = start_pos;
2098                            // Extract captures from the matched branch
2099                            let nested = branch.extract_nested_captures(text, abs_start);
2100                            captures.extend(nested);
2101                            break; // Only one branch can match
2102                        }
2103                    }
2104                }
2105            }
2106            _ => {
2107                // Other matchers don't have nested captures
2108            }
2109        }
2110
2111        captures
2112    }
2113
2114    /// Match pattern with backreferences, tracking captures as we go
2115    /// Returns Some(end_pos) if match succeeds, None otherwise
2116    fn match_pattern_with_backreferences(
2117        text: &str,
2118        start_pos: usize,
2119        elements: &[CompiledCaptureElement],
2120    ) -> Option<usize> {
2121        let mut pos = start_pos;
2122        let mut capture_positions: Vec<(usize, usize)> = Vec::new();
2123
2124        for element in elements {
2125            match element {
2126                CompiledCaptureElement::Capture(m, num) => {
2127                    if let Some((rel_start, rel_end)) = m.find(safe_slice(text, pos).unwrap_or(""))
2128                    {
2129                        if rel_start != 0 {
2130                            return None; // Must match at current position
2131                        }
2132
2133                        let abs_start = pos;
2134                        let abs_end = pos + rel_end;
2135
2136                        // Record capture position
2137                        while capture_positions.len() < *num {
2138                            capture_positions.push((0, 0));
2139                        }
2140                        capture_positions[*num - 1] = (abs_start, abs_end);
2141                        pos = abs_end;
2142                    } else {
2143                        return None;
2144                    }
2145                }
2146                CompiledCaptureElement::NonCapture(m) => {
2147                    // Check if this is a Backreference
2148                    if let Matcher::Backreference(ref_num) = m {
2149                        // Get the captured text for this backreference
2150                        if *ref_num > 0 && *ref_num <= capture_positions.len() {
2151                            let (cap_start, cap_end) = capture_positions[*ref_num - 1];
2152                            let captured_text = &text[cap_start..cap_end];
2153
2154                            // Check if remaining text starts with the captured text
2155                            if text[pos..].starts_with(captured_text) {
2156                                pos += captured_text.len();
2157                            } else {
2158                                return None;
2159                            }
2160                        } else {
2161                            // Invalid backreference or not captured yet
2162                            return None;
2163                        }
2164                    } else {
2165                        // Normal non-capture element
2166                        if let Some((rel_start, rel_end)) =
2167                            m.find(safe_slice(text, pos).unwrap_or(""))
2168                        {
2169                            if rel_start != 0 {
2170                                return None;
2171                            }
2172                            pos += rel_end;
2173                        } else {
2174                            return None;
2175                        }
2176                    }
2177                }
2178            }
2179        }
2180
2181        Some(pos)
2182    }
2183
2184    /// Specialized fast path for \d+ pattern
2185    #[inline(always)]
2186    fn digit_run_is_match(text: &str) -> bool {
2187        let bytes = text.as_bytes();
2188        if bytes.is_empty() {
2189            return false;
2190        }
2191
2192        // Check if text starts with at least one digit
2193        bytes.iter().any(|&b| b.is_ascii_digit())
2194    }
2195
2196    /// Specialized fast path for \w+ pattern  
2197    #[inline(always)]
2198    fn word_run_is_match(text: &str) -> bool {
2199        let bytes = text.as_bytes();
2200        if bytes.is_empty() {
2201            return false;
2202        }
2203
2204        // Check if text contains at least one word char [a-zA-Z0-9_]
2205        bytes.iter().any(|&b| {
2206            b.is_ascii_lowercase() || b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_'
2207        })
2208    }
2209
2210    /// Match quantified capture pattern
2211    fn quantified_is_match(
2212        text: &str,
2213        inner_matcher: &Matcher,
2214        quantifier: &parser::quantifier::Quantifier,
2215    ) -> bool {
2216        Self::quantified_find(text, inner_matcher, quantifier).is_some()
2217    }
2218
2219    /// Find quantified capture pattern
2220    fn quantified_find(
2221        text: &str,
2222        inner_matcher: &Matcher,
2223        quantifier: &parser::quantifier::Quantifier,
2224    ) -> Option<(usize, usize)> {
2225        let (min, max) = quantifier_bounds(quantifier);
2226
2227        // Special case: empty text can match if min is 0
2228        if text.is_empty() {
2229            return if min == 0 { Some((0, 0)) } else { None };
2230        }
2231
2232        // Try to match at each position in text
2233        for start_pos in 0..text.len() {
2234            let mut pos = start_pos;
2235            let mut count = 0;
2236
2237            // Match inner pattern as many times as possible (greedy)
2238            while count < max && pos < text.len() {
2239                if let Some((rel_start, rel_end)) =
2240                    inner_matcher.find(safe_slice(text, pos).unwrap_or(""))
2241                {
2242                    // Must match at current position
2243                    if rel_start != 0 {
2244                        break;
2245                    }
2246                    if rel_end == 0 {
2247                        break; // Avoid infinite loops on zero-width matches
2248                    }
2249                    pos += rel_end;
2250                    count += 1;
2251                } else {
2252                    break;
2253                }
2254            }
2255
2256            if count >= min {
2257                return Some((start_pos, pos));
2258            }
2259        }
2260
2261        None
2262    }
2263
2264    /// Check if a matcher contains a quantified pattern that can match variable lengths
2265    /// This is used to determine if backtracking is needed
2266    fn contains_quantified(matcher: &Matcher) -> bool {
2267        match matcher {
2268            Matcher::Quantified(_) | Matcher::QuantifiedCapture(_, _) => true,
2269            Matcher::Capture(inner, _) => Self::contains_quantified(inner),
2270            Matcher::PatternWithCaptures { elements, .. } => {
2271                // Check if this is a simple sequence with quantified elements
2272                // But NOT if it's just wrapping an alternation
2273                elements.iter().any(|elem| match elem {
2274                    CompiledCaptureElement::Capture(m, _)
2275                    | CompiledCaptureElement::NonCapture(m) => {
2276                        // Don't recurse into AlternationWithCaptures - alternations are not quantified
2277                        match m {
2278                            Matcher::AlternationWithCaptures { .. } => false,
2279                            _ => Self::contains_quantified(m),
2280                        }
2281                    }
2282                })
2283            }
2284            // AlternationWithCaptures itself is NOT a quantified pattern
2285            // It's a choice between alternatives, each of which has a fixed match
2286            Matcher::AlternationWithCaptures { .. } => false,
2287            _ => false,
2288        }
2289    }
2290
2291    /// Try to match sequence of elements with backtracking support AND extract captures
2292    /// Returns (end_pos, capture_positions) if successful
2293    fn match_elements_with_backtrack_and_captures(
2294        text: &str,
2295        start_pos: usize,
2296        elements: &[CompiledCaptureElement],
2297    ) -> Option<(usize, Vec<(usize, usize, usize)>)> {
2298        // Base case: no more elements
2299        if elements.is_empty() {
2300            return Some((start_pos, Vec::new()));
2301        }
2302
2303        // Get first element
2304        let first_element = &elements[0];
2305
2306        // Check if this element contains a quantified pattern that needs backtracking
2307        let needs_backtracking = if elements.len() <= 1 {
2308            false
2309        } else {
2310            match first_element {
2311                CompiledCaptureElement::Capture(m, _) | CompiledCaptureElement::NonCapture(m) => {
2312                    Self::contains_quantified(m)
2313                }
2314            }
2315        };
2316
2317        if needs_backtracking {
2318            // Backtracking needed
2319            let remaining_text = safe_slice(text, start_pos).unwrap_or("");
2320            let remaining_len = remaining_text.len();
2321
2322            // Try each possible length from longest to shortest, including 0 for zero-width matches
2323            // This is important for quantifiers with min=0 like * and ?
2324            for try_len in (0..=remaining_len).rev() {
2325                let next_pos = start_pos + try_len;
2326
2327                // Try to match remaining elements
2328                if let Some((final_pos, mut remaining_caps)) =
2329                    Self::match_elements_with_backtrack_and_captures(text, next_pos, &elements[1..])
2330                {
2331                    // Handle zero-width matches (try_len == 0)
2332                    if try_len == 0 {
2333                        // For zero-width matches, check if the quantifier allows min=0
2334                        match first_element {
2335                            CompiledCaptureElement::Capture(m, num) => {
2336                                if let Some((rel_start, rel_end)) = m.find("") {
2337                                    if rel_start == 0 && rel_end == 0 {
2338                                        // Zero-width capture matched
2339                                        let mut caps = vec![(*num, start_pos, start_pos)];
2340                                        caps.append(&mut remaining_caps);
2341                                        return Some((final_pos, caps));
2342                                    }
2343                                }
2344                            }
2345                            CompiledCaptureElement::NonCapture(m) => {
2346                                if let Some((rel_start, rel_end)) = m.find("") {
2347                                    if rel_start == 0 && rel_end == 0 {
2348                                        // Zero-width non-capture matched
2349                                        return Some((final_pos, remaining_caps));
2350                                    }
2351                                }
2352                            }
2353                        }
2354                    } else {
2355                        // Non-zero width match
2356                        let substring = safe_slice_range(text, start_pos, next_pos).unwrap_or("");
2357
2358                        // Check if first element matches exactly this substring
2359                        match first_element {
2360                            CompiledCaptureElement::Capture(m, num) => {
2361                                if let Some((rel_start, rel_end)) = m.find(substring) {
2362                                    if rel_start == 0 && rel_end == substring.len() {
2363                                        // Capture matched
2364                                        let mut caps = vec![(*num, start_pos, next_pos)];
2365                                        caps.append(&mut remaining_caps);
2366                                        return Some((final_pos, caps));
2367                                    }
2368                                }
2369                            }
2370                            CompiledCaptureElement::NonCapture(m) => {
2371                                if let Some((rel_start, rel_end)) = m.find(substring) {
2372                                    if rel_start == 0 && rel_end == substring.len() {
2373                                        // Extract any nested captures from this matcher
2374                                        let nested_caps =
2375                                            m.extract_nested_captures(text, start_pos);
2376                                        let mut all_caps = nested_caps;
2377                                        all_caps.extend(remaining_caps);
2378                                        return Some((final_pos, all_caps));
2379                                    }
2380                                }
2381                            }
2382                        }
2383                    }
2384                }
2385            }
2386            None
2387        } else {
2388            // No backtracking needed
2389            match first_element {
2390                CompiledCaptureElement::Capture(m, num) => {
2391                    if let Some((rel_start, rel_end)) =
2392                        m.find(safe_slice(text, start_pos).unwrap_or(""))
2393                    {
2394                        if rel_start == 0 {
2395                            let next_pos = start_pos + rel_end;
2396                            if let Some((final_pos, mut remaining_caps)) =
2397                                Self::match_elements_with_backtrack_and_captures(
2398                                    text,
2399                                    next_pos,
2400                                    &elements[1..],
2401                                )
2402                            {
2403                                let mut caps = vec![(*num, start_pos, next_pos)];
2404                                caps.append(&mut remaining_caps);
2405                                return Some((final_pos, caps));
2406                            }
2407                        }
2408                    }
2409                    None
2410                }
2411                CompiledCaptureElement::NonCapture(m) => {
2412                    if let Some((rel_start, rel_end)) =
2413                        m.find(safe_slice(text, start_pos).unwrap_or(""))
2414                    {
2415                        if rel_start == 0 {
2416                            let next_pos = start_pos + rel_end;
2417                            if let Some((final_pos, remaining_caps)) =
2418                                Self::match_elements_with_backtrack_and_captures(
2419                                    text,
2420                                    next_pos,
2421                                    &elements[1..],
2422                                )
2423                            {
2424                                // Extract nested captures
2425                                let nested_caps = m.extract_nested_captures(text, start_pos);
2426                                let mut all_caps = nested_caps;
2427                                all_caps.extend(remaining_caps);
2428                                return Some((final_pos, all_caps));
2429                            }
2430                        }
2431                    }
2432                    None
2433                }
2434            }
2435        }
2436    }
2437
2438    /// Try to match sequence of elements with backtracking support
2439    /// Returns (start, end) if successful
2440    fn match_elements_with_backtrack(
2441        text: &str,
2442        start_pos: usize,
2443        elements: &[CompiledCaptureElement],
2444    ) -> Option<usize> {
2445        // Base case: no more elements
2446        if elements.is_empty() {
2447            return Some(start_pos);
2448        }
2449
2450        // Get first element
2451        let first_element = &elements[0];
2452        let first_matcher = match first_element {
2453            CompiledCaptureElement::Capture(m, _) => m,
2454            CompiledCaptureElement::NonCapture(m) => m,
2455        };
2456
2457        // Check if this element contains a quantified pattern that needs backtracking
2458        // This includes: Quantified, QuantifiedCapture, and Captures containing quantified patterns
2459        let needs_backtracking = if elements.len() <= 1 {
2460            false // No backtracking needed if this is the last element
2461        } else {
2462            // Check if first_matcher contains a quantified pattern
2463            Self::contains_quantified(first_matcher)
2464        };
2465
2466        if needs_backtracking {
2467            // Quantified element followed by more elements - need backtracking
2468            // Strategy: Try matching with progressively shorter lengths from remaining text
2469
2470            let remaining_text = safe_slice(text, start_pos).unwrap_or("");
2471            let remaining_len = remaining_text.len();
2472
2473            // Try each possible length from longest to shortest, including 0 for zero-width matches
2474            // This is important for quantifiers with min=0 like * and ?
2475            for try_len in (0..=remaining_len).rev() {
2476                let next_pos = start_pos + try_len;
2477
2478                // Try to match remaining elements FIRST
2479                if let Some(final_pos) =
2480                    Self::match_elements_with_backtrack(text, next_pos, &elements[1..])
2481                {
2482                    // Remaining elements matched! Now check if first element can match EXACTLY this length
2483                    let substring = safe_slice_range(text, start_pos, next_pos).unwrap_or("");
2484
2485                    // Check if first element matches exactly this substring
2486                    // It must match from start (rel_start == 0) and consume the entire substring (rel_end == substring.len())
2487                    // For zero-width matches (try_len == 0), we need to check if the quantifier allows min=0
2488                    if try_len == 0 {
2489                        // Zero-width match - only valid for quantifiers with min=0 (*, ?)
2490                        // Check by seeing if the matcher can match empty string
2491                        if let Some((rel_start, rel_end)) = first_matcher.find("") {
2492                            if rel_start == 0 && rel_end == 0 {
2493                                return Some(final_pos);
2494                            }
2495                        }
2496                    } else {
2497                        // Non-zero match
2498                        if let Some((rel_start, rel_end)) = first_matcher.find(substring) {
2499                            if rel_start == 0 && rel_end == substring.len() {
2500                                return Some(final_pos);
2501                            }
2502                        }
2503                    }
2504                }
2505            }
2506
2507            None
2508        } else {
2509            // Non-quantified element or last element - match normally
2510
2511            // Special case: if first element is AlternationWithCaptures, try all branches
2512            if let Matcher::AlternationWithCaptures { branches, .. } = first_matcher {
2513                // Try each branch - return first one that leads to complete match
2514                for branch in branches {
2515                    if let Some((rel_start, rel_end)) =
2516                        branch.find(safe_slice(text, start_pos).unwrap_or(""))
2517                    {
2518                        if rel_start == 0 {
2519                            let next_pos = start_pos + rel_end;
2520                            // Try to match remaining elements with this branch
2521                            if let Some(final_pos) =
2522                                Self::match_elements_with_backtrack(text, next_pos, &elements[1..])
2523                            {
2524                                return Some(final_pos);
2525                            }
2526                            // This branch didn't lead to complete match, try next branch
2527                        }
2528                    }
2529                }
2530                return None;
2531            }
2532
2533            // Regular case: non-alternation element
2534            if let Some((rel_start, rel_end)) =
2535                first_matcher.find(safe_slice(text, start_pos).unwrap_or(""))
2536            {
2537                if rel_start == 0 {
2538                    let next_pos = start_pos + rel_end;
2539                    // Match remaining elements
2540                    return Self::match_elements_with_backtrack(text, next_pos, &elements[1..]);
2541                }
2542            }
2543            None
2544        }
2545    }
2546
2547    /// Find all quantified capture matches
2548    fn quantified_find_all(
2549        text: &str,
2550        inner_matcher: &Matcher,
2551        quantifier: &parser::quantifier::Quantifier,
2552    ) -> Vec<(usize, usize)> {
2553        let mut matches = Vec::new();
2554        let mut search_pos = 0;
2555
2556        while search_pos < text.len() {
2557            if let Some((start, end)) =
2558                Self::quantified_find(&text[search_pos..], inner_matcher, quantifier)
2559            {
2560                matches.push((search_pos + start, search_pos + end));
2561                search_pos += start + 1; // Avoid overlapping
2562                if start == end {
2563                    search_pos += 1; // Avoid infinite loop on zero-width match
2564                }
2565            } else {
2566                break;
2567            }
2568        }
2569
2570        matches
2571    }
2572
2573    fn find(&self, text: &str) -> Option<(usize, usize)> {
2574        match self {
2575            Matcher::Literal(lit) => {
2576                let pos = memmem::find(text.as_bytes(), lit.as_bytes())?;
2577                Some((pos, pos + lit.len()))
2578            }
2579            Matcher::MultiLiteral(ac) => {
2580                let mat = ac.find(text)?;
2581                Some((mat.start(), mat.end()))
2582            }
2583            Matcher::AnchoredLiteral {
2584                literal,
2585                start,
2586                end,
2587            } => match (start, end) {
2588                (true, true) => (text == literal).then_some((0, text.len())),
2589                (true, false) => text.starts_with(literal).then_some((0, literal.len())),
2590                (false, true) => text
2591                    .ends_with(literal)
2592                    .then(|| (text.len() - literal.len(), text.len())),
2593                _ => unreachable!(),
2594            },
2595            Matcher::AnchoredGroup { group, start, end } => {
2596                match (start, end) {
2597                    (true, true) => {
2598                        // Must match entire text
2599                        group.match_at(text, 0).and_then(|len| {
2600                            if len == text.len() {
2601                                Some((0, len))
2602                            } else {
2603                                None
2604                            }
2605                        })
2606                    }
2607                    (true, false) => {
2608                        // Must match at start
2609                        group.match_at(text, 0).map(|len| (0, len))
2610                    }
2611                    (false, true) => {
2612                        // Must match at end
2613                        group.find(text).and_then(|(start_pos, end_pos)| {
2614                            if end_pos == text.len() {
2615                                Some((start_pos, end_pos))
2616                            } else {
2617                                None
2618                            }
2619                        })
2620                    }
2621                    _ => unreachable!(),
2622                }
2623            }
2624            Matcher::AnchoredPattern { inner, start, end } => {
2625                match (start, end) {
2626                    (true, true) => {
2627                        // Must match entire text: match at position 0 and cover full text
2628                        inner.find(text).and_then(|(match_start, match_end)| {
2629                            if match_start == 0 && match_end == text.len() {
2630                                Some((0, text.len()))
2631                            } else {
2632                                None
2633                            }
2634                        })
2635                    }
2636                    (true, false) => {
2637                        // Must match at start
2638                        inner.find(text).and_then(|(match_start, match_end)| {
2639                            if match_start == 0 {
2640                                Some((0, match_end))
2641                            } else {
2642                                None
2643                            }
2644                        })
2645                    }
2646                    (false, true) => {
2647                        // Must match at end
2648                        inner.find(text).and_then(|(match_start, match_end)| {
2649                            if match_end == text.len() {
2650                                Some((match_start, match_end))
2651                            } else {
2652                                None
2653                            }
2654                        })
2655                    }
2656                    _ => unreachable!(),
2657                }
2658            }
2659            Matcher::CharClass(cc) => {
2660                // Find first character matching the class
2661                for (idx, ch) in text.char_indices() {
2662                    if cc.matches(ch) {
2663                        return Some((idx, idx + ch.len_utf8()));
2664                    }
2665                }
2666                None
2667            }
2668            Matcher::Quantified(qp) => qp.find(text),
2669            Matcher::Sequence(seq) => seq.find(text),
2670            Matcher::Group(group) => group.find(text),
2671            Matcher::DigitRun => Self::digit_run_find(text), // NEW: Specialized digit find
2672            Matcher::WordRun => Self::word_run_find(text),   // NEW: Specialized word find
2673            Matcher::Boundary(boundary_type) => {
2674                // Boundary returns position, need to map to (pos, pos) range
2675                boundary_type.find_first(text).map(|pos| (pos, pos))
2676            }
2677            Matcher::Lookaround(lookaround, inner_matcher) => {
2678                // Find first position where lookaround succeeds
2679                for pos in 0..=text.len() {
2680                    if lookaround.matches_at(text, pos, inner_matcher) {
2681                        return Some((pos, pos)); // Zero-width match
2682                    }
2683                }
2684                None
2685            }
2686            Matcher::Capture(inner_matcher, _group_index) => {
2687                // Capture groups don't affect position, use inner matcher
2688                inner_matcher.find(text)
2689            }
2690            Matcher::QuantifiedCapture(inner_matcher, quantifier) => {
2691                // Find quantified capture pattern
2692                Self::quantified_find(text, inner_matcher, quantifier)
2693            }
2694            Matcher::CombinedWithLookaround {
2695                prefix,
2696                lookaround,
2697                lookaround_matcher,
2698            } => {
2699                // Find first position where prefix matches AND lookaround succeeds
2700                let mut search_pos = 0;
2701                while search_pos < text.len() {
2702                    let remaining = &text[search_pos..];
2703                    if let Some((rel_start, rel_end)) = prefix.find(remaining) {
2704                        let abs_start = search_pos + rel_start;
2705                        let abs_end = search_pos + rel_end;
2706
2707                        // Check if lookaround succeeds at the end of the prefix match
2708                        if lookaround.matches_at(text, abs_end, lookaround_matcher) {
2709                            return Some((abs_start, abs_end));
2710                        }
2711
2712                        // Move search position past this match to try next one
2713                        search_pos = abs_start + 1;
2714                    } else {
2715                        break;
2716                    }
2717                }
2718                None
2719            }
2720            Matcher::LookbehindWithSuffix {
2721                lookbehind,
2722                lookbehind_matcher,
2723                suffix,
2724            } => {
2725                // Find first position where suffix matches AND lookbehind succeeds before it
2726                let mut search_pos = 0;
2727                while search_pos < text.len() {
2728                    let remaining = &text[search_pos..];
2729                    if let Some((rel_start, rel_end)) = suffix.find(remaining) {
2730                        let abs_start = search_pos + rel_start;
2731                        let abs_end = search_pos + rel_end;
2732
2733                        // Check if lookbehind succeeds at the start of the suffix match
2734                        if lookbehind.matches_at(text, abs_start, lookbehind_matcher) {
2735                            return Some((abs_start, abs_end));
2736                        }
2737
2738                        // Move search position past this match to try next one
2739                        search_pos = abs_start + 1;
2740                    } else {
2741                        break;
2742                    }
2743                }
2744                None
2745            }
2746            Matcher::PatternWithCaptures { elements, .. } => {
2747                // Special case: single element can match anywhere
2748                if elements.len() == 1 {
2749                    let matcher = match &elements[0] {
2750                        CompiledCaptureElement::Capture(m, _) => m,
2751                        CompiledCaptureElement::NonCapture(m) => m,
2752                    };
2753                    return matcher.find(text);
2754                }
2755
2756                // Check if pattern contains backreferences
2757                let has_backrefs = elements.iter().any(|elem| {
2758                    matches!(
2759                        elem,
2760                        CompiledCaptureElement::NonCapture(Matcher::Backreference(_))
2761                    )
2762                });
2763
2764                if has_backrefs {
2765                    // Use backreference-aware matching
2766                    for start_pos in 0..=text.len() {
2767                        if let Some(end_pos) =
2768                            Self::match_pattern_with_backreferences(text, start_pos, elements)
2769                        {
2770                            if end_pos > start_pos || elements.is_empty() {
2771                                return Some((start_pos, end_pos));
2772                            }
2773                        }
2774                    }
2775                    return None;
2776                }
2777
2778                // NEW: Try to compile to DFA for efficient single-pass matching
2779                if let Some(dfa) = crate::engine::capture_dfa::compile_capture_pattern(elements) {
2780                    return dfa.find(text);
2781                }
2782
2783                // Fallback: Linear scan through all positions
2784                for start_pos in 0..=text.len() {
2785                    if let Some(end_pos) =
2786                        Self::match_elements_with_backtrack(text, start_pos, elements)
2787                    {
2788                        if end_pos > start_pos || elements.is_empty() {
2789                            return Some((start_pos, end_pos));
2790                        }
2791                    }
2792                }
2793
2794                None
2795            }
2796            Matcher::Backreference(_) => {
2797                // Backreferences cannot find without capture context
2798                None
2799            }
2800            Matcher::DFA(dfa) => {
2801                // DFA-optimized find
2802                dfa.find(text)
2803            }
2804            Matcher::LazyDFA(lazy_dfa) => {
2805                // Lazy DFA requires mutable access, clone it
2806                let mut dfa = lazy_dfa.clone();
2807                dfa.find(text)
2808            }
2809            Matcher::SequenceWithFlags(seq, flags) => {
2810                // Find with flags (e.g., DOTALL mode where . matches newlines)
2811                seq.find_with_flags(text, flags)
2812            }
2813            Matcher::AlternationWithCaptures { branches, .. } => {
2814                // Try each branch in order (leftmost-first), return first match
2815                let mut best_match: Option<(usize, usize)> = None;
2816
2817                for branch in branches {
2818                    if let Some((start, end)) = branch.find(text) {
2819                        // Keep the leftmost (earliest starting) match
2820                        if best_match.is_none() || start < best_match.unwrap().0 {
2821                            best_match = Some((start, end));
2822                        }
2823                    }
2824                }
2825                best_match
2826            }
2827            Matcher::CaseInsensitive(inner) => {
2828                let bytes = text.as_bytes();
2829                let len = bytes.len();
2830                if len <= 256 {
2831                    let mut buf = [0u8; 256];
2832                    let mut all_ascii = true;
2833                    for i in 0..len {
2834                        let b = bytes[i];
2835                        if b >= 128 {
2836                            all_ascii = false;
2837                            break;
2838                        }
2839                        buf[i] = if b >= b'A' && b <= b'Z' { b + 32 } else { b };
2840                    }
2841                    if all_ascii {
2842                        let lower = unsafe { std::str::from_utf8_unchecked(&buf[..len]) };
2843                        return inner.find(lower);
2844                    }
2845                }
2846                let lower_text = text.to_lowercase();
2847                inner.find(&lower_text)
2848            }
2849        }
2850    }
2851
2852    /// Find first run of digits in text
2853    #[inline(always)]
2854    fn digit_run_find(text: &str) -> Option<(usize, usize)> {
2855        let bytes = text.as_bytes();
2856
2857        // Find start: first digit
2858        let mut start = None;
2859        for (i, &b) in bytes.iter().enumerate() {
2860            if b.is_ascii_digit() {
2861                start = Some(i);
2862                break;
2863            }
2864        }
2865
2866        let start_idx = start?;
2867
2868        // Find end: first non-digit after start
2869        let mut end_idx = bytes.len();
2870        for (i, &b) in bytes[start_idx..].iter().enumerate() {
2871            if !b.is_ascii_digit() {
2872                end_idx = start_idx + i;
2873                break;
2874            }
2875        }
2876
2877        Some((start_idx, end_idx))
2878    }
2879
2880    /// Find first run of word characters in text
2881    #[inline(always)]
2882    fn word_run_find(text: &str) -> Option<(usize, usize)> {
2883        let bytes = text.as_bytes();
2884
2885        // Find start: first word char
2886        let mut start = None;
2887        for (i, &b) in bytes.iter().enumerate() {
2888            if b.is_ascii_lowercase() || b.is_ascii_uppercase() || b.is_ascii_digit() || b == b'_' {
2889                start = Some(i);
2890                break;
2891            }
2892        }
2893
2894        let start_idx = start?;
2895
2896        // Find end: first non-word char after start
2897        let mut end_idx = bytes.len();
2898        for (i, &b) in bytes[start_idx..].iter().enumerate() {
2899            if !(b.is_ascii_lowercase()
2900                || b.is_ascii_uppercase()
2901                || b.is_ascii_digit()
2902                || b == b'_')
2903            {
2904                end_idx = start_idx + i;
2905                break;
2906            }
2907        }
2908
2909        Some((start_idx, end_idx))
2910    }
2911
2912    fn find_all(&self, text: &str) -> Vec<(usize, usize)> {
2913        match self {
2914            Matcher::Literal(lit) => {
2915                let finder = memmem::Finder::new(lit.as_bytes());
2916                finder
2917                    .find_iter(text.as_bytes())
2918                    .map(|pos| (pos, pos + lit.len()))
2919                    .collect()
2920            }
2921            Matcher::MultiLiteral(ac) => ac
2922                .find_iter(text)
2923                .map(|mat| (mat.start(), mat.end()))
2924                .collect(),
2925            Matcher::AnchoredLiteral { .. } => {
2926                if let Some(m) = self.find(text) {
2927                    vec![m]
2928                } else {
2929                    vec![]
2930                }
2931            }
2932            Matcher::AnchoredGroup { .. } => {
2933                // Anchored groups can only match once
2934                if let Some(m) = self.find(text) {
2935                    vec![m]
2936                } else {
2937                    vec![]
2938                }
2939            }
2940            Matcher::AnchoredPattern { .. } => {
2941                // Anchored patterns can only match once
2942                if let Some(m) = self.find(text) {
2943                    vec![m]
2944                } else {
2945                    vec![]
2946                }
2947            }
2948            Matcher::CharClass(cc) => {
2949                // Find all characters matching the class
2950                text.char_indices()
2951                    .filter(|(_, ch)| cc.matches(*ch))
2952                    .map(|(idx, ch)| (idx, idx + ch.len_utf8()))
2953                    .collect()
2954            }
2955            Matcher::Quantified(qp) => qp.find_all(text),
2956            Matcher::Sequence(seq) => seq.find_all(text),
2957            Matcher::Group(group) => group.find_all(text),
2958            Matcher::DigitRun => Self::digit_run_find_all(text), // NEW: Specialized digit find_all
2959            Matcher::WordRun => Self::word_run_find_all(text),   // NEW: Specialized word find_all
2960            Matcher::Boundary(boundary_type) => {
2961                // Boundary returns positions, map to (pos, pos) ranges
2962                boundary_type
2963                    .find_all(text)
2964                    .into_iter()
2965                    .map(|pos| (pos, pos))
2966                    .collect()
2967            }
2968            Matcher::Lookaround(lookaround, inner_matcher) => {
2969                // Find all positions where lookaround succeeds
2970                (0..=text.len())
2971                    .filter(|&pos| lookaround.matches_at(text, pos, inner_matcher))
2972                    .map(|pos| (pos, pos)) // Zero-width matches
2973                    .collect()
2974            }
2975            Matcher::Capture(inner_matcher, _group_index) => {
2976                // Capture groups don't affect find_all, use inner matcher
2977                inner_matcher.find_all(text)
2978            }
2979            Matcher::QuantifiedCapture(inner_matcher, quantifier) => {
2980                // Find all quantified capture matches
2981                Self::quantified_find_all(text, inner_matcher, quantifier)
2982            }
2983            Matcher::CombinedWithLookaround {
2984                prefix,
2985                lookaround,
2986                lookaround_matcher,
2987            } => {
2988                // Find all positions where prefix matches AND lookaround succeeds
2989                let mut matches = Vec::new();
2990                let mut search_pos = 0;
2991
2992                while search_pos < text.len() {
2993                    let remaining = &text[search_pos..];
2994                    if let Some((rel_start, rel_end)) = prefix.find(remaining) {
2995                        let abs_start = search_pos + rel_start;
2996                        let abs_end = search_pos + rel_end;
2997
2998                        // Check if lookaround succeeds at the end of the prefix match
2999                        if lookaround.matches_at(text, abs_end, lookaround_matcher) {
3000                            matches.push((abs_start, abs_end));
3001                        }
3002
3003                        // Move search position past the start of this match
3004                        search_pos = abs_start + 1;
3005                    } else {
3006                        break;
3007                    }
3008                }
3009
3010                matches
3011            }
3012            Matcher::LookbehindWithSuffix {
3013                lookbehind,
3014                lookbehind_matcher,
3015                suffix,
3016            } => {
3017                // Find all positions where suffix matches AND lookbehind succeeds before it
3018                let mut matches = Vec::new();
3019                let mut search_pos = 0;
3020
3021                while search_pos < text.len() {
3022                    let remaining = &text[search_pos..];
3023                    if let Some((rel_start, rel_end)) = suffix.find(remaining) {
3024                        let abs_start = search_pos + rel_start;
3025                        let abs_end = search_pos + rel_end;
3026
3027                        // Check if lookbehind succeeds at the start of the suffix match
3028                        if lookbehind.matches_at(text, abs_start, lookbehind_matcher) {
3029                            matches.push((abs_start, abs_end));
3030                        }
3031
3032                        // Move search position past the start of this match
3033                        search_pos = abs_start + 1;
3034                    } else {
3035                        break;
3036                    }
3037                }
3038
3039                matches
3040            }
3041            Matcher::PatternWithCaptures { elements, .. } => {
3042                // Find all matches of all elements in sequence
3043                let mut matches = Vec::new();
3044                let mut start_pos = 0;
3045
3046                while start_pos < text.len() {
3047                    let mut pos = start_pos;
3048                    let mut all_matched = true;
3049
3050                    for element in elements {
3051                        let matcher = match element {
3052                            CompiledCaptureElement::Capture(m, _) => m,
3053                            CompiledCaptureElement::NonCapture(m) => m,
3054                        };
3055
3056                        if let Some((rel_start, rel_end)) =
3057                            matcher.find(safe_slice(text, pos).unwrap_or(""))
3058                        {
3059                            if rel_start != 0 {
3060                                // Element must match at current position
3061                                all_matched = false;
3062                                break;
3063                            }
3064                            pos += rel_end;
3065                        } else {
3066                            all_matched = false;
3067                            break;
3068                        }
3069                    }
3070
3071                    if all_matched {
3072                        matches.push((start_pos, pos));
3073                        start_pos = pos.max(start_pos + 1); // Move past this match
3074                    } else {
3075                        start_pos += 1;
3076                    }
3077                }
3078
3079                matches
3080            }
3081            Matcher::Backreference(_) => {
3082                // Backreferences cannot find_all without capture context
3083                vec![]
3084            }
3085            Matcher::DFA(dfa) => {
3086                // DFA find_all - multiple matches
3087                let mut matches = Vec::new();
3088                let mut search_start = 0;
3089
3090                while search_start < text.len() {
3091                    if let Some((start, end)) = dfa.find(&text[search_start..]) {
3092                        let abs_start = search_start + start;
3093                        let abs_end = search_start + end;
3094                        matches.push((abs_start, abs_end));
3095                        search_start = abs_end.max(abs_start + 1);
3096                    } else {
3097                        break;
3098                    }
3099                }
3100
3101                matches
3102            }
3103            Matcher::LazyDFA(lazy_dfa) => {
3104                // Lazy DFA find_all
3105                let mut dfa = lazy_dfa.clone();
3106                let mut matches = Vec::new();
3107                let mut search_start = 0;
3108
3109                while search_start < text.len() {
3110                    if let Some((start, end)) = dfa.find(&text[search_start..]) {
3111                        let abs_start = search_start + start;
3112                        let abs_end = search_start + end;
3113                        matches.push((abs_start, abs_end));
3114                        search_start = abs_end.max(abs_start + 1);
3115                    } else {
3116                        break;
3117                    }
3118                }
3119
3120                matches
3121            }
3122            Matcher::SequenceWithFlags(seq, flags) => {
3123                // Find all with flags
3124                let mut matches = Vec::new();
3125                let mut search_start = 0;
3126
3127                while search_start < text.len() {
3128                    if let Some((start, end)) = seq.find_with_flags(&text[search_start..], flags) {
3129                        let abs_start = search_start + start;
3130                        let abs_end = search_start + end;
3131                        matches.push((abs_start, abs_end));
3132                        search_start = abs_end.max(abs_start + 1);
3133                    } else {
3134                        break;
3135                    }
3136                }
3137
3138                matches
3139            }
3140            Matcher::AlternationWithCaptures { branches, .. } => {
3141                // Find all matches from any branch
3142                let mut matches = Vec::new();
3143                let mut search_start = 0;
3144
3145                while search_start < text.len() {
3146                    // Try each branch and find the leftmost match
3147                    let mut best_match: Option<(usize, usize)> = None;
3148
3149                    for branch in branches {
3150                        if let Some((start, end)) = branch.find(&text[search_start..]) {
3151                            let abs_start = search_start + start;
3152                            let abs_end = search_start + end;
3153
3154                            if best_match.is_none() || abs_start < best_match.unwrap().0 {
3155                                best_match = Some((abs_start, abs_end));
3156                            }
3157                        }
3158                    }
3159
3160                    if let Some((start, end)) = best_match {
3161                        matches.push((start, end));
3162                        search_start = end.max(start + 1);
3163                    } else {
3164                        break;
3165                    }
3166                }
3167
3168                matches
3169            }
3170            Matcher::CaseInsensitive(inner) => {
3171                let bytes = text.as_bytes();
3172                let len = bytes.len();
3173                if len <= 256 {
3174                    let mut buf = [0u8; 256];
3175                    let mut all_ascii = true;
3176                    for i in 0..len {
3177                        let b = bytes[i];
3178                        if b >= 128 {
3179                            all_ascii = false;
3180                            break;
3181                        }
3182                        buf[i] = if b >= b'A' && b <= b'Z' { b + 32 } else { b };
3183                    }
3184                    if all_ascii {
3185                        let lower = unsafe { std::str::from_utf8_unchecked(&buf[..len]) };
3186                        return inner.find_all(lower);
3187                    }
3188                }
3189                let lower_text = text.to_lowercase();
3190                inner.find_all(&lower_text)
3191            }
3192        }
3193    }
3194
3195    /// Find all runs of digits in text (optimized)
3196    #[inline]
3197    fn digit_run_find_all(text: &str) -> Vec<(usize, usize)> {
3198        let bytes = text.as_bytes();
3199        let mut matches = Vec::new();
3200        let mut i = 0;
3201
3202        while i < bytes.len() {
3203            // Skip non-digits
3204            while i < bytes.len() && (bytes[i] < b'0' || bytes[i] > b'9') {
3205                i += 1;
3206            }
3207
3208            if i >= bytes.len() {
3209                break;
3210            }
3211
3212            // Found start of digit run
3213            let start = i;
3214
3215            // Consume all digits
3216            while i < bytes.len() && bytes[i] >= b'0' && bytes[i] <= b'9' {
3217                i += 1;
3218            }
3219
3220            matches.push((start, i));
3221        }
3222
3223        matches
3224    }
3225
3226    /// Find all runs of word characters in text (optimized)
3227    #[inline]
3228    fn word_run_find_all(text: &str) -> Vec<(usize, usize)> {
3229        let bytes = text.as_bytes();
3230        let mut matches = Vec::new();
3231        let mut i = 0;
3232
3233        while i < bytes.len() {
3234            // Skip non-word chars
3235            while i < bytes.len() {
3236                let b = bytes[i];
3237                if b.is_ascii_lowercase()
3238                    || b.is_ascii_uppercase()
3239                    || b.is_ascii_digit()
3240                    || b == b'_'
3241                {
3242                    break;
3243                }
3244                i += 1;
3245            }
3246
3247            if i >= bytes.len() {
3248                break;
3249            }
3250
3251            // Found start of word run
3252            let start = i;
3253
3254            // Consume all word chars
3255            while i < bytes.len() {
3256                let b = bytes[i];
3257                if !(b.is_ascii_lowercase()
3258                    || b.is_ascii_uppercase()
3259                    || b.is_ascii_digit()
3260                    || b == b'_')
3261                {
3262                    break;
3263                }
3264                i += 1;
3265            }
3266
3267            matches.push((start, i));
3268        }
3269
3270        matches
3271    }
3272}
3273
3274fn compile_ast(ast: &Ast) -> Result<Matcher, PatternError> {
3275    match ast {
3276        Ast::Literal(lit) => Ok(Matcher::Literal(lit.clone())),
3277        Ast::Dot => {
3278            // Dot matches any character except newline
3279            // Parse as [^\n] character class
3280            use crate::parser::charclass::CharClass;
3281            let char_class = CharClass::parse(r"^\n")
3282                .map_err(|e| PatternError::ParseError(format!("Dot charclass: {}", e)))?;
3283            Ok(Matcher::CharClass(char_class))
3284        }
3285        Ast::Alternation(parts) => {
3286            use aho_corasick::MatchKind;
3287            let ac = AhoCorasick::builder()
3288                .match_kind(MatchKind::LeftmostFirst)
3289                .build(parts)
3290                .map_err(|e| PatternError::ParseError(format!("Aho-Corasick: {}", e)))?;
3291            Ok(Matcher::MultiLiteral(ac))
3292        }
3293        Ast::Anchored {
3294            literal,
3295            start,
3296            end,
3297        } => Ok(Matcher::AnchoredLiteral {
3298            literal: literal.clone(),
3299            start: *start,
3300            end: *end,
3301        }),
3302        Ast::AnchoredGroup { group, start, end } => Ok(Matcher::AnchoredGroup {
3303            group: group.clone(),
3304            start: *start,
3305            end: *end,
3306        }),
3307        Ast::AnchoredPattern { inner, start, end } => {
3308            let inner_matcher = compile_ast(inner)?;
3309            Ok(Matcher::AnchoredPattern {
3310                inner: Box::new(inner_matcher),
3311                start: *start,
3312                end: *end,
3313            })
3314        }
3315        Ast::CharClass(cc) => Ok(Matcher::CharClass(cc.clone())),
3316        Ast::Quantified(qp) => {
3317            // OPTIMIZATION: Detect \d+ and \w+ patterns for specialized fast path
3318            if let crate::parser::quantifier::Quantifier::OneOrMore = qp.quantifier {
3319                if let crate::parser::quantifier::QuantifiedElement::CharClass(ref cc) = qp.element
3320                {
3321                    // Check if this is \d+ (digits)
3322                    if is_digit_charclass(cc) {
3323                        return Ok(Matcher::DigitRun);
3324                    }
3325                    // Check if this is \w+ (word chars)
3326                    if is_word_charclass(cc) {
3327                        return Ok(Matcher::WordRun);
3328                    }
3329                }
3330            }
3331            Ok(Matcher::Quantified(qp.clone()))
3332        }
3333        Ast::Sequence(seq) => {
3334            // Lazy DFA is experimental and currently slower - disabled for now
3335            // TODO: Optimize LazyDFA implementation
3336            // if let Some(lazy_dfa) = engine::lazy_dfa::LazyDFA::try_compile(seq) {
3337            //     return Ok(Matcher::LazyDFA(lazy_dfa));
3338            // }
3339
3340            // Try to compile to DFA for better performance
3341            if let Some(dfa) = engine::dfa::DFA::try_compile(seq) {
3342                return Ok(Matcher::DFA(dfa));
3343            }
3344            // Fallback to regular sequence matcher
3345            Ok(Matcher::Sequence(seq.clone()))
3346        }
3347        Ast::Group(group) => Ok(Matcher::Group(group.clone())),
3348        Ast::Boundary(boundary_type) => Ok(Matcher::Boundary(*boundary_type)),
3349        Ast::Lookaround(lookaround) => {
3350            // Compile the inner pattern of the lookaround
3351            let inner_matcher = compile_ast(&lookaround.pattern)?;
3352            Ok(Matcher::Lookaround(
3353                Box::new(lookaround.clone()),
3354                Box::new(inner_matcher),
3355            ))
3356        }
3357        Ast::Capture(inner_ast, group_index) => {
3358            // Compile the inner pattern of the capture group
3359            let inner_matcher = compile_ast(inner_ast)?;
3360            Ok(Matcher::Capture(Box::new(inner_matcher), *group_index))
3361        }
3362        Ast::QuantifiedCapture(inner_ast, quantifier) => {
3363            // Compile the inner pattern and create a quantified capture matcher
3364            let inner_matcher = compile_ast(inner_ast)?;
3365            Ok(Matcher::QuantifiedCapture(
3366                Box::new(inner_matcher),
3367                quantifier.clone(),
3368            ))
3369        }
3370        Ast::CombinedWithLookaround { prefix, lookaround } => {
3371            // Compile both the prefix and the lookaround's inner pattern
3372            let prefix_matcher = compile_ast(prefix)?;
3373            let lookaround_inner = compile_ast(&lookaround.pattern)?;
3374            Ok(Matcher::CombinedWithLookaround {
3375                prefix: Box::new(prefix_matcher),
3376                lookaround: Box::new(lookaround.clone()),
3377                lookaround_matcher: Box::new(lookaround_inner),
3378            })
3379        }
3380        Ast::LookbehindWithSuffix { lookbehind, suffix } => {
3381            // Compile both the lookbehind and the suffix
3382            let lookbehind_inner = compile_ast(&lookbehind.pattern)?;
3383            let suffix_matcher = compile_ast(suffix)?;
3384            Ok(Matcher::LookbehindWithSuffix {
3385                lookbehind: Box::new(lookbehind.clone()),
3386                lookbehind_matcher: Box::new(lookbehind_inner),
3387                suffix: Box::new(suffix_matcher),
3388            })
3389        }
3390        Ast::PatternWithCaptures {
3391            elements,
3392            total_groups,
3393        } => {
3394            // Compile each element
3395            let mut compiled_elements = Vec::new();
3396            for elem in elements {
3397                match elem {
3398                    CaptureElement::Capture(ast, group_num) => {
3399                        let matcher = compile_ast(ast)?;
3400                        compiled_elements
3401                            .push(CompiledCaptureElement::Capture(matcher, *group_num));
3402                    }
3403                    CaptureElement::NonCapture(ast) => {
3404                        let matcher = compile_ast(ast)?;
3405                        compiled_elements.push(CompiledCaptureElement::NonCapture(matcher));
3406                    }
3407                }
3408            }
3409            Ok(Matcher::PatternWithCaptures {
3410                elements: compiled_elements,
3411                total_groups: *total_groups,
3412            })
3413        }
3414        Ast::AlternationWithCaptures {
3415            branches,
3416            total_groups,
3417        } => {
3418            // Compile each branch
3419            let mut compiled_branches = Vec::new();
3420            for branch_ast in branches {
3421                let branch_matcher = compile_ast(branch_ast)?;
3422                compiled_branches.push(branch_matcher);
3423            }
3424            Ok(Matcher::AlternationWithCaptures {
3425                branches: compiled_branches,
3426                total_groups: *total_groups,
3427            })
3428        }
3429        Ast::Backreference(group_num) => Ok(Matcher::Backreference(*group_num)),
3430        Ast::DotAll => {
3431            // DotAll matches ANY character including newline
3432            // Create a character class that matches everything
3433            use crate::parser::charclass::CharClass;
3434            // Use empty negated class which matches everything
3435            let mut char_class = CharClass::new();
3436            char_class.add_range('\0', char::MAX); // Match all unicode
3437            char_class.finalize();
3438            Ok(Matcher::CharClass(char_class))
3439        }
3440        Ast::SequenceWithFlags(seq, flags) => {
3441            // Compile sequence with flag awareness
3442            Ok(Matcher::SequenceWithFlags(seq.clone(), *flags))
3443        }
3444        Ast::CaseInsensitive(inner) => {
3445            // Lowercase the pattern before compiling
3446            let lowercased = lowercase_ast(inner);
3447            let inner_matcher = compile_ast(&lowercased)?;
3448            Ok(Matcher::CaseInsensitive(Box::new(inner_matcher)))
3449        }
3450    }
3451}
3452
3453/// Lowercase all literals in an AST for case-insensitive matching
3454fn lowercase_ast(ast: &Ast) -> Ast {
3455    match ast {
3456        Ast::Literal(s) => Ast::Literal(s.to_lowercase()),
3457        Ast::Alternation(branches) => {
3458            Ast::Alternation(branches.iter().map(|s| s.to_lowercase()).collect())
3459        }
3460        Ast::Group(g) => {
3461            // Lowercase group content
3462            let mut new_group = g.clone();
3463            new_group.content = match &g.content {
3464                parser::group::GroupContent::Single(s) => {
3465                    parser::group::GroupContent::Single(s.to_lowercase())
3466                }
3467                parser::group::GroupContent::Alternation(branches) => {
3468                    parser::group::GroupContent::Alternation(
3469                        branches.iter().map(|s| s.to_lowercase()).collect(),
3470                    )
3471                }
3472                parser::group::GroupContent::Sequence(seq) => {
3473                    let mut new_seq = seq.clone();
3474                    new_seq.elements = new_seq
3475                        .elements
3476                        .into_iter()
3477                        .map(|elem| match elem {
3478                            parser::sequence::SequenceElement::Literal(s) => {
3479                                parser::sequence::SequenceElement::Literal(s.to_lowercase())
3480                            }
3481                            parser::sequence::SequenceElement::Char(c) => {
3482                                let lowered: String = c.to_lowercase().collect();
3483                                if lowered.len() == 1 {
3484                                    parser::sequence::SequenceElement::Char(
3485                                        lowered.chars().next().unwrap(),
3486                                    )
3487                                } else {
3488                                    parser::sequence::SequenceElement::Literal(lowered)
3489                                }
3490                            }
3491                            other => other,
3492                        })
3493                        .collect();
3494                    parser::group::GroupContent::Sequence(new_seq)
3495                }
3496                parser::group::GroupContent::ParsedAlternation(sequences) => {
3497                    let new_sequences: Vec<_> = sequences
3498                        .iter()
3499                        .map(|seq| {
3500                            let mut new_seq = seq.clone();
3501                            new_seq.elements = new_seq
3502                                .elements
3503                                .into_iter()
3504                                .map(|elem| match elem {
3505                                    parser::sequence::SequenceElement::Literal(s) => {
3506                                        parser::sequence::SequenceElement::Literal(s.to_lowercase())
3507                                    }
3508                                    parser::sequence::SequenceElement::Char(c) => {
3509                                        let lowered: String = c.to_lowercase().collect();
3510                                        if lowered.len() == 1 {
3511                                            parser::sequence::SequenceElement::Char(
3512                                                lowered.chars().next().unwrap(),
3513                                            )
3514                                        } else {
3515                                            parser::sequence::SequenceElement::Literal(lowered)
3516                                        }
3517                                    }
3518                                    other => other,
3519                                })
3520                                .collect();
3521                            new_seq
3522                        })
3523                        .collect();
3524                    parser::group::GroupContent::ParsedAlternation(new_sequences)
3525                }
3526            };
3527            Ast::Group(new_group)
3528        }
3529        Ast::Sequence(seq) => {
3530            // Lowercase literals in sequence elements and rebuild sequence
3531            // (must rebuild NFA table with lowered chars)
3532            let new_elements: Vec<_> = seq
3533                .elements
3534                .iter()
3535                .map(|elem| match elem {
3536                    parser::sequence::SequenceElement::Literal(s) => {
3537                        parser::sequence::SequenceElement::Literal(s.to_lowercase())
3538                    }
3539                    parser::sequence::SequenceElement::Char(c) => {
3540                        let lowered: String = c.to_lowercase().collect();
3541                        if lowered.len() == 1 {
3542                            parser::sequence::SequenceElement::Char(lowered.chars().next().unwrap())
3543                        } else {
3544                            parser::sequence::SequenceElement::Literal(lowered)
3545                        }
3546                    }
3547                    other => other.clone(),
3548                })
3549                .collect();
3550            Ast::Sequence(parser::sequence::Sequence::new(new_elements))
3551        }
3552        Ast::Anchored {
3553            literal,
3554            start,
3555            end,
3556        } => Ast::Anchored {
3557            literal: literal.to_lowercase(),
3558            start: *start,
3559            end: *end,
3560        },
3561        Ast::AnchoredPattern { inner, start, end } => Ast::AnchoredPattern {
3562            inner: Box::new(lowercase_ast(inner)),
3563            start: *start,
3564            end: *end,
3565        },
3566        Ast::SequenceWithFlags(seq, flags) => {
3567            let mut new_seq = seq.clone();
3568            new_seq.elements = new_seq
3569                .elements
3570                .into_iter()
3571                .map(|elem| match elem {
3572                    parser::sequence::SequenceElement::Literal(s) => {
3573                        parser::sequence::SequenceElement::Literal(s.to_lowercase())
3574                    }
3575                    parser::sequence::SequenceElement::Char(c) => {
3576                        let lowered: String = c.to_lowercase().collect();
3577                        if lowered.len() == 1 {
3578                            parser::sequence::SequenceElement::Char(lowered.chars().next().unwrap())
3579                        } else {
3580                            parser::sequence::SequenceElement::Literal(lowered)
3581                        }
3582                    }
3583                    other => other,
3584                })
3585                .collect();
3586            Ast::SequenceWithFlags(new_seq, *flags)
3587        }
3588        Ast::CaseInsensitive(inner) => {
3589            // Already case-insensitive, just lowercase inner
3590            Ast::CaseInsensitive(Box::new(lowercase_ast(inner)))
3591        }
3592        Ast::PatternWithCaptures {
3593            elements,
3594            total_groups,
3595        } => {
3596            // Lowercase literals in capture elements
3597            let new_elements = elements
3598                .iter()
3599                .map(|elem| match elem {
3600                    CaptureElement::NonCapture(ast) => {
3601                        CaptureElement::NonCapture(lowercase_ast(ast))
3602                    }
3603                    CaptureElement::Capture(ast, group_num) => {
3604                        CaptureElement::Capture(lowercase_ast(ast), *group_num)
3605                    }
3606                })
3607                .collect();
3608            Ast::PatternWithCaptures {
3609                elements: new_elements,
3610                total_groups: *total_groups,
3611            }
3612        }
3613        Ast::AlternationWithCaptures {
3614            branches,
3615            total_groups,
3616        } => Ast::AlternationWithCaptures {
3617            branches: branches.iter().map(lowercase_ast).collect(),
3618            total_groups: *total_groups,
3619        },
3620        Ast::Capture(inner, group_index) => {
3621            // Lowercase the inner AST of the capture group
3622            Ast::Capture(Box::new(lowercase_ast(inner)), *group_index)
3623        }
3624        // For other AST types that don't contain literals, just clone
3625        _ => ast.clone(),
3626    }
3627}
3628
3629/// Get min and max repetitions for a quantifier
3630fn quantifier_bounds(q: &parser::quantifier::Quantifier) -> (usize, usize) {
3631    use parser::quantifier::Quantifier;
3632    match q {
3633        Quantifier::ZeroOrMore | Quantifier::ZeroOrMoreLazy => (0, usize::MAX),
3634        Quantifier::OneOrMore | Quantifier::OneOrMoreLazy => (1, usize::MAX),
3635        Quantifier::ZeroOrOne | Quantifier::ZeroOrOneLazy => (0, 1),
3636        Quantifier::Exactly(n) => (*n, *n),
3637        Quantifier::AtLeast(n) => (*n, usize::MAX),
3638        Quantifier::Between(n, m) => (*n, *m),
3639    }
3640}
3641
3642/// Check if CharClass matches \d pattern (only [0-9])
3643fn is_digit_charclass(cc: &CharClass) -> bool {
3644    // Check if ranges contain exactly [0-9] and no other chars
3645    cc.ranges.len() == 1 && cc.ranges[0] == ('0', '9') && cc.chars.is_empty() && !cc.negated
3646}
3647
3648/// Check if CharClass matches \w pattern ([a-zA-Z0-9_])
3649fn is_word_charclass(cc: &CharClass) -> bool {
3650    // Check if ranges contain [a-z], [A-Z], [0-9] and chars contain '_'
3651    if cc.negated || cc.ranges.len() != 3 {
3652        return false;
3653    }
3654
3655    let mut has_lower = false;
3656    let mut has_upper = false;
3657    let mut has_digit = false;
3658
3659    for &(start, end) in &cc.ranges {
3660        if start == 'a' && end == 'z' {
3661            has_lower = true;
3662        } else if start == 'A' && end == 'Z' {
3663            has_upper = true;
3664        } else if start == '0' && end == '9' {
3665            has_digit = true;
3666        }
3667    }
3668
3669    has_lower && has_upper && has_digit && cc.chars.len() == 1 && cc.chars[0] == '_'
3670}
3671
3672/// Parse lookaround assertion patterns: (?=...), (?!...), (?<=...), (?<!...)
3673fn parse_lookaround(pattern: &str, depth: usize) -> Result<Ast, PatternError> {
3674    let lookaround_type = if pattern.starts_with("(?=") {
3675        LookaroundType::PositiveLookahead
3676    } else if pattern.starts_with("(?!") {
3677        LookaroundType::NegativeLookahead
3678    } else if pattern.starts_with("(?<=") {
3679        LookaroundType::PositiveLookbehind
3680    } else if pattern.starts_with("(?<!") {
3681        LookaroundType::NegativeLookbehind
3682    } else {
3683        return Err(PatternError::ParseError(
3684            "Invalid lookaround syntax".to_string(),
3685        ));
3686    };
3687
3688    // Find the matching closing parenthesis
3689    let prefix_len = if pattern.starts_with("(?<=") || pattern.starts_with("(?<!") {
3690        4 // "(?<=" or "(?<!"
3691    } else {
3692        3 // "(?=" or "(?!"
3693    };
3694
3695    if let Some(close_idx) = find_matching_paren(pattern, 0) {
3696        let inner = &pattern[prefix_len..close_idx];
3697        let inner_ast = parse_pattern_with_depth(inner, depth + 1)?;
3698
3699        // Check if there's a suffix after the lookaround
3700        if close_idx != pattern.len() - 1 {
3701            // This is a lookaround with suffix
3702            let suffix = &pattern[close_idx + 1..];
3703            let suffix_ast = parse_pattern_with_depth(suffix, depth + 1)?;
3704
3705            // For lookbehind: (?<=foo)bar - match bar only if preceded by foo
3706            // For lookahead: (?=foo)bar - doesn't make semantic sense
3707            if matches!(
3708                lookaround_type,
3709                LookaroundType::PositiveLookbehind | LookaroundType::NegativeLookbehind
3710            ) {
3711                let lookbehind = Lookaround::new(lookaround_type, inner_ast);
3712                return Ok(Ast::LookbehindWithSuffix {
3713                    lookbehind,
3714                    suffix: Box::new(suffix_ast),
3715                });
3716            } else {
3717                return Err(PatternError::ParseError(
3718                    "Lookahead cannot have suffix pattern after it".to_string(),
3719                ));
3720            }
3721        }
3722
3723        Ok(Ast::Lookaround(Lookaround::new(lookaround_type, inner_ast)))
3724    } else {
3725        Err(PatternError::ParseError(
3726            "Unmatched parenthesis in lookaround".to_string(),
3727        ))
3728    }
3729}
3730
3731/// Parse combined patterns with lookaround: foo(?=bar), \d+(?!x), etc.
3732fn parse_combined_with_lookaround(pattern: &str, depth: usize) -> Result<Ast, PatternError> {
3733    // Find the lookaround position
3734    let lookaround_patterns = ["(?=", "(?!", "(?<=", "(?<!"];
3735
3736    for lookaround_start in lookaround_patterns {
3737        if let Some(pos) = pattern.find(lookaround_start) {
3738            if pos == 0 {
3739                // This is a standalone lookaround, not combined
3740                continue;
3741            }
3742
3743            // Split into prefix and lookaround
3744            let prefix = &pattern[..pos];
3745            let lookaround_part = &pattern[pos..];
3746
3747            // Parse the prefix
3748            let prefix_ast = parse_pattern_with_depth(prefix, depth + 1)?;
3749
3750            // Parse the lookaround
3751            let lookaround_type = if lookaround_start == "(?=" {
3752                LookaroundType::PositiveLookahead
3753            } else if lookaround_start == "(?!" {
3754                LookaroundType::NegativeLookahead
3755            } else if lookaround_start == "(?<=" {
3756                LookaroundType::PositiveLookbehind
3757            } else {
3758                LookaroundType::NegativeLookbehind
3759            };
3760
3761            let prefix_len = lookaround_start.len();
3762            if let Some(close_idx) = find_matching_paren(lookaround_part, 0) {
3763                if close_idx != lookaround_part.len() - 1 {
3764                    return Err(PatternError::ParseError(
3765                        "Extra characters after lookaround".to_string(),
3766                    ));
3767                }
3768
3769                let inner = &lookaround_part[prefix_len..close_idx];
3770                let inner_ast = parse_pattern_with_depth(inner, depth + 1)?;
3771
3772                let lookaround = Lookaround::new(lookaround_type, inner_ast);
3773
3774                return Ok(Ast::CombinedWithLookaround {
3775                    prefix: Box::new(prefix_ast),
3776                    lookaround,
3777                });
3778            } else {
3779                return Err(PatternError::ParseError(
3780                    "Unmatched parenthesis in lookaround".to_string(),
3781                ));
3782            }
3783        }
3784    }
3785
3786    Err(PatternError::ParseError(
3787        "No lookaround found in pattern".to_string(),
3788    ))
3789}
3790
3791/// Find the index of the matching closing parenthesis
3792/// Returns None if no match found
3793/// Check if a pattern contains unescaped parentheses (not \( or \) and not inside [...])
3794fn contains_unescaped_paren(pattern: &str) -> bool {
3795    let bytes = pattern.as_bytes();
3796    let mut i = 0;
3797    while i < bytes.len() {
3798        if bytes[i] == b'\\' && i + 1 < bytes.len() {
3799            i += 2; // Skip escaped character
3800        } else if bytes[i] == b'[' {
3801            // Skip character class to avoid counting parens inside it
3802            i += 1;
3803            if i < bytes.len() && bytes[i] == b'^' {
3804                i += 1;
3805            }
3806            while i < bytes.len() {
3807                if bytes[i] == b'\\' {
3808                    i += 2;
3809                } else if bytes[i] == b']' {
3810                    i += 1;
3811                    break;
3812                } else {
3813                    i += 1;
3814                }
3815            }
3816        } else if bytes[i] == b'(' || bytes[i] == b')' {
3817            return true;
3818        } else {
3819            i += 1;
3820        }
3821    }
3822    false
3823}
3824
3825fn find_matching_paren(pattern: &str, start: usize) -> Option<usize> {
3826    let bytes = pattern.as_bytes();
3827    if start >= bytes.len() || bytes[start] != b'(' {
3828        return None;
3829    }
3830
3831    let mut depth = 0;
3832    let mut i = 0;
3833    while i < bytes[start..].len() {
3834        match bytes[start + i] {
3835            b'\\' => {
3836                // Skip next character (could be escaped parenthesis)
3837                i += 2;
3838                continue;
3839            }
3840            b'[' => {
3841                // Skip character class [...] to avoid counting ) inside it
3842                i += 1;
3843                // Handle negation [^...]
3844                if i < bytes[start..].len() && bytes[start + i] == b'^' {
3845                    i += 1;
3846                }
3847                // Skip until closing ]
3848                while i < bytes[start..].len() {
3849                    if bytes[start + i] == b'\\' {
3850                        i += 2; // Skip escaped character
3851                    } else if bytes[start + i] == b']' {
3852                        i += 1;
3853                        break;
3854                    } else {
3855                        i += 1;
3856                    }
3857                }
3858                continue;
3859            }
3860            b'(' => depth += 1,
3861            b')' => {
3862                depth -= 1;
3863                if depth == 0 {
3864                    return Some(start + i);
3865                }
3866            }
3867            _ => {}
3868        }
3869        i += 1;
3870    }
3871
3872    None // Unmatched
3873}
3874
3875/// Parse patterns with embedded capture groups: Hello (\w+), (\w+)=(\d+), (\d{4})-(\d{2})-(\d{2})
3876/// Returns an AST that represents a sequence with captures
3877fn parse_pattern_with_captures(pattern: &str) -> Result<Ast, PatternError> {
3878    let mut group_counter = 1;
3879    let (ast, _total_groups) = parse_pattern_with_captures_inner(pattern, &mut group_counter)?;
3880    Ok(ast)
3881}
3882
3883/// Split pattern by top-level '|' characters (not inside groups)
3884/// Returns None if no top-level alternation found
3885fn split_by_alternation(pattern: &str) -> Option<Vec<String>> {
3886    let mut branches = Vec::new();
3887    let mut current = String::new();
3888    let mut depth = 0;
3889    let mut chars = pattern.chars().peekable();
3890
3891    while let Some(ch) = chars.next() {
3892        match ch {
3893            '\\' => {
3894                // Escape sequence - consume next char too
3895                current.push(ch);
3896                if let Some(next) = chars.next() {
3897                    current.push(next);
3898                }
3899            }
3900            '[' => {
3901                // Skip character class to avoid counting parens inside it
3902                current.push(ch);
3903                // Handle negation [^...]
3904                if chars.peek() == Some(&'^') {
3905                    current.push(chars.next().unwrap());
3906                }
3907                // Consume until closing ]
3908                while let Some(c) = chars.next() {
3909                    current.push(c);
3910                    if c == '\\' {
3911                        if let Some(next) = chars.next() {
3912                            current.push(next);
3913                        }
3914                    } else if c == ']' {
3915                        break;
3916                    }
3917                }
3918            }
3919            '(' => {
3920                depth += 1;
3921                current.push(ch);
3922            }
3923            ')' => {
3924                depth -= 1;
3925                current.push(ch);
3926            }
3927            '|' if depth == 0 => {
3928                // Top-level alternation found
3929                branches.push(current.clone());
3930                current.clear();
3931            }
3932            _ => {
3933                current.push(ch);
3934            }
3935        }
3936    }
3937
3938    // Add the last branch
3939    if !current.is_empty() || !branches.is_empty() {
3940        branches.push(current);
3941    }
3942
3943    // Only return Some if we found actual alternation (more than 1 branch)
3944    if branches.len() > 1 {
3945        Some(branches)
3946    } else {
3947        None
3948    }
3949}
3950
3951/// Inner recursive parser that tracks group numbers across nested captures
3952fn parse_pattern_with_captures_inner(
3953    pattern: &str,
3954    group_counter: &mut usize,
3955) -> Result<(Ast, usize), PatternError> {
3956    // FIRST: Check if this pattern contains top-level alternation
3957    if let Some(branches) = split_by_alternation(pattern) {
3958        // This is an alternation pattern like (a)|(b) or foo|bar
3959        let _start_group = *group_counter;
3960        let mut parsed_branches = Vec::new();
3961
3962        for branch in branches {
3963            // Parse each branch independently
3964            let (branch_ast, _) = parse_pattern_with_captures_inner(&branch, group_counter)?;
3965            parsed_branches.push(branch_ast);
3966        }
3967
3968        let total_groups = *group_counter - 1;
3969
3970        // Create an alternation AST
3971        // For now, we need to represent alternation with captures
3972        // We'll create a PatternWithCaptures that contains the alternation logic
3973        // But first check if all branches are simple literals
3974        let all_literals = parsed_branches
3975            .iter()
3976            .all(|ast| matches!(ast, Ast::Literal(_)));
3977
3978        if all_literals {
3979            // Simple case: all branches are literals like "a"|"b"
3980            let literals: Vec<String> = parsed_branches
3981                .into_iter()
3982                .filter_map(|ast| {
3983                    if let Ast::Literal(s) = ast {
3984                        Some(s)
3985                    } else {
3986                        None
3987                    }
3988                })
3989                .collect();
3990            return Ok((Ast::Alternation(literals), total_groups));
3991        } else {
3992            // Complex case: branches contain captures or other complex patterns
3993            // Try to convert branches to sequences for ParsedAlternation
3994            let mut sequences = Vec::new();
3995            for branch_ast in &parsed_branches {
3996                // Try to extract a sequence from each branch
3997                if let Ast::Sequence(seq) = branch_ast {
3998                    sequences.push(seq.clone());
3999                } else {
4000                    // Can't use ParsedAlternation for non-sequence branches
4001                    break;
4002                }
4003            }
4004
4005            if sequences.len() == parsed_branches.len() {
4006                // All branches are sequences - use ParsedAlternation
4007                use crate::parser::group::{Group, GroupContent};
4008                return Ok((
4009                    Ast::Group(Group::new_non_capturing(GroupContent::ParsedAlternation(
4010                        sequences,
4011                    ))),
4012                    total_groups,
4013                ));
4014            } else {
4015                // Mixed types or non-sequences (like Capture, Literal, etc.)
4016                // Use the new AlternationWithCaptures variant
4017                return Ok((
4018                    Ast::AlternationWithCaptures {
4019                        branches: parsed_branches,
4020                        total_groups,
4021                    },
4022                    total_groups,
4023                ));
4024            }
4025        }
4026    }
4027
4028    // NO alternation at top level - parse as sequence
4029    let mut elements: Vec<CaptureElement> = Vec::new();
4030    let mut pos = 0;
4031    let start_group = *group_counter;
4032
4033    while pos < pattern.len() {
4034        if pattern[pos..].starts_with("(?:") {
4035            // Found a non-capturing group (?:...)
4036            if let Some(close_idx) = find_matching_paren(pattern, pos) {
4037                // Parse the content as a non-capturing group (recursive)
4038                let inner = &pattern[pos + 3..close_idx]; // Skip "(?:"
4039                let (inner_ast, _) = parse_pattern_with_captures_inner(inner, group_counter)?;
4040
4041                // Check for quantifier after the non-capturing group (same as capturing groups)
4042                let mut after_group = close_idx + 1;
4043                let mut quantifier: Option<parser::quantifier::Quantifier> = None;
4044
4045                if after_group < pattern.len() {
4046                    let remaining = &pattern[after_group..];
4047                    let chars: Vec<char> = remaining.chars().take(2).collect();
4048                    if !chars.is_empty() {
4049                        let first = chars[0];
4050                        let has_lazy = chars.len() > 1 && chars[1] == '?';
4051
4052                        match first {
4053                            '*' if has_lazy => {
4054                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrMoreLazy);
4055                                after_group += 2;
4056                            }
4057                            '*' => {
4058                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrMore);
4059                                after_group += 1;
4060                            }
4061                            '+' if has_lazy => {
4062                                quantifier = Some(parser::quantifier::Quantifier::OneOrMoreLazy);
4063                                after_group += 2;
4064                            }
4065                            '+' => {
4066                                quantifier = Some(parser::quantifier::Quantifier::OneOrMore);
4067                                after_group += 1;
4068                            }
4069                            '?' if has_lazy => {
4070                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrOneLazy);
4071                                after_group += 2;
4072                            }
4073                            '?' => {
4074                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrOne);
4075                                after_group += 1;
4076                            }
4077                            _ => {}
4078                        }
4079                    }
4080                }
4081
4082                // Build the non-capture element with optional quantifier
4083                if let Some(q) = quantifier {
4084                    // Wrap the inner AST with a Quantified matcher
4085                    elements.push(CaptureElement::NonCapture(Ast::QuantifiedCapture(
4086                        Box::new(inner_ast),
4087                        q,
4088                    )));
4089                } else {
4090                    elements.push(CaptureElement::NonCapture(inner_ast));
4091                }
4092                pos = after_group;
4093            } else {
4094                return Err(PatternError::ParseError(
4095                    "Unmatched parenthesis".to_string(),
4096                ));
4097            }
4098        } else if pattern[pos..].starts_with('(') && !pattern[pos..].starts_with("(?") {
4099            // Found a capture group
4100            if let Some(close_idx) = find_matching_paren(pattern, pos) {
4101                let my_group_num = *group_counter;
4102                *group_counter += 1;
4103
4104                // Parse the content of the capture (recursive, may have nested captures)
4105                let inner = &pattern[pos + 1..close_idx];
4106                let (inner_ast, _) = parse_pattern_with_captures_inner(inner, group_counter)?;
4107
4108                // Check for quantifier after the group
4109                let mut after_group = close_idx + 1;
4110                let mut quantifier: Option<parser::quantifier::Quantifier> = None;
4111
4112                if after_group < pattern.len() {
4113                    let remaining = &pattern[after_group..];
4114                    let chars: Vec<char> = remaining.chars().take(2).collect();
4115                    if !chars.is_empty() {
4116                        let first = chars[0];
4117                        let has_lazy = chars.len() > 1 && chars[1] == '?';
4118
4119                        match first {
4120                            '*' if has_lazy => {
4121                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrMoreLazy);
4122                                after_group += 2;
4123                            }
4124                            '*' => {
4125                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrMore);
4126                                after_group += 1;
4127                            }
4128                            '+' if has_lazy => {
4129                                quantifier = Some(parser::quantifier::Quantifier::OneOrMoreLazy);
4130                                after_group += 2;
4131                            }
4132                            '+' => {
4133                                quantifier = Some(parser::quantifier::Quantifier::OneOrMore);
4134                                after_group += 1;
4135                            }
4136                            '?' if has_lazy => {
4137                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrOneLazy);
4138                                after_group += 2;
4139                            }
4140                            '?' => {
4141                                quantifier = Some(parser::quantifier::Quantifier::ZeroOrOne);
4142                                after_group += 1;
4143                            }
4144                            _ => {}
4145                        }
4146                    }
4147                }
4148
4149                // Build the capture AST with optional quantifier
4150                if let Some(q) = quantifier {
4151                    elements.push(CaptureElement::Capture(
4152                        Ast::QuantifiedCapture(Box::new(inner_ast), q),
4153                        my_group_num,
4154                    ));
4155                } else {
4156                    elements.push(CaptureElement::Capture(inner_ast, my_group_num));
4157                }
4158                pos = after_group;
4159            } else {
4160                return Err(PatternError::ParseError(
4161                    "Unmatched parenthesis".to_string(),
4162                ));
4163            }
4164        } else {
4165            // Check for backreference \1, \2, etc. AT CURRENT POSITION
4166            if pattern[pos..].starts_with('\\') && pos + 1 < pattern.len() {
4167                let next_char = pattern.chars().nth(pos + 1);
4168                if let Some(ch) = next_char {
4169                    if ch.is_ascii_digit() {
4170                        // This is a backreference like \1
4171                        let digit = ch.to_digit(10).unwrap() as usize;
4172                        elements.push(CaptureElement::NonCapture(Ast::Backreference(digit)));
4173                        pos += 2; // Skip \1
4174                        continue;
4175                    }
4176                }
4177            }
4178
4179            // Find the next capture group, backreference, or end of pattern
4180            // Skip escaped parentheses and character classes when searching
4181            let next_paren = {
4182                let mut search_pos = pos;
4183                let mut result = pattern.len();
4184                let bytes = pattern.as_bytes();
4185                while search_pos < bytes.len() {
4186                    if bytes[search_pos] == b'\\' && search_pos + 1 < bytes.len() {
4187                        search_pos += 2; // Skip escaped character
4188                    } else if bytes[search_pos] == b'[' {
4189                        // Skip character class to avoid finding ( inside it
4190                        search_pos += 1;
4191                        if search_pos < bytes.len() && bytes[search_pos] == b'^' {
4192                            search_pos += 1;
4193                        }
4194                        while search_pos < bytes.len() {
4195                            if bytes[search_pos] == b'\\' {
4196                                search_pos += 2;
4197                            } else if bytes[search_pos] == b']' {
4198                                search_pos += 1;
4199                                break;
4200                            } else {
4201                                search_pos += 1;
4202                            }
4203                        }
4204                    } else if bytes[search_pos] == b'(' {
4205                        result = search_pos;
4206                        break;
4207                    } else {
4208                        search_pos += 1;
4209                    }
4210                }
4211                result
4212            };
4213
4214            // Find next backreference \digit (search from current position + 1 to avoid finding current char)
4215            let mut search_pos = pos;
4216            let mut next_backref = pattern.len();
4217
4218            while search_pos < pattern.len() {
4219                if pattern[search_pos..].starts_with('\\') && search_pos + 1 < pattern.len() {
4220                    let next_ch = pattern.chars().nth(search_pos + 1);
4221                    if next_ch.map(|c| c.is_ascii_digit()).unwrap_or(false) {
4222                        next_backref = search_pos;
4223                        break;
4224                    }
4225                    search_pos += 2; // Skip this escape
4226                } else {
4227                    search_pos += 1;
4228                }
4229            }
4230
4231            // Take the minimum of next_paren and next_backref
4232            let next_boundary = next_paren.min(next_backref);
4233
4234            if next_boundary > pos {
4235                // There's a literal or other pattern before the next capture/backref
4236                let segment = &pattern[pos..next_boundary];
4237
4238                // Parse segment without going through capture detection
4239                let segment_ast = if segment.is_empty() {
4240                    Ast::Literal(String::new())
4241                } else {
4242                    // Use basic parsing for non-capture segments
4243                    parse_pattern(segment)?
4244                };
4245
4246                elements.push(CaptureElement::NonCapture(segment_ast));
4247                pos = next_boundary;
4248            } else {
4249                // Move forward
4250                pos += 1;
4251            }
4252        }
4253    }
4254
4255    let total_groups = *group_counter - 1;
4256
4257    // If we only have one element and it's a single capture, return it directly
4258    if elements.len() == 1 {
4259        if let CaptureElement::Capture(ast, num) = &elements[0] {
4260            return Ok((Ast::Capture(Box::new(ast.clone()), *num), total_groups));
4261        }
4262    }
4263
4264    // Build a PatternWithCaptures AST
4265    Ok((
4266        Ast::PatternWithCaptures {
4267            elements,
4268            total_groups,
4269        },
4270        *group_counter - start_group,
4271    ))
4272}
4273
4274/// Element in a pattern with captures
4275#[derive(Debug, Clone, PartialEq)]
4276enum CaptureElement {
4277    Capture(Ast, usize), // (pattern), group_number
4278    NonCapture(Ast),     // literal or other pattern
4279}
4280
4281#[cfg(test)]
4282mod tests {
4283    use super::*;
4284
4285    #[test]
4286    fn literal() {
4287        let p = Pattern::new("hello").unwrap();
4288        assert!(p.is_match("hello world"));
4289        assert!(!p.is_match("goodbye"));
4290    }
4291
4292    #[test]
4293    fn alternation() {
4294        let p = Pattern::new("foo|bar|baz").unwrap();
4295        assert!(p.is_match("foo"));
4296        assert!(p.is_match("bar"));
4297        assert!(!p.is_match("qux"));
4298    }
4299
4300    #[test]
4301    fn anchors() {
4302        let p = Pattern::new("^hello$").unwrap();
4303        assert!(p.is_match("hello"));
4304        assert!(!p.is_match("hello world"));
4305    }
4306
4307    #[test]
4308    fn find_test() {
4309        let p = Pattern::new("world").unwrap();
4310        assert_eq!(p.find("hello world"), Some((6, 11)));
4311    }
4312
4313    #[test]
4314    fn cached() {
4315        assert!(is_match("test", "this is a test").unwrap());
4316    }
4317}
4318
4319#[test]
4320fn char_class_simple() {
4321    let p = Pattern::new("[abc]").unwrap();
4322    assert!(p.is_match("a"));
4323    assert!(p.is_match("apple"));
4324    assert!(p.is_match("cab"));
4325    assert!(!p.is_match("xyz"));
4326}
4327
4328#[test]
4329fn char_class_range() {
4330    let p = Pattern::new("[a-z]").unwrap();
4331    assert!(p.is_match("hello"));
4332    assert!(p.is_match("xyz"));
4333    assert!(!p.is_match("HELLO"));
4334    assert!(!p.is_match("123"));
4335}
4336
4337#[test]
4338fn char_class_multiple_ranges() {
4339    let p = Pattern::new("[a-zA-Z0-9]").unwrap();
4340    assert!(p.is_match("hello"));
4341    assert!(p.is_match("WORLD"));
4342    assert!(p.is_match("test123"));
4343    assert!(!p.is_match("!!!"));
4344}
4345
4346#[test]
4347fn char_class_negated() {
4348    let p = Pattern::new("[^0-9]").unwrap();
4349    assert!(p.is_match("abc"));
4350    assert!(!p.is_match("123"));
4351    assert!(p.is_match("a1b")); // Contains non-digit
4352}
4353
4354#[test]
4355fn char_class_find() {
4356    let p = Pattern::new("[0-9]").unwrap();
4357    assert_eq!(p.find("abc123"), Some((3, 4))); // Finds 1
4358
4359    let matches = p.find_all("a1b2c3");
4360    assert_eq!(matches, vec![(1, 2), (3, 4), (5, 6)]);
4361}
4362
4363#[test]
4364fn debug_parse_group() {
4365    let pattern = "(foo|bar)+";
4366    match parser::group::parse_group(pattern) {
4367        Ok((group, bytes_consumed)) => {
4368            eprintln!("bytes_consumed: {}", bytes_consumed);
4369            eprintln!("pattern.len(): {}", pattern.len());
4370            eprintln!("group: {:?}", group);
4371            assert_eq!(
4372                bytes_consumed,
4373                pattern.len(),
4374                "Group should consume entire pattern"
4375            );
4376        }
4377        Err(e) => {
4378            panic!("Error: {}", e);
4379        }
4380    }
4381
4382    // Test actual matching
4383    eprintln!("\n--- Testing Pattern::new ---");
4384    let re = Pattern::new(pattern).unwrap();
4385    eprintln!("Pattern created: {:?}", re);
4386    eprintln!("is_match('foo'): {}", re.is_match("foo"));
4387    eprintln!("is_match('bar'): {}", re.is_match("bar"));
4388    eprintln!("is_match('foobar'): {}", re.is_match("foobar"));
4389}