regress/
classicalbacktrack.rs

1//! Classical backtracking execution engine
2
3use crate::api::Match;
4use crate::bytesearch;
5use crate::cursor;
6use crate::cursor::{Backward, Direction, Forward};
7use crate::exec;
8use crate::indexing;
9use crate::indexing::{AsciiInput, ElementType, InputIndexer, Utf8Input};
10#[cfg(not(feature = "utf16"))]
11use crate::insn::StartPredicate;
12use crate::insn::{CompiledRegex, Insn, LoopFields};
13use crate::matchers;
14use crate::matchers::CharProperties;
15use crate::position::PositionType;
16use crate::scm;
17use crate::scm::SingleCharMatcher;
18use crate::types::{CaptureGroupID, GroupData, IP, LoopData, LoopID, MAX_CAPTURE_GROUPS};
19use crate::util::DebugCheckIndex;
20#[cfg(not(feature = "std"))]
21use alloc::vec::Vec;
22use core::ops::Range;
23
24#[derive(Clone, Debug)]
25enum BacktrackInsn<Input: InputIndexer> {
26    /// Nothing more to backtrack.
27    /// This "backstops" our stack.
28    Exhausted,
29
30    /// Restore the IP and position.
31    SetPosition { ip: IP, pos: Input::Position },
32
33    SetLoopData {
34        id: LoopID,
35        data: LoopData<Input::Position>,
36    },
37
38    SetCaptureGroup {
39        id: CaptureGroupID,
40        data: GroupData<Input::Position>,
41    },
42
43    EnterNonGreedyLoop {
44        // The IP of the loop.
45        // This is guaranteed to point to an EnterLoopInsn.
46        ip: IP,
47        data: LoopData<Input::Position>,
48    },
49
50    GreedyLoop1Char {
51        continuation: IP,
52        min: Input::Position,
53        max: Input::Position,
54    },
55
56    NonGreedyLoop1Char {
57        continuation: IP,
58        min: Input::Position,
59        max: Input::Position,
60    },
61}
62
63#[derive(Debug, Default)]
64struct State<Position: PositionType> {
65    loops: Vec<LoopData<Position>>,
66    groups: Vec<GroupData<Position>>,
67}
68
69#[derive(Debug)]
70pub(crate) struct MatchAttempter<'a, Input: InputIndexer> {
71    re: &'a CompiledRegex,
72    bts: Vec<BacktrackInsn<Input>>,
73    s: State<Input::Position>,
74}
75
76impl<'a, Input: InputIndexer> MatchAttempter<'a, Input> {
77    pub(crate) fn new(re: &'a CompiledRegex, entry: Input::Position) -> Self {
78        Self {
79            re,
80            bts: vec![BacktrackInsn::Exhausted],
81            s: State {
82                loops: vec![LoopData::new(entry); re.loops as usize],
83                groups: vec![GroupData::new(); re.groups as usize],
84            },
85        }
86    }
87
88    #[inline(always)]
89    fn push_backtrack(&mut self, bt: BacktrackInsn<Input>) {
90        self.bts.push(bt)
91    }
92
93    #[inline(always)]
94    fn pop_backtrack(&mut self) {
95        // Note we never pop the last instruction so this will never be empty.
96        debug_assert!(!self.bts.is_empty());
97        if cfg!(feature = "prohibit-unsafe") {
98            self.bts.pop();
99        } else {
100            unsafe { self.bts.set_len(self.bts.len() - 1) }
101        }
102    }
103
104    fn prepare_to_enter_loop(
105        bts: &mut Vec<BacktrackInsn<Input>>,
106        pos: Input::Position,
107        loop_fields: &LoopFields,
108        loop_data: &mut LoopData<Input::Position>,
109    ) {
110        bts.push(BacktrackInsn::SetLoopData {
111            id: loop_fields.loop_id,
112            data: *loop_data,
113        });
114        loop_data.iters += 1;
115        loop_data.entry = pos;
116    }
117
118    fn run_loop(
119        &mut self,
120        loop_fields: &'a LoopFields,
121        pos: Input::Position,
122        ip: IP,
123    ) -> Option<IP> {
124        let loop_data = &mut self.s.loops[loop_fields.loop_id as usize];
125        let iteration = loop_data.iters;
126
127        let do_taken = iteration < loop_fields.max_iters;
128        let do_not_taken = iteration >= loop_fields.min_iters;
129
130        let loop_taken_ip = ip + 1;
131        let loop_not_taken_ip = loop_fields.exit as IP;
132
133        // If we have looped more than the minimum number of iterations, reject empty
134        // matches. ES6 21.2.2.5.1 Note 4: "once the minimum number of
135        // repetitions has been satisfied, any more expansions of Atom that match the
136        // empty character sequence are not considered for further repetitions."
137        if loop_data.entry == pos && iteration > loop_fields.min_iters {
138            return None;
139        }
140
141        match (do_taken, do_not_taken) {
142            (false, false) => {
143                // No arms viable.
144                None
145            }
146            (false, true) => {
147                // Only skipping is viable.
148                Some(loop_not_taken_ip)
149            }
150            (true, false) => {
151                // Only entering is viable.
152                MatchAttempter::prepare_to_enter_loop(&mut self.bts, pos, loop_fields, loop_data);
153                Some(loop_taken_ip)
154            }
155            (true, true) if !loop_fields.greedy => {
156                // Both arms are viable; backtrack into the loop.
157                loop_data.entry = pos;
158                self.bts.push(BacktrackInsn::EnterNonGreedyLoop {
159                    ip,
160                    data: *loop_data,
161                });
162                Some(loop_not_taken_ip)
163            }
164            (true, true) => {
165                debug_assert!(loop_fields.greedy, "Should be greedy");
166                // Both arms are viable; backtrack out of the loop.
167                self.bts.push(BacktrackInsn::SetPosition {
168                    ip: loop_not_taken_ip,
169                    pos,
170                });
171                MatchAttempter::prepare_to_enter_loop(&mut self.bts, pos, loop_fields, loop_data);
172                Some(loop_taken_ip)
173            }
174        }
175    }
176
177    // Drive the loop up to \p max times.
178    // \return the position (min, max), or None on failure.
179    #[inline(always)]
180    fn run_scm_loop_impl<Dir: Direction, Scm: SingleCharMatcher<Input, Dir>>(
181        input: &Input,
182        mut pos: Input::Position,
183        min: usize,
184        max: usize,
185        dir: Dir,
186        matcher: Scm,
187    ) -> Option<(Input::Position, Input::Position)> {
188        debug_assert!(min <= max, "min should be <= max");
189        // Drive the iteration min times.
190        // That tells us the min position.
191        for _ in 0..min {
192            if !matcher.matches(input, dir, &mut pos) {
193                return None;
194            }
195        }
196        let min_pos = pos;
197
198        // Drive it up to the max.
199        for _ in 0..(max - min) {
200            let saved = pos;
201            if !matcher.matches(input, dir, &mut pos) {
202                pos = saved;
203                break;
204            }
205        }
206        let max_pos = pos;
207        Some((min_pos, max_pos))
208    }
209
210    // Compute the maximum position from a starting position, up to a limit.
211    // This is used for lazy computation in non-greedy loops.
212    fn compute_max_pos<Dir: Direction, Scm: SingleCharMatcher<Input, Dir>>(
213        input: &Input,
214        mut pos: Input::Position,
215        limit: usize,
216        dir: Dir,
217        matcher: Scm,
218    ) -> Input::Position {
219        for _ in 0..limit {
220            let saved = pos;
221            if !matcher.matches(input, dir, &mut pos) {
222                pos = saved;
223                break;
224            }
225        }
226        pos
227    }
228
229    // Helper function to extract the duplicated match blocks that handle different instruction types
230    // with different matcher functions. This significantly reduces code duplication and compile times.
231    fn with_scm_loop_impl<Dir: Direction>(
232        re: &CompiledRegex,
233        input: &Input,
234        pos: Input::Position,
235        min: usize,
236        max: usize,
237        dir: Dir,
238        ip: IP,
239    ) -> Option<(Input::Position, Input::Position)> {
240        match re.insns.iat(ip + 1) {
241            &Insn::Char(c) => {
242                let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
243                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::Char { c })
244            }
245            &Insn::CharICase(c) => {
246                let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
247                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::CharICase { c })
248            }
249            &Insn::Bracket(idx) => {
250                let bc = &re.brackets[idx];
251                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::Bracket { bc })
252            }
253            Insn::AsciiBracket(bitmap) => Self::run_scm_loop_impl(
254                input,
255                pos,
256                min,
257                max,
258                dir,
259                scm::MatchByteSet { bytes: bitmap },
260            ),
261            Insn::MatchAny => {
262                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchAny::new())
263            }
264            Insn::MatchAnyExceptLineTerminator => Self::run_scm_loop_impl(
265                input,
266                pos,
267                min,
268                max,
269                dir,
270                scm::MatchAnyExceptLineTerminator::new(),
271            ),
272            Insn::CharSet(chars) => {
273                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::CharSet { chars })
274            }
275            &Insn::ByteSet2(bytes) => {
276                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteArraySet { bytes })
277            }
278            &Insn::ByteSet3(bytes) => {
279                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteArraySet { bytes })
280            }
281            &Insn::ByteSet4(bytes) => {
282                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteArraySet { bytes })
283            }
284            Insn::ByteSeq1(bytes) => {
285                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
286            }
287            Insn::ByteSeq2(bytes) => {
288                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
289            }
290            Insn::ByteSeq3(bytes) => {
291                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
292            }
293            Insn::ByteSeq4(bytes) => {
294                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
295            }
296            Insn::ByteSeq5(bytes) => {
297                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
298            }
299            Insn::ByteSeq6(bytes) => {
300                Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
301            }
302            _ => {
303                unreachable!("Missing SCM: {:?}", re.insns.iat(ip + 1));
304            }
305        }
306    }
307
308    // Helper function for compute_max_pos to avoid duplication
309    fn with_scm_compute_max<Dir: Direction>(
310        re: &CompiledRegex,
311        input: &Input,
312        pos: Input::Position,
313        limit: usize,
314        dir: Dir,
315        ip: IP,
316    ) -> Option<Input::Position> {
317        let result = match re.insns.iat(ip + 1) {
318            &Insn::Char(c) => {
319                let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
320                Self::compute_max_pos(input, pos, limit, dir, scm::Char { c })
321            }
322            &Insn::CharICase(c) => {
323                let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
324                Self::compute_max_pos(input, pos, limit, dir, scm::CharICase { c })
325            }
326            &Insn::Bracket(idx) => {
327                let bc = &re.brackets[idx];
328                Self::compute_max_pos(input, pos, limit, dir, scm::Bracket { bc })
329            }
330            Insn::AsciiBracket(bitmap) => {
331                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSet { bytes: bitmap })
332            }
333            Insn::MatchAny => Self::compute_max_pos(input, pos, limit, dir, scm::MatchAny::new()),
334            Insn::MatchAnyExceptLineTerminator => Self::compute_max_pos(
335                input,
336                pos,
337                limit,
338                dir,
339                scm::MatchAnyExceptLineTerminator::new(),
340            ),
341            Insn::CharSet(chars) => {
342                Self::compute_max_pos(input, pos, limit, dir, scm::CharSet { chars })
343            }
344            &Insn::ByteSet2(bytes) => {
345                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteArraySet { bytes })
346            }
347            &Insn::ByteSet3(bytes) => {
348                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteArraySet { bytes })
349            }
350            &Insn::ByteSet4(bytes) => {
351                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteArraySet { bytes })
352            }
353            Insn::ByteSeq1(bytes) => {
354                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
355            }
356            Insn::ByteSeq2(bytes) => {
357                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
358            }
359            Insn::ByteSeq3(bytes) => {
360                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
361            }
362            Insn::ByteSeq4(bytes) => {
363                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
364            }
365            Insn::ByteSeq5(bytes) => {
366                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
367            }
368            Insn::ByteSeq6(bytes) => {
369                Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
370            }
371            _ => {
372                unreachable!("Missing SCM: {:?}", re.insns.iat(ip + 1));
373            }
374        };
375        Some(result)
376    }
377
378    // Given that ip points at a loop whose body matches exactly one character, run
379    // a "single character loop". The big idea here is that we don't need to save
380    // our position every iteration: we know that our loop body matches a single
381    // character so we can backtrack by matching a character backwards.
382    // \return the next IP, or None if the loop failed.
383    #[allow(clippy::too_many_arguments)]
384    fn run_scm_loop<Dir: Direction>(
385        &mut self,
386        input: &Input,
387        dir: Dir,
388        pos: &mut Input::Position,
389        min: usize,
390        max: usize,
391        ip: IP,
392        greedy: bool,
393    ) -> Option<IP> {
394        // For non-greedy loops, we can avoid computing the maximum match eagerly.
395        // We'll only compute it when we need to set up backtracking.
396        let (min_pos, max_pos) = if greedy {
397            // For greedy loops, compute both min and max positions
398            Self::with_scm_loop_impl(self.re, input, *pos, min, max, dir, ip)?
399        } else {
400            // For non-greedy loops, initially only compute the minimum position
401            let (min_pos, _) = Self::with_scm_loop_impl(self.re, input, *pos, min, min, dir, ip)?;
402
403            // For non-greedy loops, we only compute the max position if we need to set up backtracking
404            let max_pos = if min < max {
405                // We need to compute the max for backtracking purposes
406                Self::with_scm_compute_max(self.re, input, min_pos, max - min, dir, ip)?
407            } else {
408                min_pos
409            };
410            (min_pos, max_pos)
411        };
412
413        debug_assert!(
414            if Dir::FORWARD {
415                min_pos <= max_pos
416            } else {
417                min_pos >= max_pos
418            },
419            "min should be <= (>=) max if cursor is tracking forwards (backwards)"
420        );
421
422        // Oh no where is the continuation? It's one past the loop body, which is one
423        // past the loop. Strap in!
424        let continuation = ip + 2;
425        if min_pos != max_pos {
426            // Backtracking is possible.
427            let bti = if greedy {
428                BacktrackInsn::GreedyLoop1Char {
429                    continuation,
430                    min: min_pos,
431                    max: max_pos,
432                }
433            } else {
434                BacktrackInsn::NonGreedyLoop1Char {
435                    continuation,
436                    min: min_pos,
437                    max: max_pos,
438                }
439            };
440            self.bts.push(bti);
441        }
442
443        // Start at the max (min) if greedy (nongreedy).
444        *pos = if greedy { max_pos } else { min_pos };
445        Some(continuation)
446    }
447
448    // Run a lookaround instruction, which is either forwards or backwards
449    // (according to Direction). The half-open range
450    // start_group..end_group is the range of contained capture groups.
451    // \return whether we matched and negate was false, or did not match but negate
452    // is true.
453    fn run_lookaround<Dir: Direction>(
454        &mut self,
455        input: &Input,
456        ip: IP,
457        pos: Input::Position,
458        start_group: CaptureGroupID,
459        end_group: CaptureGroupID,
460        negate: bool,
461    ) -> bool {
462        // Copy capture groups, because if the match fails (or if we are inverted)
463        // we need to restore these.
464        let range = (start_group as usize)..(end_group as usize);
465        // TODO: consider retaining storage here?
466        // Temporarily defeat backtracking.
467        let saved_groups = self.s.groups.iat(range.clone()).to_vec();
468
469        // Start with an "empty" backtrack stack.
470        // TODO: consider using a stack-allocated array.
471        let mut saved_bts = vec![BacktrackInsn::Exhausted];
472        core::mem::swap(&mut self.bts, &mut saved_bts);
473
474        // Enter into the lookaround's instruction stream.
475        let matched = self.try_at_pos(*input, ip, pos, Dir::new()).is_some();
476
477        // Put back our bts.
478        core::mem::swap(&mut self.bts, &mut saved_bts);
479
480        // If we are a positive lookahead that successfully matched, retain the
481        // capture groups (but we need to set up backtracking). Otherwise restore
482        // them.
483        if matched && !negate {
484            for (idx, cg) in saved_groups.iter().enumerate() {
485                debug_assert!(idx + (start_group as usize) < MAX_CAPTURE_GROUPS);
486                self.push_backtrack(BacktrackInsn::SetCaptureGroup {
487                    id: (idx as CaptureGroupID) + start_group,
488                    data: *cg,
489                });
490            }
491        } else {
492            self.s.groups.splice(range, saved_groups);
493        }
494        matched != negate
495    }
496
497    /// Attempt to backtrack.
498    /// \return true if we backtracked, false if we exhaust the backtrack stack.
499    fn try_backtrack<Dir: Direction>(
500        &mut self,
501        input: &Input,
502        ip: &mut IP,
503        pos: &mut Input::Position,
504        _dir: Dir,
505    ) -> bool {
506        loop {
507            // We always have a single Exhausted instruction backstopping our stack,
508            // so we do not need to check for empty bts.
509            debug_assert!(!self.bts.is_empty(), "Backtrack stack should not be empty");
510            let bt = match self.bts.last_mut() {
511                Some(bt) => bt,
512                None => rs_unreachable!("BT stack should never be empty"),
513            };
514            match bt {
515                BacktrackInsn::Exhausted => return false,
516
517                BacktrackInsn::SetPosition {
518                    ip: saved_ip,
519                    pos: saved_pos,
520                } => {
521                    *ip = *saved_ip;
522                    *pos = *saved_pos;
523                    self.pop_backtrack();
524                    return true;
525                }
526                BacktrackInsn::SetLoopData { id, data } => {
527                    *self.s.loops.mat(*id as usize) = *data;
528                    self.pop_backtrack();
529                }
530                BacktrackInsn::SetCaptureGroup { id, data } => {
531                    *self.s.groups.mat(*id as usize) = *data;
532                    self.pop_backtrack();
533                }
534
535                &mut BacktrackInsn::EnterNonGreedyLoop { ip: loop_ip, data } => {
536                    // Must pop before we enter the loop.
537                    self.pop_backtrack();
538                    *ip = loop_ip + 1;
539                    *pos = data.entry;
540                    let loop_fields = match &self.re.insns.iat(loop_ip) {
541                        Insn::EnterLoop(loop_fields) => loop_fields,
542                        _ => rs_unreachable!("EnterNonGreedyLoop must point at a loop instruction"),
543                    };
544                    let loop_data = self.s.loops.mat(loop_fields.loop_id as usize);
545                    *loop_data = data;
546                    MatchAttempter::prepare_to_enter_loop(
547                        &mut self.bts,
548                        *pos,
549                        loop_fields,
550                        loop_data,
551                    );
552                    return true;
553                }
554
555                BacktrackInsn::GreedyLoop1Char {
556                    continuation,
557                    min,
558                    max,
559                } => {
560                    // The match failed at the max location.
561                    debug_assert!(
562                        if Dir::FORWARD { max >= min } else { max <= min },
563                        "max should be >= min (or <= if tracking backwards)"
564                    );
565                    // If min is equal to max, there is no more backtracking to be done;
566                    // otherwise move opposite the direction of the cursor.
567                    if *max == *min {
568                        // We have backtracked this loop as far as possible.
569                        self.bts.pop();
570                        continue;
571                    }
572                    let newmax = if Dir::FORWARD {
573                        input.next_left_pos(*max)
574                    } else {
575                        input.next_right_pos(*max)
576                    };
577                    if let Some(newmax) = newmax {
578                        *pos = newmax;
579                        *max = newmax;
580                    } else {
581                        rs_unreachable!("Should always be able to advance since min != max")
582                    }
583                    *ip = *continuation;
584                    return true;
585                }
586
587                BacktrackInsn::NonGreedyLoop1Char {
588                    continuation,
589                    min,
590                    max,
591                } => {
592                    // The match failed at the min location.
593                    debug_assert!(
594                        if Dir::FORWARD { max >= min } else { max <= min },
595                        "max should be >= min (or <= if tracking backwards)"
596                    );
597                    if *max == *min {
598                        // We have backtracked this loop as far as possible.
599                        self.bts.pop();
600                        continue;
601                    }
602                    // Move in the direction of the cursor.
603                    let newmin = if Dir::FORWARD {
604                        input.next_right_pos(*min)
605                    } else {
606                        input.next_left_pos(*min)
607                    };
608                    if let Some(newmin) = newmin {
609                        *pos = newmin;
610                        *min = newmin;
611                    } else {
612                        rs_unreachable!("Should always be able to advance since min != max")
613                    }
614                    *ip = *continuation;
615                    return true;
616                }
617            }
618        }
619    }
620
621    /// Attempt to match at a given IP and position.
622    fn try_at_pos<Dir: Direction>(
623        &mut self,
624        inp: Input,
625        mut ip: IP,
626        mut pos: Input::Position,
627        dir: Dir,
628    ) -> Option<Input::Position> {
629        debug_assert!(
630            self.bts.len() == 1,
631            "Should be only initial exhausted backtrack insn"
632        );
633        // TODO: we are inconsistent about passing Input by reference or value.
634        let input = &inp;
635        let re = self.re;
636        // These are not really loops, they are just labels that we effectively 'goto'
637        // to.
638        #[allow(clippy::never_loop)]
639        'nextinsn: loop {
640            'backtrack: loop {
641                // Helper macro to either increment ip and go to the next insn, or backtrack.
642                macro_rules! next_or_bt {
643                    ($e:expr) => {
644                        if $e {
645                            ip += 1;
646                            continue 'nextinsn;
647                        } else {
648                            break 'backtrack;
649                        }
650                    };
651                }
652
653                match re.insns.iat(ip) {
654                    &Insn::Char(c) => {
655                        let m = match <<Input as InputIndexer>::Element as ElementType>::try_from(c)
656                        {
657                            Some(c) => scm::Char { c }.matches(input, dir, &mut pos),
658                            None => false,
659                        };
660                        next_or_bt!(m);
661                    }
662
663                    Insn::CharSet(chars) => {
664                        let m = scm::CharSet { chars }.matches(input, dir, &mut pos);
665                        next_or_bt!(m);
666                    }
667
668                    &Insn::ByteSet2(bytes) => {
669                        next_or_bt!(scm::MatchByteArraySet { bytes }.matches(input, dir, &mut pos))
670                    }
671                    &Insn::ByteSet3(bytes) => {
672                        next_or_bt!(scm::MatchByteArraySet { bytes }.matches(input, dir, &mut pos))
673                    }
674                    &Insn::ByteSet4(bytes) => {
675                        next_or_bt!(scm::MatchByteArraySet { bytes }.matches(input, dir, &mut pos))
676                    }
677
678                    Insn::ByteSeq1(v) => {
679                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
680                    }
681                    Insn::ByteSeq2(v) => {
682                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
683                    }
684                    Insn::ByteSeq3(v) => {
685                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
686                    }
687                    Insn::ByteSeq4(v) => {
688                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
689                    }
690                    Insn::ByteSeq5(v) => {
691                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
692                    }
693                    Insn::ByteSeq6(v) => {
694                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
695                    }
696                    Insn::ByteSeq7(v) => {
697                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
698                    }
699                    Insn::ByteSeq8(v) => {
700                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
701                    }
702                    Insn::ByteSeq9(v) => {
703                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
704                    }
705                    Insn::ByteSeq10(v) => {
706                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
707                    }
708                    Insn::ByteSeq11(v) => {
709                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
710                    }
711                    Insn::ByteSeq12(v) => {
712                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
713                    }
714                    Insn::ByteSeq13(v) => {
715                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
716                    }
717                    Insn::ByteSeq14(v) => {
718                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
719                    }
720                    Insn::ByteSeq15(v) => {
721                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
722                    }
723                    Insn::ByteSeq16(v) => {
724                        next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
725                    }
726
727                    &Insn::CharICase(c) => {
728                        let m = match <<Input as indexing::InputIndexer>::Element as indexing::ElementType>::try_from(c) {
729                            Some(c) => scm::CharICase { c }.matches(input, dir, &mut pos),
730                            None => false,
731                        };
732                        next_or_bt!(m)
733                    }
734
735                    Insn::AsciiBracket(bitmap) => next_or_bt!(
736                        scm::MatchByteSet { bytes: bitmap }.matches(input, dir, &mut pos)
737                    ),
738
739                    &Insn::Bracket(idx) => {
740                        next_or_bt!(
741                            scm::Bracket {
742                                bc: &self.re.brackets[idx]
743                            }
744                            .matches(input, dir, &mut pos)
745                        )
746                    }
747
748                    Insn::MatchAny => {
749                        next_or_bt!(scm::MatchAny::new().matches(input, dir, &mut pos))
750                    }
751
752                    Insn::MatchAnyExceptLineTerminator => {
753                        next_or_bt!(
754                            scm::MatchAnyExceptLineTerminator::new().matches(input, dir, &mut pos)
755                        )
756                    }
757
758                    &Insn::WordBoundary { invert } => {
759                        // Copy the positions since these destructively move them.
760                        let prev_wordchar = input
761                            .peek_left(pos)
762                            .is_some_and(Input::CharProps::is_word_char);
763                        let curr_wordchar = input
764                            .peek_right(pos)
765                            .is_some_and(Input::CharProps::is_word_char);
766                        let is_boundary = prev_wordchar != curr_wordchar;
767                        next_or_bt!(is_boundary != invert)
768                    }
769
770                    Insn::StartOfLine => {
771                        let matches = match input.peek_left(pos) {
772                            None => true,
773                            Some(c)
774                                if re.flags.multiline
775                                    && Input::CharProps::is_line_terminator(c) =>
776                            {
777                                true
778                            }
779                            _ => false,
780                        };
781                        next_or_bt!(matches)
782                    }
783                    Insn::EndOfLine => {
784                        let matches = match input.peek_right(pos) {
785                            None => true, // we're at the right of the string
786                            Some(c)
787                                if re.flags.multiline
788                                    && Input::CharProps::is_line_terminator(c) =>
789                            {
790                                true
791                            }
792                            _ => false,
793                        };
794                        next_or_bt!(matches)
795                    }
796
797                    &Insn::Jump { target } => {
798                        ip = target as usize;
799                        continue 'nextinsn;
800                    }
801
802                    &Insn::BeginCaptureGroup(cg_idx) => {
803                        let cg = self.s.groups.mat(cg_idx as usize);
804                        self.bts.push(BacktrackInsn::SetCaptureGroup {
805                            id: cg_idx,
806                            data: *cg,
807                        });
808                        if Dir::FORWARD {
809                            cg.start = Some(pos);
810                            debug_assert!(
811                                cg.end.is_none(),
812                                "Should not have already exited capture group we are entering"
813                            )
814                        } else {
815                            cg.end = Some(pos);
816                            debug_assert!(
817                                cg.start.is_none(),
818                                "Should not have already exited capture group we are entering"
819                            )
820                        }
821                        next_or_bt!(true)
822                    }
823
824                    &Insn::EndCaptureGroup(cg_idx) => {
825                        let cg = self.s.groups.mat(cg_idx as usize);
826                        if Dir::FORWARD {
827                            debug_assert!(
828                                cg.start_matched(),
829                                "Capture group should have been entered"
830                            );
831                            cg.end = Some(pos);
832                        } else {
833                            debug_assert!(
834                                cg.end_matched(),
835                                "Capture group should have been entered"
836                            );
837                            cg.start = Some(pos)
838                        }
839                        next_or_bt!(true)
840                    }
841
842                    &Insn::ResetCaptureGroup(cg_idx) => {
843                        let cg = self.s.groups.mat(cg_idx as usize);
844                        self.bts.push(BacktrackInsn::SetCaptureGroup {
845                            id: cg_idx,
846                            data: *cg,
847                        });
848                        cg.reset();
849                        next_or_bt!(true)
850                    }
851
852                    &Insn::BackRef(cg_idx) => {
853                        let cg = self.s.groups.mat(cg_idx as usize);
854                        // Backreferences to a capture group that did not match always succeed (ES5
855                        // 15.10.2.9).
856                        // Note we may be in the capture group we are examining, e.g. /(abc\1)/.
857                        let matched;
858                        if let Some(orig_range) = cg.as_range() {
859                            if re.flags.icase {
860                                matched = matchers::backref_icase(input, dir, orig_range, &mut pos);
861                            } else {
862                                matched = matchers::backref(input, dir, orig_range, &mut pos);
863                            }
864                        } else {
865                            // This group has not been exited and so the match succeeds (ES6
866                            // 21.2.2.9).
867                            matched = true;
868                        }
869                        next_or_bt!(matched)
870                    }
871
872                    &Insn::Lookahead {
873                        negate,
874                        start_group,
875                        end_group,
876                        continuation,
877                    } => {
878                        if self.run_lookaround::<Forward>(
879                            input,
880                            ip + 1,
881                            pos,
882                            start_group,
883                            end_group,
884                            negate,
885                        ) {
886                            ip = continuation as IP;
887                            continue 'nextinsn;
888                        } else {
889                            break 'backtrack;
890                        }
891                    }
892
893                    &Insn::Lookbehind {
894                        negate,
895                        start_group,
896                        end_group,
897                        continuation,
898                    } => {
899                        if self.run_lookaround::<Backward>(
900                            input,
901                            ip + 1,
902                            pos,
903                            start_group,
904                            end_group,
905                            negate,
906                        ) {
907                            ip = continuation as IP;
908                            continue 'nextinsn;
909                        } else {
910                            break 'backtrack;
911                        }
912                    }
913
914                    &Insn::Alt { secondary } => {
915                        self.push_backtrack(BacktrackInsn::SetPosition {
916                            ip: secondary as IP,
917                            pos,
918                        });
919                        next_or_bt!(true);
920                    }
921
922                    Insn::EnterLoop(fields) => {
923                        // Entering a loop, not re-entering it.
924                        self.s.loops.mat(fields.loop_id as usize).iters = 0;
925                        match self.run_loop(fields, pos, ip) {
926                            Some(next_ip) => {
927                                ip = next_ip;
928                                continue 'nextinsn;
929                            }
930                            None => {
931                                break 'backtrack;
932                            }
933                        }
934                    }
935
936                    &Insn::LoopAgain { begin } => {
937                        let act = match re.insns.iat(begin as IP) {
938                            Insn::EnterLoop(fields) => self.run_loop(fields, pos, begin as IP),
939                            _ => rs_unreachable!("EnterLoop should always refer to loop field"),
940                        };
941                        match act {
942                            Some(next_ip) => {
943                                ip = next_ip;
944                                continue 'nextinsn;
945                            }
946                            None => break 'backtrack,
947                        }
948                    }
949
950                    &Insn::Loop1CharBody {
951                        min_iters,
952                        max_iters,
953                        greedy,
954                    } => {
955                        if let Some(next_ip) = self
956                            .run_scm_loop(input, dir, &mut pos, min_iters, max_iters, ip, greedy)
957                        {
958                            ip = next_ip;
959                            continue 'nextinsn;
960                        } else {
961                            break 'backtrack;
962                        }
963                    }
964
965                    Insn::Goal => {
966                        // Keep all but the initial give-up bts.
967                        self.bts.truncate(1);
968                        return Some(pos);
969                    }
970
971                    Insn::JustFail => {
972                        break 'backtrack;
973                    }
974                }
975            }
976
977            // This after the backtrack loop.
978            // A break 'backtrack will jump here.
979            if self.try_backtrack(input, &mut ip, &mut pos, dir) {
980                continue 'nextinsn;
981            } else {
982                // We have exhausted the backtracking stack.
983                debug_assert!(self.bts.len() == 1, "Should have exhausted backtrack stack");
984                return None;
985            }
986        }
987
988        // This is outside the nextinsn loop.
989        // It is an error to get here.
990        // Every instruction should either continue 'nextinsn, or break 'backtrack.
991        {
992            #![allow(unreachable_code)]
993            rs_unreachable!("Should not fall to end of nextinsn loop")
994        }
995    }
996}
997
998#[derive(Debug)]
999pub struct BacktrackExecutor<'r, Input: InputIndexer> {
1000    input: Input,
1001    matcher: MatchAttempter<'r, Input>,
1002}
1003
1004#[cfg(feature = "utf16")]
1005impl<'r, Input: InputIndexer> BacktrackExecutor<'r, Input> {
1006    pub(crate) fn new(input: Input, matcher: MatchAttempter<'r, Input>) -> Self {
1007        Self { input, matcher }
1008    }
1009}
1010
1011impl<Input: InputIndexer> BacktrackExecutor<'_, Input> {
1012    fn successful_match(&mut self, start: Input::Position, end: Input::Position) -> Match {
1013        // We want to simultaneously map our groups to offsets, and clear the groups.
1014        // A for loop is the easiest way to do this while satisfying the borrow checker.
1015        // TODO: avoid allocating so much.
1016        let mut captures = Vec::new();
1017        captures.reserve_exact(self.matcher.s.groups.len());
1018        for gd in self.matcher.s.groups.iter_mut() {
1019            captures.push(match gd.as_range() {
1020                Some(r) => Some(Range {
1021                    start: self.input.pos_to_offset(r.start),
1022                    end: self.input.pos_to_offset(r.end),
1023                }),
1024                None => None,
1025            });
1026            gd.start = None;
1027            gd.end = None;
1028        }
1029        Match {
1030            range: self.input.pos_to_offset(start)..self.input.pos_to_offset(end),
1031            captures,
1032            group_names: self.matcher.re.group_names.clone(),
1033        }
1034    }
1035
1036    /// \return the next match for an anchored regex that only matches at the start.
1037    /// This avoids any string searching and only tries matching at the given position.
1038    #[cfg(not(feature = "utf16"))]
1039    fn next_match_anchored(
1040        &mut self,
1041        pos: Input::Position,
1042        next_start: &mut Option<Input::Position>,
1043    ) -> Option<Match> {
1044        let inp = self.input;
1045        // For anchored regexes, only try matching at the current position
1046        if let Some(end) = self.matcher.try_at_pos(inp, 0, pos, Forward::new()) {
1047            // If we matched the empty string, we have to increment.
1048            if end != pos {
1049                *next_start = Some(end)
1050            } else {
1051                *next_start = inp.next_right_pos(end);
1052            }
1053            Some(self.successful_match(pos, end))
1054        } else {
1055            // Anchored regex failed to match at this position, no more matches
1056            None
1057        }
1058    }
1059
1060    /// \return the next match, searching the remaining bytes using the given
1061    /// prefix searcher to quickly find the first potential match location.
1062    fn next_match_with_prefix_search<PrefixSearch: bytesearch::ByteSearcher>(
1063        &mut self,
1064        mut pos: Input::Position,
1065        next_start: &mut Option<Input::Position>,
1066        prefix_search: &PrefixSearch,
1067    ) -> Option<Match> {
1068        let inp = self.input;
1069        loop {
1070            // Find the next start location, or None if none.
1071            // Don't try this unless CODE_UNITS_ARE_BYTES - i.e. don't do byte searches
1072            // on UTF-16 or UCS2.
1073            if Input::CODE_UNITS_ARE_BYTES {
1074                pos = inp.find_bytes(pos, prefix_search)?;
1075            }
1076            if let Some(end) = self.matcher.try_at_pos(inp, 0, pos, Forward::new()) {
1077                // If we matched the empty string, we have to increment.
1078                if end != pos {
1079                    *next_start = Some(end)
1080                } else {
1081                    *next_start = inp.next_right_pos(end);
1082                }
1083                return Some(self.successful_match(pos, end));
1084            }
1085            // Didn't find it at this position, try the next one.
1086            pos = inp.next_right_pos(pos)?;
1087        }
1088    }
1089}
1090
1091impl<Input: InputIndexer> exec::MatchProducer for BacktrackExecutor<'_, Input> {
1092    type Position = Input::Position;
1093
1094    fn initial_position(&self, offset: usize) -> Option<Self::Position> {
1095        self.input.try_move_right(self.input.left_end(), offset)
1096    }
1097
1098    fn next_match(
1099        &mut self,
1100        pos: Input::Position,
1101        next_start: &mut Option<Input::Position>,
1102    ) -> Option<Match> {
1103        // When UTF-16 support is active prefix search is not used due to the different encoding.
1104        #[cfg(feature = "utf16")]
1105        return self.next_match_with_prefix_search(pos, next_start, &bytesearch::EmptyString {});
1106
1107        #[cfg(not(feature = "utf16"))]
1108        match &self.matcher.re.start_pred {
1109            StartPredicate::Arbitrary => {
1110                self.next_match_with_prefix_search(pos, next_start, &bytesearch::EmptyString {})
1111            }
1112            StartPredicate::StartAnchored => self.next_match_anchored(pos, next_start),
1113            StartPredicate::ByteSet1(bytes) => {
1114                self.next_match_with_prefix_search(pos, next_start, bytes)
1115            }
1116            StartPredicate::ByteSet2(bytes) => {
1117                self.next_match_with_prefix_search(pos, next_start, bytes)
1118            }
1119            StartPredicate::ByteSet3(bytes) => {
1120                self.next_match_with_prefix_search(pos, next_start, bytes)
1121            }
1122            StartPredicate::ByteSeq(bytes) => {
1123                self.next_match_with_prefix_search(pos, next_start, bytes.as_ref())
1124            }
1125            StartPredicate::ByteBracket(bitmap) => {
1126                self.next_match_with_prefix_search(pos, next_start, bitmap)
1127            }
1128        }
1129    }
1130}
1131
1132impl<'r, 't> exec::Executor<'r, 't> for BacktrackExecutor<'r, Utf8Input<'t>> {
1133    type AsAscii = BacktrackExecutor<'r, AsciiInput<'t>>;
1134
1135    fn new(re: &'r CompiledRegex, text: &'t str) -> Self {
1136        let input = Utf8Input::new(text, re.flags.unicode);
1137        Self {
1138            input,
1139            matcher: MatchAttempter::new(re, input.left_end()),
1140        }
1141    }
1142}
1143
1144impl<'r, 't> exec::Executor<'r, 't> for BacktrackExecutor<'r, AsciiInput<'t>> {
1145    type AsAscii = BacktrackExecutor<'r, AsciiInput<'t>>;
1146
1147    fn new(re: &'r CompiledRegex, text: &'t str) -> Self {
1148        let input = AsciiInput::new(text);
1149        Self {
1150            input,
1151            matcher: MatchAttempter::new(re, input.left_end()),
1152        }
1153    }
1154}