1use crate::api::Match;
4use crate::bytesearch;
5use crate::cursor;
6use crate::cursor::{Backward, Direction, Forward};
7use crate::exec;
8use crate::indexing;
9use crate::indexing::{AsciiInput, ElementType, InputIndexer, Utf8Input};
10#[cfg(not(feature = "utf16"))]
11use crate::insn::StartPredicate;
12use crate::insn::{CompiledRegex, Insn, LoopFields};
13use crate::matchers;
14use crate::matchers::CharProperties;
15use crate::position::PositionType;
16use crate::scm;
17use crate::scm::SingleCharMatcher;
18use crate::types::{CaptureGroupID, GroupData, IP, LoopData, LoopID, MAX_CAPTURE_GROUPS};
19use crate::util::DebugCheckIndex;
20#[cfg(not(feature = "std"))]
21use alloc::vec::Vec;
22use core::ops::Range;
23
24#[derive(Clone, Debug)]
25enum BacktrackInsn<Input: InputIndexer> {
26 Exhausted,
29
30 SetPosition { ip: IP, pos: Input::Position },
32
33 SetLoopData {
34 id: LoopID,
35 data: LoopData<Input::Position>,
36 },
37
38 SetCaptureGroup {
39 id: CaptureGroupID,
40 data: GroupData<Input::Position>,
41 },
42
43 EnterNonGreedyLoop {
44 ip: IP,
47 data: LoopData<Input::Position>,
48 },
49
50 GreedyLoop1Char {
51 continuation: IP,
52 min: Input::Position,
53 max: Input::Position,
54 },
55
56 NonGreedyLoop1Char {
57 continuation: IP,
58 min: Input::Position,
59 max: Input::Position,
60 },
61}
62
63#[derive(Debug, Default)]
64struct State<Position: PositionType> {
65 loops: Vec<LoopData<Position>>,
66 groups: Vec<GroupData<Position>>,
67}
68
69#[derive(Debug)]
70pub(crate) struct MatchAttempter<'a, Input: InputIndexer> {
71 re: &'a CompiledRegex,
72 bts: Vec<BacktrackInsn<Input>>,
73 s: State<Input::Position>,
74}
75
76impl<'a, Input: InputIndexer> MatchAttempter<'a, Input> {
77 pub(crate) fn new(re: &'a CompiledRegex, entry: Input::Position) -> Self {
78 Self {
79 re,
80 bts: vec![BacktrackInsn::Exhausted],
81 s: State {
82 loops: vec![LoopData::new(entry); re.loops as usize],
83 groups: vec![GroupData::new(); re.groups as usize],
84 },
85 }
86 }
87
88 #[inline(always)]
89 fn push_backtrack(&mut self, bt: BacktrackInsn<Input>) {
90 self.bts.push(bt)
91 }
92
93 #[inline(always)]
94 fn pop_backtrack(&mut self) {
95 debug_assert!(!self.bts.is_empty());
97 if cfg!(feature = "prohibit-unsafe") {
98 self.bts.pop();
99 } else {
100 unsafe { self.bts.set_len(self.bts.len() - 1) }
101 }
102 }
103
104 fn prepare_to_enter_loop(
105 bts: &mut Vec<BacktrackInsn<Input>>,
106 pos: Input::Position,
107 loop_fields: &LoopFields,
108 loop_data: &mut LoopData<Input::Position>,
109 ) {
110 bts.push(BacktrackInsn::SetLoopData {
111 id: loop_fields.loop_id,
112 data: *loop_data,
113 });
114 loop_data.iters += 1;
115 loop_data.entry = pos;
116 }
117
118 fn run_loop(
119 &mut self,
120 loop_fields: &'a LoopFields,
121 pos: Input::Position,
122 ip: IP,
123 ) -> Option<IP> {
124 let loop_data = &mut self.s.loops[loop_fields.loop_id as usize];
125 let iteration = loop_data.iters;
126
127 let do_taken = iteration < loop_fields.max_iters;
128 let do_not_taken = iteration >= loop_fields.min_iters;
129
130 let loop_taken_ip = ip + 1;
131 let loop_not_taken_ip = loop_fields.exit as IP;
132
133 if loop_data.entry == pos && iteration > loop_fields.min_iters {
138 return None;
139 }
140
141 match (do_taken, do_not_taken) {
142 (false, false) => {
143 None
145 }
146 (false, true) => {
147 Some(loop_not_taken_ip)
149 }
150 (true, false) => {
151 MatchAttempter::prepare_to_enter_loop(&mut self.bts, pos, loop_fields, loop_data);
153 Some(loop_taken_ip)
154 }
155 (true, true) if !loop_fields.greedy => {
156 loop_data.entry = pos;
158 self.bts.push(BacktrackInsn::EnterNonGreedyLoop {
159 ip,
160 data: *loop_data,
161 });
162 Some(loop_not_taken_ip)
163 }
164 (true, true) => {
165 debug_assert!(loop_fields.greedy, "Should be greedy");
166 self.bts.push(BacktrackInsn::SetPosition {
168 ip: loop_not_taken_ip,
169 pos,
170 });
171 MatchAttempter::prepare_to_enter_loop(&mut self.bts, pos, loop_fields, loop_data);
172 Some(loop_taken_ip)
173 }
174 }
175 }
176
177 #[inline(always)]
180 fn run_scm_loop_impl<Dir: Direction, Scm: SingleCharMatcher<Input, Dir>>(
181 input: &Input,
182 mut pos: Input::Position,
183 min: usize,
184 max: usize,
185 dir: Dir,
186 matcher: Scm,
187 ) -> Option<(Input::Position, Input::Position)> {
188 debug_assert!(min <= max, "min should be <= max");
189 for _ in 0..min {
192 if !matcher.matches(input, dir, &mut pos) {
193 return None;
194 }
195 }
196 let min_pos = pos;
197
198 for _ in 0..(max - min) {
200 let saved = pos;
201 if !matcher.matches(input, dir, &mut pos) {
202 pos = saved;
203 break;
204 }
205 }
206 let max_pos = pos;
207 Some((min_pos, max_pos))
208 }
209
210 fn compute_max_pos<Dir: Direction, Scm: SingleCharMatcher<Input, Dir>>(
213 input: &Input,
214 mut pos: Input::Position,
215 limit: usize,
216 dir: Dir,
217 matcher: Scm,
218 ) -> Input::Position {
219 for _ in 0..limit {
220 let saved = pos;
221 if !matcher.matches(input, dir, &mut pos) {
222 pos = saved;
223 break;
224 }
225 }
226 pos
227 }
228
229 fn with_scm_loop_impl<Dir: Direction>(
232 re: &CompiledRegex,
233 input: &Input,
234 pos: Input::Position,
235 min: usize,
236 max: usize,
237 dir: Dir,
238 ip: IP,
239 ) -> Option<(Input::Position, Input::Position)> {
240 match re.insns.iat(ip + 1) {
241 &Insn::Char(c) => {
242 let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
243 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::Char { c })
244 }
245 &Insn::CharICase(c) => {
246 let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
247 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::CharICase { c })
248 }
249 &Insn::Bracket(idx) => {
250 let bc = &re.brackets[idx];
251 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::Bracket { bc })
252 }
253 Insn::AsciiBracket(bitmap) => Self::run_scm_loop_impl(
254 input,
255 pos,
256 min,
257 max,
258 dir,
259 scm::MatchByteSet { bytes: bitmap },
260 ),
261 Insn::MatchAny => {
262 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchAny::new())
263 }
264 Insn::MatchAnyExceptLineTerminator => Self::run_scm_loop_impl(
265 input,
266 pos,
267 min,
268 max,
269 dir,
270 scm::MatchAnyExceptLineTerminator::new(),
271 ),
272 Insn::CharSet(chars) => {
273 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::CharSet { chars })
274 }
275 &Insn::ByteSet2(bytes) => {
276 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteArraySet { bytes })
277 }
278 &Insn::ByteSet3(bytes) => {
279 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteArraySet { bytes })
280 }
281 &Insn::ByteSet4(bytes) => {
282 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteArraySet { bytes })
283 }
284 Insn::ByteSeq1(bytes) => {
285 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
286 }
287 Insn::ByteSeq2(bytes) => {
288 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
289 }
290 Insn::ByteSeq3(bytes) => {
291 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
292 }
293 Insn::ByteSeq4(bytes) => {
294 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
295 }
296 Insn::ByteSeq5(bytes) => {
297 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
298 }
299 Insn::ByteSeq6(bytes) => {
300 Self::run_scm_loop_impl(input, pos, min, max, dir, scm::MatchByteSeq { bytes })
301 }
302 _ => {
303 unreachable!("Missing SCM: {:?}", re.insns.iat(ip + 1));
304 }
305 }
306 }
307
308 fn with_scm_compute_max<Dir: Direction>(
310 re: &CompiledRegex,
311 input: &Input,
312 pos: Input::Position,
313 limit: usize,
314 dir: Dir,
315 ip: IP,
316 ) -> Option<Input::Position> {
317 let result = match re.insns.iat(ip + 1) {
318 &Insn::Char(c) => {
319 let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
320 Self::compute_max_pos(input, pos, limit, dir, scm::Char { c })
321 }
322 &Insn::CharICase(c) => {
323 let c = <<Input as InputIndexer>::Element as ElementType>::try_from(c)?;
324 Self::compute_max_pos(input, pos, limit, dir, scm::CharICase { c })
325 }
326 &Insn::Bracket(idx) => {
327 let bc = &re.brackets[idx];
328 Self::compute_max_pos(input, pos, limit, dir, scm::Bracket { bc })
329 }
330 Insn::AsciiBracket(bitmap) => {
331 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSet { bytes: bitmap })
332 }
333 Insn::MatchAny => Self::compute_max_pos(input, pos, limit, dir, scm::MatchAny::new()),
334 Insn::MatchAnyExceptLineTerminator => Self::compute_max_pos(
335 input,
336 pos,
337 limit,
338 dir,
339 scm::MatchAnyExceptLineTerminator::new(),
340 ),
341 Insn::CharSet(chars) => {
342 Self::compute_max_pos(input, pos, limit, dir, scm::CharSet { chars })
343 }
344 &Insn::ByteSet2(bytes) => {
345 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteArraySet { bytes })
346 }
347 &Insn::ByteSet3(bytes) => {
348 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteArraySet { bytes })
349 }
350 &Insn::ByteSet4(bytes) => {
351 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteArraySet { bytes })
352 }
353 Insn::ByteSeq1(bytes) => {
354 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
355 }
356 Insn::ByteSeq2(bytes) => {
357 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
358 }
359 Insn::ByteSeq3(bytes) => {
360 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
361 }
362 Insn::ByteSeq4(bytes) => {
363 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
364 }
365 Insn::ByteSeq5(bytes) => {
366 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
367 }
368 Insn::ByteSeq6(bytes) => {
369 Self::compute_max_pos(input, pos, limit, dir, scm::MatchByteSeq { bytes })
370 }
371 _ => {
372 unreachable!("Missing SCM: {:?}", re.insns.iat(ip + 1));
373 }
374 };
375 Some(result)
376 }
377
378 #[allow(clippy::too_many_arguments)]
384 fn run_scm_loop<Dir: Direction>(
385 &mut self,
386 input: &Input,
387 dir: Dir,
388 pos: &mut Input::Position,
389 min: usize,
390 max: usize,
391 ip: IP,
392 greedy: bool,
393 ) -> Option<IP> {
394 let (min_pos, max_pos) = if greedy {
397 Self::with_scm_loop_impl(self.re, input, *pos, min, max, dir, ip)?
399 } else {
400 let (min_pos, _) = Self::with_scm_loop_impl(self.re, input, *pos, min, min, dir, ip)?;
402
403 let max_pos = if min < max {
405 Self::with_scm_compute_max(self.re, input, min_pos, max - min, dir, ip)?
407 } else {
408 min_pos
409 };
410 (min_pos, max_pos)
411 };
412
413 debug_assert!(
414 if Dir::FORWARD {
415 min_pos <= max_pos
416 } else {
417 min_pos >= max_pos
418 },
419 "min should be <= (>=) max if cursor is tracking forwards (backwards)"
420 );
421
422 let continuation = ip + 2;
425 if min_pos != max_pos {
426 let bti = if greedy {
428 BacktrackInsn::GreedyLoop1Char {
429 continuation,
430 min: min_pos,
431 max: max_pos,
432 }
433 } else {
434 BacktrackInsn::NonGreedyLoop1Char {
435 continuation,
436 min: min_pos,
437 max: max_pos,
438 }
439 };
440 self.bts.push(bti);
441 }
442
443 *pos = if greedy { max_pos } else { min_pos };
445 Some(continuation)
446 }
447
448 fn run_lookaround<Dir: Direction>(
454 &mut self,
455 input: &Input,
456 ip: IP,
457 pos: Input::Position,
458 start_group: CaptureGroupID,
459 end_group: CaptureGroupID,
460 negate: bool,
461 ) -> bool {
462 let range = (start_group as usize)..(end_group as usize);
465 let saved_groups = self.s.groups.iat(range.clone()).to_vec();
468
469 let mut saved_bts = vec![BacktrackInsn::Exhausted];
472 core::mem::swap(&mut self.bts, &mut saved_bts);
473
474 let matched = self.try_at_pos(*input, ip, pos, Dir::new()).is_some();
476
477 core::mem::swap(&mut self.bts, &mut saved_bts);
479
480 if matched && !negate {
484 for (idx, cg) in saved_groups.iter().enumerate() {
485 debug_assert!(idx + (start_group as usize) < MAX_CAPTURE_GROUPS);
486 self.push_backtrack(BacktrackInsn::SetCaptureGroup {
487 id: (idx as CaptureGroupID) + start_group,
488 data: *cg,
489 });
490 }
491 } else {
492 self.s.groups.splice(range, saved_groups);
493 }
494 matched != negate
495 }
496
497 fn try_backtrack<Dir: Direction>(
500 &mut self,
501 input: &Input,
502 ip: &mut IP,
503 pos: &mut Input::Position,
504 _dir: Dir,
505 ) -> bool {
506 loop {
507 debug_assert!(!self.bts.is_empty(), "Backtrack stack should not be empty");
510 let bt = match self.bts.last_mut() {
511 Some(bt) => bt,
512 None => rs_unreachable!("BT stack should never be empty"),
513 };
514 match bt {
515 BacktrackInsn::Exhausted => return false,
516
517 BacktrackInsn::SetPosition {
518 ip: saved_ip,
519 pos: saved_pos,
520 } => {
521 *ip = *saved_ip;
522 *pos = *saved_pos;
523 self.pop_backtrack();
524 return true;
525 }
526 BacktrackInsn::SetLoopData { id, data } => {
527 *self.s.loops.mat(*id as usize) = *data;
528 self.pop_backtrack();
529 }
530 BacktrackInsn::SetCaptureGroup { id, data } => {
531 *self.s.groups.mat(*id as usize) = *data;
532 self.pop_backtrack();
533 }
534
535 &mut BacktrackInsn::EnterNonGreedyLoop { ip: loop_ip, data } => {
536 self.pop_backtrack();
538 *ip = loop_ip + 1;
539 *pos = data.entry;
540 let loop_fields = match &self.re.insns.iat(loop_ip) {
541 Insn::EnterLoop(loop_fields) => loop_fields,
542 _ => rs_unreachable!("EnterNonGreedyLoop must point at a loop instruction"),
543 };
544 let loop_data = self.s.loops.mat(loop_fields.loop_id as usize);
545 *loop_data = data;
546 MatchAttempter::prepare_to_enter_loop(
547 &mut self.bts,
548 *pos,
549 loop_fields,
550 loop_data,
551 );
552 return true;
553 }
554
555 BacktrackInsn::GreedyLoop1Char {
556 continuation,
557 min,
558 max,
559 } => {
560 debug_assert!(
562 if Dir::FORWARD { max >= min } else { max <= min },
563 "max should be >= min (or <= if tracking backwards)"
564 );
565 if *max == *min {
568 self.bts.pop();
570 continue;
571 }
572 let newmax = if Dir::FORWARD {
573 input.next_left_pos(*max)
574 } else {
575 input.next_right_pos(*max)
576 };
577 if let Some(newmax) = newmax {
578 *pos = newmax;
579 *max = newmax;
580 } else {
581 rs_unreachable!("Should always be able to advance since min != max")
582 }
583 *ip = *continuation;
584 return true;
585 }
586
587 BacktrackInsn::NonGreedyLoop1Char {
588 continuation,
589 min,
590 max,
591 } => {
592 debug_assert!(
594 if Dir::FORWARD { max >= min } else { max <= min },
595 "max should be >= min (or <= if tracking backwards)"
596 );
597 if *max == *min {
598 self.bts.pop();
600 continue;
601 }
602 let newmin = if Dir::FORWARD {
604 input.next_right_pos(*min)
605 } else {
606 input.next_left_pos(*min)
607 };
608 if let Some(newmin) = newmin {
609 *pos = newmin;
610 *min = newmin;
611 } else {
612 rs_unreachable!("Should always be able to advance since min != max")
613 }
614 *ip = *continuation;
615 return true;
616 }
617 }
618 }
619 }
620
621 fn try_at_pos<Dir: Direction>(
623 &mut self,
624 inp: Input,
625 mut ip: IP,
626 mut pos: Input::Position,
627 dir: Dir,
628 ) -> Option<Input::Position> {
629 debug_assert!(
630 self.bts.len() == 1,
631 "Should be only initial exhausted backtrack insn"
632 );
633 let input = &inp;
635 let re = self.re;
636 #[allow(clippy::never_loop)]
639 'nextinsn: loop {
640 'backtrack: loop {
641 macro_rules! next_or_bt {
643 ($e:expr) => {
644 if $e {
645 ip += 1;
646 continue 'nextinsn;
647 } else {
648 break 'backtrack;
649 }
650 };
651 }
652
653 match re.insns.iat(ip) {
654 &Insn::Char(c) => {
655 let m = match <<Input as InputIndexer>::Element as ElementType>::try_from(c)
656 {
657 Some(c) => scm::Char { c }.matches(input, dir, &mut pos),
658 None => false,
659 };
660 next_or_bt!(m);
661 }
662
663 Insn::CharSet(chars) => {
664 let m = scm::CharSet { chars }.matches(input, dir, &mut pos);
665 next_or_bt!(m);
666 }
667
668 &Insn::ByteSet2(bytes) => {
669 next_or_bt!(scm::MatchByteArraySet { bytes }.matches(input, dir, &mut pos))
670 }
671 &Insn::ByteSet3(bytes) => {
672 next_or_bt!(scm::MatchByteArraySet { bytes }.matches(input, dir, &mut pos))
673 }
674 &Insn::ByteSet4(bytes) => {
675 next_or_bt!(scm::MatchByteArraySet { bytes }.matches(input, dir, &mut pos))
676 }
677
678 Insn::ByteSeq1(v) => {
679 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
680 }
681 Insn::ByteSeq2(v) => {
682 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
683 }
684 Insn::ByteSeq3(v) => {
685 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
686 }
687 Insn::ByteSeq4(v) => {
688 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
689 }
690 Insn::ByteSeq5(v) => {
691 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
692 }
693 Insn::ByteSeq6(v) => {
694 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
695 }
696 Insn::ByteSeq7(v) => {
697 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
698 }
699 Insn::ByteSeq8(v) => {
700 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
701 }
702 Insn::ByteSeq9(v) => {
703 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
704 }
705 Insn::ByteSeq10(v) => {
706 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
707 }
708 Insn::ByteSeq11(v) => {
709 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
710 }
711 Insn::ByteSeq12(v) => {
712 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
713 }
714 Insn::ByteSeq13(v) => {
715 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
716 }
717 Insn::ByteSeq14(v) => {
718 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
719 }
720 Insn::ByteSeq15(v) => {
721 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
722 }
723 Insn::ByteSeq16(v) => {
724 next_or_bt!(cursor::try_match_lit(input, dir, &mut pos, v))
725 }
726
727 &Insn::CharICase(c) => {
728 let m = match <<Input as indexing::InputIndexer>::Element as indexing::ElementType>::try_from(c) {
729 Some(c) => scm::CharICase { c }.matches(input, dir, &mut pos),
730 None => false,
731 };
732 next_or_bt!(m)
733 }
734
735 Insn::AsciiBracket(bitmap) => next_or_bt!(
736 scm::MatchByteSet { bytes: bitmap }.matches(input, dir, &mut pos)
737 ),
738
739 &Insn::Bracket(idx) => {
740 next_or_bt!(
741 scm::Bracket {
742 bc: &self.re.brackets[idx]
743 }
744 .matches(input, dir, &mut pos)
745 )
746 }
747
748 Insn::MatchAny => {
749 next_or_bt!(scm::MatchAny::new().matches(input, dir, &mut pos))
750 }
751
752 Insn::MatchAnyExceptLineTerminator => {
753 next_or_bt!(
754 scm::MatchAnyExceptLineTerminator::new().matches(input, dir, &mut pos)
755 )
756 }
757
758 &Insn::WordBoundary { invert } => {
759 let prev_wordchar = input
761 .peek_left(pos)
762 .is_some_and(Input::CharProps::is_word_char);
763 let curr_wordchar = input
764 .peek_right(pos)
765 .is_some_and(Input::CharProps::is_word_char);
766 let is_boundary = prev_wordchar != curr_wordchar;
767 next_or_bt!(is_boundary != invert)
768 }
769
770 Insn::StartOfLine => {
771 let matches = match input.peek_left(pos) {
772 None => true,
773 Some(c)
774 if re.flags.multiline
775 && Input::CharProps::is_line_terminator(c) =>
776 {
777 true
778 }
779 _ => false,
780 };
781 next_or_bt!(matches)
782 }
783 Insn::EndOfLine => {
784 let matches = match input.peek_right(pos) {
785 None => true, Some(c)
787 if re.flags.multiline
788 && Input::CharProps::is_line_terminator(c) =>
789 {
790 true
791 }
792 _ => false,
793 };
794 next_or_bt!(matches)
795 }
796
797 &Insn::Jump { target } => {
798 ip = target as usize;
799 continue 'nextinsn;
800 }
801
802 &Insn::BeginCaptureGroup(cg_idx) => {
803 let cg = self.s.groups.mat(cg_idx as usize);
804 self.bts.push(BacktrackInsn::SetCaptureGroup {
805 id: cg_idx,
806 data: *cg,
807 });
808 if Dir::FORWARD {
809 cg.start = Some(pos);
810 debug_assert!(
811 cg.end.is_none(),
812 "Should not have already exited capture group we are entering"
813 )
814 } else {
815 cg.end = Some(pos);
816 debug_assert!(
817 cg.start.is_none(),
818 "Should not have already exited capture group we are entering"
819 )
820 }
821 next_or_bt!(true)
822 }
823
824 &Insn::EndCaptureGroup(cg_idx) => {
825 let cg = self.s.groups.mat(cg_idx as usize);
826 if Dir::FORWARD {
827 debug_assert!(
828 cg.start_matched(),
829 "Capture group should have been entered"
830 );
831 cg.end = Some(pos);
832 } else {
833 debug_assert!(
834 cg.end_matched(),
835 "Capture group should have been entered"
836 );
837 cg.start = Some(pos)
838 }
839 next_or_bt!(true)
840 }
841
842 &Insn::ResetCaptureGroup(cg_idx) => {
843 let cg = self.s.groups.mat(cg_idx as usize);
844 self.bts.push(BacktrackInsn::SetCaptureGroup {
845 id: cg_idx,
846 data: *cg,
847 });
848 cg.reset();
849 next_or_bt!(true)
850 }
851
852 &Insn::BackRef(cg_idx) => {
853 let cg = self.s.groups.mat(cg_idx as usize);
854 let matched;
858 if let Some(orig_range) = cg.as_range() {
859 if re.flags.icase {
860 matched = matchers::backref_icase(input, dir, orig_range, &mut pos);
861 } else {
862 matched = matchers::backref(input, dir, orig_range, &mut pos);
863 }
864 } else {
865 matched = true;
868 }
869 next_or_bt!(matched)
870 }
871
872 &Insn::Lookahead {
873 negate,
874 start_group,
875 end_group,
876 continuation,
877 } => {
878 if self.run_lookaround::<Forward>(
879 input,
880 ip + 1,
881 pos,
882 start_group,
883 end_group,
884 negate,
885 ) {
886 ip = continuation as IP;
887 continue 'nextinsn;
888 } else {
889 break 'backtrack;
890 }
891 }
892
893 &Insn::Lookbehind {
894 negate,
895 start_group,
896 end_group,
897 continuation,
898 } => {
899 if self.run_lookaround::<Backward>(
900 input,
901 ip + 1,
902 pos,
903 start_group,
904 end_group,
905 negate,
906 ) {
907 ip = continuation as IP;
908 continue 'nextinsn;
909 } else {
910 break 'backtrack;
911 }
912 }
913
914 &Insn::Alt { secondary } => {
915 self.push_backtrack(BacktrackInsn::SetPosition {
916 ip: secondary as IP,
917 pos,
918 });
919 next_or_bt!(true);
920 }
921
922 Insn::EnterLoop(fields) => {
923 self.s.loops.mat(fields.loop_id as usize).iters = 0;
925 match self.run_loop(fields, pos, ip) {
926 Some(next_ip) => {
927 ip = next_ip;
928 continue 'nextinsn;
929 }
930 None => {
931 break 'backtrack;
932 }
933 }
934 }
935
936 &Insn::LoopAgain { begin } => {
937 let act = match re.insns.iat(begin as IP) {
938 Insn::EnterLoop(fields) => self.run_loop(fields, pos, begin as IP),
939 _ => rs_unreachable!("EnterLoop should always refer to loop field"),
940 };
941 match act {
942 Some(next_ip) => {
943 ip = next_ip;
944 continue 'nextinsn;
945 }
946 None => break 'backtrack,
947 }
948 }
949
950 &Insn::Loop1CharBody {
951 min_iters,
952 max_iters,
953 greedy,
954 } => {
955 if let Some(next_ip) = self
956 .run_scm_loop(input, dir, &mut pos, min_iters, max_iters, ip, greedy)
957 {
958 ip = next_ip;
959 continue 'nextinsn;
960 } else {
961 break 'backtrack;
962 }
963 }
964
965 Insn::Goal => {
966 self.bts.truncate(1);
968 return Some(pos);
969 }
970
971 Insn::JustFail => {
972 break 'backtrack;
973 }
974 }
975 }
976
977 if self.try_backtrack(input, &mut ip, &mut pos, dir) {
980 continue 'nextinsn;
981 } else {
982 debug_assert!(self.bts.len() == 1, "Should have exhausted backtrack stack");
984 return None;
985 }
986 }
987
988 {
992 #![allow(unreachable_code)]
993 rs_unreachable!("Should not fall to end of nextinsn loop")
994 }
995 }
996}
997
998#[derive(Debug)]
999pub struct BacktrackExecutor<'r, Input: InputIndexer> {
1000 input: Input,
1001 matcher: MatchAttempter<'r, Input>,
1002}
1003
1004#[cfg(feature = "utf16")]
1005impl<'r, Input: InputIndexer> BacktrackExecutor<'r, Input> {
1006 pub(crate) fn new(input: Input, matcher: MatchAttempter<'r, Input>) -> Self {
1007 Self { input, matcher }
1008 }
1009}
1010
1011impl<Input: InputIndexer> BacktrackExecutor<'_, Input> {
1012 fn successful_match(&mut self, start: Input::Position, end: Input::Position) -> Match {
1013 let mut captures = Vec::new();
1017 captures.reserve_exact(self.matcher.s.groups.len());
1018 for gd in self.matcher.s.groups.iter_mut() {
1019 captures.push(match gd.as_range() {
1020 Some(r) => Some(Range {
1021 start: self.input.pos_to_offset(r.start),
1022 end: self.input.pos_to_offset(r.end),
1023 }),
1024 None => None,
1025 });
1026 gd.start = None;
1027 gd.end = None;
1028 }
1029 Match {
1030 range: self.input.pos_to_offset(start)..self.input.pos_to_offset(end),
1031 captures,
1032 group_names: self.matcher.re.group_names.clone(),
1033 }
1034 }
1035
1036 #[cfg(not(feature = "utf16"))]
1039 fn next_match_anchored(
1040 &mut self,
1041 pos: Input::Position,
1042 next_start: &mut Option<Input::Position>,
1043 ) -> Option<Match> {
1044 let inp = self.input;
1045 if let Some(end) = self.matcher.try_at_pos(inp, 0, pos, Forward::new()) {
1047 if end != pos {
1049 *next_start = Some(end)
1050 } else {
1051 *next_start = inp.next_right_pos(end);
1052 }
1053 Some(self.successful_match(pos, end))
1054 } else {
1055 None
1057 }
1058 }
1059
1060 fn next_match_with_prefix_search<PrefixSearch: bytesearch::ByteSearcher>(
1063 &mut self,
1064 mut pos: Input::Position,
1065 next_start: &mut Option<Input::Position>,
1066 prefix_search: &PrefixSearch,
1067 ) -> Option<Match> {
1068 let inp = self.input;
1069 loop {
1070 if Input::CODE_UNITS_ARE_BYTES {
1074 pos = inp.find_bytes(pos, prefix_search)?;
1075 }
1076 if let Some(end) = self.matcher.try_at_pos(inp, 0, pos, Forward::new()) {
1077 if end != pos {
1079 *next_start = Some(end)
1080 } else {
1081 *next_start = inp.next_right_pos(end);
1082 }
1083 return Some(self.successful_match(pos, end));
1084 }
1085 pos = inp.next_right_pos(pos)?;
1087 }
1088 }
1089}
1090
1091impl<Input: InputIndexer> exec::MatchProducer for BacktrackExecutor<'_, Input> {
1092 type Position = Input::Position;
1093
1094 fn initial_position(&self, offset: usize) -> Option<Self::Position> {
1095 self.input.try_move_right(self.input.left_end(), offset)
1096 }
1097
1098 fn next_match(
1099 &mut self,
1100 pos: Input::Position,
1101 next_start: &mut Option<Input::Position>,
1102 ) -> Option<Match> {
1103 #[cfg(feature = "utf16")]
1105 return self.next_match_with_prefix_search(pos, next_start, &bytesearch::EmptyString {});
1106
1107 #[cfg(not(feature = "utf16"))]
1108 match &self.matcher.re.start_pred {
1109 StartPredicate::Arbitrary => {
1110 self.next_match_with_prefix_search(pos, next_start, &bytesearch::EmptyString {})
1111 }
1112 StartPredicate::StartAnchored => self.next_match_anchored(pos, next_start),
1113 StartPredicate::ByteSet1(bytes) => {
1114 self.next_match_with_prefix_search(pos, next_start, bytes)
1115 }
1116 StartPredicate::ByteSet2(bytes) => {
1117 self.next_match_with_prefix_search(pos, next_start, bytes)
1118 }
1119 StartPredicate::ByteSet3(bytes) => {
1120 self.next_match_with_prefix_search(pos, next_start, bytes)
1121 }
1122 StartPredicate::ByteSeq(bytes) => {
1123 self.next_match_with_prefix_search(pos, next_start, bytes.as_ref())
1124 }
1125 StartPredicate::ByteBracket(bitmap) => {
1126 self.next_match_with_prefix_search(pos, next_start, bitmap)
1127 }
1128 }
1129 }
1130}
1131
1132impl<'r, 't> exec::Executor<'r, 't> for BacktrackExecutor<'r, Utf8Input<'t>> {
1133 type AsAscii = BacktrackExecutor<'r, AsciiInput<'t>>;
1134
1135 fn new(re: &'r CompiledRegex, text: &'t str) -> Self {
1136 let input = Utf8Input::new(text, re.flags.unicode);
1137 Self {
1138 input,
1139 matcher: MatchAttempter::new(re, input.left_end()),
1140 }
1141 }
1142}
1143
1144impl<'r, 't> exec::Executor<'r, 't> for BacktrackExecutor<'r, AsciiInput<'t>> {
1145 type AsAscii = BacktrackExecutor<'r, AsciiInput<'t>>;
1146
1147 fn new(re: &'r CompiledRegex, text: &'t str) -> Self {
1148 let input = AsciiInput::new(text);
1149 Self {
1150 input,
1151 matcher: MatchAttempter::new(re, input.left_end()),
1152 }
1153 }
1154}