1use alloc::boxed::Box;
24use alloc::string::{String, ToString};
25#[cfg(feature = "variable-lookbehinds")]
26use alloc::sync::Arc;
27use alloc::vec::Vec;
28use regex_automata::meta::Regex as RaRegex;
29use regex_automata::meta::{Builder as RaBuilder, Config as RaConfig};
30#[cfg(feature = "variable-lookbehinds")]
31use regex_automata::util::pool::Pool;
32#[cfg(all(test, feature = "std"))]
33use std::{collections::BTreeMap, sync::RwLock};
34
35use crate::analyze::Info;
36#[cfg(feature = "variable-lookbehinds")]
37use crate::vm::{CachePoolFn, ReverseBackwardsDelegate};
38use crate::vm::{CaptureGroupRange, Delegate, Insn, Prog};
39use crate::LookAround::*;
40use crate::{CompileError, Error, Expr, LookAround, RegexOptions, Result};
41
42struct VMBuilder {
45 prog: Vec<Insn>,
46 n_saves: usize,
47}
48
49impl VMBuilder {
50 fn new(max_group: usize) -> VMBuilder {
51 VMBuilder {
52 prog: Vec::new(),
53 n_saves: max_group * 2,
54 }
55 }
56
57 fn build(self) -> Prog {
58 Prog::new(self.prog, self.n_saves)
59 }
60
61 fn newsave(&mut self) -> usize {
62 let result = self.n_saves;
63 self.n_saves += 1;
64 result
65 }
66
67 fn pc(&self) -> usize {
68 self.prog.len()
69 }
70
71 fn add(&mut self, insn: Insn) {
73 self.prog.push(insn);
74 }
75
76 fn set_jmp_target(&mut self, jmp_pc: usize, target: usize) {
77 match self.prog[jmp_pc] {
78 Insn::Jmp(ref mut next) => *next = target,
79 _ => panic!("mutating instruction other than Jmp"),
80 }
81 }
82
83 fn set_split_target(&mut self, split_pc: usize, target: usize, second: bool) {
84 match self.prog[split_pc] {
85 Insn::Split(_, ref mut y) if second => *y = target,
86 Insn::Split(ref mut x, _) => *x = target,
87 _ => panic!("mutating instruction other than Split"),
88 }
89 }
90
91 fn set_repeat_target(&mut self, repeat_pc: usize, target: usize) {
92 match self.prog[repeat_pc] {
93 Insn::RepeatGr { ref mut next, .. }
94 | Insn::RepeatNg { ref mut next, .. }
95 | Insn::RepeatEpsilonGr { ref mut next, .. }
96 | Insn::RepeatEpsilonNg { ref mut next, .. } => *next = target,
97 _ => panic!("mutating instruction other than Repeat"),
98 }
99 }
100}
101
102struct Compiler {
103 b: VMBuilder,
104 options: RegexOptions,
105 inside_alternation: bool,
106}
107
108impl Compiler {
109 fn new(max_group: usize) -> Compiler {
110 Compiler {
111 b: VMBuilder::new(max_group),
112 options: Default::default(),
113 inside_alternation: false,
114 }
115 }
116
117 fn visit(&mut self, info: &Info<'_>, hard: bool) -> Result<()> {
118 if !hard && !info.hard {
119 return self.compile_delegate(info);
121 }
122 match *info.expr {
123 Expr::Empty => (),
124 Expr::Literal { ref val, casei } => {
125 if !casei {
126 self.b.add(Insn::Lit(val.clone()));
127 } else {
128 self.compile_delegate(info)?;
129 }
130 }
131 Expr::Any { newline: true } => {
132 self.b.add(Insn::Any);
133 }
134 Expr::Any { newline: false } => {
135 self.b.add(Insn::AnyNoNL);
136 }
137 Expr::Concat(_) => {
138 self.compile_concat(info, hard)?;
139 }
140 Expr::Alt(_) => {
141 let count = info.children.len();
142 let inside_alternation = self.inside_alternation;
143 self.inside_alternation = true;
144 self.compile_alt(count, |compiler, i| compiler.visit(&info.children[i], hard))?;
145 self.inside_alternation = inside_alternation;
146 }
147 Expr::Group(_) => {
148 let group = info.start_group();
149 self.b.add(Insn::Save(group * 2));
150 self.visit(&info.children[0], hard)?;
151 self.b.add(Insn::Save(group * 2 + 1));
152 }
153 Expr::Repeat { lo, hi, greedy, .. } => {
154 self.compile_repeat(info, lo, hi, greedy, hard)?;
155 }
156 Expr::LookAround(_, la) => {
157 self.compile_lookaround(info, la)?;
158 }
159 Expr::Backref { group, casei } => {
160 self.b.add(Insn::Backref {
161 slot: group * 2,
162 casei,
163 });
164 }
165 Expr::BackrefExistsCondition(group) => {
166 self.b.add(Insn::BackrefExistsCondition(group));
167 }
168 Expr::AtomicGroup(_) => {
169 self.b.add(Insn::BeginAtomic);
172 self.visit(&info.children[0], false)?;
173 self.b.add(Insn::EndAtomic);
174 }
175 Expr::Delegate { .. } => {
176 self.compile_delegate(info)?;
178 }
179 Expr::Assertion(assertion) => {
180 self.b.add(Insn::Assertion(assertion));
181 }
182 Expr::KeepOut => {
183 self.b.add(Insn::Save(0));
184 }
185 Expr::ContinueFromPreviousMatchEnd => {
186 self.b.add(Insn::ContinueFromPreviousMatchEnd {
187 at_start: info.start_group() == 1
188 && info.min_pos_in_group == 0
189 && !self.inside_alternation,
190 });
191 }
192 Expr::Conditional { .. } => {
193 self.compile_conditional(|compiler, i| compiler.visit(&info.children[i], hard))?;
194 }
195 Expr::SubroutineCall(_) => {
196 return Err(Error::CompileError(Box::new(
197 CompileError::FeatureNotYetSupported("Subroutine Call".to_string()),
198 )));
199 }
200 Expr::UnresolvedNamedSubroutineCall { .. } => unreachable!(),
201 Expr::BackrefWithRelativeRecursionLevel { .. } => unreachable!(),
202 }
203 Ok(())
204 }
205
206 fn compile_alt<F>(&mut self, count: usize, mut handle_alternative: F) -> Result<()>
207 where
208 F: FnMut(&mut Compiler, usize) -> Result<()>,
209 {
210 let mut jmps = Vec::new();
211 let mut last_pc = usize::MAX;
212 for i in 0..count {
213 let has_next = i != count - 1;
214 let pc = self.b.pc();
215 if has_next {
216 self.b.add(Insn::Split(pc + 1, usize::MAX));
217 }
218 if last_pc != usize::MAX {
219 self.b.set_split_target(last_pc, pc, true);
220 }
221 last_pc = pc;
222
223 handle_alternative(self, i)?;
224
225 if has_next {
226 let pc = self.b.pc();
230 jmps.push(pc);
231 self.b.add(Insn::Jmp(0));
232 }
233 }
234 let next_pc = self.b.pc();
235 for jmp_pc in jmps {
236 self.b.set_jmp_target(jmp_pc, next_pc);
237 }
238 Ok(())
239 }
240
241 fn compile_conditional<F>(&mut self, mut handle_child: F) -> Result<()>
242 where
243 F: FnMut(&mut Compiler, usize) -> Result<()>,
244 {
245 self.b.add(Insn::BeginAtomic);
250
251 let split_pc = self.b.pc();
252 self.b.add(Insn::Split(split_pc + 1, usize::MAX));
254
255 handle_child(self, 0)?;
257
258 self.b.add(Insn::EndAtomic);
260
261 handle_child(self, 1)?;
263 let jump_over_false_pc = self.b.pc();
265 self.b.add(Insn::Jmp(0));
266
267 self.b.set_split_target(split_pc, self.b.pc(), true);
269 handle_child(self, 2)?;
270
271 self.b.set_jmp_target(jump_over_false_pc, self.b.pc());
273
274 Ok(())
275 }
276
277 fn compile_concat(&mut self, info: &Info<'_>, hard: bool) -> Result<()> {
278 let prefix_end = info
280 .children
281 .iter()
282 .take_while(|c| c.const_size && !c.hard)
283 .count();
284
285 let suffix_len = if !hard {
288 info.children[prefix_end..]
289 .iter()
290 .rev()
291 .take_while(|c| !c.hard)
292 .count()
293 } else {
294 info.children[prefix_end..]
296 .iter()
297 .rev()
298 .take_while(|c| c.const_size && !c.hard)
299 .count()
300 };
301 let suffix_begin = info.children.len() - suffix_len;
302
303 self.compile_delegates(&info.children[..prefix_end])?;
304
305 for child in info.children[prefix_end..suffix_begin].iter() {
306 self.visit(child, true)?;
307 }
308
309 self.compile_delegates(&info.children[suffix_begin..])
310 }
311
312 fn compile_repeat(
313 &mut self,
314 info: &Info<'_>,
315 lo: usize,
316 hi: usize,
317 greedy: bool,
318 hard: bool,
319 ) -> Result<()> {
320 let child = &info.children[0];
321 if lo == 0 && hi == 1 {
322 let pc = self.b.pc();
324 self.b.add(Insn::Split(pc + 1, pc + 1));
325 self.visit(child, hard)?;
329 let next_pc = self.b.pc();
330 self.b.set_split_target(pc, next_pc, greedy);
331 return Ok(());
332 }
333 let hard = hard | info.hard;
334 if hi == usize::MAX && child.min_size == 0 {
335 let repeat = self.b.newsave();
337 let check = self.b.newsave();
338 self.b.add(Insn::Save0(repeat));
339 let pc = self.b.pc();
340 if greedy {
341 self.b.add(Insn::RepeatEpsilonGr {
342 lo,
343 next: usize::MAX,
344 repeat,
345 check,
346 });
347 } else {
348 self.b.add(Insn::RepeatEpsilonNg {
349 lo,
350 next: usize::MAX,
351 repeat,
352 check,
353 });
354 }
355 self.visit(child, hard)?;
356 self.b.add(Insn::Jmp(pc));
357 let next_pc = self.b.pc();
358 self.b.set_repeat_target(pc, next_pc);
359 } else if lo == 0 && hi == usize::MAX {
360 let pc = self.b.pc();
362 self.b.add(Insn::Split(pc + 1, pc + 1));
363 self.visit(child, hard)?;
364 self.b.add(Insn::Jmp(pc));
365 let next_pc = self.b.pc();
366 self.b.set_split_target(pc, next_pc, greedy);
367 } else if lo == 1 && hi == usize::MAX {
368 let pc = self.b.pc();
370 self.visit(child, hard)?;
371 let next = self.b.pc() + 1;
372 let (x, y) = if greedy { (pc, next) } else { (next, pc) };
373 self.b.add(Insn::Split(x, y));
374 } else {
375 let repeat = self.b.newsave();
376 self.b.add(Insn::Save0(repeat));
377 let pc = self.b.pc();
378 if greedy {
379 self.b.add(Insn::RepeatGr {
380 lo,
381 hi,
382 next: usize::MAX,
383 repeat,
384 });
385 } else {
386 self.b.add(Insn::RepeatNg {
387 lo,
388 hi,
389 next: usize::MAX,
390 repeat,
391 });
392 }
393 self.visit(child, hard)?;
394 self.b.add(Insn::Jmp(pc));
395 let next_pc = self.b.pc();
396 self.b.set_repeat_target(pc, next_pc);
397 }
398 Ok(())
399 }
400
401 fn compile_lookaround(&mut self, info: &Info<'_>, la: LookAround) -> Result<()> {
402 let inner = &info.children[0];
403 match la {
404 LookBehind => {
405 if let &Info {
406 const_size: false,
407 expr: &Expr::Alt(_),
408 ..
409 } = inner
410 {
411 let alternatives = &inner.children;
413 self.compile_alt(alternatives.len(), |compiler, i| {
414 let alternative = &alternatives[i];
415 compiler.compile_positive_lookaround(alternative, la)
416 })
417 } else {
418 self.compile_positive_lookaround(inner, la)
419 }
420 }
421 LookBehindNeg => {
422 if let &Info {
423 const_size: false,
424 expr: &Expr::Alt(_),
425 ..
426 } = inner
427 {
428 let alternatives = &inner.children;
430 for alternative in alternatives {
431 self.compile_negative_lookaround(alternative, la)?;
432 }
433 Ok(())
434 } else {
435 self.compile_negative_lookaround(inner, la)
436 }
437 }
438 LookAhead => self.compile_positive_lookaround(inner, la),
439 LookAheadNeg => self.compile_negative_lookaround(inner, la),
440 }
441 }
442
443 fn compile_positive_lookaround(&mut self, inner: &Info<'_>, la: LookAround) -> Result<()> {
444 let save = self.b.newsave();
445 self.b.add(Insn::Save(save));
446 self.compile_lookaround_inner(inner, la)?;
447 self.b.add(Insn::Restore(save));
448 Ok(())
449 }
450
451 fn compile_negative_lookaround(&mut self, inner: &Info<'_>, la: LookAround) -> Result<()> {
452 let pc = self.b.pc();
453 self.b.add(Insn::Split(pc + 1, usize::MAX));
454 self.compile_lookaround_inner(inner, la)?;
455 self.b.add(Insn::FailNegativeLookAround);
456 let next_pc = self.b.pc();
457 self.b.set_split_target(pc, next_pc, true);
458 Ok(())
459 }
460
461 fn compile_lookaround_inner(&mut self, inner: &Info<'_>, la: LookAround) -> Result<()> {
462 if la == LookBehind || la == LookBehindNeg {
463 if inner.const_size {
464 self.b.add(Insn::GoBack(inner.min_size));
465 self.visit(inner, false)
466 } else if !inner.hard {
467 #[cfg(feature = "variable-lookbehinds")]
468 {
469 let mut delegate_builder = DelegateBuilder::new();
470 delegate_builder.push(inner);
471 let pattern = &delegate_builder.re;
472 let capture_groups = delegate_builder
473 .capture_groups
474 .expect("Expected at least one expression");
475
476 use regex_automata::nfa::thompson;
478 let dfa = match regex_automata::hybrid::dfa::DFA::builder()
480 .thompson(thompson::Config::new().reverse(true))
481 .build(pattern)
482 {
483 Ok(dfa) => Arc::new(dfa),
484 Err(e) => {
485 return Err(Error::CompileError(Box::new(CompileError::DfaBuildError(
486 e.to_string(),
487 ))))
488 }
489 };
490
491 let create: CachePoolFn = alloc::boxed::Box::new({
492 let dfa = Arc::clone(&dfa);
493 move || dfa.create_cache()
494 });
495 let cache_pool = Pool::new(create);
496
497 let forward_regex = if inner.start_group() != inner.end_group() {
499 Some(compile_inner(pattern, &self.options)?)
500 } else {
501 None
502 };
503
504 self.b
505 .add(Insn::BackwardsDelegate(ReverseBackwardsDelegate {
506 dfa,
507 cache_pool,
508 pattern: pattern.to_string(),
509 capture_group_extraction_inner: forward_regex,
510 capture_groups: capture_groups.to_option_if_non_empty(),
511 }));
512 Ok(())
513 }
514 #[cfg(not(feature = "variable-lookbehinds"))]
515 {
516 Err(Error::CompileError(Box::new(
517 CompileError::VariableLookBehindRequiresFeature,
518 )))
519 }
520 } else {
521 Err(Error::CompileError(Box::new(
523 CompileError::LookBehindNotConst,
524 )))
525 }
526 } else {
527 self.visit(inner, false)
528 }
529 }
530
531 fn compile_delegates(&mut self, infos: &[Info<'_>]) -> Result<()> {
532 if infos.is_empty() {
533 return Ok(());
534 }
535 if infos.iter().all(|e| e.is_literal()) {
538 let mut val = String::new();
539 for info in infos {
540 info.push_literal(&mut val);
541 }
542 self.b.add(Insn::Lit(val));
543 return Ok(());
544 }
545
546 let mut delegate_builder = DelegateBuilder::new();
547 for info in infos {
548 delegate_builder.push(info);
549 }
550 let delegate = delegate_builder.build(&self.options)?;
551
552 self.b.add(delegate);
553 Ok(())
554 }
555
556 fn compile_delegate(&mut self, info: &Info) -> Result<()> {
557 let insn = if info.is_literal() {
558 let mut val = String::new();
559 info.push_literal(&mut val);
560 Insn::Lit(val)
561 } else {
562 DelegateBuilder::new().push(info).build(&self.options)?
563 };
564 self.b.add(insn);
565 Ok(())
566 }
567}
568
569#[cfg(all(test, feature = "std"))]
574static PATTERN_MAPPING: RwLock<BTreeMap<String, String>> = RwLock::new(BTreeMap::new());
575
576pub(crate) fn compile_inner(inner_re: &str, options: &RegexOptions) -> Result<RaRegex> {
577 let mut config = RaConfig::new();
578 if let Some(size_limit) = options.delegate_size_limit {
579 config = config.nfa_size_limit(Some(size_limit));
580 }
581 if let Some(dfa_size_limit) = options.delegate_dfa_size_limit {
582 config = config.dfa_size_limit(Some(dfa_size_limit));
583 }
584
585 let re = RaBuilder::new()
586 .configure(config)
587 .syntax(options.syntaxc)
588 .build(inner_re)
589 .map_err(CompileError::InnerError)
590 .map_err(|e| Error::CompileError(Box::new(e)))?;
591
592 #[cfg(all(test, feature = "std"))]
593 PATTERN_MAPPING
594 .write()
595 .unwrap()
596 .insert(format!("{:?}", re), inner_re.to_owned());
597
598 Ok(re)
599}
600
601pub fn compile(info: &Info<'_>, anchored: bool) -> Result<Prog> {
603 let mut c = Compiler::new(info.end_group());
604 if !anchored {
605 let current_pc = c.b.pc();
608 c.b.add(Insn::Split(current_pc + 3, current_pc + 1));
610 c.b.add(Insn::Any);
611 c.b.add(Insn::Jmp(current_pc));
612 }
613 if info.start_group() == 1 {
614 c.b.add(Insn::Save(0));
616 }
617 c.visit(info, false)?;
618 if info.start_group() == 1 {
619 c.b.add(Insn::Save(1));
621 }
622 c.b.add(Insn::End);
623 Ok(c.b.build())
624}
625
626struct DelegateBuilder {
627 re: String,
628 min_size: usize,
629 const_size: bool,
630 capture_groups: Option<CaptureGroupRange>,
631}
632
633impl DelegateBuilder {
634 fn new() -> Self {
635 Self {
636 re: String::new(),
637 min_size: 0,
638 const_size: true,
639 capture_groups: None,
640 }
641 }
642
643 fn push(&mut self, info: &Info<'_>) -> &mut DelegateBuilder {
644 self.min_size += info.min_size;
648 self.const_size &= info.const_size;
649 if self.capture_groups.is_none() {
650 self.capture_groups = Some(info.capture_groups);
651 } else {
652 self.capture_groups = self
654 .capture_groups
655 .map(|range| CaptureGroupRange(range.start(), info.end_group()));
656 }
657
658 info.expr.to_str(&mut self.re, 1);
669 self
670 }
671
672 fn build(&self, options: &RegexOptions) -> Result<Insn> {
673 let capture_groups = self
674 .capture_groups
675 .expect("Expected at least one expression");
676
677 let compiled = compile_inner(&self.re, options)?;
678
679 Ok(Insn::Delegate(Delegate {
680 inner: compiled,
681 pattern: self.re.clone(),
682 capture_groups: capture_groups.to_option_if_non_empty(),
683 }))
684 }
685}
686
687#[cfg(test)]
688mod tests {
689 use super::*;
690 use crate::analyze::analyze;
691 use crate::parse::ExprTree;
692 use crate::vm::Insn::*;
693 use alloc::vec;
694 use bit_set::BitSet;
695 use matches::assert_matches;
696
697 #[test]
698 fn jumps_for_alternation() {
699 let tree = ExprTree {
700 expr: Expr::Alt(vec![
701 Expr::Literal {
702 val: "a".into(),
703 casei: false,
704 },
705 Expr::Literal {
706 val: "b".into(),
707 casei: false,
708 },
709 Expr::Literal {
710 val: "c".into(),
711 casei: false,
712 },
713 ]),
714 backrefs: BitSet::new(),
715 named_groups: Default::default(),
716 contains_subroutines: false,
717 self_recursive: false,
718 };
719 let info = analyze(&tree, false).unwrap();
720
721 let mut c = Compiler::new(0);
722 c.visit(&info, true).unwrap();
724 c.b.add(Insn::End);
725
726 let prog = c.b.prog;
727
728 assert_eq!(prog.len(), 8, "prog: {:?}", prog);
729 assert_matches!(prog[0], Split(1, 3));
730 assert_matches!(prog[1], Lit(ref l) if l == "a");
731 assert_matches!(prog[2], Jmp(7));
732 assert_matches!(prog[3], Split(4, 6));
733 assert_matches!(prog[4], Lit(ref l) if l == "b");
734 assert_matches!(prog[5], Jmp(7));
735 assert_matches!(prog[6], Lit(ref l) if l == "c");
736 assert_matches!(prog[7], End);
737 }
738
739 #[cfg_attr(not(feature = "std"), ignore = "this test need std")]
740 #[test]
741 fn look_around_pattern_can_be_delegated() {
742 let prog = compile_prog("(?=ab*)c");
743
744 assert_eq!(prog.len(), 5, "prog: {:?}", prog);
745 assert_matches!(prog[0], Save(0));
746 assert_delegate(&prog[1], "ab*");
747 assert_matches!(prog[2], Restore(0));
748 assert_matches!(prog[3], Lit(ref l) if l == "c");
749 assert_matches!(prog[4], End);
750 }
751
752 #[cfg_attr(not(feature = "std"), ignore = "this test need std")]
753 #[test]
754 fn easy_concat_can_delegate_end() {
755 let prog = compile_prog("(?!x)(?:a|ab)x*");
756
757 assert_eq!(prog.len(), 5, "prog: {:?}", prog);
758 assert_matches!(prog[0], Split(1, 3));
759 assert_matches!(prog[1], Lit(ref l) if l == "x");
760 assert_matches!(prog[2], FailNegativeLookAround);
761 assert_delegate(&prog[3], "(?:a|ab)x*");
762 assert_matches!(prog[4], End);
763 }
764
765 #[cfg_attr(not(feature = "std"), ignore = "this test need std")]
766 #[test]
767 fn hard_concat_can_delegate_const_size_end() {
768 let prog = compile_prog("(?:(?!x)(?:a|b)c)x*");
769
770 assert_eq!(prog.len(), 6, "prog: {:?}", prog);
771 assert_matches!(prog[0], Split(1, 3));
772 assert_matches!(prog[1], Lit(ref l) if l == "x");
773 assert_matches!(prog[2], FailNegativeLookAround);
774 assert_delegate(&prog[3], "(?:a|b)c");
775 assert_delegate(&prog[4], "x*");
776 assert_matches!(prog[5], End);
777 }
778
779 #[cfg_attr(not(feature = "std"), ignore = "this test need std")]
780 #[test]
781 fn hard_concat_can_not_delegate_variable_end() {
782 let prog = compile_prog("(?:(?!x)(?:a|ab))x*");
783
784 assert_eq!(prog.len(), 9, "prog: {:?}", prog);
785 assert_matches!(prog[0], Split(1, 3));
786 assert_matches!(prog[1], Lit(ref l) if l == "x");
787 assert_matches!(prog[2], FailNegativeLookAround);
788 assert_matches!(prog[3], Split(4, 6));
789 assert_matches!(prog[4], Lit(ref l) if l == "a");
790 assert_matches!(prog[5], Jmp(7));
791 assert_matches!(prog[6], Lit(ref l) if l == "ab");
792 assert_delegate(&prog[7], "x*");
793 assert_matches!(prog[8], End);
794 }
795
796 #[test]
797 fn conditional_expression_can_be_compiled() {
798 let prog = compile_prog(r"(?(ab)c|d)");
799
800 assert_eq!(prog.len(), 8, "prog: {:?}", prog);
801
802 assert_matches!(prog[0], BeginAtomic);
803 assert_matches!(prog[1], Split(2, 6));
804 assert_matches!(prog[2], Lit(ref l) if l == "ab");
805 assert_matches!(prog[3], EndAtomic);
806 assert_matches!(prog[4], Lit(ref l) if l == "c");
807 assert_matches!(prog[5], Jmp(7));
808 assert_matches!(prog[6], Lit(ref l) if l == "d");
809 assert_matches!(prog[7], End);
810 }
811
812 #[test]
813 fn lazy_any_can_be_compiled_explicit_capture_group_zero() {
814 let prog = compile_prog(r"\O*?((?!a))");
815
816 assert_eq!(prog.len(), 9, "prog: {:?}", prog);
817
818 assert_matches!(prog[0], Split(3, 1));
819 assert_matches!(prog[1], Any);
820 assert_matches!(prog[2], Jmp(0));
821 assert_matches!(prog[3], Save(0));
822 assert_matches!(prog[4], Split(5, 7));
823 assert_matches!(prog[5], Lit(ref l) if l == "a");
824 assert_matches!(prog[6], FailNegativeLookAround);
825 assert_matches!(prog[7], Save(1));
826 assert_matches!(prog[8], End);
827 }
828
829 #[test]
830 #[cfg(not(feature = "variable-lookbehinds"))]
831 fn variable_lookbehind_requires_feature() {
832 let tree = Expr::parse_tree(r"(?<=ab+)x").unwrap();
834 let info = analyze(&tree, true).unwrap();
835 let result = compile(&info, true);
836 assert!(result.is_err());
837 assert_matches!(
838 result.err().unwrap(),
839 Error::CompileError(box_err) if matches!(*box_err, CompileError::VariableLookBehindRequiresFeature)
840 );
841 }
842
843 #[test]
844 #[cfg(feature = "variable-lookbehinds")]
845 fn variable_lookbehind_with_required_feature_no_captures() {
846 let prog = compile_prog(r"(?<=ab+)x");
847
848 assert_eq!(prog.len(), 5, "prog: {:?}", prog);
849
850 assert_matches!(prog[0], Save(0));
851 assert_matches!(&prog[1], BackwardsDelegate(ReverseBackwardsDelegate { pattern, dfa: _, cache_pool: _, capture_group_extraction_inner: None, capture_groups: None }) if pattern == "ab+");
852 assert_matches!(prog[2], Restore(0));
853 assert_matches!(prog[3], Lit(ref l) if l == "x");
854 assert_matches!(prog[4], End);
855 }
856
857 #[test]
858 #[cfg(feature = "variable-lookbehinds")]
859 fn variable_lookbehind_with_required_feature_captures() {
860 let prog = compile_prog(r"(?<=a(b+))x");
861
862 assert_eq!(prog.len(), 5, "prog: {:?}", prog);
863
864 assert_matches!(prog[0], Save(2));
865 assert_matches!(&prog[1], BackwardsDelegate(ReverseBackwardsDelegate { pattern, dfa: _, cache_pool: _, capture_group_extraction_inner: ref inner, capture_groups: Some(CaptureGroupRange(0, 1)) }) if pattern == "a(b+)" && inner.is_some());
866 assert_matches!(prog[2], Restore(2));
867 assert_matches!(prog[3], Lit(ref l) if l == "x");
868 assert_matches!(prog[4], End);
869 }
870
871 #[test]
872 #[cfg(feature = "variable-lookbehinds")]
873 fn variable_lookbehind_with_required_feature_backref_captures() {
874 let tree = Expr::parse_tree(r"(?<=a(b+))\1").unwrap();
877 let info = analyze(&tree, false).unwrap();
878 let result = compile(&info, true);
879 assert!(result.is_err());
880 assert_matches!(
881 result.err().unwrap(),
882 Error::CompileError(box_err) if matches!(*box_err, CompileError::LookBehindNotConst)
883 );
884 }
885
886 fn compile_prog(re: &str) -> Vec<Insn> {
887 let tree = Expr::parse_tree(re).unwrap();
888 let info = analyze(&tree, true).unwrap();
889 let prog = compile(&info, true).unwrap();
890 prog.body
891 }
892
893 #[cfg(feature = "std")]
894 fn assert_delegate(insn: &Insn, re: &str) {
895 use crate::vm::Delegate;
896
897 match insn {
898 Insn::Delegate(Delegate { inner, .. }) => {
899 assert_eq!(
900 PATTERN_MAPPING
901 .read()
902 .unwrap()
903 .get(&alloc::format!("{:?}", inner))
904 .unwrap(),
905 re
906 );
907 }
908 _ => {
909 panic!("Expected Insn::Delegate but was {:#?}", insn);
910 }
911 }
912 }
913
914 #[cfg(not(feature = "std"))]
915 fn assert_delegate(_: &Insn, _: &str) {}
916}