use super::Flags;
use super::parser::{PropKind, Shorthand};
use alloc::vec::Vec;
use core::cell::Cell;
const STEP_BASE: u64 = crate::limits::DEFAULT_REGEX_STEP_BASE;
const STEP_PER_CHAR: u64 = 1_000;
const MAX_DEPTH: u32 = crate::limits::DEFAULT_REGEX_MAX_DEPTH;
pub(crate) enum Inst {
Char(char),
Any,
Class(Class),
Match,
Jmp(usize),
Split(usize, usize),
Save(usize),
Assert(Assert),
Look { neg: bool, prog: Vec<Inst> },
LookBehind { neg: bool, prog: Vec<Inst> },
Backref(usize),
}
pub(crate) struct Class {
pub neg: bool,
pub members: Vec<ClassMember>,
}
pub(crate) enum ClassMember {
Char(char),
Range(char, char),
Shorthand(Shorthand),
}
pub(crate) enum Assert {
Start,
End,
WordBoundary,
NotWordBoundary,
}
pub(crate) fn budget_for(input_len: usize) -> u64 {
STEP_BASE.saturating_add(STEP_PER_CHAR.saturating_mul(input_len as u64))
}
pub(crate) fn run_shared(
prog: &[Inst],
input: &[char],
start: usize,
group_count: usize,
flags: Flags,
steps: &Cell<u64>,
budget: u64,
) -> Option<Vec<Option<(usize, usize)>>> {
let mut saves = alloc::vec![None; 2 * (group_count + 1)];
let ctx = Ctx {
prog,
input,
flags,
match_end: None,
steps,
budget,
};
let mut loops: Vec<(usize, usize)> = Vec::new();
if backtrack(&ctx, 0, start, &mut saves, 0, &mut loops) {
let mut groups = Vec::with_capacity(group_count + 1);
for g in 0..=group_count {
groups.push(match (saves[2 * g], saves[2 * g + 1]) {
(Some(s), Some(e)) => Some((s, e)),
_ => None,
});
}
Some(groups)
} else {
None
}
}
struct Ctx<'a> {
prog: &'a [Inst],
input: &'a [char],
flags: Flags,
match_end: Option<usize>,
steps: &'a Cell<u64>,
budget: u64,
}
impl Ctx<'_> {
#[inline]
fn tick(&self) -> bool {
let n = self.steps.get().saturating_add(1);
self.steps.set(n);
n <= self.budget
}
}
fn backtrack(
ctx: &Ctx,
mut pc: usize,
mut sp: usize,
saves: &mut Vec<Option<usize>>,
depth: u32,
loops: &mut Vec<(usize, usize)>,
) -> bool {
if depth > MAX_DEPTH {
return false;
}
loop {
if !ctx.tick() {
return false;
}
match &ctx.prog[pc] {
Inst::Match => return ctx.match_end.is_none_or(|p| sp == p),
Inst::Char(c) => {
if sp < ctx.input.len() && char_eq(ctx.input[sp], *c, ctx.flags) {
sp += 1;
pc += 1;
} else {
return false;
}
}
Inst::Any => {
if sp < ctx.input.len() && (ctx.flags.dotall || !is_line_term(ctx.input[sp])) {
sp += 1;
pc += 1;
} else {
return false;
}
}
Inst::Class(class) => {
if sp < ctx.input.len() && class_matches(class, ctx.input[sp], ctx.flags) {
sp += 1;
pc += 1;
} else {
return false;
}
}
Inst::Jmp(target) => pc = *target,
Inst::Split(a, b) => {
if let Some((consume_pc, cont_pc, greedy)) = simple_loop(ctx.prog, pc) {
if greedy {
let mut positions = alloc::vec![sp];
while consume_one(&ctx.prog[consume_pc], ctx.input, sp, ctx.flags) {
sp += 1;
positions.push(sp);
if !ctx.tick() {
return false;
}
}
while let Some(p) = positions.pop() {
if backtrack(ctx, cont_pc, p, saves, depth + 1, loops) {
return true;
}
if !ctx.tick() {
return false;
}
}
return false;
}
loop {
if backtrack(ctx, cont_pc, sp, saves, depth + 1, loops) {
return true;
}
if !consume_one(&ctx.prog[consume_pc], ctx.input, sp, ctx.flags) {
return false;
}
sp += 1;
if !ctx.tick() {
return false;
}
}
}
if let Some((body, exit)) = loop_head(ctx.prog, pc) {
if loops.contains(&(pc, sp)) {
pc = exit;
continue;
}
loops.push((pc, sp));
let took_body = backtrack(ctx, body, sp, saves, depth + 1, loops);
loops.pop();
if took_body {
return true;
}
pc = exit;
continue;
}
if backtrack(ctx, *a, sp, saves, depth + 1, loops) {
return true;
}
pc = *b;
}
Inst::Save(slot) => {
let old = saves[*slot];
saves[*slot] = Some(sp);
if backtrack(ctx, pc + 1, sp, saves, depth + 1, loops) {
return true;
}
saves[*slot] = old;
return false;
}
Inst::Look { neg, prog } => {
let sub = Ctx {
prog,
input: ctx.input,
flags: ctx.flags,
match_end: None,
steps: ctx.steps,
budget: ctx.budget,
};
let mut sub_saves = alloc::vec![None; saves.len()];
let mut sub_loops = Vec::new();
let matched = backtrack(&sub, 0, sp, &mut sub_saves, depth + 1, &mut sub_loops);
if matched != *neg {
pc += 1;
} else {
return false;
}
}
Inst::LookBehind { neg, prog } => {
let sub = Ctx {
prog,
input: ctx.input,
flags: ctx.flags,
match_end: Some(sp),
steps: ctx.steps,
budget: ctx.budget,
};
let mut matched = false;
for j in (0..=sp).rev() {
if !ctx.tick() {
return false;
}
let mut sub_saves = alloc::vec![None; saves.len()];
let mut sub_loops = Vec::new();
if backtrack(&sub, 0, j, &mut sub_saves, depth + 1, &mut sub_loops) {
matched = true;
break;
}
}
if matched != *neg {
pc += 1;
} else {
return false;
}
}
Inst::Backref(g) => {
match (
saves.get(2 * g).copied().flatten(),
saves.get(2 * g + 1).copied().flatten(),
) {
(Some(s), Some(e)) => {
let len = e - s;
if sp + len <= ctx.input.len()
&& (0..len)
.all(|i| char_eq(ctx.input[sp + i], ctx.input[s + i], ctx.flags))
{
sp += len;
pc += 1;
} else {
return false;
}
}
_ => pc += 1,
}
}
Inst::Assert(assert) => {
if assert_ok(assert, ctx.input, sp, ctx.flags) {
pc += 1;
} else {
return false;
}
}
}
}
}
fn simple_loop(prog: &[Inst], split_pc: usize) -> Option<(usize, usize, bool)> {
let Inst::Split(a, b) = &prog[split_pc] else {
return None;
};
if *a == split_pc + 1
&& is_single_consume(&prog[split_pc + 1])
&& matches!(prog.get(split_pc + 2), Some(Inst::Jmp(t)) if *t == split_pc)
{
return Some((split_pc + 1, *b, true));
}
if *b == split_pc + 1
&& is_single_consume(&prog[split_pc + 1])
&& matches!(prog.get(split_pc + 2), Some(Inst::Jmp(t)) if *t == split_pc)
{
return Some((split_pc + 1, *a, false));
}
None
}
fn is_single_consume(inst: &Inst) -> bool {
matches!(inst, Inst::Char(_) | Inst::Any | Inst::Class(_))
}
fn loop_head(prog: &[Inst], split_pc: usize) -> Option<(usize, usize)> {
let Inst::Split(a, b) = &prog[split_pc] else {
return None;
};
let body = split_pc + 1;
let exit = if *a == body {
*b
} else if *b == body {
*a
} else {
return None;
};
if exit > 0 && matches!(prog.get(exit - 1), Some(Inst::Jmp(t)) if *t == split_pc) {
Some((body, exit))
} else {
None
}
}
fn consume_one(inst: &Inst, input: &[char], sp: usize, flags: Flags) -> bool {
if sp >= input.len() {
return false;
}
match inst {
Inst::Char(c) => char_eq(input[sp], *c, flags),
Inst::Any => flags.dotall || !is_line_term(input[sp]),
Inst::Class(class) => class_matches(class, input[sp], flags),
_ => false,
}
}
fn char_eq(a: char, b: char, flags: Flags) -> bool {
if a == b {
return true;
}
if !flags.ignore_case {
return false;
}
#[cfg(feature = "intl")]
{
intl::unicode::case::case_fold(a).eq(intl::unicode::case::case_fold(b))
}
#[cfg(not(feature = "intl"))]
{
a.eq_ignore_ascii_case(&b) || a.to_lowercase().eq(b.to_lowercase())
}
}
fn is_line_term(c: char) -> bool {
matches!(c, '\n' | '\r' | '\u{2028}' | '\u{2029}')
}
fn is_word(c: char) -> bool {
c.is_ascii_alphanumeric() || c == '_'
}
fn class_matches(class: &Class, c: char, flags: Flags) -> bool {
let mut hit = false;
for m in &class.members {
let matched = match m {
ClassMember::Char(ch) => char_eq(c, *ch, flags),
ClassMember::Range(lo, hi) => {
(c >= *lo && c <= *hi)
|| (flags.ignore_case && {
let cl = c.to_ascii_lowercase();
let cu = c.to_ascii_uppercase();
(cl >= *lo && cl <= *hi) || (cu >= *lo && cu <= *hi)
})
}
ClassMember::Shorthand(s) => shorthand_matches(*s, c),
};
if matched {
hit = true;
break;
}
}
hit ^ class.neg
}
fn shorthand_matches(s: Shorthand, c: char) -> bool {
match s {
Shorthand::Digit => c.is_ascii_digit(),
Shorthand::NotDigit => !c.is_ascii_digit(),
Shorthand::Word => is_word(c),
Shorthand::NotWord => !is_word(c),
Shorthand::Space => c.is_whitespace(),
Shorthand::NotSpace => !c.is_whitespace(),
Shorthand::Property(kind, neg) => property_matches(kind, c) ^ neg,
}
}
fn property_matches(kind: PropKind, c: char) -> bool {
match kind {
PropKind::Letter => c.is_alphabetic(),
PropKind::Upper => c.is_uppercase(),
PropKind::Lower => c.is_lowercase(),
PropKind::Number => c.is_numeric(),
PropKind::White => c.is_whitespace(),
PropKind::Alnum => c.is_alphanumeric(),
PropKind::Gc(code) => general_category_matches(code, c),
}
}
#[cfg(feature = "intl")]
fn general_category_matches(code: [u8; 2], c: char) -> bool {
use intl::unicode::category::Group;
let gc = intl::unicode::general_category(c);
if code[1] == 0 {
let want = match code[0] {
b'L' => Group::Letter,
b'M' => Group::Mark,
b'N' => Group::Number,
b'P' => Group::Punctuation,
b'S' => Group::Symbol,
b'Z' => Group::Separator,
b'C' => Group::Other,
_ => return false,
};
gc.group() == want
} else {
gc.abbr().as_bytes() == code
}
}
#[cfg(not(feature = "intl"))]
fn general_category_matches(code: [u8; 2], c: char) -> bool {
match &code {
b"L\0" => c.is_alphabetic(),
b"N\0" => c.is_numeric(),
b"Z\0" => c == ' ' || (c.is_whitespace() && !c.is_control()),
b"C\0" => c.is_control(),
b"P\0" => c.is_ascii_punctuation(),
b"Lu" => c.is_uppercase(),
b"Ll" => c.is_lowercase(),
b"Lo" => c.is_alphabetic() && !c.is_uppercase() && !c.is_lowercase(),
b"Nd" => c.is_ascii_digit(),
b"Cc" => c.is_control(),
_ => false,
}
}
fn assert_ok(assert: &Assert, input: &[char], sp: usize, flags: Flags) -> bool {
match assert {
Assert::Start => sp == 0 || (flags.multiline && is_line_term(input[sp - 1])),
Assert::End => sp == input.len() || (flags.multiline && is_line_term(input[sp])),
Assert::WordBoundary => is_boundary(input, sp),
Assert::NotWordBoundary => !is_boundary(input, sp),
}
}
fn is_boundary(input: &[char], sp: usize) -> bool {
let before = sp > 0 && is_word(input[sp - 1]);
let after = sp < input.len() && is_word(input[sp]);
before != after
}