use super::parser::{ClassItem, Node, RegexError};
use super::vm::{Assert, Class, ClassMember, Inst};
use alloc::vec::Vec;
const MAX_PROG_SIZE: usize = 100_000;
pub(crate) fn compile(
ast: &Node,
group_names: &[(usize, alloc::string::String)],
) -> Result<(Vec<Inst>, usize), RegexError> {
let mut c = Compiler {
prog: Vec::new(),
groups: 0,
group_names,
};
c.emit(Inst::Save(0));
c.compile(ast)?;
c.emit(Inst::Save(1));
c.emit(Inst::Match);
Ok((c.prog, c.groups))
}
struct Compiler<'a> {
prog: Vec<Inst>,
groups: usize,
group_names: &'a [(usize, alloc::string::String)],
}
impl Compiler<'_> {
fn emit(&mut self, inst: Inst) -> usize {
self.prog.push(inst);
self.prog.len() - 1
}
fn here(&self) -> usize {
self.prog.len()
}
fn check_size(&self) -> Result<(), RegexError> {
if self.prog.len() > MAX_PROG_SIZE {
return Err(RegexError::new("compiled pattern too large"));
}
Ok(())
}
fn compile(&mut self, node: &Node) -> Result<(), RegexError> {
match node {
Node::Empty => {}
Node::Char(c) => {
self.emit(Inst::Char(*c));
}
Node::Any => {
self.emit(Inst::Any);
}
Node::Start => {
self.emit(Inst::Assert(Assert::Start));
}
Node::End => {
self.emit(Inst::Assert(Assert::End));
}
Node::WordBoundary { neg } => {
self.emit(Inst::Assert(if *neg {
Assert::NotWordBoundary
} else {
Assert::WordBoundary
}));
}
Node::Class { neg, items } => {
let class = Class {
neg: *neg,
members: items.iter().map(convert_item).collect(),
};
self.emit(Inst::Class(class));
}
Node::Concat(nodes) => {
for n in nodes {
self.compile(n)?;
}
}
Node::Group { index, inner } => {
if let Some(idx) = index {
self.groups = self.groups.max(*idx);
self.emit(Inst::Save(2 * idx));
self.compile(inner)?;
self.emit(Inst::Save(2 * idx + 1));
} else {
self.compile(inner)?;
}
}
Node::Alt(branches) => self.compile_alt(branches)?,
Node::Repeat {
inner,
min,
max,
greedy,
} => self.compile_repeat(inner, *min, *max, *greedy)?,
Node::Look { neg, inner } => {
let mut sub = Compiler {
prog: Vec::new(),
groups: 0,
group_names: self.group_names,
};
sub.compile(inner)?;
sub.emit(Inst::Match);
self.groups = self.groups.max(sub.groups);
self.emit(Inst::Look {
neg: *neg,
prog: sub.prog,
});
}
Node::LookBehind { neg, inner } => {
let mut sub = Compiler {
prog: Vec::new(),
groups: 0,
group_names: self.group_names,
};
sub.compile(inner)?;
sub.emit(Inst::Match);
self.groups = self.groups.max(sub.groups);
self.emit(Inst::LookBehind {
neg: *neg,
prog: sub.prog,
});
}
Node::Backref(n) => {
self.groups = self.groups.max(*n);
self.emit(Inst::Backref(*n));
}
Node::NamedBackref(name) => {
let n = self
.group_names
.iter()
.find(|(_, gn)| gn == name)
.map_or(0, |(idx, _)| *idx);
self.groups = self.groups.max(n);
self.emit(Inst::Backref(n));
}
}
self.check_size()
}
fn compile_alt(&mut self, branches: &[Node]) -> Result<(), RegexError> {
let mut jmp_to_end = Vec::new();
for (i, branch) in branches.iter().enumerate() {
if i + 1 < branches.len() {
let split = self.emit(Inst::Split(0, 0));
let branch_start = self.here();
self.compile(branch)?;
let jmp = self.emit(Inst::Jmp(0));
jmp_to_end.push(jmp);
let next = self.here();
self.prog[split] = Inst::Split(branch_start, next);
} else {
self.compile(branch)?;
}
}
let end = self.here();
for j in jmp_to_end {
self.prog[j] = Inst::Jmp(end);
}
Ok(())
}
fn compile_repeat(
&mut self,
inner: &Node,
min: usize,
max: Option<usize>,
greedy: bool,
) -> Result<(), RegexError> {
let mut probe = Compiler {
prog: Vec::new(),
groups: 0,
group_names: self.group_names,
};
probe.compile(inner)?;
let inner_size = probe.prog.len().max(1);
let copies = match max {
None => min,
Some(max) => max,
};
let projected = self
.prog
.len()
.saturating_add(copies.saturating_mul(inner_size.saturating_add(1)));
if projected > MAX_PROG_SIZE {
return Err(RegexError::new("compiled pattern too large"));
}
for _ in 0..min {
self.compile(inner)?;
}
match max {
None => self.compile_star(inner, greedy)?,
Some(max) => {
for _ in min..max {
self.compile_optional(inner, greedy)?;
}
}
}
Ok(())
}
fn compile_star(&mut self, inner: &Node, greedy: bool) -> Result<(), RegexError> {
let l1 = self.emit(Inst::Split(0, 0));
let body = self.here();
self.compile(inner)?;
self.emit(Inst::Jmp(l1));
let exit = self.here();
self.prog[l1] = if greedy {
Inst::Split(body, exit)
} else {
Inst::Split(exit, body)
};
Ok(())
}
fn compile_optional(&mut self, inner: &Node, greedy: bool) -> Result<(), RegexError> {
let split = self.emit(Inst::Split(0, 0));
let body = self.here();
self.compile(inner)?;
let exit = self.here();
self.prog[split] = if greedy {
Inst::Split(body, exit)
} else {
Inst::Split(exit, body)
};
Ok(())
}
}
fn convert_item(item: &ClassItem) -> ClassMember {
match item {
ClassItem::Char(c) => ClassMember::Char(*c),
ClassItem::Range(a, b) => ClassMember::Range(*a, *b),
ClassItem::Shorthand(s) => ClassMember::Shorthand(*s),
}
}