use syntax::{self, Expr, Repeater};
use Error;
use program::{CharRanges, Inst, InstIdx, OneChar};
type Compiled = (Vec<Inst>, Vec<Option<String>>);
pub struct Compiler {
size_limit: usize,
insts: Vec<Inst>,
cap_names: Vec<Option<String>>,
}
impl Compiler {
pub fn new(size_limit: usize) -> Compiler {
Compiler {
size_limit: size_limit,
insts: vec![],
cap_names: vec![None],
}
}
pub fn compile(mut self, ast: Expr) -> Result<Compiled, Error> {
self.insts.push(Inst::Save(0));
try!(self.c(ast));
self.insts.push(Inst::Save(1));
self.insts.push(Inst::Match);
Ok((self.insts, self.cap_names))
}
fn c(&mut self, ast: Expr) -> Result<(), Error> {
use program::Inst::*;
use program::LookInst::*;
match ast {
Expr::Empty => {},
Expr::Literal { chars, casei } => {
for mut c in chars {
if casei {
c = syntax::simple_case_fold(c);
}
self.push(Char(OneChar { c: c, casei: casei }));
}
}
Expr::AnyChar => self.push(Ranges(CharRanges::any())),
Expr::AnyCharNoNL => self.push(Ranges(CharRanges::any_nonl())),
Expr::Class(cls) => {
if cls.len() == 1 && cls[0].start == cls[0].end {
self.push(Char(OneChar {
c: cls[0].start,
casei: cls.is_case_insensitive(),
}));
} else {
self.push(Ranges(CharRanges::from_class(cls)));
}
}
Expr::StartLine => self.push(EmptyLook(StartLine)),
Expr::EndLine => self.push(EmptyLook(EndLine)),
Expr::StartText => self.push(EmptyLook(StartText)),
Expr::EndText => self.push(EmptyLook(EndText)),
Expr::WordBoundary => self.push(EmptyLook(WordBoundary)),
Expr::NotWordBoundary => self.push(EmptyLook(NotWordBoundary)),
Expr::Group { e, i: None, name: None } => try!(self.c(*e)),
Expr::Group { e, i, name } => {
let i = i.expect("capture index");
self.cap_names.push(name);
self.push(Save(2 * i));
try!(self.c(*e));
self.push(Save(2 * i + 1));
}
Expr::Concat(es) => {
for e in es {
try!(self.c(e));
}
}
Expr::Alternate(mut es) => {
if es.len() == 0 {
return Ok(());
}
let e1 = es.remove(0);
if es.len() == 0 {
try!(self.c(e1));
return Ok(());
}
let e2 = Expr::Alternate(es);
let split = self.empty_split();
let j1 = self.insts.len();
try!(self.c(e1));
let jmp = self.empty_jump();
let j2 = self.insts.len();
try!(self.c(e2));
let j3 = self.insts.len();
self.set_split(split, j1, j2);
self.set_jump(jmp, j3);
}
Expr::Repeat { e, r: Repeater::ZeroOrOne, greedy } => {
let split = self.empty_split();
let j1 = self.insts.len();
try!(self.c(*e));
let j2 = self.insts.len();
if greedy {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
Expr::Repeat { e, r: Repeater::ZeroOrMore, greedy } => {
let j1 = self.insts.len();
let split = self.empty_split();
let j2 = self.insts.len();
try!(self.c(*e));
let jmp = self.empty_jump();
let j3 = self.insts.len();
self.set_jump(jmp, j1);
if greedy {
self.set_split(split, j2, j3);
} else {
self.set_split(split, j3, j2);
}
}
Expr::Repeat { e, r: Repeater::OneOrMore, greedy } => {
let j1 = self.insts.len();
try!(self.c(*e));
let split = self.empty_split();
let j2 = self.insts.len();
if greedy {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
Expr::Repeat {
e,
r: Repeater::Range { min, max: None },
greedy,
} => {
let e = *e;
for _ in 0..min {
try!(self.c(e.clone()));
}
try!(self.c(Expr::Repeat {
e: Box::new(e),
r: Repeater::ZeroOrMore,
greedy: greedy,
}));
}
Expr::Repeat {
e,
r: Repeater::Range { min, max: Some(max) },
greedy,
} => {
let e = *e;
for _ in 0..min {
try!(self.c(e.clone()));
}
for _ in min..max {
try!(self.c(Expr::Repeat {
e: Box::new(e.clone()),
r: Repeater::ZeroOrOne,
greedy: greedy,
}));
}
}
}
self.check_size()
}
fn check_size(&self) -> Result<(), Error> {
use std::mem::size_of;
if self.insts.len() * size_of::<Inst>() > self.size_limit {
Err(Error::CompiledTooBig(self.size_limit))
} else {
Ok(())
}
}
#[inline]
fn push(&mut self, x: Inst) {
self.insts.push(x)
}
#[inline]
fn empty_split(&mut self) -> InstIdx {
self.insts.push(Inst::Split(0, 0));
self.insts.len() - 1
}
#[inline]
fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
let split = &mut self.insts[i];
match *split {
Inst::Split(_, _) => *split = Inst::Split(pc1, pc2),
_ => panic!("BUG: Invalid split index."),
}
}
#[inline]
fn empty_jump(&mut self) -> InstIdx {
self.insts.push(Inst::Jump(0));
self.insts.len() - 1
}
#[inline]
fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
let jmp = &mut self.insts[i];
match *jmp {
Inst::Jump(_) => *jmp = Inst::Jump(pc),
_ => panic!("BUG: Invalid jump index."),
}
}
}