use self::Inst::*;
use std::cmp;
use std::iter::repeat;
use parse;
use parse::{Flags, FLAG_EMPTY};
use parse::Ast::{Nothing, Literal, Dot, AstClass, Begin, End, WordBoundary, Capture, Cat, Alt,
Rep};
use parse::Repeater::{ZeroOne, ZeroMore, OneMore};
pub type InstIdx = uint;
#[derive(Show, Clone)]
pub enum Inst {
Match,
OneChar(char, Flags),
CharClass(Vec<(char, char)>, Flags),
Any(Flags),
EmptyBegin(Flags),
EmptyEnd(Flags),
EmptyWordBoundary(Flags),
Save(uint),
Jump(InstIdx),
Split(InstIdx, InstIdx),
}
#[derive(Clone)]
pub struct Program {
pub insts: Vec<Inst>,
pub prefix: String,
}
impl Program {
pub fn new(ast: parse::Ast) -> (Program, Vec<Option<String>>) {
let mut c = Compiler {
insts: Vec::with_capacity(100),
names: Vec::with_capacity(10),
};
c.insts.push(Save(0));
c.compile(ast);
c.insts.push(Save(1));
c.insts.push(Match);
let mut pre = String::with_capacity(5);
for inst in c.insts[1..].iter() {
match *inst {
OneChar(c, FLAG_EMPTY) => pre.push(c),
_ => break
}
}
let Compiler { insts, names } = c;
let prog = Program {
insts: insts,
prefix: pre,
};
(prog, names)
}
pub fn num_captures(&self) -> uint {
let mut n = 0;
for inst in self.insts.iter() {
match *inst {
Save(c) => n = cmp::max(n, c+1),
_ => {}
}
}
n / 2
}
}
struct Compiler<'r> {
insts: Vec<Inst>,
names: Vec<Option<String>>,
}
impl<'r> Compiler<'r> {
fn compile(&mut self, ast: parse::Ast) {
match ast {
Nothing => {},
Literal(c, flags) => self.push(OneChar(c, flags)),
Dot(nl) => self.push(Any(nl)),
AstClass(ranges, flags) =>
self.push(CharClass(ranges, flags)),
Begin(flags) => self.push(EmptyBegin(flags)),
End(flags) => self.push(EmptyEnd(flags)),
WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
Capture(cap, name, x) => {
let len = self.names.len();
if cap >= len {
self.names.extend(repeat(None).take(10 + cap - len))
}
self.names[cap] = name;
self.push(Save(2 * cap));
self.compile(*x);
self.push(Save(2 * cap + 1));
}
Cat(xs) => {
for x in xs.into_iter() {
self.compile(x)
}
}
Alt(x, y) => {
let split = self.empty_split(); let j1 = self.insts.len();
self.compile(*x); let jmp = self.empty_jump(); let j2 = self.insts.len();
self.compile(*y); let j3 = self.insts.len();
self.set_split(split, j1, j2); self.set_jump(jmp, j3); }
Rep(x, ZeroOne, g) => {
let split = self.empty_split();
let j1 = self.insts.len();
self.compile(*x);
let j2 = self.insts.len();
if g.is_greedy() {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
Rep(x, ZeroMore, g) => {
let j1 = self.insts.len();
let split = self.empty_split();
let j2 = self.insts.len();
self.compile(*x);
let jmp = self.empty_jump();
let j3 = self.insts.len();
self.set_jump(jmp, j1);
if g.is_greedy() {
self.set_split(split, j2, j3);
} else {
self.set_split(split, j3, j2);
}
}
Rep(x, OneMore, g) => {
let j1 = self.insts.len();
self.compile(*x);
let split = self.empty_split();
let j2 = self.insts.len();
if g.is_greedy() {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
}
}
#[inline]
fn push(&mut self, x: Inst) {
self.insts.push(x)
}
#[inline]
fn empty_split(&mut self) -> InstIdx {
self.insts.push(Split(0, 0));
self.insts.len() - 1
}
#[inline]
fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
let split = &mut self.insts[i];
match *split {
Split(_, _) => *split = Split(pc1, pc2),
_ => panic!("BUG: Invalid split index."),
}
}
#[inline]
fn empty_jump(&mut self) -> InstIdx {
self.insts.push(Jump(0));
self.insts.len() - 1
}
#[inline]
fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
let jmp = &mut self.insts[i];
match *jmp {
Jump(_) => *jmp = Jump(pc),
_ => panic!("BUG: Invalid jump index."),
}
}
}