#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
html_favicon_url = "http://www.rust-lang.org/favicon.ico",
html_root_url = "http://doc.rust-lang.org/nightly/")]
#![feature(plugin_registrar, quote, rustc_private)]
extern crate regex;
extern crate regex_syntax;
extern crate rustc_plugin;
extern crate syntax;
use std::collections::BTreeMap;
use std::usize;
use syntax::ast;
use syntax::codemap;
use syntax::tokenstream;
use syntax::ext::build::AstBuilder;
use syntax::ext::base::{ExtCtxt, MacResult, MacEager, DummyResult};
use syntax::parse::token;
use syntax::print::pprust;
use syntax::fold::Folder;
use syntax::ptr::P;
use rustc_plugin::Registry;
use regex::internal::{Compiler, EmptyLook, Inst, Program};
use regex_syntax::Expr;
#[plugin_registrar]
#[doc(hidden)]
pub fn plugin_registrar(reg: &mut Registry) {
reg.register_macro("regex", native);
}
fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[tokenstream::TokenTree])
-> Box<MacResult+'static> {
let regex = match parse(cx, tts) {
Some(r) => r,
None => return DummyResult::any(sp),
};
let expr = match Expr::parse(®ex) {
Ok(expr) => expr,
Err(err) => {
cx.span_err(sp, &err.to_string());
return DummyResult::any(sp)
}
};
let prog = match Compiler::new().size_limit(usize::MAX).compile(&[expr]) {
Ok(re) => re,
Err(err) => {
cx.span_err(sp, &err.to_string());
return DummyResult::any(sp)
}
};
let names = prog.captures.iter().cloned().collect();
let mut gen = NfaGen {
cx: cx,
sp: sp,
prog: prog,
names: names,
original: regex,
};
MacEager::expr(gen.code())
}
struct NfaGen<'cx, 'a: 'cx> {
cx: &'cx ExtCtxt<'a>,
sp: codemap::Span,
prog: Program,
names: Vec<Option<String>>,
original: String,
}
impl<'a, 'cx> NfaGen<'a, 'cx> {
fn code(&mut self) -> P<ast::Expr> {
let num_cap_locs = 2 * self.prog.captures.len();
let num_insts = self.prog.len();
let cap_names = self.vec_expr(self.names.iter(),
&mut |cx, name| match *name {
Some(ref name) => {
let name = &**name;
quote_expr!(cx, Some($name))
}
None => cx.expr_none(self.sp),
}
);
let capture_name_idx = {
let mut capture_name_idx = BTreeMap::new();
for (i, name) in self.names.iter().enumerate() {
if let Some(ref name) = *name {
capture_name_idx.insert(name.to_owned(), i);
}
}
self.vec_expr(capture_name_idx.iter(),
&mut |cx, (name, group_idx)|
quote_expr!(cx, ($name, $group_idx))
)
};
let is_anchored_start = self.prog.is_anchored_start;
let step_insts = self.step_insts();
let add_insts = self.add_insts();
let regex = &*self.original;
quote_expr!(self.cx, {
#[allow(dead_code)]
static CAPTURES: &'static [Option<&'static str>] = &$cap_names;
#[allow(dead_code)]
static CAPTURE_NAME_IDX: &'static [(&'static str, usize)] = &$capture_name_idx;
#[allow(dead_code)]
fn exec<'t>(
mut caps: &mut [Option<usize>],
input: &'t str,
start: usize,
) -> bool {
#![allow(unused_imports)]
#![allow(unused_mut)]
use regex::internal::{Char, CharInput, InputAt, Input, Inst};
let input = CharInput::new(input.as_bytes());
let at = input.at(start);
return Nfa {
input: input,
ncaps: caps.len(),
}.exec(&mut NfaThreads::new(), &mut caps, at);
struct Nfa<'t> {
input: CharInput<'t>,
ncaps: usize,
}
impl<'t> Nfa<'t> {
#[allow(unused_variables)]
fn exec(
&mut self,
mut q: &mut NfaThreads,
mut caps: &mut [Option<usize>],
mut at: InputAt,
) -> bool {
let mut matched = false;
let (mut clist, mut nlist) = (&mut q.clist, &mut q.nlist);
clist.empty(); nlist.empty();
'LOOP: loop {
if clist.size == 0 {
if matched || (!at.is_start() && $is_anchored_start) {
break;
}
}
if clist.size == 0 || (!$is_anchored_start && !matched) {
self.add(clist, &mut caps, 0, at);
}
let at_next = self.input.at(at.next_pos());
for i in 0..clist.size {
let pc = clist.pc(i);
let tcaps = clist.caps(i);
if self.step(nlist, caps, tcaps, pc, at, at_next) {
matched = true;
if caps.len() == 0 {
break 'LOOP;
}
break;
}
}
if at.char().is_none() {
break;
}
at = at_next;
::std::mem::swap(&mut clist, &mut nlist);
nlist.empty();
}
matched
}
#[allow(unused_variables)]
#[inline]
fn step(
&self,
nlist: &mut Threads,
caps: &mut [Option<usize>],
thread_caps: &mut [Option<usize>],
pc: usize,
at: InputAt,
at_next: InputAt,
) -> bool {
$step_insts;
false
}
fn add(
&self,
nlist: &mut Threads,
thread_caps: &mut [Option<usize>],
pc: usize,
at: InputAt,
) {
if nlist.contains(pc) {
return;
}
let ti = nlist.add(pc);
$add_insts
}
}
struct NfaThreads {
clist: Threads,
nlist: Threads,
}
struct Threads {
dense: [Thread; $num_insts],
sparse: [usize; $num_insts],
size: usize,
}
struct Thread {
pc: usize,
caps: [Option<usize>; $num_cap_locs],
}
impl NfaThreads {
fn new() -> NfaThreads {
NfaThreads {
clist: Threads::new(),
nlist: Threads::new(),
}
}
fn swap(&mut self) {
::std::mem::swap(&mut self.clist, &mut self.nlist);
}
}
impl Threads {
fn new() -> Threads {
Threads {
dense: unsafe { ::std::mem::uninitialized() },
sparse: unsafe { ::std::mem::uninitialized() },
size: 0,
}
}
#[inline]
fn add(&mut self, pc: usize) -> usize {
let i = self.size;
self.dense[i].pc = pc;
self.sparse[pc] = i;
self.size += 1;
i
}
#[inline]
fn thread(&mut self, i: usize) -> &mut Thread {
&mut self.dense[i]
}
#[inline]
fn contains(&self, pc: usize) -> bool {
let s = unsafe { ::std::ptr::read_volatile(&self.sparse[pc]) };
s < self.size && self.dense[s].pc == pc
}
#[inline]
fn empty(&mut self) {
self.size = 0;
}
#[inline]
fn pc(&self, i: usize) -> usize {
self.dense[i].pc
}
#[inline]
fn caps<'r>(&'r mut self, i: usize) -> &'r mut [Option<usize>] {
&mut self.dense[i].caps
}
}
}
::regex::Regex(::regex::internal::_Regex::Plugin(::regex::internal::Plugin {
original: $regex,
names: &CAPTURES,
groups: &CAPTURE_NAME_IDX,
prog: exec,
}))
})
}
fn add_insts(&self) -> P<ast::Expr> {
let arms = self.prog.iter().enumerate().map(|(pc, inst)| {
let body = match *inst {
Inst::EmptyLook(ref inst) => {
let nextpc = inst.goto;
match inst.look {
EmptyLook::StartLine => {
quote_expr!(self.cx, {
let prev = self.input.previous_char(at);
if prev.is_none() || prev == '\n' {
self.add(nlist, thread_caps, $nextpc, at);
}
})
}
EmptyLook::EndLine => {
quote_expr!(self.cx, {
if at.char().is_none() || at.char() == '\n' {
self.add(nlist, thread_caps, $nextpc, at);
}
})
}
EmptyLook::StartText => {
quote_expr!(self.cx, {
let prev = self.input.previous_char(at);
if prev.is_none() {
self.add(nlist, thread_caps, $nextpc, at);
}
})
}
EmptyLook::EndText => {
quote_expr!(self.cx, {
if at.char().is_none() {
self.add(nlist, thread_caps, $nextpc, at);
}
})
}
EmptyLook::WordBoundary
| EmptyLook::NotWordBoundary => {
let m = if inst.look == EmptyLook::WordBoundary {
quote_expr!(self.cx, { w1 ^ w2 })
} else {
quote_expr!(self.cx, { !(w1 ^ w2) })
};
quote_expr!(self.cx, {
let prev = self.input.previous_char(at);
let w1 = prev.is_word_char();
let w2 = at.char().is_word_char();
if $m {
self.add(nlist, thread_caps, $nextpc, at);
}
})
}
EmptyLook::WordBoundaryAscii
| EmptyLook::NotWordBoundaryAscii => {
unreachable!()
}
}
}
Inst::Save(ref inst) => {
let nextpc = inst.goto;
let slot = inst.slot;
quote_expr!(self.cx, {
if $slot >= self.ncaps {
self.add(nlist, thread_caps, $nextpc, at);
} else {
let old = thread_caps[$slot];
thread_caps[$slot] = Some(at.pos());
self.add(nlist, thread_caps, $nextpc, at);
thread_caps[$slot] = old;
}
})
}
Inst::Split(ref inst) => {
let (x, y) = (inst.goto1, inst.goto2);
quote_expr!(self.cx, {
self.add(nlist, thread_caps, $x, at);
self.add(nlist, thread_caps, $y, at);
})
}
_ => quote_expr!(self.cx, {
let mut t = &mut nlist.thread(ti);
for (slot, val) in t.caps.iter_mut().zip(thread_caps.iter()) {
*slot = *val;
}
}),
};
self.arm_inst(pc, body)
}).collect::<Vec<ast::Arm>>();
self.match_insts(arms)
}
fn step_insts(&self) -> P<ast::Expr> {
let arms = self.prog.iter().enumerate().map(|(pc, inst)| {
let body = match *inst {
Inst::Match(_) => quote_expr!(self.cx, {
for (slot, val) in caps.iter_mut().zip(thread_caps.iter()) {
*slot = *val;
}
return true;
}),
Inst::Char(ref inst) => {
let nextpc = inst.goto;
let c = inst.c;
quote_expr!(self.cx, {
if $c == at.char() {
self.add(nlist, thread_caps, $nextpc, at_next);
}
return false;
})
}
Inst::Ranges(ref inst) => {
let match_class = self.match_class(&inst.ranges);
let nextpc = inst.goto;
quote_expr!(self.cx, {
let mut c = at.char();
if let Some(c) = c.as_char() {
if $match_class {
self.add(nlist, thread_caps, $nextpc, at_next);
}
}
return false;
})
}
_ => quote_expr!(self.cx, { return false; }),
};
self.arm_inst(pc, body)
}).collect::<Vec<ast::Arm>>();
self.match_insts(arms)
}
fn match_class(&self, ranges: &[(char, char)]) -> P<ast::Expr> {
let mut arms = ranges.iter().map(|&(start, end)| {
let pat = self.cx.pat(
self.sp, ast::PatKind::Range(
quote_expr!(self.cx, $start), quote_expr!(self.cx, $end)));
self.cx.arm(self.sp, vec!(pat), quote_expr!(self.cx, true))
}).collect::<Vec<ast::Arm>>();
arms.push(self.wild_arm_expr(quote_expr!(self.cx, false)));
let match_on = quote_expr!(self.cx, c);
self.cx.expr_match(self.sp, match_on, arms)
}
fn match_insts(&self, mut arms: Vec<ast::Arm>) -> P<ast::Expr> {
arms.push(self.wild_arm_expr(self.empty_block()));
self.cx.expr_match(self.sp, quote_expr!(self.cx, pc), arms)
}
fn empty_block(&self) -> P<ast::Expr> {
quote_expr!(self.cx, {})
}
fn arm_inst(&self, pc: usize, body: P<ast::Expr>) -> ast::Arm {
let pc_pat = self.cx.pat_lit(self.sp, quote_expr!(self.cx, $pc));
self.cx.arm(self.sp, vec!(pc_pat), body)
}
fn wild_arm_expr(&self, body: P<ast::Expr>) -> ast::Arm {
ast::Arm {
attrs: vec!(),
pats: vec!(P(ast::Pat{
id: ast::DUMMY_NODE_ID,
span: self.sp,
node: ast::PatKind::Wild,
})),
guard: None,
body: body,
}
}
fn vec_expr<T, It: Iterator<Item=T>>(
&self,
xs: It,
to_expr: &mut FnMut(&ExtCtxt, T) -> P<ast::Expr>,
) -> P<ast::Expr> {
let exprs = xs.map(|x| to_expr(self.cx, x)).collect();
self.cx.expr_vec(self.sp, exprs)
}
}
fn parse(cx: &mut ExtCtxt, tts: &[tokenstream::TokenTree]) -> Option<String> {
let mut parser = cx.new_parser_from_tts(tts);
if let Ok(expr) = parser.parse_expr() {
let entry = cx.expander().fold_expr(expr);
let regex = match entry.node {
ast::ExprKind::Lit(ref lit) => {
match lit.node {
ast::LitKind::Str(ref s, _) => s.to_string(),
_ => {
cx.span_err(entry.span, &format!(
"expected string literal but got `{}`",
pprust::lit_to_string(&**lit)));
return None
}
}
}
_ => {
cx.span_err(entry.span, &format!(
"expected string literal but got `{}`",
pprust::expr_to_string(&*entry)));
return None
}
};
if !parser.eat(&token::Eof) {
cx.span_err(parser.span, "only one string literal allowed");
return None;
}
Some(regex)
} else {
cx.parse_sess().span_diagnostic.err("failure parsing token tree");
None
}
}