use self::MatchKind::*;
use self::StepState::*;
use std::cmp;
use std::mem;
use compile::Program;
use compile::Inst::*;
use syntax;
pub type CaptureLocs = Vec<Option<usize>>;
#[derive(Copy, Clone)]
pub enum MatchKind {
Exists,
Location,
Submatches,
}
pub fn run<'r, 't>(which: MatchKind, prog: &'r Program, input: &'t str,
start: usize, end: usize) -> CaptureLocs {
Nfa {
which: which,
prog: prog,
input: input,
start: start,
end: end,
ic: 0,
chars: CharReader::new(input),
}.run()
}
struct Nfa<'r, 't> {
which: MatchKind,
prog: &'r Program,
input: &'t str,
start: usize,
end: usize,
ic: usize,
chars: CharReader<'t>,
}
#[derive(Copy, Clone)]
pub enum StepState {
StepMatchEarlyReturn,
StepMatch,
StepContinue,
}
impl<'r, 't> Nfa<'r, 't> {
fn run(&mut self) -> CaptureLocs {
let ncaps = match self.which {
Exists => 0,
Location => 1,
Submatches => self.prog.num_captures(),
};
let mut matched = false;
let ninsts = self.prog.insts.len();
let mut clist = Threads::new(self.which, ninsts, ncaps);
let mut nlist = Threads::new(self.which, ninsts, ncaps);
let mut groups = vec![None; ncaps * 2];
let prefix_anchor = match self.prog.insts[1] {
StartText => true,
_ => false,
};
self.ic = self.start;
let mut next_ic = self.chars.set(self.start);
while self.ic <= self.end {
if clist.size == 0 {
if matched {
break
}
if self.ic != 0 && prefix_anchor {
break;
}
if self.prog.prefix.len() > 0 {
let needle = self.prog.prefix.as_bytes();
let haystack = &self.input.as_bytes()[self.ic..];
match find_prefix(needle, haystack) {
None => break,
Some(i) => {
self.ic += i;
next_ic = self.chars.set(self.ic);
}
}
}
}
if clist.size == 0 || (!prefix_anchor && !matched) {
self.add(&mut clist, 0, &mut groups)
}
self.ic = next_ic;
next_ic = self.chars.advance();
for i in 0..clist.size {
let pc = clist.pc(i);
let step_state = self.step(&mut groups, &mut nlist,
clist.groups(i), pc);
match step_state {
StepMatchEarlyReturn => return vec![Some(0), Some(0)],
StepMatch => { matched = true; break },
StepContinue => {},
}
}
mem::swap(&mut clist, &mut nlist);
nlist.empty();
}
match self.which {
Exists if matched => vec![Some(0), Some(0)],
Exists => vec![None, None],
Location | Submatches => groups,
}
}
fn step(&self, groups: &mut [Option<usize>], nlist: &mut Threads,
caps: &mut [Option<usize>], pc: usize)
-> StepState {
match self.prog.insts[pc] {
Match => {
match self.which {
Exists => {
return StepMatchEarlyReturn
}
Location => {
groups[0] = caps[0];
groups[1] = caps[1];
return StepMatch
}
Submatches => {
for (slot, val) in groups.iter_mut().zip(caps.iter()) {
*slot = *val;
}
return StepMatch
}
}
}
OneChar { c, casei } => {
if self.char_eq(casei, self.chars.prev, c) {
self.add(nlist, pc+1, caps);
}
}
CharClass(ref cls) => {
if self.chars.prev.map(|c| cls.matches(c)).unwrap_or(false) {
self.add(nlist, pc+1, caps);
}
}
Any => self.add(nlist, pc+1, caps),
AnyNoNL => {
if !self.char_eq(false, self.chars.prev, '\n') {
self.add(nlist, pc+1, caps)
}
}
StartLine | EndLine | StartText | EndText
| WordBoundary | NotWordBoundary
| Save(_) | Jump(_) | Split(_, _) => {},
}
StepContinue
}
fn add(&self, nlist: &mut Threads, pc: usize, groups: &mut [Option<usize>]) {
if nlist.contains(pc) {
return
}
match self.prog.insts[pc] {
StartLine => {
nlist.add(pc, groups, true);
if self.chars.is_begin() || self.char_is(self.chars.prev, '\n') {
self.add(nlist, pc + 1, groups);
}
}
StartText => {
nlist.add(pc, groups, true);
if self.chars.is_begin() {
self.add(nlist, pc + 1, groups);
}
}
EndLine => {
nlist.add(pc, groups, true);
if self.chars.is_end() || self.char_is(self.chars.cur, '\n') {
self.add(nlist, pc + 1, groups)
}
}
EndText => {
nlist.add(pc, groups, true);
if self.chars.is_end() {
self.add(nlist, pc + 1, groups)
}
}
WordBoundary => {
nlist.add(pc, groups, true);
if self.chars.is_word_boundary() {
self.add(nlist, pc + 1, groups);
}
}
NotWordBoundary => {
nlist.add(pc, groups, true);
if !self.chars.is_word_boundary() {
self.add(nlist, pc + 1, groups);
}
}
Save(slot) => {
nlist.add(pc, groups, true);
match self.which {
Location if slot <= 1 => {
let old = groups[slot];
groups[slot] = Some(self.ic);
self.add(nlist, pc + 1, groups);
groups[slot] = old;
}
Submatches => {
let old = groups[slot];
groups[slot] = Some(self.ic);
self.add(nlist, pc + 1, groups);
groups[slot] = old;
}
Exists | Location => self.add(nlist, pc + 1, groups),
}
}
Jump(to) => {
nlist.add(pc, groups, true);
self.add(nlist, to, groups)
}
Split(x, y) => {
nlist.add(pc, groups, true);
self.add(nlist, x, groups);
self.add(nlist, y, groups);
}
Match | OneChar{..} | CharClass(_) | Any | AnyNoNL => {
nlist.add(pc, groups, false);
}
}
}
#[inline]
fn char_eq(&self, casei: bool, textc: Option<char>, regc: char) -> bool {
match textc {
None => false,
Some(textc) => {
regc == textc || (casei && syntax::simple_case_fold(regc) == syntax::simple_case_fold(textc))
}
}
}
#[inline]
fn char_is(&self, textc: Option<char>, regc: char) -> bool {
textc == Some(regc)
}
}
pub struct CharReader<'t> {
pub prev: Option<char>,
pub cur: Option<char>,
input: &'t str,
next: usize,
}
impl<'t> CharReader<'t> {
pub fn new(input: &'t str) -> CharReader<'t> {
CharReader {
prev: None,
cur: None,
input: input,
next: 0,
}
}
#[inline]
pub fn set(&mut self, ic: usize) -> usize {
self.prev = None;
self.cur = None;
self.next = 0;
if self.input.len() == 0 {
return 1
}
if ic > 0 {
let i = cmp::min(ic, self.input.len());
self.prev = self.input[..i].chars().rev().next();
}
if ic < self.input.len() {
let cur = self.input[ic..].chars().next().unwrap();
self.cur = Some(cur);
self.next = ic + cur.len_utf8();
self.next
} else {
self.input.len() + 1
}
}
#[inline]
pub fn advance(&mut self) -> usize {
self.prev = self.cur;
if self.next < self.input.len() {
let cur = self.input[self.next..].chars().next().unwrap();
self.cur = Some(cur);
self.next += cur.len_utf8();
} else {
self.cur = None;
self.next = self.input.len() + 1;
}
self.next
}
#[inline]
pub fn is_begin(&self) -> bool { self.prev.is_none() }
#[inline]
pub fn is_end(&self) -> bool { self.cur.is_none() }
pub fn is_word_boundary(&self) -> bool {
fn is_word(c: Option<char>) -> bool {
c.map(syntax::is_word_char).unwrap_or(false)
}
if self.is_begin() {
return is_word(self.cur);
}
if self.is_end() {
return is_word(self.prev);
}
(is_word(self.cur) && !is_word(self.prev))
|| (is_word(self.prev) && !is_word(self.cur))
}
}
#[derive(Clone)]
struct Thread {
pc: usize,
groups: Vec<Option<usize>>,
}
struct Threads {
which: MatchKind,
queue: Vec<Thread>,
sparse: Vec<usize>,
size: usize,
}
impl Threads {
fn new(which: MatchKind, num_insts: usize, ncaps: usize) -> Threads {
let t = Thread { pc: 0, groups: vec![None; ncaps * 2] };
Threads {
which: which,
queue: vec![t; num_insts],
sparse: vec![0; num_insts],
size: 0,
}
}
fn add(&mut self, pc: usize, groups: &[Option<usize>], empty: bool) {
let t = &mut self.queue[self.size];
t.pc = pc;
match (empty, self.which) {
(_, Exists) | (true, _) => {},
(false, Location) => {
t.groups[0] = groups[0];
t.groups[1] = groups[1];
}
(false, Submatches) => {
for (slot, val) in t.groups.iter_mut().zip(groups.iter()) {
*slot = *val;
}
}
}
self.sparse[pc] = self.size;
self.size += 1;
}
#[inline]
fn contains(&self, pc: usize) -> bool {
let s = self.sparse[pc];
s < self.size && self.queue[s].pc == pc
}
#[inline]
fn empty(&mut self) {
self.size = 0;
}
#[inline]
fn pc(&self, i: usize) -> usize {
self.queue[i].pc
}
#[inline]
fn groups(&mut self, i: usize) -> &mut [Option<usize>] {
&mut self.queue[i].groups
}
}
#[inline]
pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option<usize> {
let (hlen, nlen) = (haystack.len(), needle.len());
if nlen > hlen || nlen == 0 {
return None
}
for (offset, window) in haystack.windows(nlen).enumerate() {
if window == needle {
return Some(offset)
}
}
None
}