use std::collections::HashMap;
use std::sync::Arc;
use backtrack::{self, Backtrack};
use compile::Compiler;
use dfa::{self, Dfa};
use input::{ByteInput, CharInput};
use literals::BuildPrefixes;
use nfa::Nfa;
use prog::{Program, InstPtr};
use syntax;
use {Regex, Error};
pub type CaptureSlots<'a> = &'a mut [CaptureSlot];
pub type CaptureSlot = Option<usize>;
#[derive(Debug)]
pub struct Search<'caps, 'matches> {
pub captures: CaptureSlots<'caps>,
pub matches: &'matches mut [bool],
}
impl<'caps, 'matches> Search<'caps, 'matches> {
pub fn quit_after_first_match(&self) -> bool {
self.captures.is_empty() && self.matches.len() == 1
}
pub fn all_matched(&self) -> bool {
self.matches.iter().all(|m| *m)
}
pub fn copy_captures_from(&mut self, caps: &[Option<usize>]) {
for (slot, val) in self.captures.iter_mut().zip(caps.iter()) {
*slot = *val;
}
}
pub fn set_match(&mut self, match_slot: usize) {
if let Some(old) = self.matches.get_mut(match_slot) {
*old = true;
}
}
pub fn set_start(&mut self, pos: Option<usize>) {
self.set_capture(0, pos);
}
pub fn set_end(&mut self, pos: Option<usize>) {
self.set_capture(1, pos);
}
fn set_capture(&mut self, i: usize, pos: Option<usize>) {
if let Some(old_pos) = self.captures.get_mut(i) {
*old_pos = pos;
}
}
}
#[derive(Clone, Debug)]
pub struct Exec {
res: Vec<String>,
prog: Program,
dfa: Program,
dfa_reverse: Program,
can_dfa: bool,
match_engine: MatchEngine,
}
pub struct ExecBuilder {
res: Vec<String>,
match_engine: MatchEngine,
size_limit: usize,
bytes: bool,
}
impl ExecBuilder {
pub fn new(re: &str) -> Self {
Self::new_many(&[re])
}
pub fn new_many<I, S>(res: I) -> Self
where S: AsRef<str>, I: IntoIterator<Item=S> {
ExecBuilder {
res: res.into_iter().map(|s| s.as_ref().to_owned()).collect(),
match_engine: MatchEngine::Automatic,
size_limit: 10 * (1 << 20),
bytes: false,
}
}
pub fn automatic(mut self) -> Self {
self.match_engine = MatchEngine::Automatic;
self
}
pub fn nfa(mut self) -> Self {
self.match_engine = MatchEngine::Nfa;
self
}
pub fn bounded_backtracking(mut self) -> Self {
self.match_engine = MatchEngine::Backtrack;
self
}
pub fn size_limit(mut self, bytes: usize) -> Self {
self.size_limit = bytes;
self
}
pub fn bytes(mut self, yes: bool) -> Self {
self.bytes = yes;
self
}
pub fn build(self) -> Result<Exec, Error> {
if self.res.is_empty() {
return Err(Error::InvalidSet);
}
let mut exprs = vec![];
for re in &self.res {
exprs.push(try!(syntax::Expr::parse(re)));
}
let mut prog = try!(
Compiler::new()
.size_limit(self.size_limit)
.bytes(self.bytes)
.compile(&exprs));
let mut dfa = try!(
Compiler::new()
.size_limit(self.size_limit)
.dfa(true)
.compile(&exprs));
let dfa_reverse = try!(
Compiler::new()
.size_limit(self.size_limit)
.dfa(true)
.reverse(true)
.compile(&exprs));
prog.prefixes = BuildPrefixes::new(&prog).literals().into_matcher();
dfa.prefixes = prog.prefixes.clone();
let can_dfa = dfa::can_exec(&dfa);
Ok(Exec {
res: self.res,
prog: prog,
dfa: dfa,
dfa_reverse: dfa_reverse,
can_dfa: can_dfa,
match_engine: self.match_engine,
})
}
}
impl Exec {
pub fn exec<'c, 'm>(
&self,
search: &mut Search<'c, 'm>,
text: &str,
start: usize,
) -> bool {
match self.match_engine {
MatchEngine::Automatic => self.exec_auto(search, text, start),
MatchEngine::Backtrack => self.exec_backtrack(search, text, start),
MatchEngine::Nfa => self.exec_nfa(search, text, start),
}
}
fn exec_auto<'c, 'm>(
&self,
search: &mut Search<'c, 'm>,
text: &str,
start: usize,
) -> bool {
if search.captures.len() <= 2 && self.prog.prefixes.at_match() {
self.exec_literals(search, text, start)
} else if self.can_dfa {
self.exec_dfa(search, text, start)
} else {
self.exec_auto_nfa(search, text, start)
}
}
fn exec_dfa<'a, 'c, 'm>(
&self,
search: &'a mut Search<'c, 'm>,
text: &str,
start: usize,
) -> bool {
debug_assert!(self.can_dfa);
let btext = text.as_bytes();
if !Dfa::exec(&self.dfa, search, btext, start) {
return false;
}
let match_end = match search.captures.get(1) {
Some(&Some(i)) => i,
_ => return true,
};
if start == match_end {
if search.captures.len() == 2 {
search.captures[0] = Some(start);
search.captures[1] = Some(start);
return true;
}
return self.exec_auto_nfa(search, text, start);
}
let matched = Dfa::exec(
&self.dfa_reverse, search, &btext[start..], match_end - start);
if !matched {
panic!("BUG: forward match implies backward match");
}
let match_start = match search.captures.get(0) {
Some(&Some(i)) => start + i,
_ => panic!("BUG: early match can't happen on reverse search"),
};
if search.captures.len() == 2 {
search.captures[0] = Some(match_start);
search.captures[1] = Some(match_end);
return true;
}
self.exec_auto_nfa(search, text, match_start)
}
fn exec_auto_nfa<'c, 'm>(
&self,
search: &mut Search<'c, 'm>,
text: &str,
start: usize,
) -> bool {
if backtrack::should_exec(self.prog.len(), text.len()) {
self.exec_backtrack(search, text, start)
} else {
self.exec_nfa(search, text, start)
}
}
fn exec_nfa<'c, 'm>(
&self,
search: &mut Search<'c, 'm>,
text: &str,
start: usize,
) -> bool {
if self.prog.is_bytes {
Nfa::exec(&self.prog, search, ByteInput::new(text), start)
} else {
Nfa::exec(&self.prog, search, CharInput::new(text), start)
}
}
fn exec_backtrack<'c, 'm>(
&self,
search: &mut Search<'c, 'm>,
text: &str,
start: usize,
) -> bool {
if self.prog.is_bytes {
Backtrack::exec(&self.prog, search, ByteInput::new(text), start)
} else {
Backtrack::exec(&self.prog, search, CharInput::new(text), start)
}
}
fn exec_literals<'c, 'm>(
&self,
search: &mut Search<'c, 'm>,
text: &str,
start: usize,
) -> bool {
debug_assert!(self.prog.prefixes.at_match());
match self.prog.prefixes.find(&text.as_bytes()[start..]) {
None => false,
Some((s, e)) => {
if search.captures.len() == 2 {
search.captures[0] = Some(start + s);
search.captures[1] = Some(start + e);
}
true
}
}
}
pub fn into_regex(self) -> Regex {
Regex::Dynamic(self)
}
pub fn regex_strings(&self) -> &[String] {
&self.res
}
pub fn matches(&self) -> &[InstPtr] {
&self.prog.matches
}
pub fn captures(&self) -> &[Option<String>] {
&self.prog.captures
}
pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
&self.prog.capture_name_idx
}
}
#[doc(hidden)]
#[derive(Clone, Copy, Debug)]
enum MatchEngine {
Automatic,
Backtrack,
Nfa,
}