use std::cell::RefCell;
use std::collections::HashMap;
use std::ops::Deref;
use std::sync::Arc;
use mempool::Pool;
use syntax::{Expr, ExprBuilder, Literals};
use backtrack::{self, Backtrack, BacktrackCache};
use compile::Compiler;
use dfa::{self, Dfa, DfaCache, DfaResult};
use error::Error;
use input::{ByteInput, CharInput};
use literals::LiteralSearcher;
use nfa::{Nfa, NfaCache};
use params::Params;
use prog::{Program, InstPtr};
use re_bytes;
use re_unicode;
use set;
#[derive(Clone, Debug)]
pub struct Exec {
res: Vec<String>,
nfa: Program,
dfa: Program,
dfa_reverse: Program,
can_dfa: bool,
suffixes: LiteralSearcher,
match_engine: MatchEngine,
cache: ProgramPool,
}
pub struct ExecBuilder {
res: Vec<String>,
match_engine: MatchEngine,
size_limit: usize,
bytes: bool,
only_utf8: bool,
}
impl ExecBuilder {
pub fn new(re: &str) -> Self {
Self::new_many(&[re])
}
pub fn new_many<I, S>(res: I) -> Self
where S: AsRef<str>, I: IntoIterator<Item=S> {
ExecBuilder {
res: res.into_iter().map(|s| s.as_ref().to_owned()).collect(),
match_engine: MatchEngine::Automatic,
size_limit: 10 * (1 << 20),
bytes: false,
only_utf8: true,
}
}
pub fn automatic(mut self) -> Self {
self.match_engine = MatchEngine::Automatic;
self
}
pub fn nfa(mut self) -> Self {
self.match_engine = MatchEngine::Nfa;
self
}
pub fn bounded_backtracking(mut self) -> Self {
self.match_engine = MatchEngine::Backtrack;
self
}
pub fn size_limit(mut self, bytes: usize) -> Self {
self.size_limit = bytes;
self
}
pub fn bytes(mut self, yes: bool) -> Self {
self.bytes = yes;
self
}
pub fn only_utf8(mut self, yes: bool) -> Self {
self.only_utf8 = yes;
self
}
pub fn build(self) -> Result<Exec, Error> {
if self.res.is_empty() {
return Ok(Exec {
res: vec![],
nfa: Program::new(),
dfa: Program::new(),
dfa_reverse: Program::new(),
can_dfa: false,
suffixes: LiteralSearcher::empty(),
match_engine: MatchEngine::Automatic,
cache: ProgramPool::new(),
});
}
let parsed = try!(parse(&self.res, self.only_utf8));
let mut nfa = try!(
Compiler::new()
.size_limit(self.size_limit)
.bytes(self.bytes)
.only_utf8(self.only_utf8)
.compile(&parsed.exprs));
let mut dfa = try!(
Compiler::new()
.size_limit(self.size_limit)
.dfa(true)
.only_utf8(self.only_utf8)
.compile(&parsed.exprs));
let dfa_reverse = try!(
Compiler::new()
.size_limit(self.size_limit)
.dfa(true)
.only_utf8(self.only_utf8)
.reverse(true)
.compile(&parsed.exprs));
let prefixes = parsed.prefixes.unambiguous_prefixes();
let suffixes = parsed.suffixes.unambiguous_suffixes();
nfa.prefixes = LiteralSearcher::prefixes(prefixes);
dfa.prefixes = nfa.prefixes.clone();
let can_dfa = dfa::can_exec(&dfa);
Ok(Exec {
res: self.res,
nfa: nfa,
dfa: dfa,
dfa_reverse: dfa_reverse,
can_dfa: can_dfa,
suffixes: LiteralSearcher::suffixes(suffixes),
match_engine: self.match_engine,
cache: ProgramPool::new(),
})
}
}
impl Exec {
pub fn exec(
&self,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
if self.nfa.insts.is_empty() {
return false;
}
if text.len() > 256 && !self.is_anchor_match(text, start) {
return false;
}
match self.match_engine {
MatchEngine::Automatic => self.exec_auto(params, text, start),
MatchEngine::Backtrack => {
let mut cache = &mut *self.cache.get().borrow_mut();
self.exec_backtrack(cache, params, text, start)
}
MatchEngine::Nfa => {
let mut cache = &mut *self.cache.get().borrow_mut();
self.exec_nfa(cache, params, text, start)
}
}
}
fn exec_auto(
&self,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
if params.captures().len() <= 2 && self.nfa.prefixes.complete() {
self.exec_literals(&self.nfa.prefixes, params, text, start)
} else if self.can_dfa {
self.exec_dfa(params, text, start)
} else {
let mut cache = &mut *self.cache.get().borrow_mut();
self.exec_auto_nfa(cache, params, text, start)
}
}
fn exec_dfa<'a>(
&self,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
debug_assert!(self.can_dfa);
if self.should_suffix_scan() {
return self.exec_dfa_reverse_first(params, text, start);
}
let mut cache = &mut *self.cache.get().borrow_mut();
match Dfa::exec(&self.dfa, &mut cache.dfa, params, text, start) {
DfaResult::Match => {} DfaResult::NoMatch => return false,
DfaResult::Quit => {
params.reset();
return self.exec_auto_nfa(cache, params, text, start);
}
}
if params.style().match_only() {
return true;
}
let match_end = match params.captures().get(1) {
Some(&Some(i)) => i,
_ => return true,
};
if start == match_end {
if params.captures().len() == 2 {
params.set_start(Some(start));
params.set_end(Some(start));
return true;
}
return self.exec_auto_nfa(cache, params, text, start);
}
let result = Dfa::exec(
&self.dfa_reverse,
&mut cache.dfa_reverse,
params,
&text[start..],
match_end - start);
match result {
DfaResult::Match => {} DfaResult::NoMatch => {
panic!("BUG: forward match implies reverse match");
}
DfaResult::Quit => {
params.reset();
return self.exec_auto_nfa(cache, params, text, start);
}
}
let match_start = match params.captures().get(0) {
Some(&Some(i)) => start + i,
_ => panic!("BUG: early match can't happen on reverse search"),
};
if params.captures().len() == 2 {
params.set_start(Some(match_start));
params.set_end(Some(match_end));
return true;
}
self.exec_auto_nfa(cache, params, text, match_start)
}
fn exec_dfa_reverse_first(
&self,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
let mut cache = &mut *self.cache.get().borrow_mut();
let lcs = self.suffixes.lcs();
let mut end = start;
while end <= text.len() {
end = end + match lcs.find(&text[end..]) {
None => return false,
Some(e) => e + lcs.len(),
};
params.set_end(Some(end));
let result = Dfa::exec(
&self.dfa_reverse,
&mut cache.dfa_reverse,
params,
&text[start..end],
end - start);
let match_start = match result {
DfaResult::Match => match params.captures().get(0) {
Some(&Some(i)) => start + i,
_ => return true,
},
DfaResult::NoMatch => continue,
DfaResult::Quit => {
params.reset();
return self.exec_auto_nfa(cache, params, text, start);
}
};
if params.style().match_only() {
return true;
}
let result = Dfa::exec(
&self.dfa,
&mut cache.dfa,
params,
text,
match_start);
let match_end = match result {
DfaResult::Match => match params.captures().get(1) {
Some(&Some(i)) => i,
_ => panic!("BUG: early match can't happen"),
},
DfaResult::NoMatch => {
panic!("BUG: reverse match implies forward match");
}
DfaResult::Quit => {
params.reset();
return self.exec_auto_nfa(cache, params, text, start);
}
};
if params.captures().len() == 2 {
params.set_start(Some(match_start));
params.set_end(Some(match_end));
return true;
}
return self.exec_auto_nfa(cache, params, text, match_start);
}
false
}
fn exec_auto_nfa(
&self,
cache: &mut ProgramCache,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
if backtrack::should_exec(self.nfa.len(), text.len()) {
self.exec_backtrack(cache, params, text, start)
} else {
self.exec_nfa(cache, params, text, start)
}
}
fn exec_nfa(
&self,
cache: &mut ProgramCache,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
if self.nfa.uses_bytes() {
Nfa::exec(
&self.nfa,
&mut cache.nfa,
params,
ByteInput::new(text),
start)
} else {
Nfa::exec(
&self.nfa,
&mut cache.nfa,
params,
CharInput::new(text),
start)
}
}
fn exec_backtrack(
&self,
cache: &mut ProgramCache,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
if self.nfa.uses_bytes() {
Backtrack::exec(
&self.nfa,
&mut cache.backtrack,
params,
ByteInput::new(text),
start)
} else {
Backtrack::exec(
&self.nfa,
&mut cache.backtrack,
params,
CharInput::new(text),
start)
}
}
fn exec_literals(
&self,
literals: &LiteralSearcher,
params: &mut Params,
text: &[u8],
start: usize,
) -> bool {
debug_assert!(literals.complete());
debug_assert!(self.res.len() == 1);
match literals.find(&text[start..]) {
None => false,
Some((s, e)) => {
if s > 0 && self.nfa.is_anchored_start
|| e < text.len() && self.nfa.is_anchored_end {
return false;
}
if params.captures().len() == 2 {
params.set_start(Some(start + s));
params.set_end(Some(start + e));
}
params.set_match(0);
true
}
}
}
pub fn is_anchor_match(&self, text: &[u8], start: usize) -> bool {
self.is_anchor_start_match(text, start)
&& self.is_anchor_end_match(text, start)
}
fn is_anchor_start_match(&self, text: &[u8], _start: usize) -> bool {
if !self.nfa.is_anchored_start || self.nfa.prefixes.is_empty() {
return true;
}
self.nfa.prefixes.find_start(text).is_some()
}
fn is_anchor_end_match(&self, text: &[u8], _start: usize) -> bool {
if !self.nfa.is_anchored_end || self.suffixes.is_empty() {
return true;
}
self.suffixes.find_end(text).is_some()
}
fn should_suffix_scan(&self) -> bool {
if self.suffixes.is_empty() {
return false;
}
let lcs_len = self.suffixes.lcs().char_len();
lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
}
pub fn into_regex(self) -> re_unicode::Regex {
re_unicode::Regex::from(self)
}
pub fn into_regex_set(self) -> set::RegexSet {
set::RegexSet::from(self)
}
pub fn into_byte_regex(self) -> re_bytes::Regex {
re_bytes::Regex::from(self)
}
pub fn into_byte_regex_set(self) -> re_bytes::RegexSet {
re_bytes::RegexSet::from(self)
}
pub fn regex_strings(&self) -> &[String] {
&self.res
}
pub fn matches(&self) -> &[InstPtr] {
&self.nfa.matches
}
pub fn captures(&self) -> &[Option<String>] {
&self.nfa.captures
}
pub fn capture_name_idx(&self) -> &Arc<HashMap<String, usize>> {
&self.nfa.capture_name_idx
}
}
#[doc(hidden)]
#[derive(Clone, Copy, Debug)]
enum MatchEngine {
Automatic,
Backtrack,
Nfa,
}
#[derive(Debug)]
struct ProgramPool(Pool<RefCell<ProgramCache>>);
impl ProgramPool {
fn new() -> Self {
let create = || RefCell::new(ProgramCache::new());
ProgramPool(Pool::new(Box::new(create)))
}
}
impl Deref for ProgramPool {
type Target = Pool<RefCell<ProgramCache>>;
fn deref(&self) -> &Self::Target { &self.0 }
}
impl Clone for ProgramPool {
fn clone(&self) -> ProgramPool { ProgramPool::new() }
}
#[derive(Debug)]
struct ProgramCache {
nfa: NfaCache,
backtrack: BacktrackCache,
dfa: DfaCache,
dfa_reverse: DfaCache,
}
impl ProgramCache {
fn new() -> Self {
ProgramCache {
nfa: NfaCache::new(),
backtrack: BacktrackCache::new(),
dfa: DfaCache::new(),
dfa_reverse: DfaCache::new(),
}
}
}
impl Clone for ProgramCache {
fn clone(&self) -> ProgramCache {
ProgramCache::new()
}
}
struct Parsed {
exprs: Vec<Expr>,
prefixes: Literals,
suffixes: Literals,
}
fn parse(res: &[String], only_utf8: bool) -> Result<Parsed, Error> {
let mut exprs = Vec::with_capacity(res.len());
let mut prefixes = Some(Literals::empty());
let mut suffixes = Some(Literals::empty());
for re in res {
let parser =
ExprBuilder::new()
.allow_bytes(!only_utf8)
.unicode(only_utf8);
let expr = try!(parser.parse(re));
prefixes = prefixes.and_then(|mut prefixes| {
if !prefixes.union_prefixes(&expr) {
None
} else {
Some(prefixes)
}
});
suffixes = suffixes.and_then(|mut suffixes| {
if !suffixes.union_suffixes(&expr) {
None
} else {
Some(suffixes)
}
});
exprs.push(expr);
}
if res.len() != 1 {
if let Some(ref mut prefixes) = prefixes {
prefixes.cut();
}
if let Some(ref mut suffixes) = suffixes {
suffixes.cut();
}
}
Ok(Parsed {
exprs: exprs,
prefixes: prefixes.unwrap_or(Literals::empty()),
suffixes: suffixes.unwrap_or(Literals::empty()),
})
}