use std::fmt;
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
use memchr::memchr;
#[derive(Clone)]
pub enum Prefix {
Empty,
Byte(u8),
Bytes {
chars: Vec<u8>,
sparse: Vec<bool>,
},
Single(SingleSearch),
Automaton(FullAcAutomaton<String>),
}
impl Prefix {
pub fn new(mut pfxs: Vec<String>) -> Prefix {
if pfxs.len() == 0 || pfxs[0].len() == 0 {
Prefix::Empty
} else if pfxs.len() == 1 && pfxs[0].len() == 1 {
Prefix::Byte(pfxs[0].as_bytes()[0])
} else if pfxs.len() >= 2 && pfxs.iter().all(|s| s.len() == 1) {
let mut set = vec![false; 256];
let mut chars = vec![];
for p in pfxs {
chars.push(p.as_bytes()[0]);
set[p.as_bytes()[0] as usize] = true;
}
Prefix::Bytes { chars: chars, sparse: set }
} else if pfxs.len() == 1 {
Prefix::Single(SingleSearch::new(pfxs.pop().unwrap()))
} else {
Prefix::Automaton(AcAutomaton::new(pfxs).into_full())
}
}
pub fn find(&self, haystack: &str) -> Option<(usize, usize)> {
use self::Prefix::*;
match *self {
Empty => Some((0, 0)),
Byte(b) => memchr(b, haystack.as_bytes()).map(|i| (i, i+1)),
Bytes { ref sparse, .. } => {
find_singles(sparse, haystack.as_bytes())
}
Single(ref searcher) => {
searcher.find(haystack).map(|i| (i, i + searcher.pat.len()))
}
Automaton(ref aut) => {
aut.find(haystack).next().map(|m| (m.start, m.end))
}
}
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn len(&self) -> usize {
match *self {
Prefix::Empty => 0,
Prefix::Byte(_) => 1,
Prefix::Bytes { ref chars, .. } => chars.len(),
Prefix::Single(_) => 1,
Prefix::Automaton(ref aut) => aut.len(),
}
}
pub fn preserves_priority(&self) -> bool {
match *self {
Prefix::Empty => true,
Prefix::Byte(_) => true,
Prefix::Bytes{..} => true,
Prefix::Single(_) => true,
Prefix::Automaton(ref aut) => {
aut.patterns().iter().all(|p| p.len() == aut.pattern(0).len())
}
}
}
#[allow(dead_code)]
pub fn prefixes(&self) -> Vec<String> {
match *self {
Prefix::Empty => vec![],
Prefix::Byte(b) => vec![format!("{}", b as char)],
Prefix::Bytes { ref chars, .. } => {
chars.iter().map(|&b| format!("{}", b as char)).collect()
}
Prefix::Single(ref searcher) => vec![searcher.pat.clone()],
Prefix::Automaton(ref aut) => aut.patterns().to_vec(),
}
}
}
#[derive(Clone, Debug)]
struct SingleSearch {
pat: String,
shift: Vec<usize>,
}
impl SingleSearch {
fn new(pat: String) -> SingleSearch {
assert!(pat.len() >= 1);
let mut shift = vec![pat.len(); 256];
for i in 0..(pat.len() - 1) {
shift[pat.as_bytes()[i] as usize] = pat.len() - i - 1;
}
SingleSearch {
pat: pat,
shift: shift,
}
}
fn find(&self, haystack: &str) -> Option<usize> {
let pat = self.pat.as_bytes();
let haystack = haystack.as_bytes();
if haystack.len() < pat.len() {
return None;
}
let mut i = match memchr(pat[0], haystack) {
None => return None,
Some(i) => i,
};
while i <= haystack.len() - pat.len() {
let b = haystack[i + pat.len() - 1];
if b == pat[pat.len() - 1]
&& haystack[i] == pat[0]
&& haystack[i + (pat.len() / 2)] == pat[pat.len() / 2]
&& pat == &haystack[i..i + pat.len()] {
return Some(i);
}
i += self.shift[b as usize];
i += match memchr(pat[0], &haystack[i..]) {
None => return None,
Some(i) => i,
};
}
None
}
}
fn find_singles(sparse: &[bool], haystack: &[u8]) -> Option<(usize, usize)> {
for (hi, &b) in haystack.iter().enumerate() {
if sparse[b as usize] {
return Some((hi, hi+1));
}
}
None
}
impl fmt::Debug for Prefix {
#[allow(deprecated)] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Prefix::Empty => write!(f, "Empty"),
Prefix::Byte(b) => write!(f, "{:?}", b as char),
Prefix::Bytes { ref chars, .. } => {
let chars: Vec<String> =
chars.iter()
.map(|&c| format!("{:?}", c as char))
.collect();
write!(f, "{}", chars.connect(", "))
}
Prefix::Single(ref searcher) => write!(f, "{:?}", searcher),
Prefix::Automaton(ref aut) => write!(f, "{:?}", aut),
}
}
}