use std::cmp;
use std::usize;
use super::Searcher;
#[derive(Clone, Debug)]
pub struct TwoWaySearcher<'a> {
needle: &'a [u8],
crit_pos: usize,
period: usize,
byteset: u64,
is_long: bool,
}
struct TwoWayState {
position: usize,
memory: usize,
}
impl<'a> Searcher for TwoWaySearcher<'a> {
fn search_in(&self, haystack: &[u8]) -> Option<usize> {
if self.needle.is_empty() {
Some(0)
} else if self.is_long {
let state = TwoWayState {
position: 0,
memory: usize::MAX,
};
self.next(haystack, state, true)
} else {
let state = TwoWayState {
position: 0,
memory: 0,
};
self.next(haystack, state, false)
}
}
}
impl<'a> TwoWaySearcher<'a> {
pub fn new<'b>(needle: &'b [u8]) -> TwoWaySearcher<'b> {
if needle.is_empty() {
return TwoWaySearcher {
needle: needle,
crit_pos: 0,
period: 0,
byteset: 0,
is_long: false,
};
}
let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
let (crit_pos, period) =
if crit_pos_false > crit_pos_true {
(crit_pos_false, period_false)
} else {
(crit_pos_true, period_true)
};
if &needle[..crit_pos] == &needle[period.. period + crit_pos] {
TwoWaySearcher {
needle: needle,
crit_pos: crit_pos,
period: period,
byteset: Self::byteset_create(&needle[..period]),
is_long: false,
}
} else {
TwoWaySearcher {
needle: needle,
crit_pos: crit_pos,
period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
byteset: Self::byteset_create(needle),
is_long: true,
}
}
}
#[inline]
fn byteset_create(bytes: &[u8]) -> u64 {
bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
}
#[inline(always)]
fn byteset_contains(&self, byte: u8) -> bool {
(self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0
}
#[inline(always)]
fn next(&self, haystack: &[u8], mut state: TwoWayState, long_period: bool) -> Option<usize> {
let needle_last = self.needle.len() - 1;
'search: loop {
let tail_byte = match haystack.get(state.position + needle_last) {
Some(&b) => b,
None => { return None; }
};
if !self.byteset_contains(tail_byte) {
state.position += self.needle.len();
if !long_period {
state.memory = 0;
}
continue 'search;
}
let start = if long_period { self.crit_pos }
else { cmp::max(self.crit_pos, state.memory) };
for i in start..self.needle.len() {
if self.needle[i] != haystack[state.position + i] {
state.position += i - self.crit_pos + 1;
if !long_period {
state.memory = 0;
}
continue 'search;
}
}
let start = if long_period { 0 } else { state.memory };
for i in (start..self.crit_pos).rev() {
if self.needle[i] != haystack[state.position + i] {
state.position += self.period;
if !long_period {
state.memory = self.needle.len() - self.period;
}
continue 'search;
}
}
let match_pos = state.position;
state.position += self.needle.len();
if !long_period {
state.memory = 0; }
return Some(match_pos);
}
}
#[inline]
fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
let mut left = 0; let mut right = 1; let mut offset = 0; let mut period = 1;
while let Some(&a) = arr.get(right + offset) {
let b = arr[left + offset];
if (a < b && !order_greater) || (a > b && order_greater) {
right += offset + 1;
offset = 0;
period = right - left;
} else if a == b {
if offset + 1 == period {
right += offset + 1;
offset = 0;
} else {
offset += 1;
}
} else {
left = right;
right += 1;
offset = 0;
period = 1;
}
}
(left, period)
}
}
#[cfg(test)]
mod tests {
use {Searcher, TwoWaySearcher};
use quickcheck::quickcheck;
#[test]
fn same_as_std() {
fn prop(needle: String, haystack: String) -> bool {
let search = TwoWaySearcher::new(needle.as_bytes());
search.search_in(haystack.as_bytes()) == haystack.find(&needle)
}
quickcheck(prop as fn(String, String) -> bool);
}
}