use core::mem;
use ext_slice::ByteSlice;
use search::byte_frequencies::BYTE_FREQUENCIES;
#[derive(Clone, Debug)]
pub struct PrefilterState {
skips: usize,
skipped: usize,
max_match_len: usize,
inert: bool,
}
impl PrefilterState {
const MIN_SKIPS: usize = 50;
const MIN_SKIP_BYTES: usize = 8;
pub fn new(max_match_len: usize) -> PrefilterState {
if max_match_len == 0 {
return PrefilterState::inert();
}
PrefilterState { skips: 0, skipped: 0, max_match_len, inert: false }
}
fn inert() -> PrefilterState {
PrefilterState { skips: 0, skipped: 0, max_match_len: 0, inert: true }
}
#[inline]
pub fn update(&mut self, skipped: usize) {
self.skips += 1;
self.skipped += skipped;
}
#[inline]
pub fn is_effective(&mut self) -> bool {
if self.inert {
return false;
}
if self.skips < PrefilterState::MIN_SKIPS {
return true;
}
if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips {
return true;
}
self.inert = true;
false
}
}
#[derive(Clone, Debug)]
pub struct Freqy {
inert: bool,
needle_len: usize,
rare1: u8,
rare1i: usize,
rare2: u8,
rare2i: usize,
}
impl Freqy {
const MAX_RANK: usize = 200;
pub fn prefilter_state(&self) -> PrefilterState {
if self.inert {
PrefilterState::inert()
} else {
PrefilterState::new(self.needle_len)
}
}
fn inert() -> Freqy {
Freqy {
inert: true,
needle_len: 0,
rare1: 0,
rare1i: 0,
rare2: 0,
rare2i: 0,
}
}
pub fn forward(needle: &[u8]) -> Freqy {
if needle.is_empty() {
return Freqy::inert();
}
let (mut rare1, mut rare1i) = (needle[0], 0);
let (mut rare2, mut rare2i) = (needle[0], 0);
if needle.len() >= 2 {
rare2 = needle[1];
rare2i = 1;
}
if Freqy::rank(rare2) < Freqy::rank(rare1) {
mem::swap(&mut rare1, &mut rare2);
mem::swap(&mut rare1i, &mut rare2i);
}
for (i, b) in needle.bytes().enumerate().skip(2) {
if Freqy::rank(b) < Freqy::rank(rare1) {
rare2 = rare1;
rare2i = rare1i;
rare1 = b;
rare1i = i;
} else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) {
rare2 = b;
rare2i = i;
}
}
if Freqy::rank(rare1) > Freqy::MAX_RANK {
return Freqy::inert();
}
let needle_len = needle.len();
Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i }
}
pub fn reverse(needle: &[u8]) -> Freqy {
if needle.is_empty() {
return Freqy::inert();
}
let (mut rare1i, mut rare2i) = (0, 0);
if needle.len() >= 2 {
rare2i += 1;
}
let mut rare1 = needle[needle.len() - rare1i - 1];
let mut rare2 = needle[needle.len() - rare2i - 1];
if Freqy::rank(rare2) < Freqy::rank(rare1) {
mem::swap(&mut rare1, &mut rare2);
mem::swap(&mut rare1i, &mut rare2i);
}
for (i, b) in needle.bytes().rev().enumerate().skip(2) {
if Freqy::rank(b) < Freqy::rank(rare1) {
rare2 = rare1;
rare2i = rare1i;
rare1 = b;
rare1i = i;
} else if b != rare1 && Freqy::rank(b) < Freqy::rank(rare2) {
rare2 = b;
rare2i = i;
}
}
if Freqy::rank(rare1) > Freqy::MAX_RANK {
return Freqy::inert();
}
let needle_len = needle.len();
Freqy { inert: false, needle_len, rare1, rare1i, rare2, rare2i }
}
pub fn find_candidate(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
) -> Option<usize> {
debug_assert!(!self.inert);
let mut i = 0;
while prestate.is_effective() {
i += match haystack[i..].find_byte(self.rare1) {
None => return None,
Some(found) => {
prestate.update(found);
found
}
};
if i < self.rare1i {
i += 1;
continue;
}
let aligned_rare2i = i - self.rare1i + self.rare2i;
if haystack.get(aligned_rare2i) != Some(&self.rare2) {
i += 1;
continue;
}
return Some(i - self.rare1i);
}
Some(i.saturating_sub(self.rare1i))
}
pub fn rfind_candidate(
&self,
prestate: &mut PrefilterState,
haystack: &[u8],
) -> Option<usize> {
debug_assert!(!self.inert);
let mut i = haystack.len();
while prestate.is_effective() {
i = match haystack[..i].rfind_byte(self.rare1) {
None => return None,
Some(found) => {
prestate.update(i - found);
found
}
};
if i + self.rare1i + 1 > haystack.len() {
continue;
}
let aligned = match (i + self.rare1i).checked_sub(self.rare2i) {
None => continue,
Some(aligned) => aligned,
};
if haystack.get(aligned) != Some(&self.rare2) {
continue;
}
return Some(i + self.rare1i + 1);
}
Some(i + self.rare1i + 1)
}
fn rank(b: u8) -> usize {
BYTE_FREQUENCIES[b as usize] as usize
}
}
#[cfg(test)]
mod tests {
use super::*;
use ext_slice::B;
#[test]
fn freqy_forward() {
let s = Freqy::forward(B("BAR"));
let mut pre = s.prefilter_state();
assert_eq!(Some(0), s.find_candidate(&mut pre, B("BARFOO")));
let s = Freqy::forward(B("BAR"));
let mut pre = s.prefilter_state();
assert_eq!(Some(3), s.find_candidate(&mut pre, B("FOOBAR")));
let s = Freqy::forward(B("zyzy"));
let mut pre = s.prefilter_state();
assert_eq!(Some(0), s.find_candidate(&mut pre, B("zyzz")));
let s = Freqy::forward(B("zyzy"));
let mut pre = s.prefilter_state();
assert_eq!(Some(2), s.find_candidate(&mut pre, B("zzzy")));
let s = Freqy::forward(B("zyzy"));
let mut pre = s.prefilter_state();
assert_eq!(None, s.find_candidate(&mut pre, B("zazb")));
let s = Freqy::forward(B("yzyz"));
let mut pre = s.prefilter_state();
assert_eq!(Some(0), s.find_candidate(&mut pre, B("yzyy")));
let s = Freqy::forward(B("yzyz"));
let mut pre = s.prefilter_state();
assert_eq!(Some(2), s.find_candidate(&mut pre, B("yyyz")));
let s = Freqy::forward(B("yzyz"));
let mut pre = s.prefilter_state();
assert_eq!(None, s.find_candidate(&mut pre, B("yayb")));
}
#[test]
fn freqy_reverse() {
let s = Freqy::reverse(B("BAR"));
let mut pre = s.prefilter_state();
assert_eq!(Some(3), s.rfind_candidate(&mut pre, B("BARFOO")));
let s = Freqy::reverse(B("BAR"));
let mut pre = s.prefilter_state();
assert_eq!(Some(6), s.rfind_candidate(&mut pre, B("FOOBAR")));
let s = Freqy::reverse(B("zyzy"));
let mut pre = s.prefilter_state();
assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("zyzz")));
let s = Freqy::reverse(B("zyzy"));
let mut pre = s.prefilter_state();
assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("zzzy")));
let s = Freqy::reverse(B("zyzy"));
let mut pre = s.prefilter_state();
assert_eq!(None, s.rfind_candidate(&mut pre, B("zazb")));
let s = Freqy::reverse(B("yzyz"));
let mut pre = s.prefilter_state();
assert_eq!(Some(2), s.rfind_candidate(&mut pre, B("yzyy")));
let s = Freqy::reverse(B("yzyz"));
let mut pre = s.prefilter_state();
assert_eq!(Some(4), s.rfind_candidate(&mut pre, B("yyyz")));
let s = Freqy::reverse(B("yzyz"));
let mut pre = s.prefilter_state();
assert_eq!(None, s.rfind_candidate(&mut pre, B("yayb")));
}
}