#![cfg(feature = "buzhash")]
use crate::{Chunk, ChunkIncr, ToChunkIncr};
use std::fmt;
use std::num::Wrapping;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuzHash<H: BuzHashHash> {
k: usize,
h: H,
mask: u32,
max_chunk_size: u64,
}
impl<H: BuzHashHash> BuzHash<H> {
pub fn new(capacity: usize, mask: u32, hash: H, max_chunk_size: u64) -> Self {
assert!(capacity > 0);
BuzHash {
k: capacity,
h: hash,
mask,
max_chunk_size,
}
}
}
impl<'a> BuzHash<BuzHashTableByteSaltHash<'a>> {
pub fn new_nom(salt: u8) -> Self {
BuzHash::new(
67,
(1 << 12u32) - 1,
BuzHashTableByteSaltHash::from((salt, &crate::buzhash_table::GO_BUZHASH)),
1 << 24,
)
}
}
impl<H: BuzHashHash + Clone> Chunk for BuzHash<H> {
type SearchState = BuzHashSearchState;
fn to_search_state(&self) -> Self::SearchState {
Self::SearchState::default()
}
fn find_chunk_edge(
&self,
state: &mut Self::SearchState,
data: &[u8],
) -> (Option<usize>, usize) {
for i in state.offset..data.len() {
state.state.add_buf(data, self, i);
if (state.state.h & self.mask) == self.mask {
state.reset();
return (Some(i + 1), i + 1);
}
}
let discard_ct = data.len().saturating_sub(self.k);
state.offset = data.len() - discard_ct;
(None, discard_ct)
}
}
impl<H: BuzHashHash + Clone> From<&BuzHash<H>> for BuzHashIncr<H> {
fn from(src: &BuzHash<H>) -> Self {
src.clone().into()
}
}
impl<H: BuzHashHash + Clone> ToChunkIncr for BuzHash<H> {
type Incr = BuzHashIncr<H>;
fn to_chunk_incr(&self) -> Self::Incr {
self.into()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct BuzHashSearchState {
offset: usize,
state: BuzHashState,
}
impl BuzHashSearchState {
fn reset(&mut self) {
self.offset = 0;
self.state.reset();
}
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
struct BuzHashState {
h: u32,
}
impl BuzHashState {
fn reset(&mut self) {
self.h = 0;
}
fn add_buf<H: BuzHashHash>(&mut self, data: &[u8], params: &BuzHash<H>, i: usize) {
if i >= params.k {
let drop_i = i - params.k;
let drop = data[drop_i];
self.add_overflow(params, data[i], drop);
} else {
self.add(params, data[i]);
}
}
fn add<H: BuzHashHash>(&mut self, params: &BuzHash<H>, v: u8) {
self.h = self.h.rotate_left(1) ^ params.h.hash(v);
}
fn add_overflow<H: BuzHashHash>(&mut self, params: &BuzHash<H>, add_v: u8, remove_v: u8) {
let h = self.h.rotate_left(1);
let drop = params.h.hash(remove_v).rotate_left((params.k % 8) as u32);
self.h = h ^ drop ^ params.h.hash(add_v);
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuzHashIncr<H: BuzHashHash> {
params: BuzHash<H>,
state: BuzHashState,
buf: Box<[u8]>,
buf_idx: Wrapping<usize>,
input_idx: u64,
}
impl<H: BuzHashHash> ChunkIncr for BuzHashIncr<H> {
fn push(&mut self, data: &[u8]) -> Option<usize> {
for (i, &v) in data.iter().enumerate() {
self.push_byte(v);
if (self.state.h & self.params.mask) == self.params.mask {
self.reset();
return Some(i + 1);
}
if self.input_idx > self.params.max_chunk_size {
self.reset();
return Some(i + 1);
}
}
None
}
}
impl<H: BuzHashHash> BuzHashIncr<H> {
fn reset(&mut self) {
self.buf_idx = Wrapping(0);
self.input_idx = 0;
self.state.reset();
}
fn push_byte(&mut self, val: u8) {
if self.input_idx >= self.params.k as u64 {
let o = self.buf[self.buf_idx.0];
self.state.add_overflow(&self.params, val, o);
} else {
self.state.add(&self.params, val);
}
self.buf[self.buf_idx.0] = val;
self.buf_idx += Wrapping(1);
self.buf_idx.0 %= self.params.k;
self.input_idx += 1;
}
}
impl<H: BuzHashHash> From<BuzHash<H>> for BuzHashIncr<H> {
fn from(params: BuzHash<H>) -> Self {
let buf = vec![0; params.k].into_boxed_slice();
Self {
params,
state: Default::default(),
buf,
buf_idx: Wrapping(0),
input_idx: 0,
}
}
}
pub trait BuzHashHash {
fn hash(&self, data: u8) -> u32;
}
#[derive(Clone)]
pub struct BuzHashTableHash<'a> {
table: &'a [u32; 256],
}
impl<'a> fmt::Debug for BuzHashTableHash<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("BuzHashTableHash").finish()
}
}
impl<'a> From<&'a [u32; 256]> for BuzHashTableHash<'a> {
fn from(table: &'a [u32; 256]) -> Self {
Self { table }
}
}
impl<'a> BuzHashHash for BuzHashTableHash<'a> {
fn hash(&self, data: u8) -> u32 {
self.table[data as usize]
}
}
#[derive(Clone)]
pub struct BuzHashTableBufHash {
table: Box<[u32; 256]>,
}
impl fmt::Debug for BuzHashTableBufHash {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("BuzHashTableBufHash").finish()
}
}
impl<'a> From<Box<[u32; 256]>> for BuzHashTableBufHash {
fn from(table: Box<[u32; 256]>) -> Self {
Self { table }
}
}
impl BuzHashHash for BuzHashTableBufHash {
fn hash(&self, data: u8) -> u32 {
self.table[data as usize]
}
}
#[derive(Clone)]
pub struct BuzHashTableByteSaltHash<'a> {
table: &'a [u32; 256],
salt: u8,
}
impl<'a> fmt::Debug for BuzHashTableByteSaltHash<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("BuzHashTableByteSaltHash").finish()
}
}
impl<'a> From<(u8, &'a [u32; 256])> for BuzHashTableByteSaltHash<'a> {
fn from((salt, table): (u8, &'a [u32; 256])) -> Self {
Self { table, salt }
}
}
impl<'a> BuzHashHash for BuzHashTableByteSaltHash<'a> {
fn hash(&self, data: u8) -> u32 {
self.table[(data ^ self.salt) as usize]
}
}