1#![cfg(feature = "buzhash")]
2
3use crate::{Chunk, ChunkIncr, ToChunkIncr};
17use std::fmt;
18use std::num::Wrapping;
19#[derive(Debug, Clone, PartialEq, Eq)]
64pub struct BuzHash<H: BuzHashHash> {
65 k: usize,
67
68 h: H,
70
71 mask: u32,
74
75 max_chunk_size: u64,
77}
78
79impl<H: BuzHashHash> BuzHash<H> {
80 pub fn new(capacity: usize, mask: u32, hash: H, max_chunk_size: u64) -> Self {
87 assert!(capacity > 0);
88 BuzHash {
89 k: capacity,
90 h: hash,
91 mask,
92 max_chunk_size,
93 }
94 }
95
96 }
99
100impl<'a> BuzHash<BuzHashTableByteSaltHash<'a>> {
101 pub fn new_nom(salt: u8) -> Self {
108 BuzHash::new(
109 67,
110 (1 << 12u32) - 1,
111 BuzHashTableByteSaltHash::from((salt, &crate::buzhash_table::GO_BUZHASH)),
112 1 << 24,
113 )
114 }
115}
116
117impl<H: BuzHashHash + Clone> Chunk for BuzHash<H> {
118 type SearchState = BuzHashSearchState;
119
120 fn to_search_state(&self) -> Self::SearchState {
121 Self::SearchState::default()
122 }
123
124 fn find_chunk_edge(
125 &self,
126 state: &mut Self::SearchState,
127 data: &[u8],
128 ) -> (Option<usize>, usize) {
129 for i in state.offset..data.len() {
130 state.state.add_buf(data, self, i);
131
132 if (state.state.h & self.mask) == self.mask {
133 state.reset();
134 return (Some(i + 1), i + 1);
135 }
136
137 }
147
148 let discard_ct = data.len().saturating_sub(self.k);
150 state.offset = data.len() - discard_ct;
151 (None, discard_ct)
152 }
153}
154
155impl<H: BuzHashHash + Clone> From<&BuzHash<H>> for BuzHashIncr<H> {
156 fn from(src: &BuzHash<H>) -> Self {
157 src.clone().into()
158 }
159}
160
161impl<H: BuzHashHash + Clone> ToChunkIncr for BuzHash<H> {
162 type Incr = BuzHashIncr<H>;
163 fn to_chunk_incr(&self) -> Self::Incr {
164 self.into()
165 }
166}
167
168#[derive(Debug, Clone, PartialEq, Eq, Default)]
169pub struct BuzHashSearchState {
170 offset: usize,
171 state: BuzHashState,
172}
173
174impl BuzHashSearchState {
175 fn reset(&mut self) {
176 self.offset = 0;
177 self.state.reset();
178 }
179}
180
181#[derive(Debug, Clone, PartialEq, Eq, Default)]
182struct BuzHashState {
183 h: u32,
185}
186
187impl BuzHashState {
188 fn reset(&mut self) {
189 self.h = 0;
190 }
191
192 fn add_buf<H: BuzHashHash>(&mut self, data: &[u8], params: &BuzHash<H>, i: usize) {
193 if i >= params.k {
194 let drop_i = i - params.k;
196 let drop = data[drop_i];
197 self.add_overflow(params, data[i], drop);
198 } else {
199 self.add(params, data[i]);
201 }
202 }
203
204 fn add<H: BuzHashHash>(&mut self, params: &BuzHash<H>, v: u8) {
206 self.h = self.h.rotate_left(1) ^ params.h.hash(v);
207 }
208
209 fn add_overflow<H: BuzHashHash>(&mut self, params: &BuzHash<H>, add_v: u8, remove_v: u8) {
211 let h = self.h.rotate_left(1);
212 let drop = params.h.hash(remove_v).rotate_left((params.k % 8) as u32);
214 self.h = h ^ drop ^ params.h.hash(add_v);
215 }
216}
217
218#[derive(Debug, Clone, PartialEq, Eq)]
223pub struct BuzHashIncr<H: BuzHashHash> {
224 params: BuzHash<H>,
225 state: BuzHashState,
226 buf: Box<[u8]>,
227 buf_idx: Wrapping<usize>,
228 input_idx: u64,
229}
230
231impl<H: BuzHashHash> ChunkIncr for BuzHashIncr<H> {
232 fn push(&mut self, data: &[u8]) -> Option<usize> {
237 for (i, &v) in data.iter().enumerate() {
238 self.push_byte(v);
239 if (self.state.h & self.params.mask) == self.params.mask {
240 self.reset();
241 return Some(i + 1);
242 }
243
244 if self.input_idx > self.params.max_chunk_size {
245 self.reset();
246 return Some(i + 1);
247 }
248 }
249
250 None
251 }
252}
253
254impl<H: BuzHashHash> BuzHashIncr<H> {
255 fn reset(&mut self) {
256 self.buf_idx = Wrapping(0);
257 self.input_idx = 0;
258 self.state.reset();
259 }
260
261 fn push_byte(&mut self, val: u8) {
262 if self.input_idx >= self.params.k as u64 {
263 let o = self.buf[self.buf_idx.0];
264 self.state.add_overflow(&self.params, val, o);
265 } else {
266 self.state.add(&self.params, val);
267 }
268
269 self.buf[self.buf_idx.0] = val;
270
271 self.buf_idx += Wrapping(1);
272 self.buf_idx.0 %= self.params.k;
273 self.input_idx += 1;
274 }
275}
276
277impl<H: BuzHashHash> From<BuzHash<H>> for BuzHashIncr<H> {
278 fn from(params: BuzHash<H>) -> Self {
279 let buf = vec![0; params.k].into_boxed_slice();
280 Self {
281 params,
282 state: Default::default(),
283 buf,
284 buf_idx: Wrapping(0),
285 input_idx: 0,
286 }
287 }
288}
289
290pub trait BuzHashHash {
292 fn hash(&self, data: u8) -> u32;
293}
294
295#[derive(Clone)]
297pub struct BuzHashTableHash<'a> {
298 table: &'a [u32; 256],
299}
300
301impl<'a> fmt::Debug for BuzHashTableHash<'a> {
302 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
303 fmt.debug_struct("BuzHashTableHash").finish()
304 }
305}
306
307impl<'a> From<&'a [u32; 256]> for BuzHashTableHash<'a> {
308 fn from(table: &'a [u32; 256]) -> Self {
309 Self { table }
310 }
311}
312
313impl<'a> BuzHashHash for BuzHashTableHash<'a> {
314 fn hash(&self, data: u8) -> u32 {
315 self.table[data as usize]
316 }
317}
318
319#[derive(Clone)]
321pub struct BuzHashTableBufHash {
322 table: Box<[u32; 256]>,
323}
324
325impl fmt::Debug for BuzHashTableBufHash {
326 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
327 fmt.debug_struct("BuzHashTableBufHash").finish()
328 }
329}
330
331impl<'a> From<Box<[u32; 256]>> for BuzHashTableBufHash {
332 fn from(table: Box<[u32; 256]>) -> Self {
333 Self { table }
334 }
335}
336
337impl BuzHashHash for BuzHashTableBufHash {
338 fn hash(&self, data: u8) -> u32 {
339 self.table[data as usize]
340 }
341}
342
343#[derive(Clone)]
347pub struct BuzHashTableByteSaltHash<'a> {
348 table: &'a [u32; 256],
349 salt: u8,
350}
351
352impl<'a> fmt::Debug for BuzHashTableByteSaltHash<'a> {
353 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
354 fmt.debug_struct("BuzHashTableByteSaltHash").finish()
355 }
356}
357
358impl<'a> From<(u8, &'a [u32; 256])> for BuzHashTableByteSaltHash<'a> {
359 fn from((salt, table): (u8, &'a [u32; 256])) -> Self {
360 Self { table, salt }
361 }
362}
363
364impl<'a> BuzHashHash for BuzHashTableByteSaltHash<'a> {
365 fn hash(&self, data: u8) -> u32 {
366 self.table[(data ^ self.salt) as usize]
367 }
368}