#![cfg(feature = "zstd")]
use crate::{Chunk, ChunkIncr, ToChunkIncr};
use std::convert::TryInto;
use std::num::Wrapping;
const RSYNC_LENGTH: usize = 32;
const PRIME_8_BYTES: Wrapping<u64> = Wrapping(0xCF1BBCDCB7A56463);
const ROLL_HASH_CHAR_OFFSET: Wrapping<u64> = Wrapping(10);
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Zstd {
hit_mask: u64,
prime_power: u64,
}
impl Default for Zstd {
fn default() -> Self {
Self::with_target_section_size(8 << 20)
}
}
impl Zstd {
pub fn with_target_section_size(target_section_size: u64) -> Self {
let job_size_mb: u32 = (target_section_size >> 20).try_into().unwrap();
assert_ne!(job_size_mb, 0);
let rsync_bits = (job_size_mb.leading_zeros() ^ 31) + 20;
let hit_mask = (1u64 << rsync_bits) - 1;
let prime_power = PRIME_8_BYTES
.0
.wrapping_pow((RSYNC_LENGTH - 1).try_into().unwrap());
Self {
hit_mask,
prime_power,
}
}
}
#[cfg(test)]
mod test {
#[test]
fn test_zstd_init_matches_upstream() {
let zstd = super::Zstd::default();
assert_eq!(zstd.hit_mask, 0x7f_ffff);
assert_eq!(zstd.prime_power, 0xf5507fe35f91f8cb);
}
}
#[derive(Default, Debug, PartialEq, Eq)]
struct ZstdState {
hash: Wrapping<u64>,
}
impl ZstdState {
fn append(&mut self, data: &[u8]) {
for i in data {
self.hash *= PRIME_8_BYTES;
self.hash += Wrapping(*i as u64) + ROLL_HASH_CHAR_OFFSET;
}
}
fn rotate(&mut self, to_remove: u8, to_add: u8, prime_power: u64) {
self.hash -= (Wrapping(to_remove as u64) + ROLL_HASH_CHAR_OFFSET) * Wrapping(prime_power);
self.hash *= PRIME_8_BYTES;
self.hash += Wrapping(to_add as u64) + ROLL_HASH_CHAR_OFFSET;
}
fn at_split(&mut self, params: &Zstd) -> bool {
(self.hash.0 & params.hit_mask) == params.hit_mask
}
}
#[derive(Default, Debug, PartialEq, Eq)]
pub struct ZstdSearchState {
state: ZstdState,
offset: usize,
}
impl ZstdSearchState {
fn append(&mut self, data: &[u8]) {
self.state.append(data);
}
fn rotate(&mut self, to_remove: u8, to_add: u8, prime_power: u64) {
self.state.rotate(to_remove, to_add, prime_power);
}
fn at_split(&mut self, params: &Zstd) -> bool {
self.state.at_split(params)
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct ZstdIncr {
params: Zstd,
state: ZstdState,
window: Box<[u8]>,
window_offs: usize,
window_full: bool,
input_offs: u64,
}
impl ToChunkIncr for Zstd {
type Incr = ZstdIncr;
fn to_chunk_incr(&self) -> Self::Incr {
self.into()
}
}
impl From<Zstd> for ZstdIncr {
fn from(params: Zstd) -> Self {
Self {
params,
state: Default::default(),
window: vec![0; RSYNC_LENGTH].into_boxed_slice(),
window_offs: 0,
window_full: false,
input_offs: 0,
}
}
}
impl From<&Zstd> for ZstdIncr {
fn from(params: &Zstd) -> Self {
params.clone().into()
}
}
impl Chunk for Zstd {
type SearchState = ZstdSearchState;
fn to_search_state(&self) -> Self::SearchState {
Self::SearchState::default()
}
fn find_chunk_edge(
&self,
state: &mut Self::SearchState,
data: &[u8],
) -> (Option<usize>, usize) {
if state.offset < RSYNC_LENGTH {
let seed_b = &data[state.offset..std::cmp::min(RSYNC_LENGTH, data.len())];
state.append(seed_b);
state.offset += seed_b.len();
if state.offset < RSYNC_LENGTH {
return (None, 0);
}
}
for i in state.offset..data.len() {
let to_remove = data[i - RSYNC_LENGTH];
let to_add = data[i];
state.rotate(to_remove, to_add, self.prime_power);
if state.at_split(self) {
let discard_ct = data.len().saturating_sub(RSYNC_LENGTH);
return (Some(i + 1), discard_ct);
}
}
let discard_ct = data.len().saturating_sub(RSYNC_LENGTH);
let keep_ct = data.len() - discard_ct;
state.offset = keep_ct;
(None, discard_ct)
}
}
impl ChunkIncr for ZstdIncr {
fn push(&mut self, data: &[u8]) -> Option<usize> {
let use_len = if !self.window_full {
let use_len = std::cmp::min(self.window.len() - self.window_offs, data.len());
self.window[self.window_offs..(self.window_offs + use_len)]
.copy_from_slice(&data[..use_len]);
self.window_offs += use_len;
if self.window_offs != self.window.len() {
return None;
}
self.window_full = true;
self.window_offs = 0;
self.state.append(&self.window[..]);
use_len
} else {
0
};
for (i, &v) in data[use_len..].iter().enumerate() {
let to_remove = self.window[self.window_offs];
let to_add = v;
self.state
.rotate(to_remove, to_add, self.params.prime_power);
self.window[self.window_offs] = to_add;
self.window_offs = (self.window_offs + 1) % self.window.len();
if self.state.at_split(&self.params) {
return Some(i + use_len);
}
}
None
}
}