use std::io::{self, Read};
use rand::{RngExt, rng};
use rustic_cdc::{Polynom, Polynom64, Rabin64, RollingHash64};
use crate::error::{ErrorKind, RusticError, RusticResult};
pub(super) mod constants {
pub(super) const SIZE: usize = 1 << 20;
pub(super) const KB: usize = 1024;
pub(super) const MB: usize = 1024 * KB;
pub(super) const MIN_SIZE: usize = 512 * KB;
pub(super) const MAX_SIZE: usize = 8 * MB;
pub(super) const BUF_SIZE: usize = 4 * KB;
pub(super) const RAND_POLY_MAX_TRIES: i32 = 1_000_000;
}
pub(crate) fn check_rabin_params(
chunk_size: usize,
chunk_min_size: usize,
chunk_max_size: usize,
) -> RusticResult<()> {
if (chunk_size & (chunk_size - 1)) != 0 {
return Err(RusticError::new(
ErrorKind::Unsupported,
"Chunk size must be a power of 2 for the rabin chunker. chunk size = {chunk_size}.",
)
.attach_context("chunk_size", chunk_size.to_string()));
}
if chunk_min_size > chunk_size {
return Err(RusticError::new(
ErrorKind::Unsupported,
"Chunk min size must be smaller or equal than the chunk size.",
));
}
if chunk_max_size < chunk_size {
return Err(RusticError::new(
ErrorKind::Unsupported,
"Chunk max size must be larger or equal than the chunk size.",
));
}
Ok(())
}
pub(crate) struct ChunkIter<R: Read + Send> {
buf: Vec<u8>,
pos: usize,
reader: R,
split_mask: u64,
rabin: Rabin64,
size_hint: usize,
min_size: usize,
max_size: usize,
finished: bool,
}
impl<R: Read + Send> ChunkIter<R> {
pub(crate) fn new(
rabin: Rabin64,
chunk_size: usize,
chunk_min_size: usize,
chunk_max_size: usize,
reader: R,
size_hint: usize,
) -> RusticResult<Self> {
check_rabin_params(chunk_size, chunk_min_size, chunk_max_size)?;
let chunk_size: u64 = chunk_size.try_into().unwrap();
let split_mask: u64 = chunk_size - 1;
Ok(Self {
buf: vec![0; constants::BUF_SIZE],
pos: constants::BUF_SIZE,
reader,
split_mask,
rabin,
size_hint, min_size: chunk_min_size,
max_size: chunk_max_size,
finished: false,
})
}
}
impl<R: Read + Send> Iterator for ChunkIter<R> {
type Item = RusticResult<Vec<u8>>;
fn next(&mut self) -> Option<Self::Item> {
if self.finished {
return None;
}
let mut min_size = self.min_size;
let mut vec = Vec::with_capacity(self.size_hint.min(min_size));
let open_buf_len = self.buf.len() - self.pos;
if open_buf_len > 0 {
vec.resize(open_buf_len, 0);
vec.copy_from_slice(&self.buf[self.pos..]);
self.pos = self.buf.len();
min_size -= open_buf_len;
}
let size = match (&mut self.reader)
.take(min_size as u64)
.read_to_end(&mut vec)
{
Ok(size) => size,
Err(err) => {
return Some(Err(RusticError::with_source(
ErrorKind::InputOutput,
"Failed to read from reader in iterator",
err,
)));
}
};
if size < min_size {
self.finished = true;
vec.truncate(size + open_buf_len);
return if vec.is_empty() { None } else { Some(Ok(vec)) };
}
_ = self
.rabin
.reset_and_prefill_window(&mut vec[vec.len() - 64..vec.len()].iter().copied());
loop {
if vec.len() >= self.max_size {
break;
}
if (self.rabin.hash & self.split_mask) == 0 {
break;
}
if self.buf.len() == self.pos {
match self.reader.read(&mut self.buf[..]) {
Ok(0) => {
self.finished = true;
break;
}
Ok(size) => {
self.pos = 0;
self.buf.truncate(size);
}
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(err) => {
return Some(Err(RusticError::with_source(
ErrorKind::InputOutput,
"Failed to read from reader in iterator",
err,
)));
}
}
}
let byte = self.buf[self.pos];
vec.push(byte);
self.pos += 1;
self.rabin.slide(byte);
}
self.size_hint = self.size_hint.saturating_sub(vec.len()); Some(Ok(vec))
}
}
pub fn random_poly() -> RusticResult<u64> {
for _ in 0..constants::RAND_POLY_MAX_TRIES {
let mut poly: u64 = rng().random();
poly &= (1 << 54) - 1;
poly |= (1 << 53) | 1;
if poly.irreducible() {
return Ok(poly);
}
}
Err(RusticError::new(
ErrorKind::Internal,
"No suitable polynomial found, this should essentially never happen. Please try again.",
)
.ask_report())
}
pub(crate) trait PolynomExtend {
fn irreducible(&self) -> bool;
fn gcd(self, other: Self) -> Self;
fn add(self, other: Self) -> Self;
fn mulmod(self, other: Self, modulo: Self) -> Self;
}
impl PolynomExtend for Polynom64 {
fn irreducible(&self) -> bool {
for i in 1..=self.degree() / 2 {
if self.gcd(qp(i, *self)) != 1 {
return false;
}
}
true
}
fn gcd(self, other: Self) -> Self {
if other == 0 {
return self;
}
if self == 0 {
return other;
}
if self.degree() < other.degree() {
self.gcd(other.modulo(&self))
} else {
other.gcd(self.modulo(&other))
}
}
fn add(self, other: Self) -> Self {
self ^ other
}
fn mulmod(self, other: Self, modulo: Self) -> Self {
if self == 0 || other == 0 {
return 0;
}
let mut res: Self = 0;
let mut a = self;
let mut b = other;
if b & 1 > 0 {
res = res.add(a).modulo(&modulo);
}
while b != 0 {
a = (a << 1).modulo(&modulo);
b >>= 1;
if b & 1 > 0 {
res = res.add(a).modulo(&modulo);
}
}
res
}
}
fn qp(p: i32, g: Polynom64) -> Polynom64 {
let mut res: Polynom64 = 2;
for _ in 0..p {
res = res.mulmod(res, g);
}
res.add(2).modulo(&g)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::crypto::hasher::hash;
use insta::assert_ron_snapshot;
use rand::prelude::*;
use std::io::{Cursor, repeat};
fn chunker<R: Read + Send>(reader: R, size_hint: usize) -> ChunkIter<R> {
let poly = 0x003D_A335_8B4D_C173;
let rabin = Rabin64::new_with_polynom(6, &poly);
ChunkIter::new(
rabin,
constants::SIZE,
constants::MIN_SIZE,
constants::MAX_SIZE,
reader,
size_hint,
)
.unwrap()
}
#[test]
fn chunk_random() {
const RANDOM_DATA_SIZE: usize = 32 * 1024 * 1024;
const SEED: u64 = 23;
let mut rng = StdRng::seed_from_u64(SEED);
let mut data = vec![0u8; RANDOM_DATA_SIZE];
rng.fill_bytes(&mut data);
let mut reader = Cursor::new(data);
let chunker = chunker(&mut reader, 0);
let chunks: Vec<_> = chunker
.map(|chunk| {
let chunk = chunk.unwrap();
(chunk.len(), hash(&chunk))
})
.collect();
assert_ron_snapshot!(chunks);
}
#[test]
fn chunk_empty() {
let empty: Vec<u8> = vec![];
let mut reader = Cursor::new(empty);
let chunker = chunker(&mut reader, 0);
assert_eq!(0, chunker.into_iter().count());
}
#[test]
fn chunk_empty_wrong_hint() {
let empty: Vec<u8> = vec![];
let mut reader = Cursor::new(empty);
let chunker = chunker(&mut reader, 100);
assert_eq!(0, chunker.into_iter().count());
}
#[test]
fn chunk_zeros() {
let mut reader = repeat(0u8);
let mut chunker = chunker(&mut reader, usize::MAX);
let chunk = chunker.next().unwrap().unwrap();
assert_eq!(constants::MIN_SIZE, chunk.len());
}
}