use std::cmp::{max, min};
use std::io::Read;
use crate::byte_table::BYTE_TABLE;
use crate::file_list::{Block, FileBlocks};
use crate::hashing::{Hashing, HashType};
/// Lower bound for the `window_bits` argument of `Scanner::new` (checked there
/// with a debug assertion).
const MIN_WINDOW_BITS: u8 = 8;
/// Lower bound for the `min_bits` argument of `Scanner::new` (checked there
/// with a debug assertion).
const MIN_BLOCK_BITS: u8 = 8;
/// Splits a byte stream into variable-size blocks.
///
/// Bytes are pushed in through `feed`; a block ends where the low `pref_bits`
/// bits of a rolling checksum over the most recent bytes are all zero, or when
/// the block reaches `1 << max_bits` bytes. Each finished block is reported
/// with its size and the hash of its bytes.
pub struct Scanner {
/// Base-2 exponent of the minimum block size used by `feed`.
min_bits: u8,
/// Number of low bits of the rolling checksum that must be zero for a cut.
pref_bits: u8,
/// Base-2 exponent of the block size at which `feed` cuts unconditionally.
max_bits: u8,
/// Base-2 exponent of the number of entries in `window`.
window_bits: u8,
/// Bytes consumed into the current, not yet finished block.
processed: isize,
/// Ring buffer of the `BYTE_TABLE` values of the most recently hashed bytes.
window: Vec<u32>,
/// Slot of `window` that the next hashed byte overwrites.
window_index: usize,
/// Rolling checksum over the values currently held in `window`.
sum: u32,
/// Hasher fed with the bytes of the current block; finalized and reset when
/// the block is emitted.
digest: Hashing,
}
/// Iterator created by `Scanner::reader_iter` that reads from `reader` and
/// yields the blocks the scanner finds, one `std::io::Result<Block>` at a time.
pub struct ScanIterator<'a, R: Read> {
/// Scanner that receives the bytes and decides where blocks end.
scanner: &'a mut Scanner,
/// Source the bytes are read from.
reader: &'a mut R,
/// Caller-provided scratch buffer that every `read` call fills.
buffer: &'a mut [u8],
/// Offset in `buffer` of the first byte not yet passed to the scanner.
buffer_index: usize,
/// Number of valid bytes in `buffer` from the most recent read.
buffer_size: usize,
/// Set once the reader hit end of input or returned an error; the iterator
/// yields `None` from then on.
ended: bool,
/// Caller-provided hasher that is updated with every byte read from `reader`.
digest: &'a mut Hashing,
}
impl Scanner {
    /// Creates a scanner with a zeroed window and a fresh block hasher of the
    /// given `hash_type`.
    ///
    /// All size arguments are base-2 exponents:
    ///
    /// * `min_bits` - the rolling checksum cannot end a block until more than
    ///   `1 << min_bits` bytes have been consumed into it.
    /// * `pref_bits` - a block ends where the low `pref_bits` bits of the
    ///   rolling checksum are all zero.
    /// * `max_bits` - a block is ended unconditionally once it holds
    ///   `1 << max_bits` bytes.
    /// * `window_bits` - the rolling checksum covers `1 << window_bits` bytes.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics unless
    /// `window_bits >= MIN_WINDOW_BITS` and
    /// `MIN_BLOCK_BITS <= min_bits < pref_bits < max_bits`.
    pub fn new(
        hash_type: HashType,
        min_bits: u8,
        pref_bits: u8,
        max_bits: u8,
        window_bits: u8,
    ) -> Scanner {
        debug_assert!(window_bits >= MIN_WINDOW_BITS);
        debug_assert!(min_bits >= MIN_BLOCK_BITS);
        debug_assert!(min_bits < pref_bits);
        debug_assert!(pref_bits < max_bits);
        Scanner {
            min_bits,
            pref_bits,
            max_bits,
            window_bits,
            processed: 0,
            window: vec![0_u32; 1 << window_bits],
            window_index: 0,
            sum: 0,
            digest: Hashing::new(hash_type),
        }
    }

    /// Consumes bytes from the front of `data` until a block ends or `data`
    /// is used up.
    ///
    /// Returns `(consumed, block)`. `consumed` is the number of leading bytes
    /// of `data` that were added to the current block. `block` is `Some` when
    /// those bytes completed a block, in which case the scanner has already
    /// been reset for the next one. The caller is expected to call `feed`
    /// again with `&data[consumed..]` until everything has been consumed.
    pub fn feed(&mut self, data: &[u8]) -> (usize, Option<Block>) {
        let min_size: isize = 1 << self.min_bits;
        let max_size: isize = 1 << self.max_bits;
        let window_size: isize = 1 << self.window_bits;
        let mask: u32 = (1 << self.pref_bits) - 1;

        // A cut is impossible before `min_size`, so the rolling checksum only
        // has to start one window ahead of that position. Bytes before
        // `start` are still hashed into the block digest below.
        let min_start = max(0, min_size - window_size);
        let start = max(0, min_start - self.processed);
        // Never look past the point where the block reaches its maximum size.
        let end = min(data.len() as isize, max_size - self.processed);

        let mut sum = self.sum;
        let mut window_index = self.window_index;
        for i in start..end {
            let t = BYTE_TABLE[data[i as usize] as usize];
            // Rotate, mix in the new byte and XOR out the value that is about
            // to leave the window. The plain XOR cancels the old value only
            // because the window length is a multiple of 32 (true whenever
            // `window_bits >= MIN_WINDOW_BITS`), so its accumulated rotation
            // is the identity.
            sum = sum.rotate_left(1) ^ t ^ self.window[window_index];
            self.window[window_index] = t;
            window_index = (window_index + 1) & (window_size as usize - 1);
            if (sum & mask) == 0 && self.processed + i >= min_size {
                let stop = i + 1;
                self.processed += i + 1;
                self.digest.update(&data[0..stop as usize]);
                // `mark` resets `sum` and `window_index`, so the local copies
                // do not need to be written back here.
                return (stop as usize, Some(self.mark()));
            }
        }

        // No checksum cut: absorb everything up to `end` and remember the
        // rolling state for the next call.
        self.digest.update(&data[0..(end as usize)]);
        self.processed += end;
        self.window_index = window_index;
        self.sum = sum;
        if self.processed < max_size {
            (end as usize, None)
        } else {
            // The block reached its maximum size; cut unconditionally.
            (end as usize, Some(self.mark()))
        }
    }

    /// Ends the current block, if any bytes have been consumed into it.
    ///
    /// Returns `None` when no bytes were consumed since the last emitted
    /// block; otherwise returns the pending block and resets the scanner.
    pub fn finish(&mut self) -> Option<Block> {
        if self.processed == 0 {
            None
        } else {
            Some(self.mark())
        }
    }

    /// Builds a `Block` from the bytes consumed so far (size and finalized
    /// digest) and resets the scanner for the next block.
    fn mark(&mut self) -> Block {
        let block = Block {
            size: self.processed as usize,
            hash: self.digest.finalize_reset(),
        };
        self.reset();
        block
    }

    /// Clears the per-block rolling state: byte counter, window contents,
    /// window position and checksum. The block digest is not touched here.
    fn reset(&mut self) {
        self.processed = 0;
        self.window.clear();
        self.window.resize(1 << self.window_bits, 0);
        self.window_index = 0;
        self.sum = 0;
    }

    /// Returns an iterator that reads `reader` through `buffer` and yields
    /// the blocks this scanner finds.
    ///
    /// Every byte read is also fed into `digest`, which stays owned by the
    /// caller and can be finalized after the iterator is exhausted.
    pub fn reader_iter<'a, R: Read>(
        &'a mut self,
        reader: &'a mut R,
        buffer: &'a mut [u8],
        digest: &'a mut Hashing,
    ) -> ScanIterator<'a, R> {
        ScanIterator {
            scanner: self,
            reader,
            buffer,
            buffer_index: 0,
            buffer_size: 0,
            ended: false,
            digest,
        }
    }

    /// Reads `reader` until end of input and returns all blocks found,
    /// together with a hash (of type `hash_type`) over every byte read.
    ///
    /// `buffer` is used as scratch space for the reads. Note that a
    /// zero-length `buffer` makes `read` return `Ok(0)`, which is treated as
    /// end of input.
    ///
    /// # Errors
    ///
    /// Returns the first read error other than `ErrorKind::Interrupted`
    /// (interrupted reads are retried). The partially scanned block is
    /// discarded in that case, so the scanner can be reused for another
    /// stream.
    pub fn scan_reader(
        &mut self,
        reader: &mut impl Read,
        buffer: &mut [u8],
        hash_type: HashType,
    ) -> std::io::Result<FileBlocks> {
        let mut blocks: Vec<Block> = Vec::new();
        let mut digest = Hashing::new(hash_type);
        loop {
            let n = match reader.read(buffer) {
                Ok(0) => break,
                Ok(n) => n,
                // Interrupted reads transferred no data and are documented as
                // retryable, so they must not abort the scan.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => {
                    // The blocks collected so far are dropped with the error;
                    // drop the half-scanned block as well so stale bytes do
                    // not leak into the next stream scanned with `self`.
                    let _ = self.finish();
                    return Err(e);
                }
            };
            let chunk = &buffer[..n];
            digest.update(chunk);
            let mut offset = 0;
            while offset < n {
                let (consumed, maybe_block) = self.feed(&chunk[offset..]);
                offset += consumed;
                if let Some(block) = maybe_block {
                    blocks.push(block);
                }
            }
        }
        // Emit whatever is left after the last cut as the final block.
        if let Some(block) = self.finish() {
            blocks.push(block);
        }
        let hash = digest.finalize_reset();
        Ok(FileBlocks { blocks, hash })
    }
}
impl<'a, R: Read> Iterator for ScanIterator<'a, R> {
    type Item = std::io::Result<Block>;

    /// Yields the next block found in the reader's data.
    ///
    /// The buffer is refilled whenever all buffered bytes have been handed to
    /// the scanner, and every byte read is also fed into the caller's digest.
    /// At end of input the scanner's pending block (if any) is yielded, and
    /// the iterator returns `None` afterwards.
    ///
    /// A read error other than `ErrorKind::Interrupted` is yielded once as
    /// `Some(Err(_))` and ends the iteration; interrupted reads are retried.
    /// The scanner's state is left untouched on such an error.
    fn next(&mut self) -> Option<std::io::Result<Block>> {
        loop {
            if self.ended {
                return None;
            }
            if self.buffer_index == self.buffer_size {
                // Everything buffered has been fed; fetch more input.
                match self.reader.read(self.buffer) {
                    Ok(0) => {
                        self.ended = true;
                        return self.scanner.finish().map(Ok);
                    }
                    Ok(n) => {
                        self.buffer_index = 0;
                        self.buffer_size = n;
                        self.digest.update(&self.buffer[..n]);
                    }
                    // Interrupted reads transferred no data and are
                    // documented as retryable; go around and read again.
                    Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                    Err(e) => {
                        self.ended = true;
                        return Some(Err(e));
                    }
                }
            }
            let (consumed, maybe_block) = self
                .scanner
                .feed(&self.buffer[self.buffer_index..self.buffer_size]);
            self.buffer_index += consumed;
            if let Some(block) = maybe_block {
                return Some(Ok(block));
            }
        }
    }
}