use std::cmp::min;
use std::io::{self, Read, Write};
use std::ops::Range;
use std::ptr;
use blake2_rfc::blake2b::Blake2b;
use byteorder::{BE, WriteBytesExt};
use super::rolling_checksum::RollingChecksum;
use super::signature_lookup::SignatureLookup;
use super::DELTA_MAGIC;
pub struct DeltaCommandWriter<W: Write> {
writer: W,
block_size: u32,
matched_blocks: Option<(u64, u64)>,
literal_buffer: Vec<u8>,
}
impl<W: Write> DeltaCommandWriter<W> {
pub fn new(mut writer: W, block_size: u32) -> io::Result<Self> {
writer.write_u32::<BE>(DELTA_MAGIC)?; Ok(DeltaCommandWriter {
writer,
block_size,
matched_blocks: None,
literal_buffer: vec![],
})
}
pub fn record_matched_block(&mut self, block_index: u64) -> io::Result<()> {
self.write_literal_command()?;
if let Some((start, count)) = self.matched_blocks {
if start + count == block_index {
self.matched_blocks = Some((start, count + 1));
} else {
self.write_current_copy_command()?;
self.matched_blocks = Some((block_index, 1));
}
} else {
self.matched_blocks = Some((block_index, 1));
}
Ok(())
}
pub fn add_literal_data(&mut self, data: &[u8]) -> io::Result<()> {
self.write_current_copy_command()?;
self.literal_buffer.extend(data);
Ok(())
}
pub fn finish(&mut self) -> io::Result<()> {
self.write_literal_command()?;
self.write_current_copy_command()?;
self.writer.write_u8(0)?;
Ok(())
}
fn write_current_copy_command(&mut self) -> io::Result<()> {
let block_size = self.block_size as u64;
if let Some((start, count)) = self.matched_blocks {
self.write_copy_command(start * block_size, count * block_size)?;
self.matched_blocks = None;
}
Ok(())
}
fn write_literal_command(&mut self) -> io::Result<()> {
if self.literal_buffer.is_empty() {
return Ok(());
}
let length = self.literal_buffer.len() as u64;
let len_size_class = get_uint_size_class(length);
if length <= 64 {
self.writer.write_u8(length as u8)?;
} else {
self.writer.write_u8(65 + len_size_class)?;
self.write_uint_big_endian(length, len_size_class)?;
}
self.writer.write(&self.literal_buffer)?;
self.literal_buffer.clear();
Ok(())
}
fn write_copy_command(&mut self, start: u64, length: u64) -> io::Result<()> {
let start_size_class = get_uint_size_class(start);
let len_size_class = get_uint_size_class(length);
self.writer.write_u8(69 + start_size_class * 4 + len_size_class)?;
self.write_uint_big_endian(start, start_size_class)?;
self.write_uint_big_endian(length, len_size_class)?;
Ok(())
}
fn write_uint_big_endian(&mut self, n: u64, size_class: u8) -> io::Result<()> {
match size_class {
0 => self.writer.write_u8(n as u8),
1 => self.writer.write_u16::<BE>(n as u16),
2 => self.writer.write_u32::<BE>(n as u32),
3 => self.writer.write_u64::<BE>(n as u64),
_ => panic!(),
}
}
}
fn get_uint_size_class(n: u64) -> u8 {
return if n <= 0xFF {
0
} else if n <= 0xFFFF {
1
} else if n <= 0xFFFFFFFF {
2
} else {
3
};
}
pub struct DeltaGenerator<W: Write> {
pub buffer: Vec<u8>,
pub buffer_end: usize,
sig_lookup: SignatureLookup,
block_size: usize,
processed_count: usize,
cmd_writer: DeltaCommandWriter<W>,
rolling_checksum: RollingChecksum,
}
impl<W: Write> DeltaGenerator<W> {
pub fn new(writer: W, sig_lookup: SignatureLookup) -> io::Result<Self> {
let block_size = sig_lookup.block_size();
Ok(DeltaGenerator {
buffer: vec![0u8; block_size as usize * 10],
buffer_end: 0,
sig_lookup,
block_size: block_size as usize,
processed_count: 0,
cmd_writer: DeltaCommandWriter::new(writer, block_size)?,
rolling_checksum: RollingChecksum::new(),
})
}
pub fn generate<R: Read>(mut self, mut new_file: R) -> io::Result<()> {
loop {
let buffer_len = self.buffer.len();
let num_read = new_file.read(&mut self.buffer[self.buffer_end..buffer_len])?;
self.buffer_end += num_read;
if num_read == 0 {
self.process_buffer(true)?;
break;
}
if self.buffer_end == self.buffer.len() {
self.process_buffer(false)?;
}
}
Ok(())
}
fn process_buffer(&mut self, finish: bool) -> io::Result<()> {
if self.buffer_end < self.block_size {
assert!(finish);
self.cmd_writer.add_literal_data(&self.buffer[0..self.buffer_end])?;
self.cmd_writer.finish()?;
return Ok(());
}
self.rolling_checksum.add_slice(
&self.buffer[self.processed_count..self.block_size]);
let mut i = self.block_size;
let mut literal_start = 0;
loop {
let matching_blocks = self.sig_lookup.get_blocks(self.rolling_checksum.get());
let mut matching_block = None;
if let Some(matching_blocks) = matching_blocks {
if !matching_blocks.is_empty() {
let mut hash = Blake2b::new(32);
hash.update(&self.buffer[i - self.block_size..i]);
let res = hash.finalize();
let hash = &res.as_bytes()[0..self.sig_lookup.strong_sum_size() as usize];
matching_block =
matching_blocks.iter().find(|block| { return block.strong_sum == hash; });
}
}
if let Some(matching_block) = matching_block {
self.cmd_writer.add_literal_data(
&self.buffer[literal_start..i - self.block_size])?;
self.cmd_writer.record_matched_block(matching_block.block_index)?;
literal_start = i;
self.rolling_checksum = RollingChecksum::new();
self.rolling_checksum.add_slice(
&self.buffer[i..min(i + self.block_size, self.buffer_end)]);
i += self.block_size;
if i > self.buffer_end {
break;
}
} else {
if i >= self.buffer_end {
break;
}
self.rolling_checksum.rotate(self.buffer[i], self.buffer[i - self.block_size]);
i += 1;
}
}
self.cmd_writer.add_literal_data(&self.buffer[literal_start..i - self.block_size])?;
let rem_data_range = i - self.block_size..self.buffer_end;
copy_within_slice(&mut self.buffer, rem_data_range.clone(), 0);
self.buffer_end = rem_data_range.end - rem_data_range.start;
self.processed_count = self.buffer_end;
if finish {
self.cmd_writer.add_literal_data(&self.buffer[0..self.buffer_end])?;
self.cmd_writer.finish()?;
}
Ok(())
}
}
fn copy_within_slice<T: Copy>(slice: &mut [T], src_range: Range<usize>, dest: usize) {
unsafe {
let src_len = src_range.end - src_range.start;
assert!(src_range.start <= slice.len());
assert!(src_range.end <= slice.len());
assert!(dest <= slice.len() - src_len);
ptr::copy(slice.as_ptr().offset(src_range.start as isize),
slice.as_mut_ptr().offset(dest as isize),
src_len);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_delta_cmd_gen_simple() {
let mut cmds = vec![];
{
let mut cmd_writer = DeltaCommandWriter::new(&mut cmds, 30).unwrap();
cmd_writer.add_literal_data(&[0xF8]).unwrap();
cmd_writer.record_matched_block(3).unwrap();
cmd_writer.add_literal_data(&[0x3D, 0xF8]).unwrap();
cmd_writer.finish().unwrap();
}
assert_eq!(cmds, &[
0x72, 0x73, 0x02, 0x36, 0x01, 0xF8, 0x45, 0x5A, 0x1E, 0x02, 0x3D, 0xF8, 0x00, ]);
}
#[test]
fn test_delta_cmd_gen() {
let mut cmds = vec![];
{
let mut cmd_writer = DeltaCommandWriter::new(&mut cmds, 2000).unwrap();
cmd_writer.record_matched_block(3).unwrap();
cmd_writer.add_literal_data(&[0x83]).unwrap();
cmd_writer.add_literal_data(&[0x3D, 0xF8, 0x23]).unwrap();
cmd_writer.record_matched_block(4).unwrap();
cmd_writer.record_matched_block(5).unwrap();
cmd_writer.record_matched_block(6).unwrap();
cmd_writer.record_matched_block(42).unwrap();
cmd_writer.record_matched_block(1).unwrap();
cmd_writer.finish().unwrap();
}
assert_eq!(cmds, &[
0x72, 0x73, 0x02, 0x36, 0x4A, 0x17, 0x70, 0x07, 0xD0, 0x04, 0x83, 0x3D, 0xF8, 0x23, 0x4A, 0x1F, 0x40, 0x17, 0x70, 0x4E, 0x00, 0x01, 0x48, 0x20, 0x07, 0xD0, 0x4A, 0x07, 0xD0, 0x07, 0xD0, 0x00, ]);
}
}