extern crate adler32;
extern crate blake2_rfc;
extern crate futures;
#[cfg(test)]
extern crate rand;
extern crate serde;
use std::collections::HashMap;
use std::io::{Read, Seek, SeekFrom, Write};
const BLAKE2_SIZE: usize = 32;
#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
struct Blake2b([u8; BLAKE2_SIZE]);
impl std::borrow::Borrow<[u8]> for Blake2b {
fn borrow(&self) -> &[u8] {
&self.0
}
}
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub struct Signature {
pub window: usize,
chunks: HashMap<u32, HashMap<Blake2b, usize>>
}
pub fn signature<R: Read, B: AsRef<[u8]>+AsMut<[u8]>>(mut r: R, mut block: B) -> Result<Signature, std::io::Error> {
let mut chunks = HashMap::new();
let mut i = 0;
let block = block.as_mut();
let mut eof = false;
while !eof {
let mut j = 0;
while j < block.len() {
let r = r.read(&mut block[j..])?;
if r == 0 {
eof = true;
break;
}
j += r
}
let block = &block[..j];
let hash = adler32::RollingAdler32::from_buffer(block);
let mut blake2 = [0; BLAKE2_SIZE];
blake2.clone_from_slice(blake2_rfc::blake2b::blake2b(BLAKE2_SIZE, &[], &block).as_bytes());
chunks
.entry(hash.hash())
.or_insert(HashMap::new())
.insert(Blake2b(blake2), i);
i += block.len()
}
Ok(Signature {
window: block.len(),
chunks
})
}
#[derive(Debug, Serialize, Deserialize, PartialEq)]
pub enum Block {
FromSource(u64),
Literal(Vec<u8>),
}
struct State {
result: Vec<Block>,
block_oldest: usize,
block_len: usize,
pending: Vec<u8>,
}
impl State {
fn new() -> Self {
State {
result: Vec::new(),
block_oldest: 0,
block_len: 1,
pending: Vec::new(),
}
}
}
#[derive(Default, Debug, Serialize, Deserialize, PartialEq)]
pub struct Delta {
pub blocks: Vec<Block>,
pub window: usize,
}
pub fn compare<R: Read, B:AsRef<[u8]>+AsMut<[u8]>>(sig: &Signature, mut r: R, mut block: B) -> Result<Delta, std::io::Error> {
let mut st = State::new();
let block = block.as_mut();
assert_eq!(block.len(), sig.window);
while st.block_len > 0 {
let mut hash = {
let mut j = 0;
let block = {
while j < sig.window {
let r = r.read(&mut block[..])?;
if r == 0 {
break;
}
j += r
}
st.block_oldest = 0;
st.block_len = j;
&block[..j]
};
adler32::RollingAdler32::from_buffer(block)
};
loop {
if matches(&mut st, sig, &block, &hash) {
break;
}
let oldest = block[st.block_oldest];
hash.remove(st.block_len, oldest);
let r = r.read(&mut block[st.block_oldest..st.block_oldest + 1])?;
if r > 0 {
hash.update(block[st.block_oldest]);
} else if st.block_len > 0 {
st.block_len -= 1;
} else {
break;
}
st.pending.push(oldest);
st.block_oldest = (st.block_oldest + 1) % sig.window;
}
if !st.pending.is_empty() {
st.result.push(Block::Literal(std::mem::replace(
&mut st.pending,
Vec::new(),
)))
}
}
Ok(Delta {
blocks: st.result,
window: sig.window,
})
}
fn matches(st: &mut State, sig: &Signature, block: &[u8], hash: &adler32::RollingAdler32) -> bool {
if let Some(h) = sig.chunks.get(&hash.hash()) {
let blake2 = {
let mut b = blake2_rfc::blake2b::Blake2b::new(BLAKE2_SIZE);
if st.block_oldest + st.block_len > sig.window {
b.update(&block[st.block_oldest..]);
b.update(&block[..(st.block_oldest + st.block_len) % sig.window]);
} else {
b.update(&block[st.block_oldest..st.block_oldest + st.block_len])
}
b.finalize()
};
if let Some(&index) = h.get(blake2.as_bytes()) {
if !st.pending.is_empty() {
st.result.push(Block::Literal(std::mem::replace(
&mut st.pending,
Vec::new(),
)));
}
st.result.push(Block::FromSource(index as u64));
return true;
}
}
false
}
#[allow(dead_code)]
pub fn restore<W: Write>(mut w: W, s: &[u8], delta: &Delta) -> Result<(), std::io::Error> {
for d in delta.blocks.iter() {
match *d {
Block::FromSource(i) => {
let i = i as usize;
if i + delta.window <= s.len() {
w.write(&s[i..i + delta.window])?
} else {
w.write(&s[i..])?
}
}
Block::Literal(ref l) => w.write(l)?,
};
}
Ok(())
}
pub fn restore_seek<W: Write, R: Read + Seek, B: AsRef<[u8]>+AsMut<[u8]>>(
mut w: W,
mut s: R,
mut buf: B,
delta: &Delta,
) -> Result<(), std::io::Error> {
let buf = buf.as_mut();
for d in delta.blocks.iter() {
match *d {
Block::FromSource(i) => {
s.seek(SeekFrom::Start(i as u64))?;
let mut n = 0;
loop {
let r = s.read(&mut buf[n..delta.window])?;
if r == 0 {
break;
}
n += r
}
let mut m = 0;
while m < n {
m += w.write(&buf[m..n])?;
}
}
Block::Literal(ref l) => {
w.write(l)?;
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use rand;
use super::*;
use rand::Rng;
const WINDOW: usize = 32;
#[test]
fn basic() {
for index in 0..10 {
let source = rand::thread_rng()
.gen_ascii_chars()
.take(WINDOW * 10 + 8)
.collect::<String>();
let mut modified = source.clone();
let index = WINDOW * index + 3;
unsafe {
modified.as_bytes_mut()[index] =
((source.as_bytes()[index] as usize + 1) & 255) as u8
}
let block = [0; WINDOW];
let source_sig = signature(source.as_bytes(), block).unwrap();
let comp = compare(&source_sig, modified.as_bytes(), block).unwrap();
let mut restored = Vec::new();
let source = std::io::Cursor::new(source.as_bytes());
restore_seek(&mut restored, source, [0; WINDOW], &comp).unwrap();
if &restored[..] != modified.as_bytes() {
for i in 0..10 {
let a = &restored[i * WINDOW..(i + 1) * WINDOW];
let b = &modified.as_bytes()[i * WINDOW..(i + 1) * WINDOW];
println!("{:?}\n{:?}\n", a, b);
if a != b {
println!(">>>>>>>>");
}
}
panic!("different");
}
}
}
}