use alloc::collections::BTreeMap;
use alloc::string::String;
use alloc::vec::Vec;
use super::types::{
BlockSignature, DEFAULT_BLOCK_SIZE, Delta, DeltaError, DeltaOp, DeltaResult, MAX_BLOCK_SIZE,
MIN_BLOCK_SIZE, RollingChecksum, SignatureSet,
};
fn strong_checksum(data: &[u8]) -> [u8; 16] {
let mut state: [u64; 4] = [
0x6a09e667f3bcc908,
0xbb67ae8584caa73b,
0x3c6ef372fe94f82b,
0xa54ff53a5f1d36f1,
];
for chunk in data.chunks(8) {
let mut val: u64 = 0;
for (i, &byte) in chunk.iter().enumerate() {
val |= (byte as u64) << (i * 8);
}
state[0] = state[0].wrapping_add(val);
state[1] = state[1].wrapping_mul(state[0] | 1);
state[2] ^= state[1].rotate_left(17);
state[3] = state[3].wrapping_add(state[2]);
}
for _ in 0..4 {
state[0] = state[0].wrapping_add(state[3]);
state[1] ^= state[0].rotate_left(13);
state[2] = state[2].wrapping_add(state[1]);
state[3] = state[3].wrapping_mul(state[2] | 1);
}
let mut result = [0u8; 16];
for (i, &s) in state.iter().take(2).enumerate() {
let bytes = s.to_le_bytes();
result[i * 8..(i + 1) * 8].copy_from_slice(&bytes);
}
result
}
pub fn file_checksum(data: &[u8]) -> [u64; 4] {
let mut state: [u64; 4] = [
0x243f6a8885a308d3,
0x13198a2e03707344,
0xa4093822299f31d0,
0x082efa98ec4e6c89,
];
for chunk in data.chunks(32) {
for (i, byte_chunk) in chunk.chunks(8).enumerate() {
let mut val: u64 = 0;
for (j, &byte) in byte_chunk.iter().enumerate() {
val |= (byte as u64) << (j * 8);
}
state[i % 4] = state[i % 4].wrapping_add(val);
}
state[0] ^= state[3].rotate_left(7);
state[1] = state[1].wrapping_add(state[0]);
state[2] ^= state[1].rotate_left(13);
state[3] = state[3].wrapping_add(state[2]);
}
state[0] ^= data.len() as u64;
for _ in 0..8 {
state[0] = state[0].wrapping_add(state[3].rotate_left(5));
state[1] ^= state[0];
state[2] = state[2].wrapping_add(state[1].rotate_left(11));
state[3] ^= state[2];
}
state
}
pub fn generate_signatures(data: &[u8], block_size: usize) -> DeltaResult<SignatureSet> {
if !(MIN_BLOCK_SIZE..=MAX_BLOCK_SIZE).contains(&block_size) {
return Err(DeltaError::InvalidBlockSize(block_size));
}
let file_cksum = file_checksum(data);
let mut set = SignatureSet::new(file_cksum, data.len() as u64, block_size as u32);
let mut offset = 0u64;
for (index, chunk) in data.chunks(block_size).enumerate() {
let rolling = RollingChecksum::compute(chunk);
let strong = strong_checksum(chunk);
set.add_block(BlockSignature::new(
index as u32,
offset,
chunk.len() as u32,
rolling,
strong,
));
offset += chunk.len() as u64;
}
Ok(set)
}
pub fn generate_signatures_default(data: &[u8]) -> DeltaResult<SignatureSet> {
generate_signatures(data, DEFAULT_BLOCK_SIZE)
}
pub fn compute_delta(source_sigs: &SignatureSet, target: &[u8]) -> DeltaResult<Delta> {
let block_size = source_sigs.block_size as usize;
let target_checksum = file_checksum(target);
let mut delta = Delta::new(
source_sigs.file_checksum,
target_checksum,
source_sigs.file_size,
target.len() as u64,
source_sigs.block_size,
);
if target.is_empty() {
return Ok(delta);
}
if target.len() < block_size {
delta.add_insert(target);
return Ok(delta);
}
let mut rolling_lookup: BTreeMap<u32, Vec<&BlockSignature>> = BTreeMap::new();
for block in &source_sigs.blocks {
rolling_lookup.entry(block.rolling).or_default().push(block);
}
let mut pos = 0usize;
let mut literal_start = 0usize;
while pos + block_size <= target.len() {
let window = &target[pos..pos + block_size];
let rolling = RollingChecksum::compute(window);
let mut found_match = false;
if let Some(candidates) = rolling_lookup.get(&rolling) {
let strong = strong_checksum(window);
for block in candidates {
if block.strong == strong && block.length as usize == block_size {
if pos > literal_start {
delta.add_insert(&target[literal_start..pos]);
}
delta.add_copy(block.offset, block.length as u64);
pos += block_size;
literal_start = pos;
found_match = true;
break;
}
}
}
if !found_match {
pos += 1;
}
}
if literal_start < target.len() {
delta.add_insert(&target[literal_start..]);
}
Ok(delta)
}
pub fn apply_delta(source: &[u8], delta: &Delta) -> DeltaResult<Vec<u8>> {
let source_cksum = file_checksum(source);
if source_cksum != delta.source_checksum {
return Err(DeltaError::ChecksumMismatch {
expected: delta.source_checksum,
actual: source_cksum,
});
}
let mut result = Vec::with_capacity(delta.target_size as usize);
for op in &delta.ops {
match op {
DeltaOp::Copy { src_offset, length } => {
let start = *src_offset as usize;
let end = start + *length as usize;
if end > source.len() {
return Err(DeltaError::ApplyFailed(String::from(
"copy extends past source end",
)));
}
result.extend_from_slice(&source[start..end]);
}
DeltaOp::Insert { data } => {
result.extend_from_slice(data);
}
}
}
let result_cksum = file_checksum(&result);
if result_cksum != delta.target_checksum {
return Err(DeltaError::ChecksumMismatch {
expected: delta.target_checksum,
actual: result_cksum,
});
}
Ok(result)
}
pub fn optimal_block_size(file_size: u64) -> usize {
match file_size {
0..=1024 => 128, 1025..=65536 => 512, 65537..=1048576 => 2048, 1048577..=16777216 => 4096, 16777217..=268435456 => 8192, _ => 16384, }
}
pub fn create_full_insert_delta(data: &[u8]) -> Delta {
let checksum = file_checksum(data);
let mut delta = Delta::new(
[0; 4],
checksum,
0,
data.len() as u64,
DEFAULT_BLOCK_SIZE as u32,
);
delta.add_insert(data);
delta
}
pub fn create_identity_delta(data: &[u8]) -> Delta {
let checksum = file_checksum(data);
let mut delta = Delta::new(
checksum,
checksum,
data.len() as u64,
data.len() as u64,
DEFAULT_BLOCK_SIZE as u32,
);
delta.add_copy(0, data.len() as u64);
delta
}
#[cfg(test)]
mod tests {
use super::*;
use alloc::vec;
use alloc::vec::Vec;
#[test]
fn test_strong_checksum() {
let data1 = b"hello world";
let data2 = b"hello worle";
let cksum1 = strong_checksum(data1);
let cksum2 = strong_checksum(data2);
assert_ne!(cksum1, cksum2);
let cksum1_again = strong_checksum(data1);
assert_eq!(cksum1, cksum1_again);
}
#[test]
fn test_file_checksum() {
let data1 = b"test data";
let data2 = b"different";
let cksum1 = file_checksum(data1);
let cksum2 = file_checksum(data2);
assert_ne!(cksum1, cksum2);
}
#[test]
fn test_generate_signatures() {
let data = vec![0u8; 8192];
let sigs = generate_signatures(&data, 4096).unwrap();
assert_eq!(sigs.block_count(), 2);
assert_eq!(sigs.blocks[0].length, 4096);
assert_eq!(sigs.blocks[1].length, 4096);
}
#[test]
fn test_generate_signatures_small_file() {
let data = b"small file";
let sigs = generate_signatures(data, 4096).unwrap();
assert_eq!(sigs.block_count(), 1);
assert_eq!(sigs.blocks[0].length as usize, data.len());
}
#[test]
fn test_invalid_block_size() {
let data = b"test";
assert!(generate_signatures(data, 100).is_err()); assert!(generate_signatures(data, 10_000_000).is_err()); }
#[test]
fn test_compute_delta_identical() {
let data: Vec<u8> = (0..1024).map(|i| (i % 256) as u8).collect();
let sigs = generate_signatures(&data, 512).unwrap();
let delta = compute_delta(&sigs, &data).unwrap();
assert!(delta.is_identity());
assert_eq!(delta.copy_percentage(), 100.0);
}
#[test]
fn test_compute_delta_completely_different() {
let source: Vec<u8> = vec![0xAA; 1024];
let target: Vec<u8> = vec![0xBB; 1024];
let sigs = generate_signatures(&source, 512).unwrap();
let delta = compute_delta(&sigs, &target).unwrap();
assert!(delta.is_full_replace());
}
#[test]
fn test_compute_delta_small_change() {
let mut source = Vec::new();
for _ in 0..4 {
source.extend_from_slice(&[0xAA; 512]);
}
let mut target = source.clone();
target[1536..].fill(0xBB);
let sigs = generate_signatures(&source, 512).unwrap();
let delta = compute_delta(&sigs, &target).unwrap();
assert!(!delta.is_full_replace());
}
#[test]
fn test_apply_delta_identical() {
let data = b"test data for applying delta";
let sigs = generate_signatures(data, 512).unwrap();
let delta = compute_delta(&sigs, data).unwrap();
let result = apply_delta(data, &delta).unwrap();
assert_eq!(&result, data);
}
#[test]
fn test_apply_delta_modified() {
let source = b"original file content here";
let target = b"modified file content here";
let sigs = generate_signatures(source, 512).unwrap();
let delta = compute_delta(&sigs, target).unwrap();
let result = apply_delta(source, &delta).unwrap();
assert_eq!(&result, target);
}
#[test]
fn test_apply_delta_checksum_mismatch() {
let source = b"original";
let target = b"modified";
let sigs = generate_signatures(source, 512).unwrap();
let delta = compute_delta(&sigs, target).unwrap();
let wrong_source = b"different";
let result = apply_delta(wrong_source, &delta);
assert!(matches!(result, Err(DeltaError::ChecksumMismatch { .. })));
}
#[test]
fn test_full_insert_delta() {
let data = b"new file with no source";
let delta = create_full_insert_delta(data);
assert!(delta.is_full_replace());
assert_eq!(delta.target_size, data.len() as u64);
}
#[test]
fn test_identity_delta() {
let data = b"file that hasn't changed";
let delta = create_identity_delta(data);
assert!(delta.is_identity());
assert_eq!(delta.op_count(), 1);
}
#[test]
fn test_optimal_block_size() {
assert_eq!(optimal_block_size(500), 128);
assert_eq!(optimal_block_size(50000), 512);
assert_eq!(optimal_block_size(500000), 2048);
assert_eq!(optimal_block_size(5000000), 4096);
assert_eq!(optimal_block_size(50000000), 8192);
assert_eq!(optimal_block_size(500000000), 16384);
}
#[test]
fn test_roundtrip_various_sizes() {
for size in [64, 256, 1024, 4096, 8192, 16384] {
let mut source: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
let mut target = source.clone();
if size > 10 {
target[5] = 0xFF;
target[size - 5] = 0xAA;
}
let block_size = optimal_block_size(size as u64);
let sigs = generate_signatures(&source, block_size.max(MIN_BLOCK_SIZE)).unwrap();
let delta = compute_delta(&sigs, &target).unwrap();
let result = apply_delta(&source, &delta).unwrap();
assert_eq!(result, target, "Roundtrip failed for size {}", size);
}
}
#[test]
fn test_empty_files() {
let empty: &[u8] = &[];
let sigs = generate_signatures(empty, 512).unwrap();
assert_eq!(sigs.block_count(), 0);
let delta = compute_delta(&sigs, empty).unwrap();
assert_eq!(delta.op_count(), 0);
}
#[test]
fn test_append_scenario() {
let source = b"original content";
let target = b"original content with appended data";
let sigs = generate_signatures(source, 512).unwrap();
let delta = compute_delta(&sigs, target).unwrap();
let result = apply_delta(source, &delta).unwrap();
assert_eq!(&result, target);
}
#[test]
fn test_prepend_scenario() {
let source = b"original content";
let target = b"prepended data original content";
let sigs = generate_signatures(source, 512).unwrap();
let delta = compute_delta(&sigs, target).unwrap();
let result = apply_delta(source, &delta).unwrap();
assert_eq!(&result, target);
}
}