use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// A serializable description of how to obtain a target byte sequence from a
/// source byte sequence.
///
/// Built by `DeltaCompressor::compute_delta` and consumed by
/// `DeltaCompressor::apply_delta`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Delta {
/// Hash of the source bytes the delta was computed against.
pub source_hash: u64,
/// Hash of the target bytes; `apply_delta` compares the rebuilt output
/// against this value.
pub target_hash: u64,
/// Operations that `apply_delta` walks in order to rebuild the target.
pub operations: Vec<DeltaOp>,
/// Size and operation-count statistics recorded when the delta was computed.
pub metadata: DeltaMetadata,
}
/// Statistics recorded alongside a [`Delta`] by `DeltaCompressor::compute_delta`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeltaMetadata {
/// Length of the target in bytes; `apply_delta` also uses it as the
/// pre-allocation size for the output buffer.
pub original_size: usize,
/// Estimated encoded size of the operation list in bytes (an estimate, not
/// a measured serialized size).
pub compressed_size: usize,
/// `compressed_size` expressed as a percentage of the target length.
pub compression_ratio: f32,
/// Number of entries in the delta's operation list.
pub op_count: usize,
}
/// A single step of a [`Delta`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DeltaOp {
/// Append `length` bytes of the source, starting at byte `offset`, to the output.
Copy { offset: usize, length: usize },
/// Append the literal bytes in `data` to the output.
Insert { data: Vec<u8> },
/// Contributes nothing to the output: `apply_delta` skips this variant, and
/// nothing in this file emits it.
// NOTE(review): presumably marks a removed source range — confirm intended semantics.
Delete { offset: usize, length: usize },
}
/// Computes and applies [`Delta`]s between byte sequences.
pub struct DeltaCompressor {
// Set to 4096 by `new`; not read by any code visible in this file.
// NOTE(review): presumably the granularity for future block matching — confirm.
block_size: usize,
// Starts empty in `new`; not read or written by any code visible in this file.
// NOTE(review): presumably maps a chunk hash to its bytes — confirm.
chunk_cache: HashMap<u64, Vec<u8>>,
}
impl DeltaCompressor {
    /// Creates a compressor with a 4096-byte block size and an empty chunk cache.
    pub fn new() -> Self {
        Self {
            block_size: 4096,
            chunk_cache: HashMap::new(),
        }
    }

    /// Computes the [`Delta`] that turns `source` into `target`.
    ///
    /// The returned metadata records the target length, the estimated encoded
    /// size of the operations, and that estimate as a percentage of the target
    /// length. For an empty target the percentage is undefined and is reported
    /// as `0.0` rather than NaN or infinity.
    pub fn compute_delta(&mut self, source: &[u8], target: &[u8]) -> Delta {
        let source_hash = self.hash_content(source);
        let target_hash = self.hash_content(target);
        let operations = self.compute_operations(source, target);
        let compressed_size = self.estimate_compressed_size(&operations);
        let op_count = operations.len();

        // Dividing by a zero-length target would yield NaN (0/0) or inf (n/0),
        // which poisons comparisons and some serializers; pin it to 0.0.
        let compression_ratio = if target.is_empty() {
            0.0
        } else {
            (compressed_size as f32 / target.len() as f32) * 100.0
        };

        Delta {
            source_hash,
            target_hash,
            operations,
            metadata: DeltaMetadata {
                original_size: target.len(),
                compressed_size,
                compression_ratio,
                op_count,
            },
        }
    }

    /// Rebuilds the target bytes by applying `delta` to `source`.
    ///
    /// A delta with no operations whose source and target hashes are equal is
    /// what `compute_delta` produces for identical inputs; it is applied as
    /// "the target is the source". Otherwise the operations are applied in
    /// order: `Copy` appends a source range, `Insert` appends literal bytes and
    /// `Delete` contributes nothing.
    ///
    /// # Errors
    ///
    /// * [`DeltaError::InvalidOffset`] if a `Copy` range overflows `usize` or
    ///   extends past the end of `source`.
    /// * [`DeltaError::HashMismatch`] if the rebuilt bytes do not hash to
    ///   `delta.target_hash`.
    pub fn apply_delta(&self, source: &[u8], delta: &Delta) -> Result<Vec<u8>, DeltaError> {
        let mut result: Vec<u8> = Vec::new();
        // `original_size` may come from a deserialized (possibly corrupt) delta,
        // so treat it purely as a hint: if the reservation cannot be satisfied
        // the buffer simply grows on demand instead of panicking or aborting.
        let _ = result.try_reserve(delta.metadata.original_size);

        // Identical inputs are encoded as "no operations, same hash". Without
        // this the output would be empty and fail the hash check below.
        if delta.operations.is_empty() && delta.source_hash == delta.target_hash {
            result.extend_from_slice(source);
        }

        for op in &delta.operations {
            match op {
                DeltaOp::Copy { offset, length } => {
                    // checked_add guards against offset + length wrapping around,
                    // and `get` rejects ranges that run past the source.
                    let chunk = offset
                        .checked_add(*length)
                        .and_then(|end| source.get(*offset..end))
                        .ok_or(DeltaError::InvalidOffset)?;
                    result.extend_from_slice(chunk);
                }
                DeltaOp::Insert { data } => result.extend_from_slice(data),
                DeltaOp::Delete { .. } => {}
            }
        }

        if self.hash_content(&result) != delta.target_hash {
            return Err(DeltaError::HashMismatch);
        }
        Ok(result)
    }

    /// Produces the operation list that turns `source` into `target`.
    ///
    /// Identical inputs yield an empty list; any difference yields a single
    /// `Insert` carrying the whole target (no partial matching is attempted).
    fn compute_operations(&self, source: &[u8], target: &[u8]) -> Vec<DeltaOp> {
        if source == target {
            return Vec::new();
        }
        vec![DeltaOp::Insert {
            data: target.to_vec(),
        }]
    }

    /// Hashes `data` with 64-bit FNV-1a (xor each byte in, then multiply by the prime).
    fn hash_content(&self, data: &[u8]) -> u64 {
        const FNV_OFFSET: u64 = 14695981039346656037;
        const FNV_PRIME: u64 = 1099511628211;
        data.iter().fold(FNV_OFFSET, |hash, byte| {
            (hash ^ u64::from(*byte)).wrapping_mul(FNV_PRIME)
        })
    }

    /// Estimates the encoded size of `operations` in bytes: 16 for each `Copy`
    /// or `Delete`, and the payload length plus 8 for each `Insert`.
    fn estimate_compressed_size(&self, operations: &[DeltaOp]) -> usize {
        operations
            .iter()
            .map(|op| match op {
                DeltaOp::Copy { .. } | DeltaOp::Delete { .. } => 16,
                DeltaOp::Insert { data } => data.len() + 8,
            })
            .sum()
    }
}
impl Default for DeltaCompressor {
fn default() -> Self {
Self::new()
}
}
/// Errors reported when working with a [`Delta`].
#[derive(Debug, thiserror::Error)]
pub enum DeltaError {
/// A `Copy` operation referenced a byte range outside the source.
#[error("Invalid offset in delta operation")]
InvalidOffset,
/// The bytes rebuilt by `apply_delta` did not hash to the delta's `target_hash`.
#[error("Hash mismatch after applying delta")]
HashMismatch,
/// Compression failed; the payload is a human-readable reason.
// NOTE(review): not constructed by any code visible in this file.
#[error("Compression failed: {0}")]
CompressionFailed(String),
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// Diffing a buffer against itself must produce no operations.
    #[test]
    fn test_delta_identical_data() {
        let payload = b"Hello, World!";
        let mut engine = DeltaCompressor::new();

        let patch = engine.compute_delta(payload, payload);

        assert!(patch.operations.is_empty());
    }

    /// Applying a computed delta to its source must reproduce the target.
    #[test]
    fn test_delta_apply() {
        let before = b"Hello, World!";
        let after = b"Hello, Rust!";
        let mut engine = DeltaCompressor::new();

        let patch = engine.compute_delta(before, after);
        let rebuilt = engine.apply_delta(before, &patch).unwrap();

        assert_eq!(rebuilt, after);
    }

    /// The reported compression ratio for a one-byte change is non-negative.
    #[test]
    fn test_delta_compression_ratio() {
        let before = vec![0u8; 1024];
        let after = {
            let mut bytes = before.clone();
            bytes[100] = 42;
            bytes
        };
        let mut engine = DeltaCompressor::new();

        let patch = engine.compute_delta(&before, &after);

        assert!(patch.metadata.compression_ratio >= 0.0);
    }
}