use crate::error::{FossilError, Result};
use std::collections::HashMap;
fn encode_int(mut value: u32) -> Vec<u8> {
const CHARS: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~";
if value == 0 {
return vec![b'0'];
}
let mut result = Vec::new();
while value > 0 {
result.push(CHARS[(value % 64) as usize]);
value /= 64;
}
result.reverse();
result
}
pub fn create_delta(source: &[u8], target: &[u8]) -> Vec<u8> {
if source.len() < 16 {
return create_insert_only_delta(target);
}
const BLOCK_SIZE: usize = 4;
let mut hash_table: HashMap<u32, Vec<usize>> = HashMap::new();
for i in 0..source.len().saturating_sub(BLOCK_SIZE - 1) {
let hash = simple_hash(&source[i..i + BLOCK_SIZE]);
hash_table.entry(hash).or_default().push(i);
}
let mut delta = Vec::new();
delta.extend(encode_int(target.len() as u32));
delta.push(b'\n');
let mut pos = 0;
let mut pending_insert: Vec<u8> = Vec::new();
while pos < target.len() {
let remaining = target.len() - pos;
if remaining >= BLOCK_SIZE {
let hash = simple_hash(&target[pos..pos + BLOCK_SIZE]);
if let Some(positions) = hash_table.get(&hash) {
let mut best_match: Option<(usize, usize)> = None;
for &src_pos in positions {
let match_len = find_match_length(source, src_pos, target, pos);
if match_len >= BLOCK_SIZE {
if best_match.is_none() || match_len > best_match.unwrap().1 {
best_match = Some((src_pos, match_len));
}
}
}
if let Some((src_pos, match_len)) = best_match {
if !pending_insert.is_empty() {
delta.extend(encode_int(pending_insert.len() as u32));
delta.push(b':');
delta.extend(&pending_insert);
pending_insert.clear();
}
delta.extend(encode_int(match_len as u32));
delta.push(b'@');
delta.extend(encode_int(src_pos as u32));
delta.push(b',');
pos += match_len;
continue;
}
}
}
pending_insert.push(target[pos]);
pos += 1;
}
if !pending_insert.is_empty() {
delta.extend(encode_int(pending_insert.len() as u32));
delta.push(b':');
delta.extend(&pending_insert);
}
let checksum = simple_checksum(target);
delta.extend(encode_int(checksum));
delta.push(b';');
delta
}
fn create_insert_only_delta(target: &[u8]) -> Vec<u8> {
let mut delta = Vec::new();
delta.extend(encode_int(target.len() as u32));
delta.push(b'\n');
delta.extend(encode_int(target.len() as u32));
delta.push(b':');
delta.extend(target);
let checksum = simple_checksum(target);
delta.extend(encode_int(checksum));
delta.push(b';');
delta
}
fn simple_hash(data: &[u8]) -> u32 {
let mut h: u32 = 0;
for &b in data.iter().take(4) {
h = h.wrapping_mul(31).wrapping_add(b as u32);
}
h
}
fn find_match_length(source: &[u8], src_pos: usize, target: &[u8], tgt_pos: usize) -> usize {
let mut len = 0;
while src_pos + len < source.len()
&& tgt_pos + len < target.len()
&& source[src_pos + len] == target[tgt_pos + len]
{
len += 1;
}
len
}
fn simple_checksum(data: &[u8]) -> u32 {
let mut sum: u32 = 0;
for &b in data {
sum = sum.wrapping_add(b as u32);
}
sum & 0xFFFF
}
fn decode_int(data: &[u8], pos: &mut usize) -> Result<u32> {
let mut value: u64 = 0;
let mut count = 0;
while *pos < data.len() {
let c = data[*pos];
let digit = match c {
b'0'..=b'9' => (c - b'0') as u64,
b'A'..=b'Z' => (c - b'A' + 10) as u64,
b'_' => 36,
b'a'..=b'z' => (c - b'a' + 37) as u64,
b'~' => 63,
_ => break, };
value = value * 64 + digit;
*pos += 1;
count += 1;
if count > 10 {
return Err(FossilError::DeltaFailed("Integer too large".into()));
}
}
if count == 0 {
return Err(FossilError::DeltaFailed(format!(
"Expected integer at position {}",
pos
)));
}
Ok(value as u32)
}
pub fn apply_delta(source: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
if delta.is_empty() {
return Err(FossilError::DeltaFailed("Empty delta".into()));
}
let mut pos = 0;
let target_size = decode_int(delta, &mut pos)?;
if pos >= delta.len() || delta[pos] != b'\n' {
return Err(FossilError::DeltaFailed(format!(
"Invalid header: expected newline at {}, got {:?}",
pos,
delta.get(pos)
)));
}
pos += 1;
let mut output = Vec::with_capacity(target_size as usize);
while pos < delta.len() {
let _start_pos = pos;
let len = match decode_int(delta, &mut pos) {
Ok(n) => n,
Err(_) => {
break;
}
};
if pos >= delta.len() {
return Err(FossilError::DeltaFailed("Unexpected end of delta".into()));
}
if delta[pos] == b';' {
break;
}
if pos >= delta.len() {
return Err(FossilError::DeltaFailed("Unexpected end of delta".into()));
}
match delta[pos] {
b'@' => {
pos += 1;
let offset = decode_int(delta, &mut pos)?;
if pos >= delta.len() || delta[pos] != b',' {
return Err(FossilError::DeltaFailed(format!(
"Expected ',' at position {}",
pos
)));
}
pos += 1;
let start = offset as usize;
let end = start + len as usize;
if end > source.len() {
return Err(FossilError::DeltaFailed(format!(
"Copy range out of bounds: {}..{} > {}",
start,
end,
source.len()
)));
}
output.extend_from_slice(&source[start..end]);
}
b':' => {
pos += 1;
let end = pos + len as usize;
if end > delta.len() {
return Err(FossilError::DeltaFailed(format!(
"Literal out of bounds: need {} bytes at {}, have {}",
len,
pos,
delta.len() - pos
)));
}
output.extend_from_slice(&delta[pos..end]);
pos = end;
}
c => {
return Err(FossilError::DeltaFailed(format!(
"Unknown operator '{}' (0x{:02x}) at position {}",
c as char, c, pos
)));
}
}
}
if pos >= delta.len() || delta[pos] != b';' {
if output.len() == target_size as usize {
return Ok(output);
}
}
if output.len() != target_size as usize {
return Err(FossilError::DeltaFailed(format!(
"Size mismatch: expected {}, got {}",
target_size,
output.len()
)));
}
Ok(output)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_decode_int_simple() {
let mut pos = 0;
assert_eq!(decode_int(b"0", &mut pos).unwrap(), 0);
assert_eq!(pos, 1);
pos = 0;
assert_eq!(decode_int(b"9", &mut pos).unwrap(), 9);
pos = 0;
assert_eq!(decode_int(b"A", &mut pos).unwrap(), 10);
pos = 0;
assert_eq!(decode_int(b"Z", &mut pos).unwrap(), 35);
pos = 0;
assert_eq!(decode_int(b"_", &mut pos).unwrap(), 36);
pos = 0;
assert_eq!(decode_int(b"a", &mut pos).unwrap(), 37);
pos = 0;
assert_eq!(decode_int(b"z", &mut pos).unwrap(), 62);
pos = 0;
assert_eq!(decode_int(b"~", &mut pos).unwrap(), 63);
}
#[test]
fn test_decode_int_multi() {
let mut pos = 0;
assert_eq!(decode_int(b"10", &mut pos).unwrap(), 64);
pos = 0;
assert_eq!(decode_int(b"1X", &mut pos).unwrap(), 97);
}
#[test]
fn test_apply_delta_insert_only() {
let source = b"";
let delta = b"5\n5:hello;";
let result = apply_delta(source, delta).unwrap();
assert_eq!(result, b"hello");
}
#[test]
fn test_apply_delta_copy_only() {
let source = b"hello";
let delta = b"5\n5@0,;";
let result = apply_delta(source, delta).unwrap();
assert_eq!(result, b"hello");
}
#[test]
fn test_apply_delta_mixed() {
let source = b"hello";
let delta = b"B\n5@0,6: world;";
let result = apply_delta(source, delta).unwrap();
assert_eq!(result, b"hello world");
}
#[test]
fn test_apply_delta_partial_copy() {
let source = b"hello";
let delta = b"3\n3@1,;";
let result = apply_delta(source, delta).unwrap();
assert_eq!(result, b"ell");
}
#[test]
fn test_create_and_apply_delta() {
let source = b"The quick brown fox jumps over the lazy dog";
let target = b"The quick brown cat jumps over the lazy dog";
let delta = create_delta(source, target);
let result = apply_delta(source, &delta).unwrap();
assert_eq!(result, target);
}
#[test]
fn test_create_delta_append() {
let source =
b"Hello, World! This is a longer piece of text that allows for good delta compression.";
let target = b"Hello, World! This is a longer piece of text that allows for good delta compression. Extra!";
let delta = create_delta(source, target);
let result = apply_delta(source, &delta).unwrap();
assert_eq!(result, target);
assert!(
delta.len() < target.len(),
"delta {} >= target {}",
delta.len(),
target.len()
);
}
#[test]
fn test_create_delta_empty_source() {
let source = b"";
let target = b"New content";
let delta = create_delta(source, target);
let result = apply_delta(source, &delta).unwrap();
assert_eq!(result, target);
}
#[test]
fn test_create_delta_identical() {
let source = b"Same content here";
let target = b"Same content here";
let delta = create_delta(source, target);
let result = apply_delta(source, &delta).unwrap();
assert_eq!(result, target);
assert!(delta.len() < source.len());
}
#[test]
fn test_encode_decode_int() {
for value in [0, 1, 63, 64, 100, 1000, 10000, 100000] {
let encoded = encode_int(value);
let mut pos = 0;
let decoded = decode_int(&encoded, &mut pos).unwrap();
assert_eq!(decoded, value, "Failed for value {}", value);
}
}
}