use std::collections::HashMap;
const MIN_MATCH_LENGTH_LARGE: usize = 16;
const MIN_MATCH_LENGTH_SMALL: usize = 8;
#[derive(Debug)]
pub struct DeltaEncoder;
impl DeltaEncoder {
pub fn new() -> Self {
Self
}
pub fn encode(base: &[u8], target: &[u8]) -> Vec<u8> {
if base.is_empty() {
return Self::encode_insert(target);
}
let index = Self::build_index(base);
Self::encode_with_index(&index, base, target)
}
pub fn encode_with_index(
index: &HashMap<[u8; 4], Vec<usize>>,
base: &[u8],
target: &[u8],
) -> Vec<u8> {
if base.is_empty() {
return Self::encode_insert(target);
}
let min_match = Self::min_match_for(target.len());
let mut delta = Vec::new();
let mut pos = 0;
while pos < target.len() {
if let Some((offset, length)) =
Self::find_best_match(index, base, target, pos, min_match)
{
Self::emit_copy(&mut delta, offset, length);
pos += length;
} else {
let start = pos;
while pos < target.len() && pos - start < 127 {
if Self::find_best_match(index, base, target, pos, min_match).is_some() {
break;
}
pos += 1;
}
let len = pos - start;
delta.push(len as u8 - 1);
delta.extend_from_slice(&target[start..pos]);
}
}
delta
}
pub fn estimate_delta_size(base: &[u8], target: &[u8]) -> usize {
if base.is_empty() {
return target.len() + target.len().div_ceil(128);
}
let index = Self::build_index(base);
Self::estimate_delta_size_with_index(&index, base, target)
}
pub fn estimate_delta_size_with_index(
index: &HashMap<[u8; 4], Vec<usize>>,
base: &[u8],
target: &[u8],
) -> usize {
if base.is_empty() {
return target.len() + target.len().div_ceil(128);
}
let min_match = Self::min_match_for(target.len());
let mut size = 0usize;
let mut pos = 0;
while pos < target.len() {
if let Some((offset, length)) =
Self::find_best_match(index, base, target, pos, min_match)
{
size += Self::copy_instruction_size(offset, length);
pos += length;
} else {
let start = pos;
while pos < target.len() && pos - start < 127 {
if Self::find_best_match(index, base, target, pos, min_match).is_some() {
break;
}
pos += 1;
}
size += 1 + (pos - start);
}
}
size
}
pub fn build_index(base: &[u8]) -> HashMap<[u8; 4], Vec<usize>> {
let mut index: HashMap<[u8; 4], Vec<usize>> = HashMap::new();
for i in 0..base.len().saturating_sub(4) {
let key = [base[i], base[i + 1], base[i + 2], base[i + 3]];
index.entry(key).or_default().push(i);
}
index
}
fn emit_copy(delta: &mut Vec<u8>, offset: usize, length: usize) {
let mut cmd: u8 = 0x80;
let offset = offset as u32;
let length = length as u32;
cmd |= 0x01; if offset & 0xFF00 != 0 {
cmd |= 0x02;
}
if offset & 0xFF_0000 != 0 {
cmd |= 0x04;
}
if offset & 0xFF00_0000 != 0 {
cmd |= 0x08;
}
if length != 0x10000 {
if length & 0xFF != 0 {
cmd |= 0x10;
}
if length & 0xFF00 != 0 {
cmd |= 0x20;
}
if length & 0xFF_0000 != 0 {
cmd |= 0x40;
}
}
delta.push(cmd);
delta.push(offset as u8); if offset & 0xFF00 != 0 {
delta.push((offset >> 8) as u8);
}
if offset & 0xFF_0000 != 0 {
delta.push((offset >> 16) as u8);
}
if offset & 0xFF00_0000 != 0 {
delta.push((offset >> 24) as u8);
}
if length != 0x10000 {
if length & 0xFF != 0 {
delta.push(length as u8);
}
if length & 0xFF00 != 0 {
delta.push((length >> 8) as u8);
}
if length & 0xFF_0000 != 0 {
delta.push((length >> 16) as u8);
}
}
}
fn copy_instruction_size(offset: usize, length: usize) -> usize {
let offset = offset as u32;
let length = length as u32;
let mut n = 1 + 1;
if offset & 0xFF00 != 0 {
n += 1;
}
if offset & 0xFF_0000 != 0 {
n += 1;
}
if offset & 0xFF00_0000 != 0 {
n += 1;
}
if length != 0x10000 {
if length & 0xFF != 0 {
n += 1;
}
if length & 0xFF00 != 0 {
n += 1;
}
if length & 0xFF_0000 != 0 {
n += 1;
}
}
n
}
fn min_match_for(target_len: usize) -> usize {
if target_len < 1024 {
MIN_MATCH_LENGTH_SMALL
} else {
MIN_MATCH_LENGTH_LARGE
}
}
fn encode_insert(data: &[u8]) -> Vec<u8> {
let mut delta = Vec::new();
for chunk in data.chunks(128) {
delta.push((chunk.len() - 1) as u8);
delta.extend_from_slice(chunk);
}
delta
}
fn find_best_match(
index: &HashMap<[u8; 4], Vec<usize>>,
base: &[u8],
target: &[u8],
pos: usize,
min_match: usize,
) -> Option<(usize, usize)> {
if pos + 4 > target.len() {
return None;
}
let key = [
target[pos],
target[pos + 1],
target[pos + 2],
target[pos + 3],
];
let offsets = index.get(&key)?;
let mut best_offset = 0;
let mut best_length = 0;
for &offset in offsets {
let length = Self::match_length(base, offset, target, pos);
if length > best_length {
best_length = length;
best_offset = offset;
}
}
if best_length >= min_match {
Some((best_offset, best_length))
} else {
None
}
}
fn match_length(base: &[u8], base_pos: usize, target: &[u8], target_pos: usize) -> usize {
let max_len = (base.len() - base_pos).min(target.len() - target_pos);
let mut len = 0;
while len < max_len && base[base_pos + len] == target[target_pos + len] {
len += 1;
}
len
}
}
impl Default for DeltaEncoder {
fn default() -> Self {
Self::new()
}
}