use alloc::vec::Vec;
use crate::delta::{Delta, Op, MAX_HUNK_SIZE};
use crate::error::Result;
#[derive(Debug)]
pub struct DeltaBuilder {
min_match_len: usize,
max_search_depth: usize,
}
impl Default for DeltaBuilder {
fn default() -> Self {
Self {
min_match_len: 8,
max_search_depth: 100,
}
}
}
impl DeltaBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn min_match_len(mut self, len: usize) -> Self {
self.min_match_len = len.max(4);
self
}
pub fn build(&self, old: &[u8], new: &[u8]) -> Result<Delta> {
if old.is_empty() {
return Ok(Delta {
source_size: 0,
target_size: new.len() as u64,
ops: vec![Op::Insert { data: new.to_vec() }],
});
}
let mut ops = Vec::new();
let mut new_pos: usize = 0;
let sa = SuffixArray::new(old);
while new_pos < new.len() {
let best = self.find_best_match(&sa, old, &new[new_pos..], new_pos);
if let Some((src_off, match_len)) = best {
if match_len >= self.min_match_len {
ops.push(Op::Copy {
src_offset: src_off as u64,
length: match_len as u64,
});
new_pos += match_len;
continue;
}
}
let literal_start = new_pos;
let mut literal_len = 1;
while literal_len < MAX_HUNK_SIZE
&& new_pos + literal_len < new.len() {
if literal_len >= self.min_match_len {
let lookahead = &new[new_pos + literal_len..];
if self.find_best_match(&sa, old, lookahead, new_pos + literal_len)
.map_or(false, |(_, len)| len >= self.min_match_len) {
break;
}
}
literal_len += 1;
}
ops.push(Op::Insert {
data: new[literal_start..literal_start + literal_len].to_vec(),
});
new_pos += literal_len;
}
let ops = self.coalesce_copies(ops);
Ok(Delta {
source_size: old.len() as u64,
target_size: new.len() as u64,
ops,
})
}
fn find_best_match(&self, sa: &SuffixArray, old: &[u8], new_suffix: &[u8], _new_pos: usize) -> Option<(usize, usize)> {
if new_suffix.len() < self.min_match_len {
return None;
}
let mut best_len = 0;
let mut best_pos = 0;
let step = (old.len() / self.max_search_depth).max(1);
for i in (0..old.len()).step_by(step) {
let len = common_prefix_length(&old[i..], new_suffix);
if len > best_len && len >= self.min_match_len {
best_len = len;
best_pos = i;
if best_len == new_suffix.len() {
break;
}
}
}
if best_len >= self.min_match_len {
Some((best_pos, best_len))
} else {
None
}
}
fn coalesce_copies(&self, ops: Vec<Op>) -> Vec<Op> {
let mut result = Vec::with_capacity(ops.len());
for op in ops {
if let Some(last) = result.last_mut() {
match (last, &op) {
(Op::Copy { src_offset, length }, Op::Copy {
src_offset: next_src,
length: next_len,
}) => {
if *src_offset + *length == *next_src {
*length += *next_len;
continue;
}
}
(Op::Insert { data }, Op::Insert { data: next_data }) => {
if data.len() + next_data.len() <= MAX_HUNK_SIZE {
data.extend_from_slice(next_data);
continue;
}
}
_ => {}
}
}
result.push(op);
}
result
}
}
struct SuffixArray<'a> {
text: &'a [u8],
sa: Vec<usize>,
}
impl<'a> SuffixArray<'a> {
fn new(text: &'a [u8]) -> Self {
let mut sa: Vec<usize> = (0..text.len()).collect();
sa.sort_by(|&a, &b| text[a..].cmp(&text[b..]));
Self { text, sa }
}
}
fn common_prefix_length(a: &[u8], b: &[u8]) -> usize {
a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count()
}