use crate::hash::config::{self, MatcherConfig};
use crate::hash::matching::{MatchEngine, SourceData};
use crate::vcdiff::code_table::Instruction;
use crate::vcdiff::decoder::{self, DecodeError};
use crate::vcdiff::encoder::{SourceWindow, StreamEncoder, WindowEncoder};
/// Tunable parameters accepted by `encode_with_options`.
#[derive(Debug, Clone)]
pub struct EncodeOptions {
    /// Effort level; translated into a matcher configuration via
    /// `config::config_for_level`.
    pub level: u32,
    /// Upper bound, in bytes, on how much of the target is encoded per window.
    pub window_size: usize,
    /// Checksum flag forwarded to both the stream encoder and every window
    /// encoder.
    pub checksum: bool,
}

impl Default for EncodeOptions {
    /// Level 6, 8 MiB windows, checksums enabled.
    fn default() -> Self {
        const DEFAULT_LEVEL: u32 = 6;
        const DEFAULT_WINDOW_SIZE: usize = 8 * 1024 * 1024;

        EncodeOptions {
            level: DEFAULT_LEVEL,
            window_size: DEFAULT_WINDOW_SIZE,
            checksum: true,
        }
    }
}
/// Encodes `target` as a delta against `source` using [`EncodeOptions::default`].
///
/// The encoded stream is written into `output`.
///
/// # Errors
///
/// Propagates any [`EncodeError`] reported by [`encode_with_options`].
pub fn encode(source: &[u8], target: &[u8], output: &mut Vec<u8>) -> Result<(), EncodeError> {
    let defaults = EncodeOptions::default();
    encode_with_options(source, target, output, &defaults)
}
pub fn encode_with_options(
source: &[u8],
target: &[u8],
output: &mut Vec<u8>,
opts: &EncodeOptions,
) -> Result<(), EncodeError> {
let config = config::config_for_level(opts.level);
let src: &[u8] = source;
let mut stream = StreamEncoder::new(output, opts.checksum);
let mut target_offset = 0usize;
while target_offset < target.len() {
let win_end = (target_offset + opts.window_size).min(target.len());
let win_target = &target[target_offset..win_end];
let instructions = if source.is_empty() {
find_matches_no_source(config, win_target)
} else {
find_matches_with_source(config, &src, win_target)
};
let source_win = if !source.is_empty() {
Some(SourceWindow {
len: source.len() as u64,
offset: 0,
})
} else {
None
};
let mut we = WindowEncoder::new(source_win, opts.checksum);
emit_instructions(&mut we, win_target, source.len() as u64, &instructions);
stream
.write_window(we, Some(win_target))
.map_err(EncodeError::Io)?;
target_offset = win_end;
}
if target.is_empty() {
let we = WindowEncoder::new(None, opts.checksum);
stream
.write_window(we, Some(b""))
.map_err(EncodeError::Io)?;
}
let _ = stream.finish().map_err(EncodeError::Io)?;
Ok(())
}
/// Produces the instruction list for `target` when there is no source data.
///
/// The engine is created with a source length of `0` and a second size
/// argument of at least 64 (the target length when larger), then asked to
/// match without a source.
fn find_matches_no_source(config: MatcherConfig, target: &[u8]) -> Vec<Instruction> {
    let engine_size = target.len().max(64);
    let mut matcher = MatchEngine::new(config, 0, engine_size);
    matcher.find_matches(target, None::<&&[u8]>)
}
/// Produces the instruction list for `target` matched against `source`.
///
/// A fresh engine is built for every call: it is sized from the source length
/// and from the target length (floored at 64), the source is indexed, and the
/// target is then matched against it.
fn find_matches_with_source(
    config: MatcherConfig,
    source: &&[u8],
    target: &[u8],
) -> Vec<Instruction> {
    let engine_size = target.len().max(64);
    let mut matcher = MatchEngine::new(config, source.len(), engine_size);
    matcher.index_source(source);
    matcher.find_matches(target, Some(source))
}
/// Replays `instructions` onto the window encoder `we`.
///
/// A cursor tracks how many target bytes the instructions seen so far account
/// for:
/// * `Add` emits the next `len` bytes of `target` starting at the cursor;
/// * `Copy` forwards `len` and `addr` to the encoder unchanged;
/// * `Run` emits `len` repetitions of the target byte at the cursor.
///
/// `_source_len` is accepted but not used.
///
/// # Panics
///
/// Panics if an `Add` or `Run` instruction refers to bytes beyond the end of
/// `target`.
fn emit_instructions(
    we: &mut WindowEncoder,
    target: &[u8],
    _source_len: u64,
    instructions: &[Instruction],
) {
    let mut cursor = 0usize;
    for inst in instructions {
        // Each arm yields the number of target bytes it covers.
        let advance = match inst {
            Instruction::Add { len } => {
                let count = *len as usize;
                we.add(&target[cursor..cursor + count]);
                count
            }
            Instruction::Copy { len, addr, .. } => {
                we.copy_with_auto_mode(*len, *addr);
                *len as usize
            }
            Instruction::Run { len } => {
                we.run(*len, target[cursor]);
                *len as usize
            }
        };
        cursor += advance;
    }
}
/// Decodes `delta` against `source` and returns the reconstructed bytes.
///
/// This is a thin wrapper over `decoder::decode_memory`; note that the
/// underlying function takes the delta first and the source second.
///
/// # Errors
///
/// Returns whatever [`DecodeError`] `decoder::decode_memory` reports.
pub fn decode(source: &[u8], delta: &[u8]) -> Result<Vec<u8>, DecodeError> {
decoder::decode_memory(delta, source)
}
/// Failure reported by the encoding entry points.
#[derive(Debug)]
pub enum EncodeError {
    /// An underlying I/O operation failed.
    Io(std::io::Error),
}

impl std::fmt::Display for EncodeError {
    /// Renders the error as `I/O error: <inner message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Irrefutable while `Io` is the only variant; adding a variant turns
        // this into a compile error rather than a silent fall-through.
        let Self::Io(e) = self;
        write!(f, "I/O error: {e}")
    }
}

impl std::error::Error for EncodeError {
    /// Exposes the wrapped I/O error as the cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let Self::Io(e) = self;
        Some(e)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `target` against `source` with default options, decodes the
    /// result, and asserts the reconstruction equals `target`.
    fn roundtrip(source: &[u8], target: &[u8]) {
        let mut delta = Vec::new();
        encode(source, target, &mut delta).expect("encode failed");
        let reconstructed = decode(source, &delta).expect("decode failed");
        assert_eq!(
            reconstructed,
            target,
            "roundtrip mismatch (source={}, target={}, delta={})",
            source.len(),
            target.len(),
            delta.len()
        );
    }

    #[test]
    fn roundtrip_identical() {
        let data = b"The quick brown fox jumps over the lazy dog.";
        roundtrip(data, data);
    }

    #[test]
    fn roundtrip_small_edit() {
        let source = b"Hello, world! This is a test of the delta engine.";
        let target = b"Hello, earth! This is a test of the delta engine.";
        roundtrip(source, target);
    }

    #[test]
    fn roundtrip_no_source() {
        let target = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        roundtrip(b"", target);
    }

    #[test]
    fn roundtrip_empty_target() {
        roundtrip(b"some source", b"");
    }

    #[test]
    fn roundtrip_repeating_data() {
        let source = b"AAAA BBBB CCCC DDDD EEEE FFFF GGGG HHHH";
        let target = b"AAAA CCCC DDDD EEEE xxxx GGGG HHHH IIII";
        roundtrip(source, target);
    }

    #[test]
    fn roundtrip_binary_data() {
        let source: Vec<u8> = (0..=255).cycle().take(4096).collect();
        let mut target = source.clone();
        target[100] = 0xFF;
        target[200] = 0x00;
        target[1000] = 0x42;
        roundtrip(&source, &target);
    }

    #[test]
    fn roundtrip_large_insert() {
        let source = b"Start.";
        let target = b"Start. And now a much longer piece of text that was inserted.";
        roundtrip(source, target);
    }

    #[test]
    fn roundtrip_all_levels() {
        let source = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz";
        let target = b"ABCDEFGHIJKLMNOP--CHANGED--UVWXYZ0123456789abcdefghijklmnopqrstuvwxyz!!!";
        for level in [1, 3, 6, 9] {
            let opts = EncodeOptions {
                level,
                checksum: true,
                ..Default::default()
            };
            let mut delta = Vec::new();
            encode_with_options(source, target, &mut delta, &opts).expect("encode failed");
            let reconstructed = decode(source, &delta).expect("decode failed");
            assert_eq!(reconstructed, target, "level {level} roundtrip failed");
        }
    }

    #[test]
    fn roundtrip_run_data() {
        let source = b"";
        let target = vec![0xAA; 200];
        roundtrip(source, &target);
    }

    #[test]
    fn delta_is_smaller_for_similar_data() {
        let source: Vec<u8> = (0..=255).cycle().take(8192).collect();
        let mut target = source.clone();
        target[4096] ^= 0xFF;
        let mut delta = Vec::new();
        encode(&source, &target, &mut delta).expect("encode failed");
        assert!(
            delta.len() < target.len() / 2,
            "delta ({}) should be much smaller than target ({})",
            delta.len(),
            target.len()
        );
    }

    /// Interop check: the external `xdelta3` tool must be able to decode our
    /// output. Skipped (not failed) when `xdelta3` cannot be spawned.
    #[test]
    fn xdelta3_can_decode_engine_output() {
        use std::process::Command;

        if Command::new("xdelta3").arg("-V").output().is_err() {
            eprintln!("xdelta3 not found, skipping interop test");
            return;
        }

        let source = b"The quick brown fox jumps over the lazy dog. 1234567890";
        let target = b"The quick brown cat sits on the lazy mat. 1234567890!!!";
        let mut delta = Vec::new();
        encode(source, target, &mut delta).expect("encode failed");

        // Per-process directory so concurrent test runs on one machine do not
        // trample each other's files.
        let dir = std::env::temp_dir().join(format!("xdelta_engine_test_{}", std::process::id()));
        std::fs::create_dir_all(&dir).unwrap();
        let src_path = dir.join("source.bin");
        let delta_path = dir.join("delta.vcdiff");
        let out_path = dir.join("output.bin");
        std::fs::write(&src_path, source).unwrap();
        std::fs::write(&delta_path, &delta).unwrap();

        // `-f` lets xdelta3 overwrite a stale output file; without it a
        // leftover from an earlier failed run would make the tool refuse to
        // write.
        let result = Command::new("xdelta3")
            .args(["-d", "-f", "-s"])
            .arg(&src_path)
            .arg(&delta_path)
            .arg(&out_path)
            .output();

        // Collect the output and clean up *before* asserting, so a failing
        // assertion cannot leave the temp directory behind.
        let decoded = std::fs::read(&out_path);
        let _ = std::fs::remove_dir_all(&dir);

        match result {
            Ok(output) => {
                assert!(
                    output.status.success(),
                    "xdelta3 decode failed: {}",
                    String::from_utf8_lossy(&output.stderr)
                );
                let decoded = decoded.expect("xdelta3 reported success but its output is unreadable");
                assert_eq!(decoded, target, "xdelta3 decoded different output");
            }
            Err(e) => eprintln!("skipping xdelta3 interop: {e}"),
        }
    }
}