use crate::object::MkitError;
pub const STREAM_VERSION: u8 = 0x01;
pub const OP_COPY: u8 = 0x80;
pub const MAX_INSERT_LEN: usize = 127;
pub const HEADER_LEN: usize = 1 + 4 + 4;
const BLOCK_SIZE: usize = 16;
pub(crate) const CAP_MULTIPLIER: usize = 256;
#[inline]
pub(crate) fn compute_cap_hint(result_len: usize, _base_len: usize, stream_len: usize) -> usize {
result_len.min(stream_len.saturating_mul(CAP_MULTIPLIER))
}
pub fn encode(base: &[u8], result: &[u8]) -> Result<Vec<u8>, MkitError> {
use std::collections::HashMap;
check_length_bounds(base.len(), result.len())?;
let mut out = Vec::with_capacity(HEADER_LEN + result.len());
write_header(&mut out, base.len(), result.len());
let num_blocks = base.len() / BLOCK_SIZE;
let mut index: HashMap<u64, u32> = HashMap::with_capacity(num_blocks);
for i in 0..num_blocks {
let pos = i * BLOCK_SIZE;
if let Ok(pos_u32) = u32::try_from(pos) {
let block = &base[pos..pos + BLOCK_SIZE];
let h = block_hash(block);
index.entry(h).or_insert(pos_u32);
} else {
break; }
}
let mut insert_buf: Vec<u8> = Vec::with_capacity(MAX_INSERT_LEN);
let mut ti = 0usize;
while ti < result.len() {
let mut matched = false;
if ti + BLOCK_SIZE <= result.len() {
let target_block = &result[ti..ti + BLOCK_SIZE];
let h = block_hash(target_block);
if let Some(&base_pos) = index.get(&h) {
let base_pos_usize = base_pos as usize;
if &base[base_pos_usize..base_pos_usize + BLOCK_SIZE] == target_block {
flush_insert(&mut out, &mut insert_buf);
let mut match_len = BLOCK_SIZE;
while base_pos_usize + match_len < base.len()
&& ti + match_len < result.len()
&& base[base_pos_usize + match_len] == result[ti + match_len]
&& match_len < u16::MAX as usize
{
match_len += 1;
}
emit_copy(
&mut out,
base_pos,
u16::try_from(match_len).expect("<= u16::MAX"),
);
ti += match_len;
matched = true;
}
}
}
if !matched {
insert_buf.push(result[ti]);
ti += 1;
if insert_buf.len() == MAX_INSERT_LEN {
flush_insert(&mut out, &mut insert_buf);
}
}
}
flush_insert(&mut out, &mut insert_buf);
Ok(out)
}
pub(crate) fn check_length_bounds(base_len: usize, result_len: usize) -> Result<(), MkitError> {
if u32::try_from(base_len).is_err() {
return Err(MkitError::DeltaLengthOverflow {
field: "base_len",
len: base_len,
});
}
if u32::try_from(result_len).is_err() {
return Err(MkitError::DeltaLengthOverflow {
field: "result_len",
len: result_len,
});
}
Ok(())
}
pub fn decode(base: &[u8], stream: &[u8]) -> Result<Vec<u8>, MkitError> {
if stream.len() < HEADER_LEN {
return Err(MkitError::UnexpectedEof);
}
if stream[0] != STREAM_VERSION {
return Err(MkitError::UnsupportedObjectVersion);
}
let base_len = u32::from_le_bytes(stream[1..5].try_into().expect("4 bytes")) as usize;
let result_len = u32::from_le_bytes(stream[5..9].try_into().expect("4 bytes")) as usize;
if base_len != base.len() {
return Err(MkitError::TrailingData);
}
let cap_hint = compute_cap_hint(result_len, base.len(), stream.len());
let mut out: Vec<u8> = Vec::with_capacity(cap_hint);
let mut pos = HEADER_LEN;
while pos < stream.len() {
let op = stream[pos];
pos += 1;
if op & 0x80 != 0 {
if op & 0x7F != 0 {
return Err(MkitError::TrailingData);
}
if pos + 6 > stream.len() {
return Err(MkitError::UnexpectedEof);
}
let offset =
u32::from_le_bytes(stream[pos..pos + 4].try_into().expect("4 bytes")) as usize;
pos += 4;
let length =
u16::from_le_bytes(stream[pos..pos + 2].try_into().expect("2 bytes")) as usize;
pos += 2;
if length == 0 {
return Err(MkitError::TrailingData);
}
let end = offset.checked_add(length).ok_or(MkitError::TrailingData)?;
if end > base.len() {
return Err(MkitError::TrailingData);
}
if out.len().checked_add(length).is_none_or(|v| v > result_len) {
return Err(MkitError::TrailingData);
}
out.extend_from_slice(&base[offset..end]);
} else if op > 0 {
let length = op as usize;
if pos + length > stream.len() {
return Err(MkitError::UnexpectedEof);
}
if out.len().checked_add(length).is_none_or(|v| v > result_len) {
return Err(MkitError::TrailingData);
}
out.extend_from_slice(&stream[pos..pos + length]);
pos += length;
} else {
return Err(MkitError::TrailingData);
}
}
if out.len() != result_len {
return Err(MkitError::TrailingData);
}
Ok(out)
}
fn write_header(out: &mut Vec<u8>, base_len: usize, result_len: usize) {
let bl: u32 = u32::try_from(base_len).expect("base_len <= u32::MAX (checked)");
let rl: u32 = u32::try_from(result_len).expect("result_len <= u32::MAX (checked)");
out.push(STREAM_VERSION);
out.extend_from_slice(&bl.to_le_bytes());
out.extend_from_slice(&rl.to_le_bytes());
}
fn emit_copy(out: &mut Vec<u8>, offset: u32, length: u16) {
out.push(OP_COPY);
out.extend_from_slice(&offset.to_le_bytes());
out.extend_from_slice(&length.to_le_bytes());
}
fn flush_insert(out: &mut Vec<u8>, buf: &mut Vec<u8>) {
if buf.is_empty() {
return;
}
debug_assert!(buf.len() <= MAX_INSERT_LEN);
out.push(u8::try_from(buf.len()).expect("<= 127"));
out.extend_from_slice(buf);
buf.clear();
}
fn block_hash(block: &[u8]) -> u64 {
let mut h: u64 = 0xcbf2_9ce4_8422_2325;
for &b in block {
h ^= u64::from(b);
h = h.wrapping_mul(0x0000_0001_0000_01b3);
}
h
}
#[cfg(test)]
mod tests {
use super::*;
fn header(base_len: u32, result_len: u32) -> [u8; HEADER_LEN] {
let mut h = [0u8; HEADER_LEN];
h[0] = STREAM_VERSION;
h[1..5].copy_from_slice(&base_len.to_le_bytes());
h[5..9].copy_from_slice(&result_len.to_le_bytes());
h
}
#[test]
fn identity_roundtrip() {
let data = b"0123456789abcdef".repeat(4); let stream = encode(&data, &data).unwrap();
let restored = decode(&data, &stream).unwrap();
assert_eq!(restored, data);
}
#[test]
fn pure_insert_roundtrip() {
let base = b"aaa";
let target = b"zzz";
let stream = encode(base, target).unwrap();
assert_eq!(stream[HEADER_LEN] & 0x80, 0);
assert_eq!(stream[HEADER_LEN], 3);
let restored = decode(base, &stream).unwrap();
assert_eq!(restored, target);
}
#[test]
fn pure_copy_full_base() {
let base: Vec<u8> = (0..16u8).cycle().take(128).collect();
let target = &base[..64];
let mut stream = header(
u32::try_from(base.len()).unwrap(),
u32::try_from(target.len()).unwrap(),
)
.to_vec();
stream.push(OP_COPY);
stream.extend_from_slice(&0u32.to_le_bytes());
stream.extend_from_slice(&64u16.to_le_bytes());
assert_eq!(stream.len(), HEADER_LEN + 7);
let restored = decode(&base, &stream).unwrap();
assert_eq!(restored, target);
}
#[test]
fn near_duplicate_yields_smaller_delta() {
let v1 = include_str!("delta.rs"); let mut v2 = String::from(v1);
v2.push_str("\n// trailing edit\n");
let stream = encode(v1.as_bytes(), v2.as_bytes()).unwrap();
let restored = decode(v1.as_bytes(), &stream).unwrap();
assert_eq!(restored, v2.as_bytes());
assert!(stream.len() < v2.len(), "delta should be smaller than v2");
}
#[test]
fn rejects_zero_opcode() {
let mut stream = header(0, 0).to_vec();
stream.push(0x00);
let err = decode(&[], &stream).unwrap_err();
assert!(matches!(err, MkitError::TrailingData));
}
#[test]
fn rejects_unknown_version() {
let mut bytes = header(0, 0);
bytes[0] = 0x02;
let err = decode(&[], &bytes).unwrap_err();
assert!(matches!(err, MkitError::UnsupportedObjectVersion));
}
#[test]
fn rejects_truncated_header() {
let bytes = [0x01u8, 0x00, 0x00];
let err = decode(&[], &bytes).unwrap_err();
assert!(matches!(err, MkitError::UnexpectedEof));
}
#[test]
fn rejects_truncated_copy() {
let mut stream = header(16, 16).to_vec();
stream.push(OP_COPY);
stream.extend_from_slice(&0u32.to_le_bytes()); let err = decode(&[0u8; 16], &stream).unwrap_err();
assert!(matches!(err, MkitError::UnexpectedEof));
}
#[test]
fn rejects_truncated_insert() {
let mut stream = header(0, 10).to_vec();
stream.push(10); stream.extend_from_slice(b"abc"); let err = decode(&[], &stream).unwrap_err();
assert!(matches!(err, MkitError::UnexpectedEof));
}
#[test]
fn rejects_copy_past_base_end() {
let base = b"short"; let mut stream = header(u32::try_from(base.len()).unwrap(), 100).to_vec();
stream.push(OP_COPY);
stream.extend_from_slice(&0u32.to_le_bytes());
stream.extend_from_slice(&100u16.to_le_bytes());
let err = decode(base, &stream).unwrap_err();
assert!(matches!(err, MkitError::TrailingData));
}
#[test]
fn rejects_copy_with_zero_length() {
let base = [0u8; 16];
let mut stream = header(16, 16).to_vec();
stream.push(OP_COPY);
stream.extend_from_slice(&0u32.to_le_bytes());
stream.extend_from_slice(&0u16.to_le_bytes());
let err = decode(&base, &stream).unwrap_err();
assert!(matches!(err, MkitError::TrailingData));
}
#[test]
fn rejects_base_len_mismatch() {
let stream = header(16, 0).to_vec();
let err = decode(&[0u8; 8], &stream).unwrap_err();
assert!(matches!(err, MkitError::TrailingData));
}
#[test]
fn rejects_result_len_mismatch_at_end() {
let mut stream = header(0, 3).to_vec();
stream.push(5);
stream.extend_from_slice(b"hello");
let err = decode(&[], &stream).unwrap_err();
assert!(matches!(err, MkitError::TrailingData));
}
#[test]
fn rejects_huge_result_len_without_preallocating() {
let stream = header(0, u32::MAX);
let err = decode(&[], &stream).unwrap_err();
assert!(matches!(err, MkitError::TrailingData));
}
#[test]
fn rejects_copy_with_reserved_low_bits() {
let base = [0u8; 16];
let mut stream = header(16, 4).to_vec();
stream.push(OP_COPY | 0x01); stream.extend_from_slice(&0u32.to_le_bytes());
stream.extend_from_slice(&4u16.to_le_bytes());
let err = decode(&base, &stream).unwrap_err();
assert!(matches!(err, MkitError::TrailingData));
}
#[test]
fn empty_base_pure_insert() {
let target = b"all new content here!";
let stream = encode(b"", target).unwrap();
let restored = decode(b"", &stream).unwrap();
assert_eq!(restored, target);
}
#[test]
fn cap_hint_does_not_scale_with_base_len() {
let huge_base = 1usize << 30; let tiny_stream = 9usize; let declared_result = u32::MAX as usize;
let cap = super::compute_cap_hint(declared_result, huge_base, tiny_stream);
assert!(
cap <= tiny_stream.saturating_mul(CAP_MULTIPLIER),
"cap_hint {cap} must be bounded by stream.len() * CAP_MULTIPLIER, \
not by base.len()",
);
assert!(
cap < 1024 * 1024,
"cap_hint {cap} must stay well below 1 MiB for a 9-byte stream",
);
}
#[test]
fn check_length_bounds_rejects_over_u32() {
let over = (u32::MAX as usize).saturating_add(1);
assert!(matches!(
check_length_bounds(over, 0),
Err(MkitError::DeltaLengthOverflow { .. })
));
assert!(matches!(
check_length_bounds(0, over),
Err(MkitError::DeltaLengthOverflow { .. })
));
assert!(check_length_bounds(u32::MAX as usize, u32::MAX as usize).is_ok());
assert!(check_length_bounds(1, 1).is_ok());
}
}