1use crate::object::MkitError;
25
26pub const STREAM_VERSION: u8 = 0x01;
28pub const OP_COPY: u8 = 0x80;
30pub const MAX_INSERT_LEN: usize = 127;
32pub const HEADER_LEN: usize = 1 + 4 + 4;
34
35const BLOCK_SIZE: usize = 16;
38
39pub(crate) const CAP_MULTIPLIER: usize = 256;
48
49#[inline]
60pub(crate) fn compute_cap_hint(result_len: usize, _base_len: usize, stream_len: usize) -> usize {
61 result_len.min(stream_len.saturating_mul(CAP_MULTIPLIER))
62}
63
64pub fn encode(base: &[u8], result: &[u8]) -> Result<Vec<u8>, MkitError> {
85 use std::collections::HashMap;
86
87 check_length_bounds(base.len(), result.len())?;
88
89 let mut out = Vec::with_capacity(HEADER_LEN + result.len());
90 write_header(&mut out, base.len(), result.len());
91
92 let num_blocks = base.len() / BLOCK_SIZE;
96 let mut index: HashMap<u64, u32> = HashMap::with_capacity(num_blocks);
97 for i in 0..num_blocks {
98 let pos = i * BLOCK_SIZE;
99 if let Ok(pos_u32) = u32::try_from(pos) {
100 let block = &base[pos..pos + BLOCK_SIZE];
101 let h = block_hash(block);
102 index.entry(h).or_insert(pos_u32);
103 } else {
104 break; }
106 }
107
108 let mut insert_buf: Vec<u8> = Vec::with_capacity(MAX_INSERT_LEN);
109 let mut ti = 0usize;
110 while ti < result.len() {
111 let mut matched = false;
112 if ti + BLOCK_SIZE <= result.len() {
113 let target_block = &result[ti..ti + BLOCK_SIZE];
114 let h = block_hash(target_block);
115 if let Some(&base_pos) = index.get(&h) {
116 let base_pos_usize = base_pos as usize;
117 if &base[base_pos_usize..base_pos_usize + BLOCK_SIZE] == target_block {
118 flush_insert(&mut out, &mut insert_buf);
119
120 let mut match_len = BLOCK_SIZE;
122 while base_pos_usize + match_len < base.len()
123 && ti + match_len < result.len()
124 && base[base_pos_usize + match_len] == result[ti + match_len]
125 && match_len < u16::MAX as usize
126 {
127 match_len += 1;
128 }
129 emit_copy(
131 &mut out,
132 base_pos,
133 u16::try_from(match_len).expect("<= u16::MAX"),
134 );
135 ti += match_len;
136 matched = true;
137 }
138 }
139 }
140 if !matched {
141 insert_buf.push(result[ti]);
142 ti += 1;
143 if insert_buf.len() == MAX_INSERT_LEN {
144 flush_insert(&mut out, &mut insert_buf);
145 }
146 }
147 }
148 flush_insert(&mut out, &mut insert_buf);
149 Ok(out)
150}
151
152pub(crate) fn check_length_bounds(base_len: usize, result_len: usize) -> Result<(), MkitError> {
156 if u32::try_from(base_len).is_err() {
157 return Err(MkitError::DeltaLengthOverflow {
158 field: "base_len",
159 len: base_len,
160 });
161 }
162 if u32::try_from(result_len).is_err() {
163 return Err(MkitError::DeltaLengthOverflow {
164 field: "result_len",
165 len: result_len,
166 });
167 }
168 Ok(())
169}
170
171pub fn decode(base: &[u8], stream: &[u8]) -> Result<Vec<u8>, MkitError> {
189 if stream.len() < HEADER_LEN {
190 return Err(MkitError::UnexpectedEof);
191 }
192 if stream[0] != STREAM_VERSION {
193 return Err(MkitError::UnsupportedObjectVersion);
194 }
195 let base_len = u32::from_le_bytes(stream[1..5].try_into().expect("4 bytes")) as usize;
196 let result_len = u32::from_le_bytes(stream[5..9].try_into().expect("4 bytes")) as usize;
197 if base_len != base.len() {
198 return Err(MkitError::TrailingData);
199 }
200
201 let cap_hint = compute_cap_hint(result_len, base.len(), stream.len());
209 let mut out: Vec<u8> = Vec::with_capacity(cap_hint);
210 let mut pos = HEADER_LEN;
211 while pos < stream.len() {
212 let op = stream[pos];
213 pos += 1;
214 if op & 0x80 != 0 {
215 if op & 0x7F != 0 {
217 return Err(MkitError::TrailingData);
218 }
219 if pos + 6 > stream.len() {
220 return Err(MkitError::UnexpectedEof);
221 }
222 let offset =
223 u32::from_le_bytes(stream[pos..pos + 4].try_into().expect("4 bytes")) as usize;
224 pos += 4;
225 let length =
226 u16::from_le_bytes(stream[pos..pos + 2].try_into().expect("2 bytes")) as usize;
227 pos += 2;
228 if length == 0 {
229 return Err(MkitError::TrailingData);
230 }
231 let end = offset.checked_add(length).ok_or(MkitError::TrailingData)?;
234 if end > base.len() {
235 return Err(MkitError::TrailingData);
236 }
237 if out.len().checked_add(length).is_none_or(|v| v > result_len) {
239 return Err(MkitError::TrailingData);
240 }
241 out.extend_from_slice(&base[offset..end]);
242 } else if op > 0 {
243 let length = op as usize;
245 if pos + length > stream.len() {
246 return Err(MkitError::UnexpectedEof);
247 }
248 if out.len().checked_add(length).is_none_or(|v| v > result_len) {
249 return Err(MkitError::TrailingData);
250 }
251 out.extend_from_slice(&stream[pos..pos + length]);
252 pos += length;
253 } else {
254 return Err(MkitError::TrailingData);
256 }
257 }
258 if out.len() != result_len {
259 return Err(MkitError::TrailingData);
260 }
261 Ok(out)
262}
263
264fn write_header(out: &mut Vec<u8>, base_len: usize, result_len: usize) {
267 let bl: u32 = u32::try_from(base_len).expect("base_len <= u32::MAX (checked)");
272 let rl: u32 = u32::try_from(result_len).expect("result_len <= u32::MAX (checked)");
273 out.push(STREAM_VERSION);
274 out.extend_from_slice(&bl.to_le_bytes());
275 out.extend_from_slice(&rl.to_le_bytes());
276}
277
278fn emit_copy(out: &mut Vec<u8>, offset: u32, length: u16) {
279 out.push(OP_COPY);
280 out.extend_from_slice(&offset.to_le_bytes());
281 out.extend_from_slice(&length.to_le_bytes());
282}
283
284fn flush_insert(out: &mut Vec<u8>, buf: &mut Vec<u8>) {
285 if buf.is_empty() {
286 return;
287 }
288 debug_assert!(buf.len() <= MAX_INSERT_LEN);
289 out.push(u8::try_from(buf.len()).expect("<= 127"));
290 out.extend_from_slice(buf);
291 buf.clear();
292}
293
294fn block_hash(block: &[u8]) -> u64 {
295 let mut h: u64 = 0xcbf2_9ce4_8422_2325;
297 for &b in block {
298 h ^= u64::from(b);
299 h = h.wrapping_mul(0x0000_0001_0000_01b3);
300 }
301 h
302}
303
304#[cfg(test)]
309mod tests {
310 use super::*;
311
312 fn header(base_len: u32, result_len: u32) -> [u8; HEADER_LEN] {
313 let mut h = [0u8; HEADER_LEN];
314 h[0] = STREAM_VERSION;
315 h[1..5].copy_from_slice(&base_len.to_le_bytes());
316 h[5..9].copy_from_slice(&result_len.to_le_bytes());
317 h
318 }
319
320 #[test]
321 fn identity_roundtrip() {
322 let data = b"0123456789abcdef".repeat(4); let stream = encode(&data, &data).unwrap();
324 let restored = decode(&data, &stream).unwrap();
325 assert_eq!(restored, data);
326 }
327
328 #[test]
329 fn pure_insert_roundtrip() {
330 let base = b"aaa";
331 let target = b"zzz";
332 let stream = encode(base, target).unwrap();
333 assert_eq!(stream[HEADER_LEN] & 0x80, 0);
335 assert_eq!(stream[HEADER_LEN], 3);
336 let restored = decode(base, &stream).unwrap();
337 assert_eq!(restored, target);
338 }
339
340 #[test]
341 fn pure_copy_full_base() {
342 let base: Vec<u8> = (0..16u8).cycle().take(128).collect();
343 let target = &base[..64];
344 let mut stream = header(
346 u32::try_from(base.len()).unwrap(),
347 u32::try_from(target.len()).unwrap(),
348 )
349 .to_vec();
350 stream.push(OP_COPY);
351 stream.extend_from_slice(&0u32.to_le_bytes());
352 stream.extend_from_slice(&64u16.to_le_bytes());
353 assert_eq!(stream.len(), HEADER_LEN + 7);
354 let restored = decode(&base, &stream).unwrap();
355 assert_eq!(restored, target);
356 }
357
358 #[test]
359 fn near_duplicate_yields_smaller_delta() {
360 let v1 = include_str!("delta.rs"); let mut v2 = String::from(v1);
362 v2.push_str("\n// trailing edit\n");
363 let stream = encode(v1.as_bytes(), v2.as_bytes()).unwrap();
364 let restored = decode(v1.as_bytes(), &stream).unwrap();
365 assert_eq!(restored, v2.as_bytes());
366 assert!(stream.len() < v2.len(), "delta should be smaller than v2");
367 }
368
369 #[test]
370 fn rejects_zero_opcode() {
371 let mut stream = header(0, 0).to_vec();
372 stream.push(0x00);
373 let err = decode(&[], &stream).unwrap_err();
374 assert!(matches!(err, MkitError::TrailingData));
375 }
376
377 #[test]
378 fn rejects_unknown_version() {
379 let mut bytes = header(0, 0);
380 bytes[0] = 0x02;
381 let err = decode(&[], &bytes).unwrap_err();
382 assert!(matches!(err, MkitError::UnsupportedObjectVersion));
383 }
384
385 #[test]
386 fn rejects_truncated_header() {
387 let bytes = [0x01u8, 0x00, 0x00];
388 let err = decode(&[], &bytes).unwrap_err();
389 assert!(matches!(err, MkitError::UnexpectedEof));
390 }
391
392 #[test]
393 fn rejects_truncated_copy() {
394 let mut stream = header(16, 16).to_vec();
395 stream.push(OP_COPY);
396 stream.extend_from_slice(&0u32.to_le_bytes()); let err = decode(&[0u8; 16], &stream).unwrap_err();
398 assert!(matches!(err, MkitError::UnexpectedEof));
399 }
400
401 #[test]
402 fn rejects_truncated_insert() {
403 let mut stream = header(0, 10).to_vec();
404 stream.push(10); stream.extend_from_slice(b"abc"); let err = decode(&[], &stream).unwrap_err();
407 assert!(matches!(err, MkitError::UnexpectedEof));
408 }
409
410 #[test]
411 fn rejects_copy_past_base_end() {
412 let base = b"short"; let mut stream = header(u32::try_from(base.len()).unwrap(), 100).to_vec();
414 stream.push(OP_COPY);
415 stream.extend_from_slice(&0u32.to_le_bytes());
416 stream.extend_from_slice(&100u16.to_le_bytes());
417 let err = decode(base, &stream).unwrap_err();
418 assert!(matches!(err, MkitError::TrailingData));
419 }
420
421 #[test]
422 fn rejects_copy_with_zero_length() {
423 let base = [0u8; 16];
424 let mut stream = header(16, 16).to_vec();
425 stream.push(OP_COPY);
426 stream.extend_from_slice(&0u32.to_le_bytes());
427 stream.extend_from_slice(&0u16.to_le_bytes());
428 let err = decode(&base, &stream).unwrap_err();
429 assert!(matches!(err, MkitError::TrailingData));
430 }
431
432 #[test]
433 fn rejects_base_len_mismatch() {
434 let stream = header(16, 0).to_vec();
435 let err = decode(&[0u8; 8], &stream).unwrap_err();
436 assert!(matches!(err, MkitError::TrailingData));
437 }
438
439 #[test]
440 fn rejects_result_len_mismatch_at_end() {
441 let mut stream = header(0, 3).to_vec();
443 stream.push(5);
444 stream.extend_from_slice(b"hello");
445 let err = decode(&[], &stream).unwrap_err();
446 assert!(matches!(err, MkitError::TrailingData));
447 }
448
449 #[test]
450 fn rejects_huge_result_len_without_preallocating() {
451 let stream = header(0, u32::MAX);
457 let err = decode(&[], &stream).unwrap_err();
458 assert!(matches!(err, MkitError::TrailingData));
459 }
460
461 #[test]
462 fn rejects_copy_with_reserved_low_bits() {
463 let base = [0u8; 16];
464 let mut stream = header(16, 4).to_vec();
465 stream.push(OP_COPY | 0x01); stream.extend_from_slice(&0u32.to_le_bytes());
467 stream.extend_from_slice(&4u16.to_le_bytes());
468 let err = decode(&base, &stream).unwrap_err();
469 assert!(matches!(err, MkitError::TrailingData));
470 }
471
472 #[test]
473 fn empty_base_pure_insert() {
474 let target = b"all new content here!";
475 let stream = encode(b"", target).unwrap();
476 let restored = decode(b"", &stream).unwrap();
477 assert_eq!(restored, target);
478 }
479
480 #[test]
481 fn cap_hint_does_not_scale_with_base_len() {
482 let huge_base = 1usize << 30; let tiny_stream = 9usize; let declared_result = u32::MAX as usize;
490 let cap = super::compute_cap_hint(declared_result, huge_base, tiny_stream);
491 assert!(
492 cap <= tiny_stream.saturating_mul(CAP_MULTIPLIER),
493 "cap_hint {cap} must be bounded by stream.len() * CAP_MULTIPLIER, \
494 not by base.len()",
495 );
496 assert!(
497 cap < 1024 * 1024,
498 "cap_hint {cap} must stay well below 1 MiB for a 9-byte stream",
499 );
500 }
501
502 #[test]
508 fn check_length_bounds_rejects_over_u32() {
509 let over = (u32::MAX as usize).saturating_add(1);
511 assert!(matches!(
512 check_length_bounds(over, 0),
513 Err(MkitError::DeltaLengthOverflow { .. })
514 ));
515 assert!(matches!(
517 check_length_bounds(0, over),
518 Err(MkitError::DeltaLengthOverflow { .. })
519 ));
520 assert!(check_length_bounds(u32::MAX as usize, u32::MAX as usize).is_ok());
522 assert!(check_length_bounds(1, 1).is_ok());
524 }
525}