Skip to main content

mkit_core/
pack.rs

1//! Packfile writer / reader — conformant to `docs/SPEC-PACKFILE.md`.
2//!
3//! Layout (SPEC-PACKFILE §1, §2, §3, §8):
4//!
5//! ```text
6//! [4B  magic            "MKIT"]                       offset 0
7//! [4B  version u32 LE  == 1   ]
8//! [4B  entry_count u32 LE     ]
9//!   for each entry:
10//!     [u8  entry_type]                                0x00 raw | 0x02 delta
11//!     [u32 LE payload_len]                            length of payload only
12//!     [payload_len bytes payload]
13//! [32B trailer = BLAKE3 of all preceding bytes]
14//! ```
15//!
16//! Entry types (SPEC-PACKFILE §3):
17//!
18//! * `0x00` raw  — payload is a fully serialised mkit object.
19//! * `0x01`      — RESERVED, MUST be rejected.
20//! * `0x02` delta — payload is `[32B base_hash][SPEC-DELTA stream]`.
21//!
22//! Caps (SPEC-PACKFILE §5):
23//!
24//! * `entry_count <= 10_000_000`
25//! * total `payload_len` sum `<= 4 GiB`
26//!
27//! Delta-base ordering rule (SPEC-PACKFILE §4): every delta entry's
28//! `base_hash` MUST appear earlier in the same pack as a raw entry, OR
29//! already exist in the destination object store.
30//!
31//! The pack key (SPEC-PACKFILE §7) is `packs/<lower-hex BLAKE3 of entire
32//! pack>`. The trailer is then redundant w.r.t. that key, but it lets a
33//! streaming reader detect bit-rot before the whole pack has been
34//! hashed end-to-end.
35
36use crate::delta;
37use crate::hash::{self, Hash};
38use crate::object::{MkitError, Object};
39use crate::store::{MAX_RAW_OBJECT_SIZE, ObjectStore};
40use std::sync::Arc;
41
/// Four-byte ASCII tag (`"MKIT"`) that opens every v1 pack.
pub const MAGIC: &[u8; 4] = b"MKIT";
/// The only packfile version this module writes; the reader refuses any other value.
pub const VERSION: u32 = 1;

/// Maximum number of entries a pack may declare (SPEC-PACKFILE §5).
pub const MAX_ENTRIES: u32 = 10_000_000;
/// Maximum combined payload size across all entries: 4 GiB.
pub const MAX_TOTAL_PAYLOAD: u64 = 4 << 30;
/// Size in bytes of the trailing raw BLAKE3 digest.
pub const TRAILER_LEN: usize = 32;

/// Header size: `[4B magic][4B version][4B entry_count]`.
pub const HEADER_LEN: usize = MAGIC.len() + 2 * std::mem::size_of::<u32>();
/// Per-entry framing overhead: `[1B type][4B payload_len]`.
pub const ENTRY_FRAME_LEN: usize = std::mem::size_of::<u8>() + std::mem::size_of::<u32>();
58
/// Packfile errors. Distinct from [`MkitError`] so callers can match on
/// pack-specific failures (trailer mismatch, base-missing) without
/// catching every object decode error.
///
/// Every variant below is produced by [`PackWriter`] and/or
/// [`PackReader::read`] (or the private validators they call).
#[derive(Debug, thiserror::Error)]
pub enum PackError {
    /// Input is too small to hold even an empty pack (header + trailer).
    #[error("packfile is shorter than the {HEADER_LEN}-byte header + {TRAILER_LEN}-byte trailer")]
    PackfileTooShort,
    /// The first four bytes differ from [`MAGIC`].
    #[error("first 4 bytes are not ASCII \"MKIT\"")]
    InvalidMagic,
    /// The header version is not [`VERSION`]; carries the value that was read.
    #[error("version {0} is not supported (v1 only)")]
    UnsupportedVersion(u32),
    /// An entry's type byte is neither raw (`0x00`) nor delta (`0x02`);
    /// carries the offending byte. The reserved `0x01` is reported here too.
    #[error("entry_type {0:#04x} is not 0x00 (raw) or 0x02 (delta)")]
    InvalidEntryType(u8),
    /// Entry count is over [`MAX_ENTRIES`]. The reader also returns this when
    /// the declared count could not possibly fit in the pack body; the writer
    /// reports `MAX_ENTRIES + 1` when a push would exceed the cap.
    #[error("entry_count {0} exceeds the {MAX_ENTRIES} cap")]
    TooManyObjects(u32),
    /// Accumulated payload bytes exceed [`MAX_TOTAL_PAYLOAD`]. The writer's
    /// `finish` also returns this when a single payload length does not fit
    /// the `u32` frame field.
    #[error("sum of payload_len exceeds {MAX_TOTAL_PAYLOAD} bytes")]
    PackfileTooLarge,
    /// An entry frame or its payload would run past the start of the trailer.
    #[error("entry payload extends past the trailer offset")]
    UnexpectedEof,
    /// The trailer does not equal the hash of the bytes preceding it.
    #[error("trailer BLAKE3 mismatch — packfile is corrupt or truncated")]
    PackfileCorrupted,
    /// A delta's base was found neither among this pack's earlier entries nor
    /// in the store; carries the base hash rendered with `hash::to_hex`.
    #[error("delta entry references base hash {0} which is not in this pack or the store")]
    DeltaBaseMissing(String),
    /// A delta payload is too short to contain the base-hash prefix.
    #[error("delta entry payload is shorter than the 32-byte base hash prefix")]
    DeltaEntryTruncated,
    /// `delta::decode` failed (converted via `?`), or the delta stream was
    /// shorter than its own header.
    #[error("delta reconstruction failed: {0}")]
    DeltaApply(#[from] MkitError),
    /// A raw payload, a delta result, or a base read from the store failed to
    /// deserialise as an object.
    #[error("pack entry is not a canonical storable object: {0}")]
    InvalidObject(MkitError),
    /// The bytes deserialised to `Object::Delta`, which is never stored.
    #[error("pack entry resolves to pack-only delta object")]
    NonStorableObject,
    /// Bytes remain between the last declared entry and the trailer.
    #[error("pack contains trailing bytes after declared entries")]
    TrailingData,
    /// A store operation failed (converted via `?`). Also used by the
    /// validators to report `StoreError::ObjectTooLarge`.
    #[error("store I/O failure: {0}")]
    Store(#[from] crate::store::StoreError),
}
95
/// Result of an unpack: which entries were stored, plus a count of
/// delta resolutions vs raw writes. Useful for transport/CLI summaries.
///
/// Returned by [`PackReader::read`] only on success; a failed read
/// yields a [`PackError`] and no report.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct UnpackReport {
    /// Number of raw (`0x00`) entries processed.
    pub raw_count: u32,
    /// Number of delta (`0x02`) entries reconstructed.
    pub delta_count: u32,
    /// Hashes inserted into the store this unpack call, in pack order.
    pub stored: Vec<Hash>,
}
105
106/// Builds a packfile in memory, enforcing entry/payload caps and
107/// recording entries in insertion order. Call [`Self::finish`] to obtain
108/// the final packfile bytes (header + entries + trailer).
109#[derive(Debug, Default)]
110pub struct PackWriter {
111    // Buffered entries. Each `(entry_type, payload)` pair is written
112    // verbatim by `finish`; the writer adds the per-entry frame.
113    entries: Vec<(u8, Vec<u8>)>,
114    total_payload: u64,
115}
116
117impl PackWriter {
118    /// Create an empty writer.
119    #[must_use]
120    pub fn new() -> Self {
121        Self::default()
122    }
123
124    /// Append a raw object entry. `bytes` is the fully serialised object
125    /// payload; `hash_of_bytes` is the BLAKE3 of those same bytes —
126    /// callers usually have it on hand from the object store, so we take
127    /// it explicitly to avoid an extra BLAKE3 pass over the same buffer.
128    /// Returns the same hash for chaining.
129    pub fn push_raw(&mut self, hash_of_bytes: Hash, bytes: Vec<u8>) -> Result<Hash, PackError> {
130        self.check_caps_for(bytes.len())?;
131        self.total_payload += bytes.len() as u64;
132        self.entries.push((0x00, bytes));
133        Ok(hash_of_bytes)
134    }
135
136    /// Append a delta entry. `base_hash` MUST refer to an earlier raw
137    /// entry in this pack OR an object already in the destination store.
138    /// `delta_stream` MUST be a valid SPEC-DELTA stream — we don't
139    /// re-validate here (the writer is trusted), but the reader will.
140    pub fn push_delta(&mut self, base_hash: &Hash, delta_stream: &[u8]) -> Result<(), PackError> {
141        let payload_len = TRAILER_LEN + delta_stream.len();
142        self.check_caps_for(payload_len)?;
143        let mut payload = Vec::with_capacity(payload_len);
144        payload.extend_from_slice(base_hash);
145        payload.extend_from_slice(delta_stream);
146        self.total_payload += payload.len() as u64;
147        self.entries.push((0x02, payload));
148        Ok(())
149    }
150
151    fn check_caps_for(&self, add_len: usize) -> Result<(), PackError> {
152        let next_count = self.entries.len() as u64 + 1;
153        if next_count > u64::from(MAX_ENTRIES) {
154            return Err(PackError::TooManyObjects(MAX_ENTRIES + 1));
155        }
156        let next_total = self.total_payload.saturating_add(add_len as u64);
157        if next_total > MAX_TOTAL_PAYLOAD {
158            return Err(PackError::PackfileTooLarge);
159        }
160        Ok(())
161    }
162
163    /// Number of entries pushed so far. Useful for sizing diagnostics.
164    #[must_use]
165    pub fn entry_count(&self) -> usize {
166        self.entries.len()
167    }
168
169    /// Serialise the pack: header + entries + trailer. The trailer is
170    /// `BLAKE3(everything_before_trailer)`. The whole pack's BLAKE3 is
171    /// the on-disk pack key — see [`pack_key`].
172    pub fn finish(self) -> Result<Vec<u8>, PackError> {
173        let count: u32 = self
174            .entries
175            .len()
176            .try_into()
177            .map_err(|_| PackError::TooManyObjects(MAX_ENTRIES + 1))?;
178        if count > MAX_ENTRIES {
179            return Err(PackError::TooManyObjects(count));
180        }
181
182        // Pre-size: header + per-entry overhead + payloads + trailer.
183        let mut size = HEADER_LEN + TRAILER_LEN;
184        for (_, p) in &self.entries {
185            size += ENTRY_FRAME_LEN + p.len();
186        }
187        let mut buf = Vec::with_capacity(size);
188
189        buf.extend_from_slice(MAGIC);
190        buf.extend_from_slice(&VERSION.to_le_bytes());
191        buf.extend_from_slice(&count.to_le_bytes());
192        for (etype, payload) in self.entries {
193            buf.push(etype);
194            let plen: u32 = payload
195                .len()
196                .try_into()
197                .map_err(|_| PackError::PackfileTooLarge)?;
198            buf.extend_from_slice(&plen.to_le_bytes());
199            buf.extend_from_slice(&payload);
200        }
201        // Trailer over everything written so far.
202        let trailer = hash::hash(&buf);
203        buf.extend_from_slice(&trailer);
204        Ok(buf)
205    }
206}
207
/// Compute the on-disk pack key: BLAKE3 of the entire packfile bytes
/// (including the trailer). SPEC-PACKFILE §7. Returns the bare digest;
/// callers prepend `packs/` and lower-hex-encode for the storage path.
///
/// This hashes whatever bytes it is given and performs no validation of
/// the pack itself.
#[must_use]
pub fn pack_key(pack_bytes: &[u8]) -> Hash {
    hash::hash(pack_bytes)
}
215
216/// Streaming-style packfile reader. Verifies header, trailer, entry
217/// framing, and the base-before-delta ordering rule. Reconstructs delta
218/// targets and writes every resolved object to `store`.
219#[derive(Debug)]
220pub struct PackReader;
221
222impl PackReader {
223    /// Verify and unpack `pack_bytes` into `store`. Returns counts of
224    /// raw vs. delta entries plus the list of stored hashes (in pack
225    /// order, deduped within this call).
226    ///
227    /// # Errors
228    ///
229    /// Returns the matching [`PackError`] variant on any malformed
230    /// input or trailer mismatch. The store is not modified if the
231    /// trailer fails verification.
232    ///
233    /// # Panics
234    ///
235    /// The internal `try_into` calls on fixed-size byte slices are
236    /// statically guaranteed to succeed (we slice exactly 4 bytes for
237    /// every `u32::from_le_bytes`). They `expect`-panic only if the
238    /// compiler's slice-bounds elision is wrong.
239    pub fn read(pack_bytes: &[u8], store: &ObjectStore) -> Result<UnpackReport, PackError> {
240        // 1. Length sanity: must fit header + trailer at minimum.
241        if pack_bytes.len() < HEADER_LEN + TRAILER_LEN {
242            return Err(PackError::PackfileTooShort);
243        }
244        // 2. Magic.
245        if &pack_bytes[..4] != MAGIC.as_slice() {
246            return Err(PackError::InvalidMagic);
247        }
248        // 3. Version.
249        let version = u32::from_le_bytes(pack_bytes[4..8].try_into().expect("4 bytes"));
250        if version != VERSION {
251            return Err(PackError::UnsupportedVersion(version));
252        }
253        // 4. Trailer must match BEFORE we touch the store. SPEC-PACKFILE §8.
254        let split = pack_bytes.len() - TRAILER_LEN;
255        let body = &pack_bytes[..split];
256        let trailer = &pack_bytes[split..];
257        let computed = hash::hash(body);
258        if computed.as_slice() != trailer {
259            return Err(PackError::PackfileCorrupted);
260        }
261        // 5. Entry count + cap.
262        let count = u32::from_le_bytes(pack_bytes[8..12].try_into().expect("4 bytes"));
263        if count > MAX_ENTRIES {
264            return Err(PackError::TooManyObjects(count));
265        }
266        // Quick lower bound sanity: each entry is at least ENTRY_FRAME_LEN bytes.
267        let body_after_header = body.len() - HEADER_LEN;
268        if u64::from(count) * ENTRY_FRAME_LEN as u64 > body_after_header as u64 {
269            return Err(PackError::TooManyObjects(count));
270        }
271
272        let mut report = UnpackReport::default();
273        let mut pending_writes: Vec<(Hash, Arc<[u8]>)> = Vec::new();
274        // Track raw entries we wrote in *this* pack so subsequent delta
275        // entries can resolve their base from memory before falling back
276        // to the on-disk store. We keep the resolved object bytes (raw
277        // serialised SPEC-OBJECTS payload) so the delta apply doesn't
278        // need to re-read them.
279        let mut in_pack: std::collections::HashMap<Hash, Arc<[u8]>> =
280            std::collections::HashMap::new();
281        let mut total_payload: u64 = 0;
282        let mut pos = HEADER_LEN;
283
284        for _ in 0..count {
285            // Frame: [type][payload_len].
286            if pos + ENTRY_FRAME_LEN > split {
287                return Err(PackError::UnexpectedEof);
288            }
289            let etype = pack_bytes[pos];
290            pos += 1;
291            let payload_len =
292                u32::from_le_bytes(pack_bytes[pos..pos + 4].try_into().expect("4 bytes")) as usize;
293            pos += 4;
294
295            total_payload = total_payload.saturating_add(payload_len as u64);
296            if total_payload > MAX_TOTAL_PAYLOAD {
297                return Err(PackError::PackfileTooLarge);
298            }
299            if pos + payload_len > split {
300                return Err(PackError::UnexpectedEof);
301            }
302            let payload = &pack_bytes[pos..pos + payload_len];
303            pos += payload_len;
304
305            match etype {
306                0x00 => {
307                    // raw — validate and stage for writing after the whole pack parses.
308                    validate_storable_object(payload)?;
309                    let stored_hash = hash::hash(payload);
310                    let bytes: Arc<[u8]> = Arc::from(payload);
311                    in_pack.insert(stored_hash, Arc::clone(&bytes));
312                    pending_writes.push((stored_hash, bytes));
313                    report.raw_count += 1;
314                    report.stored.push(stored_hash);
315                }
316                0x02 => {
317                    // delta — payload is [32B base_hash][stream].
318                    if payload.len() < TRAILER_LEN {
319                        return Err(PackError::DeltaEntryTruncated);
320                    }
321                    let mut base_hash = [0u8; 32];
322                    base_hash.copy_from_slice(&payload[..TRAILER_LEN]);
323                    let stream = &payload[TRAILER_LEN..];
324                    // Resolve base: in-pack first, then on-disk.
325                    let base_bytes: std::borrow::Cow<'_, [u8]> =
326                        if let Some(b) = in_pack.get(&base_hash) {
327                            std::borrow::Cow::Borrowed(b.as_ref())
328                        } else if store.contains(&base_hash) {
329                            let bytes = store.read(&base_hash)?;
330                            validate_storable_object(&bytes)?;
331                            std::borrow::Cow::Owned(bytes)
332                        } else {
333                            return Err(PackError::DeltaBaseMissing(hash::to_hex(&base_hash)));
334                        };
335                    validate_delta_result_size(stream)?;
336                    let resolved = delta::decode(base_bytes.as_ref(), stream)?;
337                    validate_storable_object(&resolved)?;
338                    let stored_hash = hash::hash(&resolved);
339                    let bytes: Arc<[u8]> = Arc::from(resolved);
340                    in_pack.insert(stored_hash, Arc::clone(&bytes));
341                    pending_writes.push((stored_hash, bytes));
342                    report.delta_count += 1;
343                    report.stored.push(stored_hash);
344                }
345                0x01 => return Err(PackError::InvalidEntryType(0x01)),
346                other => return Err(PackError::InvalidEntryType(other)),
347            }
348        }
349
350        if pos != split {
351            return Err(PackError::TrailingData);
352        }
353
354        // Batched durability: one full flush for the whole pack instead
355        // of one per object. The caller's ref update happens after
356        // `read` returns, so the commit-before-reference ordering holds.
357        let batch = store.batch();
358        for (h, bytes) in pending_writes {
359            // Every entry was BLAKE3-hashed above (trailer-verified
360            // pack, hash recorded in the report); skip the re-hash.
361            batch.write_prehashed(h, &[&bytes])?;
362        }
363        batch.commit()?;
364
365        Ok(report)
366    }
367}
368
369fn validate_storable_object(bytes: &[u8]) -> Result<(), PackError> {
370    if bytes.len() > MAX_RAW_OBJECT_SIZE {
371        return Err(PackError::Store(crate::store::StoreError::ObjectTooLarge));
372    }
373    match crate::serialize::deserialize(bytes).map_err(PackError::InvalidObject)? {
374        Object::Delta(_) => Err(PackError::NonStorableObject),
375        Object::Blob(_)
376        | Object::Tree(_)
377        | Object::Commit(_)
378        | Object::Remix(_)
379        | Object::ChunkedBlob(_)
380        | Object::Tag(_) => Ok(()),
381    }
382}
383
384fn validate_delta_result_size(stream: &[u8]) -> Result<(), PackError> {
385    if stream.len() < delta::HEADER_LEN {
386        return Err(PackError::DeltaApply(MkitError::UnexpectedEof));
387    }
388    let result_len = u32::from_le_bytes(stream[5..9].try_into().expect("4 bytes")) as usize;
389    if result_len > MAX_RAW_OBJECT_SIZE {
390        return Err(PackError::Store(crate::store::StoreError::ObjectTooLarge));
391    }
392    Ok(())
393}
394
395// =========================================================================
396// Tests
397// =========================================================================
398
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create an empty object store in a fresh temporary directory. The
    /// `TempDir` is returned alongside the store so the caller keeps the
    /// directory alive for the duration of the test.
    fn fresh_store() -> (TempDir, ObjectStore) {
        let dir = TempDir::new().unwrap();
        let store = ObjectStore::init(dir.path()).unwrap();
        (dir, store)
    }

    /// Serialise `payload` as a blob object and return the object bytes.
    fn write_blob_via_serialize(payload: &[u8]) -> Vec<u8> {
        // Go through the serialize/object stack so the bytes are a real
        // mkit object: the reader deserialises every raw payload and
        // every delta result, and rejects anything that does not parse.
        let blob = crate::object::Object::Blob(crate::object::Blob {
            data: payload.to_vec(),
        });
        crate::serialize::serialize(&blob).expect("serialize blob")
    }

    /// Append the trailer (hash of `body`) to a hand-built pack body.
    fn finish_pack_body(mut body: Vec<u8>) -> Vec<u8> {
        let trailer = hash::hash(&body);
        body.extend_from_slice(&trailer);
        body
    }

    /// An empty pack is just header + trailer and unpacks to an empty report.
    #[test]
    fn empty_pack_is_44_bytes() {
        let pack = PackWriter::new().finish().unwrap();
        assert_eq!(pack.len(), HEADER_LEN + TRAILER_LEN);
        assert_eq!(&pack[..4], MAGIC);
        assert_eq!(u32::from_le_bytes(pack[4..8].try_into().unwrap()), VERSION);
        assert_eq!(u32::from_le_bytes(pack[8..12].try_into().unwrap()), 0);

        let (_dir, store) = fresh_store();
        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.raw_count, 0);
        assert_eq!(report.delta_count, 0);
        assert!(report.stored.is_empty());
    }

    /// Unpacking many objects must not cost one full flush per object.
    #[test]
    fn unpack_writes_objects_via_single_batch_flush() {
        // clone/fetch receive N objects per pack; durability must cost
        // O(1) full flushes per pack, not O(N).
        use crate::batch::testing::{Ev, RecordingSyncer};
        use std::sync::Arc;

        let mut w = PackWriter::new();
        let mut blobs = Vec::new();
        for i in 0u32..30 {
            let blob = write_blob_via_serialize(format!("pack object {i}").as_bytes());
            w.push_raw(hash::hash(&blob), blob.clone()).unwrap();
            blobs.push(blob);
        }
        let pack = w.finish().unwrap();

        let (_dir, mut store) = fresh_store();
        let rec = Arc::new(RecordingSyncer::default());
        store.set_syncer(rec.clone());

        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.raw_count, 30);

        // NOTE(review): the expected count is 2 rather than 1 — presumably
        // the fixed number of full flushes a batch commit performs
        // regardless of object count; confirm against the batch module.
        let fulls = rec
            .events()
            .iter()
            .filter(|e| matches!(e, Ev::Full(_)))
            .count();
        assert_eq!(
            fulls, 2,
            "unpack flush cost must be constant, not O(objects)"
        );
        for blob in &blobs {
            assert_eq!(store.read(&hash::hash(blob)).unwrap(), *blob);
        }
    }

    /// One raw entry survives a write → read round trip byte-for-byte.
    #[test]
    fn single_raw_roundtrip() {
        let blob = write_blob_via_serialize(b"hello packfile");
        let h = hash::hash(&blob);

        let mut w = PackWriter::new();
        w.push_raw(h, blob.clone()).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.raw_count, 1);
        assert_eq!(report.delta_count, 0);
        assert_eq!(report.stored, vec![h]);
        assert_eq!(store.read(&h).unwrap(), blob);
    }

    /// A delta whose base is an earlier raw entry in the same pack resolves.
    #[test]
    fn raw_then_delta_resolves_in_pack() {
        // Two near-identical blobs. Delta should reconstruct the second.
        let mut content_base = vec![0u8; 1024];
        for (i, b) in content_base.iter_mut().enumerate() {
            *b = u8::try_from(i % 251).expect("modulo < 256");
        }
        let mut content_target = content_base.clone();
        content_target[500] = 0xFF;
        content_target[501] = 0xFE;

        let base_obj = write_blob_via_serialize(&content_base);
        let target_obj = write_blob_via_serialize(&content_target);
        let base_hash = hash::hash(&base_obj);
        let target_hash = hash::hash(&target_obj);

        let stream = delta::encode(&base_obj, &target_obj).unwrap();

        let mut w = PackWriter::new();
        w.push_raw(base_hash, base_obj.clone()).unwrap();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.raw_count, 1);
        assert_eq!(report.delta_count, 1);
        assert_eq!(report.stored, vec![base_hash, target_hash]);
        assert_eq!(store.read(&target_hash).unwrap(), target_obj);
    }

    /// A raw payload that is not a serialised object is rejected and
    /// nothing reaches the store.
    #[test]
    fn rejects_raw_payload_that_is_not_canonical_object_without_store_write() {
        let payload = b"not a serialized mkit object".to_vec();
        let payload_hash = hash::hash(&payload);
        // Hand-build the pack: header with entry_count = 1, then one raw frame.
        let mut body = Vec::new();
        body.extend_from_slice(MAGIC);
        body.extend_from_slice(&VERSION.to_le_bytes());
        body.extend_from_slice(&1u32.to_le_bytes());
        body.push(0x00);
        let payload_len = u32::try_from(payload.len()).unwrap();
        body.extend_from_slice(&payload_len.to_le_bytes());
        body.extend_from_slice(&payload);
        let pack = finish_pack_body(body);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidObject(_)), "got {err:?}");
        assert!(!store.contains(&payload_hash));
    }

    /// A raw entry that deserialises to a delta object is rejected.
    #[test]
    fn rejects_raw_delta_object_without_store_write() {
        let delta = crate::object::Object::Delta(crate::object::Delta {
            base_hash: [0xAB; 32],
            result_size: 0,
            instructions: Vec::new(),
        });
        let payload = crate::serialize::serialize(&delta).unwrap();
        let payload_hash = hash::hash(&payload);
        let mut w = PackWriter::new();
        w.push_raw(payload_hash, payload).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::NonStorableObject), "got {err:?}");
        assert!(!store.contains(&payload_hash));
    }

    /// A delta that reconstructs to non-object bytes fails the whole unpack;
    /// the valid raw base that preceded it must not be stored either.
    #[test]
    fn rejects_delta_resolving_to_non_object_without_partial_store_write() {
        let base_obj = write_blob_via_serialize(b"base bytes");
        let base_hash = hash::hash(&base_obj);
        let invalid_target = b"not a serialized object".to_vec();
        let invalid_hash = hash::hash(&invalid_target);
        let stream = delta::encode(&base_obj, &invalid_target).unwrap();

        let mut w = PackWriter::new();
        w.push_raw(base_hash, base_obj).unwrap();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidObject(_)), "got {err:?}");
        assert!(!store.contains(&base_hash));
        assert!(!store.contains(&invalid_hash));
    }

    /// A delta header declaring a result larger than the object cap is
    /// rejected, and the preceding raw base is not stored.
    #[test]
    fn rejects_delta_result_over_object_cap_without_partial_store_write() {
        let base_obj = write_blob_via_serialize(b"base bytes");
        let base_hash = hash::hash(&base_obj);
        // Header-only stream: [version][base_len u32 LE][result_len u32 LE].
        let mut stream = Vec::new();
        stream.push(delta::STREAM_VERSION);
        stream.extend_from_slice(&u32::try_from(base_obj.len()).unwrap().to_le_bytes());
        stream.extend_from_slice(
            &u32::try_from(MAX_RAW_OBJECT_SIZE + 1)
                .unwrap()
                .to_le_bytes(),
        );

        let mut w = PackWriter::new();
        w.push_raw(base_hash, base_obj).unwrap();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(
            matches!(
                err,
                PackError::Store(crate::store::StoreError::ObjectTooLarge)
            ),
            "got {err:?}"
        );
        assert!(!store.contains(&base_hash));
    }

    /// Bytes between the last declared entry and the trailer are an error
    /// even when the trailer itself is valid.
    #[test]
    fn rejects_trailing_bytes_after_declared_entries_without_store_write() {
        let blob = write_blob_via_serialize(b"trailing bytes test");
        let blob_hash = hash::hash(&blob);
        let mut body = Vec::new();
        body.extend_from_slice(MAGIC);
        body.extend_from_slice(&VERSION.to_le_bytes());
        body.extend_from_slice(&1u32.to_le_bytes());
        body.push(0x00);
        let blob_len = u32::try_from(blob.len()).unwrap();
        body.extend_from_slice(&blob_len.to_le_bytes());
        body.extend_from_slice(&blob);
        // Extra bytes not covered by any entry; the trailer is computed
        // over them too, so only the trailing-data check can reject this.
        body.extend_from_slice(b"junk");
        let pack = finish_pack_body(body);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::TrailingData), "got {err:?}");
        assert!(!store.contains(&blob_hash));
    }

    /// Any four bytes other than the magic are rejected.
    #[test]
    fn rejects_invalid_magic() {
        // Use an arbitrary invalid 4-byte sequence; the rename gate
        // forbids spelling out the upstream pre-rename magic literally.
        let mut pack = PackWriter::new().finish().unwrap();
        pack[0] = b'X';
        pack[1] = b'X';
        pack[2] = b'X';
        pack[3] = b'X';
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidMagic));
    }

    /// A version other than `VERSION` is rejected with the value read.
    #[test]
    fn rejects_unknown_version() {
        let mut pack = PackWriter::new().finish().unwrap();
        // version is u32 LE at offset 4
        pack[4] = 99;
        // Editing byte 4 also invalidates the trailer, but `read()`
        // checks the version before it verifies the trailer, so the
        // error surfaced is UnsupportedVersion rather than
        // PackfileCorrupted. No need to recompute the trailer.
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::UnsupportedVersion(99)));
    }

    /// Input shorter than header + trailer is rejected up front.
    #[test]
    fn rejects_truncated_pack() {
        let pack = vec![b'M', b'K']; // only 2 bytes
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::PackfileTooShort));
    }

    /// A single flipped trailer bit is detected as corruption.
    #[test]
    fn rejects_bit_flipped_trailer() {
        let blob = write_blob_via_serialize(b"trailer test");
        let h = hash::hash(&blob);
        let mut w = PackWriter::new();
        w.push_raw(h, blob).unwrap();
        let mut pack = w.finish().unwrap();
        let last = pack.len() - 1;
        pack[last] ^= 0x01; // flip one bit
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::PackfileCorrupted));
    }

    /// The reserved entry type 0x01 is rejected.
    #[test]
    fn rejects_reserved_entry_type_0x01() {
        // Hand-build a pack with one entry of type 0x01.
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.push(0x01); // RESERVED type
        buf.extend_from_slice(&0u32.to_le_bytes()); // payload_len = 0
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidEntryType(0x01)));
    }

    /// An entry type that is neither defined nor reserved is rejected.
    #[test]
    fn rejects_unknown_entry_type() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.push(0x77); // unknown
        buf.extend_from_slice(&0u32.to_le_bytes());
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidEntryType(0x77)));
    }

    /// A delta whose base is in neither the pack nor the store fails with
    /// a dedicated error rather than a generic decode failure.
    #[test]
    fn delta_base_missing_is_loud() {
        let mut fake_base = [0u8; 32];
        fake_base[0] = 0xAB;
        // Build a minimal SPEC-DELTA stream that targets a nonexistent base.
        let mut stream = Vec::new();
        stream.push(0x01); // version
        stream.extend_from_slice(&0u32.to_le_bytes()); // base_len
        stream.extend_from_slice(&0u32.to_le_bytes()); // result_len
        let mut w = PackWriter::new();
        w.push_delta(&fake_base, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::DeltaBaseMissing(_)), "got {err:?}");
    }

    /// A declared payload length that runs past the trailer is rejected.
    #[test]
    fn entry_payload_past_trailer_rejected() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.push(0x00);
        buf.extend_from_slice(&1_000_000u32.to_le_bytes());
        // No payload bytes follow.
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        assert!(matches!(err, PackError::UnexpectedEof));
    }

    /// A header declaring more entries than the cap allows is rejected.
    #[test]
    fn entry_count_over_cap_rejected() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&u32::MAX.to_le_bytes());
        // Give the pack a correct trailer over the header bytes. `read()`
        // verifies the trailer before it looks at the entry count, so a
        // bad trailer would mask the cap check with PackfileCorrupted.
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        // With the trailer valid, the entry-count cap is the first check
        // that can fail.
        assert!(
            matches!(err, PackError::TooManyObjects(_)),
            "expected TooManyObjects, got {err:?}"
        );
    }

    /// `pack_key` is the plain hash of the complete pack bytes.
    #[test]
    fn pack_key_is_blake3_of_pack_bytes() {
        let blob = write_blob_via_serialize(b"key test");
        let h = hash::hash(&blob);
        let mut w = PackWriter::new();
        w.push_raw(h, blob).unwrap();
        let pack = w.finish().unwrap();
        assert_eq!(pack_key(&pack), hash::hash(&pack));
    }

    /// A delta-only pack resolves its base from an object already in the store.
    #[test]
    fn delta_resolves_against_pre_existing_store_object() {
        let (_dir, store) = fresh_store();
        // Plant the base in the store first.
        let mut content_base = vec![0u8; 256];
        for (i, b) in content_base.iter_mut().enumerate() {
            *b = u8::try_from(i % 251).expect("modulo < 256");
        }
        let base_obj = write_blob_via_serialize(&content_base);
        let base_hash = store.write(&base_obj).unwrap();

        // Pack contains ONLY a delta; the base must be resolved from disk.
        let mut content_target = content_base.clone();
        content_target[100] = 0xAA;
        let target_obj = write_blob_via_serialize(&content_target);
        let target_hash = hash::hash(&target_obj);
        let stream = delta::encode(&base_obj, &target_obj).unwrap();

        let mut w = PackWriter::new();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.delta_count, 1);
        assert_eq!(report.raw_count, 0);
        assert_eq!(store.read(&target_hash).unwrap(), target_obj);
    }
}