Skip to main content

mkit_core/
pack.rs

1//! Packfile writer / reader — conformant to `docs/SPEC-PACKFILE.md`.
2//!
3//! Layout (SPEC-PACKFILE §1, §2, §3, §8):
4//!
5//! ```text
6//! [4B  magic            "MKIT"]                       offset 0
7//! [4B  version u32 LE  == 1   ]
8//! [4B  entry_count u32 LE     ]
9//!   for each entry:
10//!     [u8  entry_type]                                0x00 raw | 0x02 delta
11//!     [u32 LE payload_len]                            length of payload only
12//!     [payload_len bytes payload]
13//! [32B trailer = BLAKE3 of all preceding bytes]
14//! ```
15//!
16//! Entry types (SPEC-PACKFILE §3):
17//!
18//! * `0x00` raw  — payload is a fully serialised mkit object.
19//! * `0x01`      — RESERVED, MUST be rejected.
20//! * `0x02` delta — payload is `[32B base_hash][SPEC-DELTA stream]`.
21//!
22//! Caps (SPEC-PACKFILE §5):
23//!
24//! * `entry_count <= 10_000_000`
25//! * total `payload_len` sum `<= 4 GiB`
26//!
27//! Delta-base ordering rule (SPEC-PACKFILE §4): every delta entry's
28//! `base_hash` MUST appear earlier in the same pack as a raw entry, OR
29//! already exist in the destination object store.
30//!
31//! The pack key (SPEC-PACKFILE §7) is `packs/<lower-hex BLAKE3 of entire
32//! pack>`. The trailer is then redundant w.r.t. that key, but it lets a
33//! streaming reader detect bit-rot before the whole pack has been
34//! hashed end-to-end.
35
36use crate::delta;
37use crate::hash::{self, Hash};
38use crate::object::{MkitError, Object};
39use crate::store::{MAX_RAW_OBJECT_SIZE, ObjectStore};
40use std::sync::Arc;
41
/// Four-byte ASCII magic (`"MKIT"`) that opens every v1 pack.
pub const MAGIC: &[u8; 4] = b"MKIT";
/// The packfile version this module writes; the reader rejects any other value.
pub const VERSION: u32 = 1;

/// Maximum number of entries a single pack may declare (SPEC-PACKFILE §5).
pub const MAX_ENTRIES: u32 = 10_000_000;
/// Maximum sum of `payload_len` over all entries of a pack: 4 GiB.
pub const MAX_TOTAL_PAYLOAD: u64 = 4 << 30;
/// Size in bytes of the trailer, a raw BLAKE3 digest.
pub const TRAILER_LEN: usize = 32;

/// Size in bytes of the header: `[4B magic][4B version][4B entry_count]`.
pub const HEADER_LEN: usize = MAGIC.len() + 2 * core::mem::size_of::<u32>();
/// Size in bytes of the framing that precedes each payload:
/// `[1B type][4B payload_len]`.
pub const ENTRY_FRAME_LEN: usize = core::mem::size_of::<u8>() + core::mem::size_of::<u32>();
58
/// Packfile errors. Distinct from [`MkitError`] so callers can match on
/// pack-specific failures (trailer mismatch, base-missing) without
/// catching every object decode error.
///
/// The two variants marked `#[from]` ([`PackError::DeltaApply`] and
/// [`PackError::Store`]) are also produced implicitly when `?` is applied
/// to a `MkitError` or a `StoreError` inside this module.
#[derive(Debug, thiserror::Error)]
pub enum PackError {
    /// The input is too small to hold a header and a trailer.
    #[error("packfile is shorter than the {HEADER_LEN}-byte header + {TRAILER_LEN}-byte trailer")]
    PackfileTooShort,
    /// The first four bytes differ from [`MAGIC`].
    #[error("first 4 bytes are not ASCII \"MKIT\"")]
    InvalidMagic,
    /// The header's version field is not [`VERSION`]; carries the value found.
    #[error("version {0} is not supported (v1 only)")]
    UnsupportedVersion(u32),
    /// An entry's type byte is neither raw nor delta (this includes the
    /// reserved `0x01`); carries the offending byte.
    #[error("entry_type {0:#04x} is not 0x00 (raw) or 0x02 (delta)")]
    InvalidEntryType(u8),
    /// The entry count exceeds [`MAX_ENTRIES`]. The reader also returns this
    /// when the declared count cannot fit in the bytes between header and
    /// trailer; the writer reports `MAX_ENTRIES + 1` when a push would
    /// exceed the cap.
    #[error("entry_count {0} exceeds the {MAX_ENTRIES} cap")]
    TooManyObjects(u32),
    /// The running sum of payload lengths exceeds [`MAX_TOTAL_PAYLOAD`].
    /// The writer also returns this when a single payload's length does not
    /// fit the u32 `payload_len` field.
    #[error("sum of payload_len exceeds {MAX_TOTAL_PAYLOAD} bytes")]
    PackfileTooLarge,
    /// An entry frame or its payload would run past the start of the trailer.
    #[error("entry payload extends past the trailer offset")]
    UnexpectedEof,
    /// The trailer does not equal the hash of all bytes that precede it.
    #[error("trailer BLAKE3 mismatch — packfile is corrupt or truncated")]
    PackfileCorrupted,
    /// A delta entry's base was found neither among the objects already
    /// resolved from this pack nor in the store; carries the base hash as
    /// rendered by `hash::to_hex`.
    #[error("delta entry references base hash {0} which is not in this pack or the store")]
    DeltaBaseMissing(String),
    /// A delta entry's payload is too short to contain the base hash.
    #[error("delta entry payload is shorter than the 32-byte base hash prefix")]
    DeltaEntryTruncated,
    /// Decoding a delta stream failed, or the stream is shorter than the
    /// delta header.
    #[error("delta reconstruction failed: {0}")]
    DeltaApply(#[from] MkitError),
    /// A raw payload, a reconstructed delta target, or a base read from the
    /// store failed to deserialise as an object.
    #[error("pack entry is not a canonical storable object: {0}")]
    InvalidObject(MkitError),
    /// The bytes deserialised to an `Object::Delta`, which is rejected as
    /// not storable.
    #[error("pack entry resolves to pack-only delta object")]
    NonStorableObject,
    /// Bytes remain between the last declared entry and the trailer.
    #[error("pack contains trailing bytes after declared entries")]
    TrailingData,
    /// A store operation failed. Also used (with `ObjectTooLarge`) when an
    /// object or a declared delta result exceeds `MAX_RAW_OBJECT_SIZE`.
    #[error("store I/O failure: {0}")]
    Store(#[from] crate::store::StoreError),
}
95
/// Result of an unpack: which entries were stored, plus a count of
/// delta resolutions vs raw writes. Useful for transport/CLI summaries.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct UnpackReport {
    /// Number of raw (`0x00`) entries read from the pack.
    pub raw_count: u32,
    /// Number of delta (`0x02`) entries read and reconstructed.
    pub delta_count: u32,
    /// Hashes of the objects this unpack call wrote to the store, in pack
    /// order.
    pub stored: Vec<Hash>,
}
105
/// Builds a packfile in memory, enforcing entry/payload caps and
/// recording entries in insertion order. Call [`Self::finish`] to obtain
/// the final packfile bytes (header + entries + trailer).
#[derive(Debug, Default)]
pub struct PackWriter {
    // Buffered entries. Each `(entry_type, payload)` pair is written
    // verbatim by `finish`; the writer adds the per-entry frame.
    entries: Vec<(u8, Vec<u8>)>,
    // Running sum of the payload lengths held in `entries`; compared
    // against `MAX_TOTAL_PAYLOAD` before every push.
    total_payload: u64,
}
116
117impl PackWriter {
118    /// Create an empty writer.
119    #[must_use]
120    pub fn new() -> Self {
121        Self::default()
122    }
123
124    /// Append a raw object entry. `bytes` is the fully serialised object
125    /// payload; `hash_of_bytes` is the BLAKE3 of those same bytes —
126    /// callers usually have it on hand from the object store, so we take
127    /// it explicitly to avoid an extra BLAKE3 pass over the same buffer.
128    /// Returns the same hash for chaining.
129    pub fn push_raw(&mut self, hash_of_bytes: Hash, bytes: Vec<u8>) -> Result<Hash, PackError> {
130        self.check_caps_for(bytes.len())?;
131        self.total_payload += bytes.len() as u64;
132        self.entries.push((0x00, bytes));
133        Ok(hash_of_bytes)
134    }
135
136    /// Append a delta entry. `base_hash` MUST refer to an earlier raw
137    /// entry in this pack OR an object already in the destination store.
138    /// `delta_stream` MUST be a valid SPEC-DELTA stream — we don't
139    /// re-validate here (the writer is trusted), but the reader will.
140    pub fn push_delta(&mut self, base_hash: &Hash, delta_stream: &[u8]) -> Result<(), PackError> {
141        let payload_len = TRAILER_LEN + delta_stream.len();
142        self.check_caps_for(payload_len)?;
143        let mut payload = Vec::with_capacity(payload_len);
144        payload.extend_from_slice(base_hash);
145        payload.extend_from_slice(delta_stream);
146        self.total_payload += payload.len() as u64;
147        self.entries.push((0x02, payload));
148        Ok(())
149    }
150
151    fn check_caps_for(&self, add_len: usize) -> Result<(), PackError> {
152        let next_count = self.entries.len() as u64 + 1;
153        if next_count > u64::from(MAX_ENTRIES) {
154            return Err(PackError::TooManyObjects(MAX_ENTRIES + 1));
155        }
156        let next_total = self.total_payload.saturating_add(add_len as u64);
157        if next_total > MAX_TOTAL_PAYLOAD {
158            return Err(PackError::PackfileTooLarge);
159        }
160        Ok(())
161    }
162
163    /// Number of entries pushed so far. Useful for sizing diagnostics.
164    #[must_use]
165    pub fn entry_count(&self) -> usize {
166        self.entries.len()
167    }
168
169    /// Serialise the pack: header + entries + trailer. The trailer is
170    /// `BLAKE3(everything_before_trailer)`. The whole pack's BLAKE3 is
171    /// the on-disk pack key — see [`pack_key`].
172    pub fn finish(self) -> Result<Vec<u8>, PackError> {
173        let count: u32 = self
174            .entries
175            .len()
176            .try_into()
177            .map_err(|_| PackError::TooManyObjects(MAX_ENTRIES + 1))?;
178        if count > MAX_ENTRIES {
179            return Err(PackError::TooManyObjects(count));
180        }
181
182        // Pre-size: header + per-entry overhead + payloads + trailer.
183        let mut size = HEADER_LEN + TRAILER_LEN;
184        for (_, p) in &self.entries {
185            size += ENTRY_FRAME_LEN + p.len();
186        }
187        let mut buf = Vec::with_capacity(size);
188
189        buf.extend_from_slice(MAGIC);
190        buf.extend_from_slice(&VERSION.to_le_bytes());
191        buf.extend_from_slice(&count.to_le_bytes());
192        for (etype, payload) in self.entries {
193            buf.push(etype);
194            let plen: u32 = payload
195                .len()
196                .try_into()
197                .map_err(|_| PackError::PackfileTooLarge)?;
198            buf.extend_from_slice(&plen.to_le_bytes());
199            buf.extend_from_slice(&payload);
200        }
201        // Trailer over everything written so far.
202        let trailer = hash::hash(&buf);
203        buf.extend_from_slice(&trailer);
204        Ok(buf)
205    }
206}
207
208/// Compute the on-disk pack key: BLAKE3 of the entire packfile bytes
209/// (including the trailer). SPEC-PACKFILE §7. Returns the bare digest;
210/// callers prepend `packs/` and lower-hex-encode for the storage path.
211#[must_use]
212pub fn pack_key(pack_bytes: &[u8]) -> Hash {
213    hash::hash(pack_bytes)
214}
215
/// Streaming-style packfile reader. Verifies header, trailer, entry
/// framing, and the base-before-delta ordering rule. Reconstructs delta
/// targets and writes every resolved object to `store`.
///
/// This is a stateless unit type: all work happens in
/// [`PackReader::read`], which takes the complete pack as one in-memory
/// byte slice.
#[derive(Debug)]
pub struct PackReader;
221
222impl PackReader {
223    /// Verify and unpack `pack_bytes` into `store`. Returns counts of
224    /// raw vs. delta entries plus the list of stored hashes (in pack
225    /// order, deduped within this call).
226    ///
227    /// # Errors
228    ///
229    /// Returns the matching [`PackError`] variant on any malformed
230    /// input or trailer mismatch. The store is not modified if the
231    /// trailer fails verification.
232    ///
233    /// # Panics
234    ///
235    /// The internal `try_into` calls on fixed-size byte slices are
236    /// statically guaranteed to succeed (we slice exactly 4 bytes for
237    /// every `u32::from_le_bytes`). They `expect`-panic only if the
238    /// compiler's slice-bounds elision is wrong.
239    pub fn read(pack_bytes: &[u8], store: &ObjectStore) -> Result<UnpackReport, PackError> {
240        // 1. Length sanity: must fit header + trailer at minimum.
241        if pack_bytes.len() < HEADER_LEN + TRAILER_LEN {
242            return Err(PackError::PackfileTooShort);
243        }
244        // 2. Magic.
245        if &pack_bytes[..4] != MAGIC.as_slice() {
246            return Err(PackError::InvalidMagic);
247        }
248        // 3. Version.
249        let version = u32::from_le_bytes(pack_bytes[4..8].try_into().expect("4 bytes"));
250        if version != VERSION {
251            return Err(PackError::UnsupportedVersion(version));
252        }
253        // 4. Trailer must match BEFORE we touch the store. SPEC-PACKFILE §8.
254        let split = pack_bytes.len() - TRAILER_LEN;
255        let body = &pack_bytes[..split];
256        let trailer = &pack_bytes[split..];
257        let computed = hash::hash(body);
258        if computed.as_slice() != trailer {
259            return Err(PackError::PackfileCorrupted);
260        }
261        // 5. Entry count + cap.
262        let count = u32::from_le_bytes(pack_bytes[8..12].try_into().expect("4 bytes"));
263        if count > MAX_ENTRIES {
264            return Err(PackError::TooManyObjects(count));
265        }
266        // Quick lower bound sanity: each entry is at least ENTRY_FRAME_LEN bytes.
267        let body_after_header = body.len() - HEADER_LEN;
268        if u64::from(count) * ENTRY_FRAME_LEN as u64 > body_after_header as u64 {
269            return Err(PackError::TooManyObjects(count));
270        }
271
272        let mut report = UnpackReport::default();
273        let mut pending_writes: Vec<Arc<[u8]>> = Vec::new();
274        // Track raw entries we wrote in *this* pack so subsequent delta
275        // entries can resolve their base from memory before falling back
276        // to the on-disk store. We keep the resolved object bytes (raw
277        // serialised SPEC-OBJECTS payload) so the delta apply doesn't
278        // need to re-read them.
279        let mut in_pack: std::collections::HashMap<Hash, Arc<[u8]>> =
280            std::collections::HashMap::new();
281        let mut total_payload: u64 = 0;
282        let mut pos = HEADER_LEN;
283
284        for _ in 0..count {
285            // Frame: [type][payload_len].
286            if pos + ENTRY_FRAME_LEN > split {
287                return Err(PackError::UnexpectedEof);
288            }
289            let etype = pack_bytes[pos];
290            pos += 1;
291            let payload_len =
292                u32::from_le_bytes(pack_bytes[pos..pos + 4].try_into().expect("4 bytes")) as usize;
293            pos += 4;
294
295            total_payload = total_payload.saturating_add(payload_len as u64);
296            if total_payload > MAX_TOTAL_PAYLOAD {
297                return Err(PackError::PackfileTooLarge);
298            }
299            if pos + payload_len > split {
300                return Err(PackError::UnexpectedEof);
301            }
302            let payload = &pack_bytes[pos..pos + payload_len];
303            pos += payload_len;
304
305            match etype {
306                0x00 => {
307                    // raw — validate and stage for writing after the whole pack parses.
308                    validate_storable_object(payload)?;
309                    let stored_hash = hash::hash(payload);
310                    let bytes: Arc<[u8]> = Arc::from(payload);
311                    in_pack.insert(stored_hash, Arc::clone(&bytes));
312                    pending_writes.push(bytes);
313                    report.raw_count += 1;
314                    report.stored.push(stored_hash);
315                }
316                0x02 => {
317                    // delta — payload is [32B base_hash][stream].
318                    if payload.len() < TRAILER_LEN {
319                        return Err(PackError::DeltaEntryTruncated);
320                    }
321                    let mut base_hash = [0u8; 32];
322                    base_hash.copy_from_slice(&payload[..TRAILER_LEN]);
323                    let stream = &payload[TRAILER_LEN..];
324                    // Resolve base: in-pack first, then on-disk.
325                    let base_bytes: std::borrow::Cow<'_, [u8]> =
326                        if let Some(b) = in_pack.get(&base_hash) {
327                            std::borrow::Cow::Borrowed(b.as_ref())
328                        } else if store.contains(&base_hash) {
329                            let bytes = store.read(&base_hash)?;
330                            validate_storable_object(&bytes)?;
331                            std::borrow::Cow::Owned(bytes)
332                        } else {
333                            return Err(PackError::DeltaBaseMissing(hash::to_hex(&base_hash)));
334                        };
335                    validate_delta_result_size(stream)?;
336                    let resolved = delta::decode(base_bytes.as_ref(), stream)?;
337                    validate_storable_object(&resolved)?;
338                    let stored_hash = hash::hash(&resolved);
339                    let bytes: Arc<[u8]> = Arc::from(resolved);
340                    in_pack.insert(stored_hash, Arc::clone(&bytes));
341                    pending_writes.push(bytes);
342                    report.delta_count += 1;
343                    report.stored.push(stored_hash);
344                }
345                0x01 => return Err(PackError::InvalidEntryType(0x01)),
346                other => return Err(PackError::InvalidEntryType(other)),
347            }
348        }
349
350        if pos != split {
351            return Err(PackError::TrailingData);
352        }
353
354        for bytes in pending_writes {
355            store.write(&bytes)?;
356        }
357
358        Ok(report)
359    }
360}
361
362fn validate_storable_object(bytes: &[u8]) -> Result<(), PackError> {
363    if bytes.len() > MAX_RAW_OBJECT_SIZE {
364        return Err(PackError::Store(crate::store::StoreError::ObjectTooLarge));
365    }
366    match crate::serialize::deserialize(bytes).map_err(PackError::InvalidObject)? {
367        Object::Delta(_) => Err(PackError::NonStorableObject),
368        Object::Blob(_)
369        | Object::Tree(_)
370        | Object::Commit(_)
371        | Object::Remix(_)
372        | Object::ChunkedBlob(_)
373        | Object::Tag(_) => Ok(()),
374    }
375}
376
377fn validate_delta_result_size(stream: &[u8]) -> Result<(), PackError> {
378    if stream.len() < delta::HEADER_LEN {
379        return Err(PackError::DeltaApply(MkitError::UnexpectedEof));
380    }
381    let result_len = u32::from_le_bytes(stream[5..9].try_into().expect("4 bytes")) as usize;
382    if result_len > MAX_RAW_OBJECT_SIZE {
383        return Err(PackError::Store(crate::store::StoreError::ObjectTooLarge));
384    }
385    Ok(())
386}
387
388// =========================================================================
389// Tests
390// =========================================================================
391
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create an object store in a fresh temporary directory. The
    /// `TempDir` is returned alongside the store so the caller keeps it
    /// alive for the duration of the test.
    fn fresh_store() -> (TempDir, ObjectStore) {
        let dir = TempDir::new().unwrap();
        let store = ObjectStore::init(dir.path()).unwrap();
        (dir, store)
    }

    /// Serialise `payload` as a blob object and return the object bytes.
    fn write_blob_via_serialize(payload: &[u8]) -> Vec<u8> {
        // Go through the serialize/object stack so the bytes are a real
        // mkit object: the reader runs `validate_storable_object` on every
        // raw payload and every delta result, so arbitrary bytes would be
        // rejected.
        let blob = crate::object::Object::Blob(crate::object::Blob {
            data: payload.to_vec(),
        });
        crate::serialize::serialize(&blob).expect("serialize blob")
    }

    /// Append the trailer (hash of `body`) to a hand-built pack body.
    fn finish_pack_body(mut body: Vec<u8>) -> Vec<u8> {
        let trailer = hash::hash(&body);
        body.extend_from_slice(&trailer);
        body
    }

    // An empty pack is just header + trailer and unpacks to an empty report.
    #[test]
    fn empty_pack_is_44_bytes() {
        let pack = PackWriter::new().finish().unwrap();
        assert_eq!(pack.len(), HEADER_LEN + TRAILER_LEN);
        assert_eq!(&pack[..4], MAGIC);
        assert_eq!(u32::from_le_bytes(pack[4..8].try_into().unwrap()), VERSION);
        assert_eq!(u32::from_le_bytes(pack[8..12].try_into().unwrap()), 0);

        let (_dir, store) = fresh_store();
        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.raw_count, 0);
        assert_eq!(report.delta_count, 0);
        assert!(report.stored.is_empty());
    }

    // One raw entry survives a write/read round trip and lands in the store.
    #[test]
    fn single_raw_roundtrip() {
        let blob = write_blob_via_serialize(b"hello packfile");
        let h = hash::hash(&blob);

        let mut w = PackWriter::new();
        w.push_raw(h, blob.clone()).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.raw_count, 1);
        assert_eq!(report.delta_count, 0);
        assert_eq!(report.stored, vec![h]);
        assert_eq!(store.read(&h).unwrap(), blob);
    }

    // A delta whose base is an earlier raw entry of the same pack.
    #[test]
    fn raw_then_delta_resolves_in_pack() {
        // Two near-identical blobs. Delta should reconstruct the second.
        let mut content_base = vec![0u8; 1024];
        for (i, b) in content_base.iter_mut().enumerate() {
            *b = u8::try_from(i % 251).expect("modulo < 256");
        }
        let mut content_target = content_base.clone();
        content_target[500] = 0xFF;
        content_target[501] = 0xFE;

        let base_obj = write_blob_via_serialize(&content_base);
        let target_obj = write_blob_via_serialize(&content_target);
        let base_hash = hash::hash(&base_obj);
        let target_hash = hash::hash(&target_obj);

        let stream = delta::encode(&base_obj, &target_obj).unwrap();

        let mut w = PackWriter::new();
        w.push_raw(base_hash, base_obj.clone()).unwrap();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.raw_count, 1);
        assert_eq!(report.delta_count, 1);
        assert_eq!(report.stored, vec![base_hash, target_hash]);
        assert_eq!(store.read(&target_hash).unwrap(), target_obj);
    }

    // A raw payload that is not a serialised object is rejected and
    // nothing reaches the store. The pack is built by hand, with a valid
    // trailer, to carry the arbitrary payload.
    #[test]
    fn rejects_raw_payload_that_is_not_canonical_object_without_store_write() {
        let payload = b"not a serialized mkit object".to_vec();
        let payload_hash = hash::hash(&payload);
        let mut body = Vec::new();
        body.extend_from_slice(MAGIC);
        body.extend_from_slice(&VERSION.to_le_bytes());
        body.extend_from_slice(&1u32.to_le_bytes());
        body.push(0x00);
        let payload_len = u32::try_from(payload.len()).unwrap();
        body.extend_from_slice(&payload_len.to_le_bytes());
        body.extend_from_slice(&payload);
        let pack = finish_pack_body(body);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidObject(_)), "got {err:?}");
        assert!(!store.contains(&payload_hash));
    }

    // A serialised `Object::Delta` shipped as a raw entry is not storable.
    #[test]
    fn rejects_raw_delta_object_without_store_write() {
        let delta = crate::object::Object::Delta(crate::object::Delta {
            base_hash: [0xAB; 32],
            result_size: 0,
            instructions: Vec::new(),
        });
        let payload = crate::serialize::serialize(&delta).unwrap();
        let payload_hash = hash::hash(&payload);
        let mut w = PackWriter::new();
        w.push_raw(payload_hash, payload).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::NonStorableObject), "got {err:?}");
        assert!(!store.contains(&payload_hash));
    }

    // A delta that reconstructs to non-object bytes fails the whole unpack:
    // even the valid raw base that precedes it must not be written.
    #[test]
    fn rejects_delta_resolving_to_non_object_without_partial_store_write() {
        let base_obj = write_blob_via_serialize(b"base bytes");
        let base_hash = hash::hash(&base_obj);
        let invalid_target = b"not a serialized object".to_vec();
        let invalid_hash = hash::hash(&invalid_target);
        let stream = delta::encode(&base_obj, &invalid_target).unwrap();

        let mut w = PackWriter::new();
        w.push_raw(base_hash, base_obj).unwrap();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidObject(_)), "got {err:?}");
        assert!(!store.contains(&base_hash));
        assert!(!store.contains(&invalid_hash));
    }

    // A delta header declaring a result one byte over the object size cap
    // is rejected; the stream is hand-built and holds only the header
    // fields (version, base length, result length).
    #[test]
    fn rejects_delta_result_over_object_cap_without_partial_store_write() {
        let base_obj = write_blob_via_serialize(b"base bytes");
        let base_hash = hash::hash(&base_obj);
        let mut stream = Vec::new();
        stream.push(delta::STREAM_VERSION);
        stream.extend_from_slice(&u32::try_from(base_obj.len()).unwrap().to_le_bytes());
        stream.extend_from_slice(
            &u32::try_from(MAX_RAW_OBJECT_SIZE + 1)
                .unwrap()
                .to_le_bytes(),
        );

        let mut w = PackWriter::new();
        w.push_raw(base_hash, base_obj).unwrap();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(
            matches!(
                err,
                PackError::Store(crate::store::StoreError::ObjectTooLarge)
            ),
            "got {err:?}"
        );
        assert!(!store.contains(&base_hash));
    }

    // Extra bytes between the last declared entry and the trailer are an
    // error even though the trailer covers them and therefore verifies.
    #[test]
    fn rejects_trailing_bytes_after_declared_entries_without_store_write() {
        let blob = write_blob_via_serialize(b"trailing bytes test");
        let blob_hash = hash::hash(&blob);
        let mut body = Vec::new();
        body.extend_from_slice(MAGIC);
        body.extend_from_slice(&VERSION.to_le_bytes());
        body.extend_from_slice(&1u32.to_le_bytes());
        body.push(0x00);
        let blob_len = u32::try_from(blob.len()).unwrap();
        body.extend_from_slice(&blob_len.to_le_bytes());
        body.extend_from_slice(&blob);
        body.extend_from_slice(b"junk");
        let pack = finish_pack_body(body);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::TrailingData), "got {err:?}");
        assert!(!store.contains(&blob_hash));
    }

    #[test]
    fn rejects_invalid_magic() {
        // Use an arbitrary invalid 4-byte sequence; the rename gate
        // forbids spelling out the upstream pre-rename magic literally.
        let mut pack = PackWriter::new().finish().unwrap();
        pack[0] = b'X';
        pack[1] = b'X';
        pack[2] = b'X';
        pack[3] = b'X';
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidMagic));
    }

    #[test]
    fn rejects_unknown_version() {
        let mut pack = PackWriter::new().finish().unwrap();
        // version is u32 LE at offset 4
        pack[4] = 99;
        // Editing the header leaves the trailer stale, but it does not
        // need to be recomputed: `read` checks the version before it
        // verifies the trailer, so the call fails with
        // `UnsupportedVersion` first.
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::UnsupportedVersion(99)));
    }

    // Input shorter than header + trailer is rejected before anything else.
    #[test]
    fn rejects_truncated_pack() {
        let pack = vec![b'M', b'K']; // only 2 bytes
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::PackfileTooShort));
    }

    // A single flipped bit in the trailer is reported as corruption.
    #[test]
    fn rejects_bit_flipped_trailer() {
        let blob = write_blob_via_serialize(b"trailer test");
        let h = hash::hash(&blob);
        let mut w = PackWriter::new();
        w.push_raw(h, blob).unwrap();
        let mut pack = w.finish().unwrap();
        let last = pack.len() - 1;
        pack[last] ^= 0x01; // flip one bit
        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::PackfileCorrupted));
    }

    #[test]
    fn rejects_reserved_entry_type_0x01() {
        // Hand-build a pack with one entry of type 0x01.
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.push(0x01); // RESERVED type
        buf.extend_from_slice(&0u32.to_le_bytes()); // payload_len = 0
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidEntryType(0x01)));
    }

    // Same as the reserved-type test, with a type byte that is not
    // assigned at all.
    #[test]
    fn rejects_unknown_entry_type() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.push(0x77); // unknown
        buf.extend_from_slice(&0u32.to_le_bytes());
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        assert!(matches!(err, PackError::InvalidEntryType(0x77)));
    }

    // A delta whose base is in neither the pack nor the store must fail
    // with a dedicated error.
    #[test]
    fn delta_base_missing_is_loud() {
        let mut fake_base = [0u8; 32];
        fake_base[0] = 0xAB;
        // Build a minimal SPEC-DELTA stream that targets a nonexistent base.
        let mut stream = Vec::new();
        stream.push(0x01); // version
        stream.extend_from_slice(&0u32.to_le_bytes()); // base_len
        stream.extend_from_slice(&0u32.to_le_bytes()); // result_len
        let mut w = PackWriter::new();
        w.push_delta(&fake_base, &stream).unwrap();
        let pack = w.finish().unwrap();

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&pack, &store).unwrap_err();
        assert!(matches!(err, PackError::DeltaBaseMissing(_)), "got {err:?}");
    }

    // The frame declares 1,000,000 payload bytes but the trailer follows
    // immediately.
    #[test]
    fn entry_payload_past_trailer_rejected() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&1u32.to_le_bytes());
        buf.push(0x00);
        buf.extend_from_slice(&1_000_000u32.to_le_bytes());
        // No payload bytes follow.
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        assert!(matches!(err, PackError::UnexpectedEof));
    }

    #[test]
    fn entry_count_over_cap_rejected() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.extend_from_slice(&VERSION.to_le_bytes());
        buf.extend_from_slice(&u32::MAX.to_le_bytes());
        // Compute a valid trailer over the header-only body so trailer
        // verification passes and the failure under test is the
        // entry-count cap.
        let trailer = hash::hash(&buf);
        buf.extend_from_slice(&trailer);

        let (_dir, store) = fresh_store();
        let err = PackReader::read(&buf, &store).unwrap_err();
        // `read` checks the count cap after it has verified the trailer,
        // which is why the trailer above has to be correct.
        assert!(
            matches!(err, PackError::TooManyObjects(_)),
            "expected TooManyObjects, got {err:?}"
        );
    }

    // `pack_key` is the plain hash of the complete pack bytes.
    #[test]
    fn pack_key_is_blake3_of_pack_bytes() {
        let blob = write_blob_via_serialize(b"key test");
        let h = hash::hash(&blob);
        let mut w = PackWriter::new();
        w.push_raw(h, blob).unwrap();
        let pack = w.finish().unwrap();
        assert_eq!(pack_key(&pack), hash::hash(&pack));
    }

    // A delta whose base is not in the pack but already in the store.
    #[test]
    fn delta_resolves_against_pre_existing_store_object() {
        let (_dir, store) = fresh_store();
        // Plant the base in the store first.
        let mut content_base = vec![0u8; 256];
        for (i, b) in content_base.iter_mut().enumerate() {
            *b = u8::try_from(i % 251).expect("modulo < 256");
        }
        let base_obj = write_blob_via_serialize(&content_base);
        let base_hash = store.write(&base_obj).unwrap();

        // Pack contains ONLY a delta; the base must be resolved from disk.
        let mut content_target = content_base.clone();
        content_target[100] = 0xAA;
        let target_obj = write_blob_via_serialize(&content_target);
        let target_hash = hash::hash(&target_obj);
        let stream = delta::encode(&base_obj, &target_obj).unwrap();

        let mut w = PackWriter::new();
        w.push_delta(&base_hash, &stream).unwrap();
        let pack = w.finish().unwrap();

        let report = PackReader::read(&pack, &store).unwrap();
        assert_eq!(report.delta_count, 1);
        assert_eq!(report.raw_count, 0);
        assert_eq!(store.read(&target_hash).unwrap(), target_obj);
    }
}