Skip to main content

sley_pack/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::sync::Arc;
13
14#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct PackEntry {
16    pub oid: ObjectId,
17    pub compressed_size: u64,
18    pub uncompressed_size: u64,
19    pub offset: u64,
20}
21
22/// Default sliding-window size used by [`PackFile::write_packed`].
23///
24/// Each object is compared against up to this many previously emitted
25/// candidates of the same type when searching for a small delta. Matches git's
26/// default `pack.window`.
27pub const DEFAULT_PACK_WINDOW: usize = 10;
28
29/// Default maximum delta chain depth used by [`PackFile::write_packed`].
30///
31/// A delta may reference a base that is itself a delta; this bounds how long
32/// such chains may grow so that reconstructing any object stays cheap and the
33/// reader's recursion stays shallow. Matches git's default `pack.depth`.
34pub const DEFAULT_PACK_DEPTH: usize = 50;
35
36/// Object-count threshold before pack payload compression is fanned out across
37/// worker threads. Below this, thread setup and extra buffering cost more than
38/// they save.
39const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;
40
41/// Keep parallel compression bounded. Git gets much of its wall-clock win from
42/// using several cores, but unbounded threads can steal cache from delta
43/// planning and inflate peak memory on large packs.
44const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
45
46/// Options controlling sliding-window delta selection during pack generation.
47///
48/// Construct with [`PackWriteOptions::new`] (sensible defaults) and adjust with
49/// the builder-style setters, or build one directly. Used by
50/// [`PackFile::write_packed_with_options`] and [`PackFile::write_thin`].
51#[derive(Debug, Clone)]
52pub struct PackWriteOptions {
53    /// Number of previous same-type candidates each object is deltified
54    /// against. Larger windows find better deltas at higher cost.
55    pub window: usize,
56    /// Maximum delta chain depth. A value of `0` disables deltification.
57    pub depth: usize,
58    /// When `true`, in-pack deltas are encoded as ofs-deltas (the default and
59    /// git's preference). When `false`, in-pack deltas use ref-deltas. Deltas
60    /// against external thin-pack bases always use ref-deltas regardless.
61    pub prefer_ofs_delta: bool,
62    /// External base objects, keyed by object id, that are *not* written into
63    /// the pack but may be used as delta bases. Supplying any entries here
64    /// produces a thin pack (see [`PackFile::write_thin`]). Empty by default,
65    /// yielding a self-contained pack.
66    pub thin_bases: HashMap<ObjectId, EncodedObject>,
67    /// When `true` (the default), objects are reordered by type and size for
68    /// better delta locality. When `false`, the input order is preserved (the
69    /// emitted pack lists objects in the order supplied); deltas then only
70    /// reference earlier input objects. Reordering is always skipped when
71    /// deltification is disabled (`depth == 0`), since it has no effect there.
72    pub reorder: bool,
73}
74
75impl Default for PackWriteOptions {
76    fn default() -> Self {
77        Self::new()
78    }
79}
80
81impl PackWriteOptions {
82    /// Options with git-compatible defaults: window
83    /// [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`], ofs-deltas, and
84    /// no external thin bases.
85    pub fn new() -> Self {
86        Self {
87            window: DEFAULT_PACK_WINDOW,
88            depth: DEFAULT_PACK_DEPTH,
89            prefer_ofs_delta: true,
90            thin_bases: HashMap::new(),
91            reorder: true,
92        }
93    }
94
95    /// Set the sliding-window size.
96    pub fn with_window(mut self, window: usize) -> Self {
97        self.window = window;
98        self
99    }
100
101    /// Set the maximum delta chain depth (`0` disables deltas).
102    pub fn with_depth(mut self, depth: usize) -> Self {
103        self.depth = depth;
104        self
105    }
106
107    /// Choose whether in-pack deltas use ofs-delta (`true`) or ref-delta
108    /// (`false`) base references.
109    pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
110        self.prefer_ofs_delta = prefer_ofs_delta;
111        self
112    }
113
114    /// Provide the set of external base objects permitted for a thin pack.
115    pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
116        self.thin_bases = thin_bases;
117        self
118    }
119
120    /// Choose whether objects may be reordered for delta locality (`true`) or
121    /// emitted in input order (`false`).
122    pub fn with_reorder(mut self, reorder: bool) -> Self {
123        self.reorder = reorder;
124        self
125    }
126}
127
128#[derive(Debug, Clone, PartialEq, Eq)]
129pub struct RepackPolicy {
130    pub write_bitmaps: bool,
131    pub cruft_packs: bool,
132    pub geometric_factor: Option<u8>,
133}
134
135#[derive(Debug, Clone, PartialEq, Eq)]
136pub struct PackFile {
137    pub version: u32,
138    pub entries: Vec<PackObject>,
139    pub checksum: ObjectId,
140}
141
142#[derive(Debug, Clone, PartialEq, Eq)]
143pub struct PackObject {
144    pub entry: PackEntry,
145    pub object: EncodedObject,
146}
147
148#[derive(Debug, Clone, PartialEq, Eq)]
149pub struct PackWrite {
150    pub pack: Vec<u8>,
151    pub index: Vec<u8>,
152    pub checksum: ObjectId,
153    pub entries: Vec<PackIndexEntry>,
154}
155
156#[derive(Debug, Clone, Copy, PartialEq, Eq)]
157pub struct PackInput<'a> {
158    pub oid: &'a ObjectId,
159    pub object: &'a EncodedObject,
160}
161
162#[derive(Debug, Clone, PartialEq, Eq)]
163pub struct PackIndexBuild {
164    pub index: Vec<u8>,
165    pub pack_checksum: ObjectId,
166    pub entries: Vec<PackIndexEntry>,
167}
168
169#[derive(Debug, Clone, PartialEq, Eq)]
170pub struct PackIndex {
171    pub version: u32,
172    pub fanout: [u32; 256],
173    pub entries: Vec<PackIndexEntry>,
174    pub pack_checksum: ObjectId,
175    pub index_checksum: ObjectId,
176}
177
178#[derive(Debug, Clone, PartialEq, Eq)]
179pub struct PackIndexEntry {
180    pub oid: ObjectId,
181    pub crc32: u32,
182    pub offset: u64,
183}
184
185#[derive(Debug, Clone, PartialEq, Eq)]
186pub struct PackReverseIndex {
187    pub version: u32,
188    pub format: ObjectFormat,
189    pub positions: Vec<u32>,
190    pub pack_checksum: ObjectId,
191    pub index_checksum: ObjectId,
192}
193
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub struct PackMtimes {
196    pub version: u32,
197    pub format: ObjectFormat,
198    pub mtimes: Vec<u32>,
199    pub pack_checksum: ObjectId,
200    pub index_checksum: ObjectId,
201}
202
203#[derive(Debug, Clone, PartialEq, Eq)]
204pub struct PackBitmapIndex {
205    pub version: u16,
206    pub format: ObjectFormat,
207    pub options: u16,
208    pub pack_checksum: ObjectId,
209    pub index_checksum: ObjectId,
210    pub type_bitmaps: PackBitmapTypeBitmaps,
211    pub entries: Vec<PackBitmapEntry>,
212    pub name_hash_cache: Option<Vec<u32>>,
213}
214
215#[derive(Debug, Clone, PartialEq, Eq)]
216pub struct PackBitmapTypeBitmaps {
217    pub commits: EwahBitmap,
218    pub trees: EwahBitmap,
219    pub blobs: EwahBitmap,
220    pub tags: EwahBitmap,
221}
222
223#[derive(Debug, Clone, PartialEq, Eq)]
224pub struct PackBitmapEntry {
225    /// The commit's position in the *oid-sorted* pack index (`.idx` order),
226    /// NOT the pack-order position used for the bitmap's bit numbering.
227    /// Upstream writes `oid_pos(...)` here (pack-bitmap-write.c) and reads it
228    /// back via `nth_packed_object_id` (pack-bitmap.c).
229    pub object_position: u32,
230    pub xor_offset: u8,
231    pub flags: u8,
232    /// Reachability bitmap; bit `i` refers to the `i`-th object in *pack
233    /// order* (offset order), as mapped by the pack's reverse index.
234    pub bitmap: EwahBitmap,
235}
236
237#[derive(Debug, Clone, PartialEq, Eq)]
238pub struct EwahBitmap {
239    pub bit_size: u32,
240    pub words: Vec<u64>,
241    pub rlw_position: u32,
242}
243
244#[derive(Debug, Clone, PartialEq, Eq)]
245pub struct MultiPackIndex {
246    pub version: u8,
247    pub format: ObjectFormat,
248    pub pack_count: u32,
249    pub pack_names: Vec<String>,
250    pub object_count: u32,
251    pub fanout: [u32; 256],
252    pub objects: Vec<MultiPackIndexEntry>,
253    pub reverse_index: Option<Vec<u32>>,
254    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
255    pub chunks: Vec<MultiPackIndexChunk>,
256    pub checksum: ObjectId,
257}
258
259#[derive(Debug, Clone, PartialEq, Eq)]
260pub struct MultiPackIndexEntry {
261    pub oid: ObjectId,
262    pub pack_int_id: u32,
263    pub offset: u64,
264}
265
266#[derive(Debug, Clone, PartialEq, Eq)]
267pub struct MultiPackBitmapPack {
268    pub bitmap_pos: u32,
269    pub bitmap_nr: u32,
270}
271
272#[derive(Debug, Clone, PartialEq, Eq)]
273pub struct MultiPackIndexChunk {
274    pub id: [u8; 4],
275    pub offset: u64,
276    pub len: u64,
277}
278
279#[derive(Debug, Clone, Copy, PartialEq, Eq)]
280enum PackObjectKind {
281    Commit,
282    Tree,
283    Blob,
284    Tag,
285    OfsDelta,
286    RefDelta,
287}
288
289#[derive(Debug, Clone, PartialEq, Eq)]
290enum ParsedPackEntry {
291    Resolved(PackObject),
292    Delta {
293        base: DeltaBase,
294        compressed_size: u64,
295        delta_size: u64,
296        offset: u64,
297        delta: Vec<u8>,
298    },
299}
300
301#[derive(Debug, Clone, PartialEq, Eq)]
302enum DeltaBase {
303    Offset(u64),
304    Ref(ObjectId),
305}
306
307impl PackFile {
308    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
309        Self::parse(bytes, ObjectFormat::Sha1)
310    }
311
312    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
313        Self::parse_with_base(bytes, format, |_| Ok(None))
314    }
315
316    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
317        Self::parse(&bundle.pack, bundle.format)
318    }
319
320    pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
321        let PackIndexBuild {
322            index,
323            pack_checksum,
324            entries,
325        } = PackIndex::write_v2_for_pack(bytes, format)?;
326        Ok(PackWrite {
327            pack: bytes.to_vec(),
328            index,
329            checksum: pack_checksum,
330            entries,
331        })
332    }
333
334    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
335    where
336        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
337    {
338        Self::parse_with_base(bytes, format, external_base)
339    }
340
341    fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
342    where
343        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
344    {
345        let trailer_len = format.raw_len();
346        if bytes.len() < 12 + trailer_len {
347            return Err(GitError::InvalidFormat("pack file too short".into()));
348        }
349        let trailer_offset = bytes.len() - trailer_len;
350        let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
351        let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
352        if checksum != expected {
353            return Err(GitError::InvalidFormat(format!(
354                "pack checksum mismatch: expected {expected}, got {checksum}"
355            )));
356        }
357
358        if &bytes[..4] != b"PACK" {
359            return Err(GitError::InvalidFormat("missing PACK signature".into()));
360        }
361        let version = u32_be(&bytes[4..8]);
362        if version != 2 && version != 3 {
363            return Err(GitError::Unsupported(format!("pack version {version}")));
364        }
365        let count = u32_be(&bytes[8..12]) as usize;
366        let mut offset = 12usize;
367        let mut entries = Vec::with_capacity(count);
368        for _ in 0..count {
369            let entry_offset = offset;
370            let header = parse_entry_header(bytes, &mut offset)?;
371            let base =
372                match header.kind {
373                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
374                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
375                    )),
376                    PackObjectKind::RefDelta => {
377                        let hash_len = format.raw_len();
378                        if offset + hash_len > trailer_offset {
379                            return Err(GitError::InvalidFormat(
380                                "truncated ref-delta base object id".into(),
381                            ));
382                        }
383                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
384                        offset += hash_len;
385                        Some(DeltaBase::Ref(oid))
386                    }
387                    _ => None,
388                };
389            let mut body = Vec::new();
390            let consumed = inflate_into(
391                &bytes[offset..trailer_offset],
392                &mut body,
393                header.size.min(usize::MAX as u64) as usize,
394            )?;
395            if body.len() as u64 != header.size {
396                return Err(GitError::InvalidObject(format!(
397                    "pack object declared {} bytes, decoded {}",
398                    header.size,
399                    body.len()
400                )));
401            }
402            if consumed == 0 {
403                return Err(GitError::InvalidFormat(
404                    "empty compressed pack entry".into(),
405                ));
406            }
407            offset = offset
408                .checked_add(consumed)
409                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
410            if offset > trailer_offset {
411                return Err(GitError::InvalidFormat(
412                    "pack entry extends past checksum".into(),
413                ));
414            }
415            if let Some(base) = base {
416                entries.push(ParsedPackEntry::Delta {
417                    base,
418                    compressed_size: consumed as u64,
419                    delta_size: header.size,
420                    offset: entry_offset as u64,
421                    delta: body,
422                });
423            } else {
424                let object_type = match header.kind {
425                    PackObjectKind::Commit => ObjectType::Commit,
426                    PackObjectKind::Tree => ObjectType::Tree,
427                    PackObjectKind::Blob => ObjectType::Blob,
428                    PackObjectKind::Tag => ObjectType::Tag,
429                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
430                };
431                let object = EncodedObject::new(object_type, body);
432                let oid = object.object_id(format)?;
433                entries.push(ParsedPackEntry::Resolved(PackObject {
434                    entry: PackEntry {
435                        oid,
436                        compressed_size: consumed as u64,
437                        uncompressed_size: header.size,
438                        offset: entry_offset as u64,
439                    },
440                    object,
441                }));
442            }
443        }
444        if offset != trailer_offset {
445            return Err(GitError::InvalidFormat(format!(
446                "pack has {} trailing bytes before checksum",
447                trailer_offset - offset
448            )));
449        }
450        Ok(Self {
451            version,
452            entries: resolve_pack_entries(entries, format, &mut external_base)?,
453            checksum,
454        })
455    }
456
457    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
458    where
459        T: Borrow<EncodedObject>,
460    {
461        Self::write_undeltified(objects, ObjectFormat::Sha1)
462    }
463
464    /// Write a pack with every object stored undeltified (no delta entries).
465    ///
466    /// This is the simple, self-contained encoding; objects appear in the given
467    /// order. For smaller output that exploits similarity between objects, use
468    /// [`PackFile::write_packed`].
469    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
470    where
471        T: Borrow<EncodedObject>,
472    {
473        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
474        Self::write_packed_impl(objects, format, &options)
475    }
476
477    /// Write a pack using sliding-window delta selection with git-compatible
478    /// defaults (window [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`],
479    /// ofs-deltas, self-contained).
480    ///
481    /// Objects are grouped by type and ordered for good deltas, then each is
482    /// compared against a window of previously emitted candidates; the smallest
483    /// acceptable delta is kept, otherwise the object is stored undeltified. The
484    /// result round-trips through [`PackFile::parse`].
485    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
486    where
487        T: Borrow<EncodedObject>,
488    {
489        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
490    }
491
492    /// Like [`PackFile::write_packed`] but with caller-supplied
493    /// [`PackWriteOptions`] (window, depth, base-reference style, and optional
494    /// external thin bases).
495    pub fn write_packed_with_options<T>(
496        objects: &[T],
497        format: ObjectFormat,
498        options: &PackWriteOptions,
499    ) -> Result<PackWrite>
500    where
501        T: Borrow<EncodedObject>,
502    {
503        Self::write_packed_impl(objects, format, options)
504    }
505
506    /// Like [`PackFile::write_packed`], but uses caller-supplied object ids
507    /// instead of re-hashing each object before pack planning.
508    ///
509    /// This is intended for object-database paths that reached each object by
510    /// its id and already trust that id/object mapping. The function validates
511    /// id formats and duplicate ids, but it does not re-hash object bodies; use
512    /// [`PackFile::write_packed`] when the ids are not already known to be
513    /// canonical.
514    pub fn write_packed_with_known_ids(
515        inputs: &[PackInput<'_>],
516        format: ObjectFormat,
517    ) -> Result<PackWrite> {
518        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
519    }
520
521    /// Like [`PackFile::write_packed_with_known_ids`] but with caller-supplied
522    /// [`PackWriteOptions`].
523    pub fn write_packed_with_known_ids_and_options(
524        inputs: &[PackInput<'_>],
525        format: ObjectFormat,
526        options: &PackWriteOptions,
527    ) -> Result<PackWrite> {
528        if inputs.len() > u32::MAX as usize {
529            return Err(GitError::InvalidFormat("too many pack objects".into()));
530        }
531        let mut objects = Vec::with_capacity(inputs.len());
532        let mut object_ids = Vec::with_capacity(inputs.len());
533        for input in inputs {
534            if input.oid.format() != format {
535                return Err(GitError::InvalidObjectId(format!(
536                    "pack object id {} uses {}, pack uses {}",
537                    input.oid,
538                    input.oid.format().name(),
539                    format.name()
540                )));
541            }
542            objects.push(input.object);
543            object_ids.push(*input.oid);
544        }
545        Self::write_packed_from_parts(objects, object_ids, format, options)
546    }
547
548    /// Write a thin pack: objects may be deltified against `external_bases`
549    /// that are *not* included in the pack, referenced by ref-delta to their
550    /// object id.
551    ///
552    /// The receiver must already have (or otherwise obtain) those base objects
553    /// and resolve the pack with [`PackFile::parse_thin`]. Window and depth use
554    /// the defaults; pass options via [`PackFile::write_packed_with_options`]
555    /// with [`PackWriteOptions::with_thin_bases`] for finer control.
556    pub fn write_thin<T>(
557        objects: &[T],
558        format: ObjectFormat,
559        external_bases: HashMap<ObjectId, EncodedObject>,
560    ) -> Result<PackWrite>
561    where
562        T: Borrow<EncodedObject>,
563    {
564        let options = PackWriteOptions::new().with_thin_bases(external_bases);
565        Self::write_packed_impl(objects, format, &options)
566    }
567
568    fn write_packed_impl<T>(
569        objects: &[T],
570        format: ObjectFormat,
571        options: &PackWriteOptions,
572    ) -> Result<PackWrite>
573    where
574        T: Borrow<EncodedObject>,
575    {
576        if objects.len() > u32::MAX as usize {
577            return Err(GitError::InvalidFormat("too many pack objects".into()));
578        }
579        let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
580
581        // Compute object ids up front; they are needed both for the index and,
582        // for ref-deltas, inside the pack entries themselves.
583        let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
584        for object in &objects {
585            object_ids.push(object.object_id(format)?);
586        }
587        Self::write_packed_from_parts(objects, object_ids, format, options)
588    }
589
590    fn write_packed_from_parts(
591        objects: Vec<&EncodedObject>,
592        object_ids: Vec<ObjectId>,
593        format: ObjectFormat,
594        options: &PackWriteOptions,
595    ) -> Result<PackWrite> {
596        let mut seen = HashSet::with_capacity(object_ids.len());
597        for oid in &object_ids {
598            if !seen.insert(oid) {
599                return Err(GitError::InvalidFormat(format!(
600                    "pack contains duplicate object id {oid}"
601                )));
602            }
603        }
604
605        // Validate external thin bases share the pack's hash format.
606        for oid in options.thin_bases.keys() {
607            if oid.format() != format {
608                return Err(GitError::InvalidObjectId(
609                    "thin pack base object id format does not match pack format".into(),
610                ));
611            }
612        }
613
614        // Decide, for each object, whether it is stored undeltified or as a
615        // delta against another object (in-pack or an external thin base), and
616        // obtain the emit order. In-pack deltas only ever reference candidates
617        // that appear earlier in `order`, so emitting in `order` guarantees a
618        // base is always written before any object that deltas against it.
619        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
620
621        let mut pack = Vec::new();
622        pack.extend_from_slice(b"PACK");
623        pack.extend_from_slice(&2u32.to_be_bytes());
624        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
625
626        let mut index_entries = Vec::with_capacity(objects.len());
627        // Pack offset at which each original object index was written, or
628        // `None` until it has been emitted.
629        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
630
631        let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
632
633        for (order_pos, &idx) in order.iter().enumerate() {
634            let offset = pack.len() as u64;
635            let mut entry_bytes = Vec::new();
636            match &plan[idx].base {
637                PlannedBase::None => {
638                    write_entry_header(
639                        &mut entry_bytes,
640                        objects[idx].object_type,
641                        objects[idx].body.len() as u64,
642                    );
643                }
644                PlannedBase::InPack { base_idx, delta } => {
645                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
646                        GitError::InvalidFormat(
647                            "in-pack delta base emitted after dependent object".into(),
648                        )
649                    })?;
650                    if options.prefer_ofs_delta {
651                        write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
652                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
653                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
654                        })?;
655                        write_ofs_delta_offset(&mut entry_bytes, relative)?;
656                    } else {
657                        write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
658                        entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
659                    }
660                }
661                PlannedBase::External { base_oid, delta } => {
662                    write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
663                    entry_bytes.extend_from_slice(base_oid.as_bytes());
664                }
665            }
666            entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
667            let crc32 = crc32fast::hash(&entry_bytes);
668            pack.extend_from_slice(&entry_bytes);
669            written_offsets[idx] = Some(offset);
670            index_entries.push(PackIndexEntry {
671                oid: object_ids[idx].clone(),
672                crc32,
673                offset,
674            });
675        }
676
677        let checksum = sley_core::digest_bytes(format, &pack)?;
678        pack.extend_from_slice(checksum.as_bytes());
679        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
680        Ok(PackWrite {
681            pack,
682            index,
683            checksum,
684            entries: index_entries,
685        })
686    }
687}
688
689impl PackIndex {
690    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
691        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
692    }
693
694    pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
695        let trailer_len = format.raw_len();
696        if pack_bytes.len() < 12 + trailer_len {
697            return Err(GitError::InvalidFormat("pack file too short".into()));
698        }
699        let trailer_offset = pack_bytes.len() - trailer_len;
700        let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
701        let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
702        if pack_checksum != expected {
703            return Err(GitError::InvalidFormat(format!(
704                "pack checksum mismatch: expected {expected}, got {pack_checksum}"
705            )));
706        }
707
708        if &pack_bytes[..4] != b"PACK" {
709            return Err(GitError::InvalidFormat("missing PACK signature".into()));
710        }
711        let version = u32_be(&pack_bytes[4..8]);
712        if version != 2 && version != 3 {
713            return Err(GitError::Unsupported(format!("pack version {version}")));
714        }
715        let count = u32_be(&pack_bytes[8..12]) as usize;
716        let mut offset = 12usize;
717        let mut parsed_entries = Vec::with_capacity(count);
718        let mut raw_entries = Vec::with_capacity(count);
719        for _ in 0..count {
720            let entry_offset = offset;
721            let header = parse_entry_header(pack_bytes, &mut offset)?;
722            let base = match header.kind {
723                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
724                    pack_bytes,
725                    &mut offset,
726                    entry_offset as u64,
727                )?)),
728                PackObjectKind::RefDelta => {
729                    let hash_len = format.raw_len();
730                    if offset + hash_len > trailer_offset {
731                        return Err(GitError::InvalidFormat(
732                            "truncated ref-delta base object id".into(),
733                        ));
734                    }
735                    let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
736                    offset += hash_len;
737                    Some(DeltaBase::Ref(oid))
738                }
739                _ => None,
740            };
741            let mut body = Vec::new();
742            let consumed = inflate_into(
743                &pack_bytes[offset..trailer_offset],
744                &mut body,
745                header.size.min(usize::MAX as u64) as usize,
746            )?;
747            if body.len() as u64 != header.size {
748                return Err(GitError::InvalidObject(format!(
749                    "pack object declared {} bytes, decoded {}",
750                    header.size,
751                    body.len()
752                )));
753            }
754            if consumed == 0 {
755                return Err(GitError::InvalidFormat(
756                    "empty compressed pack entry".into(),
757                ));
758            }
759            offset = offset
760                .checked_add(consumed)
761                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
762            if offset > trailer_offset {
763                return Err(GitError::InvalidFormat(
764                    "pack entry extends past checksum".into(),
765                ));
766            }
767            raw_entries.push((
768                entry_offset as u64,
769                crc32fast::hash(&pack_bytes[entry_offset..offset]),
770            ));
771            if let Some(base) = base {
772                parsed_entries.push(ParsedPackEntry::Delta {
773                    base,
774                    compressed_size: consumed as u64,
775                    delta_size: header.size,
776                    offset: entry_offset as u64,
777                    delta: body,
778                });
779            } else {
780                let object_type = match header.kind {
781                    PackObjectKind::Commit => ObjectType::Commit,
782                    PackObjectKind::Tree => ObjectType::Tree,
783                    PackObjectKind::Blob => ObjectType::Blob,
784                    PackObjectKind::Tag => ObjectType::Tag,
785                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
786                };
787                let object = EncodedObject::new(object_type, body);
788                let oid = object.object_id(format)?;
789                parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
790                    entry: PackEntry {
791                        oid,
792                        compressed_size: consumed as u64,
793                        uncompressed_size: header.size,
794                        offset: entry_offset as u64,
795                    },
796                    object,
797                }));
798            }
799        }
800        if offset != trailer_offset {
801            return Err(GitError::InvalidFormat(format!(
802                "pack has {} trailing bytes before checksum",
803                trailer_offset - offset
804            )));
805        }
806
807        let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
808        let entries = resolved
809            .iter()
810            .zip(raw_entries)
811            .map(|(object, (offset, crc32))| PackIndexEntry {
812                oid: object.entry.oid,
813                crc32,
814                offset,
815            })
816            .collect::<Vec<_>>();
817        let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
818        Ok(PackIndexBuild {
819            index,
820            pack_checksum,
821            entries,
822        })
823    }
824
825    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
826        Self::parse(bytes, ObjectFormat::Sha1)
827    }
828
829    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
830        let hash_len = format.raw_len();
831        if bytes.len() < 4 {
832            return Err(GitError::InvalidFormat("pack index too short".into()));
833        }
834        if bytes[..4] != [0xff, b't', b'O', b'c'] {
835            return Self::parse_v1(bytes, format);
836        }
837        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
838            return Err(GitError::InvalidFormat("pack index too short".into()));
839        }
840        let version = u32_be(&bytes[4..8]);
841        if version != 2 {
842            return Err(GitError::Unsupported(format!(
843                "pack index version {version}"
844            )));
845        }
846        let index_checksum_offset = bytes.len() - hash_len;
847        let actual_index_checksum =
848            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
849        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
850        if actual_index_checksum != index_checksum {
851            return Err(GitError::InvalidFormat(format!(
852                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
853            )));
854        }
855
856        let mut offset = 8usize;
857        let mut fanout = [0u32; 256];
858        let mut previous = 0u32;
859        for slot in &mut fanout {
860            *slot = u32_be(&bytes[offset..offset + 4]);
861            if *slot < previous {
862                return Err(GitError::InvalidFormat(
863                    "pack index fanout is not monotonic".into(),
864                ));
865            }
866            previous = *slot;
867            offset += 4;
868        }
869        let count = fanout[255] as usize;
870        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
871        offset = oid_table.end;
872        let crc_table = checked_range(offset, count, 4, bytes.len())?;
873        offset = crc_table.end;
874        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
875        offset = small_offset_table.end;
876
877        let large_offset_count = (0..count)
878            .filter(|idx| {
879                let start = small_offset_table.start + idx * 4;
880                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
881            })
882            .count();
883        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
884        offset = large_offset_table.end;
885
886        let expected_trailer_offset = bytes.len() - hash_len * 2;
887        if offset != expected_trailer_offset {
888            return Err(GitError::InvalidFormat(format!(
889                "pack index has {} unexpected bytes before trailer",
890                expected_trailer_offset.saturating_sub(offset)
891            )));
892        }
893        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
894
895        let mut entries = Vec::with_capacity(count);
896        for idx in 0..count {
897            let oid_start = oid_table.start + idx * hash_len;
898            let crc_start = crc_table.start + idx * 4;
899            let offset_start = small_offset_table.start + idx * 4;
900            let oid_bytes = &bytes[oid_start..oid_start + hash_len];
901            // Object ids must be strictly ascending: lookup binary-searches them,
902            // and the fanout must match the first byte. A malformed/forged index
903            // (e.g. from a received pack) would otherwise yield silent misses.
904            if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
905                return Err(GitError::InvalidFormat(
906                    "pack index object ids are not strictly ascending".into(),
907                ));
908            }
909            let expected_min = if oid_bytes[0] == 0 {
910                0
911            } else {
912                fanout[usize::from(oid_bytes[0] - 1)]
913            };
914            if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
915                return Err(GitError::InvalidFormat(
916                    "pack index object id is outside its fanout bucket".into(),
917                ));
918            }
919            let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
920            let offset = if raw_offset & 0x8000_0000 == 0 {
921                u64::from(raw_offset)
922            } else {
923                let large_idx = (raw_offset & 0x7fff_ffff) as usize;
924                let large_start = large_offset_table.start + large_idx * 8;
925                if large_idx >= large_offset_count {
926                    return Err(GitError::InvalidFormat(
927                        "pack index large offset points past table".into(),
928                    ));
929                }
930                u64_be(&bytes[large_start..large_start + 8])
931            };
932            entries.push(PackIndexEntry {
933                oid: ObjectId::from_raw(format, oid_bytes)?,
934                crc32: u32_be(&bytes[crc_start..crc_start + 4]),
935                offset,
936            });
937        }
938        Ok(Self {
939            version,
940            fanout,
941            entries,
942            pack_checksum,
943            index_checksum,
944        })
945    }
946
947    fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
948        let hash_len = format.raw_len();
949        if bytes.len() < 256 * 4 + 2 * hash_len {
950            return Err(GitError::InvalidFormat("pack index too short".into()));
951        }
952        let index_checksum_offset = bytes.len() - hash_len;
953        let actual_index_checksum =
954            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
955        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
956        if actual_index_checksum != index_checksum {
957            return Err(GitError::InvalidFormat(format!(
958                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
959            )));
960        }
961
962        let mut offset = 0usize;
963        let mut fanout = [0u32; 256];
964        let mut previous = 0u32;
965        for slot in &mut fanout {
966            *slot = u32_be(&bytes[offset..offset + 4]);
967            if *slot < previous {
968                return Err(GitError::InvalidFormat(
969                    "pack index fanout is not monotonic".into(),
970                ));
971            }
972            previous = *slot;
973            offset += 4;
974        }
975        let count = fanout[255] as usize;
976        let entry_len = hash_len
977            .checked_add(4)
978            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
979        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
980        offset = entry_table.end;
981        let expected_trailer_offset = bytes.len() - hash_len * 2;
982        if offset != expected_trailer_offset {
983            return Err(GitError::InvalidFormat(format!(
984                "pack index has {} unexpected bytes before trailer",
985                expected_trailer_offset.saturating_sub(offset)
986            )));
987        }
988        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
989
990        let mut entries = Vec::with_capacity(count);
991        let mut previous_oid: Option<ObjectId> = None;
992        for idx in 0..count {
993            let start = entry_table.start + idx * entry_len;
994            let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
995            if let Some(previous) = &previous_oid
996                && previous.as_bytes() >= oid.as_bytes()
997            {
998                return Err(GitError::InvalidFormat(
999                    "pack index object ids are not strictly sorted".into(),
1000                ));
1001            }
1002            previous_oid = Some(oid);
1003            entries.push(PackIndexEntry {
1004                oid,
1005                crc32: 0,
1006                offset: u64::from(u32_be(&bytes[start..start + 4])),
1007            });
1008        }
1009        Ok(Self {
1010            version: 1,
1011            fanout,
1012            entries,
1013            pack_checksum,
1014            index_checksum,
1015        })
1016    }
1017
1018    pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1019        self.entries
1020            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1021            .ok()
1022            .map(|idx| &self.entries[idx])
1023    }
1024
1025    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1026        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1027    }
1028
1029    pub fn write_v2(
1030        format: ObjectFormat,
1031        entries: &[PackIndexEntry],
1032        pack_checksum: &ObjectId,
1033    ) -> Result<Vec<u8>> {
1034        if pack_checksum.format() != format {
1035            return Err(GitError::InvalidObjectId(
1036                "pack checksum format does not match index format".into(),
1037            ));
1038        }
1039        let mut entries = entries.iter().collect::<Vec<_>>();
1040        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1041        for pair in entries.windows(2) {
1042            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1043                return Err(GitError::InvalidFormat(format!(
1044                    "pack index contains duplicate object id {}",
1045                    pair[0].oid
1046                )));
1047            }
1048        }
1049        let mut fanout = [0u32; 256];
1050        for entry in &entries {
1051            if entry.oid.format() != format {
1052                return Err(GitError::InvalidObjectId(
1053                    "pack index entry format does not match index format".into(),
1054                ));
1055            }
1056            let first = entry.oid.as_bytes()[0] as usize;
1057            fanout[first] = fanout[first]
1058                .checked_add(1)
1059                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1060        }
1061        let mut running = 0u32;
1062        for slot in &mut fanout {
1063            running = running
1064                .checked_add(*slot)
1065                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1066            *slot = running;
1067        }
1068
1069        let mut index = Vec::new();
1070        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1071        index.extend_from_slice(&2u32.to_be_bytes());
1072        for count in fanout {
1073            index.extend_from_slice(&count.to_be_bytes());
1074        }
1075        for entry in &entries {
1076            index.extend_from_slice(entry.oid.as_bytes());
1077        }
1078        for entry in &entries {
1079            index.extend_from_slice(&entry.crc32.to_be_bytes());
1080        }
1081
1082        let mut large_offsets = Vec::new();
1083        for entry in &entries {
1084            if entry.offset < 0x8000_0000 {
1085                index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1086            } else {
1087                if large_offsets.len() > 0x7fff_ffff {
1088                    return Err(GitError::InvalidFormat(
1089                        "too many large pack offsets".into(),
1090                    ));
1091                }
1092                let large_idx = large_offsets.len() as u32;
1093                index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1094                large_offsets.push(entry.offset);
1095            }
1096        }
1097        for offset in large_offsets {
1098            index.extend_from_slice(&offset.to_be_bytes());
1099        }
1100        index.extend_from_slice(pack_checksum.as_bytes());
1101        let index_checksum = sley_core::digest_bytes(format, &index)?;
1102        index.extend_from_slice(index_checksum.as_bytes());
1103        Ok(index)
1104    }
1105}
1106
1107/// The `.rev` table for a pack: index positions (the rank of each object in
1108/// the oid-sorted `.idx`) listed in pack order (ascending pack offset), as
1109/// upstream `write_rev_file` lays them out. Accepts `entries` in any order;
1110/// the result feeds [`PackReverseIndex::write`].
1111pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1112    let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1113    oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1114    let mut index_position = vec![0u32; entries.len()];
1115    for (position, &entry) in oid_sorted.iter().enumerate() {
1116        index_position[entry] = position as u32;
1117    }
1118    let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1119    by_offset.sort_by_key(|&entry| entries[entry].offset);
1120    by_offset
1121        .into_iter()
1122        .map(|entry| index_position[entry])
1123        .collect()
1124}
1125
1126impl PackReverseIndex {
1127    pub fn write(
1128        format: ObjectFormat,
1129        positions: &[u32],
1130        pack_checksum: &ObjectId,
1131    ) -> Result<Vec<u8>> {
1132        if pack_checksum.format() != format {
1133            return Err(GitError::InvalidObjectId(
1134                "pack checksum format does not match reverse index format".into(),
1135            ));
1136        }
1137        validate_position_permutation(positions)?;
1138
1139        let mut out = Vec::new();
1140        out.extend_from_slice(b"RIDX");
1141        out.extend_from_slice(&1u32.to_be_bytes());
1142        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1143        for position in positions {
1144            out.extend_from_slice(&position.to_be_bytes());
1145        }
1146        out.extend_from_slice(pack_checksum.as_bytes());
1147        let checksum = sley_core::digest_bytes(format, &out)?;
1148        out.extend_from_slice(checksum.as_bytes());
1149        Ok(out)
1150    }
1151
1152    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1153        let hash_len = format.raw_len();
1154        let table_len = object_count
1155            .checked_mul(4)
1156            .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1157        let min_len = 12usize
1158            .checked_add(table_len)
1159            .and_then(|len| len.checked_add(hash_len * 2))
1160            .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1161        if bytes.len() < min_len {
1162            return Err(GitError::InvalidFormat("reverse index too short".into()));
1163        }
1164        if bytes.len() != min_len {
1165            return Err(GitError::InvalidFormat(format!(
1166                "reverse index has {} trailing bytes",
1167                bytes.len() - min_len
1168            )));
1169        }
1170        if &bytes[..4] != b"RIDX" {
1171            return Err(GitError::InvalidFormat(
1172                "missing reverse index signature".into(),
1173            ));
1174        }
1175        let version = u32_be(&bytes[4..8]);
1176        if version != 1 {
1177            return Err(GitError::Unsupported(format!(
1178                "reverse index version {version}"
1179            )));
1180        }
1181        let hash_id = u32_be(&bytes[8..12]);
1182        if hash_id != hash_function_id(format) {
1183            return Err(GitError::InvalidFormat(format!(
1184                "reverse index hash id {hash_id} does not match {}",
1185                format.name()
1186            )));
1187        }
1188
1189        let index_checksum_offset = bytes.len() - hash_len;
1190        let actual_index_checksum =
1191            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1192        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1193        if actual_index_checksum != index_checksum {
1194            return Err(GitError::InvalidFormat(format!(
1195                "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1196            )));
1197        }
1198
1199        let pack_checksum_offset = index_checksum_offset - hash_len;
1200        let pack_checksum =
1201            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1202        let mut positions = Vec::with_capacity(object_count);
1203        let mut offset = 12usize;
1204        for _ in 0..object_count {
1205            let position = u32_be(&bytes[offset..offset + 4]);
1206            positions.push(position);
1207            offset += 4;
1208        }
1209        validate_position_permutation(&positions)?;
1210
1211        Ok(Self {
1212            version,
1213            format,
1214            positions,
1215            pack_checksum,
1216            index_checksum,
1217        })
1218    }
1219}
1220
1221impl PackMtimes {
1222    pub fn write(
1223        format: ObjectFormat,
1224        mtimes: &[u32],
1225        pack_checksum: &ObjectId,
1226    ) -> Result<Vec<u8>> {
1227        if pack_checksum.format() != format {
1228            return Err(GitError::InvalidObjectId(
1229                "pack checksum format does not match mtimes format".into(),
1230            ));
1231        }
1232
1233        let mut out = Vec::new();
1234        out.extend_from_slice(b"MTME");
1235        out.extend_from_slice(&1u32.to_be_bytes());
1236        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1237        for mtime in mtimes {
1238            out.extend_from_slice(&mtime.to_be_bytes());
1239        }
1240        out.extend_from_slice(pack_checksum.as_bytes());
1241        let checksum = sley_core::digest_bytes(format, &out)?;
1242        out.extend_from_slice(checksum.as_bytes());
1243        Ok(out)
1244    }
1245
1246    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1247        let hash_len = format.raw_len();
1248        let table_len = object_count
1249            .checked_mul(4)
1250            .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
1251        let expected_len = 12usize
1252            .checked_add(table_len)
1253            .and_then(|len| len.checked_add(hash_len * 2))
1254            .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
1255        if bytes.len() < expected_len {
1256            return Err(GitError::InvalidFormat("mtimes file too short".into()));
1257        }
1258        if bytes.len() != expected_len {
1259            return Err(GitError::InvalidFormat(format!(
1260                "mtimes file has {} trailing bytes",
1261                bytes.len() - expected_len
1262            )));
1263        }
1264        if &bytes[..4] != b"MTME" {
1265            return Err(GitError::InvalidFormat("missing mtimes signature".into()));
1266        }
1267        let version = u32_be(&bytes[4..8]);
1268        if version != 1 {
1269            return Err(GitError::Unsupported(format!("mtimes version {version}")));
1270        }
1271        let hash_id = u32_be(&bytes[8..12]);
1272        if hash_id != hash_function_id(format) {
1273            return Err(GitError::InvalidFormat(format!(
1274                "mtimes hash id {hash_id} does not match {}",
1275                format.name()
1276            )));
1277        }
1278
1279        let index_checksum_offset = bytes.len() - hash_len;
1280        let actual_index_checksum =
1281            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1282        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1283        if actual_index_checksum != index_checksum {
1284            return Err(GitError::InvalidFormat(format!(
1285                "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1286            )));
1287        }
1288
1289        let pack_checksum_offset = index_checksum_offset - hash_len;
1290        let pack_checksum =
1291            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1292        let mut mtimes = Vec::with_capacity(object_count);
1293        let mut offset = 12usize;
1294        for _ in 0..object_count {
1295            mtimes.push(u32_be(&bytes[offset..offset + 4]));
1296            offset += 4;
1297        }
1298
1299        Ok(Self {
1300            version,
1301            format,
1302            mtimes,
1303            pack_checksum,
1304            index_checksum,
1305        })
1306    }
1307}
1308
1309impl PackBitmapIndex {
1310    pub const OPTION_FULL_DAG: u16 = 0x0001;
1311    pub const OPTION_HASH_CACHE: u16 = 0x0004;
1312
1313    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1314        let hash_len = format.raw_len();
1315        let min_len = 12usize
1316            .checked_add(hash_len * 2)
1317            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1318        if bytes.len() < min_len {
1319            return Err(GitError::InvalidFormat("bitmap index too short".into()));
1320        }
1321        if &bytes[..4] != b"BITM" {
1322            return Err(GitError::InvalidFormat(
1323                "missing bitmap index signature".into(),
1324            ));
1325        }
1326        let version = u16_be(&bytes[4..6]);
1327        if version != 1 {
1328            return Err(GitError::Unsupported(format!(
1329                "bitmap index version {version}"
1330            )));
1331        }
1332        let options = u16_be(&bytes[6..8]);
1333        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
1334        if options & !known_options != 0 {
1335            return Err(GitError::Unsupported(format!(
1336                "bitmap index options {:#06x}",
1337                options & !known_options
1338            )));
1339        }
1340        let entry_count = u32_be(&bytes[8..12]) as usize;
1341        let checksum_offset = bytes.len() - hash_len;
1342        let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1343        let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1344        if actual_index_checksum != index_checksum {
1345            return Err(GitError::InvalidFormat(format!(
1346                "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1347            )));
1348        }
1349
1350        let pack_checksum_end = 12usize
1351            .checked_add(hash_len)
1352            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1353        let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
1354        let mut offset = pack_checksum_end;
1355        let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1356        let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1357        let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1358        let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1359
1360        let mut entries = Vec::with_capacity(entry_count);
1361        for idx in 0..entry_count {
1362            if checksum_offset.saturating_sub(offset) < 6 {
1363                return Err(GitError::InvalidFormat(
1364                    "truncated bitmap index entry".into(),
1365                ));
1366            }
1367            let object_position = u32_be(&bytes[offset..offset + 4]);
1368            offset += 4;
1369            if object_position as usize >= object_count {
1370                return Err(GitError::InvalidFormat(
1371                    "bitmap index entry points past object table".into(),
1372                ));
1373            }
1374            let xor_offset = bytes[offset];
1375            offset += 1;
1376            if xor_offset as usize > idx || xor_offset > 160 {
1377                return Err(GitError::InvalidFormat(
1378                    "bitmap index entry has invalid XOR offset".into(),
1379                ));
1380            }
1381            let flags = bytes[offset];
1382            offset += 1;
1383            let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1384            entries.push(PackBitmapEntry {
1385                object_position,
1386                xor_offset,
1387                flags,
1388                bitmap,
1389            });
1390        }
1391
1392        let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
1393            let cache_len = object_count
1394                .checked_mul(4)
1395                .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
1396            if checksum_offset.saturating_sub(offset) < cache_len {
1397                return Err(GitError::InvalidFormat(
1398                    "truncated bitmap hash cache".into(),
1399                ));
1400            }
1401            let mut cache = Vec::with_capacity(object_count);
1402            for _ in 0..object_count {
1403                cache.push(u32_be(&bytes[offset..offset + 4]));
1404                offset += 4;
1405            }
1406            Some(cache)
1407        } else {
1408            None
1409        };
1410
1411        if offset != checksum_offset {
1412            return Err(GitError::InvalidFormat(format!(
1413                "bitmap index has {} trailing bytes",
1414                checksum_offset - offset
1415            )));
1416        }
1417
1418        Ok(Self {
1419            version,
1420            format,
1421            options,
1422            pack_checksum,
1423            index_checksum,
1424            type_bitmaps: PackBitmapTypeBitmaps {
1425                commits,
1426                trees,
1427                blobs,
1428                tags,
1429            },
1430            entries,
1431            name_hash_cache,
1432        })
1433    }
1434
1435    /// Looks up the stored entry whose commit sits at `position` in the
1436    /// oid-sorted pack index (`.idx` order; see [`PackBitmapEntry::object_position`]).
1437    pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
1438        self.entries
1439            .iter()
1440            .find(|entry| entry.object_position == position)
1441    }
1442}
1443
1444fn parse_bitmap_ewah(
1445    bytes: &[u8],
1446    offset: &mut usize,
1447    checksum_offset: usize,
1448    _object_count: usize,
1449) -> Result<EwahBitmap> {
1450    if checksum_offset.saturating_sub(*offset) < 12 {
1451        return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
1452    }
1453    let bit_size = u32_be(&bytes[*offset..*offset + 4]);
1454    *offset += 4;
1455    let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
1456    *offset += 4;
1457    let words_len = word_count
1458        .checked_mul(8)
1459        .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
1460    if checksum_offset.saturating_sub(*offset) < words_len + 4 {
1461        return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
1462    }
1463    let mut words = Vec::with_capacity(word_count);
1464    for _ in 0..word_count {
1465        words.push(u64_be(&bytes[*offset..*offset + 8]));
1466        *offset += 8;
1467    }
1468    let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
1469    *offset += 4;
1470    validate_ewah_words(bit_size, &words, rlw_position)?;
1471    Ok(EwahBitmap {
1472        bit_size,
1473        words,
1474        rlw_position,
1475    })
1476}
1477
1478fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
1479    if words.is_empty() {
1480        if rlw_position != 0 || bit_size != 0 {
1481            return Err(GitError::InvalidFormat(
1482                "EWAH bitmap has invalid empty RLW".into(),
1483            ));
1484        }
1485        return Ok(());
1486    }
1487    if rlw_position as usize >= words.len() {
1488        return Err(GitError::InvalidFormat(
1489            "EWAH RLW position points past word table".into(),
1490        ));
1491    }
1492    let mut word_idx = 0usize;
1493    let mut decoded_words = 0u64;
1494    while word_idx < words.len() {
1495        let rlw = words[word_idx];
1496        let run_words = (rlw >> 1) & 0xffff_ffff;
1497        let literal_words = (rlw >> 33) as usize;
1498        word_idx += 1;
1499        word_idx = word_idx
1500            .checked_add(literal_words)
1501            .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
1502        if word_idx > words.len() {
1503            return Err(GitError::InvalidFormat(
1504                "EWAH literal words extend past word table".into(),
1505            ));
1506        }
1507        decoded_words = decoded_words
1508            .checked_add(run_words)
1509            .and_then(|value| value.checked_add(literal_words as u64))
1510            .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
1511    }
1512    let decoded_bits = decoded_words
1513        .checked_mul(64)
1514        .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
1515    if decoded_bits < u64::from(bit_size) {
1516        return Err(GitError::InvalidFormat(
1517            "EWAH bitmap decodes fewer bits than declared".into(),
1518        ));
1519    }
1520    Ok(())
1521}
1522
1523impl MultiPackIndex {
1524    pub fn write(
1525        format: ObjectFormat,
1526        version: u8,
1527        pack_names: &[String],
1528        objects: &[MultiPackIndexEntry],
1529    ) -> Result<Vec<u8>> {
1530        Self::write_with_reverse_index(format, version, pack_names, objects, None)
1531    }
1532
1533    /// Like [`MultiPackIndex::write`], but when `preferred_pack` is `Some`,
1534    /// additionally emits the `RIDX` chunk: the object order a multi-pack
1535    /// `.bitmap` numbers its bits in ("pseudo-pack order" — every object of
1536    /// the preferred pack first, then the rest by pack id, each pack's slice
1537    /// in offset order), stored as one u32 midx position per object.
1538    ///
1539    /// `preferred_pack` is the pack-int-id receiving pseudo-pack priority; it
1540    /// must be in range.
1541    pub fn write_with_reverse_index(
1542        format: ObjectFormat,
1543        version: u8,
1544        pack_names: &[String],
1545        objects: &[MultiPackIndexEntry],
1546        preferred_pack: Option<u32>,
1547    ) -> Result<Vec<u8>> {
1548        if let Some(preferred) = preferred_pack
1549            && preferred as usize >= pack_names.len()
1550        {
1551            return Err(GitError::InvalidFormat(format!(
1552                "preferred pack {preferred} out of range for {} packs",
1553                pack_names.len()
1554            )));
1555        }
1556        if version != 1 && version != 2 {
1557            return Err(GitError::Unsupported(format!(
1558                "multi-pack-index version {version}"
1559            )));
1560        }
1561        if pack_names.len() > u32::MAX as usize {
1562            return Err(GitError::InvalidFormat(
1563                "too many multi-pack-index packs".into(),
1564            ));
1565        }
1566        if objects.len() > u32::MAX as usize {
1567            return Err(GitError::InvalidFormat(
1568                "too many multi-pack-index objects".into(),
1569            ));
1570        }
1571        validate_midx_pack_names(pack_names)?;
1572        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
1573            return Err(GitError::InvalidFormat(
1574                "multi-pack-index v1 pack names must be sorted".into(),
1575            ));
1576        }
1577
1578        let mut objects = objects.iter().collect::<Vec<_>>();
1579        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1580        let mut previous_oid: Option<&ObjectId> = None;
1581        for object in &objects {
1582            if object.oid.format() != format {
1583                return Err(GitError::InvalidObjectId(
1584                    "multi-pack-index object format does not match index format".into(),
1585                ));
1586            }
1587            if let Some(previous) = previous_oid
1588                && previous.as_bytes() == object.oid.as_bytes()
1589            {
1590                return Err(GitError::InvalidFormat(
1591                    "multi-pack-index contains duplicate object ids".into(),
1592                ));
1593            }
1594            if object.pack_int_id as usize >= pack_names.len() {
1595                return Err(GitError::InvalidFormat(
1596                    "multi-pack-index object points past pack table".into(),
1597                ));
1598            }
1599            previous_oid = Some(&object.oid);
1600        }
1601
1602        let mut large_offsets = Vec::new();
1603        let mut chunks = vec![
1604            (*b"PNAM", write_midx_pack_names(pack_names)),
1605            (*b"OIDF", write_midx_oid_fanout(&objects)?),
1606            (*b"OIDL", write_midx_oid_lookup(&objects)),
1607            (
1608                *b"OOFF",
1609                write_midx_object_offsets(&objects, &mut large_offsets)?,
1610            ),
1611        ];
1612        if !large_offsets.is_empty() {
1613            chunks.push((*b"LOFF", large_offsets));
1614        }
1615        if let Some(preferred) = preferred_pack {
1616            // `objects` is already in midx (oid-sorted) order here; the chunk
1617            // lists each object's midx position in pseudo-pack order.
1618            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
1619            pseudo.sort_by_key(|&midx_pos| {
1620                let object = objects[midx_pos as usize];
1621                (
1622                    object.pack_int_id != preferred,
1623                    object.pack_int_id,
1624                    object.offset,
1625                )
1626            });
1627            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
1628            for midx_pos in pseudo {
1629                ridx.extend_from_slice(&midx_pos.to_be_bytes());
1630            }
1631            chunks.push((*b"RIDX", ridx));
1632        }
1633        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
1634    }
1635
1636    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1637        let hash_len = format.raw_len();
1638        if bytes.len() < 12 + 12 + hash_len {
1639            return Err(GitError::InvalidFormat(
1640                "multi-pack-index file too short".into(),
1641            ));
1642        }
1643        if &bytes[..4] != b"MIDX" {
1644            return Err(GitError::InvalidFormat(
1645                "missing multi-pack-index signature".into(),
1646            ));
1647        }
1648        let version = bytes[4];
1649        if version != 1 && version != 2 {
1650            return Err(GitError::Unsupported(format!(
1651                "multi-pack-index version {version}"
1652            )));
1653        }
1654        let hash_id = bytes[5];
1655        if u32::from(hash_id) != hash_function_id(format) {
1656            return Err(GitError::InvalidFormat(format!(
1657                "multi-pack-index hash id {hash_id} does not match {}",
1658                format.name()
1659            )));
1660        }
1661        let chunk_count = bytes[6] as usize;
1662        let base_midx_count = bytes[7];
1663        if base_midx_count != 0 {
1664            return Err(GitError::Unsupported(format!(
1665                "multi-pack-index base count {base_midx_count}"
1666            )));
1667        }
1668        let pack_count = u32_be(&bytes[8..12]);
1669        let lookup_len = (chunk_count + 1)
1670            .checked_mul(12)
1671            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
1672        let data_start = 12usize
1673            .checked_add(lookup_len)
1674            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
1675        let checksum_offset = bytes.len() - hash_len;
1676        if data_start > checksum_offset {
1677            return Err(GitError::InvalidFormat(
1678                "truncated multi-pack-index chunk lookup".into(),
1679            ));
1680        }
1681
1682        let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1683        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1684        if actual_checksum != checksum {
1685            return Err(GitError::InvalidFormat(format!(
1686                "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
1687            )));
1688        }
1689
1690        let mut entries = Vec::with_capacity(chunk_count + 1);
1691        let mut offset = 12usize;
1692        for _ in 0..=chunk_count {
1693            let id = [
1694                bytes[offset],
1695                bytes[offset + 1],
1696                bytes[offset + 2],
1697                bytes[offset + 3],
1698            ];
1699            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
1700            entries.push((id, chunk_offset));
1701            offset += 12;
1702        }
1703        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
1704            return Err(GitError::InvalidFormat(
1705                "multi-pack-index chunk lookup is empty".into(),
1706            ));
1707        };
1708        if terminator_id != [0, 0, 0, 0] {
1709            return Err(GitError::InvalidFormat(
1710                "multi-pack-index chunk lookup missing terminator".into(),
1711            ));
1712        }
1713        if terminator_offset != checksum_offset as u64 {
1714            return Err(GitError::InvalidFormat(
1715                "multi-pack-index terminator does not point at checksum".into(),
1716            ));
1717        }
1718
1719        let mut chunks = Vec::with_capacity(chunk_count);
1720        let mut previous_offset = data_start as u64;
1721        for pair in entries.windows(2) {
1722            let (id, chunk_offset) = pair[0];
1723            let (_next_id, next_offset) = pair[1];
1724            if id == [0, 0, 0, 0] {
1725                return Err(GitError::InvalidFormat(
1726                    "multi-pack-index chunk id is zero before terminator".into(),
1727                ));
1728            }
1729            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
1730                return Err(GitError::InvalidFormat(
1731                    "multi-pack-index chunk offsets are not monotonic".into(),
1732                ));
1733            }
1734            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
1735                return Err(GitError::InvalidFormat(
1736                    "multi-pack-index chunk length is invalid".into(),
1737                ));
1738            }
1739            chunks.push(MultiPackIndexChunk {
1740                id,
1741                offset: chunk_offset,
1742                len: next_offset - chunk_offset,
1743            });
1744            previous_offset = chunk_offset;
1745        }
1746
1747        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
1748        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
1749        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
1750        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
1751        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
1752        let bitmapped_packs =
1753            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
1754
1755        Ok(Self {
1756            version,
1757            format,
1758            pack_count,
1759            pack_names,
1760            object_count: object_count as u32,
1761            fanout,
1762            objects,
1763            reverse_index,
1764            bitmapped_packs,
1765            chunks,
1766            checksum,
1767        })
1768    }
1769
1770    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
1771        self.objects
1772            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1773            .ok()
1774            .map(|idx| &self.objects[idx])
1775    }
1776}
1777
1778fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
1779    for name in pack_names {
1780        if name.is_empty() {
1781            return Err(GitError::InvalidFormat(
1782                "multi-pack-index pack name is empty".into(),
1783            ));
1784        }
1785        if name
1786            .bytes()
1787            .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
1788        {
1789            return Err(GitError::InvalidFormat(
1790                "multi-pack-index pack name contains an invalid byte".into(),
1791            ));
1792        }
1793    }
1794    Ok(())
1795}
1796
1797fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
1798    let mut out = Vec::new();
1799    for name in pack_names {
1800        out.extend_from_slice(name.as_bytes());
1801        out.push(0);
1802    }
1803    while out.len() % 4 != 0 {
1804        out.push(0);
1805    }
1806    out
1807}
1808
1809fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
1810    let mut counts = [0u32; 256];
1811    for object in objects {
1812        let first = object.oid.as_bytes()[0] as usize;
1813        counts[first] = counts[first]
1814            .checked_add(1)
1815            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
1816    }
1817    let mut running = 0u32;
1818    let mut out = Vec::with_capacity(256 * 4);
1819    for count in counts {
1820        running = running
1821            .checked_add(count)
1822            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
1823        out.extend_from_slice(&running.to_be_bytes());
1824    }
1825    Ok(out)
1826}
1827
1828fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
1829    let mut out = Vec::new();
1830    for object in objects {
1831        out.extend_from_slice(object.oid.as_bytes());
1832    }
1833    out
1834}
1835
1836fn write_midx_object_offsets(
1837    objects: &[&MultiPackIndexEntry],
1838    large_offsets: &mut Vec<u8>,
1839) -> Result<Vec<u8>> {
1840    let mut out = Vec::new();
1841    for object in objects {
1842        out.extend_from_slice(&object.pack_int_id.to_be_bytes());
1843        if object.offset < 0x8000_0000 {
1844            out.extend_from_slice(&(object.offset as u32).to_be_bytes());
1845        } else {
1846            let large_idx = large_offsets.len() / 8;
1847            if large_idx > 0x7fff_ffff {
1848                return Err(GitError::InvalidFormat(
1849                    "too many multi-pack-index large offsets".into(),
1850                ));
1851            }
1852            out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
1853            large_offsets.extend_from_slice(&object.offset.to_be_bytes());
1854        }
1855    }
1856    Ok(out)
1857}
1858
1859fn write_multi_pack_index_chunks(
1860    format: ObjectFormat,
1861    version: u8,
1862    pack_count: u32,
1863    chunks: &[([u8; 4], Vec<u8>)],
1864) -> Result<Vec<u8>> {
1865    if chunks.len() > u8::MAX as usize {
1866        return Err(GitError::InvalidFormat(
1867            "too many multi-pack-index chunks".into(),
1868        ));
1869    }
1870    let lookup_len = (chunks.len() + 1)
1871        .checked_mul(12)
1872        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
1873    let mut out = Vec::new();
1874    out.extend_from_slice(b"MIDX");
1875    out.push(version);
1876    out.push(hash_function_id(format) as u8);
1877    out.push(chunks.len() as u8);
1878    out.push(0);
1879    out.extend_from_slice(&pack_count.to_be_bytes());
1880    let mut chunk_offset = (12usize)
1881        .checked_add(lookup_len)
1882        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
1883        as u64;
1884    for (id, data) in chunks {
1885        out.extend_from_slice(id);
1886        out.extend_from_slice(&chunk_offset.to_be_bytes());
1887        chunk_offset = chunk_offset
1888            .checked_add(data.len() as u64)
1889            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
1890    }
1891    out.extend_from_slice(&[0, 0, 0, 0]);
1892    out.extend_from_slice(&chunk_offset.to_be_bytes());
1893    for (_id, data) in chunks {
1894        out.extend_from_slice(data);
1895    }
1896    let checksum = sley_core::digest_bytes(format, &out)?;
1897    out.extend_from_slice(checksum.as_bytes());
1898    Ok(out)
1899}
1900
1901#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1902struct EntryHeader {
1903    kind: PackObjectKind,
1904    size: u64,
1905}
1906
1907/// A cache of objects already decoded from one specific pack, keyed by the
1908/// in-pack byte offset at which each object's entry begins.
1909///
1910/// Delta resolution within a pack walks a chain of base objects by offset; the
1911/// same base is the parent of many deltas, so without a cache the entire chain
1912/// is re-inflated and re-applied on every read. Implementors let
1913/// [`read_object_at_with_cache`] reuse a warm base instead.
1914///
1915/// Correctness contract: a given `offset` within a given pack's bytes always
1916/// decodes to exactly one object, so caching by offset can never serve the wrong
1917/// object **provided the same cache is only ever used with one pack's bytes**.
1918/// Callers must therefore scope a cache to a single pack (e.g. key it by pack
1919/// path). The default [`read_object_at`] uses a no-op cache and is unaffected.
1920pub trait PackDeltaCache {
1921    /// Return the decoded object whose entry begins at `offset`, if cached.
1922    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
1923    /// Record that the entry beginning at `offset` decodes to `object`.
1924    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
1925}
1926
1927/// A [`PackDeltaCache`] that stores nothing; used by [`read_object_at`] to keep
1928/// the original, allocation-free behavior for callers that do not opt in.
1929struct NoopDeltaCache;
1930
1931impl PackDeltaCache for NoopDeltaCache {
1932    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
1933        None
1934    }
1935    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
1936}
1937
1938// Reused zlib inflate state. Resetting and reusing one `Decompress` avoids
1939// allocating a fresh (~10 KiB) `InflateState` for every object and delta decoded —
1940// an allocation that dominated bulk reads. Borrowed only for the duration of a
1941// single inflate; the recursive pack reader fully inflates each entry's data before
1942// recursing to its base, so the borrow never nests.
1943thread_local! {
1944    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
1945}
1946
1947/// The largest ratio by which a single DEFLATE/zlib member can expand its input.
1948/// The theoretical worst case for raw DEFLATE is ~1032:1 (a maximally efficient
1949/// run of back-references). We pre-reserve no more than this multiple of the
1950/// available compressed input, so an attacker who declares a huge `size_hint`
1951/// (e.g. `u64::MAX`) cannot make us reserve — and thus commit — gigabytes of
1952/// memory before the inflate has produced a single byte. The stream's *actual*
1953/// output is still verified against the declared size by the caller; this only
1954/// bounds the speculative allocation. git never pre-allocates an attacker's
1955/// declared size beyond a streaming buffer either (see index-pack.c's
1956/// `unpack_entry_data`).
1957const MAX_INFLATE_EXPANSION: usize = 1032;
1958
1959/// An absolute ceiling on the speculative pre-reservation, independent of the
1960/// input length, so even a large legitimate-looking compressed input can't be
1961/// turned into a multi-gigabyte up-front allocation. Inflate still grows the
1962/// output buffer organically past this when a real stream genuinely produces
1963/// that much — this only caps the *speculative* reserve.
1964const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;
1965
1966/// Bound a caller-supplied (possibly attacker-controlled) decompressed-size hint
1967/// to something safe to reserve up front: no larger than what `compressed_len`
1968/// input bytes could plausibly inflate to, and never above a fixed ceiling. The
1969/// returned value is only used to size the initial allocation; the inflate loop
1970/// grows the buffer as the real stream produces output, so legitimate large
1971/// objects still decode correctly — they just don't get the whole allocation at
1972/// once.
1973fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
1974    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
1975    // 64 (floor) <= MAX_INFLATE_RESERVE (ceiling) always, so `clamp` cannot panic.
1976    size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
1977}
1978
1979/// Inflate the entire zlib stream at the front of `compressed`, appending the
1980/// decoded bytes to `out`, reusing the thread-local inflate state. `size_hint`
1981/// is the caller's expectation for the decompressed length, but it is treated as
1982/// untrusted: the up-front reservation is bounded by [`bounded_inflate_reserve`]
1983/// so a crafted hint can never drive an out-of-memory pre-allocation. Returns the
1984/// number of *compressed* bytes consumed (so callers stepping through a pack can
1985/// advance to the next entry). Byte-for-byte equivalent to
1986/// `ZlibDecoder::read_to_end` + `total_in`.
1987fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
1988    INFLATE.with(|cell| {
1989        let mut decompress = cell.borrow_mut();
1990        decompress.reset(true);
1991        out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
1992        let mut input = compressed;
1993        let mut consumed_total = 0usize;
1994        loop {
1995            // Always leave output room so a zero-progress result means the input
1996            // (not the buffer) is exhausted.
1997            if out.len() == out.capacity() {
1998                out.reserve(out.len().max(64));
1999            }
2000            let before_in = decompress.total_in();
2001            let before_out = decompress.total_out();
2002            let status = decompress
2003                .decompress_vec(input, out, flate2::FlushDecompress::None)
2004                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
2005            let consumed = (decompress.total_in() - before_in) as usize;
2006            let produced = decompress.total_out() - before_out;
2007            input = &input[consumed..];
2008            consumed_total += consumed;
2009            match status {
2010                flate2::Status::StreamEnd => return Ok(consumed_total),
2011                _ if consumed == 0 && produced == 0 => {
2012                    return Err(GitError::InvalidObject("truncated zlib stream".into()));
2013                }
2014                _ => {}
2015            }
2016        }
2017    })
2018}
2019
2020/// Inflate at least `max_out` bytes (or until the stream ends) from `compressed`
2021/// into `out`, reusing the thread-local state. Used to read a delta's leading
2022/// base-size / result-size varints without inflating the whole instruction stream.
2023fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
2024    INFLATE.with(|cell| {
2025        let mut decompress = cell.borrow_mut();
2026        decompress.reset(true);
2027        out.reserve(max_out.max(16));
2028        let mut input = compressed;
2029        while out.len() < max_out {
2030            if out.len() == out.capacity() {
2031                out.reserve(out.len().max(16));
2032            }
2033            let before_in = decompress.total_in();
2034            let before_out = decompress.total_out();
2035            let status = decompress
2036                .decompress_vec(input, out, flate2::FlushDecompress::None)
2037                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
2038            let consumed = (decompress.total_in() - before_in) as usize;
2039            let produced = decompress.total_out() - before_out;
2040            input = &input[consumed..];
2041            if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
2042                break;
2043            }
2044        }
2045        Ok(())
2046    })
2047}
2048
2049/// Decode the single object stored at byte `offset` within `pack_bytes`, reading
2050/// only that object and its delta-base chain instead of parsing the whole pack.
2051///
2052/// Ofs-delta bases are followed by offset (recursively, within this pack);
2053/// ref-delta bases are obtained from `resolve_ref_base`, which the caller backs
2054/// with the surrounding object store (so a base in another pack or loose still
2055/// resolves). The pack trailer checksum is the final `format.raw_len()` bytes.
2056pub fn read_object_at_arc<F>(
2057    pack_bytes: &[u8],
2058    offset: u64,
2059    format: ObjectFormat,
2060    resolve_ref_base: F,
2061) -> Result<Arc<EncodedObject>>
2062where
2063    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2064{
2065    read_object_at_with_cache_arc(
2066        pack_bytes,
2067        offset,
2068        format,
2069        resolve_ref_base,
2070        &NoopDeltaCache,
2071    )
2072}
2073
2074/// Like [`read_object_at_arc`], but reuses already-decoded objects from `cache`
2075/// (keyed by in-pack offset) and records every object it decodes.
2076///
2077/// This turns repeated reads from the same pack — where many deltas share a base
2078/// chain — from re-inflating each chain per read into resolving each base once.
2079/// `cache` must be scoped to the pack `pack_bytes` belongs to (see
2080/// [`PackDeltaCache`]). The decoded object is returned behind an [`Arc`] so
2081/// callers can reuse cache handles without cloning full object bodies.
2082pub fn read_object_at_with_cache_arc<F, C>(
2083    pack_bytes: &[u8],
2084    offset: u64,
2085    format: ObjectFormat,
2086    mut resolve_ref_base: F,
2087    cache: &C,
2088) -> Result<Arc<EncodedObject>>
2089where
2090    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2091    C: PackDeltaCache + ?Sized,
2092{
2093    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
2094}
2095
2096fn read_object_at_inner<F, C>(
2097    pack_bytes: &[u8],
2098    offset: u64,
2099    format: ObjectFormat,
2100    resolve_ref_base: &mut F,
2101    cache: &C,
2102) -> Result<Arc<EncodedObject>>
2103where
2104    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2105    C: PackDeltaCache + ?Sized,
2106{
2107    // A warm cache entry for this exact offset is already the fully resolved
2108    // object, so the whole base chain below can be skipped.
2109    if let Some(object) = cache.get(offset) {
2110        return Ok(object);
2111    }
2112    let trailer_offset = pack_bytes
2113        .len()
2114        .checked_sub(format.raw_len())
2115        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
2116    let mut cursor = usize::try_from(offset)
2117        .ok()
2118        .filter(|&value| value < trailer_offset)
2119        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
2120    let header = parse_entry_header(pack_bytes, &mut cursor)?;
2121    let base = match header.kind {
2122        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
2123            pack_bytes,
2124            &mut cursor,
2125            offset,
2126        )?)),
2127        PackObjectKind::RefDelta => {
2128            let hash_len = format.raw_len();
2129            if cursor + hash_len > trailer_offset {
2130                return Err(GitError::InvalidFormat(
2131                    "truncated ref-delta base object id".into(),
2132                ));
2133            }
2134            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
2135            cursor += hash_len;
2136            Some(DeltaBase::Ref(oid))
2137        }
2138        _ => None,
2139    };
2140    let mut body = Vec::new();
2141    inflate_into(
2142        &pack_bytes[cursor..trailer_offset],
2143        &mut body,
2144        header.size.min(usize::MAX as u64) as usize,
2145    )?;
2146    if body.len() as u64 != header.size {
2147        return Err(GitError::InvalidObject(format!(
2148            "pack object declared {} bytes, decoded {}",
2149            header.size,
2150            body.len()
2151        )));
2152    }
2153    let object = match base {
2154        None => {
2155            let object_type = match header.kind {
2156                PackObjectKind::Commit => ObjectType::Commit,
2157                PackObjectKind::Tree => ObjectType::Tree,
2158                PackObjectKind::Blob => ObjectType::Blob,
2159                PackObjectKind::Tag => ObjectType::Tag,
2160                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
2161                    return Err(GitError::InvalidFormat(
2162                        "delta pack entry decoded without a base".into(),
2163                    ));
2164                }
2165            };
2166            Arc::new(EncodedObject::new(object_type, body))
2167        }
2168        Some(DeltaBase::Offset(base_offset)) => {
2169            let base =
2170                read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
2171            let resolved = apply_pack_delta(&base.body, &body)?;
2172            Arc::new(EncodedObject::new(base.object_type, resolved))
2173        }
2174        Some(DeltaBase::Ref(base_oid)) => {
2175            let base = resolve_ref_base(&base_oid)?
2176                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
2177            let resolved = apply_pack_delta(&base.body, &body)?;
2178            Arc::new(EncodedObject::new(base.object_type, resolved))
2179        }
2180    };
2181    // Record the fully resolved object so any later read that walks through this
2182    // offset (as a delta base or directly) reuses it. Bases are inserted as the
2183    // recursion unwinds, so a chain is decoded at most once across reads.
2184    cache.insert(offset, Arc::clone(&object));
2185    Ok(object)
2186}
2187
2188/// The object type and final (inflated) size of the entry at `offset`, *without*
2189/// materializing the object body — git's `cat-file --batch-check` fast path.
2190///
2191/// A base object's size is already in its pack entry header, and a delta's result
2192/// size is the second varint at the front of its (small) delta stream, so neither
2193/// inflates the full content. The reported type is the type at the end of the
2194/// delta chain (deltas inherit their base's type). `resolve_ref_base_type` supplies
2195/// the type of a ref-delta base that lives outside this pack (resolved through the
2196/// wider object store); ofs-delta bases are followed within `pack_bytes` directly.
2197pub fn read_object_header_at<F>(
2198    pack_bytes: &[u8],
2199    offset: u64,
2200    format: ObjectFormat,
2201    mut resolve_ref_base_type: F,
2202) -> Result<(ObjectType, u64)>
2203where
2204    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
2205{
2206    read_object_header_at_inner(
2207        pack_bytes,
2208        offset,
2209        format,
2210        &mut resolve_ref_base_type,
2211        &mut NoopHeaderTypeCache,
2212    )
2213}
2214
2215/// Memo of `pack offset -> resolved header (end-of-chain type, result size)` for
2216/// the `cat-file --batch-check` header fast path.
2217///
2218/// Without it, resolving the *type* of an ofs-delta walks the whole delta chain
2219/// to its base on every header read, re-inflating each link's leading varints
2220/// from scratch — so reading every object in a deeply-deltified pack costs
2221/// O(objects x chain-depth) and goes super-linear (sley#26). Two reuses fall out
2222/// of memoizing `offset -> (type, size)`:
2223///
2224/// * a chain's end-of-chain type is resolved at most once, so later objects on
2225///   the same chain skip the walk; and
2226/// * a repeated lookup of the same object (common in batch input) returns from
2227///   the memo without re-inflating its delta header at all.
2228///
2229/// The size stored is the object's final (inflated) result size — read from its
2230/// own pack/delta header, never by materializing the body.
2231pub trait HeaderTypeCache {
2232    /// The previously resolved header at `pack_offset`, if any.
2233    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
2234    /// Record the resolved header at `pack_offset` for reuse by later reads.
2235    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
2236}
2237
2238struct NoopHeaderTypeCache;
2239
2240impl HeaderTypeCache for NoopHeaderTypeCache {
2241    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
2242        None
2243    }
2244    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
2245}
2246
2247/// Like [`read_object_header_at`] but threads a caller-owned [`HeaderTypeCache`]
2248/// through the read so (a) the ofs-delta chain's end-of-chain type is resolved at
2249/// most once per chain and (b) a repeated lookup of the same offset returns from
2250/// the memo without re-inflating (sley#26). The cache is keyed by in-pack offset,
2251/// so it must be scoped to a single pack's bytes by the caller.
2252pub fn read_object_header_at_with_cache<F, C>(
2253    pack_bytes: &[u8],
2254    offset: u64,
2255    format: ObjectFormat,
2256    mut resolve_ref_base_type: F,
2257    type_cache: &mut C,
2258) -> Result<(ObjectType, u64)>
2259where
2260    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
2261    C: HeaderTypeCache + ?Sized,
2262{
2263    if let Some(header) = type_cache.get(offset) {
2264        return Ok(header);
2265    }
2266    read_object_header_at_inner(
2267        pack_bytes,
2268        offset,
2269        format,
2270        &mut resolve_ref_base_type,
2271        type_cache,
2272    )
2273}
2274
2275fn read_object_header_at_inner<F, C>(
2276    pack_bytes: &[u8],
2277    offset: u64,
2278    format: ObjectFormat,
2279    resolve_ref_base_type: &mut F,
2280    type_cache: &mut C,
2281) -> Result<(ObjectType, u64)>
2282where
2283    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
2284    C: HeaderTypeCache + ?Sized,
2285{
2286    let trailer_offset = pack_bytes
2287        .len()
2288        .checked_sub(format.raw_len())
2289        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
2290    let mut cursor = usize::try_from(offset)
2291        .ok()
2292        .filter(|&value| value < trailer_offset)
2293        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
2294    let header = parse_entry_header(pack_bytes, &mut cursor)?;
2295    let resolved = match header.kind {
2296        PackObjectKind::Commit => (ObjectType::Commit, header.size),
2297        PackObjectKind::Tree => (ObjectType::Tree, header.size),
2298        PackObjectKind::Blob => (ObjectType::Blob, header.size),
2299        PackObjectKind::Tag => (ObjectType::Tag, header.size),
2300        PackObjectKind::OfsDelta => {
2301            let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
2302            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
2303            // The end-of-chain type only depends on the base, so reuse it across
2304            // reads instead of re-walking the chain per object (sley#26).
2305            let base_type = match type_cache.get(base_offset) {
2306                Some((base_type, _)) => base_type,
2307                None => {
2308                    let (base_type, _) = read_object_header_at_inner(
2309                        pack_bytes,
2310                        base_offset,
2311                        format,
2312                        resolve_ref_base_type,
2313                        type_cache,
2314                    )?;
2315                    base_type
2316                }
2317            };
2318            (base_type, size)
2319        }
2320        PackObjectKind::RefDelta => {
2321            let hash_len = format.raw_len();
2322            if cursor + hash_len > trailer_offset {
2323                return Err(GitError::InvalidFormat(
2324                    "truncated ref-delta base object id".into(),
2325                ));
2326            }
2327            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
2328            cursor += hash_len;
2329            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
2330            let base_type = resolve_ref_base_type(&oid)?
2331                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
2332            (base_type, size)
2333        }
2334    };
2335    // Memoize the fully resolved header so a repeated lookup of this offset (or a
2336    // chain that bases on it) returns without re-inflating (sley#26).
2337    type_cache.put(offset, resolved);
2338    Ok(resolved)
2339}
2340
2341/// Number of inflated delta-stream bytes to read when only the leading base-size
2342/// and result-size varints are needed. Each varint is at most 10 bytes, so a short
2343/// prefix always covers both without inflating the delta instructions.
2344const DELTA_HEADER_PREFIX_LEN: usize = 32;
2345
2346/// Result size of a delta whose zlib-compressed stream starts at `compressed`,
2347/// inflating only the short prefix that holds its two leading varints.
2348fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
2349    let mut prefix = Vec::new();
2350    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
2351    decoded_delta_result_size(&prefix)
2352}
2353
2354fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
2355    let first = next_byte(bytes, offset)?;
2356    let mut size = u64::from(first & 0x0f);
2357    let kind = match (first >> 4) & 0x07 {
2358        1 => PackObjectKind::Commit,
2359        2 => PackObjectKind::Tree,
2360        3 => PackObjectKind::Blob,
2361        4 => PackObjectKind::Tag,
2362        6 => PackObjectKind::OfsDelta,
2363        7 => PackObjectKind::RefDelta,
2364        other => {
2365            return Err(GitError::InvalidFormat(format!(
2366                "invalid pack object type {other}"
2367            )));
2368        }
2369    };
2370    let mut shift = 4;
2371    let mut byte = first;
2372    while byte & 0x80 != 0 {
2373        byte = next_byte(bytes, offset)?;
2374        let part = u64::from(byte & 0x7f);
2375        size = size
2376            .checked_add(
2377                part.checked_shl(shift)
2378                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
2379            )
2380            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
2381        shift += 7;
2382    }
2383    Ok(EntryHeader { kind, size })
2384}
2385
2386fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
2387    let mut byte = next_byte(bytes, offset)?;
2388    let mut relative = u64::from(byte & 0x7f);
2389    while byte & 0x80 != 0 {
2390        byte = next_byte(bytes, offset)?;
2391        relative = relative
2392            .checked_add(1)
2393            .and_then(|value| value.checked_shl(7))
2394            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2395            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2396    }
2397    entry_offset
2398        .checked_sub(relative)
2399        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2400}
2401
2402fn resolve_pack_entries<F>(
2403    parsed: Vec<ParsedPackEntry>,
2404    format: ObjectFormat,
2405    external_base: &mut F,
2406) -> Result<Vec<PackObject>>
2407where
2408    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
2409{
2410    let mut offset_to_index = HashMap::with_capacity(parsed.len());
2411    for (idx, entry) in parsed.iter().enumerate() {
2412        offset_to_index.insert(parsed_entry_offset(entry), idx);
2413    }
2414
2415    let mut resolved = vec![None; parsed.len()];
2416    let mut oid_to_index = HashMap::new();
2417    let mut unresolved = 0usize;
2418    for (idx, entry) in parsed.iter().enumerate() {
2419        match entry {
2420            ParsedPackEntry::Resolved(object) => {
2421                oid_to_index.insert(object.entry.oid, idx);
2422                resolved[idx] = Some(object.clone());
2423            }
2424            ParsedPackEntry::Delta { .. } => unresolved += 1,
2425        }
2426    }
2427
2428    while unresolved != 0 {
2429        let mut progress = false;
2430        for idx in 0..parsed.len() {
2431            if resolved[idx].is_some() {
2432                continue;
2433            }
2434            let ParsedPackEntry::Delta {
2435                base,
2436                compressed_size,
2437                delta_size,
2438                offset,
2439                delta,
2440            } = &parsed[idx]
2441            else {
2442                continue;
2443            };
2444            let Some(base_object) = delta_base_object(
2445                base,
2446                &offset_to_index,
2447                &oid_to_index,
2448                &resolved,
2449                external_base,
2450            )?
2451            else {
2452                continue;
2453            };
2454            let body = apply_pack_delta(base_object.body(), delta)?;
2455            let object = EncodedObject::new(base_object.object_type(), body);
2456            let oid = object.object_id(format)?;
2457            let pack_object = PackObject {
2458                entry: PackEntry {
2459                    oid,
2460                    compressed_size: *compressed_size,
2461                    uncompressed_size: object.body.len() as u64,
2462                    offset: *offset,
2463                },
2464                object,
2465            };
2466            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
2467                return Err(GitError::InvalidObject(
2468                    "resolved delta size does not match delta header".into(),
2469                ));
2470            }
2471            if *delta_size != delta.len() as u64 {
2472                return Err(GitError::InvalidObject(format!(
2473                    "pack delta declared {delta_size} bytes, decoded {}",
2474                    delta.len()
2475                )));
2476            }
2477            oid_to_index.insert(oid, idx);
2478            resolved[idx] = Some(pack_object);
2479            unresolved -= 1;
2480            progress = true;
2481        }
2482        if !progress {
2483            return Err(GitError::Unsupported("unresolved delta base".into()));
2484        }
2485    }
2486
2487    resolved
2488        .into_iter()
2489        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
2490        .collect()
2491}
2492
2493fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
2494    match entry {
2495        ParsedPackEntry::Resolved(object) => object.entry.offset,
2496        ParsedPackEntry::Delta { offset, .. } => *offset,
2497    }
2498}
2499
2500enum DeltaBaseObject<'a> {
2501    Borrowed(&'a EncodedObject),
2502    Owned(EncodedObject),
2503}
2504
2505impl DeltaBaseObject<'_> {
2506    fn object_type(&self) -> ObjectType {
2507        match self {
2508            Self::Borrowed(object) => object.object_type,
2509            Self::Owned(object) => object.object_type,
2510        }
2511    }
2512
2513    fn body(&self) -> &[u8] {
2514        match self {
2515            Self::Borrowed(object) => &object.body,
2516            Self::Owned(object) => &object.body,
2517        }
2518    }
2519}
2520
2521fn delta_base_object<'a, F>(
2522    base: &DeltaBase,
2523    offset_to_index: &HashMap<u64, usize>,
2524    oid_to_index: &HashMap<ObjectId, usize>,
2525    resolved: &'a [Option<PackObject>],
2526    external_base: &mut F,
2527) -> Result<Option<DeltaBaseObject<'a>>>
2528where
2529    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
2530{
2531    match base {
2532        DeltaBase::Offset(offset) => {
2533            let Some(index) = offset_to_index.get(offset).copied() else {
2534                return Err(GitError::InvalidFormat(format!(
2535                    "ofs-delta base offset {offset} not found"
2536                )));
2537            };
2538            Ok(resolved[index]
2539                .as_ref()
2540                .map(|object| DeltaBaseObject::Borrowed(&object.object)))
2541        }
2542        DeltaBase::Ref(oid) => {
2543            if let Some(index) = oid_to_index.get(oid).copied() {
2544                return Ok(resolved[index]
2545                    .as_ref()
2546                    .map(|object| DeltaBaseObject::Borrowed(&object.object)));
2547            }
2548            external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
2549        }
2550    }
2551}
2552
2553fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
2554    let mut cursor = 0usize;
2555    let base_size = read_delta_varint(delta, &mut cursor)?;
2556    if base_size != base.len() as u64 {
2557        return Err(GitError::InvalidObject(format!(
2558            "delta base size mismatch: expected {base_size}, got {}",
2559            base.len()
2560        )));
2561    }
2562    let result_size = read_delta_varint(delta, &mut cursor)?;
2563    // `result_size` is an attacker-controlled delta varint from a network pack
2564    // (install_raw_pack -> sley-fetch). On 64-bit a naive `result_size as usize`
2565    // (or `.min(usize::MAX)`, a no-op there) lets a tiny delta declare
2566    // `u64::MAX`/1 TiB and drive `with_capacity` to abort the process before the
2567    // size-mismatch check below can fire. Route the up-front reservation through
2568    // the sley#2 bound so the speculative allocation is capped; `result.extend`
2569    // still grows the buffer organically and the post-decode length check
2570    // (`result.len() != result_size`) rejects the lie cleanly.
2571    let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
2572    let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
2573    while cursor < delta.len() {
2574        let command = delta[cursor];
2575        cursor += 1;
2576        if command & 0x80 != 0 {
2577            let copy_offset =
2578                read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
2579            let mut copy_size =
2580                read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
2581            if copy_size == 0 {
2582                copy_size = 0x10000;
2583            }
2584            let start = usize::try_from(copy_offset)
2585                .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
2586            let len = usize::try_from(copy_size)
2587                .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
2588            let end = start
2589                .checked_add(len)
2590                .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
2591            let Some(slice) = base.get(start..end) else {
2592                return Err(GitError::InvalidObject(
2593                    "delta copy range exceeds base object".into(),
2594                ));
2595            };
2596            result.extend_from_slice(slice);
2597        } else if command != 0 {
2598            let len = usize::from(command);
2599            let end = cursor
2600                .checked_add(len)
2601                .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
2602            let Some(slice) = delta.get(cursor..end) else {
2603                return Err(GitError::InvalidObject(
2604                    "delta insert range exceeds delta data".into(),
2605                ));
2606            };
2607            result.extend_from_slice(slice);
2608            cursor = end;
2609        } else {
2610            return Err(GitError::InvalidObject(
2611                "delta contains reserved zero command".into(),
2612            ));
2613        }
2614    }
2615    if result.len() as u64 != result_size {
2616        return Err(GitError::InvalidObject(format!(
2617            "delta result size mismatch: expected {result_size}, got {}",
2618            result.len()
2619        )));
2620    }
2621    Ok(result)
2622}
2623
2624fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
2625    let mut cursor = 0usize;
2626    let _ = read_delta_varint(delta, &mut cursor)?;
2627    read_delta_varint(delta, &mut cursor)
2628}
2629
2630/// Size, in bytes, of the fixed blocks used to index a base object for delta
2631/// compression. Matches git's `diff-delta.c` block size.
2632const DELTA_BLOCK_SIZE: usize = 16;
2633
2634/// Distance between indexed base anchors. Delta generation still scans target
2635/// objects byte-by-byte once there is evidence of shared content; anchoring the
2636/// base at block boundaries keeps the index compact and avoids per-object
2637/// hash-table allocation storms on unrelated blobs.
2638const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;
2639
2640/// Number of hash buckets used by [`DeltaIndex`]. Bucketing avoids sorting each
2641/// base object's anchors while keeping exact-hash candidate scans short.
2642const DELTA_BUCKET_BITS: usize = 12;
2643const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
2644const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
2645
2646/// An index over a base object's content used to generate deltas against it.
2647///
2648/// The index hashes block-sized anchors of the base, groups them into fixed
2649/// buckets, and verifies exact byte matches before copying. This avoids both
2650/// per-bucket allocation storms and the per-object sort needed by a single
2651/// sorted vector.
2652struct DeltaIndex<'a> {
2653    base: &'a [u8],
2654    blocks: Vec<DeltaBlock>,
2655    buckets: Vec<usize>,
2656}
2657
2658#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2659struct DeltaBlock {
2660    hash: u32,
2661    offset: usize,
2662}
2663
2664impl<'a> DeltaIndex<'a> {
2665    fn new(base: &'a [u8]) -> Self {
2666        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
2667        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
2668        for_each_delta_anchor(base.len(), |offset| {
2669            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
2670            buckets[delta_bucket(hash) + 1] += 1;
2671            anchors.push(DeltaBlock { hash, offset });
2672        });
2673        for idx in 1..buckets.len() {
2674            buckets[idx] += buckets[idx - 1];
2675        }
2676
2677        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
2678        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
2679        for anchor in anchors {
2680            let bucket = delta_bucket(anchor.hash);
2681            let next = &mut next_offsets[bucket];
2682            blocks[*next] = anchor;
2683            *next += 1;
2684        }
2685
2686        Self {
2687            base,
2688            blocks,
2689            buckets,
2690        }
2691    }
2692
2693    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
2694        let bucket = delta_bucket(hash);
2695        let start = self.buckets[bucket];
2696        let end = self.buckets[bucket + 1];
2697        self.blocks[start..end]
2698            .iter()
2699            .filter(move |block| block.hash == hash)
2700    }
2701
2702    fn has_hash(&self, hash: u32) -> bool {
2703        self.candidate_blocks(hash).next().is_some()
2704    }
2705
2706    fn has_shared_anchor(&self, target: &[u8]) -> bool {
2707        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
2708            return false;
2709        }
2710        let last = target.len() - DELTA_BLOCK_SIZE;
2711        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
2712            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
2713            if self.has_hash(hash) {
2714                return true;
2715            }
2716        }
2717        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
2718            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
2719            if self.has_hash(hash) {
2720                return true;
2721            }
2722        }
2723        false
2724    }
2725
2726    /// Generate a delta that reconstructs `target` from this index's base.
2727    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
2728        if !self.has_shared_anchor(target) {
2729            return None;
2730        }
2731        let base = self.base;
2732        let mut delta = Vec::new();
2733        write_delta_varint(&mut delta, base.len() as u64);
2734        write_delta_varint(&mut delta, target.len() as u64);
2735
2736        let mut pending_insert_start = 0usize;
2737        let mut pos = 0usize;
2738        while pos < target.len() {
2739            let mut best_len = 0usize;
2740            let mut best_offset = 0usize;
2741            if pos + DELTA_BLOCK_SIZE <= target.len() {
2742                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
2743                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
2744                    // Confirm the block actually matches (hash collisions are
2745                    // possible) before measuring how far it extends.
2746                    let candidate = candidate.offset;
2747                    let max_len = (base.len() - candidate).min(target.len() - pos);
2748                    let mut len = 0usize;
2749                    while len < max_len && base[candidate + len] == target[pos + len] {
2750                        len += 1;
2751                    }
2752                    if len > best_len {
2753                        best_len = len;
2754                        best_offset = candidate;
2755                    }
2756                }
2757            }
2758
2759            if best_len >= DELTA_BLOCK_SIZE {
2760                if pending_insert_start < pos {
2761                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
2762                }
2763                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
2764                pos += best_len;
2765                pending_insert_start = pos;
2766            } else {
2767                pos += 1;
2768            }
2769        }
2770        if pending_insert_start < target.len() {
2771            write_delta_insert(&mut delta, &target[pending_insert_start..]);
2772        }
2773        Some(delta)
2774    }
2775}
2776
2777fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
2778    if len < DELTA_BLOCK_SIZE {
2779        return;
2780    }
2781    len -= DELTA_BLOCK_SIZE;
2782    for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
2783        visit(offset);
2784    }
2785    if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
2786        visit(len);
2787    }
2788}
2789
2790fn delta_anchor_count(len: usize) -> usize {
2791    if len < DELTA_BLOCK_SIZE {
2792        return 0;
2793    }
2794    let last = len - DELTA_BLOCK_SIZE;
2795    (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
2796}
2797
2798fn delta_bucket(hash: u32) -> usize {
2799    (hash as usize) & DELTA_BUCKET_MASK
2800}
2801
2802/// Maximum number of base offsets retained per block-hash bucket. Caps the work
2803/// done extending candidate matches for inputs with many repeated blocks.
2804const DELTA_MAX_CHAIN: usize = 64;
2805
2806/// Hash a fixed-size block of base/target bytes into a bucket key.
2807///
2808/// A simple multiplicative (FNV-style) hash is sufficient here: matches are
2809/// always verified byte-for-byte before use, so collisions only cost a little
2810/// extra comparison work and never affect correctness.
2811fn block_hash(block: &[u8]) -> u32 {
2812    let mut hash = 0u32;
2813    for &byte in block {
2814        hash = hash.wrapping_mul(0x0100_0193) ^ u32::from(byte);
2815    }
2816    hash
2817}
2818
2819/// The chosen storage form for a single object during pack generation.
2820#[derive(Debug, Clone, PartialEq, Eq)]
2821enum PlannedBase {
2822    /// Stored undeltified (a base for others, or no good delta was found).
2823    None,
2824    /// Delta against another object in this pack, identified by its original
2825    /// index. The pre-computed `delta` bytes reconstruct the object from that
2826    /// base's body.
2827    InPack { base_idx: usize, delta: Vec<u8> },
2828    /// Delta against an external (thin-pack) base, referenced by object id.
2829    External { base_oid: ObjectId, delta: Vec<u8> },
2830}
2831
2832#[derive(Debug, Clone, PartialEq, Eq)]
2833struct PlannedEntry {
2834    base: PlannedBase,
2835}
2836
2837fn compress_planned_payloads(
2838    objects: &[&EncodedObject],
2839    plan: &[PlannedEntry],
2840    order: &[usize],
2841) -> Result<Vec<Vec<u8>>> {
2842    if order.is_empty() {
2843        return Ok(Vec::new());
2844    }
2845
2846    let worker_count = std::thread::available_parallelism()
2847        .map(|threads| threads.get())
2848        .unwrap_or(1)
2849        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
2850        .min(order.len());
2851    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
2852        let mut payloads = Vec::with_capacity(order.len());
2853        for &idx in order {
2854            payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
2855        }
2856        return Ok(payloads);
2857    }
2858
2859    let chunk_len = order.len().div_ceil(worker_count);
2860    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
2861    std::thread::scope(|scope| {
2862        let mut handles = Vec::new();
2863        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
2864            let chunk_start = chunk_idx * chunk_len;
2865            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
2866                let mut chunk_payloads = Vec::with_capacity(chunk.len());
2867                for (offset, &idx) in chunk.iter().enumerate() {
2868                    chunk_payloads.push((
2869                        chunk_start + offset,
2870                        compressed_payload(planned_payload(objects, plan, idx))?,
2871                    ));
2872                }
2873                Ok(chunk_payloads)
2874            }));
2875        }
2876
2877        let mut first_error = None;
2878        for handle in handles {
2879            match handle.join() {
2880                Ok(Ok(chunk_payloads)) => {
2881                    if first_error.is_none() {
2882                        for (pos, payload) in chunk_payloads {
2883                            payloads[pos] = payload;
2884                        }
2885                    }
2886                }
2887                Ok(Err(err)) => {
2888                    first_error.get_or_insert(err);
2889                }
2890                Err(_) => {
2891                    first_error.get_or_insert_with(|| {
2892                        GitError::InvalidObject("pack compression worker panicked".into())
2893                    });
2894                }
2895            }
2896        }
2897
2898        match first_error {
2899            Some(err) => Err(err),
2900            None => Ok(()),
2901        }
2902    })?;
2903    Ok(payloads)
2904}
2905
2906fn planned_payload<'a>(
2907    objects: &'a [&'a EncodedObject],
2908    plan: &'a [PlannedEntry],
2909    idx: usize,
2910) -> &'a [u8] {
2911    match &plan[idx].base {
2912        PlannedBase::None => &objects[idx].body,
2913        PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
2914    }
2915}
2916
2917fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
2918    let mut out = Vec::new();
2919    write_compressed_payload(&mut out, body)?;
2920    Ok(out)
2921}
2922
2923/// Maximum number of external thin-pack bases compared against any single
2924/// object. Bounds the work of the thin path when a large base set is supplied.
2925const DELTA_MAX_EXTERNAL_BASES: usize = 64;
2926
2927struct DeltaWindowEntry<'a> {
2928    idx: usize,
2929    index: DeltaIndex<'a>,
2930}
2931
2932/// Rank object types for delta grouping. Objects of the same type are far more
2933/// likely to delta well, so the sort groups by this rank first.
2934fn delta_type_rank(object_type: ObjectType) -> u8 {
2935    match object_type {
2936        ObjectType::Commit => 0,
2937        ObjectType::Tree => 1,
2938        ObjectType::Blob => 2,
2939        ObjectType::Tag => 3,
2940    }
2941}
2942
2943/// Decide how each object is stored (undeltified or deltified) and the order in
2944/// which objects are emitted into the pack.
2945///
2946/// # Ordering
2947///
2948/// Candidates are sorted by `(type, size descending, object id)`:
2949/// * **type** — only same-type objects are deltified against one another, so
2950///   grouping by type keeps the sliding window full of viable bases. Type rank
2951///   follows [`delta_type_rank`] (commit, tree, blob, tag).
2952/// * **size descending** — larger objects come first so smaller, later objects
2953///   delta against larger bases (git's heuristic). Raw [`EncodedObject`]s carry
2954///   no path/name, so the usual path-hash key is unavailable; size is the next
2955///   best locality signal.
2956/// * **object id** — a deterministic tiebreaker for reproducible packs.
2957///
2958/// # Selection
2959///
2960/// Each object is compared against the previous up to `window` same-type
2961/// candidates (and, for thin packs, up to [`DELTA_MAX_EXTERNAL_BASES`] external
2962/// bases of the same type). The smallest delta whose encoded length is strictly
2963/// less than the object's own body is kept; otherwise the object is stored
2964/// undeltified. Delta chain depth is bounded by `options.depth` (a base may
2965/// only be used if doing so keeps the resulting chain within the bound); a depth
2966/// of `0` disables deltification entirely.
2967///
2968/// Returns the per-object plan (indexed by original object index) together with
2969/// the emit order. Every in-pack delta references a candidate that is earlier in
2970/// the emit order, so emitting in that order writes each base before any object
2971/// that depends on it.
2972fn plan_pack_deltas(
2973    objects: &[&EncodedObject],
2974    object_ids: &[ObjectId],
2975    options: &PackWriteOptions,
2976) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
2977    let count = objects.len();
2978    let mut plan: Vec<PlannedEntry> = (0..count)
2979        .map(|_| PlannedEntry {
2980            base: PlannedBase::None,
2981        })
2982        .collect();
2983
2984    // Processing order. Deltas only point backwards within this order, which is
2985    // therefore also a valid emit order. Reordering by type/size improves delta
2986    // locality but is skipped when disabled or when deltification is off.
2987    let mut order: Vec<usize> = (0..count).collect();
2988    if options.reorder && options.depth > 0 {
2989        order.sort_by(|&left, &right| {
2990            delta_type_rank(objects[left].object_type)
2991                .cmp(&delta_type_rank(objects[right].object_type))
2992                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
2993                .then_with(|| {
2994                    object_ids[left]
2995                        .as_bytes()
2996                        .cmp(object_ids[right].as_bytes())
2997                })
2998        });
2999    }
3000
3001    if options.depth == 0 {
3002        return Ok((plan, order));
3003    }
3004
3005    // Pre-build delta indexes for external thin-pack bases, grouped by type so
3006    // an object only compares against compatible bases.
3007    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
3008        Vec::with_capacity(options.thin_bases.len());
3009    for (oid, object) in &options.thin_bases {
3010        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
3011    }
3012
3013    // Chain depth ending at each object (0 = undeltified). Used to keep delta
3014    // chains within `options.depth`.
3015    let mut depth = vec![0usize; count];
3016    // Sliding window of recently processed original indices, most recent last.
3017    let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
3018        std::collections::VecDeque::new();
3019
3020    for &idx in &order {
3021        let target = &objects[idx].body;
3022        let target_type = objects[idx].object_type;
3023
3024        let mut best_delta: Option<Vec<u8>> = None;
3025        let mut best_base = PlannedBase::None;
3026
3027        // Try in-pack candidates from the window (same type only).
3028        for base_entry in window.iter().rev() {
3029            let base_idx = base_entry.idx;
3030            if objects[base_idx].object_type != target_type {
3031                continue;
3032            }
3033            // Using this base would make the new chain depth + 1; skip if that
3034            // would exceed the configured maximum.
3035            if depth[base_idx] + 1 > options.depth {
3036                continue;
3037            }
3038            let Some(delta) = base_entry.index.delta(target) else {
3039                continue;
3040            };
3041            if !delta_is_acceptable(&delta, target.len()) {
3042                continue;
3043            }
3044            if best_delta
3045                .as_ref()
3046                .is_none_or(|current| delta.len() < current.len())
3047            {
3048                best_delta = Some(delta);
3049                best_base = PlannedBase::InPack {
3050                    base_idx,
3051                    delta: Vec::new(),
3052                };
3053            }
3054        }
3055
3056        // Try external thin-pack bases (ref-delta; external base is depth 0, so
3057        // the resulting chain depth is 1, always within a non-zero bound).
3058        for (base_oid, base_type, base_index) in
3059            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
3060        {
3061            if *base_type != target_type {
3062                continue;
3063            }
3064            let Some(delta) = base_index.delta(target) else {
3065                continue;
3066            };
3067            if !delta_is_acceptable(&delta, target.len()) {
3068                continue;
3069            }
3070            if best_delta
3071                .as_ref()
3072                .is_none_or(|current| delta.len() < current.len())
3073            {
3074                best_delta = Some(delta);
3075                best_base = PlannedBase::External {
3076                    base_oid: *base_oid,
3077                    delta: Vec::new(),
3078                };
3079            }
3080        }
3081
3082        if let Some(delta) = best_delta {
3083            match best_base {
3084                PlannedBase::InPack { base_idx, .. } => {
3085                    depth[idx] = depth[base_idx] + 1;
3086                    plan[idx].base = PlannedBase::InPack { base_idx, delta };
3087                }
3088                PlannedBase::External { base_oid, .. } => {
3089                    depth[idx] = 1;
3090                    plan[idx].base = PlannedBase::External { base_oid, delta };
3091                }
3092                PlannedBase::None => {}
3093            }
3094        }
3095
3096        // Add this object to the window for subsequent candidates.
3097        window.push_back(DeltaWindowEntry {
3098            idx,
3099            index: DeltaIndex::new(&objects[idx].body),
3100        });
3101        while window.len() > options.window {
3102            window.pop_front();
3103        }
3104    }
3105
3106    Ok((plan, order))
3107}
3108
3109/// Whether a generated delta is worth using instead of storing the object
3110/// undeltified. The encoded delta must be strictly smaller than the object's own
3111/// body; otherwise the undeltified form is the same size or smaller and is
3112/// always self-contained.
3113fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
3114    !delta.is_empty() && delta.len() < target_len
3115}
3116
3117fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
3118    loop {
3119        let mut byte = (value as u8) & 0x7f;
3120        value >>= 7;
3121        if value != 0 {
3122            byte |= 0x80;
3123        }
3124        out.push(byte);
3125        if value == 0 {
3126            break;
3127        }
3128    }
3129}
3130
3131fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
3132    while size != 0 {
3133        let chunk = size.min(0x10000);
3134        let encoded_size = if chunk == 0x10000 { 0 } else { chunk };
3135        let mut command = 0x80u8;
3136        let mut payload = [0u8; 7];
3137        let mut payload_len = 0usize;
3138        for idx in 0..4 {
3139            let byte = ((offset >> (idx * 8)) & 0xff) as u8;
3140            if byte != 0 {
3141                command |= 1 << idx;
3142                payload[payload_len] = byte;
3143                payload_len += 1;
3144            }
3145        }
3146        for idx in 0..3 {
3147            let byte = ((encoded_size >> (idx * 8)) & 0xff) as u8;
3148            if byte != 0 {
3149                command |= 0x10 << idx;
3150                payload[payload_len] = byte;
3151                payload_len += 1;
3152            }
3153        }
3154        out.push(command);
3155        out.extend_from_slice(&payload[..payload_len]);
3156        offset += chunk;
3157        size -= chunk;
3158    }
3159}
3160
3161fn write_delta_insert(out: &mut Vec<u8>, mut bytes: &[u8]) {
3162    while !bytes.is_empty() {
3163        let chunk_len = bytes.len().min(0x7f);
3164        out.push(chunk_len as u8);
3165        out.extend_from_slice(&bytes[..chunk_len]);
3166        bytes = &bytes[chunk_len..];
3167    }
3168}
3169
3170fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
3171    let mut value = 0u64;
3172    let mut shift = 0u32;
3173    loop {
3174        let Some(byte) = delta.get(*cursor).copied() else {
3175            return Err(GitError::InvalidObject("truncated delta size".into()));
3176        };
3177        *cursor += 1;
3178        value = value
3179            .checked_add(
3180                u64::from(byte & 0x7f)
3181                    .checked_shl(shift)
3182                    .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
3183            )
3184            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3185        if byte & 0x80 == 0 {
3186            return Ok(value);
3187        }
3188        shift = shift
3189            .checked_add(7)
3190            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3191    }
3192}
3193
3194fn read_delta_copy_value(
3195    delta: &[u8],
3196    cursor: &mut usize,
3197    command: u8,
3198    masks: &[u8],
3199) -> Result<u64> {
3200    let mut value = 0u64;
3201    for (shift, mask) in masks.iter().enumerate() {
3202        if command & mask != 0 {
3203            let Some(byte) = delta.get(*cursor).copied() else {
3204                return Err(GitError::InvalidObject(
3205                    "truncated delta copy command".into(),
3206                ));
3207            };
3208            *cursor += 1;
3209            value |= u64::from(byte) << (shift * 8);
3210        }
3211    }
3212    Ok(value)
3213}
3214
3215thread_local! {
3216    static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
3217}
3218
3219fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
3220    DEFLATE.with(|cell| {
3221        let mut compressor = cell.borrow_mut();
3222        compressor.reset();
3223        out.reserve(zlib_compress_bound(body.len()));
3224        let status = compressor
3225            .compress_vec(body, out, FlushCompress::Finish)
3226            .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
3227        if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
3228            return Err(GitError::InvalidObject(
3229                "zlib compression did not finish pack entry".into(),
3230            ));
3231        }
3232        Ok(())
3233    })
3234}
3235
3236fn zlib_compress_bound(len: usize) -> usize {
3237    len.saturating_add(len >> 12)
3238        .saturating_add(len >> 14)
3239        .saturating_add(len >> 25)
3240        .saturating_add(13)
3241}
3242
3243fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
3244    let type_code = match object_type {
3245        ObjectType::Commit => 1,
3246        ObjectType::Tree => 2,
3247        ObjectType::Blob => 3,
3248        ObjectType::Tag => 4,
3249    };
3250    write_pack_entry_header_kind(out, type_code, size);
3251}
3252
3253fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
3254    let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
3255    size >>= 4;
3256    if size != 0 {
3257        byte |= 0x80;
3258    }
3259    out.push(byte);
3260    while size != 0 {
3261        let mut byte = (size as u8) & 0x7f;
3262        size >>= 7;
3263        if size != 0 {
3264            byte |= 0x80;
3265        }
3266        out.push(byte);
3267    }
3268}
3269
3270fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
3271    if relative == 0 {
3272        return Err(GitError::InvalidFormat(
3273            "ofs-delta relative offset cannot be zero".into(),
3274        ));
3275    }
3276    let mut value = relative;
3277    let mut bytes = vec![(value & 0x7f) as u8];
3278    value >>= 7;
3279    while value != 0 {
3280        value -= 1;
3281        bytes.push(((value & 0x7f) as u8) | 0x80);
3282        value >>= 7;
3283    }
3284    bytes.reverse();
3285    out.extend_from_slice(&bytes);
3286    Ok(())
3287}
3288
3289fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
3290    let Some(byte) = bytes.get(*offset).copied() else {
3291        return Err(GitError::InvalidFormat(
3292            "truncated pack entry header".into(),
3293        ));
3294    };
3295    *offset += 1;
3296    Ok(byte)
3297}
3298
3299fn u16_be(bytes: &[u8]) -> u16 {
3300    u16::from_be_bytes([bytes[0], bytes[1]])
3301}
3302
3303fn u32_be(bytes: &[u8]) -> u32 {
3304    u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
3305}
3306
3307fn u64_be(bytes: &[u8]) -> u64 {
3308    u64::from_be_bytes([
3309        bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
3310    ])
3311}
3312
3313fn checked_range(
3314    start: usize,
3315    count: usize,
3316    width: usize,
3317    total: usize,
3318) -> Result<std::ops::Range<usize>> {
3319    let len = count
3320        .checked_mul(width)
3321        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
3322    let end = start
3323        .checked_add(len)
3324        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
3325    if end > total {
3326        return Err(GitError::InvalidFormat("truncated pack index table".into()));
3327    }
3328    Ok(start..end)
3329}
3330
3331fn validate_position_permutation(positions: &[u32]) -> Result<()> {
3332    let mut seen = vec![false; positions.len()];
3333    for position in positions {
3334        let idx = *position as usize;
3335        if idx >= positions.len() {
3336            return Err(GitError::InvalidFormat(
3337                "reverse index position points past object table".into(),
3338            ));
3339        }
3340        if seen[idx] {
3341            return Err(GitError::InvalidFormat(
3342                "reverse index position is duplicated".into(),
3343            ));
3344        }
3345        seen[idx] = true;
3346    }
3347    Ok(())
3348}
3349
3350fn parse_midx_pack_names(
3351    bytes: &[u8],
3352    chunks: &[MultiPackIndexChunk],
3353    pack_count: usize,
3354    version: u8,
3355) -> Result<Vec<String>> {
3356    let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
3357        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
3358    let mut names = Vec::with_capacity(pack_count);
3359    let mut offset = 0usize;
3360    while names.len() < pack_count {
3361        let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
3362            return Err(GitError::InvalidFormat(
3363                "multi-pack-index PNAM entry is unterminated".into(),
3364            ));
3365        };
3366        let name_bytes = &data[offset..offset + relative_end];
3367        if name_bytes.is_empty() {
3368            return Err(GitError::InvalidFormat(
3369                "multi-pack-index PNAM entry is empty".into(),
3370            ));
3371        }
3372        let name = std::str::from_utf8(name_bytes)
3373            .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
3374        if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
3375            return Err(GitError::InvalidFormat(
3376                "multi-pack-index PNAM entry contains a path separator".into(),
3377            ));
3378        }
3379        names.push(name.to_string());
3380        offset += relative_end + 1;
3381    }
3382    let padding = &data[offset..];
3383    if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
3384        return Err(GitError::InvalidFormat(
3385            "multi-pack-index PNAM padding is invalid".into(),
3386        ));
3387    }
3388    if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
3389        return Err(GitError::InvalidFormat(
3390            "multi-pack-index v1 PNAM entries are not sorted".into(),
3391        ));
3392    }
3393    Ok(names)
3394}
3395
3396fn parse_midx_oid_fanout(
3397    bytes: &[u8],
3398    chunks: &[MultiPackIndexChunk],
3399) -> Result<([u32; 256], usize)> {
3400    let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
3401        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
3402    if data.len() != 256 * 4 {
3403        return Err(GitError::InvalidFormat(
3404            "multi-pack-index OIDF chunk has invalid length".into(),
3405        ));
3406    }
3407    let mut fanout = [0u32; 256];
3408    let mut previous = 0u32;
3409    for (idx, slot) in fanout.iter_mut().enumerate() {
3410        let start = idx * 4;
3411        *slot = u32_be(&data[start..start + 4]);
3412        if *slot < previous {
3413            return Err(GitError::InvalidFormat(
3414                "multi-pack-index OIDF fanout is not monotonic".into(),
3415            ));
3416        }
3417        previous = *slot;
3418    }
3419    Ok((fanout, fanout[255] as usize))
3420}
3421
3422fn parse_midx_object_ids(
3423    bytes: &[u8],
3424    chunks: &[MultiPackIndexChunk],
3425    format: ObjectFormat,
3426    object_count: usize,
3427    fanout: &[u32; 256],
3428) -> Result<Vec<ObjectId>> {
3429    let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
3430        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
3431    let expected_len = object_count
3432        .checked_mul(format.raw_len())
3433        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
3434    if data.len() != expected_len {
3435        return Err(GitError::InvalidFormat(
3436            "multi-pack-index OIDL chunk has invalid length".into(),
3437        ));
3438    }
3439
3440    let mut ids = Vec::with_capacity(object_count);
3441    let mut counts = [0u32; 256];
3442    let mut previous_oid: Option<ObjectId> = None;
3443    for idx in 0..object_count {
3444        let start = idx * format.raw_len();
3445        let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
3446        if let Some(previous) = &previous_oid
3447            && previous.as_bytes() >= oid.as_bytes()
3448        {
3449            return Err(GitError::InvalidFormat(
3450                "multi-pack-index OIDL object ids are not strictly sorted".into(),
3451            ));
3452        }
3453        counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
3454            .checked_add(1)
3455            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3456        previous_oid = Some(oid);
3457        ids.push(oid);
3458    }
3459
3460    let mut running = 0u32;
3461    for (idx, count) in counts.iter().enumerate() {
3462        running = running
3463            .checked_add(*count)
3464            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3465        if fanout[idx] != running {
3466            return Err(GitError::InvalidFormat(
3467                "multi-pack-index OIDF fanout does not match OIDL".into(),
3468            ));
3469        }
3470    }
3471    Ok(ids)
3472}
3473
3474fn parse_midx_object_offsets(
3475    bytes: &[u8],
3476    chunks: &[MultiPackIndexChunk],
3477    object_ids: Vec<ObjectId>,
3478    pack_count: u32,
3479) -> Result<Vec<MultiPackIndexEntry>> {
3480    let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
3481        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
3482    let expected_len = object_ids
3483        .len()
3484        .checked_mul(8)
3485        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
3486    if data.len() != expected_len {
3487        return Err(GitError::InvalidFormat(
3488            "multi-pack-index OOFF chunk has invalid length".into(),
3489        ));
3490    }
3491    let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
3492    if let Some(large_offsets) = large_offsets
3493        && large_offsets.len() % 8 != 0
3494    {
3495        return Err(GitError::InvalidFormat(
3496            "multi-pack-index LOFF chunk has invalid length".into(),
3497        ));
3498    }
3499
3500    let mut entries = Vec::with_capacity(object_ids.len());
3501    for (idx, oid) in object_ids.into_iter().enumerate() {
3502        let start = idx * 8;
3503        let pack_int_id = u32_be(&data[start..start + 4]);
3504        if pack_int_id >= pack_count {
3505            return Err(GitError::InvalidFormat(
3506                "multi-pack-index object points past pack table".into(),
3507            ));
3508        }
3509        let raw_offset = u32_be(&data[start + 4..start + 8]);
3510        let offset = if raw_offset & 0x8000_0000 == 0 {
3511            u64::from(raw_offset)
3512        } else {
3513            let Some(large_offsets) = large_offsets else {
3514                return Err(GitError::InvalidFormat(
3515                    "multi-pack-index large offset missing LOFF chunk".into(),
3516                ));
3517            };
3518            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
3519            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
3520                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3521            })?;
3522            let large_end = large_start.checked_add(8).ok_or_else(|| {
3523                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3524            })?;
3525            if large_end > large_offsets.len() {
3526                return Err(GitError::InvalidFormat(
3527                    "multi-pack-index large offset points past LOFF chunk".into(),
3528                ));
3529            }
3530            u64_be(&large_offsets[large_start..large_end])
3531        };
3532        entries.push(MultiPackIndexEntry {
3533            oid,
3534            pack_int_id,
3535            offset,
3536        });
3537    }
3538    Ok(entries)
3539}
3540
3541fn parse_midx_reverse_index(
3542    bytes: &[u8],
3543    chunks: &[MultiPackIndexChunk],
3544    object_count: usize,
3545) -> Result<Option<Vec<u32>>> {
3546    let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
3547        return Ok(None);
3548    };
3549    let expected_len = object_count
3550        .checked_mul(4)
3551        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
3552    if data.len() != expected_len {
3553        return Err(GitError::InvalidFormat(
3554            "multi-pack-index RIDX chunk has invalid length".into(),
3555        ));
3556    }
3557    let mut positions = Vec::with_capacity(object_count);
3558    for idx in 0..object_count {
3559        let start = idx * 4;
3560        positions.push(u32_be(&data[start..start + 4]));
3561    }
3562    validate_position_permutation(&positions)?;
3563    Ok(Some(positions))
3564}
3565
3566fn parse_midx_bitmapped_packs(
3567    bytes: &[u8],
3568    chunks: &[MultiPackIndexChunk],
3569    pack_count: usize,
3570    object_count: usize,
3571) -> Result<Option<Vec<MultiPackBitmapPack>>> {
3572    let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
3573        return Ok(None);
3574    };
3575    let expected_len = pack_count
3576        .checked_mul(8)
3577        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
3578    if data.len() != expected_len {
3579        return Err(GitError::InvalidFormat(
3580            "multi-pack-index BTMP chunk has invalid length".into(),
3581        ));
3582    }
3583    let mut entries = Vec::with_capacity(pack_count);
3584    for idx in 0..pack_count {
3585        let start = idx * 8;
3586        let bitmap_pos = u32_be(&data[start..start + 4]);
3587        let bitmap_nr = u32_be(&data[start + 4..start + 8]);
3588        let bitmap_end = u64::from(bitmap_pos)
3589            .checked_add(u64::from(bitmap_nr))
3590            .ok_or_else(|| {
3591                GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
3592            })?;
3593        if bitmap_end > object_count as u64 {
3594            return Err(GitError::InvalidFormat(
3595                "multi-pack-index BTMP range points past object table".into(),
3596            ));
3597        }
3598        entries.push(MultiPackBitmapPack {
3599            bitmap_pos,
3600            bitmap_nr,
3601        });
3602    }
3603    Ok(Some(entries))
3604}
3605
3606fn midx_chunk_data<'a>(
3607    bytes: &'a [u8],
3608    chunks: &[MultiPackIndexChunk],
3609    id: [u8; 4],
3610    required: bool,
3611) -> Result<Option<&'a [u8]>> {
3612    let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
3613        if required {
3614            return Err(GitError::InvalidFormat(format!(
3615                "multi-pack-index missing {} chunk",
3616                std::str::from_utf8(&id).unwrap_or("required")
3617            )));
3618        }
3619        return Ok(None);
3620    };
3621    let start = usize::try_from(chunk.offset)
3622        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
3623    let len = usize::try_from(chunk.len)
3624        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
3625    let end = start
3626        .checked_add(len)
3627        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
3628    let Some(data) = bytes.get(start..end) else {
3629        return Err(GitError::InvalidFormat(
3630            "multi-pack-index chunk extends past file".into(),
3631        ));
3632    };
3633    Ok(Some(data))
3634}
3635
3636fn hash_function_id(format: ObjectFormat) -> u32 {
3637    match format {
3638        ObjectFormat::Sha1 => 1,
3639        ObjectFormat::Sha256 => 2,
3640    }
3641}
3642
3643/// Maximum number of clean (run) words that a single EWAH running-length word
3644/// can describe. The field is 32 bits wide (bits 1..=32 of the RLW).
3645const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;
3646
3647/// Maximum number of literal (dirty) words that can trail a single EWAH
3648/// running-length word. The field is 31 bits wide (bits 33..=63 of the RLW).
3649const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;
3650
3651/// All-ones 64-bit word, used to recognise a "clean" run of set bits.
3652const EWAH_ALL_ONES: u64 = u64::MAX;
3653
3654impl EwahBitmap {
3655    /// Constructs an [`EwahBitmap`] in git's canonical EWAH compressed form
3656    /// from a slice of raw uncompressed 64-bit words.
3657    ///
3658    /// Within each word bit `i` corresponds to position `word_index * 64 + i`,
3659    /// matching git's on-disk convention. `bit_size` records the number of
3660    /// logical bits the bitmap spans; it must not exceed `words.len() * 64`.
3661    ///
3662    /// This mirrors libgit's `ewah_add`/`ewah_add_empty_words` incremental
3663    /// encoder: consecutive all-zero or all-one words collapse into a run, and
3664    /// any other word is stored verbatim as a literal. Only the first
3665    /// `bit_size.div_ceil(64)` words back the declared bits; any extra trailing
3666    /// words supplied by the caller are ignored, just as git encodes a bitmap
3667    /// sized to its highest set bit.
3668    pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
3669        let required_words = bit_size.div_ceil(64) as usize;
3670        if required_words > words.len() {
3671            return Err(GitError::InvalidFormat(format!(
3672                "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
3673                words.len()
3674            )));
3675        }
3676        // Only the words that actually back the declared bits matter; libgit
3677        // never emits clean trailing zero words for the unused tail.
3678        let significant = &words[..required_words];
3679        let mut builder = EwahBuilder::new(bit_size);
3680        for &word in significant {
3681            if word == 0 {
3682                builder.add_empty_words(false, 1);
3683            } else if word == EWAH_ALL_ONES {
3684                builder.add_empty_words(true, 1);
3685            } else {
3686                builder.add_literal(word);
3687            }
3688        }
3689        builder.finish()
3690    }
3691
3692    /// Constructs an [`EwahBitmap`] from a set of bit positions.
3693    ///
3694    /// `bit_size` is the number of logical bits (typically the pack object
3695    /// count). Every position in `positions` must be strictly less than
3696    /// `bit_size`. Positions may be given in any order and may repeat.
3697    pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
3698        let word_count = bit_size.div_ceil(64) as usize;
3699        let mut words = vec![0u64; word_count];
3700        for &position in positions {
3701            if position >= bit_size {
3702                return Err(GitError::InvalidFormat(format!(
3703                    "EWAH bit position {position} out of range for bit_size {bit_size}"
3704                )));
3705            }
3706            let word_index = (position / 64) as usize;
3707            let bit_index = position % 64;
3708            words[word_index] |= 1u64 << bit_index;
3709        }
3710        Self::from_words(bit_size, &words)
3711    }
3712
3713    /// An empty EWAH bitmap (no bits, no words). This is what git writes for an
3714    /// all-zero type bitmap (e.g. when a pack has no tags).
3715    pub fn empty() -> Self {
3716        Self {
3717            bit_size: 0,
3718            words: Vec::new(),
3719            rlw_position: 0,
3720        }
3721    }
3722
3723    /// Decodes the compressed EWAH back into raw 64-bit words, LSB-first within
3724    /// each word. The returned vector has `bit_size.div_ceil(64)` entries.
3725    ///
3726    /// This is the inverse of [`EwahBitmap::from_words`] for the bits the
3727    /// bitmap actually covers and is primarily used to validate roundtrips.
3728    pub fn to_words(&self) -> Result<Vec<u64>> {
3729        let mut out = Vec::new();
3730        let mut word_idx = 0usize;
3731        while word_idx < self.words.len() {
3732            let rlw = self.words[word_idx];
3733            let run_bit = rlw & 1;
3734            let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
3735            let literal_words = (rlw >> 33) as usize;
3736            word_idx += 1;
3737            let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
3738            for _ in 0..run_words {
3739                out.push(fill);
3740            }
3741            let literal_end = word_idx
3742                .checked_add(literal_words)
3743                .filter(|end| *end <= self.words.len())
3744                .ok_or_else(|| {
3745                    GitError::InvalidFormat("EWAH literal words extend past word table".into())
3746                })?;
3747            out.extend_from_slice(&self.words[word_idx..literal_end]);
3748            word_idx = literal_end;
3749        }
3750        let required_words = (self.bit_size as usize).div_ceil(64);
3751        if out.len() < required_words {
3752            out.resize(required_words, 0);
3753        }
3754        out.truncate(required_words);
3755        Ok(out)
3756    }
3757
3758    /// Returns the sorted set bit positions covered by this bitmap.
3759    pub fn to_positions(&self) -> Result<Vec<u32>> {
3760        let words = self.to_words()?;
3761        let mut positions = Vec::new();
3762        for (word_index, word) in words.iter().enumerate() {
3763            let mut remaining = *word;
3764            while remaining != 0 {
3765                let bit = remaining.trailing_zeros();
3766                let position = (word_index as u64) * 64 + u64::from(bit);
3767                if position < u64::from(self.bit_size) {
3768                    // position always fits in u32 because bit_size is u32.
3769                    positions.push(position as u32);
3770                }
3771                remaining &= remaining - 1;
3772            }
3773        }
3774        Ok(positions)
3775    }
3776
3777    /// Serialises the bitmap to git's on-disk EWAH byte layout: `bit_size`
3778    /// (u32 BE), word count (u32 BE), each compressed word (u64 BE), then the
3779    /// running-length-word position (u32 BE).
3780    pub fn to_bytes(&self) -> Vec<u8> {
3781        let mut out = Vec::with_capacity(12 + self.words.len() * 8);
3782        self.append_bytes(&mut out);
3783        out
3784    }
3785
3786    fn append_bytes(&self, out: &mut Vec<u8>) {
3787        out.extend_from_slice(&self.bit_size.to_be_bytes());
3788        out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
3789        for word in &self.words {
3790            out.extend_from_slice(&word.to_be_bytes());
3791        }
3792        out.extend_from_slice(&self.rlw_position.to_be_bytes());
3793    }
3794}
3795
3796/// Incremental EWAH compressed-buffer builder mirroring libgit's `ewah_add`.
3797///
3798/// The buffer is a sequence of blocks. Each block begins with a running-length
3799/// word (RLW) and is followed by zero or more literal words:
3800///   * bit 0      => value of the clean run words (0 or 1)
3801///   * bits 1..=32 => number of clean run words (32-bit field)
3802///   * bits 33..=63 => number of trailing literal words (31-bit field)
3803struct EwahBuilder {
3804    bit_size: u32,
3805    words: Vec<u64>,
3806    rlw_position: usize,
3807}
3808
3809impl EwahBuilder {
3810    fn new(bit_size: u32) -> Self {
3811        // Every EWAH buffer begins with an RLW, even an empty one.
3812        Self {
3813            bit_size,
3814            words: vec![0u64],
3815            rlw_position: 0,
3816        }
3817    }
3818
3819    fn rlw(&self) -> u64 {
3820        self.words[self.rlw_position]
3821    }
3822
3823    fn set_rlw(&mut self, value: u64) {
3824        self.words[self.rlw_position] = value;
3825    }
3826
3827    fn rlw_running_len(&self) -> u64 {
3828        (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
3829    }
3830
3831    fn rlw_running_bit(&self) -> bool {
3832        self.rlw() & 1 == 1
3833    }
3834
3835    fn rlw_literal_len(&self) -> u64 {
3836        self.rlw() >> 33
3837    }
3838
3839    fn set_running_bit(&mut self, bit: bool) {
3840        let mut value = self.rlw();
3841        value &= !1;
3842        value |= u64::from(bit);
3843        self.set_rlw(value);
3844    }
3845
3846    fn set_running_len(&mut self, len: u64) {
3847        let mut value = self.rlw();
3848        value &= !(EWAH_MAX_RUNNING_LEN << 1);
3849        value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
3850        self.set_rlw(value);
3851    }
3852
3853    fn set_literal_len(&mut self, len: u64) {
3854        let mut value = self.rlw();
3855        value &= (1u64 << 33) - 1;
3856        value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
3857        self.set_rlw(value);
3858    }
3859
3860    /// Begins a fresh RLW block at the end of the buffer.
3861    fn push_rlw(&mut self) {
3862        self.rlw_position = self.words.len();
3863        self.words.push(0);
3864    }
3865
3866    /// Appends `number` clean words whose bits are all `value`, mirroring
3867    /// libgit's `ewah_add_empty_words`.
3868    ///
3869    /// A run can only be merged into the current RLW when that RLW has not yet
3870    /// emitted any literal words and its run either is empty or already carries
3871    /// the same fill value. Otherwise a fresh RLW block must be started, because
3872    /// every block stores its run strictly before its literals.
3873    fn add_empty_words(&mut self, value: bool, mut number: u64) {
3874        while number > 0 {
3875            // The current RLW can absorb more run words only when it has no
3876            // literals yet, its run is either empty or already the right fill
3877            // value, and the 32-bit run-length field is not already saturated.
3878            let can_extend = self.rlw_literal_len() == 0
3879                && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
3880                && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
3881            if !can_extend {
3882                self.push_rlw();
3883            }
3884            if self.rlw_running_len() == 0 {
3885                self.set_running_bit(value);
3886            }
3887            let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
3888            let take = available.min(number);
3889            self.set_running_len(self.rlw_running_len() + take);
3890            number -= take;
3891        }
3892    }
3893
3894    /// Appends a single literal (dirty) word verbatim, mirroring libgit's
3895    /// `ewah_add_dirty_words` for a count of one.
3896    fn add_literal(&mut self, word: u64) {
3897        if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
3898            self.push_rlw();
3899        }
3900        let literal_len = self.rlw_literal_len();
3901        self.set_literal_len(literal_len + 1);
3902        self.words.push(word);
3903    }
3904
3905    fn finish(self) -> Result<EwahBitmap> {
3906        let rlw_position = u32::try_from(self.rlw_position)
3907            .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
3908        if self.words.len() > u32::MAX as usize {
3909            return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
3910        }
3911        Ok(EwahBitmap {
3912            bit_size: self.bit_size,
3913            words: self.words,
3914            rlw_position,
3915        })
3916    }
3917}
3918
3919/// Builder that assembles a reachability bitmap (`.bitmap`) for a pack.
3920///
3921/// The writer is constructed from the object layout of a pack (one
3922/// [`ObjectType`] per object, in pack order) and the pack's trailing checksum.
3923/// Callers then register one selected commit per [`add_commit`] call, supplying
3924/// the set of pack positions reachable from that commit. [`build`]/[`write`]
3925/// produce a [`PackBitmapIndex`] / serialised `.bitmap` bytes matching git's
3926/// on-disk format (signature `BITM`, version 1).
3927///
3928/// [`add_commit`]: PackBitmapWriter::add_commit
3929/// [`build`]: PackBitmapWriter::build
3930/// [`write`]: PackBitmapWriter::write
3931#[derive(Debug, Clone)]
3932pub struct PackBitmapWriter {
3933    format: ObjectFormat,
3934    pack_checksum: ObjectId,
3935    object_count: u32,
3936    commit_positions: Vec<u32>,
3937    tree_positions: Vec<u32>,
3938    blob_positions: Vec<u32>,
3939    tag_positions: Vec<u32>,
3940    name_hash_cache: Option<Vec<u32>>,
3941    selected: Vec<SelectedCommit>,
3942}
3943
3944#[derive(Debug, Clone)]
3945struct SelectedCommit {
3946    /// Oid-sorted `.idx` position (what the on-disk entry records). The
3947    /// commit's pack-order position lives in `reachable` with the rest of the
3948    /// bits.
3949    commit_index_position: u32,
3950    flags: u8,
3951    reachable: Vec<u32>,
3952}
3953
3954impl PackBitmapWriter {
3955    /// `OBJ_NONE` selection flag: this commit's bitmap is stored in full (no XOR
3956    /// compression against a previously selected commit). This is the only flag
3957    /// value this writer emits.
3958    pub const FLAG_NONE: u8 = 0;
3959
3960    /// Creates a writer for a pack whose objects (in pack order) have the given
3961    /// [`ObjectType`]s and whose trailing checksum is `pack_checksum`.
3962    ///
3963    /// Returns an error if the pack contains more than `u32::MAX` objects, if
3964    /// `pack_checksum`'s format does not match `format`, or if any object type
3965    /// is not one of the four reachable git object kinds.
3966    pub fn new(
3967        format: ObjectFormat,
3968        pack_checksum: ObjectId,
3969        object_types: &[ObjectType],
3970    ) -> Result<Self> {
3971        if object_types.len() > u32::MAX as usize {
3972            return Err(GitError::InvalidFormat(
3973                "too many objects for a pack bitmap".into(),
3974            ));
3975        }
3976        if pack_checksum.format() != format {
3977            return Err(GitError::InvalidObjectId(
3978                "pack checksum format does not match bitmap format".into(),
3979            ));
3980        }
3981        let object_count = object_types.len() as u32;
3982        let mut commit_positions = Vec::new();
3983        let mut tree_positions = Vec::new();
3984        let mut blob_positions = Vec::new();
3985        let mut tag_positions = Vec::new();
3986        for (index, object_type) in object_types.iter().enumerate() {
3987            let position = index as u32;
3988            match object_type {
3989                ObjectType::Commit => commit_positions.push(position),
3990                ObjectType::Tree => tree_positions.push(position),
3991                ObjectType::Blob => blob_positions.push(position),
3992                ObjectType::Tag => tag_positions.push(position),
3993            }
3994        }
3995        Ok(Self {
3996            format,
3997            pack_checksum,
3998            object_count,
3999            commit_positions,
4000            tree_positions,
4001            blob_positions,
4002            tag_positions,
4003            name_hash_cache: None,
4004            selected: Vec::new(),
4005        })
4006    }
4007
4008    /// Attaches a name-hash cache (one `u32` per object, in pack order). When
4009    /// set, the written bitmap advertises [`PackBitmapIndex::OPTION_HASH_CACHE`]
4010    /// and appends the cache after the bitmap entries, exactly as git does.
4011    ///
4012    /// Returns an error if the cache length does not equal the object count.
4013    pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
4014        if cache.len() != self.object_count as usize {
4015            return Err(GitError::InvalidFormat(format!(
4016                "name hash cache has {} entries but pack has {} objects",
4017                cache.len(),
4018                self.object_count
4019            )));
4020        }
4021        self.name_hash_cache = Some(cache);
4022        Ok(self)
4023    }
4024
4025    /// Registers a selected commit and the pack positions reachable from it.
4026    ///
4027    /// `commit_position` is the *pack-order* position of the commit itself (the
4028    /// bit-number space); it must reference a commit object and is implicitly
4029    /// part of the reachable set. `commit_index_position` is the commit's
4030    /// position in the *oid-sorted* pack index — this is what the on-disk entry
4031    /// records (upstream `oid_pos`); bits and entry positions live in different
4032    /// spaces. `reachable` lists the pack-order positions of every object
4033    /// reachable from the commit (it may include or omit `commit_position`;
4034    /// duplicates are fine). All positions must be in range. The commit's full
4035    /// (non-XORed) bitmap is stored.
4036    pub fn add_commit(
4037        &mut self,
4038        commit_position: u32,
4039        commit_index_position: u32,
4040        reachable: &[u32],
4041    ) -> Result<()> {
4042        if commit_position >= self.object_count {
4043            return Err(GitError::InvalidFormat(format!(
4044                "commit position {commit_position} out of range for {} objects",
4045                self.object_count
4046            )));
4047        }
4048        if commit_index_position >= self.object_count {
4049            return Err(GitError::InvalidFormat(format!(
4050                "commit index position {commit_index_position} out of range for {} objects",
4051                self.object_count
4052            )));
4053        }
4054        if !self.commit_positions.contains(&commit_position) {
4055            return Err(GitError::InvalidFormat(format!(
4056                "bitmap commit position {commit_position} is not a commit object"
4057            )));
4058        }
4059        for &position in reachable {
4060            if position >= self.object_count {
4061                return Err(GitError::InvalidFormat(format!(
4062                    "reachable position {position} out of range for {} objects",
4063                    self.object_count
4064                )));
4065            }
4066        }
4067        let mut reachable = reachable.to_vec();
4068        reachable.push(commit_position);
4069        self.selected.push(SelectedCommit {
4070            commit_index_position,
4071            flags: Self::FLAG_NONE,
4072            reachable,
4073        });
4074        Ok(())
4075    }
4076
4077    /// Builds the in-memory [`PackBitmapIndex`] without serialising it.
4078    ///
4079    /// The resulting index always advertises
4080    /// [`PackBitmapIndex::OPTION_FULL_DAG`] (the four type bitmaps fully cover
4081    /// the pack) and, when a name-hash cache was attached,
4082    /// [`PackBitmapIndex::OPTION_HASH_CACHE`].
4083    pub fn build(&self) -> Result<PackBitmapIndex> {
4084        let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
4085        let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
4086        let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
4087        let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
4088
4089        let mut entries = Vec::with_capacity(self.selected.len());
4090        for selected in &self.selected {
4091            let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
4092            entries.push(PackBitmapEntry {
4093                object_position: selected.commit_index_position,
4094                xor_offset: 0,
4095                flags: selected.flags,
4096                bitmap,
4097            });
4098        }
4099
4100        let mut options = PackBitmapIndex::OPTION_FULL_DAG;
4101        if self.name_hash_cache.is_some() {
4102            options |= PackBitmapIndex::OPTION_HASH_CACHE;
4103        }
4104
4105        // The index checksum is only known once the body is serialised; the
4106        // dedicated `write` path fills it in. `build` reports a placeholder of
4107        // the correct format so the struct is self-consistent for callers that
4108        // only need the decoded bitmaps.
4109        let placeholder_checksum = ObjectId::null(self.format);
4110        Ok(PackBitmapIndex {
4111            version: 1,
4112            format: self.format,
4113            options,
4114            pack_checksum: self.pack_checksum.clone(),
4115            index_checksum: placeholder_checksum,
4116            type_bitmaps: PackBitmapTypeBitmaps {
4117                commits,
4118                trees,
4119                blobs,
4120                tags,
4121            },
4122            entries,
4123            name_hash_cache: self.name_hash_cache.clone(),
4124        })
4125    }
4126
4127    /// Builds and serialises the `.bitmap` file, returning the on-disk bytes
4128    /// (including the trailing index checksum).
4129    pub fn write(&self) -> Result<Vec<u8>> {
4130        self.build()?.write()
4131    }
4132}
4133
4134impl PackBitmapIndex {
4135    /// Serialises this index into git's on-disk `.bitmap` byte layout.
4136    ///
4137    /// This is the exact inverse of [`PackBitmapIndex::parse`]: signature
4138    /// `BITM`, version (u16 BE), options (u16 BE), entry count (u32 BE), the
4139    /// pack checksum, the four type bitmaps (commits, trees, blobs, tags), each
4140    /// commit entry (object position, XOR offset, flags, EWAH bitmap), the
4141    /// optional name-hash cache, and finally the trailing index checksum over
4142    /// everything written so far.
4143    ///
4144    /// The `index_checksum` field of `self` is ignored and recomputed from the
4145    /// serialised body. Returns an error for unsupported versions, mismatched
4146    /// object-id formats, an oversized entry table, or an inconsistent name-hash
4147    /// cache.
4148    pub fn write(&self) -> Result<Vec<u8>> {
4149        if self.version != 1 {
4150            return Err(GitError::Unsupported(format!(
4151                "bitmap index version {}",
4152                self.version
4153            )));
4154        }
4155        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
4156        if self.options & !known_options != 0 {
4157            return Err(GitError::Unsupported(format!(
4158                "bitmap index options {:#06x}",
4159                self.options & !known_options
4160            )));
4161        }
4162        if self.pack_checksum.format() != self.format {
4163            return Err(GitError::InvalidObjectId(
4164                "bitmap pack checksum format does not match index format".into(),
4165            ));
4166        }
4167        if self.entries.len() > u32::MAX as usize {
4168            return Err(GitError::InvalidFormat(
4169                "too many bitmap index entries".into(),
4170            ));
4171        }
4172        let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
4173        match (&self.name_hash_cache, want_cache) {
4174            (Some(_), false) => {
4175                return Err(GitError::InvalidFormat(
4176                    "name hash cache present without OPTION_HASH_CACHE".into(),
4177                ));
4178            }
4179            (None, true) => {
4180                return Err(GitError::InvalidFormat(
4181                    "OPTION_HASH_CACHE set without a name hash cache".into(),
4182                ));
4183            }
4184            _ => {}
4185        }
4186
4187        let mut out = Vec::new();
4188        out.extend_from_slice(b"BITM");
4189        out.extend_from_slice(&self.version.to_be_bytes());
4190        out.extend_from_slice(&self.options.to_be_bytes());
4191        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
4192        out.extend_from_slice(self.pack_checksum.as_bytes());
4193
4194        self.type_bitmaps.commits.append_bytes(&mut out);
4195        self.type_bitmaps.trees.append_bytes(&mut out);
4196        self.type_bitmaps.blobs.append_bytes(&mut out);
4197        self.type_bitmaps.tags.append_bytes(&mut out);
4198
4199        for (idx, entry) in self.entries.iter().enumerate() {
4200            if entry.xor_offset as usize > idx {
4201                return Err(GitError::InvalidFormat(
4202                    "bitmap index entry has invalid XOR offset".into(),
4203                ));
4204            }
4205            out.extend_from_slice(&entry.object_position.to_be_bytes());
4206            out.push(entry.xor_offset);
4207            out.push(entry.flags);
4208            entry.bitmap.append_bytes(&mut out);
4209        }
4210
4211        if let Some(cache) = &self.name_hash_cache {
4212            for value in cache {
4213                out.extend_from_slice(&value.to_be_bytes());
4214            }
4215        }
4216
4217        let checksum = sley_core::digest_bytes(self.format, &out)?;
4218        out.extend_from_slice(checksum.as_bytes());
4219        Ok(out)
4220    }
4221}
4222
4223/// Convenience wrapper that builds a `.bitmap` file in one call.
4224///
4225/// `object_types` lists the [`ObjectType`] of every pack object in pack order,
4226/// `pack_checksum` is the pack's trailing checksum, and `commits` carries, per
4227/// selected commit, `(pack_position, index_position, reachable_pack_positions)`
4228/// (see [`PackBitmapWriter::add_commit`] for the two position spaces). An
4229/// optional `name_hash_cache` (one entry per object) may be supplied to emit
4230/// the hash-cache extension.
4231pub fn write_bitmap(
4232    format: ObjectFormat,
4233    pack_checksum: ObjectId,
4234    object_types: &[ObjectType],
4235    commits: &[(u32, u32, Vec<u32>)],
4236    name_hash_cache: Option<Vec<u32>>,
4237) -> Result<Vec<u8>> {
4238    let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
4239    if let Some(cache) = name_hash_cache {
4240        writer = writer.with_name_hash_cache(cache)?;
4241    }
4242    for (commit_position, commit_index_position, reachable) in commits {
4243        writer.add_commit(*commit_position, *commit_index_position, reachable)?;
4244    }
4245    writer.write()
4246}
4247
4248#[cfg(test)]
4249mod tests {
4250    use super::*;
4251    use flate2::Compression;
4252    use flate2::read::ZlibDecoder;
4253    use flate2::write::ZlibEncoder;
4254    use std::fs;
4255    use std::io::Read;
4256    use std::io::Write;
4257    use std::path::{Path, PathBuf};
4258    use std::process::Command;
4259    use std::time::{SystemTime, UNIX_EPOCH};
4260
4261    fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
4262        PackWriteOptions::new()
4263            .with_prefer_ofs_delta(prefer_ofs_delta)
4264            .with_reorder(false)
4265    }
4266
4267    #[test]
4268    fn parses_single_blob_pack() {
4269        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
4270        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
4271        assert_eq!(parsed.version, 2);
4272        assert_eq!(parsed.entries.len(), 1);
4273        let object = &parsed.entries[0].object;
4274        assert_eq!(object.object_type, ObjectType::Blob);
4275        assert_eq!(object.body, b"hello\n");
4276        assert_eq!(
4277            parsed.entries[0].entry.oid.to_hex(),
4278            "ce013625030ba8dba906f756967f9e9ca394464a"
4279        );
4280    }
4281
4282    #[test]
4283    fn parses_single_blob_pack_sha256() {
4284        let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
4285        let parsed =
4286            PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
4287        assert_eq!(parsed.version, 2);
4288        assert_eq!(parsed.entries.len(), 1);
4289        let object = &parsed.entries[0].object;
4290        assert_eq!(object.object_type, ObjectType::Blob);
4291        assert_eq!(object.body, b"hello\n");
4292        assert_eq!(
4293            parsed.entries[0].entry.oid,
4294            object
4295                .object_id(ObjectFormat::Sha256)
4296                .expect("test operation should succeed")
4297        );
4298    }
4299
4300    #[test]
4301    fn parses_bundle_pack_payload_with_bundle_format() {
4302        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
4303        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
4304            .expect("test operation should succeed");
4305        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
4306            .into_bytes()
4307            .into_iter()
4308            .chain(pack)
4309            .collect::<Vec<_>>();
4310        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
4311            .expect("test operation should succeed");
4312
4313        let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
4314        assert_eq!(parsed.entries.len(), 1);
4315        assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
4316        assert_eq!(parsed.entries[0].object.body, b"bundle\n");
4317    }
4318
4319    /// Build a pack whose single blob entry header LIES about its decompressed
4320    /// size: it declares `declared_size` while the actual zlib payload only
4321    /// inflates to `real_body`. A short `real_body` plus a `declared_size` of
4322    /// `u64::MAX` is the decompression-bomb shape — the header claims terabytes
4323    /// from a handful of compressed bytes.
4324    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
4325        let mut pack = Vec::new();
4326        pack.extend_from_slice(b"PACK");
4327        pack.extend_from_slice(&2u32.to_be_bytes());
4328        pack.extend_from_slice(&1u32.to_be_bytes());
4329        // Object type 3 == blob; size varint encodes the *attacker-declared* size.
4330        write_pack_entry_header_kind(&mut pack, 3, declared_size);
4331        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4332        encoder
4333            .write_all(real_body)
4334            .expect("test operation should succeed");
4335        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
4336        let checksum =
4337            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
4338        pack.extend_from_slice(checksum.as_bytes());
4339        pack
4340    }
4341
4342    /// Regression: a crafted pack object header declaring a gigantic decompressed
4343    /// size with a tiny compressed payload must NOT drive an up-front
4344    /// reservation/allocation of that declared size (OOM/abort). sley#2: the
4345    /// header `size` is attacker-controlled over the network (install_raw_pack →
4346    /// sley-fetch), so it must be validated/bounded before any `Vec::reserve`.
4347    ///
4348    /// On the unfixed code, `inflate_into` did `out.reserve(header.size as usize)`
4349    /// with `header.size == u64::MAX`, which panics with "capacity overflow" (or
4350    /// aborts on alloc failure) *before* the size-mismatch check could fire. We
4351    /// run parse on a worker thread so that panic surfaces as a `join()` error
4352    /// rather than killing the test process; the fix turns this into a clean
4353    /// `Err` returned normally.
4354    #[test]
4355    fn rejects_decompression_bomb_header_without_oom() {
4356        for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
4357            let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
4358            let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
4359            let result = handle.join();
4360            // The parse thread must not have panicked/aborted on a huge reserve.
4361            assert!(
4362                result.is_ok(),
4363                "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
4364            );
4365            // And parsing must reject the lie (decoded len != declared size).
4366            let parse_result = result.expect("parse thread should not panic on a bomb header");
4367            assert!(
4368                parse_result.is_err(),
4369                "bomb header (declared={declared}) should be rejected as invalid"
4370            );
4371        }
4372    }
4373
4374    /// Build a 2-object pack: a real base blob followed by a delta (ref or ofs)
4375    /// whose *result-size* varint lies, declaring `declared_result_size`, while
4376    /// carrying a tiny real instruction stream. The delta's base-size varint is
4377    /// set correctly (so the base-size check at the top of `apply_pack_delta`
4378    /// passes and we reach the result reservation). Used to drive the sley#35
4379    /// delta-result-size bomb.
4380    fn lying_result_size_delta_pack(
4381        format: ObjectFormat,
4382        declared_result_size: u64,
4383        delta_kind: DeltaKind,
4384    ) -> Vec<u8> {
4385        let base = b"hello";
4386        let result = b"hello world"; // real produced length = 11
4387
4388        // Hand-build a delta with a truthful base-size and a LYING result-size.
4389        let mut delta = Vec::new();
4390        write_delta_varint(&mut delta, base.len() as u64);
4391        write_delta_varint(&mut delta, declared_result_size);
4392        // Real instructions: copy `base` then insert " world".
4393        let suffix = &result[base.len()..];
4394        delta.push(0x90); // copy, 1 size byte present (bit 0x10)
4395        delta.push(base.len() as u8);
4396        delta.push(suffix.len() as u8);
4397        delta.extend_from_slice(suffix);
4398
4399        let mut pack = Vec::new();
4400        pack.extend_from_slice(b"PACK");
4401        pack.extend_from_slice(&2u32.to_be_bytes());
4402        pack.extend_from_slice(&2u32.to_be_bytes());
4403
4404        let base_offset = pack.len();
4405        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
4406        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4407        encoder
4408            .write_all(base)
4409            .expect("test operation should succeed");
4410        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
4411
4412        let delta_offset = pack.len();
4413        write_pack_entry_header_kind(
4414            &mut pack,
4415            match delta_kind {
4416                DeltaKind::Offset => 6,
4417                DeltaKind::Ref => 7,
4418            },
4419            delta.len() as u64,
4420        );
4421        match delta_kind {
4422            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
4423            DeltaKind::Ref => {
4424                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
4425                    .expect("test operation should succeed");
4426                pack.extend_from_slice(base_oid.as_bytes());
4427            }
4428        }
4429        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4430        encoder
4431            .write_all(&delta)
4432            .expect("test operation should succeed");
4433        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
4434
4435        let checksum =
4436            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
4437        pack.extend_from_slice(checksum.as_bytes());
4438        pack
4439    }
4440
4441    /// Regression (sley#35): the 2nd instance of the sley#2 decompression-bomb
4442    /// class. `apply_pack_delta` read an attacker-controlled `result_size` varint
4443    /// from a network delta and fed it straight to `Vec::with_capacity`. A tiny
4444    /// delta declaring `result_size == u64::MAX` (or ~1 TiB) aborts the process
4445    /// ("capacity overflow"/alloc failure, SIGABRT) BEFORE the post-decode
4446    /// size-mismatch check can reject the lie. Both ref-delta and ofs-delta paths
4447    /// reach the same reservation, so both must be safe. We resolve the pack on a
4448    /// worker thread so an abort/panic surfaces as a `join()` error rather than
4449    /// killing the whole test binary; the fix turns the bomb into a clean `Err`.
4450    #[test]
4451    fn rejects_delta_result_size_bomb_without_oom() {
4452        let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
4453        for &declared in bombs {
4454            for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
4455                let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
4456                let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
4457                let join_result = handle.join();
4458                assert!(
4459                    join_result.is_ok(),
4460                    "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
4461                     instead of erroring cleanly"
4462                );
4463                let parse_result =
4464                    join_result.expect("parse thread should not panic on a delta bomb");
4465                assert!(
4466                    parse_result.is_err(),
4467                    "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
4468                     as invalid (result.len() != declared)"
4469                );
4470            }
4471        }
4472    }
4473
4474    /// A legitimate (truthful) delta whose result-size varint matches the real
4475    /// produced length must still resolve correctly — the bound only caps the
4476    /// speculative reservation, it must not break real delta application.
4477    #[test]
4478    fn applies_legitimate_delta_after_result_size_bound() {
4479        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
4480            let base = b"hello";
4481            let result = b"hello world";
4482            let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
4483            let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
4484            assert_eq!(parsed.entries.len(), 2);
4485            assert_eq!(parsed.entries[0].object.body, base);
4486            assert_eq!(parsed.entries[1].object.body, result);
4487        }
4488    }
4489
4490    #[test]
4491    fn bounded_inflate_reserve_caps_attacker_declared_size() {
4492        // A tiny compressed input can't justify a multi-gigabyte reservation.
4493        assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
4494        // The absolute ceiling caps even a large input-justified hint.
4495        assert_eq!(
4496            bounded_inflate_reserve(usize::MAX, usize::MAX),
4497            MAX_INFLATE_RESERVE
4498        );
4499        // A modest legitimate hint is preserved unchanged (no regression for real
4500        // objects): 1000 bytes of output from 500 bytes of input is well within
4501        // both bounds.
4502        assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
4503        // Floor of 64 for tiny hints.
4504        assert_eq!(bounded_inflate_reserve(0, 0), 64);
4505    }
4506
4507    #[test]
4508    fn rejects_bundle_pack_payload_with_wrong_object_format() {
4509        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
4510        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
4511            .expect("test operation should succeed");
4512        let bundle_bytes =
4513            format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
4514                .into_bytes()
4515                .into_iter()
4516                .chain(pack)
4517                .collect::<Vec<_>>();
4518        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
4519            .expect("test operation should succeed");
4520
4521        assert!(PackFile::parse_bundle(&bundle).is_err());
4522    }
4523
4524    #[test]
4525    fn writes_pack_and_index_that_round_trip() {
4526        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
4527        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
4528            .expect("test operation should succeed");
4529        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4530        let index =
4531            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
4532        let oid = object
4533            .object_id(ObjectFormat::Sha1)
4534            .expect("test operation should succeed");
4535        assert_eq!(pack.entries[0].object, object);
4536        assert_eq!(index.pack_checksum, pack.checksum);
4537        assert_eq!(
4538            index
4539                .find(&oid)
4540                .expect("test operation should succeed")
4541                .offset,
4542            12
4543        );
4544    }
4545
4546    #[test]
4547    fn writes_sha256_pack_and_index_that_round_trip() {
4548        let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
4549        let written =
4550            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4551                .expect("test operation should succeed");
4552        let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
4553            .expect("test operation should succeed");
4554        let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
4555            .expect("test operation should succeed");
4556        let oid = object
4557            .object_id(ObjectFormat::Sha256)
4558            .expect("test operation should succeed");
4559        assert_eq!(pack.entries[0].object, object);
4560        assert_eq!(index.pack_checksum, pack.checksum);
4561        assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
4562        assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
4563        assert_eq!(
4564            index
4565                .find(&oid)
4566                .expect("test operation should succeed")
4567                .offset,
4568            12
4569        );
4570    }
4571
4572    #[test]
4573    fn indexes_existing_sha256_pack_bytes() {
4574        let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
4575        let written =
4576            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4577                .expect("test operation should succeed");
4578
4579        let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
4580            .expect("test operation should succeed");
4581        let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
4582            .expect("test operation should succeed");
4583
4584        assert_eq!(indexed.pack_checksum, written.checksum);
4585        assert_eq!(indexed.entries, written.entries);
4586        assert_eq!(index.pack_checksum, written.checksum);
4587        assert_eq!(index.entries, written.entries);
4588    }
4589
4590    #[test]
4591    fn indexes_existing_delta_pack_bytes() {
4592        let (base, changed) = similar_blob_objects();
4593        let options = delta_pack_options(true);
4594        let written = PackFile::write_packed_with_options(
4595            &[base, changed.clone()],
4596            ObjectFormat::Sha1,
4597            &options,
4598        )
4599        .expect("test operation should succeed");
4600
4601        let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
4602            .expect("test operation should succeed");
4603        let index =
4604            PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
4605        let changed_oid = changed
4606            .object_id(ObjectFormat::Sha1)
4607            .expect("test operation should succeed");
4608
4609        assert_eq!(indexed.pack_checksum, written.checksum);
4610        assert_eq!(indexed.entries, written.entries);
4611        assert_eq!(
4612            index
4613                .find(&changed_oid)
4614                .expect("test operation should succeed")
4615                .offset,
4616            written.entries[1].offset
4617        );
4618        assert_eq!(
4619            index
4620                .find(&changed_oid)
4621                .expect("test operation should succeed")
4622                .crc32,
4623            written.entries[1].crc32
4624        );
4625    }
4626
4627    #[test]
4628    fn writes_ref_delta_pack_and_index_that_round_trip() {
4629        let (base, changed) = similar_blob_objects();
4630        let options = delta_pack_options(false);
4631        let written = PackFile::write_packed_with_options(
4632            &[base.clone(), changed.clone()],
4633            ObjectFormat::Sha1,
4634            &options,
4635        )
4636        .expect("test operation should succeed");
4637        let mut second_offset = written.entries[1].offset as usize;
4638        let header = parse_entry_header(&written.pack, &mut second_offset)
4639            .expect("test operation should succeed");
4640        assert_eq!(header.kind, PackObjectKind::RefDelta);
4641
4642        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4643        let index =
4644            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
4645        let oid = changed
4646            .object_id(ObjectFormat::Sha1)
4647            .expect("test operation should succeed");
4648        assert_eq!(pack.entries[0].object, base);
4649        assert_eq!(pack.entries[1].object, changed);
4650        assert_eq!(index.pack_checksum, pack.checksum);
4651        assert_eq!(
4652            index
4653                .find(&oid)
4654                .expect("test operation should succeed")
4655                .offset,
4656            written.entries[1].offset
4657        );
4658    }
4659
4660    #[test]
4661    fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
4662        let (base, changed) = similar_blob_objects();
4663        let options = delta_pack_options(true);
4664        let written = PackFile::write_packed_with_options(
4665            &[base, changed.clone()],
4666            ObjectFormat::Sha1,
4667            &options,
4668        )
4669        .expect("test operation should succeed");
4670        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
4671        let mut second = written.entries[1].offset as usize;
4672        assert_eq!(
4673            parse_entry_header(&written.pack, &mut second)
4674                .expect("test operation should succeed")
4675                .kind,
4676            PackObjectKind::OfsDelta
4677        );
4678        // Ground truth from a full parse; single-object decode must match at every offset.
4679        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4680        for po in &parsed.entries {
4681            let got =
4682                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
4683                    Ok(None)
4684                })
4685                .expect("test operation should succeed");
4686            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
4687        }
4688    }
4689
4690    /// A [`HeaderTypeCache`] over a plain map, for asserting the cached header
4691    /// read is byte-identical to the uncached one cold and warm (sley#26).
4692    #[derive(Default)]
4693    struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
4694
4695    impl HeaderTypeCache for MapHeaderTypeCache {
4696        fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
4697            self.0.get(&pack_offset).copied()
4698        }
4699        fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
4700            self.0.insert(pack_offset, header);
4701        }
4702    }
4703
4704    #[test]
4705    fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
4706        let (base, changed) = similar_blob_objects();
4707        let options = delta_pack_options(true);
4708        let written =
4709            PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
4710                .expect("test operation should succeed");
4711        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
4712        let mut second = written.entries[1].offset as usize;
4713        assert_eq!(
4714            parse_entry_header(&written.pack, &mut second)
4715                .expect("test operation should succeed")
4716                .kind,
4717            PackObjectKind::OfsDelta
4718        );
4719
4720        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4721        let mut cache = MapHeaderTypeCache::default();
4722        for po in &parsed.entries {
4723            let uncached =
4724                read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
4725                    Ok(None)
4726                })
4727                .expect("test operation should succeed");
4728            // Type inherited from the chain base; size is the inflated body length.
4729            assert_eq!(
4730                uncached,
4731                (po.object.object_type, po.object.body.len() as u64),
4732                "uncached header at offset {}",
4733                po.entry.offset
4734            );
4735            // Cold cache: must agree with the uncached read and populate the memo.
4736            let cold = read_object_header_at_with_cache(
4737                &written.pack,
4738                po.entry.offset,
4739                ObjectFormat::Sha1,
4740                |_| Ok(None),
4741                &mut cache,
4742            )
4743            .expect("test operation should succeed");
4744            assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
4745        }
4746        // Warm cache: every offset now resolves from the memo and is still correct,
4747        // proving the fast path does not change behavior (sley#26).
4748        for po in &parsed.entries {
4749            let warm = read_object_header_at_with_cache(
4750                &written.pack,
4751                po.entry.offset,
4752                ObjectFormat::Sha1,
4753                |_| panic!("warm cache must not re-walk the chain"),
4754                &mut cache,
4755            )
4756            .expect("test operation should succeed");
4757            assert_eq!(
4758                warm,
4759                (po.object.object_type, po.object.body.len() as u64),
4760                "warm cache at offset {}",
4761                po.entry.offset
4762            );
4763        }
4764    }
4765
4766    #[test]
4767    fn read_object_at_matches_full_parse_for_ref_delta_pack() {
4768        let (base, changed) = similar_blob_objects();
4769        let options = delta_pack_options(false);
4770        let written = PackFile::write_packed_with_options(
4771            &[base, changed.clone()],
4772            ObjectFormat::Sha1,
4773            &options,
4774        )
4775        .expect("test operation should succeed");
4776        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4777        let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
4778            .entries
4779            .iter()
4780            .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
4781            .collect();
4782        for po in &parsed.entries {
4783            let got =
4784                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
4785                    Ok(by_oid.get(oid).cloned())
4786                })
4787                .expect("test operation should succeed");
4788            assert_eq!(*got, po.object);
4789        }
4790    }
4791
4792    /// A test-only [`PackDeltaCache`] that records every decode and counts hits,
4793    /// used to prove the cached decode path is byte-identical to the uncached
4794    /// one and that bases are reused across reads.
4795    #[derive(Default)]
4796    struct CountingDeltaCache {
4797        map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
4798        hits: std::cell::Cell<usize>,
4799        inserts: std::cell::Cell<usize>,
4800    }
4801
4802    impl PackDeltaCache for CountingDeltaCache {
4803        fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
4804            let hit = self.map.borrow().get(&offset).cloned();
4805            if hit.is_some() {
4806                self.hits.set(self.hits.get() + 1);
4807            }
4808            hit
4809        }
4810        fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
4811            self.inserts.set(self.inserts.get() + 1);
4812            self.map.borrow_mut().insert(offset, object);
4813        }
4814    }
4815
4816    #[test]
4817    fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
4818        // A multi-object pack with a real ofs-delta chain so the cache has bases
4819        // to reuse. Build several similar blobs to encourage deltification.
4820        let mut objects = Vec::new();
4821        for idx in 0..8u32 {
4822            let mut body = vec![b'x'; 4096];
4823            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
4824            objects.push(EncodedObject::new(ObjectType::Blob, body));
4825        }
4826        let options = delta_pack_options(true);
4827        let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
4828            .expect("test operation should succeed");
4829        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4830
4831        let cache = CountingDeltaCache::default();
4832        // Read every object twice through the cache; each result must equal the
4833        // ground-truth from the full parse, byte for byte, both times.
4834        for _ in 0..2 {
4835            for po in &parsed.entries {
4836                let got = read_object_at_with_cache_arc(
4837                    &written.pack,
4838                    po.entry.offset,
4839                    ObjectFormat::Sha1,
4840                    |_| Ok(None),
4841                    &cache,
4842                )
4843                .expect("test operation should succeed");
4844                assert_eq!(*got, po.object, "offset {}", po.entry.offset);
4845            }
4846        }
4847        // The second pass reads everything straight from the cache, so there must
4848        // be at least one hit (proving reuse, not just correctness).
4849        assert!(cache.hits.get() > 0, "cache never served a warm object");
4850    }
4851
4852    #[test]
4853    fn writes_ofs_delta_pack_and_index_that_round_trip() {
4854        let (base, changed) = similar_blob_objects();
4855        let options = delta_pack_options(true);
4856        let written = PackFile::write_packed_with_options(
4857            &[base.clone(), changed.clone()],
4858            ObjectFormat::Sha1,
4859            &options,
4860        )
4861        .expect("test operation should succeed");
4862        let mut second_offset = written.entries[1].offset as usize;
4863        let header = parse_entry_header(&written.pack, &mut second_offset)
4864            .expect("test operation should succeed");
4865        assert_eq!(header.kind, PackObjectKind::OfsDelta);
4866
4867        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
4868        let index =
4869            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
4870        let oid = changed
4871            .object_id(ObjectFormat::Sha1)
4872            .expect("test operation should succeed");
4873        assert_eq!(pack.entries[0].object, base);
4874        assert_eq!(pack.entries[1].object, changed);
4875        assert_eq!(index.pack_checksum, pack.checksum);
4876        assert_eq!(
4877            index
4878                .find(&oid)
4879                .expect("test operation should succeed")
4880                .offset,
4881            written.entries[1].offset
4882        );
4883    }
4884
4885    #[test]
4886    fn resolves_ofs_delta_pack_entry() {
4887        let base = b"hello";
4888        let result = b"hello world";
4889        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
4890        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
4891        assert_eq!(parsed.entries.len(), 2);
4892        assert_eq!(parsed.entries[0].object.body, base);
4893        assert_eq!(parsed.entries[1].object.body, result);
4894        assert_eq!(
4895            parsed.entries[1].entry.oid,
4896            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
4897                .expect("test operation should succeed")
4898        );
4899    }
4900
4901    #[test]
4902    fn resolves_ref_delta_pack_entry() {
4903        let base = b"hello";
4904        let result = b"hello world";
4905        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
4906        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
4907        assert_eq!(parsed.entries.len(), 2);
4908        assert_eq!(parsed.entries[0].object.body, base);
4909        assert_eq!(parsed.entries[1].object.body, result);
4910        assert_eq!(
4911            parsed.entries[1].entry.oid,
4912            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
4913                .expect("test operation should succeed")
4914        );
4915    }
4916
4917    #[test]
4918    fn resolves_thin_ref_delta_pack_entry_with_external_base() {
4919        let base = b"hello";
4920        let result = b"hello world";
4921        let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
4922        assert!(PackFile::parse_sha1(&pack).is_err());
4923
4924        let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
4925            .expect("test operation should succeed");
4926        let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
4927            if oid == &base_oid {
4928                Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
4929            } else {
4930                Ok(None)
4931            }
4932        })
4933        .expect("test operation should succeed");
4934        assert_eq!(parsed.entries.len(), 1);
4935        assert_eq!(parsed.entries[0].object.body, result);
4936        assert_eq!(
4937            parsed.entries[0].entry.oid,
4938            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
4939                .expect("test operation should succeed")
4940        );
4941    }
4942
4943    #[test]
4944    fn rejects_bad_pack_checksum() {
4945        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
4946        let last = pack.len() - 1;
4947        pack[last] ^= 1;
4948        assert!(PackFile::parse_sha1(&pack).is_err());
4949    }
4950
4951    #[test]
4952    fn raw_pack_index_rejects_bad_pack_checksum() {
4953        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
4954        let last = pack.len() - 1;
4955        pack[last] ^= 1;
4956        assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
4957    }
4958
4959    #[test]
4960    fn pack_index_writer_rejects_duplicate_object_ids() {
4961        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
4962            .expect("test operation should succeed");
4963        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
4964            .expect("test operation should succeed");
4965        let entries = vec![
4966            PackIndexEntry {
4967                oid,
4968                crc32: 1,
4969                offset: 12,
4970            },
4971            PackIndexEntry {
4972                oid,
4973                crc32: 2,
4974                offset: 24,
4975            },
4976        ];
4977        assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
4978    }
4979
4980    #[test]
4981    fn parses_single_entry_pack_index() {
4982        let oid = ObjectId::from_hex(
4983            ObjectFormat::Sha1,
4984            "ce013625030ba8dba906f756967f9e9ca394464a",
4985        )
4986        .expect("test operation should succeed");
4987        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
4988            .expect("test operation should succeed");
4989        let index = single_entry_index(
4990            ObjectFormat::Sha1,
4991            oid,
4992            0x1234_5678,
4993            12,
4994            pack_checksum.clone(),
4995        );
4996        let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
4997        assert_eq!(parsed.version, 2);
4998        assert_eq!(parsed.pack_checksum, pack_checksum);
4999        assert_eq!(parsed.entries.len(), 1);
5000        assert_eq!(
5001            parsed
5002                .find(&oid)
5003                .expect("test operation should succeed")
5004                .offset,
5005            12
5006        );
5007        assert_eq!(
5008            parsed
5009                .find(&oid)
5010                .expect("test operation should succeed")
5011                .crc32,
5012            0x1234_5678
5013        );
5014    }
5015
5016    #[test]
5017    fn parses_single_entry_pack_index_v1() {
5018        let oid = ObjectId::from_hex(
5019            ObjectFormat::Sha1,
5020            "ce013625030ba8dba906f756967f9e9ca394464a",
5021        )
5022        .expect("test operation should succeed");
5023        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5024            .expect("test operation should succeed");
5025        let index =
5026            single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
5027        let parsed =
5028            PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
5029        assert_eq!(parsed.version, 1);
5030        assert_eq!(parsed.pack_checksum, pack_checksum);
5031        assert_eq!(parsed.entries.len(), 1);
5032        assert_eq!(
5033            parsed
5034                .find(&oid)
5035                .expect("test operation should succeed")
5036                .offset,
5037            0x1234_5678
5038        );
5039        assert_eq!(
5040            parsed
5041                .find(&oid)
5042                .expect("test operation should succeed")
5043                .crc32,
5044            0
5045        );
5046    }
5047
5048    #[test]
5049    fn rejects_bad_pack_index_v1_checksum() {
5050        let oid = ObjectId::from_hex(
5051            ObjectFormat::Sha1,
5052            "ce013625030ba8dba906f756967f9e9ca394464a",
5053        )
5054        .expect("test operation should succeed");
5055        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5056            .expect("test operation should succeed");
5057        let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
5058        let last = index.len() - 1;
5059        index[last] ^= 1;
5060        assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
5061    }
5062
5063    #[test]
5064    fn parses_pack_reverse_index() {
5065        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5066            .expect("test operation should succeed");
5067        let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
5068            .expect("test operation should succeed");
5069        let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
5070            .expect("test operation should succeed");
5071        assert_eq!(parsed.version, 1);
5072        assert_eq!(parsed.format, ObjectFormat::Sha1);
5073        assert_eq!(parsed.positions, vec![2, 0, 1]);
5074        assert_eq!(parsed.pack_checksum, pack_checksum);
5075        assert_eq!(
5076            PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
5077                .expect("test operation should succeed"),
5078            reverse_index
5079        );
5080    }
5081
5082    #[test]
5083    fn rejects_bad_pack_reverse_index_checksum() {
5084        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5085            .expect("test operation should succeed");
5086        let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
5087            .expect("test operation should succeed");
5088        let last = reverse_index.len() - 1;
5089        reverse_index[last] ^= 1;
5090        assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
5091    }
5092
5093    #[test]
5094    fn rejects_bad_pack_reverse_index_positions() {
5095        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5096            .expect("test operation should succeed");
5097        let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
5098        assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
5099        let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
5100        assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
5101        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5102            .expect("test operation should succeed");
5103        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
5104        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
5105    }
5106
5107    #[test]
5108    fn parses_pack_mtimes() {
5109        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5110            .expect("test operation should succeed");
5111        let mtimes = PackMtimes::write(
5112            ObjectFormat::Sha1,
5113            &[1, 1_700_000_000, u32::MAX],
5114            &pack_checksum,
5115        )
5116        .expect("test operation should succeed");
5117        let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
5118            .expect("test operation should succeed");
5119        assert_eq!(parsed.version, 1);
5120        assert_eq!(parsed.format, ObjectFormat::Sha1);
5121        assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
5122        assert_eq!(parsed.pack_checksum, pack_checksum);
5123        assert_eq!(
5124            PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
5125                .expect("test operation should succeed"),
5126            mtimes
5127        );
5128    }
5129
5130    #[test]
5131    fn rejects_bad_pack_mtimes_checksum() {
5132        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5133            .expect("test operation should succeed");
5134        let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
5135            .expect("test operation should succeed");
5136        let last = mtimes.len() - 1;
5137        mtimes[last] ^= 1;
5138        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
5139    }
5140
5141    #[test]
5142    fn rejects_bad_pack_mtimes_shape() {
5143        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5144            .expect("test operation should succeed");
5145        let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
5146        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
5147
5148        let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
5149        wrong_hash[11] = 2;
5150        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
5151        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
5152            .expect("test operation should succeed");
5153        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
5154        assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
5155    }
5156
5157    #[test]
5158    fn parses_multi_pack_index_header_and_chunk_lookup() {
5159        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
5160            .expect("test operation should succeed");
5161        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
5162            .expect("test operation should succeed");
5163        let chunks = midx_chunks_with_pack_names(
5164            ObjectFormat::Sha1,
5165            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
5166            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
5167        );
5168        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
5169        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
5170            .expect("test operation should succeed");
5171        assert_eq!(parsed.version, 2);
5172        assert_eq!(parsed.format, ObjectFormat::Sha1);
5173        assert_eq!(parsed.pack_count, 2);
5174        assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
5175        assert_eq!(parsed.object_count, 2);
5176        assert_eq!(parsed.objects.len(), 2);
5177        assert_eq!(
5178            parsed
5179                .find(&first)
5180                .expect("test operation should succeed")
5181                .pack_int_id,
5182            0
5183        );
5184        assert_eq!(
5185            parsed
5186                .find(&first)
5187                .expect("test operation should succeed")
5188                .offset,
5189            12
5190        );
5191        assert_eq!(
5192            parsed
5193                .find(&second)
5194                .expect("test operation should succeed")
5195                .pack_int_id,
5196            1
5197        );
5198        assert_eq!(
5199            parsed
5200                .find(&second)
5201                .expect("test operation should succeed")
5202                .offset,
5203            0x1_0000_0000
5204        );
5205        assert_eq!(parsed.reverse_index, None);
5206        assert_eq!(parsed.bitmapped_packs, None);
5207        assert_eq!(parsed.chunks.len(), 5);
5208        assert_eq!(parsed.chunks[0].id, *b"PNAM");
5209        assert_eq!(parsed.chunks[0].offset, 84);
5210        assert_eq!(parsed.chunks[0].len, 24);
5211        assert_eq!(parsed.chunks[1].id, *b"OIDF");
5212        assert_eq!(parsed.chunks[1].offset, 108);
5213        assert_eq!(parsed.chunks[1].len, 1024);
5214    }
5215
5216    #[test]
5217    fn rejects_bad_multi_pack_index_checksum() {
5218        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
5219        let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
5220        let last = midx.len() - 1;
5221        midx[last] ^= 1;
5222        assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
5223    }
5224
5225    #[test]
5226    fn rejects_bad_multi_pack_index_shape() {
5227        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
5228        let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
5229        wrong_hash[5] = 2;
5230        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
5231        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
5232            .expect("test operation should succeed");
5233        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
5234        assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
5235
5236        let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
5237        missing_terminator[12] = b'B';
5238        let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
5239        let checksum =
5240            sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
5241                .expect("test operation should succeed");
5242        missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
5243        assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
5244
5245        let mut bad_offset = multi_pack_index(
5246            ObjectFormat::Sha1,
5247            2,
5248            0,
5249            &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
5250        );
5251        bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
5252        let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
5253        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
5254            .expect("test operation should succeed");
5255        bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
5256        assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
5257    }
5258
5259    #[test]
5260    fn rejects_bad_multi_pack_index_pack_names() {
5261        let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
5262        assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
5263
5264        let too_few = multi_pack_index(
5265            ObjectFormat::Sha1,
5266            2,
5267            2,
5268            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
5269        );
5270        assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
5271
5272        let bad_padding = multi_pack_index(
5273            ObjectFormat::Sha1,
5274            2,
5275            1,
5276            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
5277        );
5278        assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
5279
5280        let unsorted_v1 = multi_pack_index(
5281            ObjectFormat::Sha1,
5282            1,
5283            2,
5284            &midx_chunks_with_pack_names(
5285                ObjectFormat::Sha1,
5286                b"pack-b.idx\0pack-a.idx\0".to_vec(),
5287                &[],
5288            ),
5289        );
5290        assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
5291
5292        let unsorted_v2 = multi_pack_index(
5293            ObjectFormat::Sha1,
5294            2,
5295            2,
5296            &midx_chunks_with_pack_names(
5297                ObjectFormat::Sha1,
5298                b"pack-b.idx\0pack-a.idx\0".to_vec(),
5299                &[],
5300            ),
5301        );
5302        let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
5303            .expect("test operation should succeed");
5304        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
5305    }
5306
5307    #[test]
5308    fn rejects_bad_multi_pack_index_object_tables() {
5309        let oid_a = ObjectId::from_hex(
5310            ObjectFormat::Sha1,
5311            "1111111111111111111111111111111111111111",
5312        )
5313        .expect("test operation should succeed");
5314        let oid_b = ObjectId::from_hex(
5315            ObjectFormat::Sha1,
5316            "2222222222222222222222222222222222222222",
5317        )
5318        .expect("test operation should succeed");
5319
5320        let missing_oidf = multi_pack_index(
5321            ObjectFormat::Sha1,
5322            2,
5323            1,
5324            &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
5325        );
5326        assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());
5327
5328        let bad_fanout = vec![
5329            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
5330            (*b"OIDF", vec![0; 256 * 4]),
5331            (*b"OIDL", oid_a.as_bytes().to_vec()),
5332            (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
5333        ];
5334        let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
5335        assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());
5336
5337        let mut unsorted = Vec::new();
5338        unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
5339        unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
5340        let mut oid_lookup = Vec::new();
5341        oid_lookup.extend_from_slice(oid_b.as_bytes());
5342        oid_lookup.extend_from_slice(oid_a.as_bytes());
5343        unsorted.push((*b"OIDL", oid_lookup));
5344        unsorted.push((
5345            *b"OOFF",
5346            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
5347        ));
5348        let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
5349        assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());
5350
5351        let bad_pack = multi_pack_index(
5352            ObjectFormat::Sha1,
5353            2,
5354            1,
5355            &midx_chunks_with_pack_names(
5356                ObjectFormat::Sha1,
5357                b"pack-a.idx\0\0".to_vec(),
5358                &[(oid_a.clone(), 1, 12)],
5359            ),
5360        );
5361        assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());
5362
5363        let mut large_offsets = Vec::new();
5364        let missing_loff = vec![
5365            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
5366            (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
5367            (*b"OIDL", oid_a.as_bytes().to_vec()),
5368            (
5369                *b"OOFF",
5370                midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
5371            ),
5372        ];
5373        let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
5374        assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());
5375
5376        let mut bad_loff =
5377            midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
5378        bad_loff.push((*b"LOFF", vec![0]));
5379        let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
5380        assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
5381    }
5382
5383    #[test]
5384    fn parses_multi_pack_index_bitmap_chunks() {
5385        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
5386            .expect("test operation should succeed");
5387        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
5388            .expect("test operation should succeed");
5389        let mut chunks = midx_chunks_with_pack_names(
5390            ObjectFormat::Sha1,
5391            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
5392            &[(first, 0, 12), (second, 1, 24)],
5393        );
5394        chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
5395        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
5396        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
5397
5398        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
5399            .expect("test operation should succeed");
5400        assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
5401        assert_eq!(
5402            parsed.bitmapped_packs,
5403            Some(vec![
5404                MultiPackBitmapPack {
5405                    bitmap_pos: 0,
5406                    bitmap_nr: 1,
5407                },
5408                MultiPackBitmapPack {
5409                    bitmap_pos: 1,
5410                    bitmap_nr: 1,
5411                },
5412            ])
5413        );
5414    }
5415
5416    #[test]
5417    fn writes_multi_pack_index_that_round_trips() {
5418        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
5419            .expect("test operation should succeed");
5420        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
5421            .expect("test operation should succeed");
5422        let bytes = MultiPackIndex::write(
5423            ObjectFormat::Sha1,
5424            2,
5425            &["pack-b.idx".into(), "pack-a.idx".into()],
5426            &[
5427                MultiPackIndexEntry {
5428                    oid: second.clone(),
5429                    pack_int_id: 0,
5430                    offset: 0x1_0000_0000,
5431                },
5432                MultiPackIndexEntry {
5433                    oid: first.clone(),
5434                    pack_int_id: 1,
5435                    offset: 12,
5436                },
5437            ],
5438        )
5439        .expect("test operation should succeed");
5440
5441        let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
5442            .expect("test operation should succeed");
5443        assert_eq!(parsed.version, 2);
5444        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
5445        assert_eq!(parsed.object_count, 2);
5446        assert_eq!(
5447            parsed
5448                .find(&first)
5449                .expect("test operation should succeed")
5450                .pack_int_id,
5451            1
5452        );
5453        assert_eq!(
5454            parsed
5455                .find(&first)
5456                .expect("test operation should succeed")
5457                .offset,
5458            12
5459        );
5460        assert_eq!(
5461            parsed
5462                .find(&second)
5463                .expect("test operation should succeed")
5464                .pack_int_id,
5465            0
5466        );
5467        assert_eq!(
5468            parsed
5469                .find(&second)
5470                .expect("test operation should succeed")
5471                .offset,
5472            0x1_0000_0000
5473        );
5474        assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
5475    }
5476
5477    #[test]
5478    fn write_multi_pack_index_rejects_invalid_inputs() {
5479        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
5480            .expect("test operation should succeed");
5481        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
5482        assert!(
5483            MultiPackIndex::write(
5484                ObjectFormat::Sha1,
5485                1,
5486                &["pack-b.idx".into(), "pack-a.idx".into()],
5487                &[],
5488            )
5489            .is_err()
5490        );
5491        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
5492        assert!(
5493            MultiPackIndex::write(
5494                ObjectFormat::Sha1,
5495                2,
5496                &["pack-a.idx".into()],
5497                &[MultiPackIndexEntry {
5498                    oid,
5499                    pack_int_id: 1,
5500                    offset: 12,
5501                }],
5502            )
5503            .is_err()
5504        );
5505        assert!(
5506            MultiPackIndex::write(
5507                ObjectFormat::Sha1,
5508                2,
5509                &["pack-a.idx".into()],
5510                &[
5511                    MultiPackIndexEntry {
5512                        oid,
5513                        pack_int_id: 0,
5514                        offset: 12,
5515                    },
5516                    MultiPackIndexEntry {
5517                        oid,
5518                        pack_int_id: 0,
5519                        offset: 24,
5520                    },
5521                ],
5522            )
5523            .is_err()
5524        );
5525    }
5526
5527    #[test]
5528    fn rejects_bad_multi_pack_index_bitmap_chunks() {
5529        let oid_a = ObjectId::from_hex(
5530            ObjectFormat::Sha1,
5531            "1111111111111111111111111111111111111111",
5532        )
5533        .expect("test operation should succeed");
5534        let oid_b = ObjectId::from_hex(
5535            ObjectFormat::Sha1,
5536            "2222222222222222222222222222222222222222",
5537        )
5538        .expect("test operation should succeed");
5539
5540        let mut duplicate_ridx = midx_chunks_with_pack_names(
5541            ObjectFormat::Sha1,
5542            b"pack-a.idx\0\0".to_vec(),
5543            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
5544        );
5545        duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
5546        let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
5547        assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
5548
5549        let mut short_btmp = midx_chunks_with_pack_names(
5550            ObjectFormat::Sha1,
5551            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
5552            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
5553        );
5554        short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
5555        let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
5556        assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
5557
5558        let mut out_of_range_btmp = midx_chunks_with_pack_names(
5559            ObjectFormat::Sha1,
5560            b"pack-a.idx\0\0".to_vec(),
5561            &[(oid_a, 0, 12), (oid_b, 0, 24)],
5562        );
5563        out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
5564        let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
5565        assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
5566    }
5567
5568    #[test]
5569    fn parses_pack_bitmap_index_with_hash_cache() {
5570        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5571            .expect("test operation should succeed");
5572        let bitmap = pack_bitmap_index(
5573            ObjectFormat::Sha1,
5574            3,
5575            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
5576            &pack_checksum,
5577            &[(2, 0, 1, &[0b101])],
5578            Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
5579        );
5580
5581        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
5582            .expect("test operation should succeed");
5583        assert_eq!(parsed.version, 1);
5584        assert_eq!(parsed.format, ObjectFormat::Sha1);
5585        assert_eq!(
5586            parsed.options,
5587            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
5588        );
5589        assert_eq!(parsed.pack_checksum, pack_checksum);
5590        assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
5591        assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
5592        assert_eq!(parsed.entries.len(), 1);
5593        let entry = parsed
5594            .entry_for_index_position(2)
5595            .expect("test operation should succeed");
5596        assert_eq!(entry.xor_offset, 0);
5597        assert_eq!(entry.flags, 1);
5598        assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
5599        assert_eq!(
5600            parsed.name_hash_cache,
5601            Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
5602        );
5603    }
5604
5605    #[test]
5606    fn parses_pack_bitmap_index_sha256() {
5607        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
5608            .expect("test operation should succeed");
5609        let bitmap = pack_bitmap_index(
5610            ObjectFormat::Sha256,
5611            2,
5612            PackBitmapIndex::OPTION_FULL_DAG,
5613            &pack_checksum,
5614            &[(0, 0, 0, &[0b11])],
5615            None,
5616        );
5617
5618        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
5619            .expect("test operation should succeed");
5620        assert_eq!(parsed.version, 1);
5621        assert_eq!(parsed.format, ObjectFormat::Sha256);
5622        assert_eq!(parsed.pack_checksum, pack_checksum);
5623        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
5624        assert_eq!(parsed.entries[0].object_position, 0);
5625        assert_eq!(parsed.name_hash_cache, None);
5626    }
5627
5628    #[test]
5629    fn parses_upstream_git_written_pack_bitmap_index() {
5630        let root = unique_temp_dir("git-pack-bitmap-upstream");
5631        fs::create_dir_all(&root).expect("test operation should succeed");
5632        {
5633            run_git_success(&root, &["init", "-q", "-b", "main"]);
5634            run_git_success(
5635                &root,
5636                &[
5637                    "-c",
5638                    "user.name=Example User",
5639                    "-c",
5640                    "user.email=example@example.invalid",
5641                    "commit",
5642                    "--allow-empty",
5643                    "-q",
5644                    "-m",
5645                    "one",
5646                ],
5647            );
5648            run_git_success(
5649                &root,
5650                &[
5651                    "-c",
5652                    "user.name=Example User",
5653                    "-c",
5654                    "user.email=example@example.invalid",
5655                    "commit",
5656                    "--allow-empty",
5657                    "-q",
5658                    "-m",
5659                    "two",
5660                ],
5661            );
5662            run_git_success(&root, &["repack", "-adb"]);
5663            let pack_dir = root.join(".git").join("objects").join("pack");
5664            let idx_path = single_path_with_extension(&pack_dir, "idx");
5665            let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
5666            let index = PackIndex::parse(
5667                &fs::read(idx_path).expect("test operation should succeed"),
5668                ObjectFormat::Sha1,
5669            )
5670            .expect("test operation should succeed");
5671            let bitmap = PackBitmapIndex::parse(
5672                &fs::read(bitmap_path).expect("test operation should succeed"),
5673                ObjectFormat::Sha1,
5674                index.entries.len(),
5675            )
5676            .expect("test operation should succeed");
5677            assert_eq!(bitmap.pack_checksum, index.pack_checksum);
5678            assert!(!bitmap.entries.is_empty());
5679        };
5680        let _ = fs::remove_dir_all(&root);
5681    }
5682
5683    #[test]
5684    fn rejects_bad_pack_bitmap_index_header_and_checksum() {
5685        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5686            .expect("test operation should succeed");
5687        let bitmap = pack_bitmap_index(
5688            ObjectFormat::Sha1,
5689            1,
5690            PackBitmapIndex::OPTION_FULL_DAG,
5691            &pack_checksum,
5692            &[(0, 0, 0, &[1])],
5693            None,
5694        );
5695
5696        let mut bad_signature = bitmap.clone();
5697        bad_signature[0] = b'X';
5698        assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
5699
5700        let mut bad_version = bitmap.clone();
5701        bad_version[5] = 2;
5702        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
5703        assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
5704
5705        let mut bad_option = bitmap.clone();
5706        bad_option[7] = 0x20;
5707        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
5708        assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
5709
5710        let mut bad_checksum = bitmap;
5711        let last = bad_checksum.len() - 1;
5712        bad_checksum[last] ^= 1;
5713        assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
5714    }
5715
5716    #[test]
5717    fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
5718        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5719            .expect("test operation should succeed");
5720        let bitmap = pack_bitmap_index(
5721            ObjectFormat::Sha1,
5722            2,
5723            PackBitmapIndex::OPTION_FULL_DAG,
5724            &pack_checksum,
5725            &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
5726            None,
5727        );
5728
5729        let mut truncated = bitmap.clone();
5730        truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
5731        refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
5732        assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
5733
5734        let mut out_of_range_position = pack_bitmap_index(
5735            ObjectFormat::Sha1,
5736            2,
5737            PackBitmapIndex::OPTION_FULL_DAG,
5738            &pack_checksum,
5739            &[(2, 0, 0, &[0b01])],
5740            None,
5741        );
5742        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
5743        refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
5744        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
5745
5746        let invalid_xor = pack_bitmap_index(
5747            ObjectFormat::Sha1,
5748            2,
5749            PackBitmapIndex::OPTION_FULL_DAG,
5750            &pack_checksum,
5751            &[(0, 1, 0, &[0b01])],
5752            None,
5753        );
5754        assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
5755    }
5756
5757    #[test]
5758    fn parses_single_entry_pack_index_sha256() {
5759        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
5760            .expect("test operation should succeed");
5761        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
5762            .expect("test operation should succeed");
5763        let index = single_entry_index(
5764            ObjectFormat::Sha256,
5765            oid,
5766            0x1234_5678,
5767            12,
5768            pack_checksum.clone(),
5769        );
5770        let parsed =
5771            PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
5772        assert_eq!(parsed.version, 2);
5773        assert_eq!(parsed.pack_checksum, pack_checksum);
5774        assert_eq!(parsed.entries.len(), 1);
5775        assert_eq!(
5776            parsed
5777                .find(&oid)
5778                .expect("test operation should succeed")
5779                .offset,
5780            12
5781        );
5782        assert_eq!(
5783            parsed
5784                .find(&oid)
5785                .expect("test operation should succeed")
5786                .crc32,
5787            0x1234_5678
5788        );
5789        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
5790    }
5791
5792    #[test]
5793    fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
5794        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
5795    }
5796
5797    #[test]
5798    fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
5799        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
5800    }
5801
5802    #[test]
5803    fn write_packed_rejects_duplicate_objects() {
5804        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
5805        assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
5806    }
5807
5808    #[test]
5809    fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
5810        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
5811        let sha1 = object
5812            .object_id(ObjectFormat::Sha1)
5813            .expect("test operation should succeed");
5814        let sha256 = object
5815            .object_id(ObjectFormat::Sha256)
5816            .expect("test operation should succeed");
5817        let duplicate = [
5818            PackInput {
5819                oid: &sha1,
5820                object: &object,
5821            },
5822            PackInput {
5823                oid: &sha1,
5824                object: &object,
5825            },
5826        ];
5827        assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
5828
5829        let wrong_format = [PackInput {
5830            oid: &sha256,
5831            object: &object,
5832        }];
5833        assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
5834    }
5835
5836    fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
5837        let objects = similar_blob_family(8);
5838        let packed =
5839            PackFile::write_packed(&objects, format).expect("test operation should succeed");
5840        let undeltified =
5841            PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
5842
5843        // The whole point of delta selection: the packed output is smaller than
5844        // storing every object undeltified.
5845        assert!(
5846            packed.pack.len() < undeltified.pack.len(),
5847            "expected delta pack ({}) smaller than undeltified pack ({})",
5848            packed.pack.len(),
5849            undeltified.pack.len()
5850        );
5851
5852        // At least one object must actually be stored as a delta.
5853        let kinds = pack_entry_kinds(&packed.pack, format);
5854        let delta_count = kinds
5855            .iter()
5856            .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
5857            .count();
5858        assert!(
5859            delta_count >= 1,
5860            "expected at least one delta entry, found kinds {kinds:?}"
5861        );
5862
5863        // Round-trip: every original object reconstructs byte-for-byte.
5864        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
5865        assert_eq!(parsed.entries.len(), objects.len());
5866        for object in &objects {
5867            let oid = object
5868                .object_id(format)
5869                .expect("test operation should succeed");
5870            let found = parsed
5871                .entries
5872                .iter()
5873                .find(|entry| entry.entry.oid == oid)
5874                .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
5875            assert_eq!(&found.object, object, "object {oid} did not round-trip");
5876        }
5877
5878        // The index must agree with the pack and locate every object.
5879        let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
5880        assert_eq!(index.pack_checksum, packed.checksum);
5881        for object in &objects {
5882            let oid = object
5883                .object_id(format)
5884                .expect("test operation should succeed");
5885            assert!(index.find(&oid).is_some(), "index missing {oid}");
5886        }
5887    }
5888
5889    #[test]
5890    fn write_packed_emits_ofs_delta_by_default() {
5891        let objects = similar_blob_family(6);
5892        let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5893            .expect("test operation should succeed");
5894        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
5895        assert!(
5896            kinds.contains(&PackObjectKind::OfsDelta),
5897            "expected an ofs-delta entry by default, found {kinds:?}"
5898        );
5899        assert!(
5900            !kinds.contains(&PackObjectKind::RefDelta),
5901            "default self-contained pack must not use ref-delta, found {kinds:?}"
5902        );
5903        // Round-trips.
5904        assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
5905    }
5906
5907    #[test]
5908    fn write_packed_can_emit_ref_delta() {
5909        let objects = similar_blob_family(6);
5910        let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
5911        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5912            .expect("test operation should succeed");
5913        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
5914        assert!(
5915            kinds.contains(&PackObjectKind::RefDelta),
5916            "expected a ref-delta entry, found {kinds:?}"
5917        );
5918        assert!(
5919            !kinds.contains(&PackObjectKind::OfsDelta),
5920            "ref-delta mode must not emit ofs-delta, found {kinds:?}"
5921        );
5922
5923        // Ref-delta packs are still self-contained here, so they round-trip
5924        // without any external base lookup.
5925        let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
5926            .expect("test operation should succeed");
5927        assert_eq!(parsed.entries.len(), objects.len());
5928    }
5929
5930    #[test]
5931    fn write_packed_bounds_delta_chain_depth() {
5932        // A long chain of progressively-modified blobs. With a large window
5933        // every object could otherwise delta against its immediate predecessor,
5934        // forming a chain as long as the input.
5935        let objects = incremental_blob_chain(20);
5936        let format = ObjectFormat::Sha1;
5937
5938        for max_depth in [1usize, 2, 5] {
5939            let options = PackWriteOptions::new()
5940                .with_window(20)
5941                .with_depth(max_depth);
5942            let packed = PackFile::write_packed_with_options(&objects, format, &options)
5943                .expect("test operation should succeed");
5944
5945            let depths = pack_entry_depths(&packed.pack, format);
5946            let observed = depths.iter().copied().max().unwrap_or(0);
5947            assert!(
5948                observed <= max_depth,
5949                "max chain depth {observed} exceeded bound {max_depth}"
5950            );
5951
5952            // Still correct: round-trips byte-for-byte.
5953            let parsed =
5954                PackFile::parse(&packed.pack, format).expect("test operation should succeed");
5955            for object in &objects {
5956                let oid = object
5957                    .object_id(format)
5958                    .expect("test operation should succeed");
5959                let found = parsed
5960                    .entries
5961                    .iter()
5962                    .find(|entry| entry.entry.oid == oid)
5963                    .expect("test operation should succeed");
5964                assert_eq!(&found.object, object);
5965            }
5966        }
5967    }
5968
5969    #[test]
5970    fn write_packed_depth_zero_stores_everything_undeltified() {
5971        let objects = similar_blob_family(5);
5972        let options = PackWriteOptions::new().with_depth(0);
5973        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5974            .expect("test operation should succeed");
5975        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
5976        assert!(
5977            kinds
5978                .iter()
5979                .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
5980            "depth 0 must disable deltas, found {kinds:?}"
5981        );
5982    }
5983
5984    #[test]
5985    fn write_thin_uses_external_base_and_round_trips_sha1() {
5986        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
5987    }
5988
5989    #[test]
5990    fn write_thin_uses_external_base_and_round_trips_sha256() {
5991        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
5992    }
5993
5994    fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
5995        // The base object stays OUT of the pack; only `target` is written, as a
5996        // ref-delta against the external base's object id.
5997        let base = blob_with_marker("EXTERNAL-BASE");
5998        let target = blob_with_marker("EXTERNAL-TARGET");
5999        let base_oid = base
6000            .object_id(format)
6001            .expect("test operation should succeed");
6002
6003        let mut external = HashMap::new();
6004        external.insert(base_oid, base.clone());
6005        let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
6006            .expect("test operation should succeed");
6007
6008        // Exactly one entry, encoded as a ref-delta to the external base.
6009        let kinds = pack_entry_kinds(&packed.pack, format);
6010        assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
6011
6012        // The external base reference must be the base oid.
6013        let mut offset = 12usize;
6014        let header =
6015            parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
6016        assert_eq!(header.kind, PackObjectKind::RefDelta);
6017        let referenced =
6018            ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
6019                .expect("test operation should succeed");
6020        assert_eq!(referenced, base_oid);
6021
6022        // A plain (non-thin) parse fails: the base is not present.
6023        assert!(PackFile::parse(&packed.pack, format).is_err());
6024
6025        // A thin parse that supplies the external base reconstructs the target.
6026        let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
6027            if oid == &base_oid {
6028                Ok(Some(base.clone()))
6029            } else {
6030                Ok(None)
6031            }
6032        })
6033        .expect("test operation should succeed");
6034        assert_eq!(parsed.entries.len(), 1);
6035        assert_eq!(parsed.entries[0].object, target);
6036    }
6037
6038    #[test]
6039    fn write_packed_preserves_distinct_objects_with_no_similarity() {
6040        // Unrelated objects: nothing should delta, but the pack must still be
6041        // valid and complete.
6042        let objects = vec![
6043            EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
6044            EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
6045            EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
6046        ];
6047        let format = ObjectFormat::Sha1;
6048        let packed =
6049            PackFile::write_packed(&objects, format).expect("test operation should succeed");
6050        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
6051        assert_eq!(parsed.entries.len(), objects.len());
6052        for object in &objects {
6053            let oid = object
6054                .object_id(format)
6055                .expect("test operation should succeed");
6056            assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
6057        }
6058    }
6059
6060    /// Build a family of blobs that all share a large common region but differ
6061    /// in a marker placed in the *middle*, so a good delta finds copy regions on
6062    /// both sides of the change.
6063    fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
6064        let mut common_head = Vec::new();
6065        for _ in 0..200 {
6066            common_head.extend_from_slice(b"shared header line for delta testing\n");
6067        }
6068        let mut common_tail = Vec::new();
6069        for _ in 0..200 {
6070            common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
6071        }
6072        (0..count)
6073            .map(|idx| {
6074                let mut body = common_head.clone();
6075                body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
6076                body.extend_from_slice(&common_tail);
6077                EncodedObject::new(ObjectType::Blob, body)
6078            })
6079            .collect()
6080    }
6081
6082    /// Build a chain where each blob is the previous one plus an appended line,
6083    /// so each is highly similar to its predecessor.
6084    fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
6085        let mut body = Vec::new();
6086        for _ in 0..100 {
6087            body.extend_from_slice(b"baseline content shared across the whole chain\n");
6088        }
6089        let mut objects = Vec::with_capacity(count);
6090        for idx in 0..count {
6091            body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
6092            objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
6093        }
6094        objects
6095    }
6096
6097    fn blob_with_marker(marker: &str) -> EncodedObject {
6098        let mut body = Vec::new();
6099        for _ in 0..150 {
6100            body.extend_from_slice(b"common body shared between base and target\n");
6101        }
6102        body.extend_from_slice(marker.as_bytes());
6103        body.push(b'\n');
6104        for _ in 0..150 {
6105            body.extend_from_slice(b"more common body shared between objects\n");
6106        }
6107        EncodedObject::new(ObjectType::Blob, body)
6108    }
6109
6110    /// Classify every entry in a pack (in pack order) by its on-disk kind.
6111    fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
6112        pack_entry_descriptors(pack, format)
6113            .into_iter()
6114            .map(|descriptor| descriptor.kind)
6115            .collect()
6116    }
6117
6118    /// Compute each entry's delta chain depth (0 = undeltified base), in pack
6119    /// order. Entries always appear after their in-pack bases, so a single
6120    /// forward pass suffices.
6121    fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
6122        let descriptors = pack_entry_descriptors(pack, format);
6123        let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
6124        let mut depths = Vec::with_capacity(descriptors.len());
6125        for descriptor in &descriptors {
6126            let depth = match &descriptor.base {
6127                EntryBase::None => 0,
6128                EntryBase::Offset(base_offset) => {
6129                    depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
6130                }
6131                // Ref-delta to an in-pack base: look it up by offset via oid is
6132                // unnecessary for these tests (which only use ofs-delta for the
6133                // chains), so treat as depth 1 if unknown.
6134                EntryBase::Ref => 1,
6135            };
6136            depth_by_offset.insert(descriptor.offset, depth);
6137            depths.push(depth);
6138        }
6139        depths
6140    }
6141
6142    struct EntryDescriptor {
6143        offset: u64,
6144        kind: PackObjectKind,
6145        base: EntryBase,
6146    }
6147
6148    enum EntryBase {
6149        None,
6150        Offset(u64),
6151        Ref,
6152    }
6153
6154    fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
6155        let trailer_offset = pack.len() - format.raw_len();
6156        let count = u32_be(&pack[8..12]) as usize;
6157        let mut offset = 12usize;
6158        let mut descriptors = Vec::with_capacity(count);
6159        for _ in 0..count {
6160            let entry_offset = offset as u64;
6161            let header =
6162                parse_entry_header(pack, &mut offset).expect("test operation should succeed");
6163            let base = match header.kind {
6164                PackObjectKind::OfsDelta => {
6165                    let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
6166                        .expect("test operation should succeed");
6167                    EntryBase::Offset(base_offset)
6168                }
6169                PackObjectKind::RefDelta => {
6170                    offset += format.raw_len();
6171                    EntryBase::Ref
6172                }
6173                _ => EntryBase::None,
6174            };
6175            let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
6176            let mut body = Vec::new();
6177            decoder
6178                .read_to_end(&mut body)
6179                .expect("test operation should succeed");
6180            offset += decoder.total_in() as usize;
6181            descriptors.push(EntryDescriptor {
6182                offset: entry_offset,
6183                kind: header.kind,
6184                base,
6185            });
6186        }
6187        descriptors
6188    }
6189
6190    fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
6191        let mut base = Vec::new();
6192        for _ in 0..300 {
6193            base.extend_from_slice(b"common payload\n");
6194        }
6195        base.extend_from_slice(b"base\n");
6196        let mut changed = Vec::new();
6197        for _ in 0..300 {
6198            changed.extend_from_slice(b"common payload\n");
6199        }
6200        changed.extend_from_slice(b"changed\n");
6201        (
6202            EncodedObject::new(ObjectType::Blob, base),
6203            EncodedObject::new(ObjectType::Blob, changed),
6204        )
6205    }
6206
6207    fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
6208        let mut pack = Vec::new();
6209        pack.extend_from_slice(b"PACK");
6210        pack.extend_from_slice(&2u32.to_be_bytes());
6211        pack.extend_from_slice(&1u32.to_be_bytes());
6212        write_entry_header(&mut pack, object_type, body.len() as u64);
6213        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6214        encoder
6215            .write_all(body)
6216            .expect("test operation should succeed");
6217        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6218        let checksum =
6219            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6220        pack.extend_from_slice(checksum.as_bytes());
6221        pack
6222    }
6223
6224    #[derive(Clone, Copy, Debug)]
6225    enum DeltaKind {
6226        Offset,
6227        Ref,
6228    }
6229
6230    fn two_object_delta_pack(
6231        format: ObjectFormat,
6232        base: &[u8],
6233        result: &[u8],
6234        delta_kind: DeltaKind,
6235    ) -> Vec<u8> {
6236        let mut pack = Vec::new();
6237        pack.extend_from_slice(b"PACK");
6238        pack.extend_from_slice(&2u32.to_be_bytes());
6239        pack.extend_from_slice(&2u32.to_be_bytes());
6240
6241        let base_offset = pack.len();
6242        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
6243        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6244        encoder
6245            .write_all(base)
6246            .expect("test operation should succeed");
6247        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6248
6249        let delta = append_suffix_delta(base, result);
6250        let delta_offset = pack.len();
6251        write_pack_entry_header_kind(
6252            &mut pack,
6253            match delta_kind {
6254                DeltaKind::Offset => 6,
6255                DeltaKind::Ref => 7,
6256            },
6257            delta.len() as u64,
6258        );
6259        match delta_kind {
6260            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
6261            DeltaKind::Ref => {
6262                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
6263                    .expect("test operation should succeed");
6264                pack.extend_from_slice(base_oid.as_bytes());
6265            }
6266        }
6267        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6268        encoder
6269            .write_all(&delta)
6270            .expect("test operation should succeed");
6271        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6272
6273        let checksum =
6274            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6275        pack.extend_from_slice(checksum.as_bytes());
6276        pack
6277    }
6278
6279    fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
6280        let mut pack = Vec::new();
6281        pack.extend_from_slice(b"PACK");
6282        pack.extend_from_slice(&2u32.to_be_bytes());
6283        pack.extend_from_slice(&1u32.to_be_bytes());
6284
6285        let delta = append_suffix_delta(base, result);
6286        write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
6287        let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
6288            .expect("test operation should succeed");
6289        pack.extend_from_slice(base_oid.as_bytes());
6290        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6291        encoder
6292            .write_all(&delta)
6293            .expect("test operation should succeed");
6294        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6295
6296        let checksum =
6297            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6298        pack.extend_from_slice(checksum.as_bytes());
6299        pack
6300    }
6301
6302    fn unique_temp_dir(name: &str) -> PathBuf {
6303        let nanos = SystemTime::now()
6304            .duration_since(UNIX_EPOCH)
6305            .expect("test operation should succeed")
6306            .as_nanos();
6307        std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
6308    }
6309
6310    fn run_git_success(cwd: &Path, args: &[&str]) {
6311        let output = Command::new("git")
6312            .current_dir(cwd)
6313            .args(args)
6314            .output()
6315            .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
6316        assert!(
6317            output.status.success(),
6318            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
6319            output.status.code(),
6320            String::from_utf8_lossy(&output.stdout),
6321            String::from_utf8_lossy(&output.stderr)
6322        );
6323    }
6324
6325    fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
6326        let mut paths = fs::read_dir(dir)
6327            .expect("test operation should succeed")
6328            .map(|entry| entry.expect("test operation should succeed").path())
6329            .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
6330            .collect::<Vec<_>>();
6331        assert_eq!(paths.len(), 1, "expected one .{extension} file");
6332        paths.remove(0)
6333    }
6334
6335    fn pack_bitmap_index(
6336        format: ObjectFormat,
6337        object_count: u32,
6338        options: u16,
6339        pack_checksum: &ObjectId,
6340        entries: &[(u32, u8, u8, &[u64])],
6341        name_hash_cache: Option<&[u32]>,
6342    ) -> Vec<u8> {
6343        let mut out = Vec::new();
6344        out.extend_from_slice(b"BITM");
6345        out.extend_from_slice(&1u16.to_be_bytes());
6346        out.extend_from_slice(&options.to_be_bytes());
6347        out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
6348        out.extend_from_slice(pack_checksum.as_bytes());
6349        write_test_ewah(&mut out, object_count, &[0b001]);
6350        write_test_ewah(&mut out, object_count, &[0b010]);
6351        write_test_ewah(&mut out, object_count, &[0b100]);
6352        write_test_ewah(&mut out, object_count, &[0]);
6353        for (position, xor_offset, flags, words) in entries {
6354            out.extend_from_slice(&position.to_be_bytes());
6355            out.push(*xor_offset);
6356            out.push(*flags);
6357            write_test_ewah(&mut out, object_count, words);
6358        }
6359        if let Some(cache) = name_hash_cache {
6360            for value in cache {
6361                out.extend_from_slice(&value.to_be_bytes());
6362            }
6363        }
6364        let checksum =
6365            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
6366        out.extend_from_slice(checksum.as_bytes());
6367        out
6368    }
6369
6370    fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
6371        out.extend_from_slice(&bit_size.to_be_bytes());
6372        let words = ewah_literal_words(literals);
6373        out.extend_from_slice(&(words.len() as u32).to_be_bytes());
6374        for word in words {
6375            out.extend_from_slice(&word.to_be_bytes());
6376        }
6377        out.extend_from_slice(&0u32.to_be_bytes());
6378    }
6379
6380    fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
6381        let rlw = (literals.len() as u64) << 33;
6382        let mut words = vec![rlw];
6383        words.extend_from_slice(literals);
6384        words
6385    }
6386
6387    fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
6388        let checksum_offset = bytes.len() - format.raw_len();
6389        let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
6390            .expect("test operation should succeed");
6391        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
6392    }
6393
6394    fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
6395        assert!(result.starts_with(base));
6396        let suffix = &result[base.len()..];
6397        assert!(base.len() < 0x10000);
6398        assert!(suffix.len() < 0x80);
6399        let mut delta = Vec::new();
6400        write_delta_varint(&mut delta, base.len() as u64);
6401        write_delta_varint(&mut delta, result.len() as u64);
6402        delta.push(0x90);
6403        delta.push(base.len() as u8);
6404        delta.push(suffix.len() as u8);
6405        delta.extend_from_slice(suffix);
6406        delta
6407    }
6408
6409    fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
6410        loop {
6411            let mut byte = (value as u8) & 0x7f;
6412            value >>= 7;
6413            if value != 0 {
6414                byte |= 0x80;
6415            }
6416            out.push(byte);
6417            if value == 0 {
6418                break;
6419            }
6420        }
6421    }
6422
6423    fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
6424        let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
6425        size >>= 4;
6426        if size != 0 {
6427            byte |= 0x80;
6428        }
6429        out.push(byte);
6430        while size != 0 {
6431            let mut byte = (size as u8) & 0x7f;
6432            size >>= 7;
6433            if size != 0 {
6434                byte |= 0x80;
6435            }
6436            out.push(byte);
6437        }
6438    }
6439
6440    fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
6441        assert!(relative < 0x80);
6442        out.push(relative as u8);
6443    }
6444
6445    fn single_entry_index(
6446        format: ObjectFormat,
6447        oid: ObjectId,
6448        crc32: u32,
6449        offset: u32,
6450        pack_checksum: ObjectId,
6451    ) -> Vec<u8> {
6452        let mut index = Vec::new();
6453        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
6454        index.extend_from_slice(&2u32.to_be_bytes());
6455        for idx in 0..256 {
6456            let count = if idx >= usize::from(oid.as_bytes()[0]) {
6457                1u32
6458            } else {
6459                0u32
6460            };
6461            index.extend_from_slice(&count.to_be_bytes());
6462        }
6463        index.extend_from_slice(oid.as_bytes());
6464        index.extend_from_slice(&crc32.to_be_bytes());
6465        index.extend_from_slice(&offset.to_be_bytes());
6466        index.extend_from_slice(pack_checksum.as_bytes());
6467        let checksum =
6468            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
6469        index.extend_from_slice(checksum.as_bytes());
6470        index
6471    }
6472
6473    fn single_entry_index_v1(
6474        format: ObjectFormat,
6475        oid: ObjectId,
6476        offset: u32,
6477        pack_checksum: ObjectId,
6478    ) -> Vec<u8> {
6479        let mut index = Vec::new();
6480        for idx in 0..256 {
6481            let count = if idx >= usize::from(oid.as_bytes()[0]) {
6482                1u32
6483            } else {
6484                0u32
6485            };
6486            index.extend_from_slice(&count.to_be_bytes());
6487        }
6488        index.extend_from_slice(&offset.to_be_bytes());
6489        index.extend_from_slice(oid.as_bytes());
6490        index.extend_from_slice(pack_checksum.as_bytes());
6491        let checksum =
6492            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
6493        index.extend_from_slice(checksum.as_bytes());
6494        index
6495    }
6496
6497    fn pack_reverse_index(
6498        format: ObjectFormat,
6499        positions: &[u32],
6500        pack_checksum: ObjectId,
6501    ) -> Vec<u8> {
6502        let mut reverse_index = Vec::new();
6503        reverse_index.extend_from_slice(b"RIDX");
6504        reverse_index.extend_from_slice(&1u32.to_be_bytes());
6505        reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
6506        for position in positions {
6507            reverse_index.extend_from_slice(&position.to_be_bytes());
6508        }
6509        reverse_index.extend_from_slice(pack_checksum.as_bytes());
6510        let checksum =
6511            sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
6512        reverse_index.extend_from_slice(checksum.as_bytes());
6513        reverse_index
6514    }
6515
6516    fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
6517        let mut out = Vec::new();
6518        out.extend_from_slice(b"MTME");
6519        out.extend_from_slice(&1u32.to_be_bytes());
6520        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
6521        for mtime in mtimes {
6522            out.extend_from_slice(&mtime.to_be_bytes());
6523        }
6524        out.extend_from_slice(pack_checksum.as_bytes());
6525        let checksum =
6526            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
6527        out.extend_from_slice(checksum.as_bytes());
6528        out
6529    }
6530
6531    fn midx_chunks_with_pack_names(
6532        _format: ObjectFormat,
6533        pack_names: Vec<u8>,
6534        entries: &[(ObjectId, u32, u64)],
6535    ) -> Vec<([u8; 4], Vec<u8>)> {
6536        let mut entries = entries.to_vec();
6537        entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
6538        let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
6539        let mut large_offsets = Vec::new();
6540        let mut chunks = vec![
6541            (*b"PNAM", pack_names),
6542            (*b"OIDF", midx_oid_fanout(&object_ids)),
6543            (*b"OIDL", midx_oid_lookup(&object_ids)),
6544            (
6545                *b"OOFF",
6546                midx_ooff_entries(
6547                    &entries
6548                        .iter()
6549                        .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
6550                        .collect::<Vec<_>>(),
6551                    &mut large_offsets,
6552                ),
6553            ),
6554        ];
6555        if !large_offsets.is_empty() {
6556            chunks.push((*b"LOFF", large_offsets));
6557        }
6558        chunks
6559    }
6560
6561    fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
6562        let mut counts = [0u32; 256];
6563        for oid in object_ids {
6564            counts[oid.as_bytes()[0] as usize] += 1;
6565        }
6566        let mut running = 0u32;
6567        let mut out = Vec::new();
6568        for count in counts {
6569            running += count;
6570            out.extend_from_slice(&running.to_be_bytes());
6571        }
6572        out
6573    }
6574
6575    fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
6576        let mut out = Vec::new();
6577        for oid in object_ids {
6578            out.extend_from_slice(oid.as_bytes());
6579        }
6580        out
6581    }
6582
6583    fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
6584        let mut out = Vec::new();
6585        for (pack_int_id, offset) in entries {
6586            out.extend_from_slice(&pack_int_id.to_be_bytes());
6587            if *offset < 0x8000_0000 {
6588                out.extend_from_slice(&(*offset as u32).to_be_bytes());
6589            } else {
6590                let large_idx = (large_offsets.len() / 8) as u32;
6591                out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
6592                large_offsets.extend_from_slice(&offset.to_be_bytes());
6593            }
6594        }
6595        out
6596    }
6597
6598    fn midx_u32_table(values: &[u32]) -> Vec<u8> {
6599        let mut out = Vec::new();
6600        for value in values {
6601            out.extend_from_slice(&value.to_be_bytes());
6602        }
6603        out
6604    }
6605
6606    fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
6607        let mut out = Vec::new();
6608        for (bitmap_pos, bitmap_nr) in entries {
6609            out.extend_from_slice(&bitmap_pos.to_be_bytes());
6610            out.extend_from_slice(&bitmap_nr.to_be_bytes());
6611        }
6612        out
6613    }
6614
6615    fn multi_pack_index(
6616        format: ObjectFormat,
6617        version: u8,
6618        pack_count: u32,
6619        chunks: &[([u8; 4], Vec<u8>)],
6620    ) -> Vec<u8> {
6621        let lookup_len = (chunks.len() + 1) * 12;
6622        let mut out = Vec::new();
6623        out.extend_from_slice(b"MIDX");
6624        out.push(version);
6625        out.push(hash_function_id(format) as u8);
6626        out.push(chunks.len() as u8);
6627        out.push(0);
6628        out.extend_from_slice(&pack_count.to_be_bytes());
6629        let mut chunk_offset = (12 + lookup_len) as u64;
6630        for (id, data) in chunks {
6631            out.extend_from_slice(id);
6632            out.extend_from_slice(&chunk_offset.to_be_bytes());
6633            chunk_offset += data.len() as u64;
6634        }
6635        out.extend_from_slice(&[0, 0, 0, 0]);
6636        out.extend_from_slice(&chunk_offset.to_be_bytes());
6637        for (_id, data) in chunks {
6638            out.extend_from_slice(data);
6639        }
6640        let checksum =
6641            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
6642        out.extend_from_slice(checksum.as_bytes());
6643        out
6644    }
6645
6646    // ---- EWAH encoder / bitmap writer tests ------------------------------
6647
6648    fn pack_checksum_sha1() -> ObjectId {
6649        sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
6650    }
6651
6652    fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
6653        // Wrap the EWAH body with the surrounding offset bookkeeping the parser
6654        // expects: a checksum offset that lies just past the serialised bitmap.
6655        let mut offset = 0usize;
6656        let checksum_offset = bytes.len();
6657        parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
6658            .expect("test operation should succeed")
6659    }
6660
6661    #[test]
6662    fn ewah_encodes_single_literal_word_matching_helper() {
6663        // A bitmap whose only word is a literal must serialise as one RLW with
6664        // literal_len == 1 followed by the literal, identical to the test
6665        // helper used by the existing parser tests.
6666        let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
6667        assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
6668        assert_eq!(ewah.rlw_position, 0);
6669        assert_eq!(ewah.bit_size, 64);
6670    }
6671
6672    #[test]
6673    fn ewah_byte_layout_is_big_endian() {
6674        let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
6675            .expect("test operation should succeed");
6676        let bytes = ewah.to_bytes();
6677        let mut expected = Vec::new();
6678        expected.extend_from_slice(&64u32.to_be_bytes()); // bit_size
6679        expected.extend_from_slice(&2u32.to_be_bytes()); // word count: rlw + literal
6680        expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); // rlw: literal_len = 1
6681        expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
6682        expected.extend_from_slice(&0u32.to_be_bytes()); // rlw_position
6683        assert_eq!(bytes, expected);
6684    }
6685
6686    #[test]
6687    fn ewah_empty_bitmap_serialises_like_git() {
6688        let ewah = EwahBitmap::empty();
6689        let bytes = ewah.to_bytes();
6690        // bit_size = 0, word_count = 0, rlw_position = 0.
6691        assert_eq!(bytes, vec![0u8; 12]);
6692        // It must still parse and decode to nothing.
6693        let parsed = parse_ewah_bytes(&bytes);
6694        assert_eq!(parsed, ewah);
6695        assert!(
6696            parsed
6697                .to_positions()
6698                .expect("test operation should succeed")
6699                .is_empty()
6700        );
6701    }
6702
6703    #[test]
6704    fn ewah_compresses_clean_zero_run() {
6705        // Three all-zero words followed by a literal: the encoder should emit a
6706        // single RLW carrying a run of 3 clean-zero words plus one literal.
6707        let ewah =
6708            EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
6709        assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
6710        let rlw = ewah.words[0];
6711        assert_eq!(rlw & 1, 0, "run bit should be zero");
6712        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
6713        assert_eq!(rlw >> 33, 1, "literal length should be 1");
6714        assert_eq!(ewah.words[1], 0b1);
6715    }
6716
6717    #[test]
6718    fn ewah_compresses_clean_ones_run() {
6719        let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
6720            .expect("test operation should succeed");
6721        // Pure run of ones, no literals: one RLW only.
6722        assert_eq!(ewah.words.len(), 1);
6723        let rlw = ewah.words[0];
6724        assert_eq!(rlw & 1, 1, "run bit should be one");
6725        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
6726        assert_eq!(rlw >> 33, 0, "no literals");
6727    }
6728
6729    #[test]
6730    fn ewah_run_then_literal_then_run_roundtrips() {
6731        let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
6732        let bit_size = (words.len() * 64) as u32;
6733        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
6734        assert_eq!(
6735            ewah.to_words().expect("test operation should succeed"),
6736            words
6737        );
6738    }
6739
6740    #[test]
6741    fn ewah_drops_trailing_clean_zero_words() {
6742        // Trailing all-zero words beyond a literal carry no information and git
6743        // does not serialise them, but to_words() restores them up to bit_size.
6744        let words = vec![0b1, 0, 0, 0];
6745        let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
6746        // bit_size of 1 means a single backing word.
6747        assert_eq!(ewah.bit_size, 1);
6748        assert_eq!(
6749            ewah.to_words().expect("test operation should succeed"),
6750            vec![0b1]
6751        );
6752    }
6753
6754    #[test]
6755    fn ewah_from_positions_roundtrips_via_positions() {
6756        let positions = [0u32, 1, 63, 64, 65, 200, 511];
6757        let ewah =
6758            EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
6759        let mut decoded = ewah.to_positions().expect("test operation should succeed");
6760        decoded.sort_unstable();
6761        assert_eq!(decoded, positions);
6762    }
6763
6764    #[test]
6765    fn ewah_from_positions_dedupes_and_orders() {
6766        let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
6767            .expect("test operation should succeed");
6768        assert_eq!(
6769            ewah.to_positions().expect("test operation should succeed"),
6770            vec![5, 100]
6771        );
6772    }
6773
6774    #[test]
6775    fn ewah_huge_zero_run_spans_multiple_rlws() {
6776        // A run longer than the 32-bit running-length field forces the encoder
6777        // to emit more than one RLW. Use one literal bit far out, with a bit
6778        // size large enough to exceed u32::MAX clean words is impractical, so
6779        // assert the field arithmetic via a direct builder run instead.
6780        let mut builder = EwahBuilder::new(0);
6781        builder.add_empty_words(false, 0xffff_ffff);
6782        builder.add_empty_words(false, 5);
6783        let ewah = builder.finish().expect("test operation should succeed");
6784        assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
6785        assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
6786        assert_eq!(ewah.words[1] & 1, 0);
6787        assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
6788        assert_eq!(ewah.rlw_position, 1);
6789    }
6790
6791    #[test]
6792    fn ewah_from_words_rejects_oversized_bit_size() {
6793        // bit_size demands two words but only one is supplied.
6794        assert!(EwahBitmap::from_words(65, &[0]).is_err());
6795    }
6796
6797    #[test]
6798    fn ewah_from_positions_rejects_out_of_range() {
6799        assert!(EwahBitmap::from_positions(64, &[64]).is_err());
6800    }
6801
6802    #[test]
6803    fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
6804        // Exercise the full encode -> serialise -> parse loop for a non-trivial
6805        // pattern and assert structural equality against the parser's model.
6806        let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
6807        let bit_size = (words.len() * 64) as u32;
6808        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
6809        let bytes = ewah.to_bytes();
6810        let parsed = parse_ewah_bytes(&bytes);
6811        assert_eq!(parsed, ewah);
6812        assert_eq!(
6813            parsed.to_words().expect("test operation should succeed"),
6814            words
6815        );
6816    }
6817
6818    #[test]
6819    fn pack_bitmap_index_write_parse_roundtrip_sha1() {
6820        // commit, tree, blob in pack order; one selected commit reaching all.
6821        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
6822        let bytes = write_bitmap(
6823            ObjectFormat::Sha1,
6824            pack_checksum_sha1(),
6825            &object_types,
6826            &[(0u32, 0u32, vec![1u32, 2u32])],
6827            None,
6828        )
6829        .expect("test operation should succeed");
6830        assert_eq!(&bytes[..4], b"BITM");
6831
6832        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
6833            .expect("test operation should succeed");
6834        assert_eq!(parsed.version, 1);
6835        assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
6836        assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
6837        assert_eq!(
6838            parsed
6839                .type_bitmaps
6840                .commits
6841                .to_positions()
6842                .expect("test operation should succeed"),
6843            vec![0]
6844        );
6845        assert_eq!(
6846            parsed
6847                .type_bitmaps
6848                .trees
6849                .to_positions()
6850                .expect("test operation should succeed"),
6851            vec![1]
6852        );
6853        assert_eq!(
6854            parsed
6855                .type_bitmaps
6856                .blobs
6857                .to_positions()
6858                .expect("test operation should succeed"),
6859            vec![2]
6860        );
6861        assert!(
6862            parsed
6863                .type_bitmaps
6864                .tags
6865                .to_positions()
6866                .expect("test operation should succeed")
6867                .is_empty()
6868        );
6869        assert_eq!(parsed.entries.len(), 1);
6870        let entry = parsed
6871            .entry_for_index_position(0)
6872            .expect("test operation should succeed");
6873        assert_eq!(entry.xor_offset, 0);
6874        assert_eq!(entry.flags, 0);
6875        assert_eq!(
6876            entry
6877                .bitmap
6878                .to_positions()
6879                .expect("test operation should succeed"),
6880            vec![0, 1, 2]
6881        );
6882        assert_eq!(parsed.name_hash_cache, None);
6883    }
6884
6885    #[test]
6886    fn pack_bitmap_index_write_parse_roundtrip_sha256() {
6887        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
6888            .expect("test operation should succeed");
6889        let object_types = [ObjectType::Commit, ObjectType::Tree];
6890        let bytes = write_bitmap(
6891            ObjectFormat::Sha256,
6892            pack_checksum.clone(),
6893            &object_types,
6894            &[(0u32, 0u32, vec![1u32])],
6895            None,
6896        )
6897        .expect("test operation should succeed");
6898        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
6899            .expect("test operation should succeed");
6900        assert_eq!(parsed.format, ObjectFormat::Sha256);
6901        assert_eq!(parsed.pack_checksum, pack_checksum);
6902        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
6903        assert_eq!(
6904            parsed.entries[0]
6905                .bitmap
6906                .to_positions()
6907                .expect("test operation should succeed"),
6908            vec![0, 1]
6909        );
6910    }
6911
6912    #[test]
6913    fn pack_bitmap_index_write_includes_name_hash_cache() {
6914        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
6915        let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
6916        let bytes = write_bitmap(
6917            ObjectFormat::Sha1,
6918            pack_checksum_sha1(),
6919            &object_types,
6920            &[(0u32, 0u32, vec![1u32, 2u32])],
6921            Some(cache.clone()),
6922        )
6923        .expect("test operation should succeed");
6924        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
6925            .expect("test operation should succeed");
6926        assert_eq!(
6927            parsed.options,
6928            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
6929        );
6930        assert_eq!(parsed.name_hash_cache, Some(cache));
6931    }
6932
6933    #[test]
6934    fn pack_bitmap_writer_supports_multiple_commits() {
6935        let object_types = [
6936            ObjectType::Commit,
6937            ObjectType::Commit,
6938            ObjectType::Tree,
6939            ObjectType::Blob,
6940        ];
6941        let mut writer =
6942            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
6943                .expect("test operation should succeed");
6944        writer
6945            .add_commit(0, 0, &[2, 3])
6946            .expect("test operation should succeed");
6947        writer
6948            .add_commit(1, 1, &[2])
6949            .expect("test operation should succeed");
6950        let bytes = writer.write().expect("test operation should succeed");
6951        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
6952            .expect("test operation should succeed");
6953        assert_eq!(parsed.entries.len(), 2);
6954        assert_eq!(
6955            parsed
6956                .type_bitmaps
6957                .commits
6958                .to_positions()
6959                .expect("test operation should succeed"),
6960            vec![0, 1]
6961        );
6962        let first = parsed
6963            .entry_for_index_position(0)
6964            .expect("test operation should succeed");
6965        assert_eq!(
6966            first
6967                .bitmap
6968                .to_positions()
6969                .expect("test operation should succeed"),
6970            vec![0, 2, 3]
6971        );
6972        let second = parsed
6973            .entry_for_index_position(1)
6974            .expect("test operation should succeed");
6975        assert_eq!(
6976            second
6977                .bitmap
6978                .to_positions()
6979                .expect("test operation should succeed"),
6980            vec![1, 2]
6981        );
6982    }
6983
6984    #[test]
6985    fn pack_bitmap_index_recomputes_checksum_on_write() {
6986        // The provided index_checksum field is ignored; write recomputes it so
6987        // a bogus placeholder still produces a valid, parseable file.
6988        let object_types = [ObjectType::Commit, ObjectType::Blob];
6989        let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
6990            .expect("test operation should succeed");
6991        let mut index = writer.build().expect("test operation should succeed");
6992        // build() sets an all-zero placeholder checksum.
6993        assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
6994        index.entries.clear(); // mutate the model after build
6995        index.entries.push(PackBitmapEntry {
6996            object_position: 0,
6997            xor_offset: 0,
6998            flags: 0,
6999            bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
7000        });
7001        let bytes = index.write().expect("test operation should succeed");
7002        // Parsing validates the trailing checksum, so a wrong checksum fails.
7003        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
7004            .expect("test operation should succeed");
7005        assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
7006    }
7007
7008    #[test]
7009    fn pack_bitmap_writer_rejects_non_commit_selection() {
7010        let object_types = [ObjectType::Commit, ObjectType::Blob];
7011        let mut writer =
7012            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
7013                .expect("test operation should succeed");
7014        // Position 1 is a blob, not a commit.
7015        assert!(writer.add_commit(1, 1, &[]).is_err());
7016        // Position 5 is out of range entirely.
7017        assert!(writer.add_commit(5, 5, &[]).is_err());
7018        // Index position out of range.
7019        assert!(writer.add_commit(0, 5, &[]).is_err());
7020        // Reachable position out of range.
7021        assert!(writer.add_commit(0, 0, &[9]).is_err());
7022    }
7023
7024    #[test]
7025    fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
7026        let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7027            .expect("test operation should succeed");
7028        assert!(
7029            PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
7030                .is_err()
7031        );
7032    }
7033
7034    #[test]
7035    fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
7036        let writer = PackBitmapWriter::new(
7037            ObjectFormat::Sha1,
7038            pack_checksum_sha1(),
7039            &[ObjectType::Commit],
7040        )
7041        .expect("test operation should succeed");
7042        assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
7043    }
7044
7045    #[test]
7046    fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
7047        let mut index = PackBitmapWriter::new(
7048            ObjectFormat::Sha1,
7049            pack_checksum_sha1(),
7050            &[ObjectType::Commit],
7051        )
7052        .expect("test operation should succeed")
7053        .build()
7054        .expect("test operation should succeed");
7055        // Flag set but no cache present.
7056        index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
7057        assert!(index.write().is_err());
7058        // Cache present but flag missing.
7059        index.options = PackBitmapIndex::OPTION_FULL_DAG;
7060        index.name_hash_cache = Some(vec![0]);
7061        assert!(index.write().is_err());
7062    }
7063
7064    #[test]
7065    fn write_bitmap_roundtrips_through_upstream_git_parser() {
7066        // Build a real pack with git, then overwrite reachability with our own
7067        // writer using the real pack checksum and object types, and confirm our
7068        // bytes parse under the same parser that reads upstream bitmaps.
7069        let root = unique_temp_dir("git-pack-bitmap-writer");
7070        fs::create_dir_all(&root).expect("test operation should succeed");
7071        {
7072            run_git_success(&root, &["init", "-q", "-b", "main"]);
7073            run_git_success(
7074                &root,
7075                &[
7076                    "-c",
7077                    "user.name=Example User",
7078                    "-c",
7079                    "user.email=example@example.invalid",
7080                    "commit",
7081                    "--allow-empty",
7082                    "-q",
7083                    "-m",
7084                    "one",
7085                ],
7086            );
7087            run_git_success(&root, &["repack", "-adb"]);
7088            let pack_dir = root.join(".git").join("objects").join("pack");
7089            let idx_path = single_path_with_extension(&pack_dir, "idx");
7090            let index = PackIndex::parse(
7091                &fs::read(idx_path).expect("test operation should succeed"),
7092                ObjectFormat::Sha1,
7093            )
7094            .expect("test operation should succeed");
7095            // Read object types from the pack so the type bitmaps are accurate.
7096            let pack_path = single_path_with_extension(&pack_dir, "pack");
7097            let pack =
7098                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
7099                    .expect("test operation should succeed");
7100            // Map each index entry (sorted by oid) to its pack offset, then to a
7101            // pack-order position so positions line up with the index ordering.
7102            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
7103            offsets.sort_unstable();
7104            let position_of = |offset: u64| -> u32 {
7105                offsets
7106                    .iter()
7107                    .position(|value| *value == offset)
7108                    .expect("test operation should succeed") as u32
7109            };
7110            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
7111            for entry in &index.entries {
7112                let position = position_of(entry.offset) as usize;
7113                // Find the parsed object at this pack offset to read its type.
7114                if let Some(parsed) = pack
7115                    .entries
7116                    .iter()
7117                    .find(|po| po.entry.offset == entry.offset)
7118                {
7119                    object_types[position] = parsed.object.object_type;
7120                }
7121            }
7122            // Select the first commit position we find and reach everything.
7123            let commit_position = object_types
7124                .iter()
7125                .position(|ty| *ty == ObjectType::Commit)
7126                .expect("test operation should succeed") as u32;
7127            // The entry records the commit's position in the oid-sorted index.
7128            let commit_index_position = index
7129                .entries
7130                .iter()
7131                .position(|entry| position_of(entry.offset) == commit_position)
7132                .expect("test operation should succeed")
7133                as u32;
7134            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
7135            let bytes = write_bitmap(
7136                ObjectFormat::Sha1,
7137                index.pack_checksum.clone(),
7138                &object_types,
7139                &[(commit_position, commit_index_position, reachable)],
7140                None,
7141            )
7142            .expect("test operation should succeed");
7143            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
7144                .expect("test operation should succeed");
7145            assert_eq!(parsed.pack_checksum, index.pack_checksum);
7146            assert_eq!(parsed.entries.len(), 1);
7147            assert_eq!(
7148                parsed.entries[0]
7149                    .bitmap
7150                    .to_positions()
7151                    .expect("test operation should succeed")
7152                    .len(),
7153                index.entries.len()
7154            );
7155        };
7156        let _ = fs::remove_dir_all(&root);
7157    }
7158}