Skip to main content

sley_pack/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result, StreamingDigest};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
12use std::fmt;
13use std::fs::File;
14use std::io::{Read, Seek, SeekFrom, Write};
15use std::ops::Range;
16use std::path::Path;
17use std::sync::Arc;
18
/// Location and size bookkeeping for one object stored in a pack.
///
/// The field notes describe how pack parsing fills this for undeltified
/// entries. NOTE(review): how resolved delta entries populate it is decided by
/// code outside this excerpt — confirm before relying on the sizes for deltas.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Number of pack bytes consumed by the entry's compressed stream.
    pub compressed_size: u64,
    /// Size declared in the entry header; the inflated body must match it.
    pub uncompressed_size: u64,
    /// Byte offset, from the start of the pack, at which the entry header begins.
    pub offset: u64,
}
26
/// Default sliding-window size used by [`PackFile::write_packed`].
///
/// Each object is compared against up to this many previously emitted
/// candidates of the same type when searching for a small delta. Matches git's
/// default `pack.window`. This is also the initial `window` of
/// [`PackWriteOptions::new`].
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Default maximum delta chain depth used by [`PackFile::write_packed`].
///
/// A delta may reference a base that is itself a delta; this bounds how long
/// such chains may grow so that reconstructing any object stays cheap and the
/// reader's recursion stays shallow. Matches git's default `pack.depth`. This
/// is also the initial `depth` of [`PackWriteOptions::new`].
pub const DEFAULT_PACK_DEPTH: usize = 50;

/// Object-count threshold before pack payload compression is fanned out across
/// worker threads. Below this, thread setup and extra buffering cost more than
/// they save.
// NOTE(review): the comparison that uses this threshold is outside this
// excerpt — confirm whether it is inclusive.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

/// Keep parallel compression bounded. Git gets much of its wall-clock win from
/// using several cores, but unbounded threads can steal cache from delta
/// planning and inflate peak memory on large packs.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;

/// Streaming pack writes pre-compress only this many ordered entries at a time.
/// This restores CPU parallelism without holding every compressed payload for a
/// large pack in memory at once.
const PACK_STREAM_COMPRESSION_WINDOW_OBJECTS: usize = 256;
55
/// Options controlling sliding-window delta selection during pack generation.
///
/// Construct with [`PackWriteOptions::new`] (sensible defaults) and adjust with
/// the builder-style setters, or build one directly. Used by
/// [`PackFile::write_packed_with_options`] and [`PackFile::write_thin`].
///
/// All fields are public, so a value built by hand bypasses whatever
/// normalization the setters apply (see `compression_level`).
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Number of previous same-type candidates each object is deltified
    /// against. Larger windows find better deltas at higher cost.
    /// [`PackWriteOptions::new`] starts at [`DEFAULT_PACK_WINDOW`].
    pub window: usize,
    /// Maximum delta chain depth. A value of `0` disables deltification.
    /// [`PackWriteOptions::new`] starts at [`DEFAULT_PACK_DEPTH`].
    pub depth: usize,
    /// When `true`, in-pack deltas are encoded as ofs-deltas (the default and
    /// git's preference). When `false`, in-pack deltas use ref-deltas. Deltas
    /// against external thin-pack bases always use ref-deltas regardless.
    pub prefer_ofs_delta: bool,
    /// External base objects, keyed by object id, that are *not* written into
    /// the pack but may be used as delta bases. Supplying any entries here
    /// produces a thin pack (see [`PackFile::write_thin`]). Empty by default,
    /// yielding a self-contained pack.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// When `true` (the default), objects are reordered by type and size for
    /// better delta locality. When `false`, the input order is preserved (the
    /// emitted pack lists objects in the order supplied); deltas then only
    /// reference earlier input objects. Reordering is always skipped when
    /// deltification is disabled (`depth == 0`), since it has no effect there.
    pub reorder: bool,
    /// Zlib compression level for pack entry payloads.
    ///
    /// [`PackWriteOptions::new`] starts at `6`, and
    /// [`PackWriteOptions::with_compression_level`] clamps its argument to at
    /// most `9`. Assigning this field directly skips that clamp.
    pub compression_level: u32,
}
86
87impl Default for PackWriteOptions {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92
93impl PackWriteOptions {
94    /// Options with git-compatible defaults: window
95    /// [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`], ofs-deltas, and
96    /// no external thin bases.
97    pub fn new() -> Self {
98        Self {
99            window: DEFAULT_PACK_WINDOW,
100            depth: DEFAULT_PACK_DEPTH,
101            prefer_ofs_delta: true,
102            thin_bases: HashMap::new(),
103            reorder: true,
104            compression_level: 6,
105        }
106    }
107
108    /// Set the sliding-window size.
109    pub fn with_window(mut self, window: usize) -> Self {
110        self.window = window;
111        self
112    }
113
114    /// Set the maximum delta chain depth (`0` disables deltas).
115    pub fn with_depth(mut self, depth: usize) -> Self {
116        self.depth = depth;
117        self
118    }
119
120    /// Choose whether in-pack deltas use ofs-delta (`true`) or ref-delta
121    /// (`false`) base references.
122    pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
123        self.prefer_ofs_delta = prefer_ofs_delta;
124        self
125    }
126
127    /// Provide the set of external base objects permitted for a thin pack.
128    pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
129        self.thin_bases = thin_bases;
130        self
131    }
132
133    /// Choose whether objects may be reordered for delta locality (`true`) or
134    /// emitted in input order (`false`).
135    pub fn with_reorder(mut self, reorder: bool) -> Self {
136        self.reorder = reorder;
137        self
138    }
139
140    /// Set the zlib compression level used for pack entry payloads.
141    pub fn with_compression_level(mut self, level: u32) -> Self {
142        self.compression_level = level.min(9);
143        self
144    }
145}
146
/// Switches describing how a repack should be carried out.
///
/// NOTE(review): nothing in this excerpt reads these fields. The per-field
/// notes are inferred from the names and from git's `repack` options and must
/// be confirmed against the consumer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably: whether a reachability bitmap is written with the new pack.
    pub write_bitmaps: bool,
    /// Presumably: whether unreachable objects are collected into cruft packs.
    pub cruft_packs: bool,
    /// Presumably: the factor for geometric repacking; `None` when unused.
    pub geometric_factor: Option<u8>,
}
153
/// A parsed pack whose entries have been inflated and resolved into objects.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version read from the header; parsing accepts only `2`
    /// and `3`.
    pub version: u32,
    /// Objects obtained by resolving the pack's entries.
    pub entries: Vec<PackObject>,
    /// Digest of every pack byte that precedes the trailer. Parsing rejects
    /// the pack unless this equals the trailer itself.
    pub checksum: ObjectId,
}
160
/// One object of a parsed pack: its in-pack bookkeeping plus the object itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes and in-pack offset of the entry.
    pub entry: PackEntry,
    /// The object's type and body.
    pub object: EncodedObject,
}
166
/// Per-object statistics for one entry of a verified pack, in the shape
/// `git verify-pack -v` reports.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Resolved object id.
    pub oid: ObjectId,
    /// Resolved object type (the delta's *result* type, not `ofs-delta`).
    pub object_type: ObjectType,
    /// Size taken from the entry header, in bytes: the object body size for an
    /// undeltified entry, but the length of the delta instruction stream for a
    /// delta entry — *not* the size of the object the delta resolves to.
    pub size: u64,
    /// Bytes this object occupies in the pack: the offset delta to the next
    /// object, or to the trailing checksum for the last object.
    pub size_in_pack: u64,
    /// In-pack byte offset where this object's entry begins.
    pub offset: u64,
    /// Delta chain depth: `0` for undeltified objects, base-depth + 1 otherwise.
    /// A delta whose base cannot be located inside the pack is reported as `1`.
    pub delta_depth: u32,
    /// For delta objects, the id of the *immediate* base object (which may
    /// itself be a delta). `None` for undeltified objects, and also for an
    /// ofs-delta whose base offset matches no resolved object.
    pub base_oid: Option<ObjectId>,
}
188
/// Result of [`PackFile::verify_pack_stats`]: per-object stats in pack offset
/// order plus the pack's trailing checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One entry per pack object, ordered by ascending in-pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack, as computed while parsing it.
    pub checksum: ObjectId,
}
196
/// An in-memory pack together with the index built for it.
///
/// NOTE(review): within this excerpt only `PackFile::index_pack` constructs
/// this value; the pack-writing paths are assumed to fill it the same way.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Raw pack bytes.
    pub pack: Vec<u8>,
    /// Serialized pack index for `pack`.
    pub index: Vec<u8>,
    /// Checksum of the pack.
    pub checksum: ObjectId,
    /// Index entries describing the pack's objects.
    pub entries: Vec<PackIndexEntry>,
    /// Presumably the number of entries stored as deltas.
    /// `PackFile::index_pack` always stores `0` here.
    pub delta_count: u32,
}
205
/// The same report as [`PackWrite`] minus the pack bytes, plus the pack's size.
///
/// NOTE(review): no producer is visible in this excerpt; presumably returned
/// by writers that stream the pack elsewhere instead of buffering it — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWriteSummary {
    /// Serialized pack index.
    pub index: Vec<u8>,
    /// Checksum of the pack.
    pub checksum: ObjectId,
    /// Index entries describing the pack's objects.
    pub entries: Vec<PackIndexEntry>,
    /// Presumably the number of entries stored as deltas.
    pub delta_count: u32,
    /// Presumably the total size of the written pack in bytes.
    pub pack_size: u64,
}
214
/// A borrowed object paired with the id the caller already knows for it.
///
/// This is the element type accepted by
/// `PackFile::write_packed_with_known_ids`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Caller-supplied id of `object`.
    pub oid: &'a ObjectId,
    /// The object to pack.
    pub object: &'a EncodedObject,
}
220
/// Output of building an index for an existing pack, as returned by
/// `PackIndex::write_v2_for_pack`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index describes.
    pub pack_checksum: ObjectId,
    /// Index entries describing the pack's objects.
    pub entries: Vec<PackIndexEntry>,
}
227
/// [`PackIndexBuild`] extended with a per-object summary.
///
/// NOTE(review): no producer is visible in this excerpt; presumably the result
/// of indexing a pack read from a stream — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackStreamIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index describes.
    pub pack_checksum: ObjectId,
    /// Index entries describing the pack's objects.
    pub entries: Vec<PackIndexEntry>,
    /// Id, type, size and offset of each indexed object.
    pub objects: Vec<PackIndexedObject>,
}
235
/// Summary of one object seen while indexing a pack.
///
/// NOTE(review): whether `size` is the resolved object size or the on-disk
/// entry size is not visible in this excerpt — confirm against the producer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexedObject {
    /// Id of the object.
    pub oid: ObjectId,
    /// Type of the object.
    pub object_type: ObjectType,
    /// Size of the object in bytes (see the note above).
    pub size: u64,
    /// Presumably the in-pack byte offset of the object's entry.
    pub offset: u64,
}
243
/// A pack index held entirely in memory, with every entry materialized.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-slot fan-out table.
    /// NOTE(review): presumably cumulative object counts keyed by the first
    /// oid byte, as in git's idx format — confirm against the parser.
    pub fanout: [u32; 256],
    /// One entry per indexed object.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
252
/// A pack index read in place from a borrowed byte slice.
///
/// Unlike [`PackIndex`], no entry list is stored: the private fields keep the
/// raw bytes and the ranges of the tables within them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-slot fan-out table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Borrowed index bytes the view reads from.
    bytes: &'a [u8],
    // Object format of the indexed ids.
    format: ObjectFormat,
    // Table ranges for the index version in use.
    tables: PackIndexViewTables,
}
264
/// A thread-safe, debuggable owner of pack index bytes.
///
/// Used behind `Arc<dyn PackIndexByteSource>` so an index view can keep its
/// backing storage alive without caring what kind of buffer it is.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Borrow the underlying bytes.
    fn as_bytes(&self) -> &[u8];
}

/// Every `AsRef<[u8]>` type that is also `Debug + Send + Sync` is a byte
/// source; unsized types such as `[u8]` are included.
impl<T> PackIndexByteSource for T
where
    T: ?Sized + Send + Sync + fmt::Debug + AsRef<[u8]>,
{
    fn as_bytes(&self) -> &[u8] {
        AsRef::<[u8]>::as_ref(self)
    }
}

/// Reference-counted index bytes.
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);

impl PackIndexByteSource for SharedIndexBytes {
    fn as_bytes(&self) -> &[u8] {
        // Deref coercion: `&Arc<[u8]>` -> `&[u8]`.
        &self.0
    }
}
286
/// Like [`PackIndexView`], but holding its bytes through a shared
/// [`PackIndexByteSource`] instead of borrowing a slice.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-slot fan-out table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Shared owner of the index bytes; cloning the view clones the `Arc`.
    bytes: Arc<dyn PackIndexByteSource>,
    // Object format of the indexed ids.
    format: ObjectFormat,
    // Table ranges for the index version in use.
    tables: PackIndexViewTables,
}
298
/// One row of a pack index: where an object lives in the pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// CRC-32 recorded for the object's pack entry.
    /// NOTE(review): presumably computed over the entry's on-disk bytes, as in
    /// git's idx v2 — confirm against the index writer.
    pub crc32: u32,
    /// Byte offset of the object's entry within the pack.
    pub offset: u64,
}
305
/// The CRC and offset of an index row, without the object id.
///
/// NOTE(review): presumably what an id lookup on an index returns — the lookup
/// itself is outside this excerpt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 recorded for the object's pack entry.
    pub crc32: u32,
    /// Byte offset of the object's entry within the pack.
    pub offset: u64,
}
311
/// Ranges of the tables inside a pack index, by index format version.
///
/// NOTE(review): the ranges are presumably byte offsets into the index bytes
/// held by the owning view — confirm against the code that builds them.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version 1 layout: a single table of entries.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version 2 layout: separate tables for ids, CRCs, and small and large
    /// offsets.
    V2 {
        oid_table: Range<usize>,
        crc_table: Range<usize>,
        small_offset_table: Range<usize>,
        large_offset_table: Range<usize>,
    },
}
324
/// In-memory form of a pack reverse index.
///
/// NOTE(review): the parser is outside this excerpt. By analogy with git's
/// `.rev` format, `positions` presumably lists index positions in pack offset
/// order — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Reverse-index format version.
    pub version: u32,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// Position table (see the note above).
    pub positions: Vec<u32>,
    /// Checksum of the pack this file belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum stored for the file itself.
    pub index_checksum: ObjectId,
}
333
/// In-memory form of a pack mtimes table.
///
/// NOTE(review): the parser is outside this excerpt. By analogy with git's
/// `.mtimes` format, `mtimes` presumably holds one timestamp per object in
/// index order — confirm ordering and units.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version of the table.
    pub version: u32,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// Per-object modification times (see the note above).
    pub mtimes: Vec<u32>,
    /// Checksum of the pack this file belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum stored for the file itself.
    pub index_checksum: ObjectId,
}
342
/// In-memory form of a pack reachability bitmap index.
///
/// NOTE(review): the parser and writer are outside this excerpt; the meaning
/// of `options` bits and of `name_hash_cache` should be confirmed against
/// them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Bitmap format version.
    pub version: u16,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// Option flags stored in the bitmap header.
    pub options: u16,
    /// Checksum of the pack the bitmaps describe.
    pub pack_checksum: ObjectId,
    /// Checksum stored for the bitmap file itself.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Per-commit bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Pseudo-merge bitmaps.
    pub pseudo_merges: Vec<PackBitmapPseudoMerge>,
    /// Optional name-hash cache; `None` when absent.
    pub name_hash_cache: Option<Vec<u32>>,
}
355
/// The four per-type bitmaps of a bitmap index, one per object type.
///
/// NOTE(review): presumably each bitmap marks which objects have that type,
/// using the same bit numbering as the reachability bitmaps — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commits.
    pub commits: EwahBitmap,
    /// Bitmap for trees.
    pub trees: EwahBitmap,
    /// Bitmap for blobs.
    pub blobs: EwahBitmap,
    /// Bitmap for tags.
    pub tags: EwahBitmap,
}
363
/// One commit's entry in a bitmap index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// The commit's position in the *oid-sorted* pack index (`.idx` order),
    /// NOT the pack-order position used for the bitmap's bit numbering.
    /// Upstream writes `oid_pos(...)` here (pack-bitmap-write.c) and reads it
    /// back via `nth_packed_object_id` (pack-bitmap.c).
    pub object_position: u32,
    /// NOTE(review): presumably how many entries back the bitmap this one is
    /// XOR-compressed against sits, `0` meaning none — confirm against the
    /// reader.
    pub xor_offset: u8,
    /// Flag byte stored with the entry; bit meanings are not visible here.
    pub flags: u8,
    /// Reachability bitmap; bit `i` refers to the `i`-th object in *pack
    /// order* (offset order), as mapped by the pack's reverse index.
    pub bitmap: EwahBitmap,
}
377
/// A pseudo-merge: a group of commits stored together with the set of objects
/// reachable from them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapPseudoMerge {
    /// Commit bits, in the bitmap's bit-numbering order, covered by this
    /// pseudo-merge.
    pub commits: EwahBitmap,
    /// Object reachability closure for the pseudo-merge's commits, in the same
    /// bit-numbering order.
    pub bitmap: EwahBitmap,
}
387
/// An EWAH-compressed bitmap kept in its serialized word form.
///
/// NOTE(review): field meanings follow git's EWAH serialization by assumption;
/// the encoder and decoder are outside this excerpt.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Presumably the number of bits the bitmap represents once expanded.
    pub bit_size: u32,
    /// The compressed 64-bit words.
    pub words: Vec<u64>,
    /// Presumably the index within `words` of the last run-length word.
    pub rlw_position: u32,
}
394
/// In-memory form of a multi-pack index.
///
/// NOTE(review): the parser is outside this excerpt; per-field notes follow
/// the field names and git's multi-pack-index format and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object format of the indexed ids.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects indexed.
    pub object_count: u32,
    /// 256-slot fan-out table.
    pub fanout: [u32; 256],
    /// One entry per indexed object.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index; `None` when absent.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges; `None` when absent.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunks found in the file, with their ids, offsets and lengths.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum recorded for the multi-pack index.
    pub checksum: ObjectId,
}
409
/// Lookup state for a multi-pack index that keeps the raw bytes shared rather
/// than materializing every entry.
///
/// NOTE(review): the `*_offset` fields are presumably byte positions of the
/// corresponding chunks within `bytes`; the code that fills and reads them is
/// outside this excerpt.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    // Object format of the indexed ids.
    format: ObjectFormat,
    // Number of packs covered.
    pack_count: u32,
    // Names of the covered packs.
    pack_names: Vec<String>,
    // 256-slot fan-out table.
    fanout: [u32; 256],
    // Number of objects indexed.
    object_count: usize,
    // Start of the oid lookup table.
    oid_lookup_offset: usize,
    // Start of the object offsets table.
    object_offsets_offset: usize,
    // Start of the large offsets table, when the file has one.
    large_offsets_offset: Option<usize>,
    // Length of the large offsets table.
    large_offsets_len: usize,
    // Shared owner of the multi-pack index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
}
423
/// One object row of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Integer id of the pack holding the object.
    /// NOTE(review): presumably an index into the pack name list — confirm.
    pub pack_int_id: u32,
    /// Byte offset of the object within that pack.
    pub offset: u64,
    /// NOTE(review): presumably forces the offset into the large-offset table
    /// even when it would fit the small form — confirm against the writer.
    pub force_large_offset: bool,
}
431
/// Bitmap range recorded for one pack of a multi-pack index.
///
/// NOTE(review): presumably the first bit position and the number of bits that
/// belong to the pack, as in git's bitmapped-packs chunk — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Starting bitmap position for the pack.
    pub bitmap_pos: u32,
    /// Number of bitmap positions for the pack.
    pub bitmap_nr: u32,
}
437
/// Location of one chunk inside a multi-pack index file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk.
    /// NOTE(review): presumably measured in bytes from the start of the file.
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
444
/// Kind of a pack entry as stated by its header.
///
/// The first four variants are stored undeltified and map one-to-one onto
/// `ObjectType`; the two delta variants carry a base reference after the
/// header and resolve to their base's type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    Commit,
    Tree,
    Blob,
    Tag,
    /// Delta whose base is identified by a pack offset.
    OfsDelta,
    /// Delta whose base is identified by an object id.
    RefDelta,
}
454
/// A pack entry after inflation but before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// An undeltified entry, already a complete object with its id computed.
    Resolved(PackObject),
    /// A delta entry that still has to be applied to its base.
    Delta {
        /// Where to find the base object.
        base: DeltaBase,
        /// Number of pack bytes consumed by the compressed delta stream.
        compressed_size: u64,
        /// Size declared in the entry header, i.e. the inflated delta length.
        delta_size: u64,
        /// Byte offset of the entry header from the start of the pack.
        offset: u64,
        /// The inflated delta instruction stream.
        delta: Vec<u8>,
    },
}
466
/// How a delta entry names the object it is based on.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Pack offset of the base entry (ofs-delta). Compared directly against
    /// entry offsets, so it is an absolute position in the pack rather than a
    /// distance.
    Offset(u64),
    /// Object id of the base (ref-delta), read from the raw id bytes that
    /// follow the entry header.
    Ref(ObjectId),
}
472
/// One pack entry as stored on disk, used by [`PackFile::verify_pack_stats`] to
/// recover the delta structure and on-disk stream size that resolved
/// [`PackObject`]s no longer carry.
struct OnDiskEntry {
    /// Byte offset of the entry header from the start of the pack.
    offset: u64,
    /// Base reference for a delta entry; `None` for an undeltified entry.
    base: Option<DeltaBase>,
    /// Size declared in the entry header: the body size for an undeltified
    /// entry, the delta instruction stream length for a delta.
    stream_size: u64,
}
481
482impl PackFile {
483    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
484        Self::parse(bytes, ObjectFormat::Sha1)
485    }
486
487    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
488        Self::parse_with_base(bytes, format, |_| Ok(None))
489    }
490
491    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
492        Self::parse(&bundle.pack, bundle.format)
493    }
494
495    pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
496        let PackIndexBuild {
497            index,
498            pack_checksum,
499            entries,
500        } = PackIndex::write_v2_for_pack(bytes, format)?;
501        Ok(PackWrite {
502            pack: bytes.to_vec(),
503            index,
504            checksum: pack_checksum,
505            entries,
506            delta_count: 0,
507        })
508    }
509
510    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
511    where
512        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
513    {
514        Self::parse_with_base(bytes, format, external_base)
515    }
516
517    fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
518    where
519        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
520    {
521        let trailer_len = format.raw_len();
522        if bytes.len() < 12 + trailer_len {
523            return Err(GitError::InvalidFormat("pack file too short".into()));
524        }
525        let trailer_offset = bytes.len() - trailer_len;
526        let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
527        let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
528        if checksum != expected {
529            return Err(GitError::InvalidFormat(format!(
530                "pack checksum mismatch: expected {expected}, got {checksum}"
531            )));
532        }
533
534        if &bytes[..4] != b"PACK" {
535            return Err(GitError::InvalidFormat("missing PACK signature".into()));
536        }
537        let version = u32_be(&bytes[4..8]);
538        if version != 2 && version != 3 {
539            return Err(GitError::Unsupported(format!("pack version {version}")));
540        }
541        let count = u32_be(&bytes[8..12]) as usize;
542        let mut offset = 12usize;
543        let mut entries = Vec::with_capacity(count);
544        for _ in 0..count {
545            let entry_offset = offset;
546            let header = parse_entry_header(bytes, &mut offset)?;
547            let base =
548                match header.kind {
549                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
550                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
551                    )),
552                    PackObjectKind::RefDelta => {
553                        let hash_len = format.raw_len();
554                        if offset + hash_len > trailer_offset {
555                            return Err(GitError::InvalidFormat(
556                                "truncated ref-delta base object id".into(),
557                            ));
558                        }
559                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
560                        offset += hash_len;
561                        Some(DeltaBase::Ref(oid))
562                    }
563                    _ => None,
564                };
565            let mut body = Vec::new();
566            let consumed = inflate_into(
567                &bytes[offset..trailer_offset],
568                &mut body,
569                header.size.min(usize::MAX as u64) as usize,
570            )?;
571            if body.len() as u64 != header.size {
572                return Err(GitError::InvalidObject(format!(
573                    "pack object declared {} bytes, decoded {}",
574                    header.size,
575                    body.len()
576                )));
577            }
578            if consumed == 0 {
579                return Err(GitError::InvalidFormat(
580                    "empty compressed pack entry".into(),
581                ));
582            }
583            offset = offset
584                .checked_add(consumed)
585                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
586            if offset > trailer_offset {
587                return Err(GitError::InvalidFormat(
588                    "pack entry extends past checksum".into(),
589                ));
590            }
591            if let Some(base) = base {
592                entries.push(ParsedPackEntry::Delta {
593                    base,
594                    compressed_size: consumed as u64,
595                    delta_size: header.size,
596                    offset: entry_offset as u64,
597                    delta: body,
598                });
599            } else {
600                let object_type = match header.kind {
601                    PackObjectKind::Commit => ObjectType::Commit,
602                    PackObjectKind::Tree => ObjectType::Tree,
603                    PackObjectKind::Blob => ObjectType::Blob,
604                    PackObjectKind::Tag => ObjectType::Tag,
605                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
606                };
607                let object = EncodedObject::new(object_type, body);
608                let oid = object.object_id(format)?;
609                entries.push(ParsedPackEntry::Resolved(PackObject {
610                    entry: PackEntry {
611                        oid,
612                        compressed_size: consumed as u64,
613                        uncompressed_size: header.size,
614                        offset: entry_offset as u64,
615                    },
616                    object,
617                }));
618            }
619        }
620        if offset != trailer_offset {
621            return Err(GitError::InvalidFormat(format!(
622                "pack has {} trailing bytes before checksum",
623                trailer_offset - offset
624            )));
625        }
626        Ok(Self {
627            version,
628            entries: resolve_pack_entries(entries, format, &mut external_base)?,
629            checksum,
630        })
631    }
632
633    /// Walk the pack and produce per-object statistics matching the output of
634    /// `git verify-pack -v` / `git index-pack --verify-stat`.
635    ///
636    /// Objects are returned in pack offset order (the order `git verify-pack -v`
637    /// prints them). Each entry carries the *resolved* object id, type and size,
638    /// the in-pack byte span (`size_in_pack` = the offset delta to the next
639    /// object, or to the trailing checksum for the last object), the in-pack
640    /// offset, the delta chain depth (`0` for undeltified objects), and — for
641    /// deltas — the object id of the *immediate* base (which may itself be a
642    /// delta). This mirrors `builtin/index-pack.c`'s `show_pack_info`.
643    pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
644        // Resolve the whole pack first: this validates the trailing checksum,
645        // every object's inflate, and yields the resolved oid/type/size keyed by
646        // offset. `verify-pack` is exactly this validation plus the stat report.
647        let pack = Self::parse(bytes, format)?;
648
649        // Independently walk the on-disk entries to recover each object's stored
650        // kind and (for deltas) its base reference — information `PackFile`
651        // discards once deltas are resolved.
652        let trailer_len = format.raw_len();
653        let trailer_offset = bytes.len() - trailer_len;
654        let count = u32_be(&bytes[8..12]) as usize;
655        let mut offset = 12usize;
656        // Per entry in read (offset) order: (offset, base, on-disk stream size).
657        // The stream size is what git prints in the size column: it is the
658        // resolved object size for an undeltified entry, but the *delta
659        // instruction stream* length for a delta entry (builtin/index-pack.c sets
660        // `obj->size` from the entry header, before any delta is applied).
661        let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
662        for _ in 0..count {
663            let entry_offset = offset as u64;
664            let header = parse_entry_header(bytes, &mut offset)?;
665            let stream_size = header.size;
666            let base =
667                match header.kind {
668                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
669                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
670                    )),
671                    PackObjectKind::RefDelta => {
672                        let hash_len = format.raw_len();
673                        if offset + hash_len > trailer_offset {
674                            return Err(GitError::InvalidFormat(
675                                "truncated ref-delta base object id".into(),
676                            ));
677                        }
678                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
679                        offset += hash_len;
680                        Some(DeltaBase::Ref(oid))
681                    }
682                    _ => None,
683                };
684            // Skip the compressed body to reach the next entry header.
685            let mut body = Vec::new();
686            let consumed = inflate_into(
687                &bytes[offset..trailer_offset],
688                &mut body,
689                header.size.min(usize::MAX as u64) as usize,
690            )?;
691            offset = offset
692                .checked_add(consumed)
693                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
694            on_disk.push(OnDiskEntry {
695                offset: entry_offset,
696                base,
697                stream_size,
698            });
699        }
700
701        // Map offset -> resolved object so the on-disk walk can join in oid/type.
702        let mut resolved_by_offset: HashMap<u64, &PackObject> =
703            HashMap::with_capacity(pack.entries.len());
704        for object in &pack.entries {
705            resolved_by_offset.insert(object.entry.offset, object);
706        }
707        // Map offset -> resolved oid, for ofs-delta base lookups.
708        let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
709        for entry in &on_disk {
710            if let Some(object) = resolved_by_offset.get(&entry.offset) {
711                oid_by_offset.insert(entry.offset, object.entry.oid);
712            }
713        }
714        // Map base offset -> index in `on_disk`, for delta-depth propagation.
715        let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
716        for (idx, entry) in on_disk.iter().enumerate() {
717            index_by_offset.insert(entry.offset, idx);
718        }
719
720        // Sorted offsets give the size-in-pack span (next offset - this offset),
721        // with the trailing checksum offset as the final sentinel.
722        let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
723        sorted_offsets.sort_unstable();
724        let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
725        for window in sorted_offsets.windows(2) {
726            next_offset.insert(window[0], window[1]);
727        }
728        if let Some(last) = sorted_offsets.last() {
729            next_offset.insert(*last, trailer_offset as u64);
730        }
731
732        // Compute delta depth by following base offsets. Depth of a non-delta is
733        // 0; a delta's depth is its base's depth + 1. `index_by_offset` lets an
734        // ofs-delta find its base's index; a ref-delta resolves its base oid to
735        // an in-pack offset when present (thin-pack external bases are not stored
736        // in this pack, but verify-pack only ever runs on self-contained packs).
737        let mut depth = vec![None; on_disk.len()];
738        fn resolve_depth(
739            idx: usize,
740            on_disk: &[OnDiskEntry],
741            index_by_offset: &HashMap<u64, usize>,
742            offset_of_oid: &HashMap<ObjectId, u64>,
743            depth: &mut [Option<u32>],
744        ) -> u32 {
745            if let Some(d) = depth[idx] {
746                return d;
747            }
748            let computed = match &on_disk[idx].base {
749                None => 0,
750                Some(base) => {
751                    let base_idx = match base {
752                        DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
753                        DeltaBase::Ref(oid) => offset_of_oid
754                            .get(oid)
755                            .and_then(|off| index_by_offset.get(off).copied()),
756                    };
757                    match base_idx {
758                        Some(bi) => {
759                            resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
760                        }
761                        // Base not in this pack (thin pack); treat as depth 1.
762                        None => 1,
763                    }
764                }
765            };
766            depth[idx] = Some(computed);
767            computed
768        }
769        let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
770        for (off, oid) in &oid_by_offset {
771            offset_of_oid.insert(*oid, *off);
772        }
773        for idx in 0..on_disk.len() {
774            resolve_depth(idx, &on_disk, &index_by_offset, &offset_of_oid, &mut depth);
775        }
776
777        let mut stats = Vec::with_capacity(on_disk.len());
778        for (idx, entry) in on_disk.iter().enumerate() {
779            let off = entry.offset;
780            let object = resolved_by_offset.get(&off).ok_or_else(|| {
781                GitError::InvalidFormat("pack offset missing from resolved set".into())
782            })?;
783            let size_in_pack = next_offset
784                .get(&off)
785                .copied()
786                .unwrap_or(trailer_offset as u64)
787                .saturating_sub(off);
788            let base_oid = match &entry.base {
789                None => None,
790                Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
791                Some(DeltaBase::Ref(oid)) => Some(*oid),
792            };
793            stats.push(PackVerifyStat {
794                oid: object.entry.oid,
795                object_type: object.object.object_type,
796                // git prints the on-disk stream size: object body size for an
797                // undeltified entry, delta-instruction stream size for a delta.
798                size: entry.stream_size,
799                size_in_pack,
800                offset: off,
801                delta_depth: depth[idx].unwrap_or(0),
802                base_oid,
803            });
804        }
805        // Emit in pack offset order, matching git's read order.
806        stats.sort_by_key(|stat| stat.offset);
807
808        Ok(PackVerifyStats {
809            objects: stats,
810            checksum: pack.checksum,
811        })
812    }
813
814    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
815    where
816        T: Borrow<EncodedObject>,
817    {
818        Self::write_undeltified(objects, ObjectFormat::Sha1)
819    }
820
821    /// Write a pack with every object stored undeltified (no delta entries).
822    ///
823    /// This is the simple, self-contained encoding; objects appear in the given
824    /// order. For smaller output that exploits similarity between objects, use
825    /// [`PackFile::write_packed`].
826    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
827    where
828        T: Borrow<EncodedObject>,
829    {
830        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
831        Self::write_packed_impl(objects, format, &options)
832    }
833
834    /// Write a pack using sliding-window delta selection with git-compatible
835    /// defaults (window [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`],
836    /// ofs-deltas, self-contained).
837    ///
838    /// Objects are grouped by type and ordered for good deltas, then each is
839    /// compared against a window of previously emitted candidates; the smallest
840    /// acceptable delta is kept, otherwise the object is stored undeltified. The
841    /// result round-trips through [`PackFile::parse`].
842    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
843    where
844        T: Borrow<EncodedObject>,
845    {
846        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
847    }
848
849    /// Like [`PackFile::write_packed`] but with caller-supplied
850    /// [`PackWriteOptions`] (window, depth, base-reference style, and optional
851    /// external thin bases).
852    pub fn write_packed_with_options<T>(
853        objects: &[T],
854        format: ObjectFormat,
855        options: &PackWriteOptions,
856    ) -> Result<PackWrite>
857    where
858        T: Borrow<EncodedObject>,
859    {
860        Self::write_packed_impl(objects, format, options)
861    }
862
863    /// Like [`PackFile::write_packed`], but uses caller-supplied object ids
864    /// instead of re-hashing each object before pack planning.
865    ///
866    /// This is intended for object-database paths that reached each object by
867    /// its id and already trust that id/object mapping. The function validates
868    /// id formats and duplicate ids, but it does not re-hash object bodies; use
869    /// [`PackFile::write_packed`] when the ids are not already known to be
870    /// canonical.
871    pub fn write_packed_with_known_ids(
872        inputs: &[PackInput<'_>],
873        format: ObjectFormat,
874    ) -> Result<PackWrite> {
875        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
876    }
877
878    /// Like [`PackFile::write_packed_with_known_ids`] but with caller-supplied
879    /// [`PackWriteOptions`].
880    pub fn write_packed_with_known_ids_and_options(
881        inputs: &[PackInput<'_>],
882        format: ObjectFormat,
883        options: &PackWriteOptions,
884    ) -> Result<PackWrite> {
885        if inputs.len() > u32::MAX as usize {
886            return Err(GitError::InvalidFormat("too many pack objects".into()));
887        }
888        let mut objects = Vec::with_capacity(inputs.len());
889        let mut object_ids = Vec::with_capacity(inputs.len());
890        for input in inputs {
891            if input.oid.format() != format {
892                return Err(GitError::InvalidObjectId(format!(
893                    "pack object id {} uses {}, pack uses {}",
894                    input.oid,
895                    input.oid.format().name(),
896                    format.name()
897                )));
898            }
899            objects.push(input.object);
900            object_ids.push(*input.oid);
901        }
902        Self::write_packed_from_parts(objects, object_ids, format, options)
903    }
904
905    pub fn write_packed_with_known_ids_to_writer<W>(
906        inputs: &[PackInput<'_>],
907        format: ObjectFormat,
908        options: &PackWriteOptions,
909        writer: &mut W,
910    ) -> Result<PackWriteSummary>
911    where
912        W: Write,
913    {
914        if inputs.len() > u32::MAX as usize {
915            return Err(GitError::InvalidFormat("too many pack objects".into()));
916        }
917        let mut objects = Vec::with_capacity(inputs.len());
918        let mut object_ids = Vec::with_capacity(inputs.len());
919        for input in inputs {
920            if input.oid.format() != format {
921                return Err(GitError::InvalidObjectId(format!(
922                    "pack object id {} uses {}, pack uses {}",
923                    input.oid,
924                    input.oid.format().name(),
925                    format.name()
926                )));
927            }
928            objects.push(input.object);
929            object_ids.push(*input.oid);
930        }
931        Self::write_packed_from_parts_to_writer(objects, object_ids, format, options, writer)
932    }
933
934    /// Write a thin pack: objects may be deltified against `external_bases`
935    /// that are *not* included in the pack, referenced by ref-delta to their
936    /// object id.
937    ///
938    /// The receiver must already have (or otherwise obtain) those base objects
939    /// and resolve the pack with [`PackFile::parse_thin`]. Window and depth use
940    /// the defaults; pass options via [`PackFile::write_packed_with_options`]
941    /// with [`PackWriteOptions::with_thin_bases`] for finer control.
942    pub fn write_thin<T>(
943        objects: &[T],
944        format: ObjectFormat,
945        external_bases: HashMap<ObjectId, EncodedObject>,
946    ) -> Result<PackWrite>
947    where
948        T: Borrow<EncodedObject>,
949    {
950        let options = PackWriteOptions::new().with_thin_bases(external_bases);
951        Self::write_packed_impl(objects, format, &options)
952    }
953
954    fn write_packed_impl<T>(
955        objects: &[T],
956        format: ObjectFormat,
957        options: &PackWriteOptions,
958    ) -> Result<PackWrite>
959    where
960        T: Borrow<EncodedObject>,
961    {
962        if objects.len() > u32::MAX as usize {
963            return Err(GitError::InvalidFormat("too many pack objects".into()));
964        }
965        let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
966
967        // Compute object ids up front; they are needed both for the index and,
968        // for ref-deltas, inside the pack entries themselves.
969        let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
970        for object in &objects {
971            object_ids.push(object.object_id(format)?);
972        }
973        Self::write_packed_from_parts(objects, object_ids, format, options)
974    }
975
976    fn write_packed_from_parts(
977        objects: Vec<&EncodedObject>,
978        object_ids: Vec<ObjectId>,
979        format: ObjectFormat,
980        options: &PackWriteOptions,
981    ) -> Result<PackWrite> {
982        let mut seen = HashSet::with_capacity(object_ids.len());
983        for oid in &object_ids {
984            if !seen.insert(oid) {
985                return Err(GitError::InvalidFormat(format!(
986                    "pack contains duplicate object id {oid}"
987                )));
988            }
989        }
990
991        // Validate external thin bases share the pack's hash format.
992        for oid in options.thin_bases.keys() {
993            if oid.format() != format {
994                return Err(GitError::InvalidObjectId(
995                    "thin pack base object id format does not match pack format".into(),
996                ));
997            }
998        }
999
1000        // Decide, for each object, whether it is stored undeltified or as a
1001        // delta against another object (in-pack or an external thin base), and
1002        // obtain the emit order. In-pack deltas only ever reference candidates
1003        // that appear earlier in `order`, so emitting in `order` guarantees a
1004        // base is always written before any object that deltas against it.
1005        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
1006
1007        let mut pack = Vec::new();
1008        pack.extend_from_slice(b"PACK");
1009        pack.extend_from_slice(&2u32.to_be_bytes());
1010        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
1011
1012        let mut index_entries = Vec::with_capacity(objects.len());
1013        let mut delta_count = 0u32;
1014        // Pack offset at which each original object index was written, or
1015        // `None` until it has been emitted.
1016        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1017
1018        let compressed_payloads =
1019            compress_planned_payloads(&objects, &plan, &order, options.compression_level)?;
1020
1021        for (order_pos, &idx) in order.iter().enumerate() {
1022            let offset = pack.len() as u64;
1023            let mut entry_bytes = Vec::new();
1024            match &plan[idx].base {
1025                PlannedBase::None => {
1026                    write_entry_header(
1027                        &mut entry_bytes,
1028                        objects[idx].object_type,
1029                        objects[idx].body.len() as u64,
1030                    );
1031                }
1032                PlannedBase::InPack { base_idx, delta } => {
1033                    delta_count += 1;
1034                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1035                        GitError::InvalidFormat(
1036                            "in-pack delta base emitted after dependent object".into(),
1037                        )
1038                    })?;
1039                    if options.prefer_ofs_delta {
1040                        write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
1041                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1042                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
1043                        })?;
1044                        write_ofs_delta_offset(&mut entry_bytes, relative)?;
1045                    } else {
1046                        write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1047                        entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
1048                    }
1049                }
1050                PlannedBase::External { base_oid, delta } => {
1051                    delta_count += 1;
1052                    write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1053                    entry_bytes.extend_from_slice(base_oid.as_bytes());
1054                }
1055            }
1056            entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
1057            let crc32 = crc32fast::hash(&entry_bytes);
1058            pack.extend_from_slice(&entry_bytes);
1059            written_offsets[idx] = Some(offset);
1060            index_entries.push(PackIndexEntry {
1061                oid: object_ids[idx].clone(),
1062                crc32,
1063                offset,
1064            });
1065        }
1066
1067        let checksum = sley_core::digest_bytes(format, &pack)?;
1068        pack.extend_from_slice(checksum.as_bytes());
1069        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1070        Ok(PackWrite {
1071            pack,
1072            index,
1073            checksum,
1074            entries: index_entries,
1075            delta_count,
1076        })
1077    }
1078
1079    fn write_packed_from_parts_to_writer<W>(
1080        objects: Vec<&EncodedObject>,
1081        object_ids: Vec<ObjectId>,
1082        format: ObjectFormat,
1083        options: &PackWriteOptions,
1084        writer: &mut W,
1085    ) -> Result<PackWriteSummary>
1086    where
1087        W: Write,
1088    {
1089        let mut seen = HashSet::with_capacity(object_ids.len());
1090        for oid in &object_ids {
1091            if !seen.insert(oid) {
1092                return Err(GitError::InvalidFormat(format!(
1093                    "pack contains duplicate object id {oid}"
1094                )));
1095            }
1096        }
1097
1098        for oid in options.thin_bases.keys() {
1099            if oid.format() != format {
1100                return Err(GitError::InvalidObjectId(
1101                    "thin pack base object id format does not match pack format".into(),
1102                ));
1103            }
1104        }
1105
1106        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
1107        let mut output = PackDigestWriter::new(writer, format);
1108        output.write_pack_bytes(b"PACK")?;
1109        output.write_pack_bytes(&2u32.to_be_bytes())?;
1110        output.write_pack_bytes(&(objects.len() as u32).to_be_bytes())?;
1111
1112        let mut index_entries = Vec::with_capacity(objects.len());
1113        let mut delta_count = 0u32;
1114        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1115
1116        for order_window in order.chunks(PACK_STREAM_COMPRESSION_WINDOW_OBJECTS) {
1117            let compressed_payloads = compress_planned_payloads(
1118                &objects,
1119                &plan,
1120                order_window,
1121                options.compression_level,
1122            )?;
1123            for (&idx, compressed_payload) in order_window.iter().zip(&compressed_payloads) {
1124                let offset = output.position();
1125                let mut entry_header = Vec::new();
1126                match &plan[idx].base {
1127                    PlannedBase::None => {
1128                        write_entry_header(
1129                            &mut entry_header,
1130                            objects[idx].object_type,
1131                            objects[idx].body.len() as u64,
1132                        );
1133                    }
1134                    PlannedBase::InPack { base_idx, delta } => {
1135                        delta_count += 1;
1136                        let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1137                            GitError::InvalidFormat(
1138                                "in-pack delta base emitted after dependent object".into(),
1139                            )
1140                        })?;
1141                        if options.prefer_ofs_delta {
1142                            write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
1143                            let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1144                                GitError::InvalidFormat(
1145                                    "ofs-delta base offset is after delta".into(),
1146                                )
1147                            })?;
1148                            write_ofs_delta_offset(&mut entry_header, relative)?;
1149                        } else {
1150                            write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1151                            entry_header.extend_from_slice(object_ids[*base_idx].as_bytes());
1152                        }
1153                    }
1154                    PlannedBase::External { base_oid, delta } => {
1155                        delta_count += 1;
1156                        write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1157                        entry_header.extend_from_slice(base_oid.as_bytes());
1158                    }
1159                }
1160                let mut crc32 = crc32fast::Hasher::new();
1161                crc32.update(&entry_header);
1162                crc32.update(compressed_payload);
1163                output.write_pack_bytes(&entry_header)?;
1164                output.write_pack_bytes(compressed_payload)?;
1165                written_offsets[idx] = Some(offset);
1166                index_entries.push(PackIndexEntry {
1167                    oid: object_ids[idx],
1168                    crc32: crc32.finalize(),
1169                    offset,
1170                });
1171            }
1172        }
1173
1174        let (checksum, pack_size) = output.finish()?;
1175        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1176        Ok(PackWriteSummary {
1177            index,
1178            checksum,
1179            entries: index_entries,
1180            delta_count,
1181            pack_size,
1182        })
1183    }
1184
1185    pub fn write_undeltified_from_source_to_writer<W, F>(
1186        object_ids: &[ObjectId],
1187        format: ObjectFormat,
1188        options: &PackWriteOptions,
1189        mut read_object: F,
1190        writer: &mut W,
1191    ) -> Result<PackWriteSummary>
1192    where
1193        W: Write,
1194        F: FnMut(&ObjectId) -> Result<Arc<EncodedObject>>,
1195    {
1196        let mut seen = HashSet::with_capacity(object_ids.len());
1197        for oid in object_ids {
1198            if oid.format() != format {
1199                return Err(GitError::InvalidObjectId(
1200                    "pack object id format does not match pack format".into(),
1201                ));
1202            }
1203            if !seen.insert(oid) {
1204                return Err(GitError::InvalidFormat(format!(
1205                    "pack contains duplicate object id {oid}"
1206                )));
1207            }
1208        }
1209
1210        let mut output = PackDigestWriter::new(writer, format);
1211        output.write_pack_bytes(b"PACK")?;
1212        output.write_pack_bytes(&2u32.to_be_bytes())?;
1213        output.write_pack_bytes(&(object_ids.len() as u32).to_be_bytes())?;
1214
1215        let mut index_entries = Vec::with_capacity(object_ids.len());
1216        for oid_window in object_ids.chunks(PACK_STREAM_COMPRESSION_WINDOW_OBJECTS) {
1217            let mut objects = Vec::with_capacity(oid_window.len());
1218            for oid in oid_window {
1219                objects.push(read_object(oid)?);
1220            }
1221            let compressed_payloads =
1222                compress_undeltified_payloads(&objects, options.compression_level)?;
1223            for ((oid, object), compressed_payload) in
1224                oid_window.iter().zip(&objects).zip(&compressed_payloads)
1225            {
1226                let offset = output.position();
1227                let mut entry_header = Vec::new();
1228                write_entry_header(
1229                    &mut entry_header,
1230                    object.object_type,
1231                    object.body.len() as u64,
1232                );
1233                let mut crc32 = crc32fast::Hasher::new();
1234                crc32.update(&entry_header);
1235                crc32.update(compressed_payload);
1236                output.write_pack_bytes(&entry_header)?;
1237                output.write_pack_bytes(compressed_payload)?;
1238                index_entries.push(PackIndexEntry {
1239                    oid: *oid,
1240                    crc32: crc32.finalize(),
1241                    offset,
1242                });
1243            }
1244        }
1245
1246        let (checksum, pack_size) = output.finish()?;
1247        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1248        Ok(PackWriteSummary {
1249            index,
1250            checksum,
1251            entries: index_entries,
1252            delta_count: 0,
1253            pack_size,
1254        })
1255    }
1256
    /// Stream a delta-compressed pack to `writer`, loading each object on
    /// demand through `read_object`.
    ///
    /// The ids are processed in chunks of `PACK_STREAM_COMPRESSION_WINDOW_OBJECTS`.
    /// For every chunk the objects are read, deltas are planned by
    /// `plan_streaming_window_deltas` (which is also given `base_horizon`, a
    /// bounded queue of recently emitted objects), the payloads are compressed,
    /// and the entries are written in the planned order. The ids are trusted:
    /// objects returned by `read_object` are not re-hashed here.
    ///
    /// The returned [`PackWriteSummary`] carries the v2 index, the pack
    /// checksum, the index entries, the number of delta entries, and the pack
    /// size.
    ///
    /// # Errors
    ///
    /// Fails when there are more than `u32::MAX` ids, when an id or a thin base
    /// id does not use `format`, when an id is duplicated, when `read_object`
    /// fails, or on any compression, offset-encoding, or write failure.
    pub fn write_packed_from_source_to_writer<W, F>(
        object_ids: &[ObjectId],
        format: ObjectFormat,
        options: &PackWriteOptions,
        mut read_object: F,
        writer: &mut W,
    ) -> Result<PackWriteSummary>
    where
        W: Write,
        F: FnMut(&ObjectId) -> Result<Arc<EncodedObject>>,
    {
        // The pack header stores the object count in 32 bits.
        if object_ids.len() > u32::MAX as usize {
            return Err(GitError::InvalidFormat("too many pack objects".into()));
        }

        // Every id must use the pack's hash format and appear only once.
        let mut seen = HashSet::with_capacity(object_ids.len());
        for oid in object_ids {
            if oid.format() != format {
                return Err(GitError::InvalidObjectId(
                    "pack object id format does not match pack format".into(),
                ));
            }
            if !seen.insert(*oid) {
                return Err(GitError::InvalidFormat(format!(
                    "pack contains duplicate object id {oid}"
                )));
            }
        }

        // External thin bases must use the pack's hash format as well.
        for oid in options.thin_bases.keys() {
            if oid.format() != format {
                return Err(GitError::InvalidObjectId(
                    "thin pack base object id format does not match pack format".into(),
                ));
            }
        }

        // Pack header: magic, version 2, object count (all big-endian).
        let mut output = PackDigestWriter::new(writer, format);
        output.write_pack_bytes(b"PACK")?;
        output.write_pack_bytes(&2u32.to_be_bytes())?;
        output.write_pack_bytes(&(object_ids.len() as u32).to_be_bytes())?;

        let mut index_entries = Vec::with_capacity(object_ids.len());
        let mut delta_count = 0u32;
        // Recently emitted objects (with their pack offsets and depths) that
        // are handed to the planner for later chunks; trimmed below to at most
        // `options.window` entries.
        let mut base_horizon: VecDeque<StreamingDeltaBase> = VecDeque::new();

        for oid_window in object_ids.chunks(PACK_STREAM_COMPRESSION_WINDOW_OBJECTS) {
            // Load just this chunk's objects.
            let mut objects = Vec::with_capacity(oid_window.len());
            for oid in oid_window {
                objects.push(read_object(oid)?);
            }

            // `plan` is indexed by position within this chunk; `order` lists
            // those positions in emit order.
            let (plan, order) =
                plan_streaming_window_deltas(&objects, oid_window, &base_horizon, options);
            let compressed_payloads = compress_streaming_planned_payloads(
                &objects,
                &plan,
                &order,
                options.compression_level,
            )?;
            // Offsets of objects written so far in *this* chunk only.
            let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];

            for (&idx, compressed_payload) in order.iter().zip(&compressed_payloads) {
                let offset = output.position();
                let mut entry_header = Vec::new();
                match &plan[idx].base {
                    StreamingPlannedBase::None => {
                        write_entry_header(
                            &mut entry_header,
                            objects[idx].object_type,
                            objects[idx].body.len() as u64,
                        );
                    }
                    // Base is another object of the current chunk; it must
                    // already have been written so its offset is known.
                    StreamingPlannedBase::Current { base_idx, delta } => {
                        delta_count += 1;
                        let base_offset = written_offsets[*base_idx].ok_or_else(|| {
                            GitError::InvalidFormat(
                                "in-pack delta base emitted after dependent object".into(),
                            )
                        })?;
                        if options.prefer_ofs_delta {
                            // Entry kind 6: base named by backwards distance.
                            write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
                            let relative = offset.checked_sub(base_offset).ok_or_else(|| {
                                GitError::InvalidFormat(
                                    "ofs-delta base offset is after delta".into(),
                                )
                            })?;
                            write_ofs_delta_offset(&mut entry_header, relative)?;
                        } else {
                            // Entry kind 7: base named by object id.
                            write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
                            entry_header.extend_from_slice(oid_window[*base_idx].as_bytes());
                        }
                    }
                    // Base carries its own id and pack offset.
                    // NOTE(review): presumably an object from `base_horizon`
                    // emitted in an earlier chunk — confirm against
                    // `plan_streaming_window_deltas`.
                    StreamingPlannedBase::Previous {
                        base_oid,
                        base_offset,
                        delta,
                    } => {
                        delta_count += 1;
                        if options.prefer_ofs_delta {
                            write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
                            let relative = offset.checked_sub(*base_offset).ok_or_else(|| {
                                GitError::InvalidFormat(
                                    "ofs-delta base offset is after delta".into(),
                                )
                            })?;
                            write_ofs_delta_offset(&mut entry_header, relative)?;
                        } else {
                            write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
                            entry_header.extend_from_slice(base_oid.as_bytes());
                        }
                    }
                    // Base has no offset here, so it is always named by id.
                    StreamingPlannedBase::External { base_oid, delta } => {
                        delta_count += 1;
                        write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
                        entry_header.extend_from_slice(base_oid.as_bytes());
                    }
                }

                // The index CRC covers the entry header plus compressed payload.
                let mut crc32 = crc32fast::Hasher::new();
                crc32.update(&entry_header);
                crc32.update(compressed_payload);
                output.write_pack_bytes(&entry_header)?;
                output.write_pack_bytes(compressed_payload)?;
                written_offsets[idx] = Some(offset);
                index_entries.push(PackIndexEntry {
                    oid: oid_window[idx],
                    crc32: crc32.finalize(),
                    offset,
                });

                // Only track candidate bases when both depth and window are
                // non-zero; keep the newest `options.window` entries.
                if options.depth > 0 && options.window > 0 {
                    base_horizon.push_back(StreamingDeltaBase {
                        oid: oid_window[idx],
                        object: Arc::clone(&objects[idx]),
                        offset,
                        depth: plan[idx].depth,
                    });
                    while base_horizon.len() > options.window {
                        base_horizon.pop_front();
                    }
                }
            }
        }

        let (checksum, pack_size) = output.finish()?;
        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
        Ok(PackWriteSummary {
            index,
            checksum,
            entries: index_entries,
            delta_count,
            pack_size,
        })
    }
1412}
1413
/// Wraps a borrowed writer so that every pack byte written through it is also
/// fed into a running digest and counted.
struct PackDigestWriter<'a, W> {
    /// Destination for the pack bytes.
    writer: &'a mut W,
    /// Running digest over all bytes written so far.
    digest: StreamingDigest,
    /// Number of bytes written so far; doubles as the offset of the next entry.
    position: u64,
}
1419
1420impl<'a, W> PackDigestWriter<'a, W>
1421where
1422    W: Write,
1423{
1424    fn new(writer: &'a mut W, format: ObjectFormat) -> Self {
1425        Self {
1426            writer,
1427            digest: StreamingDigest::new(format),
1428            position: 0,
1429        }
1430    }
1431
1432    fn position(&self) -> u64 {
1433        self.position
1434    }
1435
1436    fn write_pack_bytes(&mut self, bytes: &[u8]) -> Result<()> {
1437        self.writer.write_all(bytes)?;
1438        self.digest.update(bytes);
1439        self.position = self
1440            .position
1441            .checked_add(bytes.len() as u64)
1442            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1443        Ok(())
1444    }
1445
1446    fn finish(mut self) -> Result<(ObjectId, u64)> {
1447        let checksum = self.digest.finalize()?;
1448        self.writer.write_all(checksum.as_bytes())?;
1449        self.position = self
1450            .position
1451            .checked_add(checksum.as_bytes().len() as u64)
1452            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1453        Ok((checksum, self.position))
1454    }
1455}
1456
1457impl<'a> PackIndexView<'a> {
1458    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
1459        Self::parse(bytes, ObjectFormat::Sha1)
1460    }
1461
1462    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1463        Self::parse_impl(bytes, format, true, true)
1464    }
1465
1466    /// Parse and validate the index layout without recomputing the trailing
1467    /// index checksum. The checksum stored in the file is still exposed via
1468    /// [`PackIndexView::index_checksum`].
1469    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1470        Self::parse_impl(bytes, format, false, true)
1471    }
1472
1473    /// Parse a local/trusted pack index without recomputing the trailing index
1474    /// checksum or walking every entry for canonical-order validation.
1475    ///
1476    /// This still validates the table layout and all lookup paths remain
1477    /// bounds-checked, but it avoids O(number-of-objects) startup validation for
1478    /// repository-owned `.idx` files in hot read paths.
1479    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1480        Self::parse_impl(bytes, format, false, false)
1481    }
1482
    /// Number of objects recorded in this index view.
    pub fn count(&self) -> usize {
        self.count
    }
1486
    /// The 256-entry fan-out table of this index.
    ///
    /// [`PackIndexView::find`] uses entry `b` as the exclusive upper bound, and
    /// entry `b - 1` as the lower bound, of the index range searched for ids
    /// whose first byte is `b`.
    pub fn fanout(&self) -> &[u32; 256] {
        &self.fanout
    }
1490
1491    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1492        if oid.format() != self.format {
1493            return None;
1494        }
1495        let bucket = usize::from(oid.as_bytes()[0]);
1496        let mut start = if bucket == 0 {
1497            0
1498        } else {
1499            self.fanout[bucket - 1] as usize
1500        };
1501        let mut end = self.fanout[bucket] as usize;
1502        let target = oid.as_bytes();
1503
1504        while start < end {
1505            let mid = start + (end - start) / 2;
1506            match self.oid_bytes_at(mid).cmp(target) {
1507                std::cmp::Ordering::Less => start = mid + 1,
1508                std::cmp::Ordering::Equal => return self.lookup_at(mid),
1509                std::cmp::Ordering::Greater => end = mid,
1510            }
1511        }
1512        None
1513    }
1514
1515    fn parse_impl(
1516        bytes: &'a [u8],
1517        format: ObjectFormat,
1518        verify_checksum: bool,
1519        validate_entries: bool,
1520    ) -> Result<Self> {
1521        let hash_len = format.raw_len();
1522        if bytes.len() < 4 {
1523            return Err(GitError::InvalidFormat("pack index too short".into()));
1524        }
1525        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1526            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1527        }
1528        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1529            return Err(GitError::InvalidFormat("pack index too short".into()));
1530        }
1531        let version = u32_be(&bytes[4..8]);
1532        if version != 2 {
1533            return Err(GitError::Unsupported(format!(
1534                "pack index version {version}"
1535            )));
1536        }
1537        let index_checksum_offset = bytes.len() - hash_len;
1538        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1539        if verify_checksum {
1540            let actual_index_checksum =
1541                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1542            if actual_index_checksum != index_checksum {
1543                return Err(GitError::InvalidFormat(format!(
1544                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1545                )));
1546            }
1547        }
1548
1549        let mut offset = 8usize;
1550        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1551        let count = fanout[255] as usize;
1552        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1553        offset = oid_table.end;
1554        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1555        offset = crc_table.end;
1556        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1557        offset = small_offset_table.end;
1558
1559        let large_offset_count = (0..count)
1560            .filter(|idx| {
1561                let start = small_offset_table.start + idx * 4;
1562                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1563            })
1564            .count();
1565        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1566        offset = large_offset_table.end;
1567
1568        let expected_trailer_offset = bytes.len() - hash_len * 2;
1569        if offset != expected_trailer_offset {
1570            if !verify_checksum && offset < expected_trailer_offset {
1571                large_offset_table = large_offset_table.start..expected_trailer_offset;
1572                offset = expected_trailer_offset;
1573            } else {
1574                return Err(GitError::InvalidFormat(format!(
1575                    "pack index has {} unexpected bytes before trailer",
1576                    expected_trailer_offset.saturating_sub(offset)
1577                )));
1578            }
1579        }
1580        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1581
1582        let view = Self {
1583            version,
1584            count,
1585            fanout,
1586            pack_checksum,
1587            index_checksum,
1588            bytes,
1589            format,
1590            tables: PackIndexViewTables::V2 {
1591                oid_table,
1592                crc_table,
1593                small_offset_table,
1594                large_offset_table,
1595            },
1596        };
1597        if validate_entries {
1598            view.validate_v2_entries()?;
1599        }
1600        Ok(view)
1601    }
1602
1603    fn parse_v1_impl(
1604        bytes: &'a [u8],
1605        format: ObjectFormat,
1606        verify_checksum: bool,
1607        validate_entries: bool,
1608    ) -> Result<Self> {
1609        let hash_len = format.raw_len();
1610        if bytes.len() < 256 * 4 + 2 * hash_len {
1611            return Err(GitError::InvalidFormat("pack index too short".into()));
1612        }
1613        let index_checksum_offset = bytes.len() - hash_len;
1614        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1615        if verify_checksum {
1616            let actual_index_checksum =
1617                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1618            if actual_index_checksum != index_checksum {
1619                return Err(GitError::InvalidFormat(format!(
1620                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1621                )));
1622            }
1623        }
1624
1625        let mut offset = 0usize;
1626        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1627        let count = fanout[255] as usize;
1628        let entry_len = hash_len
1629            .checked_add(4)
1630            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1631        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1632        offset = entry_table.end;
1633        let expected_trailer_offset = bytes.len() - hash_len * 2;
1634        if offset != expected_trailer_offset {
1635            return Err(GitError::InvalidFormat(format!(
1636                "pack index has {} unexpected bytes before trailer",
1637                expected_trailer_offset.saturating_sub(offset)
1638            )));
1639        }
1640        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1641
1642        let view = Self {
1643            version: 1,
1644            count,
1645            fanout,
1646            pack_checksum,
1647            index_checksum,
1648            bytes,
1649            format,
1650            tables: PackIndexViewTables::V1 { entry_table },
1651        };
1652        if validate_entries {
1653            view.validate_v1_entries()?;
1654        }
1655        Ok(view)
1656    }
1657
1658    fn validate_v2_entries(&self) -> Result<()> {
1659        let PackIndexViewTables::V2 {
1660            oid_table,
1661            small_offset_table,
1662            large_offset_table,
1663            ..
1664        } = &self.tables
1665        else {
1666            unreachable!("v2 validation only runs for v2 views");
1667        };
1668        let oid_table = self.slice(oid_table.clone());
1669        let small_offset_table = self.slice(small_offset_table.clone());
1670        let large_offset_table = self.slice(large_offset_table.clone());
1671        let hash_len = self.format.raw_len();
1672        for idx in 0..self.count {
1673            let oid_start = idx * hash_len;
1674            let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1675            if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1676                return Err(GitError::InvalidFormat(
1677                    "pack index object ids are not strictly ascending".into(),
1678                ));
1679            }
1680            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1681
1682            let offset_start = idx * 4;
1683            let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1684            pack_index_v2_offset(raw_offset, large_offset_table)?;
1685        }
1686        Ok(())
1687    }
1688
1689    fn validate_v1_entries(&self) -> Result<()> {
1690        let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1691            unreachable!("v1 validation only runs for v1 views");
1692        };
1693        let entry_table = self.slice(entry_table.clone());
1694        let hash_len = self.format.raw_len();
1695        let entry_len = hash_len
1696            .checked_add(4)
1697            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1698        for idx in 0..self.count {
1699            let start = idx * entry_len;
1700            let oid_start = start + 4;
1701            let oid_bytes = &entry_table[oid_start..start + entry_len];
1702            if idx > 0 {
1703                let previous_oid_start = oid_start - entry_len;
1704                let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1705                if previous_oid >= oid_bytes {
1706                    return Err(GitError::InvalidFormat(
1707                        "pack index object ids are not strictly sorted".into(),
1708                    ));
1709                }
1710            }
1711            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1712        }
1713        Ok(())
1714    }
1715
1716    fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1717        let hash_len = self.format.raw_len();
1718        match &self.tables {
1719            PackIndexViewTables::V1 { entry_table } => {
1720                let entry_table = self.slice(entry_table.clone());
1721                let entry_len = hash_len + 4;
1722                let start = idx * entry_len + 4;
1723                &entry_table[start..start + hash_len]
1724            }
1725            PackIndexViewTables::V2 { oid_table, .. } => {
1726                let oid_table = self.slice(oid_table.clone());
1727                let start = idx * hash_len;
1728                &oid_table[start..start + hash_len]
1729            }
1730        }
1731    }
1732
1733    fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1734        if idx >= self.count {
1735            return None;
1736        }
1737        let hash_len = self.format.raw_len();
1738        match &self.tables {
1739            PackIndexViewTables::V1 { entry_table } => {
1740                let entry_table = self.slice(entry_table.clone());
1741                let entry_len = hash_len + 4;
1742                let start = idx * entry_len;
1743                Some(PackIndexLookup {
1744                    crc32: 0,
1745                    offset: u64::from(u32_be(&entry_table[start..start + 4])),
1746                })
1747            }
1748            PackIndexViewTables::V2 {
1749                crc_table,
1750                small_offset_table,
1751                large_offset_table,
1752                ..
1753            } => {
1754                let crc_table = self.slice(crc_table.clone());
1755                let small_offset_table = self.slice(small_offset_table.clone());
1756                let large_offset_table = self.slice(large_offset_table.clone());
1757                let crc_start = idx * 4;
1758                let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1759                Some(PackIndexLookup {
1760                    crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1761                    offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1762                })
1763            }
1764        }
1765    }
1766
1767    fn slice(&self, range: Range<usize>) -> &'a [u8] {
1768        &self.bytes[range]
1769    }
1770}
1771
1772impl PackIndexViewData {
1773    pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1774        Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1775    }
1776
1777    /// Parse and validate an owned index view without recomputing the trailing
1778    /// index checksum. The stored checksum is still exposed via
1779    /// [`PackIndexViewData::index_checksum`].
1780    pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1781        Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1782    }
1783
1784    /// Parse a local/trusted owned index view without the checksum or full-entry
1785    /// validation passes.
1786    pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1787        Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1788    }
1789
1790    pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
1791        Self::parse_impl(bytes, format, true, true)
1792    }
1793
1794    pub fn parse_source_without_checksum(
1795        bytes: Arc<dyn PackIndexByteSource>,
1796        format: ObjectFormat,
1797    ) -> Result<Self> {
1798        Self::parse_impl(bytes, format, false, true)
1799    }
1800
1801    pub fn parse_trusted_source_without_checksum(
1802        bytes: Arc<dyn PackIndexByteSource>,
1803        format: ObjectFormat,
1804    ) -> Result<Self> {
1805        Self::parse_impl(bytes, format, false, false)
1806    }
1807
1808    pub fn count(&self) -> usize {
1809        self.count
1810    }
1811
1812    pub fn fanout(&self) -> &[u32; 256] {
1813        &self.fanout
1814    }
1815
1816    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1817        self.as_view().find(oid)
1818    }
1819
1820    pub fn as_view(&self) -> PackIndexView<'_> {
1821        PackIndexView {
1822            version: self.version,
1823            count: self.count,
1824            fanout: self.fanout,
1825            pack_checksum: self.pack_checksum,
1826            index_checksum: self.index_checksum,
1827            bytes: self.bytes.as_bytes(),
1828            format: self.format,
1829            tables: self.tables.clone(),
1830        }
1831    }
1832
1833    fn parse_impl(
1834        bytes: Arc<dyn PackIndexByteSource>,
1835        format: ObjectFormat,
1836        verify_checksum: bool,
1837        validate_entries: bool,
1838    ) -> Result<Self> {
1839        let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1840            let view = PackIndexView::parse_impl(
1841                bytes.as_bytes(),
1842                format,
1843                verify_checksum,
1844                validate_entries,
1845            )?;
1846            (
1847                view.version,
1848                view.count,
1849                view.fanout,
1850                view.pack_checksum,
1851                view.index_checksum,
1852                view.tables,
1853            )
1854        };
1855        Ok(Self {
1856            version,
1857            count,
1858            fanout,
1859            pack_checksum,
1860            index_checksum,
1861            bytes,
1862            format,
1863            tables,
1864        })
1865    }
1866}
1867
1868impl PackIndex {
1869    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1870        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1871    }
1872
1873    pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1874        let trailer_len = format.raw_len();
1875        if pack_bytes.len() < 12 + trailer_len {
1876            return Err(GitError::InvalidFormat("pack file too short".into()));
1877        }
1878        let trailer_offset = pack_bytes.len() - trailer_len;
1879        let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1880        let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1881        if pack_checksum != expected {
1882            return Err(GitError::InvalidFormat(format!(
1883                "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1884            )));
1885        }
1886
1887        if &pack_bytes[..4] != b"PACK" {
1888            return Err(GitError::InvalidFormat("missing PACK signature".into()));
1889        }
1890        let version = u32_be(&pack_bytes[4..8]);
1891        if version != 2 && version != 3 {
1892            return Err(GitError::Unsupported(format!("pack version {version}")));
1893        }
1894        let count = u32_be(&pack_bytes[8..12]) as usize;
1895        let mut offset = 12usize;
1896        let mut parsed_entries = Vec::with_capacity(count);
1897        let mut raw_entries = Vec::with_capacity(count);
1898        for _ in 0..count {
1899            let entry_offset = offset;
1900            let header = parse_entry_header(pack_bytes, &mut offset)?;
1901            let base = match header.kind {
1902                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1903                    pack_bytes,
1904                    &mut offset,
1905                    entry_offset as u64,
1906                )?)),
1907                PackObjectKind::RefDelta => {
1908                    let hash_len = format.raw_len();
1909                    if offset + hash_len > trailer_offset {
1910                        return Err(GitError::InvalidFormat(
1911                            "truncated ref-delta base object id".into(),
1912                        ));
1913                    }
1914                    let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1915                    offset += hash_len;
1916                    Some(DeltaBase::Ref(oid))
1917                }
1918                _ => None,
1919            };
1920            let mut body = Vec::new();
1921            let consumed = inflate_into(
1922                &pack_bytes[offset..trailer_offset],
1923                &mut body,
1924                header.size.min(usize::MAX as u64) as usize,
1925            )?;
1926            if body.len() as u64 != header.size {
1927                return Err(GitError::InvalidObject(format!(
1928                    "pack object declared {} bytes, decoded {}",
1929                    header.size,
1930                    body.len()
1931                )));
1932            }
1933            if consumed == 0 {
1934                return Err(GitError::InvalidFormat(
1935                    "empty compressed pack entry".into(),
1936                ));
1937            }
1938            offset = offset
1939                .checked_add(consumed)
1940                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1941            if offset > trailer_offset {
1942                return Err(GitError::InvalidFormat(
1943                    "pack entry extends past checksum".into(),
1944                ));
1945            }
1946            raw_entries.push((
1947                entry_offset as u64,
1948                crc32fast::hash(&pack_bytes[entry_offset..offset]),
1949            ));
1950            if let Some(base) = base {
1951                parsed_entries.push(ParsedPackEntry::Delta {
1952                    base,
1953                    compressed_size: consumed as u64,
1954                    delta_size: header.size,
1955                    offset: entry_offset as u64,
1956                    delta: body,
1957                });
1958            } else {
1959                let object_type = match header.kind {
1960                    PackObjectKind::Commit => ObjectType::Commit,
1961                    PackObjectKind::Tree => ObjectType::Tree,
1962                    PackObjectKind::Blob => ObjectType::Blob,
1963                    PackObjectKind::Tag => ObjectType::Tag,
1964                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1965                };
1966                let object = EncodedObject::new(object_type, body);
1967                let oid = object.object_id(format)?;
1968                parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1969                    entry: PackEntry {
1970                        oid,
1971                        compressed_size: consumed as u64,
1972                        uncompressed_size: header.size,
1973                        offset: entry_offset as u64,
1974                    },
1975                    object,
1976                }));
1977            }
1978        }
1979        if offset != trailer_offset {
1980            return Err(GitError::InvalidFormat(format!(
1981                "pack has {} trailing bytes before checksum",
1982                trailer_offset - offset
1983            )));
1984        }
1985
1986        let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1987        let entries = resolved
1988            .iter()
1989            .zip(raw_entries)
1990            .map(|(object, (offset, crc32))| PackIndexEntry {
1991                oid: object.entry.oid,
1992                crc32,
1993                offset,
1994            })
1995            .collect::<Vec<_>>();
1996        let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1997        Ok(PackIndexBuild {
1998            index,
1999            pack_checksum,
2000            entries,
2001        })
2002    }
2003
2004    /// Validate and index a pack from the reader's current position to EOF.
2005    ///
2006    /// This produces the same v2 `.idx` bytes and object metadata as
2007    /// [`PackIndex::write_v2_for_pack`] without requiring the caller to provide
2008    /// the pack as one contiguous byte slice. The reader is left positioned at
2009    /// EOF on success.
2010    pub fn write_v2_for_pack_reader<R>(
2011        reader: &mut R,
2012        format: ObjectFormat,
2013    ) -> Result<PackStreamIndexBuild>
2014    where
2015        R: Read + Seek,
2016    {
2017        let start = reader.stream_position()?;
2018        let end = reader.seek(SeekFrom::End(0))?;
2019        let pack_len = end
2020            .checked_sub(start)
2021            .ok_or_else(|| GitError::InvalidFormat("pack stream position overflow".into()))?;
2022        reader.seek(SeekFrom::Start(start))?;
2023        index_pack_from_reader(reader, format, pack_len)
2024    }
2025
2026    /// Validate and index a pack from the reader's current position, stopping
2027    /// after the pack trailer checksum.
2028    ///
2029    /// This is for transports where the pack length is not known in advance but
2030    /// the stream is expected to contain exactly one pack. It avoids forcing the
2031    /// caller to first materialize the pack only to learn its length.
2032    pub fn write_v2_for_pack_reader_to_trailer<R>(
2033        reader: &mut R,
2034        format: ObjectFormat,
2035    ) -> Result<PackStreamIndexBuild>
2036    where
2037        R: Read,
2038    {
2039        index_pack_from_reader_to_trailer(reader, format)
2040    }
2041
2042    pub fn write_v2_for_pack_reader_with_len<R>(
2043        reader: &mut R,
2044        format: ObjectFormat,
2045        pack_len: u64,
2046    ) -> Result<PackStreamIndexBuild>
2047    where
2048        R: Read,
2049    {
2050        index_pack_from_reader(reader, format, pack_len)
2051    }
2052
2053    /// Validate and index a pack from a filesystem path without loading the
2054    /// entire pack file into memory.
2055    pub fn write_v2_for_pack_path(
2056        path: impl AsRef<Path>,
2057        format: ObjectFormat,
2058    ) -> Result<PackStreamIndexBuild> {
2059        let mut file = File::open(path)?;
2060        Self::write_v2_for_pack_reader(&mut file, format)
2061    }
2062
2063    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
2064        Self::parse(bytes, ObjectFormat::Sha1)
2065    }
2066
2067    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2068        Self::parse_impl(bytes, format, true)
2069    }
2070
2071    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2072        Self::parse_impl(bytes, format, false)
2073    }
2074
2075    fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
2076        let hash_len = format.raw_len();
2077        if bytes.len() < 4 {
2078            return Err(GitError::InvalidFormat("pack index too short".into()));
2079        }
2080        if bytes[..4] != [0xff, b't', b'O', b'c'] {
2081            return Self::parse_v1_impl(bytes, format, verify_checksum);
2082        }
2083        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
2084            return Err(GitError::InvalidFormat("pack index too short".into()));
2085        }
2086        let version = u32_be(&bytes[4..8]);
2087        if version != 2 {
2088            return Err(GitError::Unsupported(format!(
2089                "pack index version {version}"
2090            )));
2091        }
2092        let index_checksum_offset = bytes.len() - hash_len;
2093        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2094        if verify_checksum {
2095            let actual_index_checksum =
2096                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2097            if actual_index_checksum != index_checksum {
2098                return Err(GitError::InvalidFormat(format!(
2099                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2100                )));
2101            }
2102        }
2103
2104        let mut offset = 8usize;
2105        let mut fanout = [0u32; 256];
2106        let mut previous = 0u32;
2107        for slot in &mut fanout {
2108            *slot = u32_be(&bytes[offset..offset + 4]);
2109            if *slot < previous {
2110                return Err(GitError::InvalidFormat(
2111                    "pack index fanout is not monotonic".into(),
2112                ));
2113            }
2114            previous = *slot;
2115            offset += 4;
2116        }
2117        let count = fanout[255] as usize;
2118        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
2119        offset = oid_table.end;
2120        let crc_table = checked_range(offset, count, 4, bytes.len())?;
2121        offset = crc_table.end;
2122        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
2123        offset = small_offset_table.end;
2124
2125        let large_offset_count = (0..count)
2126            .filter(|idx| {
2127                let start = small_offset_table.start + idx * 4;
2128                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
2129            })
2130            .count();
2131        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
2132        offset = large_offset_table.end;
2133
2134        let expected_trailer_offset = bytes.len() - hash_len * 2;
2135        if offset != expected_trailer_offset {
2136            if !verify_checksum && offset < expected_trailer_offset {
2137                large_offset_table = large_offset_table.start..expected_trailer_offset;
2138                offset = expected_trailer_offset;
2139            } else {
2140                return Err(GitError::InvalidFormat(format!(
2141                    "pack index has {} unexpected bytes before trailer",
2142                    expected_trailer_offset.saturating_sub(offset)
2143                )));
2144            }
2145        }
2146        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
2147
2148        let mut entries = Vec::with_capacity(count);
2149        for idx in 0..count {
2150            let oid_start = oid_table.start + idx * hash_len;
2151            let crc_start = crc_table.start + idx * 4;
2152            let offset_start = small_offset_table.start + idx * 4;
2153            let oid_bytes = &bytes[oid_start..oid_start + hash_len];
2154            // Object ids must be strictly ascending: lookup binary-searches them,
2155            // and the fanout must match the first byte. A malformed/forged index
2156            // (e.g. from a received pack) would otherwise yield silent misses.
2157            if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
2158                return Err(GitError::InvalidFormat(
2159                    "pack index object ids are not strictly ascending".into(),
2160                ));
2161            }
2162            let expected_min = if oid_bytes[0] == 0 {
2163                0
2164            } else {
2165                fanout[usize::from(oid_bytes[0] - 1)]
2166            };
2167            if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
2168                return Err(GitError::InvalidFormat(
2169                    "pack index object id is outside its fanout bucket".into(),
2170                ));
2171            }
2172            let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
2173            let offset = if raw_offset & 0x8000_0000 == 0 {
2174                u64::from(raw_offset)
2175            } else {
2176                let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2177                let large_start = large_offset_table.start + large_idx * 8;
2178                if large_idx >= large_offset_table.len() / 8 {
2179                    return Err(GitError::InvalidFormat(
2180                        "pack index large offset points past table".into(),
2181                    ));
2182                }
2183                u64_be(&bytes[large_start..large_start + 8])
2184            };
2185            entries.push(PackIndexEntry {
2186                oid: ObjectId::from_raw(format, oid_bytes)?,
2187                crc32: u32_be(&bytes[crc_start..crc_start + 4]),
2188                offset,
2189            });
2190        }
2191        Ok(Self {
2192            version,
2193            fanout,
2194            entries,
2195            pack_checksum,
2196            index_checksum,
2197        })
2198    }
2199
2200    fn parse_v1_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
2201        let hash_len = format.raw_len();
2202        if bytes.len() < 256 * 4 + 2 * hash_len {
2203            return Err(GitError::InvalidFormat("pack index too short".into()));
2204        }
2205        let index_checksum_offset = bytes.len() - hash_len;
2206        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2207        if verify_checksum {
2208            let actual_index_checksum =
2209                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2210            if actual_index_checksum != index_checksum {
2211                return Err(GitError::InvalidFormat(format!(
2212                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2213                )));
2214            }
2215        }
2216
2217        let mut offset = 0usize;
2218        let mut fanout = [0u32; 256];
2219        let mut previous = 0u32;
2220        for slot in &mut fanout {
2221            *slot = u32_be(&bytes[offset..offset + 4]);
2222            if *slot < previous {
2223                return Err(GitError::InvalidFormat(
2224                    "pack index fanout is not monotonic".into(),
2225                ));
2226            }
2227            previous = *slot;
2228            offset += 4;
2229        }
2230        let count = fanout[255] as usize;
2231        let entry_len = hash_len
2232            .checked_add(4)
2233            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
2234        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
2235        offset = entry_table.end;
2236        let expected_trailer_offset = bytes.len() - hash_len * 2;
2237        if offset != expected_trailer_offset {
2238            return Err(GitError::InvalidFormat(format!(
2239                "pack index has {} unexpected bytes before trailer",
2240                expected_trailer_offset.saturating_sub(offset)
2241            )));
2242        }
2243        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
2244
2245        let mut entries = Vec::with_capacity(count);
2246        let mut previous_oid: Option<ObjectId> = None;
2247        for idx in 0..count {
2248            let start = entry_table.start + idx * entry_len;
2249            let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
2250            if let Some(previous) = &previous_oid
2251                && previous.as_bytes() >= oid.as_bytes()
2252            {
2253                return Err(GitError::InvalidFormat(
2254                    "pack index object ids are not strictly sorted".into(),
2255                ));
2256            }
2257            previous_oid = Some(oid);
2258            entries.push(PackIndexEntry {
2259                oid,
2260                crc32: 0,
2261                offset: u64::from(u32_be(&bytes[start..start + 4])),
2262            });
2263        }
2264        Ok(Self {
2265            version: 1,
2266            fanout,
2267            entries,
2268            pack_checksum,
2269            index_checksum,
2270        })
2271    }
2272
2273    pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
2274        self.entries
2275            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2276            .ok()
2277            .map(|idx| &self.entries[idx])
2278    }
2279
2280    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
2281        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
2282    }
2283
2284    pub fn write_v2(
2285        format: ObjectFormat,
2286        entries: &[PackIndexEntry],
2287        pack_checksum: &ObjectId,
2288    ) -> Result<Vec<u8>> {
2289        if pack_checksum.format() != format {
2290            return Err(GitError::InvalidObjectId(
2291                "pack checksum format does not match index format".into(),
2292            ));
2293        }
2294        let mut entries = entries.iter().collect::<Vec<_>>();
2295        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2296        for pair in entries.windows(2) {
2297            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2298                return Err(GitError::InvalidFormat(format!(
2299                    "pack index contains duplicate object id {}",
2300                    pair[0].oid
2301                )));
2302            }
2303        }
2304        let mut fanout = [0u32; 256];
2305        for entry in &entries {
2306            if entry.oid.format() != format {
2307                return Err(GitError::InvalidObjectId(
2308                    "pack index entry format does not match index format".into(),
2309                ));
2310            }
2311            let first = entry.oid.as_bytes()[0] as usize;
2312            fanout[first] = fanout[first]
2313                .checked_add(1)
2314                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2315        }
2316        let mut running = 0u32;
2317        for slot in &mut fanout {
2318            running = running
2319                .checked_add(*slot)
2320                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2321            *slot = running;
2322        }
2323
2324        let mut index = Vec::new();
2325        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
2326        index.extend_from_slice(&2u32.to_be_bytes());
2327        for count in fanout {
2328            index.extend_from_slice(&count.to_be_bytes());
2329        }
2330        for entry in &entries {
2331            index.extend_from_slice(entry.oid.as_bytes());
2332        }
2333        for entry in &entries {
2334            index.extend_from_slice(&entry.crc32.to_be_bytes());
2335        }
2336
2337        let mut large_offsets = Vec::new();
2338        for entry in &entries {
2339            if entry.offset < 0x8000_0000 {
2340                index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2341            } else {
2342                if large_offsets.len() > 0x7fff_ffff {
2343                    return Err(GitError::InvalidFormat(
2344                        "too many large pack offsets".into(),
2345                    ));
2346                }
2347                let large_idx = large_offsets.len() as u32;
2348                index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
2349                large_offsets.push(entry.offset);
2350            }
2351        }
2352        for offset in large_offsets {
2353            index.extend_from_slice(&offset.to_be_bytes());
2354        }
2355        index.extend_from_slice(pack_checksum.as_bytes());
2356        let index_checksum = sley_core::digest_bytes(format, &index)?;
2357        index.extend_from_slice(index_checksum.as_bytes());
2358        Ok(index)
2359    }
2360
2361    /// Serialise a version-1 pack `.idx`: a 256-entry fanout, then for each
2362    /// object an inline 4-byte big-endian pack offset immediately followed by
2363    /// its object id (sorted by oid), then the pack checksum and a trailing
2364    /// index checksum. v1 has no CRC table and cannot represent offsets that
2365    /// do not fit in 32 bits.
2366    pub fn write_v1(
2367        format: ObjectFormat,
2368        entries: &[PackIndexEntry],
2369        pack_checksum: &ObjectId,
2370    ) -> Result<Vec<u8>> {
2371        if pack_checksum.format() != format {
2372            return Err(GitError::InvalidObjectId(
2373                "pack checksum format does not match index format".into(),
2374            ));
2375        }
2376        let mut entries = entries.iter().collect::<Vec<_>>();
2377        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2378        for pair in entries.windows(2) {
2379            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2380                return Err(GitError::InvalidFormat(format!(
2381                    "pack index contains duplicate object id {}",
2382                    pair[0].oid
2383                )));
2384            }
2385        }
2386        let mut fanout = [0u32; 256];
2387        for entry in &entries {
2388            if entry.oid.format() != format {
2389                return Err(GitError::InvalidObjectId(
2390                    "pack index entry format does not match index format".into(),
2391                ));
2392            }
2393            if entry.offset > 0xffff_ffff {
2394                return Err(GitError::InvalidFormat(
2395                    "pack offset too large for a version-1 index".into(),
2396                ));
2397            }
2398            let first = entry.oid.as_bytes()[0] as usize;
2399            fanout[first] = fanout[first]
2400                .checked_add(1)
2401                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2402        }
2403        let mut running = 0u32;
2404        for slot in &mut fanout {
2405            running = running
2406                .checked_add(*slot)
2407                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2408            *slot = running;
2409        }
2410
2411        let mut index = Vec::new();
2412        for count in fanout {
2413            index.extend_from_slice(&count.to_be_bytes());
2414        }
2415        for entry in &entries {
2416            index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2417            index.extend_from_slice(entry.oid.as_bytes());
2418        }
2419        index.extend_from_slice(pack_checksum.as_bytes());
2420        let index_checksum = sley_core::digest_bytes(format, &index)?;
2421        index.extend_from_slice(index_checksum.as_bytes());
2422        Ok(index)
2423    }
2424}
2425
2426fn index_pack_from_reader<R>(
2427    reader: &mut R,
2428    format: ObjectFormat,
2429    pack_len: u64,
2430) -> Result<PackStreamIndexBuild>
2431where
2432    R: Read,
2433{
2434    index_pack_from_stream(PackReadStream::new(reader, format, Some(pack_len))?, format)
2435}
2436
2437fn index_pack_from_reader_to_trailer<R>(
2438    reader: &mut R,
2439    format: ObjectFormat,
2440) -> Result<PackStreamIndexBuild>
2441where
2442    R: Read,
2443{
2444    index_pack_from_stream(PackReadStream::new(reader, format, None)?, format)
2445}
2446
2447fn index_pack_from_stream<R>(
2448    mut stream: PackReadStream<'_, R>,
2449    format: ObjectFormat,
2450) -> Result<PackStreamIndexBuild>
2451where
2452    R: Read,
2453{
2454    let mut header = [0u8; 12];
2455    stream.read_pack_bytes(&mut header)?;
2456    if &header[..4] != b"PACK" {
2457        return Err(GitError::InvalidFormat("missing PACK signature".into()));
2458    }
2459    let version = u32_be(&header[4..8]);
2460    if version != 2 && version != 3 {
2461        return Err(GitError::Unsupported(format!("pack version {version}")));
2462    }
2463    let count = u32_be(&header[8..12]) as usize;
2464    let mut parsed_entries = Vec::with_capacity(count);
2465    let mut raw_entries = Vec::with_capacity(count);
2466    for _ in 0..count {
2467        let entry_offset = stream.pack_offset();
2468        let mut entry_crc = crc32fast::Hasher::new();
2469        let header = parse_entry_header_from_stream(&mut stream, &mut entry_crc)?;
2470        let base = match header.kind {
2471            PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
2472                parse_ofs_delta_base_offset_from_stream(&mut stream, &mut entry_crc, entry_offset)?,
2473            )),
2474            PackObjectKind::RefDelta => {
2475                let mut raw = vec![0u8; format.raw_len()];
2476                stream.read_entry_bytes(&mut raw, &mut entry_crc)?;
2477                Some(DeltaBase::Ref(ObjectId::from_raw(format, &raw)?))
2478            }
2479            _ => None,
2480        };
2481        let (body, consumed) = inflate_entry_from_stream(
2482            &mut stream,
2483            &mut entry_crc,
2484            header.size.min(usize::MAX as u64) as usize,
2485        )?;
2486        if body.len() as u64 != header.size {
2487            return Err(GitError::InvalidObject(format!(
2488                "pack object declared {} bytes, decoded {}",
2489                header.size,
2490                body.len()
2491            )));
2492        }
2493        if consumed == 0 {
2494            return Err(GitError::InvalidFormat(
2495                "empty compressed pack entry".into(),
2496            ));
2497        }
2498        raw_entries.push((entry_offset, entry_crc.finalize()));
2499        if let Some(base) = base {
2500            parsed_entries.push(ParsedPackEntry::Delta {
2501                base,
2502                compressed_size: consumed as u64,
2503                delta_size: header.size,
2504                offset: entry_offset,
2505                delta: body,
2506            });
2507        } else {
2508            let object_type = pack_object_kind_to_object_type(header.kind)?;
2509            let object = EncodedObject::new(object_type, body);
2510            let oid = object.object_id(format)?;
2511            parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
2512                entry: PackEntry {
2513                    oid,
2514                    compressed_size: consumed as u64,
2515                    uncompressed_size: header.size,
2516                    offset: entry_offset,
2517                },
2518                object,
2519            }));
2520        }
2521    }
2522    if stream.pack_offset() != stream.trailer_pack_offset() {
2523        return Err(GitError::InvalidFormat(format!(
2524            "pack has {} trailing bytes before checksum",
2525            stream.trailer_pack_offset() - stream.pack_offset()
2526        )));
2527    }
2528    let expected = stream.read_trailer_oid()?;
2529    let pack_checksum = stream.finish_digest()?;
2530    if pack_checksum != expected {
2531        return Err(GitError::InvalidFormat(format!(
2532            "pack checksum mismatch: expected {expected}, got {pack_checksum}"
2533        )));
2534    }
2535
2536    let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
2537    let entries = resolved
2538        .iter()
2539        .zip(raw_entries)
2540        .map(|(object, (offset, crc32))| PackIndexEntry {
2541            oid: object.entry.oid,
2542            crc32,
2543            offset,
2544        })
2545        .collect::<Vec<_>>();
2546    let objects = resolved
2547        .iter()
2548        .map(|object| PackIndexedObject {
2549            oid: object.entry.oid,
2550            object_type: object.object.object_type,
2551            size: object.object.body.len() as u64,
2552            offset: object.entry.offset,
2553        })
2554        .collect::<Vec<_>>();
2555    let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
2556    Ok(PackStreamIndexBuild {
2557        index,
2558        pack_checksum,
2559        entries,
2560        objects,
2561    })
2562}
2563
2564fn pack_object_kind_to_object_type(kind: PackObjectKind) -> Result<ObjectType> {
2565    match kind {
2566        PackObjectKind::Commit => Ok(ObjectType::Commit),
2567        PackObjectKind::Tree => Ok(ObjectType::Tree),
2568        PackObjectKind::Blob => Ok(ObjectType::Blob),
2569        PackObjectKind::Tag => Ok(ObjectType::Tag),
2570        PackObjectKind::OfsDelta | PackObjectKind::RefDelta => Err(GitError::InvalidFormat(
2571            "delta entry cannot be used as an object type".into(),
2572        )),
2573    }
2574}
2575
2576struct PackReadStream<'a, R> {
2577    reader: &'a mut R,
2578    position: u64,
2579    pack_len: Option<u64>,
2580    trailer_position: Option<u64>,
2581    digest: StreamingDigest,
2582    format: ObjectFormat,
2583    pending: VecDeque<u8>,
2584}
2585
2586impl<'a, R> PackReadStream<'a, R>
2587where
2588    R: Read,
2589{
2590    fn new(reader: &'a mut R, format: ObjectFormat, pack_len: Option<u64>) -> Result<Self> {
2591        let trailer_len = format.raw_len() as u64;
2592        let trailer_position = pack_len
2593            .map(|pack_len| {
2594                if pack_len < 12 + trailer_len {
2595                    return Err(GitError::InvalidFormat("pack file too short".into()));
2596                }
2597                Ok(pack_len - trailer_len)
2598            })
2599            .transpose()?;
2600        Ok(Self {
2601            reader,
2602            position: 0,
2603            pack_len,
2604            trailer_position,
2605            digest: StreamingDigest::new(format),
2606            format,
2607            pending: VecDeque::new(),
2608        })
2609    }
2610
2611    fn pack_offset(&self) -> u64 {
2612        self.position
2613    }
2614
2615    fn trailer_pack_offset(&self) -> u64 {
2616        self.trailer_position.unwrap_or(self.position)
2617    }
2618
2619    fn read_pack_bytes(&mut self, bytes: &mut [u8]) -> Result<()> {
2620        let end = self
2621            .position
2622            .checked_add(bytes.len() as u64)
2623            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2624        if self
2625            .trailer_position
2626            .is_some_and(|trailer_position| end > trailer_position)
2627        {
2628            return Err(GitError::InvalidFormat(
2629                "pack entry extends past checksum".into(),
2630            ));
2631        }
2632        self.read_exact_raw(bytes)?;
2633        self.position = end;
2634        self.digest.update(bytes);
2635        Ok(())
2636    }
2637
2638    fn read_exact_raw(&mut self, bytes: &mut [u8]) -> Result<()> {
2639        let mut written = 0usize;
2640        while written < bytes.len() {
2641            if let Some(byte) = self.pending.pop_front() {
2642                bytes[written] = byte;
2643                written += 1;
2644                continue;
2645            }
2646            self.reader.read_exact(&mut bytes[written..])?;
2647            break;
2648        }
2649        Ok(())
2650    }
2651
2652    fn read_entry_bytes(&mut self, bytes: &mut [u8], crc: &mut crc32fast::Hasher) -> Result<()> {
2653        self.read_pack_bytes(bytes)?;
2654        crc.update(bytes);
2655        Ok(())
2656    }
2657
2658    fn read_entry_byte(&mut self, crc: &mut crc32fast::Hasher) -> Result<u8> {
2659        let mut byte = [0u8; 1];
2660        self.read_entry_bytes(&mut byte, crc)?;
2661        Ok(byte[0])
2662    }
2663
2664    fn read_compressed_chunk(&mut self, bytes: &mut [u8]) -> Result<usize> {
2665        let len = if let Some(trailer_position) = self.trailer_position {
2666            if self.position >= trailer_position {
2667                return Ok(0);
2668            }
2669            let remaining = trailer_position - self.position;
2670            if remaining < bytes.len() as u64 {
2671                remaining as usize
2672            } else {
2673                bytes.len()
2674            }
2675        } else {
2676            bytes.len()
2677        };
2678        let mut read = 0usize;
2679        while read < len {
2680            let Some(byte) = self.pending.pop_front() else {
2681                break;
2682            };
2683            bytes[read] = byte;
2684            read += 1;
2685        }
2686        if read < len {
2687            read += self.reader.read(&mut bytes[read..len])?;
2688        }
2689        self.position = self
2690            .position
2691            .checked_add(read as u64)
2692            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2693        Ok(read)
2694    }
2695
2696    fn accept_compressed_bytes(&mut self, bytes: &[u8], crc: &mut crc32fast::Hasher) {
2697        self.digest.update(bytes);
2698        crc.update(bytes);
2699    }
2700
2701    fn push_back_compressed_bytes(&mut self, bytes: &[u8]) -> Result<()> {
2702        if bytes.is_empty() {
2703            return Ok(());
2704        }
2705        self.position = self
2706            .position
2707            .checked_sub(bytes.len() as u64)
2708            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2709        for byte in bytes.iter().rev() {
2710            self.pending.push_front(*byte);
2711        }
2712        Ok(())
2713    }
2714
2715    fn read_trailer_oid(&mut self) -> Result<ObjectId> {
2716        let mut raw = vec![0u8; self.format.raw_len()];
2717        self.read_exact_raw(&mut raw)?;
2718        self.position = self
2719            .position
2720            .checked_add(raw.len() as u64)
2721            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2722        if let Some(pack_len) = self.pack_len
2723            && self.position != pack_len
2724        {
2725            return Err(GitError::InvalidFormat(format!(
2726                "pack has {} trailing bytes after checksum",
2727                pack_len - self.position
2728            )));
2729        }
2730        if self.pack_len.is_none() && !self.pending.is_empty() {
2731            return Err(GitError::InvalidFormat(
2732                "pack has trailing bytes after checksum".into(),
2733            ));
2734        }
2735        ObjectId::from_raw(self.format, &raw)
2736    }
2737
2738    fn finish_digest(self) -> Result<ObjectId> {
2739        self.digest.finalize()
2740    }
2741}
2742
2743const STREAM_INFLATE_CHUNK: usize = 32 * 1024;
2744
2745fn inflate_entry_from_stream<R>(
2746    stream: &mut PackReadStream<'_, R>,
2747    crc: &mut crc32fast::Hasher,
2748    size_hint: usize,
2749) -> Result<(Vec<u8>, usize)>
2750where
2751    R: Read,
2752{
2753    INFLATE.with(|cell| {
2754        let mut decompress = cell.borrow_mut();
2755        decompress.reset(true);
2756        let mut out = Vec::with_capacity(bounded_inflate_reserve(size_hint, STREAM_INFLATE_CHUNK));
2757        let mut compressed_total = 0usize;
2758        let mut input = [0u8; STREAM_INFLATE_CHUNK];
2759        loop {
2760            let read = stream.read_compressed_chunk(&mut input)?;
2761            if read == 0 {
2762                return Err(GitError::InvalidObject("truncated zlib stream".into()));
2763            }
2764            let mut cursor = 0usize;
2765            while cursor < read {
2766                if out.len() == out.capacity() {
2767                    out.reserve(out.len().max(64));
2768                }
2769                let before_in = decompress.total_in();
2770                let before_out = decompress.total_out();
2771                let status = decompress
2772                    .decompress_vec(
2773                        &input[cursor..read],
2774                        &mut out,
2775                        flate2::FlushDecompress::None,
2776                    )
2777                    .map_err(|err| {
2778                        GitError::InvalidObject(format!("zlib inflate failed: {err}"))
2779                    })?;
2780                let consumed = (decompress.total_in() - before_in) as usize;
2781                let produced = decompress.total_out() - before_out;
2782                if consumed > 0 {
2783                    let consumed_end = cursor + consumed;
2784                    stream.accept_compressed_bytes(&input[cursor..consumed_end], crc);
2785                    compressed_total = compressed_total
2786                        .checked_add(consumed)
2787                        .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2788                    cursor = consumed_end;
2789                }
2790                match status {
2791                    flate2::Status::StreamEnd => {
2792                        stream.push_back_compressed_bytes(&input[cursor..read])?;
2793                        return Ok((out, compressed_total));
2794                    }
2795                    _ if consumed == 0 && produced == 0 => {
2796                        return Err(GitError::InvalidObject("truncated zlib stream".into()));
2797                    }
2798                    _ => {}
2799                }
2800            }
2801        }
2802    })
2803}
2804
2805fn parse_entry_header_from_stream<R>(
2806    stream: &mut PackReadStream<'_, R>,
2807    crc: &mut crc32fast::Hasher,
2808) -> Result<EntryHeader>
2809where
2810    R: Read,
2811{
2812    let first = stream.read_entry_byte(crc)?;
2813    let mut size = u64::from(first & 0x0f);
2814    let kind = match (first >> 4) & 0x07 {
2815        1 => PackObjectKind::Commit,
2816        2 => PackObjectKind::Tree,
2817        3 => PackObjectKind::Blob,
2818        4 => PackObjectKind::Tag,
2819        6 => PackObjectKind::OfsDelta,
2820        7 => PackObjectKind::RefDelta,
2821        other => {
2822            return Err(GitError::InvalidFormat(format!(
2823                "invalid pack object type {other}"
2824            )));
2825        }
2826    };
2827    let mut shift = 4;
2828    let mut byte = first;
2829    while byte & 0x80 != 0 {
2830        byte = stream.read_entry_byte(crc)?;
2831        let part = u64::from(byte & 0x7f);
2832        size = size
2833            .checked_add(
2834                part.checked_shl(shift)
2835                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
2836            )
2837            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
2838        shift += 7;
2839    }
2840    Ok(EntryHeader { kind, size })
2841}
2842
2843fn parse_ofs_delta_base_offset_from_stream<R>(
2844    stream: &mut PackReadStream<'_, R>,
2845    crc: &mut crc32fast::Hasher,
2846    entry_offset: u64,
2847) -> Result<u64>
2848where
2849    R: Read,
2850{
2851    let mut byte = stream.read_entry_byte(crc)?;
2852    let mut relative = u64::from(byte & 0x7f);
2853    while byte & 0x80 != 0 {
2854        byte = stream.read_entry_byte(crc)?;
2855        relative = relative
2856            .checked_add(1)
2857            .and_then(|value| value.checked_shl(7))
2858            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2859            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2860    }
2861    entry_offset
2862        .checked_sub(relative)
2863        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2864}
2865
2866/// The `.rev` table for a pack: index positions (the rank of each object in
2867/// the oid-sorted `.idx`) listed in pack order (ascending pack offset), as
2868/// upstream `write_rev_file` lays them out. Accepts `entries` in any order;
2869/// the result feeds [`PackReverseIndex::write`].
2870pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
2871    let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
2872    oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
2873    let mut index_position = vec![0u32; entries.len()];
2874    for (position, &entry) in oid_sorted.iter().enumerate() {
2875        index_position[entry] = position as u32;
2876    }
2877    let mut by_offset: Vec<usize> = (0..entries.len()).collect();
2878    by_offset.sort_by_key(|&entry| entries[entry].offset);
2879    by_offset
2880        .into_iter()
2881        .map(|entry| index_position[entry])
2882        .collect()
2883}
2884
2885impl PackReverseIndex {
2886    pub fn write(
2887        format: ObjectFormat,
2888        positions: &[u32],
2889        pack_checksum: &ObjectId,
2890    ) -> Result<Vec<u8>> {
2891        if pack_checksum.format() != format {
2892            return Err(GitError::InvalidObjectId(
2893                "pack checksum format does not match reverse index format".into(),
2894            ));
2895        }
2896        validate_position_permutation(positions)?;
2897
2898        let mut out = Vec::new();
2899        out.extend_from_slice(b"RIDX");
2900        out.extend_from_slice(&1u32.to_be_bytes());
2901        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2902        for position in positions {
2903            out.extend_from_slice(&position.to_be_bytes());
2904        }
2905        out.extend_from_slice(pack_checksum.as_bytes());
2906        let checksum = sley_core::digest_bytes(format, &out)?;
2907        out.extend_from_slice(checksum.as_bytes());
2908        Ok(out)
2909    }
2910
2911    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2912        let hash_len = format.raw_len();
2913        let table_len = object_count
2914            .checked_mul(4)
2915            .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
2916        let min_len = 12usize
2917            .checked_add(table_len)
2918            .and_then(|len| len.checked_add(hash_len * 2))
2919            .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
2920        if bytes.len() < min_len {
2921            return Err(GitError::InvalidFormat("reverse index too short".into()));
2922        }
2923        if bytes.len() != min_len {
2924            return Err(GitError::InvalidFormat(format!(
2925                "reverse index has {} trailing bytes",
2926                bytes.len() - min_len
2927            )));
2928        }
2929        if &bytes[..4] != b"RIDX" {
2930            return Err(GitError::InvalidFormat(
2931                "missing reverse index signature".into(),
2932            ));
2933        }
2934        let version = u32_be(&bytes[4..8]);
2935        if version != 1 {
2936            return Err(GitError::Unsupported(format!(
2937                "reverse index version {version}"
2938            )));
2939        }
2940        let hash_id = u32_be(&bytes[8..12]);
2941        if hash_id != hash_function_id(format) {
2942            return Err(GitError::InvalidFormat(format!(
2943                "reverse index hash id {hash_id} does not match {}",
2944                format.name()
2945            )));
2946        }
2947
2948        let index_checksum_offset = bytes.len() - hash_len;
2949        let actual_index_checksum =
2950            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2951        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2952        if actual_index_checksum != index_checksum {
2953            return Err(GitError::InvalidFormat(format!(
2954                "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2955            )));
2956        }
2957
2958        let pack_checksum_offset = index_checksum_offset - hash_len;
2959        let pack_checksum =
2960            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2961        let mut positions = Vec::with_capacity(object_count);
2962        let mut offset = 12usize;
2963        for _ in 0..object_count {
2964            let position = u32_be(&bytes[offset..offset + 4]);
2965            positions.push(position);
2966            offset += 4;
2967        }
2968        validate_position_permutation(&positions)?;
2969
2970        Ok(Self {
2971            version,
2972            format,
2973            positions,
2974            pack_checksum,
2975            index_checksum,
2976        })
2977    }
2978}
2979
2980impl PackMtimes {
2981    pub fn write(
2982        format: ObjectFormat,
2983        mtimes: &[u32],
2984        pack_checksum: &ObjectId,
2985    ) -> Result<Vec<u8>> {
2986        if pack_checksum.format() != format {
2987            return Err(GitError::InvalidObjectId(
2988                "pack checksum format does not match mtimes format".into(),
2989            ));
2990        }
2991
2992        let mut out = Vec::new();
2993        out.extend_from_slice(b"MTME");
2994        out.extend_from_slice(&1u32.to_be_bytes());
2995        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2996        for mtime in mtimes {
2997            out.extend_from_slice(&mtime.to_be_bytes());
2998        }
2999        out.extend_from_slice(pack_checksum.as_bytes());
3000        let checksum = sley_core::digest_bytes(format, &out)?;
3001        out.extend_from_slice(checksum.as_bytes());
3002        Ok(out)
3003    }
3004
3005    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
3006        let hash_len = format.raw_len();
3007        let table_len = object_count
3008            .checked_mul(4)
3009            .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
3010        let expected_len = 12usize
3011            .checked_add(table_len)
3012            .and_then(|len| len.checked_add(hash_len * 2))
3013            .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
3014        if bytes.len() < expected_len {
3015            return Err(GitError::InvalidFormat("mtimes file too short".into()));
3016        }
3017        if bytes.len() != expected_len {
3018            return Err(GitError::InvalidFormat(format!(
3019                "mtimes file has {} trailing bytes",
3020                bytes.len() - expected_len
3021            )));
3022        }
3023        if &bytes[..4] != b"MTME" {
3024            return Err(GitError::InvalidFormat("missing mtimes signature".into()));
3025        }
3026        let version = u32_be(&bytes[4..8]);
3027        if version != 1 {
3028            return Err(GitError::Unsupported(format!("mtimes version {version}")));
3029        }
3030        let hash_id = u32_be(&bytes[8..12]);
3031        if hash_id != hash_function_id(format) {
3032            return Err(GitError::InvalidFormat(format!(
3033                "mtimes hash id {hash_id} does not match {}",
3034                format.name()
3035            )));
3036        }
3037
3038        let index_checksum_offset = bytes.len() - hash_len;
3039        let actual_index_checksum =
3040            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
3041        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
3042        if actual_index_checksum != index_checksum {
3043            return Err(GitError::InvalidFormat(format!(
3044                "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
3045            )));
3046        }
3047
3048        let pack_checksum_offset = index_checksum_offset - hash_len;
3049        let pack_checksum =
3050            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
3051        let mut mtimes = Vec::with_capacity(object_count);
3052        let mut offset = 12usize;
3053        for _ in 0..object_count {
3054            mtimes.push(u32_be(&bytes[offset..offset + 4]));
3055            offset += 4;
3056        }
3057
3058        Ok(Self {
3059            version,
3060            format,
3061            mtimes,
3062            pack_checksum,
3063            index_checksum,
3064        })
3065    }
3066}
3067
3068impl PackBitmapIndex {
3069    pub const OPTION_FULL_DAG: u16 = 0x0001;
3070    pub const OPTION_HASH_CACHE: u16 = 0x0004;
3071    pub const OPTION_PSEUDO_MERGES: u16 = 0x0020;
3072
3073    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
3074        let hash_len = format.raw_len();
3075        let min_len = 12usize
3076            .checked_add(hash_len * 2)
3077            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
3078        if bytes.len() < min_len {
3079            return Err(GitError::InvalidFormat("bitmap index too short".into()));
3080        }
3081        if &bytes[..4] != b"BITM" {
3082            return Err(GitError::InvalidFormat(
3083                "missing bitmap index signature".into(),
3084            ));
3085        }
3086        let version = u16_be(&bytes[4..6]);
3087        if version != 1 {
3088            return Err(GitError::Unsupported(format!(
3089                "bitmap index version {version}"
3090            )));
3091        }
3092        let options = u16_be(&bytes[6..8]);
3093        let known_options =
3094            Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE | Self::OPTION_PSEUDO_MERGES;
3095        if options & !known_options != 0 {
3096            return Err(GitError::Unsupported(format!(
3097                "bitmap index options {:#06x}",
3098                options & !known_options
3099            )));
3100        }
3101        let entry_count = u32_be(&bytes[8..12]) as usize;
3102        let checksum_offset = bytes.len() - hash_len;
3103        let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
3104        let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
3105        if actual_index_checksum != index_checksum {
3106            return Err(GitError::InvalidFormat(format!(
3107                "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
3108            )));
3109        }
3110        let mut extras_end = checksum_offset;
3111        let hash_cache_range = if options & Self::OPTION_HASH_CACHE != 0 {
3112            let cache_len = object_count
3113                .checked_mul(4)
3114                .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
3115            if cache_len > extras_end {
3116                return Err(GitError::InvalidFormat(
3117                    "truncated bitmap hash cache".into(),
3118                ));
3119            }
3120            extras_end -= cache_len;
3121            Some(extras_end..extras_end + cache_len)
3122        } else {
3123            None
3124        };
3125        let pseudo_merge_range = if options & Self::OPTION_PSEUDO_MERGES != 0 {
3126            if extras_end < 24 {
3127                return Err(GitError::InvalidFormat(
3128                    "truncated bitmap pseudo-merge extension".into(),
3129                ));
3130            }
3131            let extension_size = u64_be(&bytes[extras_end - 8..extras_end]) as usize;
3132            if extension_size > extras_end {
3133                return Err(GitError::InvalidFormat(
3134                    "bitmap pseudo-merge extension points before file start".into(),
3135                ));
3136            }
3137            let start = extras_end - extension_size;
3138            Some(start..extras_end)
3139        } else {
3140            None
3141        };
3142        let entries_end = pseudo_merge_range
3143            .as_ref()
3144            .map(|range| range.start)
3145            .unwrap_or(extras_end);
3146
3147        let pack_checksum_end = 12usize
3148            .checked_add(hash_len)
3149            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
3150        let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
3151        let mut offset = pack_checksum_end;
3152        let commits = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3153        let trees = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3154        let blobs = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3155        let tags = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3156
3157        let mut entries = Vec::with_capacity(entry_count);
3158        for idx in 0..entry_count {
3159            if entries_end.saturating_sub(offset) < 6 {
3160                return Err(GitError::InvalidFormat(
3161                    "truncated bitmap index entry".into(),
3162                ));
3163            }
3164            let object_position = u32_be(&bytes[offset..offset + 4]);
3165            offset += 4;
3166            if object_position as usize >= object_count {
3167                return Err(GitError::InvalidFormat(
3168                    "bitmap index entry points past object table".into(),
3169                ));
3170            }
3171            let xor_offset = bytes[offset];
3172            offset += 1;
3173            if xor_offset as usize > idx || xor_offset > 160 {
3174                return Err(GitError::InvalidFormat(
3175                    "bitmap index entry has invalid XOR offset".into(),
3176                ));
3177            }
3178            let flags = bytes[offset];
3179            offset += 1;
3180            let bitmap = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3181            entries.push(PackBitmapEntry {
3182                object_position,
3183                xor_offset,
3184                flags,
3185                bitmap,
3186            });
3187        }
3188
3189        if offset != entries_end {
3190            return Err(GitError::InvalidFormat(format!(
3191                "bitmap index has {} trailing entry bytes",
3192                entries_end - offset
3193            )));
3194        }
3195
3196        let pseudo_merges = if let Some(range) = pseudo_merge_range {
3197            parse_bitmap_pseudo_merges(bytes, range, object_count)?
3198        } else {
3199            Vec::new()
3200        };
3201
3202        let name_hash_cache = if let Some(range) = hash_cache_range {
3203            let mut cache = Vec::with_capacity(object_count);
3204            let mut offset = range.start;
3205            for _ in 0..object_count {
3206                cache.push(u32_be(&bytes[offset..offset + 4]));
3207                offset += 4;
3208            }
3209            Some(cache)
3210        } else {
3211            None
3212        };
3213
3214        Ok(Self {
3215            version,
3216            format,
3217            options,
3218            pack_checksum,
3219            index_checksum,
3220            type_bitmaps: PackBitmapTypeBitmaps {
3221                commits,
3222                trees,
3223                blobs,
3224                tags,
3225            },
3226            entries,
3227            pseudo_merges,
3228            name_hash_cache,
3229        })
3230    }
3231
3232    /// Looks up the stored entry whose commit sits at `position` in the
3233    /// oid-sorted pack index (`.idx` order; see [`PackBitmapEntry::object_position`]).
3234    pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
3235        self.entries
3236            .iter()
3237            .find(|entry| entry.object_position == position)
3238    }
3239}
3240
3241fn parse_bitmap_pseudo_merges(
3242    bytes: &[u8],
3243    range: std::ops::Range<usize>,
3244    object_count: usize,
3245) -> Result<Vec<PackBitmapPseudoMerge>> {
3246    if range.end < range.start || range.end > bytes.len() || range.end - range.start < 24 {
3247        return Err(GitError::InvalidFormat(
3248            "truncated bitmap pseudo-merge extension".into(),
3249        ));
3250    }
3251    let trailer_start = range.end - 24;
3252    let pseudo_merge_count = u32_be(&bytes[trailer_start..trailer_start + 4]) as usize;
3253    let commit_count = u32_be(&bytes[trailer_start + 4..trailer_start + 8]) as usize;
3254    let lookup_offset = u64_be(&bytes[trailer_start + 8..trailer_start + 16]) as usize;
3255    let extension_size = u64_be(&bytes[trailer_start + 16..trailer_start + 24]) as usize;
3256    if extension_size != range.end - range.start {
3257        return Err(GitError::InvalidFormat(
3258            "bitmap pseudo-merge extension size mismatch".into(),
3259        ));
3260    }
3261    let lookup_start = range
3262        .start
3263        .checked_add(lookup_offset)
3264        .ok_or_else(|| GitError::InvalidFormat("bitmap pseudo-merge lookup overflow".into()))?;
3265    if lookup_start > trailer_start {
3266        return Err(GitError::InvalidFormat(
3267            "bitmap pseudo-merge lookup points past extension".into(),
3268        ));
3269    }
3270    let lookup_len = commit_count
3271        .checked_mul(12)
3272        .ok_or_else(|| GitError::InvalidFormat("bitmap pseudo-merge lookup overflow".into()))?;
3273    if lookup_start
3274        .checked_add(lookup_len)
3275        .is_none_or(|end| end > trailer_start)
3276    {
3277        return Err(GitError::InvalidFormat(
3278            "truncated bitmap pseudo-merge lookup".into(),
3279        ));
3280    }
3281    let position_table_len = pseudo_merge_count.checked_mul(8).ok_or_else(|| {
3282        GitError::InvalidFormat("bitmap pseudo-merge position table overflow".into())
3283    })?;
3284    let position_table_start = trailer_start
3285        .checked_sub(position_table_len)
3286        .filter(|start| *start >= range.start)
3287        .ok_or_else(|| {
3288            GitError::InvalidFormat("truncated bitmap pseudo-merge position table".into())
3289        })?;
3290
3291    let mut pseudo_merges = Vec::with_capacity(pseudo_merge_count);
3292    let mut cursor = position_table_start;
3293    for _ in 0..pseudo_merge_count {
3294        let pseudo_offset = u64_be(&bytes[cursor..cursor + 8]) as usize;
3295        cursor += 8;
3296        if pseudo_offset < range.start || pseudo_offset >= position_table_start {
3297            return Err(GitError::InvalidFormat(
3298                "bitmap pseudo-merge offset out of range".into(),
3299            ));
3300        }
3301        let mut offset = pseudo_offset;
3302        let commits = parse_bitmap_ewah(bytes, &mut offset, range.end, object_count)?;
3303        let bitmap = parse_bitmap_ewah(bytes, &mut offset, range.end, object_count)?;
3304        pseudo_merges.push(PackBitmapPseudoMerge { commits, bitmap });
3305    }
3306    Ok(pseudo_merges)
3307}
3308
3309fn parse_bitmap_ewah(
3310    bytes: &[u8],
3311    offset: &mut usize,
3312    checksum_offset: usize,
3313    _object_count: usize,
3314) -> Result<EwahBitmap> {
3315    if checksum_offset.saturating_sub(*offset) < 12 {
3316        return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
3317    }
3318    let bit_size = u32_be(&bytes[*offset..*offset + 4]);
3319    *offset += 4;
3320    let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
3321    *offset += 4;
3322    let words_len = word_count
3323        .checked_mul(8)
3324        .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
3325    if checksum_offset.saturating_sub(*offset) < words_len + 4 {
3326        return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
3327    }
3328    let mut words = Vec::with_capacity(word_count);
3329    for _ in 0..word_count {
3330        words.push(u64_be(&bytes[*offset..*offset + 8]));
3331        *offset += 8;
3332    }
3333    let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
3334    *offset += 4;
3335    validate_ewah_words(bit_size, &words, rlw_position)?;
3336    Ok(EwahBitmap {
3337        bit_size,
3338        words,
3339        rlw_position,
3340    })
3341}
3342
3343fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
3344    if words.is_empty() {
3345        if rlw_position != 0 || bit_size != 0 {
3346            return Err(GitError::InvalidFormat(
3347                "EWAH bitmap has invalid empty RLW".into(),
3348            ));
3349        }
3350        return Ok(());
3351    }
3352    if rlw_position as usize >= words.len() {
3353        return Err(GitError::InvalidFormat(
3354            "EWAH RLW position points past word table".into(),
3355        ));
3356    }
3357    let mut word_idx = 0usize;
3358    let mut decoded_words = 0u64;
3359    while word_idx < words.len() {
3360        let rlw = words[word_idx];
3361        let run_words = (rlw >> 1) & 0xffff_ffff;
3362        let literal_words = (rlw >> 33) as usize;
3363        word_idx += 1;
3364        word_idx = word_idx
3365            .checked_add(literal_words)
3366            .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
3367        if word_idx > words.len() {
3368            return Err(GitError::InvalidFormat(
3369                "EWAH literal words extend past word table".into(),
3370            ));
3371        }
3372        decoded_words = decoded_words
3373            .checked_add(run_words)
3374            .and_then(|value| value.checked_add(literal_words as u64))
3375            .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
3376    }
3377    let decoded_bits = decoded_words
3378        .checked_mul(64)
3379        .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
3380    if decoded_bits < u64::from(bit_size) {
3381        return Err(GitError::InvalidFormat(
3382            "EWAH bitmap decodes fewer bits than declared".into(),
3383        ));
3384    }
3385    Ok(())
3386}
3387
3388impl MultiPackIndex {
3389    pub fn write(
3390        format: ObjectFormat,
3391        version: u8,
3392        pack_names: &[String],
3393        objects: &[MultiPackIndexEntry],
3394    ) -> Result<Vec<u8>> {
3395        Self::write_with_reverse_index(format, version, pack_names, objects, None)
3396    }
3397
3398    /// Like [`MultiPackIndex::write`], but when `preferred_pack` is `Some`,
3399    /// additionally emits the `RIDX` chunk: the object order a multi-pack
3400    /// `.bitmap` numbers its bits in ("pseudo-pack order" — every object of
3401    /// the preferred pack first, then the rest by pack id, each pack's slice
3402    /// in offset order), stored as one u32 midx position per object.
3403    ///
3404    /// `preferred_pack` is the pack-int-id receiving pseudo-pack priority; it
3405    /// must be in range.
3406    pub fn write_with_reverse_index(
3407        format: ObjectFormat,
3408        version: u8,
3409        pack_names: &[String],
3410        objects: &[MultiPackIndexEntry],
3411        preferred_pack: Option<u32>,
3412    ) -> Result<Vec<u8>> {
3413        Self::write_with_bitmap_packs(format, version, pack_names, objects, preferred_pack, None)
3414    }
3415
3416    pub fn write_with_bitmap_packs(
3417        format: ObjectFormat,
3418        version: u8,
3419        pack_names: &[String],
3420        objects: &[MultiPackIndexEntry],
3421        preferred_pack: Option<u32>,
3422        bitmapped_packs: Option<&[MultiPackBitmapPack]>,
3423    ) -> Result<Vec<u8>> {
3424        if let Some(preferred) = preferred_pack
3425            && preferred as usize >= pack_names.len()
3426        {
3427            return Err(GitError::InvalidFormat(format!(
3428                "preferred pack {preferred} out of range for {} packs",
3429                pack_names.len()
3430            )));
3431        }
3432        if version != 1 && version != 2 {
3433            return Err(GitError::Unsupported(format!(
3434                "multi-pack-index version {version}"
3435            )));
3436        }
3437        if pack_names.len() > u32::MAX as usize {
3438            return Err(GitError::InvalidFormat(
3439                "too many multi-pack-index packs".into(),
3440            ));
3441        }
3442        if objects.len() > u32::MAX as usize {
3443            return Err(GitError::InvalidFormat(
3444                "too many multi-pack-index objects".into(),
3445            ));
3446        }
3447        if let Some(bitmapped_packs) = bitmapped_packs {
3448            if bitmapped_packs.len() != pack_names.len() {
3449                return Err(GitError::InvalidFormat(
3450                    "multi-pack-index BTMP pack count mismatch".into(),
3451                ));
3452            }
3453            for pack in bitmapped_packs {
3454                let bitmap_end = u64::from(pack.bitmap_pos)
3455                    .checked_add(u64::from(pack.bitmap_nr))
3456                    .ok_or_else(|| {
3457                        GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
3458                    })?;
3459                if bitmap_end > objects.len() as u64 {
3460                    return Err(GitError::InvalidFormat(
3461                        "multi-pack-index BTMP range points past object table".into(),
3462                    ));
3463                }
3464            }
3465        }
3466        validate_midx_pack_names(pack_names)?;
3467        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
3468            return Err(GitError::InvalidFormat(
3469                "multi-pack-index v1 pack names must be sorted".into(),
3470            ));
3471        }
3472
3473        let mut objects = objects.iter().collect::<Vec<_>>();
3474        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
3475        let mut previous_oid: Option<&ObjectId> = None;
3476        for object in &objects {
3477            if object.oid.format() != format {
3478                return Err(GitError::InvalidObjectId(
3479                    "multi-pack-index object format does not match index format".into(),
3480                ));
3481            }
3482            if let Some(previous) = previous_oid
3483                && previous.as_bytes() == object.oid.as_bytes()
3484            {
3485                return Err(GitError::InvalidFormat(
3486                    "multi-pack-index contains duplicate object ids".into(),
3487                ));
3488            }
3489            if object.pack_int_id as usize >= pack_names.len() {
3490                return Err(GitError::InvalidFormat(
3491                    "multi-pack-index object points past pack table".into(),
3492                ));
3493            }
3494            previous_oid = Some(&object.oid);
3495        }
3496
3497        let mut large_offsets = Vec::new();
3498        let mut chunks = vec![
3499            (*b"PNAM", write_midx_pack_names(pack_names)),
3500            (*b"OIDF", write_midx_oid_fanout(&objects)?),
3501            (*b"OIDL", write_midx_oid_lookup(&objects)),
3502            (
3503                *b"OOFF",
3504                write_midx_object_offsets(&objects, &mut large_offsets)?,
3505            ),
3506        ];
3507        if !large_offsets.is_empty() {
3508            chunks.push((*b"LOFF", large_offsets));
3509        }
3510        if let Some(preferred) = preferred_pack {
3511            // `objects` is already in midx (oid-sorted) order here; the chunk
3512            // lists each object's midx position in pseudo-pack order.
3513            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
3514            pseudo.sort_by_key(|&midx_pos| {
3515                let object = objects[midx_pos as usize];
3516                (
3517                    object.pack_int_id != preferred,
3518                    object.pack_int_id,
3519                    object.offset,
3520                )
3521            });
3522            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
3523            for midx_pos in pseudo {
3524                ridx.extend_from_slice(&midx_pos.to_be_bytes());
3525            }
3526            chunks.push((*b"RIDX", ridx));
3527        }
3528        if let Some(bitmapped_packs) = bitmapped_packs {
3529            let mut btmp = Vec::with_capacity(bitmapped_packs.len() * 8);
3530            for pack in bitmapped_packs {
3531                btmp.extend_from_slice(&pack.bitmap_pos.to_be_bytes());
3532                btmp.extend_from_slice(&pack.bitmap_nr.to_be_bytes());
3533            }
3534            chunks.push((*b"BTMP", btmp));
3535        }
3536        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
3537    }
3538
3539    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
3540        Self::parse_impl(bytes, format, true)
3541    }
3542
3543    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
3544        Self::parse_impl(bytes, format, false)
3545    }
3546
3547    fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
3548        let hash_len = format.raw_len();
3549        if bytes.len() < 12 + 12 + hash_len {
3550            return Err(GitError::InvalidFormat(
3551                "multi-pack-index file too short".into(),
3552            ));
3553        }
3554        if &bytes[..4] != b"MIDX" {
3555            return Err(GitError::InvalidFormat(
3556                "missing multi-pack-index signature".into(),
3557            ));
3558        }
3559        let version = bytes[4];
3560        if version != 1 && version != 2 {
3561            return Err(GitError::Unsupported(format!(
3562                "multi-pack-index version {version}"
3563            )));
3564        }
3565        let hash_id = bytes[5];
3566        if u32::from(hash_id) != hash_function_id(format) {
3567            return Err(GitError::InvalidFormat(format!(
3568                "multi-pack-index hash id {hash_id} does not match {}",
3569                format.name()
3570            )));
3571        }
3572        let chunk_count = bytes[6] as usize;
3573        let base_midx_count = bytes[7];
3574        if base_midx_count != 0 {
3575            return Err(GitError::Unsupported(format!(
3576                "multi-pack-index base count {base_midx_count}"
3577            )));
3578        }
3579        let pack_count = u32_be(&bytes[8..12]);
3580        let lookup_len = (chunk_count + 1)
3581            .checked_mul(12)
3582            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3583        let data_start = 12usize
3584            .checked_add(lookup_len)
3585            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3586        let checksum_offset = bytes.len() - hash_len;
3587        if data_start > checksum_offset {
3588            return Err(GitError::InvalidFormat(
3589                "truncated multi-pack-index chunk lookup".into(),
3590            ));
3591        }
3592
3593        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
3594        if verify_checksum {
3595            let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
3596            if actual_checksum != checksum {
3597                return Err(GitError::InvalidFormat(format!(
3598                    "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
3599                )));
3600            }
3601        }
3602
3603        let mut entries = Vec::with_capacity(chunk_count + 1);
3604        let mut offset = 12usize;
3605        for _ in 0..=chunk_count {
3606            let id = [
3607                bytes[offset],
3608                bytes[offset + 1],
3609                bytes[offset + 2],
3610                bytes[offset + 3],
3611            ];
3612            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
3613            entries.push((id, chunk_offset));
3614            offset += 12;
3615        }
3616        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
3617            return Err(GitError::InvalidFormat(
3618                "multi-pack-index chunk lookup is empty".into(),
3619            ));
3620        };
3621        if terminator_id != [0, 0, 0, 0] {
3622            return Err(GitError::InvalidFormat(
3623                "multi-pack-index chunk lookup missing terminator".into(),
3624            ));
3625        }
3626        if terminator_offset != checksum_offset as u64 {
3627            return Err(GitError::InvalidFormat(
3628                "multi-pack-index terminator does not point at checksum".into(),
3629            ));
3630        }
3631
3632        let mut chunks = Vec::with_capacity(chunk_count);
3633        let mut previous_offset = data_start as u64;
3634        let mut reported_unaligned = false;
3635        for pair in entries.windows(2) {
3636            let (id, chunk_offset) = pair[0];
3637            let (_next_id, next_offset) = pair[1];
3638            if id == [0, 0, 0, 0] {
3639                return Err(GitError::InvalidFormat(
3640                    "multi-pack-index chunk id is zero before terminator".into(),
3641                ));
3642            }
3643            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
3644                return Err(GitError::InvalidFormat(
3645                    "multi-pack-index chunk offsets are not monotonic".into(),
3646                ));
3647            }
3648            if chunk_offset % 4 != 0 && !reported_unaligned {
3649                eprintln!(
3650                    "error: chunk id {:08x} not 4-byte aligned",
3651                    u32::from_be_bytes(id)
3652                );
3653                reported_unaligned = true;
3654            }
3655            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
3656                return Err(GitError::InvalidFormat(
3657                    "multi-pack-index chunk length is invalid".into(),
3658                ));
3659            }
3660            chunks.push(MultiPackIndexChunk {
3661                id,
3662                offset: chunk_offset,
3663                len: next_offset - chunk_offset,
3664            });
3665            previous_offset = chunk_offset;
3666        }
3667
3668        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
3669        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
3670        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
3671        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
3672        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
3673        let bitmapped_packs =
3674            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
3675
3676        Ok(Self {
3677            version,
3678            format,
3679            pack_count,
3680            pack_names,
3681            object_count: object_count as u32,
3682            fanout,
3683            objects,
3684            reverse_index,
3685            bitmapped_packs,
3686            chunks,
3687            checksum,
3688        })
3689    }
3690
3691    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
3692        self.objects
3693            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
3694            .ok()
3695            .map(|idx| &self.objects[idx])
3696    }
3697}
3698
3699impl MultiPackIndexOidLookup {
3700    pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
3701        let raw = bytes.as_bytes();
3702        let hash_len = format.raw_len();
3703        if raw.len() < 12 + 12 + hash_len {
3704            return Err(GitError::InvalidFormat(
3705                "multi-pack-index file too short".into(),
3706            ));
3707        }
3708        if &raw[..4] != b"MIDX" {
3709            return Err(GitError::InvalidFormat(
3710                "missing multi-pack-index signature".into(),
3711            ));
3712        }
3713        let version = raw[4];
3714        if version != 1 && version != 2 {
3715            return Err(GitError::Unsupported(format!(
3716                "multi-pack-index version {version}"
3717            )));
3718        }
3719        let hash_id = raw[5];
3720        if u32::from(hash_id) != hash_function_id(format) {
3721            return Err(GitError::InvalidFormat(format!(
3722                "multi-pack-index hash id {hash_id} does not match {}",
3723                format.name()
3724            )));
3725        }
3726        let chunk_count = raw[6] as usize;
3727        let base_midx_count = raw[7];
3728        if base_midx_count != 0 {
3729            return Err(GitError::Unsupported(format!(
3730                "multi-pack-index base count {base_midx_count}"
3731            )));
3732        }
3733        let pack_count = u32_be(&raw[8..12]);
3734        let lookup_len = (chunk_count + 1)
3735            .checked_mul(12)
3736            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3737        let data_start = 12usize
3738            .checked_add(lookup_len)
3739            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3740        let checksum_offset = raw.len() - hash_len;
3741        if data_start > checksum_offset {
3742            return Err(GitError::InvalidFormat(
3743                "truncated multi-pack-index chunk lookup".into(),
3744            ));
3745        }
3746
3747        let mut entries = Vec::with_capacity(chunk_count + 1);
3748        let mut offset = 12usize;
3749        for _ in 0..=chunk_count {
3750            let id = [
3751                raw[offset],
3752                raw[offset + 1],
3753                raw[offset + 2],
3754                raw[offset + 3],
3755            ];
3756            let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
3757            entries.push((id, chunk_offset));
3758            offset += 12;
3759        }
3760        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
3761            return Err(GitError::InvalidFormat(
3762                "multi-pack-index chunk lookup is empty".into(),
3763            ));
3764        };
3765        if terminator_id != [0, 0, 0, 0] {
3766            return Err(GitError::InvalidFormat(
3767                "multi-pack-index chunk lookup missing terminator".into(),
3768            ));
3769        }
3770        if terminator_offset != checksum_offset as u64 {
3771            return Err(GitError::InvalidFormat(
3772                "multi-pack-index terminator does not point at checksum".into(),
3773            ));
3774        }
3775
3776        let mut chunks = Vec::with_capacity(chunk_count);
3777        let mut previous_offset = data_start as u64;
3778        let mut reported_unaligned = false;
3779        for pair in entries.windows(2) {
3780            let (id, chunk_offset) = pair[0];
3781            let (_next_id, next_offset) = pair[1];
3782            if id == [0, 0, 0, 0] {
3783                return Err(GitError::InvalidFormat(
3784                    "multi-pack-index chunk id is zero before terminator".into(),
3785                ));
3786            }
3787            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
3788                return Err(GitError::InvalidFormat(
3789                    "multi-pack-index chunk offsets are not monotonic".into(),
3790                ));
3791            }
3792            if chunk_offset % 4 != 0 && !reported_unaligned {
3793                eprintln!(
3794                    "error: chunk id {:08x} not 4-byte aligned",
3795                    u32::from_be_bytes(id)
3796                );
3797                reported_unaligned = true;
3798            }
3799            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
3800                return Err(GitError::InvalidFormat(
3801                    "multi-pack-index chunk length is invalid".into(),
3802                ));
3803            }
3804            chunks.push(MultiPackIndexChunk {
3805                id,
3806                offset: chunk_offset,
3807                len: next_offset - chunk_offset,
3808            });
3809            previous_offset = chunk_offset;
3810        }
3811
3812        let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
3813        let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
3814        let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
3815            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
3816        let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
3817            GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
3818        })?;
3819        if oid_lookup.len() != expected_len {
3820            return Err(GitError::InvalidFormat(
3821                "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
3822            ));
3823        }
3824        let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
3825            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
3826        let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
3827            GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
3828        })?;
3829        if object_offsets.len() != expected_offsets_len {
3830            return Err(GitError::InvalidFormat(
3831                "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
3832            ));
3833        }
3834        let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
3835        if let Some(large_offsets) = large_offsets
3836            && large_offsets.len() % 8 != 0
3837        {
3838            return Err(GitError::InvalidFormat(
3839                "multi-pack-index LOFF chunk has invalid length".into(),
3840            ));
3841        }
3842        let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
3843        let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
3844        let (large_offsets_offset, large_offsets_len) = match large_offsets {
3845            Some(large_offsets) => (
3846                Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
3847                large_offsets.len(),
3848            ),
3849            None => (None, 0),
3850        };
3851        Ok(Self {
3852            format,
3853            pack_count,
3854            pack_names,
3855            fanout,
3856            object_count,
3857            oid_lookup_offset,
3858            object_offsets_offset,
3859            large_offsets_offset,
3860            large_offsets_len,
3861            bytes,
3862        })
3863    }
3864
    /// Report whether this multi-pack-index has an entry for `oid`.
    ///
    /// Only the OID lookup table is consulted (via `find_position`); the
    /// entry's pack id and offset are neither decoded nor validated here.
    pub fn contains(&self, oid: &ObjectId) -> bool {
        self.find_position(oid).is_some()
    }
3868
3869    pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
3870        let Some(position) = self.find_position(oid) else {
3871            return Ok(None);
3872        };
3873        let bytes = self.bytes.as_bytes();
3874        let hash_len = self.format.raw_len();
3875        let oid_start = self
3876            .oid_lookup_offset
3877            .checked_add(position * hash_len)
3878            .ok_or_else(|| {
3879                GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
3880            })?;
3881        let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
3882        let offset_start = self
3883            .object_offsets_offset
3884            .checked_add(position * 8)
3885            .ok_or_else(|| {
3886                GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
3887            })?;
3888        let data = &bytes[offset_start..offset_start + 8];
3889        let pack_int_id = u32_be(&data[..4]);
3890        if pack_int_id >= self.pack_count {
3891            return Err(GitError::InvalidFormat(
3892                "multi-pack-index object points past pack table".into(),
3893            ));
3894        }
3895        let raw_offset = u32_be(&data[4..8]);
3896        let offset = if raw_offset & 0x8000_0000 == 0 {
3897            u64::from(raw_offset)
3898        } else {
3899            let Some(large_offsets_offset) = self.large_offsets_offset else {
3900                return Err(GitError::InvalidFormat(
3901                    "multi-pack-index large offset missing LOFF chunk".into(),
3902                ));
3903            };
3904            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
3905            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
3906                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3907            })?;
3908            let large_end = large_start.checked_add(8).ok_or_else(|| {
3909                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3910            })?;
3911            if large_end > self.large_offsets_len {
3912                return Err(GitError::InvalidFormat(
3913                    "fatal: multi-pack-index large offset out of bounds".into(),
3914                ));
3915            }
3916            let start = large_offsets_offset + large_start;
3917            u64_be(&bytes[start..start + 8])
3918        };
3919        Ok(Some(MultiPackIndexEntry {
3920            oid,
3921            pack_int_id,
3922            offset,
3923            force_large_offset: raw_offset & 0x8000_0000 != 0,
3924        }))
3925    }
3926
3927    pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
3928        self.pack_names
3929            .get(pack_int_id as usize)
3930            .map(String::as_str)
3931    }
3932
3933    fn find_position(&self, oid: &ObjectId) -> Option<usize> {
3934        if oid.format() != self.format || self.object_count == 0 {
3935            return None;
3936        }
3937        let first = oid.as_bytes()[0] as usize;
3938        let start = if first == 0 {
3939            0
3940        } else {
3941            self.fanout[first - 1] as usize
3942        };
3943        let end = self.fanout[first] as usize;
3944        if start >= end || end > self.object_count {
3945            return None;
3946        }
3947        let hash_len = self.format.raw_len();
3948        let table_start = self.oid_lookup_offset;
3949        let table_end = table_start + self.object_count * hash_len;
3950        let bytes = self.bytes.as_bytes();
3951        let table = &bytes[table_start..table_end];
3952        let needle = oid.as_bytes();
3953        let mut low = start;
3954        let mut high = end;
3955        while low < high {
3956            let mid = low + (high - low) / 2;
3957            let raw = &table[mid * hash_len..(mid + 1) * hash_len];
3958            match raw.cmp(needle) {
3959                std::cmp::Ordering::Less => low = mid + 1,
3960                std::cmp::Ordering::Equal => return Some(mid),
3961                std::cmp::Ordering::Greater => high = mid,
3962            }
3963        }
3964        None
3965    }
3966}
3967
3968fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
3969    for name in pack_names {
3970        if name.is_empty() {
3971            return Err(GitError::InvalidFormat(
3972                "multi-pack-index pack name is empty".into(),
3973            ));
3974        }
3975        if name
3976            .bytes()
3977            .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
3978        {
3979            return Err(GitError::InvalidFormat(
3980                "multi-pack-index pack name contains an invalid byte".into(),
3981            ));
3982        }
3983    }
3984    Ok(())
3985}
3986
/// Serialize pack names into one byte buffer: each name is followed by a NUL
/// terminator, and the buffer is then zero-padded to a multiple of four bytes.
/// An empty name list yields an empty buffer.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0u8)))
        .collect();
    // Bytes still missing to reach the next 4-byte boundary (0 when aligned).
    let padding = (4 - chunk.len() % 4) % 4;
    chunk.resize(chunk.len() + padding, 0);
    chunk
}
3998
3999fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
4000    let mut counts = [0u32; 256];
4001    for object in objects {
4002        let first = object.oid.as_bytes()[0] as usize;
4003        counts[first] = counts[first]
4004            .checked_add(1)
4005            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4006    }
4007    let mut running = 0u32;
4008    let mut out = Vec::with_capacity(256 * 4);
4009    for count in counts {
4010        running = running
4011            .checked_add(count)
4012            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4013        out.extend_from_slice(&running.to_be_bytes());
4014    }
4015    Ok(out)
4016}
4017
4018fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
4019    let mut out = Vec::new();
4020    for object in objects {
4021        out.extend_from_slice(object.oid.as_bytes());
4022    }
4023    out
4024}
4025
4026fn write_midx_object_offsets(
4027    objects: &[&MultiPackIndexEntry],
4028    large_offsets: &mut Vec<u8>,
4029) -> Result<Vec<u8>> {
4030    let mut out = Vec::new();
4031    for object in objects {
4032        out.extend_from_slice(&object.pack_int_id.to_be_bytes());
4033        if object.offset < 0x8000_0000 && !object.force_large_offset {
4034            out.extend_from_slice(&(object.offset as u32).to_be_bytes());
4035        } else {
4036            let large_idx = large_offsets.len() / 8;
4037            if large_idx > 0x7fff_ffff {
4038                return Err(GitError::InvalidFormat(
4039                    "too many multi-pack-index large offsets".into(),
4040                ));
4041            }
4042            out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
4043            large_offsets.extend_from_slice(&object.offset.to_be_bytes());
4044        }
4045    }
4046    Ok(out)
4047}
4048
4049fn write_multi_pack_index_chunks(
4050    format: ObjectFormat,
4051    version: u8,
4052    pack_count: u32,
4053    chunks: &[([u8; 4], Vec<u8>)],
4054) -> Result<Vec<u8>> {
4055    if chunks.len() > u8::MAX as usize {
4056        return Err(GitError::InvalidFormat(
4057            "too many multi-pack-index chunks".into(),
4058        ));
4059    }
4060    let lookup_len = (chunks.len() + 1)
4061        .checked_mul(12)
4062        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
4063    let mut out = Vec::new();
4064    out.extend_from_slice(b"MIDX");
4065    out.push(version);
4066    out.push(hash_function_id(format) as u8);
4067    out.push(chunks.len() as u8);
4068    out.push(0);
4069    out.extend_from_slice(&pack_count.to_be_bytes());
4070    let mut chunk_offset = (12usize)
4071        .checked_add(lookup_len)
4072        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
4073        as u64;
4074    for (id, data) in chunks {
4075        out.extend_from_slice(id);
4076        out.extend_from_slice(&chunk_offset.to_be_bytes());
4077        chunk_offset = chunk_offset
4078            .checked_add(data.len() as u64)
4079            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
4080    }
4081    out.extend_from_slice(&[0, 0, 0, 0]);
4082    out.extend_from_slice(&chunk_offset.to_be_bytes());
4083    for (_id, data) in chunks {
4084        out.extend_from_slice(data);
4085    }
4086    let checksum = sley_core::digest_bytes(format, &out)?;
4087    out.extend_from_slice(checksum.as_bytes());
4088    Ok(out)
4089}
4090
/// Decoded header of one pack entry: what kind of entry it is and how many
/// bytes its zlib payload is declared to inflate to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Entry kind as stored in the pack: a base object type or a delta kind.
    kind: PackObjectKind,
    /// Declared inflated size of the entry's payload. The pack reader checks
    /// the inflated payload length against this value for every entry kind, so
    /// for a delta entry it is the size of the delta stream, not of the
    /// reconstructed object.
    size: u64,
}
4096
/// A cache of objects already decoded from one specific pack, keyed by the
/// in-pack byte offset at which each object's entry begins.
///
/// Delta resolution within a pack walks a chain of base objects by offset; the
/// same base is the parent of many deltas, so without a cache the entire chain
/// is re-inflated and re-applied on every read. Implementors let
/// [`read_object_at_with_cache`] reuse a warm base instead.
///
/// Correctness contract: a given `offset` within a given pack's bytes always
/// decodes to exactly one object, so caching by offset can never serve the wrong
/// object **provided the same cache is only ever used with one pack's bytes**.
/// Callers must therefore scope a cache to a single pack (e.g. key it by pack
/// path). The default [`read_object_at`] uses a no-op cache and is unaffected.
///
/// Both methods take `&self`, so an implementation that actually stores
/// objects needs interior mutability.
pub trait PackDeltaCache {
    /// Return the decoded object whose entry begins at `offset`, if cached.
    /// The pack reader returns a hit as the fully resolved object without
    /// re-reading the entry.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Record that the entry beginning at `offset` decodes to `object`.
    /// Called by the pack reader with the fully resolved object (deltas
    /// already applied).
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
4116
/// A [`PackDeltaCache`] that stores nothing; used by [`read_object_at`] to keep
/// the original, allocation-free behavior for callers that do not opt in.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always a miss, so every read decodes from the pack bytes.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards `_object`; nothing is retained between reads.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
4127
// Per-thread zlib inflate state, reset and reused for every inflate instead of
// allocating a fresh (~10 KiB) `InflateState` for each object and delta decoded —
// an allocation that dominated bulk reads. The `RefCell` is borrowed only for the
// duration of a single inflate; the pack reader fully inflates each entry's data
// before moving on to its base, so the borrow never nests (a nested
// `borrow_mut` would panic).
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
4136
/// The largest ratio by which a single DEFLATE/zlib member can expand its input.
/// The theoretical worst case for raw DEFLATE is ~1032:1 (a maximally efficient
/// run of back-references). [`bounded_inflate_reserve`] pre-reserves no more
/// than this multiple of the available compressed input, so an attacker who
/// declares a huge `size_hint` (e.g. `u64::MAX`) cannot make us reserve — and
/// thus commit — gigabytes of memory before the inflate has produced a single
/// byte. The stream's *actual* output is still verified against the declared
/// size by the caller; this only bounds the speculative allocation. git never
/// pre-allocates an attacker's declared size beyond a streaming buffer either
/// (see index-pack.c's `unpack_entry_data`).
const MAX_INFLATE_EXPANSION: usize = 1032;
4148
/// An absolute ceiling (64 MiB) on the speculative pre-reservation, independent
/// of the input length, so even a large legitimate-looking compressed input
/// can't be turned into a multi-gigabyte up-front allocation. Inflate still
/// grows the output buffer organically past this when a real stream genuinely
/// produces that much — this only caps the *speculative* reserve.
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;
4155
4156/// Bound a caller-supplied (possibly attacker-controlled) decompressed-size hint
4157/// to something safe to reserve up front: no larger than what `compressed_len`
4158/// input bytes could plausibly inflate to, and never above a fixed ceiling. The
4159/// returned value is only used to size the initial allocation; the inflate loop
4160/// grows the buffer as the real stream produces output, so legitimate large
4161/// objects still decode correctly — they just don't get the whole allocation at
4162/// once.
4163fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
4164    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
4165    // 64 (floor) <= MAX_INFLATE_RESERVE (ceiling) always, so `clamp` cannot panic.
4166    size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
4167}
4168
/// Inflate the entire zlib stream at the front of `compressed`, appending the
/// decoded bytes to `out`, reusing the thread-local inflate state. `size_hint`
/// is the caller's expectation for the decompressed length, but it is treated as
/// untrusted: the up-front reservation is bounded by [`bounded_inflate_reserve`]
/// so a crafted hint can never drive an out-of-memory pre-allocation. Returns the
/// number of *compressed* bytes consumed (so callers stepping through a pack can
/// advance to the next entry). Byte-for-byte equivalent to
/// `ZlibDecoder::read_to_end` + `total_in`.
///
/// `size_hint` only sizes the first reservation; output is not capped at it.
/// The loop keeps growing `out` until the stream reports its end, so callers
/// that need an exact length must compare `out` against their expectation.
///
/// # Errors
///
/// Returns [`GitError::InvalidObject`] when zlib reports an error, or when a
/// call makes no progress before the stream has ended (truncated input).
fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
    INFLATE.with(|cell| {
        let mut decompress = cell.borrow_mut();
        // Reset the shared state for a new stream; the flag matches the
        // `Decompress::new(true)` the state was created with.
        decompress.reset(true);
        out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
        let mut input = compressed;
        let mut consumed_total = 0usize;
        loop {
            // Always leave output room so a zero-progress result means the input
            // (not the buffer) is exhausted.
            if out.len() == out.capacity() {
                // Grow by at least the current length (and at least 64 bytes).
                out.reserve(out.len().max(64));
            }
            // The counters are cumulative since the reset; the deltas around
            // the call give this iteration's progress.
            let before_in = decompress.total_in();
            let before_out = decompress.total_out();
            let status = decompress
                .decompress_vec(input, out, flate2::FlushDecompress::None)
                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
            let consumed = (decompress.total_in() - before_in) as usize;
            let produced = decompress.total_out() - before_out;
            // Drop the bytes zlib has already taken before the next call.
            input = &input[consumed..];
            consumed_total += consumed;
            match status {
                flate2::Status::StreamEnd => return Ok(consumed_total),
                // No input taken and no output written although the buffer had
                // room: the stream stops before its end marker.
                _ if consumed == 0 && produced == 0 => {
                    return Err(GitError::InvalidObject("truncated zlib stream".into()));
                }
                _ => {}
            }
        }
    })
}
4209
/// Inflate at least `max_out` bytes (or until the stream ends) from `compressed`
/// into `out`, reusing the thread-local state. Used to read a delta's leading
/// base-size / result-size varints without inflating the whole instruction stream.
///
/// Inflation stops once `out` holds `max_out` bytes or more, the stream ends, or
/// a call makes no progress. `out` may end up longer than `max_out` because each
/// call may fill all of the buffer's spare capacity. A stream that stops early
/// is not an error here: the function returns `Ok(())` with whatever was
/// produced, and the caller decides whether that is enough.
///
/// # Errors
///
/// Returns [`GitError::InvalidObject`] when zlib reports an error.
fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
    INFLATE.with(|cell| {
        let mut decompress = cell.borrow_mut();
        // Reset the shared state for a new stream; the flag matches the
        // `Decompress::new(true)` the state was created with.
        decompress.reset(true);
        out.reserve(max_out.max(16));
        let mut input = compressed;
        while out.len() < max_out {
            // Keep spare capacity available so a zero-progress call cannot be
            // blamed on a full buffer.
            if out.len() == out.capacity() {
                out.reserve(out.len().max(16));
            }
            // The counters are cumulative since the reset; the deltas around
            // the call give this iteration's progress.
            let before_in = decompress.total_in();
            let before_out = decompress.total_out();
            let status = decompress
                .decompress_vec(input, out, flate2::FlushDecompress::None)
                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
            let consumed = (decompress.total_in() - before_in) as usize;
            let produced = decompress.total_out() - before_out;
            input = &input[consumed..];
            // Stop at the end of the stream, or when nothing more can be done
            // (input exhausted before the stream ended).
            if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
                break;
            }
        }
        Ok(())
    })
}
4238
/// Decode the single object stored at byte `offset` within `pack_bytes`, reading
/// only that object and its delta-base chain instead of parsing the whole pack.
///
/// Ofs-delta bases are followed by offset within this pack; ref-delta bases are
/// obtained from `resolve_ref_base`, which the caller backs with the surrounding
/// object store (so a base in another pack or loose still resolves). The pack
/// trailer checksum is the final `format.raw_len()` bytes.
///
/// Nothing is cached between calls: this forwards to
/// [`read_object_at_with_cache_arc`] with a cache that stores nothing.
///
/// # Errors
///
/// Propagates every error of [`read_object_at_with_cache_arc`], including
/// errors returned by `resolve_ref_base`.
pub fn read_object_at_arc<F>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    resolve_ref_base: F,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
{
    read_object_at_with_cache_arc(
        pack_bytes,
        offset,
        format,
        resolve_ref_base,
        &NoopDeltaCache,
    )
}
4263
/// Like [`read_object_at_arc`], but reuses already-decoded objects from `cache`
/// (keyed by in-pack offset) and records every object it decodes.
///
/// This turns repeated reads from the same pack — where many deltas share a base
/// chain — from re-inflating each chain per read into resolving each base once.
/// `cache` must be scoped to the pack `pack_bytes` belongs to (see
/// [`PackDeltaCache`]). The decoded object is returned behind an [`Arc`] so
/// callers can reuse cache handles without cloning full object bodies.
///
/// This is the strict variant: the ofs-delta fallback hook passed down always
/// answers `Ok(None)`, so an ofs-delta base that cannot be decoded from this
/// pack is reported as an error rather than recovered from elsewhere.
pub fn read_object_at_with_cache_arc<F, C>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base: F,
    cache: &C,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
    C: PackDeltaCache + ?Sized,
{
    read_object_at_with_cache_and_ofs_base_arc(
        pack_bytes,
        offset,
        format,
        &mut resolve_ref_base,
        // No alternative source for ofs-delta bases.
        |_offset| Ok(None),
        cache,
    )
}
4292
/// Like [`read_object_at_with_cache_arc`], but lets an object-database caller
/// recover an ofs-delta base from another storage copy when the in-pack base
/// offset cannot be decoded. Direct pack verification should keep using the
/// strict APIs; this hook mirrors normal object lookup, where a corrupt packed
/// copy does not hide a good loose or redundant packed copy.
///
/// `resolve_ofs_base` receives the in-pack offset of the base that failed to
/// decode. Returning `Ok(Some(base))` substitutes that object as the base;
/// returning `Ok(None)` lets the original decode error surface.
pub fn read_object_at_with_cache_and_ofs_base_arc<F, G, C>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base: F,
    mut resolve_ofs_base: G,
    cache: &C,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
    G: FnMut(u64) -> Result<Option<Arc<EncodedObject>>>,
    C: PackDeltaCache + ?Sized,
{
    read_object_at_inner(
        pack_bytes,
        offset,
        format,
        &mut resolve_ref_base,
        &mut resolve_ofs_base,
        cache,
    )
}
4320
/// Like [`read_object_at_with_cache_and_ofs_base_arc`], without an offset-cache.
///
/// Decoded objects are not remembered between calls (a cache that stores
/// nothing is passed down), while `resolve_ofs_base` is still consulted when
/// an ofs-delta base cannot be decoded from `pack_bytes`.
pub fn read_object_at_with_ofs_base_arc<F, G>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    resolve_ref_base: F,
    resolve_ofs_base: G,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
    G: FnMut(u64) -> Result<Option<Arc<EncodedObject>>>,
{
    read_object_at_with_cache_and_ofs_base_arc(
        pack_bytes,
        offset,
        format,
        resolve_ref_base,
        resolve_ofs_base,
        &NoopDeltaCache,
    )
}
4342
4343fn read_object_at_inner<F, G, C>(
4344    pack_bytes: &[u8],
4345    offset: u64,
4346    format: ObjectFormat,
4347    resolve_ref_base: &mut F,
4348    resolve_ofs_base: &mut G,
4349    cache: &C,
4350) -> Result<Arc<EncodedObject>>
4351where
4352    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
4353    G: FnMut(u64) -> Result<Option<Arc<EncodedObject>>>,
4354    C: PackDeltaCache + ?Sized,
4355{
4356    // A warm cache entry for this exact offset is already the fully resolved
4357    // object, so the whole base chain below can be skipped.
4358    if let Some(object) = cache.get(offset) {
4359        return Ok(object);
4360    }
4361    let trailer_offset = pack_bytes
4362        .len()
4363        .checked_sub(format.raw_len())
4364        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
4365    let mut cursor = usize::try_from(offset)
4366        .ok()
4367        .filter(|&value| value < trailer_offset)
4368        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
4369    let header = parse_entry_header(pack_bytes, &mut cursor)?;
4370    let base = match header.kind {
4371        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
4372            pack_bytes,
4373            &mut cursor,
4374            offset,
4375        )?)),
4376        PackObjectKind::RefDelta => {
4377            let hash_len = format.raw_len();
4378            if cursor + hash_len > trailer_offset {
4379                return Err(GitError::InvalidFormat(
4380                    "truncated ref-delta base object id".into(),
4381                ));
4382            }
4383            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
4384            cursor += hash_len;
4385            Some(DeltaBase::Ref(oid))
4386        }
4387        _ => None,
4388    };
4389    let mut body = Vec::new();
4390    inflate_into(
4391        &pack_bytes[cursor..trailer_offset],
4392        &mut body,
4393        header.size.min(usize::MAX as u64) as usize,
4394    )?;
4395    if body.len() as u64 != header.size {
4396        return Err(GitError::InvalidObject(format!(
4397            "pack object declared {} bytes, decoded {}",
4398            header.size,
4399            body.len()
4400        )));
4401    }
4402    let object = match base {
4403        None => {
4404            let object_type = match header.kind {
4405                PackObjectKind::Commit => ObjectType::Commit,
4406                PackObjectKind::Tree => ObjectType::Tree,
4407                PackObjectKind::Blob => ObjectType::Blob,
4408                PackObjectKind::Tag => ObjectType::Tag,
4409                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
4410                    return Err(GitError::InvalidFormat(
4411                        "delta pack entry decoded without a base".into(),
4412                    ));
4413                }
4414            };
4415            Arc::new(EncodedObject::new(object_type, body))
4416        }
4417        Some(DeltaBase::Offset(base_offset)) => {
4418            let base = match read_object_at_inner(
4419                pack_bytes,
4420                base_offset,
4421                format,
4422                resolve_ref_base,
4423                resolve_ofs_base,
4424                cache,
4425            ) {
4426                Ok(base) => base,
4427                Err(pack_err) => match resolve_ofs_base(base_offset)? {
4428                    Some(base) => base,
4429                    None => return Err(pack_err),
4430                },
4431            };
4432            let resolved = apply_pack_delta(&base.body, &body)?;
4433            Arc::new(EncodedObject::new(base.object_type, resolved))
4434        }
4435        Some(DeltaBase::Ref(base_oid)) => {
4436            let base = resolve_ref_base(&base_oid)?
4437                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
4438            let resolved = apply_pack_delta(&base.body, &body)?;
4439            Arc::new(EncodedObject::new(base.object_type, resolved))
4440        }
4441    };
4442    // Record the fully resolved object so any later read that walks through this
4443    // offset (as a delta base or directly) reuses it. Bases are inserted as the
4444    // recursion unwinds, so a chain is decoded at most once across reads.
4445    cache.insert(offset, Arc::clone(&object));
4446    Ok(object)
4447}
4448
/// The object type and final (inflated) size of the entry at `offset`, *without*
/// materializing the object body — git's `cat-file --batch-check` fast path.
///
/// A base object's size is already in its pack entry header, and a delta's result
/// size is the second varint at the front of its (small) delta stream, so neither
/// inflates the full content. The reported type is the type at the end of the
/// delta chain (deltas inherit their base's type). `resolve_ref_base_type` supplies
/// the type of a ref-delta base that lives outside this pack (resolved through the
/// wider object store); ofs-delta bases are followed within `pack_bytes` directly.
///
/// Nothing is memoized between calls: a header cache that stores nothing is
/// passed down.
pub fn read_object_header_at<F>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base_type: F,
) -> Result<(ObjectType, u64)>
where
    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
{
    read_object_header_at_inner(
        pack_bytes,
        offset,
        format,
        &mut resolve_ref_base_type,
        &mut NoopHeaderTypeCache,
    )
}
4475
/// Memo of `pack offset -> resolved header (end-of-chain type, result size)` for
/// the `cat-file --batch-check` header fast path.
///
/// Without it, resolving the *type* of an ofs-delta walks the whole delta chain
/// to its base on every header read, re-inflating each link's leading varints
/// from scratch — so reading every object in a deeply-deltified pack costs
/// O(objects x chain-depth) and goes super-linear (sley#26). Two reuses fall out
/// of memoizing `offset -> (type, size)`:
///
/// * a chain's end-of-chain type is resolved at most once, so later objects on
///   the same chain skip the walk; and
/// * a repeated lookup of the same object (common in batch input) returns from
///   the memo without re-inflating its delta header at all.
///
/// The size stored is the object's final (inflated) result size — read from its
/// own pack/delta header, never by materializing the body.
///
/// Unlike [`PackDeltaCache`], recording takes `&mut self`, so callers thread
/// the cache through reads by mutable reference.
pub trait HeaderTypeCache {
    /// The previously resolved header at `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Record the resolved header at `pack_offset` for reuse by later reads.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
4498
/// A [`HeaderTypeCache`] that remembers nothing; used by
/// [`read_object_header_at`] so callers that do not supply a cache resolve
/// every header from the pack bytes.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always a miss.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards `_header`.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
4507
4508/// Like [`read_object_header_at`] but threads a caller-owned [`HeaderTypeCache`]
4509/// through the read so (a) the ofs-delta chain's end-of-chain type is resolved at
4510/// most once per chain and (b) a repeated lookup of the same offset returns from
4511/// the memo without re-inflating (sley#26). The cache is keyed by in-pack offset,
4512/// so it must be scoped to a single pack's bytes by the caller.
4513pub fn read_object_header_at_with_cache<F, C>(
4514    pack_bytes: &[u8],
4515    offset: u64,
4516    format: ObjectFormat,
4517    mut resolve_ref_base_type: F,
4518    type_cache: &mut C,
4519) -> Result<(ObjectType, u64)>
4520where
4521    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4522    C: HeaderTypeCache + ?Sized,
4523{
4524    if let Some(header) = type_cache.get(offset) {
4525        return Ok(header);
4526    }
4527    read_object_header_at_inner(
4528        pack_bytes,
4529        offset,
4530        format,
4531        &mut resolve_ref_base_type,
4532        type_cache,
4533    )
4534}
4535
4536fn read_object_header_at_inner<F, C>(
4537    pack_bytes: &[u8],
4538    offset: u64,
4539    format: ObjectFormat,
4540    resolve_ref_base_type: &mut F,
4541    type_cache: &mut C,
4542) -> Result<(ObjectType, u64)>
4543where
4544    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4545    C: HeaderTypeCache + ?Sized,
4546{
4547    let trailer_offset = pack_bytes
4548        .len()
4549        .checked_sub(format.raw_len())
4550        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
4551    let mut cursor = usize::try_from(offset)
4552        .ok()
4553        .filter(|&value| value < trailer_offset)
4554        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
4555    let header = parse_entry_header(pack_bytes, &mut cursor)?;
4556    let resolved = match header.kind {
4557        PackObjectKind::Commit => (ObjectType::Commit, header.size),
4558        PackObjectKind::Tree => (ObjectType::Tree, header.size),
4559        PackObjectKind::Blob => (ObjectType::Blob, header.size),
4560        PackObjectKind::Tag => (ObjectType::Tag, header.size),
4561        PackObjectKind::OfsDelta => {
4562            let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
4563            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4564            // The end-of-chain type only depends on the base, so reuse it across
4565            // reads instead of re-walking the chain per object (sley#26).
4566            let base_type = match type_cache.get(base_offset) {
4567                Some((base_type, _)) => base_type,
4568                None => {
4569                    let (base_type, _) = read_object_header_at_inner(
4570                        pack_bytes,
4571                        base_offset,
4572                        format,
4573                        resolve_ref_base_type,
4574                        type_cache,
4575                    )?;
4576                    base_type
4577                }
4578            };
4579            (base_type, size)
4580        }
4581        PackObjectKind::RefDelta => {
4582            let hash_len = format.raw_len();
4583            if cursor + hash_len > trailer_offset {
4584                return Err(GitError::InvalidFormat(
4585                    "truncated ref-delta base object id".into(),
4586                ));
4587            }
4588            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
4589            cursor += hash_len;
4590            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4591            let base_type = resolve_ref_base_type(&oid)?
4592                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
4593            (base_type, size)
4594        }
4595    };
4596    // Memoize the fully resolved header so a repeated lookup of this offset (or a
4597    // chain that bases on it) returns without re-inflating (sley#26).
4598    type_cache.put(offset, resolved);
4599    Ok(resolved)
4600}
4601
4602/// Number of inflated delta-stream bytes to read when only the leading base-size
4603/// and result-size varints are needed. Each varint is at most 10 bytes, so a short
4604/// prefix always covers both without inflating the delta instructions.
4605const DELTA_HEADER_PREFIX_LEN: usize = 32;
4606
4607/// Result size of a delta whose zlib-compressed stream starts at `compressed`,
4608/// inflating only the short prefix that holds its two leading varints.
4609fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
4610    let mut prefix = Vec::new();
4611    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
4612    decoded_delta_result_size(&prefix)
4613}
4614
4615fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
4616    let first = next_byte(bytes, offset)?;
4617    let mut size = u64::from(first & 0x0f);
4618    let kind = match (first >> 4) & 0x07 {
4619        1 => PackObjectKind::Commit,
4620        2 => PackObjectKind::Tree,
4621        3 => PackObjectKind::Blob,
4622        4 => PackObjectKind::Tag,
4623        6 => PackObjectKind::OfsDelta,
4624        7 => PackObjectKind::RefDelta,
4625        other => {
4626            return Err(GitError::InvalidFormat(format!(
4627                "invalid pack object type {other}"
4628            )));
4629        }
4630    };
4631    let mut shift = 4;
4632    let mut byte = first;
4633    while byte & 0x80 != 0 {
4634        byte = next_byte(bytes, offset)?;
4635        let part = u64::from(byte & 0x7f);
4636        size = size
4637            .checked_add(
4638                part.checked_shl(shift)
4639                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
4640            )
4641            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
4642        shift += 7;
4643    }
4644    Ok(EntryHeader { kind, size })
4645}
4646
4647fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
4648    let mut byte = next_byte(bytes, offset)?;
4649    let mut relative = u64::from(byte & 0x7f);
4650    while byte & 0x80 != 0 {
4651        byte = next_byte(bytes, offset)?;
4652        relative = relative
4653            .checked_add(1)
4654            .and_then(|value| value.checked_shl(7))
4655            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
4656            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
4657    }
4658    entry_offset
4659        .checked_sub(relative)
4660        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
4661}
4662
4663fn resolve_pack_entries<F>(
4664    parsed: Vec<ParsedPackEntry>,
4665    format: ObjectFormat,
4666    external_base: &mut F,
4667) -> Result<Vec<PackObject>>
4668where
4669    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
4670{
4671    let mut offset_to_index = HashMap::with_capacity(parsed.len());
4672    for (idx, entry) in parsed.iter().enumerate() {
4673        offset_to_index.insert(parsed_entry_offset(entry), idx);
4674    }
4675
4676    let mut resolved = vec![None; parsed.len()];
4677    let mut oid_to_index = HashMap::new();
4678    let mut unresolved = 0usize;
4679    for (idx, entry) in parsed.iter().enumerate() {
4680        match entry {
4681            ParsedPackEntry::Resolved(object) => {
4682                oid_to_index.insert(object.entry.oid, idx);
4683                resolved[idx] = Some(object.clone());
4684            }
4685            ParsedPackEntry::Delta { .. } => unresolved += 1,
4686        }
4687    }
4688
4689    while unresolved != 0 {
4690        let mut progress = false;
4691        for idx in 0..parsed.len() {
4692            if resolved[idx].is_some() {
4693                continue;
4694            }
4695            let ParsedPackEntry::Delta {
4696                base,
4697                compressed_size,
4698                delta_size,
4699                offset,
4700                delta,
4701            } = &parsed[idx]
4702            else {
4703                continue;
4704            };
4705            let Some(base_object) = delta_base_object(
4706                base,
4707                &offset_to_index,
4708                &oid_to_index,
4709                &resolved,
4710                external_base,
4711            )?
4712            else {
4713                continue;
4714            };
4715            let body = apply_pack_delta(base_object.body(), delta)?;
4716            let object = EncodedObject::new(base_object.object_type(), body);
4717            let oid = object.object_id(format)?;
4718            let pack_object = PackObject {
4719                entry: PackEntry {
4720                    oid,
4721                    compressed_size: *compressed_size,
4722                    uncompressed_size: object.body.len() as u64,
4723                    offset: *offset,
4724                },
4725                object,
4726            };
4727            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
4728                return Err(GitError::InvalidObject(
4729                    "resolved delta size does not match delta header".into(),
4730                ));
4731            }
4732            if *delta_size != delta.len() as u64 {
4733                return Err(GitError::InvalidObject(format!(
4734                    "pack delta declared {delta_size} bytes, decoded {}",
4735                    delta.len()
4736                )));
4737            }
4738            oid_to_index.insert(oid, idx);
4739            resolved[idx] = Some(pack_object);
4740            unresolved -= 1;
4741            progress = true;
4742        }
4743        if !progress {
4744            return Err(GitError::Unsupported("unresolved delta base".into()));
4745        }
4746    }
4747
4748    resolved
4749        .into_iter()
4750        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
4751        .collect()
4752}
4753
4754fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
4755    match entry {
4756        ParsedPackEntry::Resolved(object) => object.entry.offset,
4757        ParsedPackEntry::Delta { offset, .. } => *offset,
4758    }
4759}
4760
4761enum DeltaBaseObject<'a> {
4762    Borrowed(&'a EncodedObject),
4763    Owned(EncodedObject),
4764}
4765
4766impl DeltaBaseObject<'_> {
4767    fn object_type(&self) -> ObjectType {
4768        match self {
4769            Self::Borrowed(object) => object.object_type,
4770            Self::Owned(object) => object.object_type,
4771        }
4772    }
4773
4774    fn body(&self) -> &[u8] {
4775        match self {
4776            Self::Borrowed(object) => &object.body,
4777            Self::Owned(object) => &object.body,
4778        }
4779    }
4780}
4781
4782fn delta_base_object<'a, F>(
4783    base: &DeltaBase,
4784    offset_to_index: &HashMap<u64, usize>,
4785    oid_to_index: &HashMap<ObjectId, usize>,
4786    resolved: &'a [Option<PackObject>],
4787    external_base: &mut F,
4788) -> Result<Option<DeltaBaseObject<'a>>>
4789where
4790    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
4791{
4792    match base {
4793        DeltaBase::Offset(offset) => {
4794            let Some(index) = offset_to_index.get(offset).copied() else {
4795                return Err(GitError::InvalidFormat(format!(
4796                    "ofs-delta base offset {offset} not found"
4797                )));
4798            };
4799            Ok(resolved[index]
4800                .as_ref()
4801                .map(|object| DeltaBaseObject::Borrowed(&object.object)))
4802        }
4803        DeltaBase::Ref(oid) => {
4804            if let Some(index) = oid_to_index.get(oid).copied() {
4805                return Ok(resolved[index]
4806                    .as_ref()
4807                    .map(|object| DeltaBaseObject::Borrowed(&object.object)));
4808            }
4809            external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
4810        }
4811    }
4812}
4813
4814fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
4815    let mut cursor = 0usize;
4816    let base_size = read_delta_varint(delta, &mut cursor)?;
4817    if base_size != base.len() as u64 {
4818        return Err(GitError::InvalidObject(format!(
4819            "delta base size mismatch: expected {base_size}, got {}",
4820            base.len()
4821        )));
4822    }
4823    let result_size = read_delta_varint(delta, &mut cursor)?;
4824    // `result_size` is an attacker-controlled delta varint from a network pack
4825    // (install_raw_pack -> sley-fetch). On 64-bit a naive `result_size as usize`
4826    // (or `.min(usize::MAX)`, a no-op there) lets a tiny delta declare
4827    // `u64::MAX`/1 TiB and drive `with_capacity` to abort the process before the
4828    // size-mismatch check below can fire. Route the up-front reservation through
4829    // the sley#2 bound so the speculative allocation is capped; `result.extend`
4830    // still grows the buffer organically and the post-decode length check
4831    // (`result.len() != result_size`) rejects the lie cleanly.
4832    let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
4833    let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
4834    while cursor < delta.len() {
4835        let command = delta[cursor];
4836        cursor += 1;
4837        if command & 0x80 != 0 {
4838            let copy_offset =
4839                read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
4840            let mut copy_size =
4841                read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
4842            if copy_size == 0 {
4843                copy_size = 0x10000;
4844            }
4845            let start = usize::try_from(copy_offset)
4846                .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
4847            let len = usize::try_from(copy_size)
4848                .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
4849            let end = start
4850                .checked_add(len)
4851                .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
4852            let Some(slice) = base.get(start..end) else {
4853                return Err(GitError::InvalidObject(
4854                    "delta copy range exceeds base object".into(),
4855                ));
4856            };
4857            result.extend_from_slice(slice);
4858        } else if command != 0 {
4859            let len = usize::from(command);
4860            let end = cursor
4861                .checked_add(len)
4862                .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
4863            let Some(slice) = delta.get(cursor..end) else {
4864                return Err(GitError::InvalidObject(
4865                    "delta insert range exceeds delta data".into(),
4866                ));
4867            };
4868            result.extend_from_slice(slice);
4869            cursor = end;
4870        } else {
4871            return Err(GitError::InvalidObject(
4872                "delta contains reserved zero command".into(),
4873            ));
4874        }
4875    }
4876    if result.len() as u64 != result_size {
4877        return Err(GitError::InvalidObject(format!(
4878            "delta result size mismatch: expected {result_size}, got {}",
4879            result.len()
4880        )));
4881    }
4882    Ok(result)
4883}
4884
4885fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
4886    let mut cursor = 0usize;
4887    let _ = read_delta_varint(delta, &mut cursor)?;
4888    read_delta_varint(delta, &mut cursor)
4889}
4890
/// Size, in bytes, of the fixed blocks used to index a base object for delta
/// compression. Matches git's `diff-delta.c` block size.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance between indexed base anchors. Delta generation still scans target
/// objects byte-by-byte once there is evidence of shared content; anchoring the
/// base at block boundaries keeps the index compact and avoids per-object
/// hash-table allocation storms on unrelated blobs.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Base-2 logarithm of the number of hash buckets used by [`DeltaIndex`].
/// Bucketing avoids sorting each base object's anchors while keeping
/// exact-hash candidate scans short.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of hash buckets used by [`DeltaIndex`]; always a power of two.
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask that reduces a block hash to its bucket number.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
4906
/// An index over a base object's content used to generate deltas against it.
///
/// The index hashes block-sized anchors of the base, groups them into fixed
/// buckets, and verifies exact byte matches before copying. This avoids both
/// per-bucket allocation storms and the per-object sort needed by a single
/// sorted vector.
struct DeltaIndex<'a> {
    /// The base object's bytes that deltas are generated against.
    base: &'a [u8],
    /// All indexed anchors, stored contiguously and grouped by bucket.
    blocks: Vec<DeltaBlock>,
    /// Bucket boundaries into `blocks`: bucket `b` occupies
    /// `blocks[buckets[b]..buckets[b + 1]]`, so this holds one more entry than
    /// there are buckets.
    buckets: Vec<usize>,
}
4918
/// One indexed anchor of a base object: a block-sized window and where it
/// starts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DeltaBlock {
    /// `block_hash` of the `DELTA_BLOCK_SIZE` bytes starting at `offset`.
    hash: u32,
    /// Byte offset of the block within the base object.
    offset: usize,
}
4924
4925impl<'a> DeltaIndex<'a> {
4926    fn new(base: &'a [u8]) -> Self {
4927        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
4928        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
4929        for_each_delta_anchor(base.len(), |offset| {
4930            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
4931            buckets[delta_bucket(hash) + 1] += 1;
4932            anchors.push(DeltaBlock { hash, offset });
4933        });
4934        for idx in 1..buckets.len() {
4935            buckets[idx] += buckets[idx - 1];
4936        }
4937
4938        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
4939        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
4940        for anchor in anchors {
4941            let bucket = delta_bucket(anchor.hash);
4942            let next = &mut next_offsets[bucket];
4943            blocks[*next] = anchor;
4944            *next += 1;
4945        }
4946
4947        Self {
4948            base,
4949            blocks,
4950            buckets,
4951        }
4952    }
4953
4954    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
4955        let bucket = delta_bucket(hash);
4956        let start = self.buckets[bucket];
4957        let end = self.buckets[bucket + 1];
4958        self.blocks[start..end]
4959            .iter()
4960            .filter(move |block| block.hash == hash)
4961    }
4962
4963    fn has_hash(&self, hash: u32) -> bool {
4964        self.candidate_blocks(hash).next().is_some()
4965    }
4966
4967    fn has_shared_anchor(&self, target: &[u8]) -> bool {
4968        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
4969            return false;
4970        }
4971        let last = target.len() - DELTA_BLOCK_SIZE;
4972        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
4973            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
4974            if self.has_hash(hash) {
4975                return true;
4976            }
4977        }
4978        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
4979            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
4980            if self.has_hash(hash) {
4981                return true;
4982            }
4983        }
4984        false
4985    }
4986
4987    /// Generate a delta that reconstructs `target` from this index's base.
4988    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
4989        if !self.has_shared_anchor(target) {
4990            return None;
4991        }
4992        let base = self.base;
4993        let mut delta = Vec::new();
4994        write_delta_varint(&mut delta, base.len() as u64);
4995        write_delta_varint(&mut delta, target.len() as u64);
4996
4997        let mut pending_insert_start = 0usize;
4998        let mut pos = 0usize;
4999        while pos < target.len() {
5000            let mut best_len = 0usize;
5001            let mut best_offset = 0usize;
5002            if pos + DELTA_BLOCK_SIZE <= target.len() {
5003                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
5004                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
5005                    // Confirm the block actually matches (hash collisions are
5006                    // possible) before measuring how far it extends.
5007                    let candidate = candidate.offset;
5008                    let max_len = (base.len() - candidate).min(target.len() - pos);
5009                    let mut len = 0usize;
5010                    while len < max_len && base[candidate + len] == target[pos + len] {
5011                        len += 1;
5012                    }
5013                    if len > best_len {
5014                        best_len = len;
5015                        best_offset = candidate;
5016                    }
5017                }
5018            }
5019
5020            if best_len >= DELTA_BLOCK_SIZE {
5021                if pending_insert_start < pos {
5022                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
5023                }
5024                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
5025                pos += best_len;
5026                pending_insert_start = pos;
5027            } else {
5028                pos += 1;
5029            }
5030        }
5031        if pending_insert_start < target.len() {
5032            write_delta_insert(&mut delta, &target[pending_insert_start..]);
5033        }
5034        Some(delta)
5035    }
5036}
5037
5038fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
5039    if len < DELTA_BLOCK_SIZE {
5040        return;
5041    }
5042    len -= DELTA_BLOCK_SIZE;
5043    for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
5044        visit(offset);
5045    }
5046    if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
5047        visit(len);
5048    }
5049}
5050
5051fn delta_anchor_count(len: usize) -> usize {
5052    if len < DELTA_BLOCK_SIZE {
5053        return 0;
5054    }
5055    let last = len - DELTA_BLOCK_SIZE;
5056    (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
5057}
5058
5059fn delta_bucket(hash: u32) -> usize {
5060    (hash as usize) & DELTA_BUCKET_MASK
5061}
5062
/// Maximum number of same-hash base anchors examined for each target position
/// during delta generation. Caps the work done extending candidate matches for
/// inputs with many repeated blocks; the index itself still stores every
/// anchor.
const DELTA_MAX_CHAIN: usize = 64;
5066
/// Hash a block of base/target bytes into the key used for bucket lookup.
///
/// This is a plain multiplicative (FNV-style) hash starting from zero. Its
/// quality only affects speed: every candidate match is compared byte-for-byte
/// before it is used, so a collision costs a little extra comparison work and
/// never changes the generated delta's correctness.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |hash, &byte| {
        hash.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
5079
/// The chosen storage form for a single object during pack generation.
///
/// The form decides which bytes get compressed for the object: its own body
/// for [`PlannedBase::None`], the pre-computed delta otherwise.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored undeltified (a base for others, or no good delta was found).
    None,
    /// Delta against another object in this pack, identified by its original
    /// index. The pre-computed `delta` bytes reconstruct the object from that
    /// base's body.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Delta against an external (thin-pack) base, referenced by object id.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
5092
/// Storage plan for one object of a pack being written.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// How the object is stored: whole, or as a delta against some base.
    base: PlannedBase,
}
5097
/// An object offered as a delta base to the streaming delta planner through
/// its `base_horizon` queue.
///
/// NOTE(review): the field meanings below are inferred from their names and
/// types; the code that fills this struct is outside this section — confirm.
#[derive(Debug, Clone)]
struct StreamingDeltaBase {
    /// Object id of the base.
    oid: ObjectId,
    /// The base object itself, shared rather than copied.
    object: Arc<EncodedObject>,
    /// Presumably the pack offset at which the base was written — confirm.
    offset: u64,
    /// Presumably the length of the delta chain ending at this base — confirm.
    depth: usize,
}
5105
/// The chosen storage form for a single object in the streaming pack writer.
///
/// As with [`PlannedBase`], the form decides which bytes get compressed: the
/// object's own body for `None`, the carried `delta` for every other variant.
///
/// NOTE(review): what distinguishes the three delta variants is inferred from
/// their names and fields — confirm against the planner.
#[derive(Debug, Clone, PartialEq, Eq)]
enum StreamingPlannedBase {
    /// Stored undeltified.
    None,
    /// Delta against another object of the batch being planned, identified by
    /// its index (presumably into the same `objects` slice — confirm).
    Current {
        base_idx: usize,
        delta: Vec<u8>,
    },
    /// Delta against a base identified by both object id and offset
    /// (presumably one written earlier in the same pack — confirm).
    Previous {
        base_oid: ObjectId,
        base_offset: u64,
        delta: Vec<u8>,
    },
    /// Delta against a base referenced by object id only (presumably an
    /// external thin-pack base, as in [`PlannedBase::External`] — confirm).
    External {
        base_oid: ObjectId,
        delta: Vec<u8>,
    },
}
5123
/// Storage plan for one object in the streaming pack writer. The planner
/// starts every entry as undeltified with depth zero.
#[derive(Debug, Clone, PartialEq, Eq)]
struct StreamingPlannedEntry {
    /// How the object is stored: whole, or as a delta against some base.
    base: StreamingPlannedBase,
    /// Presumably the length of the delta chain ending at this object
    /// (zero for an undeltified object) — confirm.
    depth: usize,
}
5129
/// Identity of a candidate delta base considered by the streaming planner.
///
/// NOTE(review): variant meanings are inferred from their names and their
/// resemblance to [`StreamingPlannedBase`] — confirm against the planner.
#[derive(Debug, Clone, PartialEq, Eq)]
enum StreamingCandidateBase {
    /// A base identified by object id and offset (presumably an object written
    /// before the current batch — confirm).
    Previous {
        oid: ObjectId,
        offset: u64,
        depth: usize,
    },
    /// A base identified by index (presumably into the current batch —
    /// confirm).
    Current {
        idx: usize,
        depth: usize,
    },
}
5142
/// A candidate base together with the delta index built over its content.
///
/// NOTE(review): presumably one slot of the streaming planner's sliding
/// window — confirm against the planner.
struct StreamingDeltaWindowEntry<'a> {
    /// Which object this candidate is.
    base: StreamingCandidateBase,
    /// Type of the candidate object.
    object_type: ObjectType,
    /// Index over the candidate's body, used to generate deltas against it.
    index: DeltaIndex<'a>,
}
5148
5149fn compress_planned_payloads(
5150    objects: &[&EncodedObject],
5151    plan: &[PlannedEntry],
5152    order: &[usize],
5153    compression_level: u32,
5154) -> Result<Vec<Vec<u8>>> {
5155    if order.is_empty() {
5156        return Ok(Vec::new());
5157    }
5158
5159    let worker_count = std::thread::available_parallelism()
5160        .map(|threads| threads.get())
5161        .unwrap_or(1)
5162        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
5163        .min(order.len());
5164    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
5165        let mut payloads = Vec::with_capacity(order.len());
5166        for &idx in order {
5167            payloads.push(compressed_payload(
5168                planned_payload(objects, plan, idx),
5169                compression_level,
5170            )?);
5171        }
5172        return Ok(payloads);
5173    }
5174
5175    let chunk_len = order.len().div_ceil(worker_count);
5176    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
5177    std::thread::scope(|scope| {
5178        let mut handles = Vec::new();
5179        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
5180            let chunk_start = chunk_idx * chunk_len;
5181            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
5182                let mut chunk_payloads = Vec::with_capacity(chunk.len());
5183                for (offset, &idx) in chunk.iter().enumerate() {
5184                    chunk_payloads.push((
5185                        chunk_start + offset,
5186                        compressed_payload(planned_payload(objects, plan, idx), compression_level)?,
5187                    ));
5188                }
5189                Ok(chunk_payloads)
5190            }));
5191        }
5192
5193        let mut first_error = None;
5194        for handle in handles {
5195            match handle.join() {
5196                Ok(Ok(chunk_payloads)) => {
5197                    if first_error.is_none() {
5198                        for (pos, payload) in chunk_payloads {
5199                            payloads[pos] = payload;
5200                        }
5201                    }
5202                }
5203                Ok(Err(err)) => {
5204                    first_error.get_or_insert(err);
5205                }
5206                Err(_) => {
5207                    first_error.get_or_insert_with(|| {
5208                        GitError::InvalidObject("pack compression worker panicked".into())
5209                    });
5210                }
5211            }
5212        }
5213
5214        match first_error {
5215            Some(err) => Err(err),
5216            None => Ok(()),
5217        }
5218    })?;
5219    Ok(payloads)
5220}
5221
5222fn compress_streaming_planned_payloads(
5223    objects: &[Arc<EncodedObject>],
5224    plan: &[StreamingPlannedEntry],
5225    order: &[usize],
5226    compression_level: u32,
5227) -> Result<Vec<Vec<u8>>> {
5228    if order.is_empty() {
5229        return Ok(Vec::new());
5230    }
5231
5232    let worker_count = std::thread::available_parallelism()
5233        .map(|threads| threads.get())
5234        .unwrap_or(1)
5235        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
5236        .min(order.len());
5237    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
5238        let mut payloads = Vec::with_capacity(order.len());
5239        for &idx in order {
5240            payloads.push(compressed_payload(
5241                streaming_planned_payload(objects, plan, idx),
5242                compression_level,
5243            )?);
5244        }
5245        return Ok(payloads);
5246    }
5247
5248    let chunk_len = order.len().div_ceil(worker_count);
5249    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
5250    std::thread::scope(|scope| {
5251        let mut handles = Vec::new();
5252        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
5253            let chunk_start = chunk_idx * chunk_len;
5254            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
5255                let mut chunk_payloads = Vec::with_capacity(chunk.len());
5256                for (offset, &idx) in chunk.iter().enumerate() {
5257                    chunk_payloads.push((
5258                        chunk_start + offset,
5259                        compressed_payload(
5260                            streaming_planned_payload(objects, plan, idx),
5261                            compression_level,
5262                        )?,
5263                    ));
5264                }
5265                Ok(chunk_payloads)
5266            }));
5267        }
5268
5269        let mut first_error = None;
5270        for handle in handles {
5271            match handle.join() {
5272                Ok(Ok(chunk_payloads)) => {
5273                    if first_error.is_none() {
5274                        for (pos, payload) in chunk_payloads {
5275                            payloads[pos] = payload;
5276                        }
5277                    }
5278                }
5279                Ok(Err(err)) => {
5280                    first_error.get_or_insert(err);
5281                }
5282                Err(_) => {
5283                    first_error.get_or_insert_with(|| {
5284                        GitError::InvalidObject("pack compression worker panicked".into())
5285                    });
5286                }
5287            }
5288        }
5289
5290        match first_error {
5291            Some(err) => Err(err),
5292            None => Ok(()),
5293        }
5294    })?;
5295    Ok(payloads)
5296}
5297
5298fn compress_undeltified_payloads(
5299    objects: &[Arc<EncodedObject>],
5300    compression_level: u32,
5301) -> Result<Vec<Vec<u8>>> {
5302    if objects.is_empty() {
5303        return Ok(Vec::new());
5304    }
5305
5306    let worker_count = std::thread::available_parallelism()
5307        .map(|threads| threads.get())
5308        .unwrap_or(1)
5309        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
5310        .min(objects.len());
5311    if worker_count <= 1 || objects.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
5312        let mut payloads = Vec::with_capacity(objects.len());
5313        for object in objects {
5314            payloads.push(compressed_payload(&object.body, compression_level)?);
5315        }
5316        return Ok(payloads);
5317    }
5318
5319    let chunk_len = objects.len().div_ceil(worker_count);
5320    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new)
5321        .take(objects.len())
5322        .collect();
5323    std::thread::scope(|scope| {
5324        let mut handles = Vec::new();
5325        for (chunk_idx, chunk) in objects.chunks(chunk_len).enumerate() {
5326            let chunk_start = chunk_idx * chunk_len;
5327            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
5328                let mut chunk_payloads = Vec::with_capacity(chunk.len());
5329                for (offset, object) in chunk.iter().enumerate() {
5330                    chunk_payloads.push((
5331                        chunk_start + offset,
5332                        compressed_payload(&object.body, compression_level)?,
5333                    ));
5334                }
5335                Ok(chunk_payloads)
5336            }));
5337        }
5338
5339        let mut first_error = None;
5340        for handle in handles {
5341            match handle.join() {
5342                Ok(Ok(chunk_payloads)) => {
5343                    if first_error.is_none() {
5344                        for (pos, payload) in chunk_payloads {
5345                            payloads[pos] = payload;
5346                        }
5347                    }
5348                }
5349                Ok(Err(err)) => {
5350                    first_error.get_or_insert(err);
5351                }
5352                Err(_) => {
5353                    first_error.get_or_insert_with(|| {
5354                        GitError::InvalidObject("pack compression worker panicked".into())
5355                    });
5356                }
5357            }
5358        }
5359
5360        match first_error {
5361            Some(err) => Err(err),
5362            None => Ok(()),
5363        }
5364    })?;
5365    Ok(payloads)
5366}
5367
5368fn streaming_planned_payload<'a>(
5369    objects: &'a [Arc<EncodedObject>],
5370    plan: &'a [StreamingPlannedEntry],
5371    idx: usize,
5372) -> &'a [u8] {
5373    match &plan[idx].base {
5374        StreamingPlannedBase::None => &objects[idx].body,
5375        StreamingPlannedBase::Current { delta, .. }
5376        | StreamingPlannedBase::Previous { delta, .. }
5377        | StreamingPlannedBase::External { delta, .. } => delta,
5378    }
5379}
5380
5381fn planned_payload<'a>(
5382    objects: &'a [&'a EncodedObject],
5383    plan: &'a [PlannedEntry],
5384    idx: usize,
5385) -> &'a [u8] {
5386    match &plan[idx].base {
5387        PlannedBase::None => &objects[idx].body,
5388        PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
5389    }
5390}
5391
5392fn compressed_payload(body: &[u8], compression_level: u32) -> Result<Vec<u8>> {
5393    let mut out = Vec::new();
5394    write_compressed_payload(&mut out, body, compression_level)?;
5395    Ok(out)
5396}
5397
/// Maximum number of external thin-pack bases compared against any single
/// object. Bounds the work of the thin path when a large base set is supplied.
///
/// The delta planners apply this cap to the external base list *before*
/// filtering by object type, so an object may see fewer than this many
/// same-type external candidates.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
5401
/// One slot of the sliding delta window used by `plan_pack_deltas`.
struct DeltaWindowEntry<'a> {
    /// Index of the candidate base in the planner's original `objects` slice.
    idx: usize,
    /// Delta index built over that object's body; later targets are diffed
    /// against it.
    index: DeltaIndex<'a>,
}
5406
5407/// Rank object types for delta grouping. Objects of the same type are far more
5408/// likely to delta well, so the sort groups by this rank first.
5409fn delta_type_rank(object_type: ObjectType) -> u8 {
5410    match object_type {
5411        ObjectType::Commit => 0,
5412        ObjectType::Tree => 1,
5413        ObjectType::Blob => 2,
5414        ObjectType::Tag => 3,
5415    }
5416}
5417
5418fn plan_streaming_window_deltas(
5419    objects: &[Arc<EncodedObject>],
5420    object_ids: &[ObjectId],
5421    base_horizon: &VecDeque<StreamingDeltaBase>,
5422    options: &PackWriteOptions,
5423) -> (Vec<StreamingPlannedEntry>, Vec<usize>) {
5424    let count = objects.len();
5425    let mut plan: Vec<StreamingPlannedEntry> = (0..count)
5426        .map(|_| StreamingPlannedEntry {
5427            base: StreamingPlannedBase::None,
5428            depth: 0,
5429        })
5430        .collect();
5431
5432    let mut order: Vec<usize> = (0..count).collect();
5433    if options.reorder && options.depth > 0 {
5434        order.sort_by(|&left, &right| {
5435            delta_type_rank(objects[left].object_type)
5436                .cmp(&delta_type_rank(objects[right].object_type))
5437                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
5438                .then_with(|| {
5439                    object_ids[left]
5440                        .as_bytes()
5441                        .cmp(object_ids[right].as_bytes())
5442                })
5443        });
5444    }
5445
5446    if options.depth == 0 || options.window == 0 {
5447        return (plan, order);
5448    }
5449
5450    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
5451        Vec::with_capacity(options.thin_bases.len());
5452    let mut external_bases = options.thin_bases.iter().collect::<Vec<_>>();
5453    external_bases
5454        .sort_by(|(left_oid, _), (right_oid, _)| left_oid.as_bytes().cmp(right_oid.as_bytes()));
5455    for (oid, object) in external_bases {
5456        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
5457    }
5458
5459    let mut window: VecDeque<StreamingDeltaWindowEntry<'_>> =
5460        VecDeque::with_capacity(options.window.min(base_horizon.len() + count));
5461    for base in base_horizon {
5462        window.push_back(StreamingDeltaWindowEntry {
5463            base: StreamingCandidateBase::Previous {
5464                oid: base.oid,
5465                offset: base.offset,
5466                depth: base.depth,
5467            },
5468            object_type: base.object.object_type,
5469            index: DeltaIndex::new(&base.object.body),
5470        });
5471    }
5472    while window.len() > options.window {
5473        window.pop_front();
5474    }
5475
5476    for &idx in &order {
5477        let target = &objects[idx].body;
5478        let target_type = objects[idx].object_type;
5479
5480        let mut best_delta: Option<Vec<u8>> = None;
5481        let mut best_base = StreamingPlannedBase::None;
5482        let mut best_base_depth = 0usize;
5483
5484        for base_entry in window.iter().rev() {
5485            if base_entry.object_type != target_type {
5486                continue;
5487            }
5488            let base_depth = match &base_entry.base {
5489                StreamingCandidateBase::Previous { depth, .. }
5490                | StreamingCandidateBase::Current { depth, .. } => *depth,
5491            };
5492            if base_depth + 1 > options.depth {
5493                continue;
5494            }
5495            let Some(delta) = base_entry.index.delta(target) else {
5496                continue;
5497            };
5498            if !delta_is_acceptable(&delta, target.len()) {
5499                continue;
5500            }
5501            if best_delta
5502                .as_ref()
5503                .is_none_or(|current| delta.len() < current.len())
5504            {
5505                best_delta = Some(delta);
5506                best_base_depth = base_depth;
5507                best_base = match &base_entry.base {
5508                    StreamingCandidateBase::Previous { oid, offset, .. } => {
5509                        StreamingPlannedBase::Previous {
5510                            base_oid: *oid,
5511                            base_offset: *offset,
5512                            delta: Vec::new(),
5513                        }
5514                    }
5515                    StreamingCandidateBase::Current { idx: base_idx, .. } => {
5516                        StreamingPlannedBase::Current {
5517                            base_idx: *base_idx,
5518                            delta: Vec::new(),
5519                        }
5520                    }
5521                };
5522            }
5523        }
5524
5525        for (base_oid, base_type, base_index) in
5526            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
5527        {
5528            if *base_type != target_type {
5529                continue;
5530            }
5531            let Some(delta) = base_index.delta(target) else {
5532                continue;
5533            };
5534            if !delta_is_acceptable(&delta, target.len()) {
5535                continue;
5536            }
5537            if best_delta
5538                .as_ref()
5539                .is_none_or(|current| delta.len() < current.len())
5540            {
5541                best_delta = Some(delta);
5542                best_base_depth = 0;
5543                best_base = StreamingPlannedBase::External {
5544                    base_oid: *base_oid,
5545                    delta: Vec::new(),
5546                };
5547            }
5548        }
5549
5550        if let Some(delta) = best_delta {
5551            plan[idx].depth = best_base_depth + 1;
5552            plan[idx].base = match best_base {
5553                StreamingPlannedBase::Current { base_idx, .. } => {
5554                    StreamingPlannedBase::Current { base_idx, delta }
5555                }
5556                StreamingPlannedBase::Previous {
5557                    base_oid,
5558                    base_offset,
5559                    ..
5560                } => StreamingPlannedBase::Previous {
5561                    base_oid,
5562                    base_offset,
5563                    delta,
5564                },
5565                StreamingPlannedBase::External { base_oid, .. } => {
5566                    StreamingPlannedBase::External { base_oid, delta }
5567                }
5568                StreamingPlannedBase::None => StreamingPlannedBase::None,
5569            };
5570        }
5571
5572        window.push_back(StreamingDeltaWindowEntry {
5573            base: StreamingCandidateBase::Current {
5574                idx,
5575                depth: plan[idx].depth,
5576            },
5577            object_type: objects[idx].object_type,
5578            index: DeltaIndex::new(&objects[idx].body),
5579        });
5580        while window.len() > options.window {
5581            window.pop_front();
5582        }
5583    }
5584
5585    (plan, order)
5586}
5587
5588/// Decide how each object is stored (undeltified or deltified) and the order in
5589/// which objects are emitted into the pack.
5590///
5591/// # Ordering
5592///
5593/// Candidates are sorted by `(type, size descending, object id)`:
5594/// * **type** — only same-type objects are deltified against one another, so
5595///   grouping by type keeps the sliding window full of viable bases. Type rank
5596///   follows [`delta_type_rank`] (commit, tree, blob, tag).
5597/// * **size descending** — larger objects come first so smaller, later objects
5598///   delta against larger bases (git's heuristic). Raw [`EncodedObject`]s carry
5599///   no path/name, so the usual path-hash key is unavailable; size is the next
5600///   best locality signal.
5601/// * **object id** — a deterministic tiebreaker for reproducible packs.
5602///
5603/// # Selection
5604///
5605/// Each object is compared against the previous up to `window` same-type
5606/// candidates (and, for thin packs, up to [`DELTA_MAX_EXTERNAL_BASES`] external
5607/// bases of the same type). The smallest delta whose encoded length is strictly
5608/// less than the object's own body is kept; otherwise the object is stored
5609/// undeltified. Delta chain depth is bounded by `options.depth` (a base may
5610/// only be used if doing so keeps the resulting chain within the bound); a depth
5611/// of `0` disables deltification entirely.
5612///
5613/// Returns the per-object plan (indexed by original object index) together with
5614/// the emit order. Every in-pack delta references a candidate that is earlier in
5615/// the emit order, so emitting in that order writes each base before any object
5616/// that depends on it.
5617fn plan_pack_deltas(
5618    objects: &[&EncodedObject],
5619    object_ids: &[ObjectId],
5620    options: &PackWriteOptions,
5621) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
5622    let count = objects.len();
5623    let mut plan: Vec<PlannedEntry> = (0..count)
5624        .map(|_| PlannedEntry {
5625            base: PlannedBase::None,
5626        })
5627        .collect();
5628
5629    // Processing order. Deltas only point backwards within this order, which is
5630    // therefore also a valid emit order. Reordering by type/size improves delta
5631    // locality but is skipped when disabled or when deltification is off.
5632    let mut order: Vec<usize> = (0..count).collect();
5633    if options.reorder && options.depth > 0 {
5634        order.sort_by(|&left, &right| {
5635            delta_type_rank(objects[left].object_type)
5636                .cmp(&delta_type_rank(objects[right].object_type))
5637                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
5638                .then_with(|| {
5639                    object_ids[left]
5640                        .as_bytes()
5641                        .cmp(object_ids[right].as_bytes())
5642                })
5643        });
5644    }
5645
5646    if options.depth == 0 {
5647        return Ok((plan, order));
5648    }
5649
5650    // Pre-build delta indexes for external thin-pack bases, grouped by type so
5651    // an object only compares against compatible bases.
5652    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
5653        Vec::with_capacity(options.thin_bases.len());
5654    for (oid, object) in &options.thin_bases {
5655        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
5656    }
5657
5658    // Chain depth ending at each object (0 = undeltified). Used to keep delta
5659    // chains within `options.depth`.
5660    let mut depth = vec![0usize; count];
5661    // Sliding window of recently processed original indices, most recent last.
5662    let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
5663        std::collections::VecDeque::new();
5664
5665    for &idx in &order {
5666        let target = &objects[idx].body;
5667        let target_type = objects[idx].object_type;
5668
5669        let mut best_delta: Option<Vec<u8>> = None;
5670        let mut best_base = PlannedBase::None;
5671
5672        // Try in-pack candidates from the window (same type only).
5673        for base_entry in window.iter().rev() {
5674            let base_idx = base_entry.idx;
5675            if objects[base_idx].object_type != target_type {
5676                continue;
5677            }
5678            // Using this base would make the new chain depth + 1; skip if that
5679            // would exceed the configured maximum.
5680            if depth[base_idx] + 1 > options.depth {
5681                continue;
5682            }
5683            let Some(delta) = base_entry.index.delta(target) else {
5684                continue;
5685            };
5686            if !delta_is_acceptable(&delta, target.len()) {
5687                continue;
5688            }
5689            if best_delta
5690                .as_ref()
5691                .is_none_or(|current| delta.len() < current.len())
5692            {
5693                best_delta = Some(delta);
5694                best_base = PlannedBase::InPack {
5695                    base_idx,
5696                    delta: Vec::new(),
5697                };
5698            }
5699        }
5700
5701        // Try external thin-pack bases (ref-delta; external base is depth 0, so
5702        // the resulting chain depth is 1, always within a non-zero bound).
5703        for (base_oid, base_type, base_index) in
5704            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
5705        {
5706            if *base_type != target_type {
5707                continue;
5708            }
5709            let Some(delta) = base_index.delta(target) else {
5710                continue;
5711            };
5712            if !delta_is_acceptable(&delta, target.len()) {
5713                continue;
5714            }
5715            if best_delta
5716                .as_ref()
5717                .is_none_or(|current| delta.len() < current.len())
5718            {
5719                best_delta = Some(delta);
5720                best_base = PlannedBase::External {
5721                    base_oid: *base_oid,
5722                    delta: Vec::new(),
5723                };
5724            }
5725        }
5726
5727        if let Some(delta) = best_delta {
5728            match best_base {
5729                PlannedBase::InPack { base_idx, .. } => {
5730                    depth[idx] = depth[base_idx] + 1;
5731                    plan[idx].base = PlannedBase::InPack { base_idx, delta };
5732                }
5733                PlannedBase::External { base_oid, .. } => {
5734                    depth[idx] = 1;
5735                    plan[idx].base = PlannedBase::External { base_oid, delta };
5736                }
5737                PlannedBase::None => {}
5738            }
5739        }
5740
5741        // Add this object to the window for subsequent candidates.
5742        window.push_back(DeltaWindowEntry {
5743            idx,
5744            index: DeltaIndex::new(&objects[idx].body),
5745        });
5746        while window.len() > options.window {
5747            window.pop_front();
5748        }
5749    }
5750
5751    Ok((plan, order))
5752}
5753
/// Reports whether `delta` should replace the undeltified object.
///
/// A usable delta is non-empty and strictly shorter than the `target_len`
/// bytes of the object body; anything else saves nothing over storing the
/// self-contained object.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
5761
/// Appends `value` to `out` as a little-endian base-128 varint.
///
/// Each byte carries seven payload bits, least-significant group first; the
/// high bit is set on every byte except the last. Zero encodes as a single
/// `0x00` byte.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value as u8) & 0x7f) | 0x80);
        value >>= 7;
    }
    // What is left fits in seven bits, so no continuation flag is needed.
    out.push(value as u8);
}
5775
/// Appends delta copy instructions that copy `size` bytes starting at `offset`
/// in the base object.
///
/// The range is split into instructions of at most `0x10000` bytes. Each
/// instruction is a command byte with bit 7 set, followed by the non-zero
/// bytes of the offset (low four bytes, flagged by command bits 0-3) and of
/// the size (low three bytes, flagged by command bits 4-6), least-significant
/// first. A full `0x10000`-byte chunk is written with a size field of zero.
/// Only the low 32 bits of the offset are encoded. A `size` of zero writes
/// nothing.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_COPY: u64 = 0x10000;
    while size > 0 {
        let span = size.min(MAX_COPY);
        let size_field = if span == MAX_COPY { 0 } else { span };

        // Reserve the command byte now and set its flag bits as operand
        // bytes are appended behind it.
        let command_at = out.len();
        out.push(0x80);

        let offset_bytes = offset.to_le_bytes();
        for (bit, &byte) in offset_bytes[..4].iter().enumerate() {
            if byte != 0 {
                out[command_at] |= 1 << bit;
                out.push(byte);
            }
        }

        let size_bytes = size_field.to_le_bytes();
        for (bit, &byte) in size_bytes[..3].iter().enumerate() {
            if byte != 0 {
                out[command_at] |= 0x10 << bit;
                out.push(byte);
            }
        }

        offset += span;
        size -= span;
    }
}
5805
/// Appends delta insert instructions carrying `bytes` verbatim.
///
/// The data is split into runs of at most `0x7f` bytes; each run is written as
/// a length byte followed by the run itself. Empty input writes nothing.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for run in bytes.chunks(0x7f) {
        out.push(run.len() as u8);
        out.extend_from_slice(run);
    }
}
5814
5815fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
5816    let mut value = 0u64;
5817    let mut shift = 0u32;
5818    loop {
5819        let Some(byte) = delta.get(*cursor).copied() else {
5820            return Err(GitError::InvalidObject("truncated delta size".into()));
5821        };
5822        *cursor += 1;
5823        value = value
5824            .checked_add(
5825                u64::from(byte & 0x7f)
5826                    .checked_shl(shift)
5827                    .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
5828            )
5829            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5830        if byte & 0x80 == 0 {
5831            return Ok(value);
5832        }
5833        shift = shift
5834            .checked_add(7)
5835            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5836    }
5837}
5838
5839fn read_delta_copy_value(
5840    delta: &[u8],
5841    cursor: &mut usize,
5842    command: u8,
5843    masks: &[u8],
5844) -> Result<u64> {
5845    let mut value = 0u64;
5846    for (shift, mask) in masks.iter().enumerate() {
5847        if command & mask != 0 {
5848            let Some(byte) = delta.get(*cursor).copied() else {
5849                return Err(GitError::InvalidObject(
5850                    "truncated delta copy command".into(),
5851                ));
5852            };
5853            *cursor += 1;
5854            value |= u64::from(byte) << (shift * 8);
5855        }
5856    }
5857    Ok(value)
5858}
5859
5860fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8], compression_level: u32) -> Result<()> {
5861    let mut compressor = Compress::new(Compression::new(compression_level.min(9)), true);
5862    out.reserve(zlib_compress_bound(body.len()));
5863    let status = compressor
5864        .compress_vec(body, out, FlushCompress::Finish)
5865        .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
5866    if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
5867        return Err(GitError::InvalidObject(
5868            "zlib compression did not finish pack entry".into(),
5869        ));
5870    }
5871    Ok(())
5872}
5873
/// Upper bound used to size the output buffer for compressing `len` bytes.
///
/// Computes `len + len/2^12 + len/2^14 + len/2^25 + 13` with saturating
/// arithmetic, so the result never wraps.
fn zlib_compress_bound(len: usize) -> usize {
    [12u32, 14, 25]
        .iter()
        .fold(len, |bound, &shift| bound.saturating_add(len >> shift))
        .saturating_add(13)
}
5880
5881fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
5882    let type_code = match object_type {
5883        ObjectType::Commit => 1,
5884        ObjectType::Tree => 2,
5885        ObjectType::Blob => 3,
5886        ObjectType::Tag => 4,
5887    };
5888    write_pack_entry_header_kind(out, type_code, size);
5889}
5890
/// Appends a pack entry header for the raw `type_code` and `size`.
///
/// The first byte holds the type code in bits 4-6 and the low four bits of the
/// size in bits 0-3. The remaining size bits follow in seven-bit groups,
/// least-significant first. Bit 7 of every byte except the last is set to
/// signal that more bytes follow.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    let low_nibble = (size & 0x0f) as u8;
    let mut remaining = size >> 4;
    let continuation = if remaining != 0 { 0x80 } else { 0x00 };
    out.push((type_code << 4) | low_nibble | continuation);

    while remaining != 0 {
        let group = (remaining & 0x7f) as u8;
        remaining >>= 7;
        out.push(if remaining != 0 { group | 0x80 } else { group });
    }
}
5907
5908fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
5909    if relative == 0 {
5910        return Err(GitError::InvalidFormat(
5911            "ofs-delta relative offset cannot be zero".into(),
5912        ));
5913    }
5914    let mut value = relative;
5915    let mut bytes = vec![(value & 0x7f) as u8];
5916    value >>= 7;
5917    while value != 0 {
5918        value -= 1;
5919        bytes.push(((value & 0x7f) as u8) | 0x80);
5920        value >>= 7;
5921    }
5922    bytes.reverse();
5923    out.extend_from_slice(&bytes);
5924    Ok(())
5925}
5926
5927fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
5928    let Some(byte) = bytes.get(*offset).copied() else {
5929        return Err(GitError::InvalidFormat(
5930            "truncated pack entry header".into(),
5931        ));
5932    };
5933    *offset += 1;
5934    Ok(byte)
5935}
5936
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// Panics if `bytes` holds fewer than two bytes; callers pass pre-sized slices.
fn u16_be(bytes: &[u8]) -> u16 {
    (u16::from(bytes[0]) << 8) | u16::from(bytes[1])
}
5940
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// Panics if `bytes` holds fewer than four bytes; callers pass pre-sized
/// slices.
fn u32_be(bytes: &[u8]) -> u32 {
    bytes[..4]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
5944
/// Decodes the first eight bytes of `bytes` as a big-endian `u64`.
///
/// Panics if `bytes` holds fewer than eight bytes; callers pass pre-sized
/// slices.
fn u64_be(bytes: &[u8]) -> u64 {
    bytes[..8]
        .iter()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
}
5950
5951fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
5952    let mut fanout = [0u32; 256];
5953    let mut previous = 0u32;
5954    for slot in &mut fanout {
5955        *slot = u32_be(&bytes[*offset..*offset + 4]);
5956        if *slot < previous {
5957            return Err(GitError::InvalidFormat(
5958                "pack index fanout is not monotonic".into(),
5959            ));
5960        }
5961        previous = *slot;
5962        *offset += 4;
5963    }
5964    Ok(fanout)
5965}
5966
5967fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
5968    let expected_min = if oid_bytes[0] == 0 {
5969        0
5970    } else {
5971        fanout[usize::from(oid_bytes[0] - 1)]
5972    };
5973    if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
5974        return Err(GitError::InvalidFormat(
5975            "pack index object id is outside its fanout bucket".into(),
5976        ));
5977    }
5978    Ok(())
5979}
5980
5981fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
5982    if raw_offset & 0x8000_0000 == 0 {
5983        return Ok(u64::from(raw_offset));
5984    }
5985    let large_idx = (raw_offset & 0x7fff_ffff) as usize;
5986    let large_start = large_idx
5987        .checked_mul(8)
5988        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5989    let large_end = large_start
5990        .checked_add(8)
5991        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5992    if large_end > large_offset_table.len() {
5993        return Err(GitError::InvalidFormat(
5994            "pack index large offset points past table".into(),
5995        ));
5996    }
5997    Ok(u64_be(&large_offset_table[large_start..large_end]))
5998}
5999
6000fn checked_range(
6001    start: usize,
6002    count: usize,
6003    width: usize,
6004    total: usize,
6005) -> Result<std::ops::Range<usize>> {
6006    let len = count
6007        .checked_mul(width)
6008        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
6009    let end = start
6010        .checked_add(len)
6011        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
6012    if end > total {
6013        return Err(GitError::InvalidFormat("truncated pack index table".into()));
6014    }
6015    Ok(start..end)
6016}
6017
6018fn validate_position_permutation(positions: &[u32]) -> Result<()> {
6019    let mut seen = vec![false; positions.len()];
6020    for position in positions {
6021        let idx = *position as usize;
6022        if idx >= positions.len() {
6023            return Err(GitError::InvalidFormat(
6024                "reverse index position points past object table".into(),
6025            ));
6026        }
6027        if seen[idx] {
6028            return Err(GitError::InvalidFormat(
6029                "reverse index position is duplicated".into(),
6030            ));
6031        }
6032        seen[idx] = true;
6033    }
6034    Ok(())
6035}
6036
6037fn parse_midx_pack_names(
6038    bytes: &[u8],
6039    chunks: &[MultiPackIndexChunk],
6040    pack_count: usize,
6041    version: u8,
6042) -> Result<Vec<String>> {
6043    let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
6044        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
6045    let mut names = Vec::with_capacity(pack_count);
6046    let mut offset = 0usize;
6047    while names.len() < pack_count {
6048        let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
6049            return Err(GitError::InvalidFormat(
6050                "fatal: multi-pack-index pack-name chunk is too short".into(),
6051            ));
6052        };
6053        let name_bytes = &data[offset..offset + relative_end];
6054        if name_bytes.is_empty() {
6055            return Err(GitError::InvalidFormat(
6056                "multi-pack-index PNAM entry is empty".into(),
6057            ));
6058        }
6059        let name = std::str::from_utf8(name_bytes)
6060            .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
6061        if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
6062            return Err(GitError::InvalidFormat(
6063                "multi-pack-index PNAM entry contains a path separator".into(),
6064            ));
6065        }
6066        names.push(name.to_string());
6067        offset += relative_end + 1;
6068    }
6069    let padding = &data[offset..];
6070    if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
6071        return Err(GitError::InvalidFormat(
6072            "multi-pack-index PNAM padding is invalid".into(),
6073        ));
6074    }
6075    if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
6076        return Err(GitError::InvalidFormat(
6077            "multi-pack-index v1 PNAM entries are not sorted".into(),
6078        ));
6079    }
6080    Ok(names)
6081}
6082
6083fn parse_midx_oid_fanout(
6084    bytes: &[u8],
6085    chunks: &[MultiPackIndexChunk],
6086) -> Result<([u32; 256], usize)> {
6087    let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
6088        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
6089    if data.len() != 256 * 4 {
6090        return Err(GitError::InvalidFormat(
6091            "error: multi-pack-index OID fanout is of the wrong size\nfatal: multi-pack-index required OID fanout chunk missing or corrupted".into(),
6092        ));
6093    }
6094    let mut fanout = [0u32; 256];
6095    let mut previous = 0u32;
6096    for (idx, slot) in fanout.iter_mut().enumerate() {
6097        let start = idx * 4;
6098        *slot = u32_be(&data[start..start + 4]);
6099        if *slot < previous {
6100            return Err(GitError::InvalidFormat(format!(
6101                "error: oid fanout out of order: fanout[{}] = {:x} > {:x} = fanout[{idx}]\nfatal: multi-pack-index required OID fanout chunk missing or corrupted",
6102                idx - 1,
6103                previous,
6104                *slot
6105            )));
6106        }
6107        previous = *slot;
6108    }
6109    Ok((fanout, fanout[255] as usize))
6110}
6111
6112fn parse_midx_object_ids(
6113    bytes: &[u8],
6114    chunks: &[MultiPackIndexChunk],
6115    format: ObjectFormat,
6116    object_count: usize,
6117    fanout: &[u32; 256],
6118) -> Result<Vec<ObjectId>> {
6119    let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
6120        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
6121    let expected_len = object_count
6122        .checked_mul(format.raw_len())
6123        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
6124    if data.len() != expected_len {
6125        return Err(GitError::InvalidFormat(
6126            "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
6127        ));
6128    }
6129
6130    let mut ids = Vec::with_capacity(object_count);
6131    let mut counts = [0u32; 256];
6132    let mut previous_oid: Option<ObjectId> = None;
6133    for idx in 0..object_count {
6134        let start = idx * format.raw_len();
6135        let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
6136        if let Some(previous) = &previous_oid
6137            && previous.as_bytes() >= oid.as_bytes()
6138        {
6139            return Err(GitError::InvalidFormat(
6140                "multi-pack-index OIDL object ids are not strictly sorted".into(),
6141            ));
6142        }
6143        counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
6144            .checked_add(1)
6145            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
6146        previous_oid = Some(oid);
6147        ids.push(oid);
6148    }
6149
6150    let mut running = 0u32;
6151    for (idx, count) in counts.iter().enumerate() {
6152        running = running
6153            .checked_add(*count)
6154            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
6155        if fanout[idx] != running {
6156            return Err(GitError::InvalidFormat(
6157                "multi-pack-index OIDF fanout does not match OIDL".into(),
6158            ));
6159        }
6160    }
6161    Ok(ids)
6162}
6163
6164fn parse_midx_object_offsets(
6165    bytes: &[u8],
6166    chunks: &[MultiPackIndexChunk],
6167    object_ids: Vec<ObjectId>,
6168    pack_count: u32,
6169) -> Result<Vec<MultiPackIndexEntry>> {
6170    let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
6171        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
6172    let expected_len = object_ids
6173        .len()
6174        .checked_mul(8)
6175        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
6176    if data.len() != expected_len {
6177        return Err(GitError::InvalidFormat(
6178            "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
6179        ));
6180    }
6181    let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
6182    if let Some(large_offsets) = large_offsets
6183        && large_offsets.len() % 8 != 0
6184    {
6185        return Err(GitError::InvalidFormat(
6186            "multi-pack-index LOFF chunk has invalid length".into(),
6187        ));
6188    }
6189
6190    let mut entries = Vec::with_capacity(object_ids.len());
6191    for (idx, oid) in object_ids.into_iter().enumerate() {
6192        let start = idx * 8;
6193        let pack_int_id = u32_be(&data[start..start + 4]);
6194        if pack_int_id >= pack_count {
6195            return Err(GitError::InvalidFormat(
6196                "multi-pack-index object points past pack table".into(),
6197            ));
6198        }
6199        let raw_offset = u32_be(&data[start + 4..start + 8]);
6200        let offset = if raw_offset & 0x8000_0000 == 0 {
6201            u64::from(raw_offset)
6202        } else {
6203            let Some(large_offsets) = large_offsets else {
6204                return Err(GitError::InvalidFormat(
6205                    "multi-pack-index large offset missing LOFF chunk".into(),
6206                ));
6207            };
6208            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
6209            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
6210                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
6211            })?;
6212            let large_end = large_start.checked_add(8).ok_or_else(|| {
6213                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
6214            })?;
6215            if large_end > large_offsets.len() {
6216                return Err(GitError::InvalidFormat(
6217                    "fatal: multi-pack-index large offset out of bounds".into(),
6218                ));
6219            }
6220            u64_be(&large_offsets[large_start..large_end])
6221        };
6222        entries.push(MultiPackIndexEntry {
6223            oid,
6224            pack_int_id,
6225            offset,
6226            force_large_offset: raw_offset & 0x8000_0000 != 0,
6227        });
6228    }
6229    Ok(entries)
6230}
6231
6232fn parse_midx_reverse_index(
6233    bytes: &[u8],
6234    chunks: &[MultiPackIndexChunk],
6235    object_count: usize,
6236) -> Result<Option<Vec<u32>>> {
6237    let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
6238        return Ok(None);
6239    };
6240    let expected_len = object_count
6241        .checked_mul(4)
6242        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
6243    if data.len() != expected_len {
6244        return Err(GitError::InvalidFormat(
6245            "multi-pack-index reverse-index chunk is the wrong size".into(),
6246        ));
6247    }
6248    let mut positions = Vec::with_capacity(object_count);
6249    for idx in 0..object_count {
6250        let start = idx * 4;
6251        positions.push(u32_be(&data[start..start + 4]));
6252    }
6253    validate_position_permutation(&positions)?;
6254    Ok(Some(positions))
6255}
6256
6257fn parse_midx_bitmapped_packs(
6258    bytes: &[u8],
6259    chunks: &[MultiPackIndexChunk],
6260    pack_count: usize,
6261    object_count: usize,
6262) -> Result<Option<Vec<MultiPackBitmapPack>>> {
6263    let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
6264        return Ok(None);
6265    };
6266    let expected_len = pack_count
6267        .checked_mul(8)
6268        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
6269    if data.len() != expected_len {
6270        return Err(GitError::InvalidFormat(
6271            "multi-pack-index BTMP chunk has invalid length".into(),
6272        ));
6273    }
6274    let mut entries = Vec::with_capacity(pack_count);
6275    for idx in 0..pack_count {
6276        let start = idx * 8;
6277        let bitmap_pos = u32_be(&data[start..start + 4]);
6278        let bitmap_nr = u32_be(&data[start + 4..start + 8]);
6279        let bitmap_end = u64::from(bitmap_pos)
6280            .checked_add(u64::from(bitmap_nr))
6281            .ok_or_else(|| {
6282                GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
6283            })?;
6284        if bitmap_end > object_count as u64 {
6285            return Err(GitError::InvalidFormat(
6286                "multi-pack-index BTMP range points past object table".into(),
6287            ));
6288        }
6289        entries.push(MultiPackBitmapPack {
6290            bitmap_pos,
6291            bitmap_nr,
6292        });
6293    }
6294    Ok(Some(entries))
6295}
6296
6297fn midx_chunk_data<'a>(
6298    bytes: &'a [u8],
6299    chunks: &[MultiPackIndexChunk],
6300    id: [u8; 4],
6301    required: bool,
6302) -> Result<Option<&'a [u8]>> {
6303    let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
6304        if required {
6305            return Err(GitError::InvalidFormat(format!(
6306                "multi-pack-index missing {} chunk",
6307                std::str::from_utf8(&id).unwrap_or("required")
6308            )));
6309        }
6310        return Ok(None);
6311    };
6312    let start = usize::try_from(chunk.offset)
6313        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
6314    let len = usize::try_from(chunk.len)
6315        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
6316    let end = start
6317        .checked_add(len)
6318        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
6319    let Some(data) = bytes.get(start..end) else {
6320        return Err(GitError::InvalidFormat(
6321            "multi-pack-index chunk extends past file".into(),
6322        ));
6323    };
6324    Ok(Some(data))
6325}
6326
6327fn hash_function_id(format: ObjectFormat) -> u32 {
6328    match format {
6329        ObjectFormat::Sha1 => 1,
6330        ObjectFormat::Sha256 => 2,
6331    }
6332}
6333
/// Largest count of clean (run) words one EWAH running-length word (RLW) can
/// describe: the run-length field spans 32 bits (bits 1..=32 of the RLW).
const EWAH_MAX_RUNNING_LEN: u64 = (1u64 << 32) - 1;

/// Largest count of literal (dirty) words that may follow one EWAH
/// running-length word: the literal-count field spans 31 bits (bits 33..=63
/// of the RLW).
const EWAH_MAX_LITERAL_LEN: u64 = (1u64 << 31) - 1;

/// A 64-bit word with every bit set; a word equal to this is a "clean" word
/// of ones.
const EWAH_ALL_ONES: u64 = u64::MAX;
6344
6345impl EwahBitmap {
6346    /// Constructs an [`EwahBitmap`] in git's canonical EWAH compressed form
6347    /// from a slice of raw uncompressed 64-bit words.
6348    ///
6349    /// Within each word bit `i` corresponds to position `word_index * 64 + i`,
6350    /// matching git's on-disk convention. `bit_size` records the number of
6351    /// logical bits the bitmap spans; it must not exceed `words.len() * 64`.
6352    ///
6353    /// This mirrors libgit's `ewah_add`/`ewah_add_empty_words` incremental
6354    /// encoder: consecutive all-zero or all-one words collapse into a run, and
6355    /// any other word is stored verbatim as a literal. Only the first
6356    /// `bit_size.div_ceil(64)` words back the declared bits; any extra trailing
6357    /// words supplied by the caller are ignored, just as git encodes a bitmap
6358    /// sized to its highest set bit.
6359    pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
6360        let required_words = bit_size.div_ceil(64) as usize;
6361        if required_words > words.len() {
6362            return Err(GitError::InvalidFormat(format!(
6363                "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
6364                words.len()
6365            )));
6366        }
6367        // Only the words that actually back the declared bits matter; libgit
6368        // never emits clean trailing zero words for the unused tail.
6369        let significant = &words[..required_words];
6370        let mut builder = EwahBuilder::new(bit_size);
6371        for &word in significant {
6372            if word == 0 {
6373                builder.add_empty_words(false, 1);
6374            } else if word == EWAH_ALL_ONES {
6375                builder.add_empty_words(true, 1);
6376            } else {
6377                builder.add_literal(word);
6378            }
6379        }
6380        builder.finish()
6381    }
6382
6383    /// Constructs an [`EwahBitmap`] from a set of bit positions.
6384    ///
6385    /// `bit_size` is the number of logical bits (typically the pack object
6386    /// count). Every position in `positions` must be strictly less than
6387    /// `bit_size`. Positions may be given in any order and may repeat.
6388    pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
6389        let word_count = bit_size.div_ceil(64) as usize;
6390        let mut words = vec![0u64; word_count];
6391        for &position in positions {
6392            if position >= bit_size {
6393                return Err(GitError::InvalidFormat(format!(
6394                    "EWAH bit position {position} out of range for bit_size {bit_size}"
6395                )));
6396            }
6397            let word_index = (position / 64) as usize;
6398            let bit_index = position % 64;
6399            words[word_index] |= 1u64 << bit_index;
6400        }
6401        Self::from_words(bit_size, &words)
6402    }
6403
6404    /// An empty EWAH bitmap (no bits, no words). This is what git writes for an
6405    /// all-zero type bitmap (e.g. when a pack has no tags).
6406    pub fn empty() -> Self {
6407        Self {
6408            bit_size: 0,
6409            words: Vec::new(),
6410            rlw_position: 0,
6411        }
6412    }
6413
6414    /// Decodes the compressed EWAH back into raw 64-bit words, LSB-first within
6415    /// each word. The returned vector has `bit_size.div_ceil(64)` entries.
6416    ///
6417    /// This is the inverse of [`EwahBitmap::from_words`] for the bits the
6418    /// bitmap actually covers and is primarily used to validate roundtrips.
6419    pub fn to_words(&self) -> Result<Vec<u64>> {
6420        let mut out = Vec::new();
6421        let mut word_idx = 0usize;
6422        while word_idx < self.words.len() {
6423            let rlw = self.words[word_idx];
6424            let run_bit = rlw & 1;
6425            let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
6426            let literal_words = (rlw >> 33) as usize;
6427            word_idx += 1;
6428            let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
6429            for _ in 0..run_words {
6430                out.push(fill);
6431            }
6432            let literal_end = word_idx
6433                .checked_add(literal_words)
6434                .filter(|end| *end <= self.words.len())
6435                .ok_or_else(|| {
6436                    GitError::InvalidFormat("EWAH literal words extend past word table".into())
6437                })?;
6438            out.extend_from_slice(&self.words[word_idx..literal_end]);
6439            word_idx = literal_end;
6440        }
6441        let required_words = (self.bit_size as usize).div_ceil(64);
6442        if out.len() < required_words {
6443            out.resize(required_words, 0);
6444        }
6445        out.truncate(required_words);
6446        Ok(out)
6447    }
6448
6449    /// Returns the sorted set bit positions covered by this bitmap.
6450    pub fn to_positions(&self) -> Result<Vec<u32>> {
6451        let words = self.to_words()?;
6452        let mut positions = Vec::new();
6453        for (word_index, word) in words.iter().enumerate() {
6454            let mut remaining = *word;
6455            while remaining != 0 {
6456                let bit = remaining.trailing_zeros();
6457                let position = (word_index as u64) * 64 + u64::from(bit);
6458                if position < u64::from(self.bit_size) {
6459                    // position always fits in u32 because bit_size is u32.
6460                    positions.push(position as u32);
6461                }
6462                remaining &= remaining - 1;
6463            }
6464        }
6465        Ok(positions)
6466    }
6467
6468    /// Serialises the bitmap to git's on-disk EWAH byte layout: `bit_size`
6469    /// (u32 BE), word count (u32 BE), each compressed word (u64 BE), then the
6470    /// running-length-word position (u32 BE).
6471    pub fn to_bytes(&self) -> Vec<u8> {
6472        let mut out = Vec::with_capacity(12 + self.words.len() * 8);
6473        self.append_bytes(&mut out);
6474        out
6475    }
6476
6477    fn append_bytes(&self, out: &mut Vec<u8>) {
6478        out.extend_from_slice(&self.bit_size.to_be_bytes());
6479        out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
6480        for word in &self.words {
6481            out.extend_from_slice(&word.to_be_bytes());
6482        }
6483        out.extend_from_slice(&self.rlw_position.to_be_bytes());
6484    }
6485}
6486
6487/// Incremental EWAH compressed-buffer builder mirroring libgit's `ewah_add`.
6488///
6489/// The buffer is a sequence of blocks. Each block begins with a running-length
6490/// word (RLW) and is followed by zero or more literal words:
6491///   * bit 0      => value of the clean run words (0 or 1)
6492///   * bits 1..=32 => number of clean run words (32-bit field)
6493///   * bits 33..=63 => number of trailing literal words (31-bit field)
6494struct EwahBuilder {
6495    bit_size: u32,
6496    words: Vec<u64>,
6497    rlw_position: usize,
6498}
6499
6500impl EwahBuilder {
6501    fn new(bit_size: u32) -> Self {
6502        // Every EWAH buffer begins with an RLW, even an empty one.
6503        Self {
6504            bit_size,
6505            words: vec![0u64],
6506            rlw_position: 0,
6507        }
6508    }
6509
6510    fn rlw(&self) -> u64 {
6511        self.words[self.rlw_position]
6512    }
6513
6514    fn set_rlw(&mut self, value: u64) {
6515        self.words[self.rlw_position] = value;
6516    }
6517
6518    fn rlw_running_len(&self) -> u64 {
6519        (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
6520    }
6521
6522    fn rlw_running_bit(&self) -> bool {
6523        self.rlw() & 1 == 1
6524    }
6525
6526    fn rlw_literal_len(&self) -> u64 {
6527        self.rlw() >> 33
6528    }
6529
6530    fn set_running_bit(&mut self, bit: bool) {
6531        let mut value = self.rlw();
6532        value &= !1;
6533        value |= u64::from(bit);
6534        self.set_rlw(value);
6535    }
6536
6537    fn set_running_len(&mut self, len: u64) {
6538        let mut value = self.rlw();
6539        value &= !(EWAH_MAX_RUNNING_LEN << 1);
6540        value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
6541        self.set_rlw(value);
6542    }
6543
6544    fn set_literal_len(&mut self, len: u64) {
6545        let mut value = self.rlw();
6546        value &= (1u64 << 33) - 1;
6547        value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
6548        self.set_rlw(value);
6549    }
6550
6551    /// Begins a fresh RLW block at the end of the buffer.
6552    fn push_rlw(&mut self) {
6553        self.rlw_position = self.words.len();
6554        self.words.push(0);
6555    }
6556
6557    /// Appends `number` clean words whose bits are all `value`, mirroring
6558    /// libgit's `ewah_add_empty_words`.
6559    ///
6560    /// A run can only be merged into the current RLW when that RLW has not yet
6561    /// emitted any literal words and its run either is empty or already carries
6562    /// the same fill value. Otherwise a fresh RLW block must be started, because
6563    /// every block stores its run strictly before its literals.
6564    fn add_empty_words(&mut self, value: bool, mut number: u64) {
6565        while number > 0 {
6566            // The current RLW can absorb more run words only when it has no
6567            // literals yet, its run is either empty or already the right fill
6568            // value, and the 32-bit run-length field is not already saturated.
6569            let can_extend = self.rlw_literal_len() == 0
6570                && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
6571                && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
6572            if !can_extend {
6573                self.push_rlw();
6574            }
6575            if self.rlw_running_len() == 0 {
6576                self.set_running_bit(value);
6577            }
6578            let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
6579            let take = available.min(number);
6580            self.set_running_len(self.rlw_running_len() + take);
6581            number -= take;
6582        }
6583    }
6584
6585    /// Appends a single literal (dirty) word verbatim, mirroring libgit's
6586    /// `ewah_add_dirty_words` for a count of one.
6587    fn add_literal(&mut self, word: u64) {
6588        if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
6589            self.push_rlw();
6590        }
6591        let literal_len = self.rlw_literal_len();
6592        self.set_literal_len(literal_len + 1);
6593        self.words.push(word);
6594    }
6595
6596    fn finish(self) -> Result<EwahBitmap> {
6597        let rlw_position = u32::try_from(self.rlw_position)
6598            .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
6599        if self.words.len() > u32::MAX as usize {
6600            return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
6601        }
6602        Ok(EwahBitmap {
6603            bit_size: self.bit_size,
6604            words: self.words,
6605            rlw_position,
6606        })
6607    }
6608}
6609
6610/// Builder that assembles a reachability bitmap (`.bitmap`) for a pack.
6611///
6612/// The writer is constructed from the object layout of a pack (one
6613/// [`ObjectType`] per object, in pack order) and the pack's trailing checksum.
6614/// Callers then register one selected commit per [`add_commit`] call, supplying
6615/// the set of pack positions reachable from that commit. [`build`]/[`write`]
6616/// produce a [`PackBitmapIndex`] / serialised `.bitmap` bytes matching git's
6617/// on-disk format (signature `BITM`, version 1).
6618///
6619/// [`add_commit`]: PackBitmapWriter::add_commit
6620/// [`build`]: PackBitmapWriter::build
6621/// [`write`]: PackBitmapWriter::write
6622#[derive(Debug, Clone)]
6623pub struct PackBitmapWriter {
6624    format: ObjectFormat,
6625    pack_checksum: ObjectId,
6626    object_count: u32,
6627    commit_positions: Vec<u32>,
6628    tree_positions: Vec<u32>,
6629    blob_positions: Vec<u32>,
6630    tag_positions: Vec<u32>,
6631    name_hash_cache: Option<Vec<u32>>,
6632    selected: Vec<SelectedCommit>,
6633    pseudo_merges: Vec<PackBitmapPseudoMerge>,
6634}
6635
6636#[derive(Debug, Clone)]
6637struct SelectedCommit {
6638    /// Oid-sorted `.idx` position (what the on-disk entry records). The
6639    /// commit's pack-order position lives in `reachable` with the rest of the
6640    /// bits.
6641    commit_index_position: u32,
6642    flags: u8,
6643    reachable: Vec<u32>,
6644}
6645
6646impl PackBitmapWriter {
6647    /// `OBJ_NONE` selection flag: this commit's bitmap is stored in full (no XOR
6648    /// compression against a previously selected commit). This is the only flag
6649    /// value this writer emits.
6650    pub const FLAG_NONE: u8 = 0;
6651
6652    /// Creates a writer for a pack whose objects (in pack order) have the given
6653    /// [`ObjectType`]s and whose trailing checksum is `pack_checksum`.
6654    ///
6655    /// Returns an error if the pack contains more than `u32::MAX` objects, if
6656    /// `pack_checksum`'s format does not match `format`, or if any object type
6657    /// is not one of the four reachable git object kinds.
6658    pub fn new(
6659        format: ObjectFormat,
6660        pack_checksum: ObjectId,
6661        object_types: &[ObjectType],
6662    ) -> Result<Self> {
6663        if object_types.len() > u32::MAX as usize {
6664            return Err(GitError::InvalidFormat(
6665                "too many objects for a pack bitmap".into(),
6666            ));
6667        }
6668        if pack_checksum.format() != format {
6669            return Err(GitError::InvalidObjectId(
6670                "pack checksum format does not match bitmap format".into(),
6671            ));
6672        }
6673        let object_count = object_types.len() as u32;
6674        let mut commit_positions = Vec::new();
6675        let mut tree_positions = Vec::new();
6676        let mut blob_positions = Vec::new();
6677        let mut tag_positions = Vec::new();
6678        for (index, object_type) in object_types.iter().enumerate() {
6679            let position = index as u32;
6680            match object_type {
6681                ObjectType::Commit => commit_positions.push(position),
6682                ObjectType::Tree => tree_positions.push(position),
6683                ObjectType::Blob => blob_positions.push(position),
6684                ObjectType::Tag => tag_positions.push(position),
6685            }
6686        }
6687        Ok(Self {
6688            format,
6689            pack_checksum,
6690            object_count,
6691            commit_positions,
6692            tree_positions,
6693            blob_positions,
6694            tag_positions,
6695            name_hash_cache: None,
6696            selected: Vec::new(),
6697            pseudo_merges: Vec::new(),
6698        })
6699    }
6700
6701    /// Attaches a name-hash cache (one `u32` per object, in pack order). When
6702    /// set, the written bitmap advertises [`PackBitmapIndex::OPTION_HASH_CACHE`]
6703    /// and appends the cache after the bitmap entries, exactly as git does.
6704    ///
6705    /// Returns an error if the cache length does not equal the object count.
6706    pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
6707        if cache.len() != self.object_count as usize {
6708            return Err(GitError::InvalidFormat(format!(
6709                "name hash cache has {} entries but pack has {} objects",
6710                cache.len(),
6711                self.object_count
6712            )));
6713        }
6714        self.name_hash_cache = Some(cache);
6715        Ok(self)
6716    }
6717
6718    /// Registers a selected commit and the pack positions reachable from it.
6719    ///
6720    /// `commit_position` is the *pack-order* position of the commit itself (the
6721    /// bit-number space); it must reference a commit object and is implicitly
6722    /// part of the reachable set. `commit_index_position` is the commit's
6723    /// position in the *oid-sorted* pack index — this is what the on-disk entry
6724    /// records (upstream `oid_pos`); bits and entry positions live in different
6725    /// spaces. `reachable` lists the pack-order positions of every object
6726    /// reachable from the commit (it may include or omit `commit_position`;
6727    /// duplicates are fine). All positions must be in range. The commit's full
6728    /// (non-XORed) bitmap is stored.
6729    pub fn add_commit(
6730        &mut self,
6731        commit_position: u32,
6732        commit_index_position: u32,
6733        reachable: &[u32],
6734    ) -> Result<()> {
6735        if commit_position >= self.object_count {
6736            return Err(GitError::InvalidFormat(format!(
6737                "commit position {commit_position} out of range for {} objects",
6738                self.object_count
6739            )));
6740        }
6741        if commit_index_position >= self.object_count {
6742            return Err(GitError::InvalidFormat(format!(
6743                "commit index position {commit_index_position} out of range for {} objects",
6744                self.object_count
6745            )));
6746        }
6747        if !self.commit_positions.contains(&commit_position) {
6748            return Err(GitError::InvalidFormat(format!(
6749                "bitmap commit position {commit_position} is not a commit object"
6750            )));
6751        }
6752        for &position in reachable {
6753            if position >= self.object_count {
6754                return Err(GitError::InvalidFormat(format!(
6755                    "reachable position {position} out of range for {} objects",
6756                    self.object_count
6757                )));
6758            }
6759        }
6760        let mut reachable = reachable.to_vec();
6761        reachable.push(commit_position);
6762        self.selected.push(SelectedCommit {
6763            commit_index_position,
6764            flags: Self::FLAG_NONE,
6765            reachable,
6766        });
6767        Ok(())
6768    }
6769
6770    /// Registers a pseudo-merge bitmap. Both `commits` and `reachable` are
6771    /// positions in the bitmap's bit-numbering order (pack order for a single
6772    /// pack, pseudo-pack order for a MIDX). Every commit position must refer to
6773    /// a commit object; every reachable position must be in range.
6774    pub fn add_pseudo_merge(&mut self, commits: &[u32], reachable: &[u32]) -> Result<()> {
6775        if commits.is_empty() {
6776            return Err(GitError::InvalidFormat(
6777                "pseudo-merge must contain at least one commit".into(),
6778            ));
6779        }
6780        for &position in commits {
6781            if position >= self.object_count {
6782                return Err(GitError::InvalidFormat(format!(
6783                    "pseudo-merge commit position {position} out of range for {} objects",
6784                    self.object_count
6785                )));
6786            }
6787            if !self.commit_positions.contains(&position) {
6788                return Err(GitError::InvalidFormat(format!(
6789                    "pseudo-merge commit position {position} is not a commit object"
6790                )));
6791            }
6792        }
6793        for &position in reachable {
6794            if position >= self.object_count {
6795                return Err(GitError::InvalidFormat(format!(
6796                    "pseudo-merge reachable position {position} out of range for {} objects",
6797                    self.object_count
6798                )));
6799            }
6800        }
6801        self.pseudo_merges.push(PackBitmapPseudoMerge {
6802            commits: EwahBitmap::from_positions(self.object_count, commits)?,
6803            bitmap: EwahBitmap::from_positions(self.object_count, reachable)?,
6804        });
6805        Ok(())
6806    }
6807
6808    /// Builds the in-memory [`PackBitmapIndex`] without serialising it.
6809    ///
6810    /// The resulting index always advertises
6811    /// [`PackBitmapIndex::OPTION_FULL_DAG`] (the four type bitmaps fully cover
6812    /// the pack) and, when a name-hash cache was attached,
6813    /// [`PackBitmapIndex::OPTION_HASH_CACHE`].
6814    pub fn build(&self) -> Result<PackBitmapIndex> {
6815        let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
6816        let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
6817        let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
6818        let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
6819
6820        let mut entries = Vec::with_capacity(self.selected.len());
6821        for selected in &self.selected {
6822            let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
6823            entries.push(PackBitmapEntry {
6824                object_position: selected.commit_index_position,
6825                xor_offset: 0,
6826                flags: selected.flags,
6827                bitmap,
6828            });
6829        }
6830
6831        let mut options = PackBitmapIndex::OPTION_FULL_DAG;
6832        if self.name_hash_cache.is_some() {
6833            options |= PackBitmapIndex::OPTION_HASH_CACHE;
6834        }
6835        if !self.pseudo_merges.is_empty() {
6836            options |= PackBitmapIndex::OPTION_PSEUDO_MERGES;
6837        }
6838
6839        // The index checksum is only known once the body is serialised; the
6840        // dedicated `write` path fills it in. `build` reports a placeholder of
6841        // the correct format so the struct is self-consistent for callers that
6842        // only need the decoded bitmaps.
6843        let placeholder_checksum = ObjectId::null(self.format);
6844        Ok(PackBitmapIndex {
6845            version: 1,
6846            format: self.format,
6847            options,
6848            pack_checksum: self.pack_checksum.clone(),
6849            index_checksum: placeholder_checksum,
6850            type_bitmaps: PackBitmapTypeBitmaps {
6851                commits,
6852                trees,
6853                blobs,
6854                tags,
6855            },
6856            entries,
6857            pseudo_merges: self.pseudo_merges.clone(),
6858            name_hash_cache: self.name_hash_cache.clone(),
6859        })
6860    }
6861
6862    /// Builds and serialises the `.bitmap` file, returning the on-disk bytes
6863    /// (including the trailing index checksum).
6864    pub fn write(&self) -> Result<Vec<u8>> {
6865        self.build()?.write()
6866    }
6867}
6868
6869impl PackBitmapIndex {
6870    /// Serialises this index into git's on-disk `.bitmap` byte layout.
6871    ///
6872    /// This is the exact inverse of [`PackBitmapIndex::parse`]: signature
6873    /// `BITM`, version (u16 BE), options (u16 BE), entry count (u32 BE), the
6874    /// pack checksum, the four type bitmaps (commits, trees, blobs, tags), each
6875    /// commit entry (object position, XOR offset, flags, EWAH bitmap), the
6876    /// optional pseudo-merge extension, the optional name-hash cache, and
6877    /// finally the trailing index checksum over everything written so far.
6878    ///
6879    /// The `index_checksum` field of `self` is ignored and recomputed from the
6880    /// serialised body. Returns an error for unsupported versions, mismatched
6881    /// object-id formats, an oversized entry table, or an inconsistent name-hash
6882    /// cache.
6883    pub fn write(&self) -> Result<Vec<u8>> {
6884        if self.version != 1 {
6885            return Err(GitError::Unsupported(format!(
6886                "bitmap index version {}",
6887                self.version
6888            )));
6889        }
6890        let mut options = self.options;
6891        if !self.pseudo_merges.is_empty() {
6892            options |= Self::OPTION_PSEUDO_MERGES;
6893        }
6894        let known_options =
6895            Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE | Self::OPTION_PSEUDO_MERGES;
6896        if options & !known_options != 0 {
6897            return Err(GitError::Unsupported(format!(
6898                "bitmap index options {:#06x}",
6899                options & !known_options
6900            )));
6901        }
6902        if self.pack_checksum.format() != self.format {
6903            return Err(GitError::InvalidObjectId(
6904                "bitmap pack checksum format does not match index format".into(),
6905            ));
6906        }
6907        if self.entries.len() > u32::MAX as usize {
6908            return Err(GitError::InvalidFormat(
6909                "too many bitmap index entries".into(),
6910            ));
6911        }
6912        if options & Self::OPTION_PSEUDO_MERGES != 0 && self.pseudo_merges.is_empty() {
6913            return Err(GitError::InvalidFormat(
6914                "OPTION_PSEUDO_MERGES set without pseudo-merge records".into(),
6915            ));
6916        }
6917        let want_cache = options & Self::OPTION_HASH_CACHE != 0;
6918        match (&self.name_hash_cache, want_cache) {
6919            (Some(_), false) => {
6920                return Err(GitError::InvalidFormat(
6921                    "name hash cache present without OPTION_HASH_CACHE".into(),
6922                ));
6923            }
6924            (None, true) => {
6925                return Err(GitError::InvalidFormat(
6926                    "OPTION_HASH_CACHE set without a name hash cache".into(),
6927                ));
6928            }
6929            _ => {}
6930        }
6931
6932        let mut out = Vec::new();
6933        out.extend_from_slice(b"BITM");
6934        out.extend_from_slice(&self.version.to_be_bytes());
6935        out.extend_from_slice(&options.to_be_bytes());
6936        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
6937        out.extend_from_slice(self.pack_checksum.as_bytes());
6938
6939        self.type_bitmaps.commits.append_bytes(&mut out);
6940        self.type_bitmaps.trees.append_bytes(&mut out);
6941        self.type_bitmaps.blobs.append_bytes(&mut out);
6942        self.type_bitmaps.tags.append_bytes(&mut out);
6943
6944        for (idx, entry) in self.entries.iter().enumerate() {
6945            if entry.xor_offset as usize > idx {
6946                return Err(GitError::InvalidFormat(
6947                    "bitmap index entry has invalid XOR offset".into(),
6948                ));
6949            }
6950            out.extend_from_slice(&entry.object_position.to_be_bytes());
6951            out.push(entry.xor_offset);
6952            out.push(entry.flags);
6953            entry.bitmap.append_bytes(&mut out);
6954        }
6955
6956        if !self.pseudo_merges.is_empty() {
6957            append_bitmap_pseudo_merges(&mut out, &self.pseudo_merges)?;
6958        }
6959
6960        if let Some(cache) = &self.name_hash_cache {
6961            for value in cache {
6962                out.extend_from_slice(&value.to_be_bytes());
6963            }
6964        }
6965
6966        let checksum = sley_core::digest_bytes(self.format, &out)?;
6967        out.extend_from_slice(checksum.as_bytes());
6968        Ok(out)
6969    }
6970}
6971
6972fn append_bitmap_pseudo_merges(
6973    out: &mut Vec<u8>,
6974    pseudo_merges: &[PackBitmapPseudoMerge],
6975) -> Result<()> {
6976    if pseudo_merges.len() > u32::MAX as usize {
6977        return Err(GitError::InvalidFormat(
6978            "too many pseudo-merge bitmap records".into(),
6979        ));
6980    }
6981    let start = out.len();
6982    let mut pseudo_offsets = Vec::with_capacity(pseudo_merges.len());
6983    let mut commit_to_offsets: BTreeMap<u32, Vec<u64>> = BTreeMap::new();
6984    for merge in pseudo_merges {
6985        let offset = u64::try_from(out.len())
6986            .map_err(|_| GitError::InvalidFormat("bitmap file offset overflow".into()))?;
6987        pseudo_offsets.push(offset);
6988        for commit_pos in merge.commits.to_positions()? {
6989            commit_to_offsets
6990                .entry(commit_pos)
6991                .or_default()
6992                .push(offset);
6993        }
6994        merge.commits.append_bytes(out);
6995        merge.bitmap.append_bytes(out);
6996    }
6997    if commit_to_offsets.len() > u32::MAX as usize {
6998        return Err(GitError::InvalidFormat(
6999            "too many pseudo-merge commits".into(),
7000        ));
7001    }
7002
7003    let lookup_start = out.len();
7004    let lookup_len = commit_to_offsets
7005        .len()
7006        .checked_mul(12)
7007        .ok_or_else(|| GitError::InvalidFormat("pseudo-merge lookup overflow".into()))?;
7008    let mut next_extended = u64::try_from(
7009        lookup_start
7010            .checked_add(lookup_len)
7011            .ok_or_else(|| GitError::InvalidFormat("pseudo-merge lookup overflow".into()))?,
7012    )
7013    .map_err(|_| GitError::InvalidFormat("bitmap file offset overflow".into()))?;
7014    let mut rows = Vec::with_capacity(commit_to_offsets.len());
7015    for (commit_pos, offsets) in commit_to_offsets {
7016        let extended_offset = if offsets.len() > 1 {
7017            if next_extended & (1u64 << 63) != 0 {
7018                return Err(GitError::InvalidFormat(
7019                    "pseudo-merge extended offset overflow".into(),
7020                ));
7021            }
7022            let offset = next_extended;
7023            let ext_len = offsets
7024                .len()
7025                .checked_mul(8)
7026                .and_then(|len| len.checked_add(4))
7027                .ok_or_else(|| {
7028                    GitError::InvalidFormat("pseudo-merge extended lookup overflow".into())
7029                })?;
7030            next_extended = next_extended.checked_add(ext_len as u64).ok_or_else(|| {
7031                GitError::InvalidFormat("pseudo-merge extended lookup overflow".into())
7032            })?;
7033            Some(offset)
7034        } else {
7035            None
7036        };
7037        rows.push((commit_pos, offsets, extended_offset));
7038    }
7039
7040    for (commit_pos, offsets, extended_offset) in &rows {
7041        out.extend_from_slice(&commit_pos.to_be_bytes());
7042        match extended_offset {
7043            Some(offset) => out.extend_from_slice(&(offset | (1u64 << 63)).to_be_bytes()),
7044            None => out.extend_from_slice(&offsets[0].to_be_bytes()),
7045        }
7046    }
7047
7048    for (_commit_pos, offsets, extended_offset) in &rows {
7049        if extended_offset.is_none() {
7050            continue;
7051        }
7052        let count = u32::try_from(offsets.len())
7053            .map_err(|_| GitError::InvalidFormat("pseudo-merge extended lookup overflow".into()))?;
7054        out.extend_from_slice(&count.to_be_bytes());
7055        for offset in offsets {
7056            out.extend_from_slice(&offset.to_be_bytes());
7057        }
7058    }
7059
7060    for offset in &pseudo_offsets {
7061        out.extend_from_slice(&offset.to_be_bytes());
7062    }
7063    out.extend_from_slice(&(pseudo_merges.len() as u32).to_be_bytes());
7064    out.extend_from_slice(&(rows.len() as u32).to_be_bytes());
7065    let lookup_relative = lookup_start
7066        .checked_sub(start)
7067        .ok_or_else(|| GitError::InvalidFormat("pseudo-merge lookup underflow".into()))?;
7068    out.extend_from_slice(&(lookup_relative as u64).to_be_bytes());
7069    let extension_size = out
7070        .len()
7071        .checked_sub(start)
7072        .and_then(|len| len.checked_add(8))
7073        .ok_or_else(|| GitError::InvalidFormat("pseudo-merge extension overflow".into()))?;
7074    out.extend_from_slice(&(extension_size as u64).to_be_bytes());
7075    Ok(())
7076}
7077
7078/// Convenience wrapper that builds a `.bitmap` file in one call.
7079///
7080/// `object_types` lists the [`ObjectType`] of every pack object in pack order,
7081/// `pack_checksum` is the pack's trailing checksum, and `commits` carries, per
7082/// selected commit, `(pack_position, index_position, reachable_pack_positions)`
7083/// (see [`PackBitmapWriter::add_commit`] for the two position spaces). An
7084/// optional `name_hash_cache` (one entry per object) may be supplied to emit
7085/// the hash-cache extension.
7086pub fn write_bitmap(
7087    format: ObjectFormat,
7088    pack_checksum: ObjectId,
7089    object_types: &[ObjectType],
7090    commits: &[(u32, u32, Vec<u32>)],
7091    name_hash_cache: Option<Vec<u32>>,
7092) -> Result<Vec<u8>> {
7093    let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
7094    if let Some(cache) = name_hash_cache {
7095        writer = writer.with_name_hash_cache(cache)?;
7096    }
7097    for (commit_position, commit_index_position, reachable) in commits {
7098        writer.add_commit(*commit_position, *commit_index_position, reachable)?;
7099    }
7100    writer.write()
7101}
7102
7103#[cfg(test)]
7104mod tests {
7105    use super::*;
7106    use flate2::Compression;
7107    use flate2::read::ZlibDecoder;
7108    use flate2::write::ZlibEncoder;
7109    use std::fs;
7110    use std::io::Read;
7111    use std::io::Write;
7112    use std::path::{Path, PathBuf};
7113    use std::process::Command;
7114    use std::time::{SystemTime, UNIX_EPOCH};
7115
7116    fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
7117        PackWriteOptions::new()
7118            .with_prefer_ofs_delta(prefer_ofs_delta)
7119            .with_reorder(false)
7120    }
7121
7122    #[test]
7123    fn parses_single_blob_pack() {
7124        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
7125        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
7126        assert_eq!(parsed.version, 2);
7127        assert_eq!(parsed.entries.len(), 1);
7128        let object = &parsed.entries[0].object;
7129        assert_eq!(object.object_type, ObjectType::Blob);
7130        assert_eq!(object.body, b"hello\n");
7131        assert_eq!(
7132            parsed.entries[0].entry.oid.to_hex(),
7133            "ce013625030ba8dba906f756967f9e9ca394464a"
7134        );
7135    }
7136
7137    #[test]
7138    fn parses_single_blob_pack_sha256() {
7139        let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
7140        let parsed =
7141            PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
7142        assert_eq!(parsed.version, 2);
7143        assert_eq!(parsed.entries.len(), 1);
7144        let object = &parsed.entries[0].object;
7145        assert_eq!(object.object_type, ObjectType::Blob);
7146        assert_eq!(object.body, b"hello\n");
7147        assert_eq!(
7148            parsed.entries[0].entry.oid,
7149            object
7150                .object_id(ObjectFormat::Sha256)
7151                .expect("test operation should succeed")
7152        );
7153    }
7154
7155    #[test]
7156    fn parses_bundle_pack_payload_with_bundle_format() {
7157        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
7158        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
7159            .expect("test operation should succeed");
7160        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
7161            .into_bytes()
7162            .into_iter()
7163            .chain(pack)
7164            .collect::<Vec<_>>();
7165        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7166            .expect("test operation should succeed");
7167
7168        let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
7169        assert_eq!(parsed.entries.len(), 1);
7170        assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
7171        assert_eq!(parsed.entries[0].object.body, b"bundle\n");
7172    }
7173
7174    /// Build a pack whose single blob entry header LIES about its decompressed
7175    /// size: it declares `declared_size` while the actual zlib payload only
7176    /// inflates to `real_body`. A short `real_body` plus a `declared_size` of
7177    /// `u64::MAX` is the decompression-bomb shape — the header claims terabytes
7178    /// from a handful of compressed bytes.
7179    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
7180        let mut pack = Vec::new();
7181        pack.extend_from_slice(b"PACK");
7182        pack.extend_from_slice(&2u32.to_be_bytes());
7183        pack.extend_from_slice(&1u32.to_be_bytes());
7184        // Object type 3 == blob; size varint encodes the *attacker-declared* size.
7185        write_pack_entry_header_kind(&mut pack, 3, declared_size);
7186        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7187        encoder
7188            .write_all(real_body)
7189            .expect("test operation should succeed");
7190        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7191        let checksum =
7192            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7193        pack.extend_from_slice(checksum.as_bytes());
7194        pack
7195    }
7196
7197    /// Regression: a crafted pack object header declaring a gigantic decompressed
7198    /// size with a tiny compressed payload must NOT drive an up-front
7199    /// reservation/allocation of that declared size (OOM/abort). sley#2: the
7200    /// header `size` is attacker-controlled over the network (install_raw_pack →
7201    /// sley-fetch), so it must be validated/bounded before any `Vec::reserve`.
7202    ///
7203    /// On the unfixed code, `inflate_into` did `out.reserve(header.size as usize)`
7204    /// with `header.size == u64::MAX`, which panics with "capacity overflow" (or
7205    /// aborts on alloc failure) *before* the size-mismatch check could fire. We
7206    /// run parse on a worker thread so that panic surfaces as a `join()` error
7207    /// rather than killing the test process; the fix turns this into a clean
7208    /// `Err` returned normally.
7209    #[test]
7210    fn rejects_decompression_bomb_header_without_oom() {
7211        for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
7212            let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
7213            let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
7214            let result = handle.join();
7215            // The parse thread must not have panicked/aborted on a huge reserve.
7216            assert!(
7217                result.is_ok(),
7218                "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
7219            );
7220            // And parsing must reject the lie (decoded len != declared size).
7221            let parse_result = result.expect("parse thread should not panic on a bomb header");
7222            assert!(
7223                parse_result.is_err(),
7224                "bomb header (declared={declared}) should be rejected as invalid"
7225            );
7226        }
7227    }
7228
7229    /// Build a 2-object pack: a real base blob followed by a delta (ref or ofs)
7230    /// whose *result-size* varint lies, declaring `declared_result_size`, while
7231    /// carrying a tiny real instruction stream. The delta's base-size varint is
7232    /// set correctly (so the base-size check at the top of `apply_pack_delta`
7233    /// passes and we reach the result reservation). Used to drive the sley#35
7234    /// delta-result-size bomb.
7235    fn lying_result_size_delta_pack(
7236        format: ObjectFormat,
7237        declared_result_size: u64,
7238        delta_kind: DeltaKind,
7239    ) -> Vec<u8> {
7240        let base = b"hello";
7241        let result = b"hello world"; // real produced length = 11
7242
7243        // Hand-build a delta with a truthful base-size and a LYING result-size.
7244        let mut delta = Vec::new();
7245        write_delta_varint(&mut delta, base.len() as u64);
7246        write_delta_varint(&mut delta, declared_result_size);
7247        // Real instructions: copy `base` then insert " world".
7248        let suffix = &result[base.len()..];
7249        delta.push(0x90); // copy, 1 size byte present (bit 0x10)
7250        delta.push(base.len() as u8);
7251        delta.push(suffix.len() as u8);
7252        delta.extend_from_slice(suffix);
7253
7254        let mut pack = Vec::new();
7255        pack.extend_from_slice(b"PACK");
7256        pack.extend_from_slice(&2u32.to_be_bytes());
7257        pack.extend_from_slice(&2u32.to_be_bytes());
7258
7259        let base_offset = pack.len();
7260        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7261        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7262        encoder
7263            .write_all(base)
7264            .expect("test operation should succeed");
7265        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7266
7267        let delta_offset = pack.len();
7268        write_pack_entry_header_kind(
7269            &mut pack,
7270            match delta_kind {
7271                DeltaKind::Offset => 6,
7272                DeltaKind::Ref => 7,
7273            },
7274            delta.len() as u64,
7275        );
7276        match delta_kind {
7277            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7278            DeltaKind::Ref => {
7279                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7280                    .expect("test operation should succeed");
7281                pack.extend_from_slice(base_oid.as_bytes());
7282            }
7283        }
7284        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7285        encoder
7286            .write_all(&delta)
7287            .expect("test operation should succeed");
7288        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7289
7290        let checksum =
7291            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7292        pack.extend_from_slice(checksum.as_bytes());
7293        pack
7294    }
7295
7296    /// Regression (sley#35): the 2nd instance of the sley#2 decompression-bomb
7297    /// class. `apply_pack_delta` read an attacker-controlled `result_size` varint
7298    /// from a network delta and fed it straight to `Vec::with_capacity`. A tiny
7299    /// delta declaring `result_size == u64::MAX` (or ~1 TiB) aborts the process
7300    /// ("capacity overflow"/alloc failure, SIGABRT) BEFORE the post-decode
7301    /// size-mismatch check can reject the lie. Both ref-delta and ofs-delta paths
7302    /// reach the same reservation, so both must be safe. We resolve the pack on a
7303    /// worker thread so an abort/panic surfaces as a `join()` error rather than
7304    /// killing the whole test binary; the fix turns the bomb into a clean `Err`.
7305    #[test]
7306    fn rejects_delta_result_size_bomb_without_oom() {
7307        let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
7308        for &declared in bombs {
7309            for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
7310                let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
7311                let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
7312                let join_result = handle.join();
7313                assert!(
7314                    join_result.is_ok(),
7315                    "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
7316                     instead of erroring cleanly"
7317                );
7318                let parse_result =
7319                    join_result.expect("parse thread should not panic on a delta bomb");
7320                assert!(
7321                    parse_result.is_err(),
7322                    "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
7323                     as invalid (result.len() != declared)"
7324                );
7325            }
7326        }
7327    }
7328
7329    /// A legitimate (truthful) delta whose result-size varint matches the real
7330    /// produced length must still resolve correctly — the bound only caps the
7331    /// speculative reservation, it must not break real delta application.
7332    #[test]
7333    fn applies_legitimate_delta_after_result_size_bound() {
7334        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
7335            let base = b"hello";
7336            let result = b"hello world";
7337            let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
7338            let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
7339            assert_eq!(parsed.entries.len(), 2);
7340            assert_eq!(parsed.entries[0].object.body, base);
7341            assert_eq!(parsed.entries[1].object.body, result);
7342        }
7343    }
7344
7345    #[test]
7346    fn bounded_inflate_reserve_caps_attacker_declared_size() {
7347        // A tiny compressed input can't justify a multi-gigabyte reservation.
7348        assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
7349        // The absolute ceiling caps even a large input-justified hint.
7350        assert_eq!(
7351            bounded_inflate_reserve(usize::MAX, usize::MAX),
7352            MAX_INFLATE_RESERVE
7353        );
7354        // A modest legitimate hint is preserved unchanged (no regression for real
7355        // objects): 1000 bytes of output from 500 bytes of input is well within
7356        // both bounds.
7357        assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
7358        // Floor of 64 for tiny hints.
7359        assert_eq!(bounded_inflate_reserve(0, 0), 64);
7360    }
7361
7362    #[test]
7363    fn rejects_bundle_pack_payload_with_wrong_object_format() {
7364        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
7365        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
7366            .expect("test operation should succeed");
7367        let bundle_bytes =
7368            format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
7369                .into_bytes()
7370                .into_iter()
7371                .chain(pack)
7372                .collect::<Vec<_>>();
7373        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7374            .expect("test operation should succeed");
7375
7376        assert!(PackFile::parse_bundle(&bundle).is_err());
7377    }
7378
7379    fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
7380        let owned = PackIndex::parse(index, format).expect("test operation should succeed");
7381        let view = PackIndexView::parse(index, format).expect("test operation should succeed");
7382        let owned_view =
7383            PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
7384                .expect("test operation should succeed");
7385
7386        assert_eq!(view.version, owned.version);
7387        assert_eq!(view.count, owned.entries.len());
7388        assert_eq!(view.count(), owned.entries.len());
7389        assert_eq!(view.fanout(), &owned.fanout);
7390        assert_eq!(view.pack_checksum, owned.pack_checksum);
7391        assert_eq!(view.index_checksum, owned.index_checksum);
7392        assert_eq!(owned_view.version, owned.version);
7393        assert_eq!(owned_view.count(), owned.entries.len());
7394        assert_eq!(owned_view.fanout(), &owned.fanout);
7395        assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
7396        assert_eq!(owned_view.index_checksum, owned.index_checksum);
7397        for entry in &owned.entries {
7398            let owned_found = owned
7399                .find(&entry.oid)
7400                .expect("test operation should succeed");
7401            let expected = Some(PackIndexLookup {
7402                crc32: owned_found.crc32,
7403                offset: owned_found.offset,
7404            });
7405            assert_eq!(view.find(&entry.oid), expected);
7406            assert_eq!(owned_view.find(&entry.oid), expected);
7407        }
7408    }
7409
7410    #[test]
7411    fn writes_pack_and_index_that_round_trip() {
7412        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
7413        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7414            .expect("test operation should succeed");
7415        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7416        let index =
7417            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
7418        let oid = object
7419            .object_id(ObjectFormat::Sha1)
7420            .expect("test operation should succeed");
7421        assert_eq!(pack.entries[0].object, object);
7422        assert_eq!(index.pack_checksum, pack.checksum);
7423        assert_eq!(
7424            index
7425                .find(&oid)
7426                .expect("test operation should succeed")
7427                .offset,
7428            12
7429        );
7430    }
7431
7432    #[test]
7433    fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
7434        let objects = (0..8)
7435            .map(|idx| {
7436                EncodedObject::new(
7437                    ObjectType::Blob,
7438                    format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
7439                )
7440            })
7441            .collect::<Vec<_>>();
7442        let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
7443            .expect("test operation should succeed");
7444
7445        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
7446
7447        let view =
7448            PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
7449        let missing = sley_core::object_id_for_bytes(
7450            ObjectFormat::Sha1,
7451            "blob",
7452            b"not present in borrowed index\n",
7453        )
7454        .expect("test operation should succeed");
7455        assert_eq!(view.find(&missing), None);
7456    }
7457
7458    #[test]
7459    fn writes_sha256_pack_and_index_that_round_trip() {
7460        let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
7461        let written =
7462            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7463                .expect("test operation should succeed");
7464        let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
7465            .expect("test operation should succeed");
7466        let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
7467            .expect("test operation should succeed");
7468        let oid = object
7469            .object_id(ObjectFormat::Sha256)
7470            .expect("test operation should succeed");
7471        assert_eq!(pack.entries[0].object, object);
7472        assert_eq!(index.pack_checksum, pack.checksum);
7473        assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
7474        assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
7475        assert_eq!(
7476            index
7477                .find(&oid)
7478                .expect("test operation should succeed")
7479                .offset,
7480            12
7481        );
7482    }
7483
7484    #[test]
7485    fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
7486        let objects = (0..4)
7487            .map(|idx| {
7488                EncodedObject::new(
7489                    ObjectType::Blob,
7490                    format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
7491                )
7492            })
7493            .collect::<Vec<_>>();
7494        let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
7495            .expect("test operation should succeed");
7496
7497        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
7498    }
7499
7500    #[test]
7501    fn indexes_existing_sha256_pack_bytes() {
7502        let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
7503        let written =
7504            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7505                .expect("test operation should succeed");
7506
7507        let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
7508            .expect("test operation should succeed");
7509        let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
7510            .expect("test operation should succeed");
7511
7512        assert_eq!(indexed.pack_checksum, written.checksum);
7513        assert_eq!(indexed.entries, written.entries);
7514        assert_eq!(index.pack_checksum, written.checksum);
7515        assert_eq!(index.entries, written.entries);
7516    }
7517
7518    #[test]
7519    fn indexes_existing_delta_pack_bytes() {
7520        let (base, changed) = similar_blob_objects();
7521        let options = delta_pack_options(true);
7522        let written = PackFile::write_packed_with_options(
7523            &[base, changed.clone()],
7524            ObjectFormat::Sha1,
7525            &options,
7526        )
7527        .expect("test operation should succeed");
7528
7529        let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
7530            .expect("test operation should succeed");
7531        let index =
7532            PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
7533        let changed_oid = changed
7534            .object_id(ObjectFormat::Sha1)
7535            .expect("test operation should succeed");
7536
7537        assert_eq!(indexed.pack_checksum, written.checksum);
7538        assert_eq!(indexed.entries, written.entries);
7539        assert_eq!(
7540            index
7541                .find(&changed_oid)
7542                .expect("test operation should succeed")
7543                .offset,
7544            written.entries[1].offset
7545        );
7546        assert_eq!(
7547            index
7548                .find(&changed_oid)
7549                .expect("test operation should succeed")
7550                .crc32,
7551            written.entries[1].crc32
7552        );
7553    }
7554
7555    #[test]
7556    fn writes_ref_delta_pack_and_index_that_round_trip() {
7557        let (base, changed) = similar_blob_objects();
7558        let options = delta_pack_options(false);
7559        let written = PackFile::write_packed_with_options(
7560            &[base.clone(), changed.clone()],
7561            ObjectFormat::Sha1,
7562            &options,
7563        )
7564        .expect("test operation should succeed");
7565        let mut second_offset = written.entries[1].offset as usize;
7566        let header = parse_entry_header(&written.pack, &mut second_offset)
7567            .expect("test operation should succeed");
7568        assert_eq!(header.kind, PackObjectKind::RefDelta);
7569
7570        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7571        let index =
7572            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
7573        let oid = changed
7574            .object_id(ObjectFormat::Sha1)
7575            .expect("test operation should succeed");
7576        assert_eq!(pack.entries[0].object, base);
7577        assert_eq!(pack.entries[1].object, changed);
7578        assert_eq!(index.pack_checksum, pack.checksum);
7579        assert_eq!(
7580            index
7581                .find(&oid)
7582                .expect("test operation should succeed")
7583                .offset,
7584            written.entries[1].offset
7585        );
7586    }
7587
7588    #[test]
7589    fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
7590        let (base, changed) = similar_blob_objects();
7591        let options = delta_pack_options(true);
7592        let written = PackFile::write_packed_with_options(
7593            &[base, changed.clone()],
7594            ObjectFormat::Sha1,
7595            &options,
7596        )
7597        .expect("test operation should succeed");
7598        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
7599        let mut second = written.entries[1].offset as usize;
7600        assert_eq!(
7601            parse_entry_header(&written.pack, &mut second)
7602                .expect("test operation should succeed")
7603                .kind,
7604            PackObjectKind::OfsDelta
7605        );
7606        // Ground truth from a full parse; single-object decode must match at every offset.
7607        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7608        for po in &parsed.entries {
7609            let got =
7610                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
7611                    Ok(None)
7612                })
7613                .expect("test operation should succeed");
7614            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
7615        }
7616    }
7617
7618    /// A [`HeaderTypeCache`] over a plain map, for asserting the cached header
7619    /// read is byte-identical to the uncached one cold and warm (sley#26).
7620    #[derive(Default)]
7621    struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
7622
7623    impl HeaderTypeCache for MapHeaderTypeCache {
7624        fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
7625            self.0.get(&pack_offset).copied()
7626        }
7627        fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
7628            self.0.insert(pack_offset, header);
7629        }
7630    }
7631
7632    #[test]
7633    fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
7634        let (base, changed) = similar_blob_objects();
7635        let options = delta_pack_options(true);
7636        let written =
7637            PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
7638                .expect("test operation should succeed");
7639        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
7640        let mut second = written.entries[1].offset as usize;
7641        assert_eq!(
7642            parse_entry_header(&written.pack, &mut second)
7643                .expect("test operation should succeed")
7644                .kind,
7645            PackObjectKind::OfsDelta
7646        );
7647
7648        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7649        let mut cache = MapHeaderTypeCache::default();
7650        for po in &parsed.entries {
7651            let uncached =
7652                read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
7653                    Ok(None)
7654                })
7655                .expect("test operation should succeed");
7656            // Type inherited from the chain base; size is the inflated body length.
7657            assert_eq!(
7658                uncached,
7659                (po.object.object_type, po.object.body.len() as u64),
7660                "uncached header at offset {}",
7661                po.entry.offset
7662            );
7663            // Cold cache: must agree with the uncached read and populate the memo.
7664            let cold = read_object_header_at_with_cache(
7665                &written.pack,
7666                po.entry.offset,
7667                ObjectFormat::Sha1,
7668                |_| Ok(None),
7669                &mut cache,
7670            )
7671            .expect("test operation should succeed");
7672            assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
7673        }
7674        // Warm cache: every offset now resolves from the memo and is still correct,
7675        // proving the fast path does not change behavior (sley#26).
7676        for po in &parsed.entries {
7677            let warm = read_object_header_at_with_cache(
7678                &written.pack,
7679                po.entry.offset,
7680                ObjectFormat::Sha1,
7681                |_| panic!("warm cache must not re-walk the chain"),
7682                &mut cache,
7683            )
7684            .expect("test operation should succeed");
7685            assert_eq!(
7686                warm,
7687                (po.object.object_type, po.object.body.len() as u64),
7688                "warm cache at offset {}",
7689                po.entry.offset
7690            );
7691        }
7692    }
7693
7694    #[test]
7695    fn read_object_at_matches_full_parse_for_ref_delta_pack() {
7696        let (base, changed) = similar_blob_objects();
7697        let options = delta_pack_options(false);
7698        let written = PackFile::write_packed_with_options(
7699            &[base, changed.clone()],
7700            ObjectFormat::Sha1,
7701            &options,
7702        )
7703        .expect("test operation should succeed");
7704        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7705        let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
7706            .entries
7707            .iter()
7708            .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
7709            .collect();
7710        for po in &parsed.entries {
7711            let got =
7712                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
7713                    Ok(by_oid.get(oid).cloned())
7714                })
7715                .expect("test operation should succeed");
7716            assert_eq!(*got, po.object);
7717        }
7718    }
7719
7720    /// A test-only [`PackDeltaCache`] that records every decode and counts hits,
7721    /// used to prove the cached decode path is byte-identical to the uncached
7722    /// one and that bases are reused across reads.
7723    #[derive(Default)]
7724    struct CountingDeltaCache {
7725        map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
7726        hits: std::cell::Cell<usize>,
7727        inserts: std::cell::Cell<usize>,
7728    }
7729
7730    impl PackDeltaCache for CountingDeltaCache {
7731        fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
7732            let hit = self.map.borrow().get(&offset).cloned();
7733            if hit.is_some() {
7734                self.hits.set(self.hits.get() + 1);
7735            }
7736            hit
7737        }
7738        fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
7739            self.inserts.set(self.inserts.get() + 1);
7740            self.map.borrow_mut().insert(offset, object);
7741        }
7742    }
7743
7744    #[test]
7745    fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
7746        // A multi-object pack with a real ofs-delta chain so the cache has bases
7747        // to reuse. Build several similar blobs to encourage deltification.
7748        let mut objects = Vec::new();
7749        for idx in 0..8u32 {
7750            let mut body = vec![b'x'; 4096];
7751            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
7752            objects.push(EncodedObject::new(ObjectType::Blob, body));
7753        }
7754        let options = delta_pack_options(true);
7755        let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7756            .expect("test operation should succeed");
7757        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7758
7759        let cache = CountingDeltaCache::default();
7760        // Read every object twice through the cache; each result must equal the
7761        // ground-truth from the full parse, byte for byte, both times.
7762        for _ in 0..2 {
7763            for po in &parsed.entries {
7764                let got = read_object_at_with_cache_arc(
7765                    &written.pack,
7766                    po.entry.offset,
7767                    ObjectFormat::Sha1,
7768                    |_| Ok(None),
7769                    &cache,
7770                )
7771                .expect("test operation should succeed");
7772                assert_eq!(*got, po.object, "offset {}", po.entry.offset);
7773            }
7774        }
7775        // The second pass reads everything straight from the cache, so there must
7776        // be at least one hit (proving reuse, not just correctness).
7777        assert!(cache.hits.get() > 0, "cache never served a warm object");
7778    }
7779
7780    #[test]
7781    fn writes_ofs_delta_pack_and_index_that_round_trip() {
7782        let (base, changed) = similar_blob_objects();
7783        let options = delta_pack_options(true);
7784        let written = PackFile::write_packed_with_options(
7785            &[base.clone(), changed.clone()],
7786            ObjectFormat::Sha1,
7787            &options,
7788        )
7789        .expect("test operation should succeed");
7790        let mut second_offset = written.entries[1].offset as usize;
7791        let header = parse_entry_header(&written.pack, &mut second_offset)
7792            .expect("test operation should succeed");
7793        assert_eq!(header.kind, PackObjectKind::OfsDelta);
7794
7795        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7796        let index =
7797            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
7798        let oid = changed
7799            .object_id(ObjectFormat::Sha1)
7800            .expect("test operation should succeed");
7801        assert_eq!(pack.entries[0].object, base);
7802        assert_eq!(pack.entries[1].object, changed);
7803        assert_eq!(index.pack_checksum, pack.checksum);
7804        assert_eq!(
7805            index
7806                .find(&oid)
7807                .expect("test operation should succeed")
7808                .offset,
7809            written.entries[1].offset
7810        );
7811    }
7812
7813    #[test]
7814    fn resolves_ofs_delta_pack_entry() {
7815        let base = b"hello";
7816        let result = b"hello world";
7817        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
7818        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
7819        assert_eq!(parsed.entries.len(), 2);
7820        assert_eq!(parsed.entries[0].object.body, base);
7821        assert_eq!(parsed.entries[1].object.body, result);
7822        assert_eq!(
7823            parsed.entries[1].entry.oid,
7824            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
7825                .expect("test operation should succeed")
7826        );
7827    }
7828
7829    #[test]
7830    fn resolves_ref_delta_pack_entry() {
7831        let base = b"hello";
7832        let result = b"hello world";
7833        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
7834        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
7835        assert_eq!(parsed.entries.len(), 2);
7836        assert_eq!(parsed.entries[0].object.body, base);
7837        assert_eq!(parsed.entries[1].object.body, result);
7838        assert_eq!(
7839            parsed.entries[1].entry.oid,
7840            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
7841                .expect("test operation should succeed")
7842        );
7843    }
7844
7845    #[test]
7846    fn resolves_thin_ref_delta_pack_entry_with_external_base() {
7847        let base = b"hello";
7848        let result = b"hello world";
7849        let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
7850        assert!(PackFile::parse_sha1(&pack).is_err());
7851
7852        let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
7853            .expect("test operation should succeed");
7854        let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
7855            if oid == &base_oid {
7856                Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
7857            } else {
7858                Ok(None)
7859            }
7860        })
7861        .expect("test operation should succeed");
7862        assert_eq!(parsed.entries.len(), 1);
7863        assert_eq!(parsed.entries[0].object.body, result);
7864        assert_eq!(
7865            parsed.entries[0].entry.oid,
7866            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
7867                .expect("test operation should succeed")
7868        );
7869    }
7870
7871    #[test]
7872    fn rejects_bad_pack_checksum() {
7873        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
7874        let last = pack.len() - 1;
7875        pack[last] ^= 1;
7876        assert!(PackFile::parse_sha1(&pack).is_err());
7877    }
7878
7879    #[test]
7880    fn raw_pack_index_rejects_bad_pack_checksum() {
7881        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
7882        let last = pack.len() - 1;
7883        pack[last] ^= 1;
7884        assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
7885    }
7886
7887    #[test]
7888    fn pack_index_writer_rejects_duplicate_object_ids() {
7889        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
7890            .expect("test operation should succeed");
7891        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7892            .expect("test operation should succeed");
7893        let entries = vec![
7894            PackIndexEntry {
7895                oid,
7896                crc32: 1,
7897                offset: 12,
7898            },
7899            PackIndexEntry {
7900                oid,
7901                crc32: 2,
7902                offset: 24,
7903            },
7904        ];
7905        assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
7906    }
7907
7908    #[test]
7909    fn parses_single_entry_pack_index() {
7910        let oid = ObjectId::from_hex(
7911            ObjectFormat::Sha1,
7912            "ce013625030ba8dba906f756967f9e9ca394464a",
7913        )
7914        .expect("test operation should succeed");
7915        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7916            .expect("test operation should succeed");
7917        let index = single_entry_index(
7918            ObjectFormat::Sha1,
7919            oid,
7920            0x1234_5678,
7921            12,
7922            pack_checksum.clone(),
7923        );
7924        let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
7925        assert_eq!(parsed.version, 2);
7926        assert_eq!(parsed.pack_checksum, pack_checksum);
7927        assert_eq!(parsed.entries.len(), 1);
7928        assert_eq!(
7929            parsed
7930                .find(&oid)
7931                .expect("test operation should succeed")
7932                .offset,
7933            12
7934        );
7935        assert_eq!(
7936            parsed
7937                .find(&oid)
7938                .expect("test operation should succeed")
7939                .crc32,
7940            0x1234_5678
7941        );
7942        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
7943    }
7944
7945    #[test]
7946    fn parses_single_entry_pack_index_v1() {
7947        let oid = ObjectId::from_hex(
7948            ObjectFormat::Sha1,
7949            "ce013625030ba8dba906f756967f9e9ca394464a",
7950        )
7951        .expect("test operation should succeed");
7952        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7953            .expect("test operation should succeed");
7954        let index =
7955            single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
7956        let parsed =
7957            PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
7958        assert_eq!(parsed.version, 1);
7959        assert_eq!(parsed.pack_checksum, pack_checksum);
7960        assert_eq!(parsed.entries.len(), 1);
7961        assert_eq!(
7962            parsed
7963                .find(&oid)
7964                .expect("test operation should succeed")
7965                .offset,
7966            0x1234_5678
7967        );
7968        assert_eq!(
7969            parsed
7970                .find(&oid)
7971                .expect("test operation should succeed")
7972                .crc32,
7973            0
7974        );
7975        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
7976    }
7977
7978    #[test]
7979    fn rejects_bad_pack_index_v1_checksum() {
7980        let oid = ObjectId::from_hex(
7981            ObjectFormat::Sha1,
7982            "ce013625030ba8dba906f756967f9e9ca394464a",
7983        )
7984        .expect("test operation should succeed");
7985        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7986            .expect("test operation should succeed");
7987        let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
7988        let last = index.len() - 1;
7989        index[last] ^= 1;
7990        assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
7991    }
7992
7993    #[test]
7994    fn pack_index_view_reads_v2_large_offsets() {
7995        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
7996            .expect("test operation should succeed");
7997        let second =
7998            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
7999                .expect("test operation should succeed");
8000        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8001            .expect("test operation should succeed");
8002        let entries = vec![
8003            PackIndexEntry {
8004                oid: first,
8005                crc32: 0x1111_2222,
8006                offset: 0x8000_0000,
8007            },
8008            PackIndexEntry {
8009                oid: second,
8010                crc32: 0x3333_4444,
8011                offset: 0x1_0000_0042,
8012            },
8013        ];
8014        let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
8015            .expect("test operation should succeed");
8016
8017        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
8018        let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
8019            .expect("test operation should succeed");
8020        for entry in entries {
8021            assert_eq!(
8022                view.find(&entry.oid),
8023                Some(PackIndexLookup {
8024                    crc32: entry.crc32,
8025                    offset: entry.offset,
8026                })
8027            );
8028        }
8029    }
8030
8031    #[test]
8032    fn pack_index_view_default_parse_checks_index_checksum() {
8033        let oid = ObjectId::from_hex(
8034            ObjectFormat::Sha1,
8035            "ce013625030ba8dba906f756967f9e9ca394464a",
8036        )
8037        .expect("test operation should succeed");
8038        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8039            .expect("test operation should succeed");
8040        let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
8041        let last = index.len() - 1;
8042        index[last] ^= 1;
8043
8044        assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
8045        let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
8046            .expect("test operation should succeed");
8047        let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
8048            Arc::from(index.clone().into_boxed_slice()),
8049            ObjectFormat::Sha1,
8050        )
8051        .expect("test operation should succeed");
8052        assert_eq!(
8053            view.find(&oid),
8054            Some(PackIndexLookup {
8055                crc32: 0x1234_5678,
8056                offset: 12,
8057            })
8058        );
8059        assert_eq!(
8060            trusted_view.find(&oid),
8061            Some(PackIndexLookup {
8062                crc32: 0x1234_5678,
8063                offset: 12,
8064            })
8065        );
8066    }
8067
8068    #[test]
8069    fn parses_pack_reverse_index() {
8070        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8071            .expect("test operation should succeed");
8072        let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
8073            .expect("test operation should succeed");
8074        let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
8075            .expect("test operation should succeed");
8076        assert_eq!(parsed.version, 1);
8077        assert_eq!(parsed.format, ObjectFormat::Sha1);
8078        assert_eq!(parsed.positions, vec![2, 0, 1]);
8079        assert_eq!(parsed.pack_checksum, pack_checksum);
8080        assert_eq!(
8081            PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
8082                .expect("test operation should succeed"),
8083            reverse_index
8084        );
8085    }
8086
8087    #[test]
8088    fn rejects_bad_pack_reverse_index_checksum() {
8089        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8090            .expect("test operation should succeed");
8091        let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
8092            .expect("test operation should succeed");
8093        let last = reverse_index.len() - 1;
8094        reverse_index[last] ^= 1;
8095        assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
8096    }
8097
8098    #[test]
8099    fn rejects_bad_pack_reverse_index_positions() {
8100        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8101            .expect("test operation should succeed");
8102        let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
8103        assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
8104        let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
8105        assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
8106        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8107            .expect("test operation should succeed");
8108        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
8109        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
8110    }
8111
8112    #[test]
8113    fn parses_pack_mtimes() {
8114        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8115            .expect("test operation should succeed");
8116        let mtimes = PackMtimes::write(
8117            ObjectFormat::Sha1,
8118            &[1, 1_700_000_000, u32::MAX],
8119            &pack_checksum,
8120        )
8121        .expect("test operation should succeed");
8122        let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
8123            .expect("test operation should succeed");
8124        assert_eq!(parsed.version, 1);
8125        assert_eq!(parsed.format, ObjectFormat::Sha1);
8126        assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
8127        assert_eq!(parsed.pack_checksum, pack_checksum);
8128        assert_eq!(
8129            PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
8130                .expect("test operation should succeed"),
8131            mtimes
8132        );
8133    }
8134
8135    #[test]
8136    fn rejects_bad_pack_mtimes_checksum() {
8137        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8138            .expect("test operation should succeed");
8139        let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
8140            .expect("test operation should succeed");
8141        let last = mtimes.len() - 1;
8142        mtimes[last] ^= 1;
8143        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
8144    }
8145
8146    #[test]
8147    fn rejects_bad_pack_mtimes_shape() {
8148        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8149            .expect("test operation should succeed");
8150        let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
8151        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
8152
8153        let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
8154        wrong_hash[11] = 2;
8155        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
8156        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
8157            .expect("test operation should succeed");
8158        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
8159        assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
8160    }
8161
8162    #[test]
8163    fn parses_multi_pack_index_header_and_chunk_lookup() {
8164        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
8165            .expect("test operation should succeed");
8166        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
8167            .expect("test operation should succeed");
8168        let chunks = midx_chunks_with_pack_names(
8169            ObjectFormat::Sha1,
8170            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
8171            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
8172        );
8173        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
8174        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
8175            .expect("test operation should succeed");
8176        assert_eq!(parsed.version, 2);
8177        assert_eq!(parsed.format, ObjectFormat::Sha1);
8178        assert_eq!(parsed.pack_count, 2);
8179        assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
8180        assert_eq!(parsed.object_count, 2);
8181        assert_eq!(parsed.objects.len(), 2);
8182        assert_eq!(
8183            parsed
8184                .find(&first)
8185                .expect("test operation should succeed")
8186                .pack_int_id,
8187            0
8188        );
8189        assert_eq!(
8190            parsed
8191                .find(&first)
8192                .expect("test operation should succeed")
8193                .offset,
8194            12
8195        );
8196        assert_eq!(
8197            parsed
8198                .find(&second)
8199                .expect("test operation should succeed")
8200                .pack_int_id,
8201            1
8202        );
8203        assert_eq!(
8204            parsed
8205                .find(&second)
8206                .expect("test operation should succeed")
8207                .offset,
8208            0x1_0000_0000
8209        );
8210        assert_eq!(parsed.reverse_index, None);
8211        assert_eq!(parsed.bitmapped_packs, None);
8212        assert_eq!(parsed.chunks.len(), 5);
8213        assert_eq!(parsed.chunks[0].id, *b"PNAM");
8214        assert_eq!(parsed.chunks[0].offset, 84);
8215        assert_eq!(parsed.chunks[0].len, 24);
8216        assert_eq!(parsed.chunks[1].id, *b"OIDF");
8217        assert_eq!(parsed.chunks[1].offset, 108);
8218        assert_eq!(parsed.chunks[1].len, 1024);
8219    }
8220
8221    #[test]
8222    fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
8223        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
8224            .expect("test operation should succeed");
8225        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
8226            .expect("test operation should succeed");
8227        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
8228            .expect("test operation should succeed");
8229        let chunks = midx_chunks_with_pack_names(
8230            ObjectFormat::Sha1,
8231            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
8232            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
8233        );
8234        let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
8235        let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
8236            .expect("test operation should succeed");
8237
8238        assert!(lookup.contains(&first));
8239        assert!(lookup.contains(&second));
8240        assert!(!lookup.contains(&missing));
8241
8242        let first_entry = lookup
8243            .find(&first)
8244            .expect("test operation should succeed")
8245            .expect("object should be present");
8246        assert_eq!(
8247            lookup.pack_name(first_entry.pack_int_id),
8248            Some("pack-a.idx")
8249        );
8250        assert_eq!(first_entry.offset, 12);
8251
8252        let second_entry = lookup
8253            .find(&second)
8254            .expect("test operation should succeed")
8255            .expect("object should be present");
8256        assert_eq!(
8257            lookup.pack_name(second_entry.pack_int_id),
8258            Some("pack-b.idx")
8259        );
8260        assert_eq!(second_entry.offset, 0x1_0000_0000);
8261        assert!(
8262            lookup
8263                .find(&missing)
8264                .expect("test operation should succeed")
8265                .is_none()
8266        );
8267    }
8268
8269    #[test]
8270    fn rejects_bad_multi_pack_index_checksum() {
8271        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
8272        let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
8273        let last = midx.len() - 1;
8274        midx[last] ^= 1;
8275        assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
8276    }
8277
8278    #[test]
8279    fn rejects_bad_multi_pack_index_shape() {
8280        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
8281        let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
8282        wrong_hash[5] = 2;
8283        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
8284        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
8285            .expect("test operation should succeed");
8286        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
8287        assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
8288
8289        let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
8290        missing_terminator[12] = b'B';
8291        let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
8292        let checksum =
8293            sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
8294                .expect("test operation should succeed");
8295        missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
8296        assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
8297
8298        let mut bad_offset = multi_pack_index(
8299            ObjectFormat::Sha1,
8300            2,
8301            0,
8302            &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
8303        );
8304        bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
8305        let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
8306        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
8307            .expect("test operation should succeed");
8308        bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
8309        assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
8310    }
8311
/// Feeds [`MultiPackIndex::parse`] a series of indexes whose pack-name data is
/// malformed and checks that each one is refused, then confirms that the same
/// out-of-order name list that fails under version 1 is accepted under
/// version 2.
#[test]
fn rejects_bad_multi_pack_index_pack_names() {
    let format = ObjectFormat::Sha1;
    // Object-less chunk list built around the given raw pack-name bytes.
    let name_chunks =
        |raw_names: &[u8]| midx_chunks_with_pack_names(format, raw_names.to_vec(), &[]);

    // An index with no chunks at all.
    let no_chunks = multi_pack_index(format, 2, 1, &[]);
    assert!(MultiPackIndex::parse(&no_chunks, format).is_err());

    // One name supplied while the index is built with a count of two.
    let one_name_for_two = multi_pack_index(format, 2, 2, &name_chunks(b"pack-a.idx\0"));
    assert!(MultiPackIndex::parse(&one_name_for_two, format).is_err());

    // Non-NUL bytes trailing the single name.
    let junk_after_name = multi_pack_index(format, 2, 1, &name_chunks(b"pack-a.idx\0xxxx"));
    assert!(MultiPackIndex::parse(&junk_after_name, format).is_err());

    // "pack-b" listed ahead of "pack-a": refused for version 1 ...
    let descending: &[u8] = b"pack-b.idx\0pack-a.idx\0";
    let descending_v1 = multi_pack_index(format, 1, 2, &name_chunks(descending));
    assert!(MultiPackIndex::parse(&descending_v1, format).is_err());

    // ... but parsed, with the order preserved, for version 2.
    let descending_v2 = multi_pack_index(format, 2, 2, &name_chunks(descending));
    let accepted = MultiPackIndex::parse(&descending_v2, format)
        .expect("test operation should succeed");
    assert_eq!(accepted.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
}
8359
/// Checks that [`MultiPackIndex::parse`] refuses indexes whose object tables
/// are absent or inconsistent.
///
/// Covered cases, in order: no `OIDF` chunk, an all-zero fanout next to a
/// non-empty `OIDL`, an `OIDL` whose ids are in descending order, an object
/// pointing at pack id 1 when only one pack name exists, an offset that needs
/// the large-offset table while no `LOFF` chunk is present, and a `LOFF` chunk
/// holding a single byte.
#[test]
fn rejects_bad_multi_pack_index_object_tables() {
    let format = ObjectFormat::Sha1;
    // `ObjectId` is `Copy`, so the ids are passed by value below without cloning.
    let oid_a = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");

    // Pack names only: the object tables are missing entirely.
    let missing_oidf = multi_pack_index(
        format,
        2,
        1,
        &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
    );
    assert!(MultiPackIndex::parse(&missing_oidf, format).is_err());

    // Fanout of 256 zeroed counters although one object id is listed.
    let bad_fanout = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", vec![0; 256 * 4]),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
    ];
    let bad_fanout = multi_pack_index(format, 2, 1, &bad_fanout);
    assert!(MultiPackIndex::parse(&bad_fanout, format).is_err());

    // Lookup table written as b, a while the fanout is built from a, b.
    let mut oid_lookup = Vec::new();
    oid_lookup.extend_from_slice(oid_b.as_bytes());
    oid_lookup.extend_from_slice(oid_a.as_bytes());
    let unsorted = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", midx_oid_fanout(&[oid_a, oid_b])),
        (*b"OIDL", oid_lookup),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
        ),
    ];
    let unsorted = multi_pack_index(format, 2, 1, &unsorted);
    assert!(MultiPackIndex::parse(&unsorted, format).is_err());

    // The only object refers to pack id 1, but a single pack name is present.
    let bad_pack = multi_pack_index(
        format,
        2,
        1,
        &midx_chunks_with_pack_names(format, b"pack-a.idx\0\0".to_vec(), &[(oid_a, 1, 12)]),
    );
    assert!(MultiPackIndex::parse(&bad_pack, format).is_err());

    // Offset 0x1_0000_0000 does not fit in 32 bits; the collected large
    // offsets are deliberately never written out as a `LOFF` chunk.
    let mut large_offsets = Vec::new();
    let missing_loff = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
        ),
    ];
    let missing_loff = multi_pack_index(format, 2, 1, &missing_loff);
    assert!(MultiPackIndex::parse(&missing_loff, format).is_err());

    // A `LOFF` chunk consisting of one lone byte.
    let mut bad_loff = midx_chunks_with_pack_names(format, b"pack-a.idx\0\0".to_vec(), &[]);
    bad_loff.push((*b"LOFF", vec![0]));
    let bad_loff = multi_pack_index(format, 2, 1, &bad_loff);
    assert!(MultiPackIndex::parse(&bad_loff, format).is_err());
}
8435
/// Builds a two-pack, two-object index carrying `RIDX` and `BTMP` chunks and
/// checks that parsing exposes them as `reverse_index` and `bitmapped_packs`.
#[test]
fn parses_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let first_oid = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second_oid = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    // One object per pack, then the two optional chunks under test.
    let mut chunk_list = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first_oid, 0, 12), (second_oid, 1, 24)],
    );
    let reverse_chunk = (*b"RIDX", midx_u32_table(&[1, 0]));
    chunk_list.push(reverse_chunk);
    let bitmap_chunk = (*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)]));
    chunk_list.push(bitmap_chunk);

    let raw = multi_pack_index(format, 2, 2, &chunk_list);
    let parsed =
        MultiPackIndex::parse(&raw, format).expect("test operation should succeed");

    assert_eq!(parsed.reverse_index, Some(vec![1, 0]));

    let expected_packs = vec![
        MultiPackBitmapPack {
            bitmap_pos: 0,
            bitmap_nr: 1,
        },
        MultiPackBitmapPack {
            bitmap_pos: 1,
            bitmap_nr: 1,
        },
    ];
    assert_eq!(parsed.bitmapped_packs, Some(expected_packs));
}
8468
/// Writes a version-2 multi-pack index with [`MultiPackIndex::write`] and
/// checks that [`MultiPackIndex::parse`] reads back the same version, pack
/// names, object count and per-object locations.
///
/// One entry uses offset `0x1_0000_0000`, which does not fit in 32 bits; the
/// test asserts that the written index contains a `LOFF` chunk.
#[test]
fn writes_multi_pack_index_that_round_trips() {
    let format = ObjectFormat::Sha1;
    let first = sley_core::object_id_for_bytes(format, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(format, "blob", b"second object\n")
        .expect("test operation should succeed");

    // `ObjectId` is `Copy`, so the ids go into the entries by value and stay
    // usable for the lookups below.
    let bytes = MultiPackIndex::write(
        format,
        2,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[
            MultiPackIndexEntry {
                oid: second,
                pack_int_id: 0,
                offset: 0x1_0000_0000,
                force_large_offset: false,
            },
            MultiPackIndexEntry {
                oid: first,
                pack_int_id: 1,
                offset: 12,
                force_large_offset: false,
            },
        ],
    )
    .expect("test operation should succeed");

    let parsed =
        MultiPackIndex::parse(&bytes, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
    assert_eq!(parsed.object_count, 2);

    // Look each object up once and check both recorded fields.
    let first_entry = parsed
        .find(&first)
        .expect("test operation should succeed");
    assert_eq!(first_entry.pack_int_id, 1);
    assert_eq!(first_entry.offset, 12);

    let second_entry = parsed
        .find(&second)
        .expect("test operation should succeed");
    assert_eq!(second_entry.pack_int_id, 0);
    assert_eq!(second_entry.offset, 0x1_0000_0000);

    assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
}
8531
/// Checks that [`MultiPackIndex::write`] returns an error for each of five
/// invalid argument combinations.
#[test]
fn write_multi_pack_index_rejects_invalid_inputs() {
    let format = ObjectFormat::Sha1;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"object\n")
        .expect("test operation should succeed");
    // Entry for the single test object at the given pack id and offset.
    let entry_at = |pack_int_id, offset| MultiPackIndexEntry {
        oid,
        pack_int_id,
        offset,
        force_large_offset: false,
    };

    // Version 3.
    let version_three = MultiPackIndex::write(format, 3, &["pack-a.idx".into()], &[]);
    assert!(version_three.is_err());

    // Version 1 with "pack-b" listed ahead of "pack-a".
    let descending_v1 = MultiPackIndex::write(
        format,
        1,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[],
    );
    assert!(descending_v1.is_err());

    // A pack name containing a path separator.
    let name_with_slash = MultiPackIndex::write(format, 2, &["pack/a.idx".into()], &[]);
    assert!(name_with_slash.is_err());

    // An entry with pack id 1 while only one pack name is given.
    let pack_id_past_names =
        MultiPackIndex::write(format, 2, &["pack-a.idx".into()], &[entry_at(1, 12)]);
    assert!(pack_id_past_names.is_err());

    // The same object id listed twice at different offsets.
    let repeated_oid = MultiPackIndex::write(
        format,
        2,
        &["pack-a.idx".into()],
        &[entry_at(0, 12), entry_at(0, 24)],
    );
    assert!(repeated_oid.is_err());
}
8584
/// Checks that [`MultiPackIndex::parse`] refuses malformed `RIDX` and `BTMP`
/// chunks: a reverse index naming position 0 twice, a `BTMP` chunk with one
/// record for two packs, and a `BTMP` record `(1, 2)` in an index that holds
/// only two objects.
#[test]
fn rejects_bad_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    // `ObjectId` is `Copy`; the ids are reused across fixtures without cloning.
    let oid_a = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");

    // Reverse index with a repeated position.
    let mut duplicate_ridx = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a, 0, 12), (oid_b, 0, 24)],
    );
    duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
    let duplicate_ridx = multi_pack_index(format, 2, 1, &duplicate_ridx);
    assert!(MultiPackIndex::parse(&duplicate_ridx, format).is_err());

    // Two packs, but only one bitmapped-pack record.
    let mut short_btmp = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(oid_a, 0, 12), (oid_b, 1, 24)],
    );
    short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
    let short_btmp = multi_pack_index(format, 2, 2, &short_btmp);
    assert!(MultiPackIndex::parse(&short_btmp, format).is_err());

    // Bitmapped-pack record (1, 2) against an index of two objects.
    let mut out_of_range_btmp = midx_chunks_with_pack_names(
        format,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a, 0, 12), (oid_b, 0, 24)],
    );
    out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
    let out_of_range_btmp = multi_pack_index(format, 2, 1, &out_of_range_btmp);
    assert!(MultiPackIndex::parse(&out_of_range_btmp, format).is_err());
}
8625
/// Parses a SHA-1 pack bitmap index built with the `FULL_DAG` and `HASH_CACHE`
/// options and checks every parsed field, including the name-hash cache.
#[test]
fn parses_pack_bitmap_index_with_hash_cache() {
    let format = ObjectFormat::Sha1;
    let options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Three objects, one bitmap entry at index position 2, three cached hashes.
    let raw = pack_bitmap_index(
        format,
        3,
        options,
        &pack_checksum,
        &[(2, 0, 1, &[0b101])],
        Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
    );
    let parsed =
        PackBitmapIndex::parse(&raw, format, 3).expect("test operation should succeed");

    // Header fields.
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, format);
    assert_eq!(parsed.options, options);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    // Type bitmaps are sized to the object count.
    assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
    assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);

    // The single bitmap entry.
    assert_eq!(parsed.entries.len(), 1);
    let only_entry = parsed
        .entry_for_index_position(2)
        .expect("test operation should succeed");
    assert_eq!(only_entry.xor_offset, 0);
    assert_eq!(only_entry.flags, 1);
    assert_eq!(only_entry.bitmap.words, ewah_literal_words(&[0b101]));

    // Name-hash cache comes back verbatim.
    let expected_cache = vec![0x1111_1111, 0x2222_2222, 0x3333_3333];
    assert_eq!(parsed.name_hash_cache, Some(expected_cache));
}
8662
/// Parses a SHA-256 pack bitmap index without a name-hash cache and checks
/// that both checksums and the entry position come back as expected.
#[test]
fn parses_pack_bitmap_index_sha256() {
    let format = ObjectFormat::Sha256;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Two objects, one bitmap entry at index position 0, no hash cache.
    let raw = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[0b11])],
        None,
    );
    let parsed =
        PackBitmapIndex::parse(&raw, format, 2).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, format);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), format);
    assert_eq!(parsed.entries[0].object_position, 0);
    assert_eq!(parsed.name_hash_cache, None);
}
8685
/// Interoperability check against a real `git` binary: creates a repository
/// with two empty commits, repacks it with `git repack -adb`, and parses the
/// resulting `.idx` and `.bitmap` files.
///
/// The scratch repository is removed by a drop guard, so it is cleaned up even
/// when a `git` invocation or an assertion panics.
#[test]
fn parses_upstream_git_written_pack_bitmap_index() {
    /// Deletes the wrapped directory tree when dropped, ignoring any error.
    struct RemoveDirOnDrop<'a>(&'a std::path::Path);

    impl Drop for RemoveDirOnDrop<'_> {
        fn drop(&mut self) {
            // Best-effort cleanup, same as the non-panicking path always was.
            let _ = std::fs::remove_dir_all(self.0);
        }
    }

    let root = unique_temp_dir("git-pack-bitmap-upstream");
    fs::create_dir_all(&root).expect("test operation should succeed");
    let _cleanup = RemoveDirOnDrop(&root);

    run_git_success(&root, &["init", "-q", "-b", "main"]);
    // Two empty commits, identical except for the message.
    for message in ["one", "two"] {
        run_git_success(
            &root,
            &[
                "-c",
                "user.name=Example User",
                "-c",
                "user.email=example@example.invalid",
                "commit",
                "--allow-empty",
                "-q",
                "-m",
                message,
            ],
        );
    }
    run_git_success(&root, &["repack", "-adb"]);

    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_path = single_path_with_extension(&pack_dir, "idx");
    let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");

    let index = PackIndex::parse(
        &fs::read(idx_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");
    // The bitmap parser is given the object count taken from the pack index.
    let bitmap = PackBitmapIndex::parse(
        &fs::read(bitmap_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
        index.entries.len(),
    )
    .expect("test operation should succeed");

    assert_eq!(bitmap.pack_checksum, index.pack_checksum);
    assert!(!bitmap.entries.is_empty());
}
8740
/// Checks that [`PackBitmapIndex::parse`] refuses a corrupted signature, an
/// unsupported version, an unknown option bit and a wrong trailing checksum.
///
/// Every header corruption is followed by `refresh_trailing_checksum`, so the
/// checksum is valid again and the rejection has to come from the header
/// check itself. Only the final case leaves the checksum broken on purpose.
#[test]
fn rejects_bad_pack_bitmap_index_header_and_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    // Well-formed single-object fixture that each case below corrupts.
    let bitmap = pack_bitmap_index(
        format,
        1,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_checksum,
        &[(0, 0, 0, &[1])],
        None,
    );

    // First byte overwritten. Without the refresh this case would also be
    // rejected by a checksum mismatch and never exercise the signature check.
    let mut bad_signature = bitmap.clone();
    bad_signature[0] = b'X';
    refresh_trailing_checksum(format, &mut bad_signature);
    assert!(PackBitmapIndex::parse(&bad_signature, format, 1).is_err());

    // Byte 5 set to 2.
    let mut bad_version = bitmap.clone();
    bad_version[5] = 2;
    refresh_trailing_checksum(format, &mut bad_version);
    assert!(PackBitmapIndex::parse(&bad_version, format, 1).is_err());

    // Byte 7 set to 0x20.
    let mut bad_option = bitmap.clone();
    bad_option[7] = 0x20;
    refresh_trailing_checksum(format, &mut bad_option);
    assert!(PackBitmapIndex::parse(&bad_option, format, 1).is_err());

    // Lowest bit of the final byte flipped; the checksum is left stale.
    let mut bad_checksum = bitmap;
    let last = bad_checksum.len() - 1;
    bad_checksum[last] ^= 1;
    assert!(PackBitmapIndex::parse(&bad_checksum, format, 1).is_err());
}
8773
/// Checks that [`PackBitmapIndex::parse`] refuses a truncated body, an entry
/// whose object position equals the object count, and a first entry with a
/// non-zero XOR offset.
#[test]
fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
    let format = ObjectFormat::Sha1;
    let full_dag = PackBitmapIndex::OPTION_FULL_DAG;
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    // Shorten a two-entry fixture by the checksum length plus one byte, then
    // refresh the trailing checksum of what is left.
    let two_entries = pack_bitmap_index(
        format,
        2,
        full_dag,
        &pack_checksum,
        &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
        None,
    );
    let mut cut_short = two_entries.clone();
    let kept_len = cut_short.len() - format.raw_len() - 1;
    cut_short.truncate(kept_len);
    refresh_trailing_checksum(format, &mut cut_short);
    assert!(PackBitmapIndex::parse(&cut_short, format, 2).is_err());

    // Entry at position 2 with only two objects; checked as built and again
    // after refreshing the checksum.
    let mut position_past_end = pack_bitmap_index(
        format,
        2,
        full_dag,
        &pack_checksum,
        &[(2, 0, 0, &[0b01])],
        None,
    );
    assert!(PackBitmapIndex::parse(&position_past_end, format, 2).is_err());
    refresh_trailing_checksum(format, &mut position_past_end);
    assert!(PackBitmapIndex::parse(&position_past_end, format, 2).is_err());

    // The only entry has XOR offset 1, with no earlier entry to refer to.
    let xor_without_predecessor = pack_bitmap_index(
        format,
        2,
        full_dag,
        &pack_checksum,
        &[(0, 1, 0, &[0b01])],
        None,
    );
    assert!(PackBitmapIndex::parse(&xor_without_predecessor, format, 2).is_err());
}
8814
/// Parses a one-object SHA-256 pack index and checks its version, checksums
/// and the object's offset and CRC, then cross-checks the borrowed view
/// against the owned parse via `assert_pack_index_view_matches_owned`.
#[test]
fn parses_single_entry_pack_index_sha256() {
    let format = ObjectFormat::Sha256;
    let oid = sley_core::object_id_for_bytes(format, "blob", b"hello sha256\n")
        .expect("test operation should succeed");
    let pack_checksum =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");

    let raw_index = single_entry_index(format, oid, 0x1234_5678, 12, pack_checksum.clone());
    let parsed =
        PackIndex::parse(&raw_index, format).expect("test operation should succeed");

    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    // One lookup serves both field checks.
    let located = parsed
        .find(&oid)
        .expect("test operation should succeed");
    assert_eq!(located.offset, 12);
    assert_eq!(located.crc32, 0x1234_5678);

    assert_eq!(parsed.index_checksum.format(), format);
    assert_pack_index_view_matches_owned(&raw_index, format);
}
8850
/// Runs the shared delta round-trip scenario with SHA-1 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
8855
/// Runs the shared delta round-trip scenario with SHA-256 object ids.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
8860
/// Checks that [`PackFile::write_packed`] returns an error when the same blob
/// appears twice in its input.
#[test]
fn write_packed_rejects_duplicate_objects() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let twice = [blob.clone(), blob];
    let outcome = PackFile::write_packed(&twice, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
8866
/// Checks that [`PackFile::write_packed_with_known_ids`] returns an error for
/// a repeated input and for an id computed under a different object format
/// than the one the pack is written with.
#[test]
fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let sha1_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let sha256_id = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // The same (id, object) pair supplied twice.
    let repeated = [
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
    ];
    let repeated_outcome = PackFile::write_packed_with_known_ids(&repeated, ObjectFormat::Sha1);
    assert!(repeated_outcome.is_err());

    // A SHA-256 id handed to a SHA-1 pack write.
    let mismatched = [PackInput {
        oid: &sha256_id,
        object: &blob,
    }];
    let mismatched_outcome =
        PackFile::write_packed_with_known_ids(&mismatched, ObjectFormat::Sha1);
    assert!(mismatched_outcome.is_err());
}
8894
/// Writes the same inputs once in memory and once through the streaming
/// writer, then checks that the pack bytes and all summary fields agree.
#[test]
fn write_packed_with_known_ids_to_writer_matches_in_memory_pack() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);

    // Ids are computed up front and paired with their objects by position.
    let object_ids: Vec<_> = objects
        .iter()
        .map(|object| {
            object
                .object_id(format)
                .expect("test operation should succeed")
        })
        .collect();
    let inputs: Vec<_> = object_ids
        .iter()
        .zip(&objects)
        .map(|(oid, object)| PackInput { oid, object })
        .collect();
    let options = PackWriteOptions::new();

    // Reference result: the whole pack held in memory.
    let in_memory =
        PackFile::write_packed_with_known_ids_and_options(&inputs, format, &options)
            .expect("test operation should succeed");

    // Result under test: pack bytes pushed into a caller-supplied writer.
    let mut sink = Vec::new();
    let streamed =
        PackFile::write_packed_with_known_ids_to_writer(&inputs, format, &options, &mut sink)
            .expect("test operation should succeed");

    assert_eq!(sink, in_memory.pack);
    assert_eq!(streamed.index, in_memory.index);
    assert_eq!(streamed.checksum, in_memory.checksum);
    assert_eq!(streamed.entries, in_memory.entries);
    assert_eq!(streamed.delta_count, in_memory.delta_count);
    assert_eq!(streamed.pack_size, in_memory.pack.len() as u64);
}
8934
/// Places a base blob at index `PACK_STREAM_COMPRESSION_WINDOW_OBJECTS - 1`
/// and a near-identical target blob directly after it, behind a run of
/// unrelated blobs, and checks that the source-backed streaming writer still
/// stores the target as a delta of that base.
#[test]
fn write_packed_from_source_to_writer_deltifies_across_windows() {
    let format = ObjectFormat::Sha1;

    // Unrelated filler blobs first, then the base/target pair.
    let filler_count = PACK_STREAM_COMPRESSION_WINDOW_OBJECTS - 1;
    let mut objects: Vec<_> = (0..filler_count)
        .map(|idx| {
            let body = format!("unrelated streamed source object {idx:04}\n").into_bytes();
            EncodedObject::new(ObjectType::Blob, body)
        })
        .collect();
    objects.push(EncodedObject::new(
        ObjectType::Blob,
        b"cross-window base payload with enough shared anchors\nbase\n".to_vec(),
    ));
    objects.push(EncodedObject::new(
        ObjectType::Blob,
        b"cross-window base payload with enough shared anchors\ntarget\n".to_vec(),
    ));

    let object_ids: Vec<_> = objects
        .iter()
        .map(|object| {
            object
                .object_id(format)
                .expect("test operation should succeed")
        })
        .collect();
    let base_oid = object_ids[filler_count];
    let target_oid = object_ids[filler_count + 1];

    // Lookup table that backs the writer's object source callback.
    let mut object_map = HashMap::with_capacity(object_ids.len());
    for (oid, object) in object_ids.iter().copied().zip(objects) {
        object_map.insert(oid, Arc::new(object));
    }

    let options = PackWriteOptions::new().with_reorder(false).with_window(10);
    let mut sink = Vec::new();
    let summary = PackFile::write_packed_from_source_to_writer(
        &object_ids,
        format,
        &options,
        |oid| match object_map.get(oid) {
            Some(object) => Ok(Arc::clone(object)),
            None => Err(GitError::not_found(format!("missing test object {oid}"))),
        },
        &mut sink,
    )
    .expect("test operation should succeed");

    assert!(
        summary.delta_count > 0,
        "expected source-backed streaming writer to find deltas"
    );

    // The written pack must record the base as the target's delta base.
    let stats =
        PackFile::verify_pack_stats(&sink, format).expect("test operation should succeed");
    let target_stats = stats
        .objects
        .iter()
        .find(|entry| entry.oid == target_oid)
        .expect("target object should be present");
    assert_eq!(target_stats.base_oid, Some(base_oid));
}
8996
8997    fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
8998        let objects = similar_blob_family(8);
8999        let packed =
9000            PackFile::write_packed(&objects, format).expect("test operation should succeed");
9001        let undeltified =
9002            PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
9003
9004        // The whole point of delta selection: the packed output is smaller than
9005        // storing every object undeltified.
9006        assert!(
9007            packed.pack.len() < undeltified.pack.len(),
9008            "expected delta pack ({}) smaller than undeltified pack ({})",
9009            packed.pack.len(),
9010            undeltified.pack.len()
9011        );
9012
9013        // At least one object must actually be stored as a delta.
9014        let kinds = pack_entry_kinds(&packed.pack, format);
9015        let delta_count = kinds
9016            .iter()
9017            .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
9018            .count();
9019        assert!(
9020            delta_count >= 1,
9021            "expected at least one delta entry, found kinds {kinds:?}"
9022        );
9023
9024        // Round-trip: every original object reconstructs byte-for-byte.
9025        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
9026        assert_eq!(parsed.entries.len(), objects.len());
9027        for object in &objects {
9028            let oid = object
9029                .object_id(format)
9030                .expect("test operation should succeed");
9031            let found = parsed
9032                .entries
9033                .iter()
9034                .find(|entry| entry.entry.oid == oid)
9035                .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
9036            assert_eq!(&found.object, object, "object {oid} did not round-trip");
9037        }
9038
9039        // The index must agree with the pack and locate every object.
9040        let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
9041        assert_eq!(index.pack_checksum, packed.checksum);
9042        for object in &objects {
9043            let oid = object
9044                .object_id(format)
9045                .expect("test operation should succeed");
9046            assert!(index.find(&oid).is_some(), "index missing {oid}");
9047        }
9048    }
9049
/// Checks that a pack written with default options contains ofs-delta
/// entries, contains no ref-delta entries, and parses back successfully.
#[test]
fn write_packed_emits_ofs_delta_by_default() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);
    let packed =
        PackFile::write_packed(&objects, format).expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let has_ofs_delta = kinds.contains(&PackObjectKind::OfsDelta);
    assert!(
        has_ofs_delta,
        "expected an ofs-delta entry by default, found {kinds:?}"
    );
    let has_ref_delta = kinds.contains(&PackObjectKind::RefDelta);
    assert!(
        !has_ref_delta,
        "default self-contained pack must not use ref-delta, found {kinds:?}"
    );

    // The pack must still parse.
    let reparsed = PackFile::parse(&packed.pack, format);
    assert!(reparsed.is_ok());
}
9067
/// Checks that turning off `prefer_ofs_delta` yields ref-delta entries and no
/// ofs-delta entries, and that the pack still parses with every object.
#[test]
fn write_packed_can_emit_ref_delta() {
    let format = ObjectFormat::Sha1;
    let objects = similar_blob_family(6);
    let ref_delta_options = PackWriteOptions::new().with_prefer_ofs_delta(false);
    let packed = PackFile::write_packed_with_options(&objects, format, &ref_delta_options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&packed.pack, format);
    let has_ref_delta = kinds.contains(&PackObjectKind::RefDelta);
    assert!(has_ref_delta, "expected a ref-delta entry, found {kinds:?}");
    let has_ofs_delta = kinds.contains(&PackObjectKind::OfsDelta);
    assert!(
        !has_ofs_delta,
        "ref-delta mode must not emit ofs-delta, found {kinds:?}"
    );

    // All bases live in this same pack, so parsing needs no external lookup.
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), objects.len());
}
9090
/// For several depth limits, packs a chain of incrementally grown blobs with
/// a window as large as the input and checks that no entry's delta chain is
/// deeper than the limit while every object is still recovered unchanged.
#[test]
fn write_packed_bounds_delta_chain_depth() {
    let format = ObjectFormat::Sha1;
    // Each blob extends its predecessor, so without a depth limit the deltas
    // could form one chain as long as the input.
    let chain = incremental_blob_chain(20);

    for max_depth in [1usize, 2, 5] {
        let options = PackWriteOptions::new()
            .with_window(20)
            .with_depth(max_depth);
        let written = PackFile::write_packed_with_options(&chain, format, &options)
            .expect("test operation should succeed");

        let observed = pack_entry_depths(&written.pack, format)
            .into_iter()
            .max()
            .unwrap_or(0);
        assert!(
            observed <= max_depth,
            "max chain depth {observed} exceeded bound {max_depth}"
        );

        // Limiting the depth must not cost correctness: each input object is
        // found in the parsed pack and compares equal.
        let reparsed =
            PackFile::parse(&written.pack, format).expect("test operation should succeed");
        for expected in &chain {
            let oid = expected
                .object_id(format)
                .expect("test operation should succeed");
            let located = reparsed
                .entries
                .iter()
                .find(|candidate| candidate.entry.oid == oid)
                .expect("test operation should succeed");
            assert_eq!(&located.object, expected);
        }
    }
}
9129
/// A depth limit of zero must leave every entry undeltified, even for blobs
/// that are highly similar.
#[test]
fn write_packed_depth_zero_stores_everything_undeltified() {
    let format = ObjectFormat::Sha1;
    let family = similar_blob_family(5);
    let no_delta_options = PackWriteOptions::new().with_depth(0);
    let written = PackFile::write_packed_with_options(&family, format, &no_delta_options)
        .expect("test operation should succeed");

    let kinds = pack_entry_kinds(&written.pack, format);
    let any_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta));
    assert!(!any_delta, "depth 0 must disable deltas, found {kinds:?}");
}
9144
/// Runs the thin-pack round-trip scenario with SHA-1 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_thin_uses_external_base_and_round_trips(format);
}
9149
/// Runs the thin-pack round-trip scenario with SHA-256 object ids.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_thin_uses_external_base_and_round_trips(format);
}
9154
9155    fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
9156        // The base object stays OUT of the pack; only `target` is written, as a
9157        // ref-delta against the external base's object id.
9158        let base = blob_with_marker("EXTERNAL-BASE");
9159        let target = blob_with_marker("EXTERNAL-TARGET");
9160        let base_oid = base
9161            .object_id(format)
9162            .expect("test operation should succeed");
9163
9164        let mut external = HashMap::new();
9165        external.insert(base_oid, base.clone());
9166        let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
9167            .expect("test operation should succeed");
9168
9169        // Exactly one entry, encoded as a ref-delta to the external base.
9170        let kinds = pack_entry_kinds(&packed.pack, format);
9171        assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
9172
9173        // The external base reference must be the base oid.
9174        let mut offset = 12usize;
9175        let header =
9176            parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
9177        assert_eq!(header.kind, PackObjectKind::RefDelta);
9178        let referenced =
9179            ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
9180                .expect("test operation should succeed");
9181        assert_eq!(referenced, base_oid);
9182
9183        // A plain (non-thin) parse fails: the base is not present.
9184        assert!(PackFile::parse(&packed.pack, format).is_err());
9185
9186        // A thin parse that supplies the external base reconstructs the target.
9187        let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
9188            if oid == &base_oid {
9189                Ok(Some(base.clone()))
9190            } else {
9191                Ok(None)
9192            }
9193        })
9194        .expect("test operation should succeed");
9195        assert_eq!(parsed.entries.len(), 1);
9196        assert_eq!(parsed.entries[0].object, target);
9197    }
9198
/// Objects of different types with unrelated contents must all survive a
/// write/parse round trip.
#[test]
fn write_packed_preserves_distinct_objects_with_no_similarity() {
    let format = ObjectFormat::Sha1;
    // A blob, an empty tree and a commit: nothing here resembles anything
    // else.
    let unrelated = vec![
        EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
        EncodedObject::new(ObjectType::Tree, Vec::<u8>::new()),
        EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
    ];

    let written =
        PackFile::write_packed(&unrelated, format).expect("test operation should succeed");
    let reparsed =
        PackFile::parse(&written.pack, format).expect("test operation should succeed");

    assert_eq!(reparsed.entries.len(), unrelated.len());
    for expected in &unrelated {
        let oid = expected
            .object_id(format)
            .expect("test operation should succeed");
        let present = reparsed
            .entries
            .iter()
            .any(|candidate| candidate.entry.oid == oid);
        assert!(present);
    }
}
9220
9221    /// Build a family of blobs that all share a large common region but differ
9222    /// in a marker placed in the *middle*, so a good delta finds copy regions on
9223    /// both sides of the change.
9224    fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
9225        let mut common_head = Vec::new();
9226        for _ in 0..200 {
9227            common_head.extend_from_slice(b"shared header line for delta testing\n");
9228        }
9229        let mut common_tail = Vec::new();
9230        for _ in 0..200 {
9231            common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
9232        }
9233        (0..count)
9234            .map(|idx| {
9235                let mut body = common_head.clone();
9236                body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
9237                body.extend_from_slice(&common_tail);
9238                EncodedObject::new(ObjectType::Blob, body)
9239            })
9240            .collect()
9241    }
9242
9243    /// Build a chain where each blob is the previous one plus an appended line,
9244    /// so each is highly similar to its predecessor.
9245    fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
9246        let mut body = Vec::new();
9247        for _ in 0..100 {
9248            body.extend_from_slice(b"baseline content shared across the whole chain\n");
9249        }
9250        let mut objects = Vec::with_capacity(count);
9251        for idx in 0..count {
9252            body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
9253            objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
9254        }
9255        objects
9256    }
9257
9258    fn blob_with_marker(marker: &str) -> EncodedObject {
9259        let mut body = Vec::new();
9260        for _ in 0..150 {
9261            body.extend_from_slice(b"common body shared between base and target\n");
9262        }
9263        body.extend_from_slice(marker.as_bytes());
9264        body.push(b'\n');
9265        for _ in 0..150 {
9266            body.extend_from_slice(b"more common body shared between objects\n");
9267        }
9268        EncodedObject::new(ObjectType::Blob, body)
9269    }
9270
9271    /// Classify every entry in a pack (in pack order) by its on-disk kind.
9272    fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
9273        pack_entry_descriptors(pack, format)
9274            .into_iter()
9275            .map(|descriptor| descriptor.kind)
9276            .collect()
9277    }
9278
9279    /// Compute each entry's delta chain depth (0 = undeltified base), in pack
9280    /// order. Entries always appear after their in-pack bases, so a single
9281    /// forward pass suffices.
9282    fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
9283        let descriptors = pack_entry_descriptors(pack, format);
9284        let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
9285        let mut depths = Vec::with_capacity(descriptors.len());
9286        for descriptor in &descriptors {
9287            let depth = match &descriptor.base {
9288                EntryBase::None => 0,
9289                EntryBase::Offset(base_offset) => {
9290                    depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
9291                }
9292                // Ref-delta to an in-pack base: look it up by offset via oid is
9293                // unnecessary for these tests (which only use ofs-delta for the
9294                // chains), so treat as depth 1 if unknown.
9295                EntryBase::Ref => 1,
9296            };
9297            depth_by_offset.insert(descriptor.offset, depth);
9298            depths.push(depth);
9299        }
9300        depths
9301    }
9302
9303    struct EntryDescriptor {
9304        offset: u64,
9305        kind: PackObjectKind,
9306        base: EntryBase,
9307    }
9308
/// How a pack entry refers to its delta base.
enum EntryBase {
    /// The entry is not a delta.
    None,
    /// Ofs-delta: the base offset as returned by
    /// `parse_ofs_delta_base_offset`.
    Offset(u64),
    /// Ref-delta: the base is named by object id (the id is not kept).
    Ref,
}
9314
9315    fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
9316        let trailer_offset = pack.len() - format.raw_len();
9317        let count = u32_be(&pack[8..12]) as usize;
9318        let mut offset = 12usize;
9319        let mut descriptors = Vec::with_capacity(count);
9320        for _ in 0..count {
9321            let entry_offset = offset as u64;
9322            let header =
9323                parse_entry_header(pack, &mut offset).expect("test operation should succeed");
9324            let base = match header.kind {
9325                PackObjectKind::OfsDelta => {
9326                    let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
9327                        .expect("test operation should succeed");
9328                    EntryBase::Offset(base_offset)
9329                }
9330                PackObjectKind::RefDelta => {
9331                    offset += format.raw_len();
9332                    EntryBase::Ref
9333                }
9334                _ => EntryBase::None,
9335            };
9336            let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
9337            let mut body = Vec::new();
9338            decoder
9339                .read_to_end(&mut body)
9340                .expect("test operation should succeed");
9341            offset += decoder.total_in() as usize;
9342            descriptors.push(EntryDescriptor {
9343                offset: entry_offset,
9344                kind: header.kind,
9345                base,
9346            });
9347        }
9348        descriptors
9349    }
9350
9351    fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
9352        let mut base = Vec::new();
9353        for _ in 0..300 {
9354            base.extend_from_slice(b"common payload\n");
9355        }
9356        base.extend_from_slice(b"base\n");
9357        let mut changed = Vec::new();
9358        for _ in 0..300 {
9359            changed.extend_from_slice(b"common payload\n");
9360        }
9361        changed.extend_from_slice(b"changed\n");
9362        (
9363            EncodedObject::new(ObjectType::Blob, base),
9364            EncodedObject::new(ObjectType::Blob, changed),
9365        )
9366    }
9367
9368    fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
9369        let mut pack = Vec::new();
9370        pack.extend_from_slice(b"PACK");
9371        pack.extend_from_slice(&2u32.to_be_bytes());
9372        pack.extend_from_slice(&1u32.to_be_bytes());
9373        write_entry_header(&mut pack, object_type, body.len() as u64);
9374        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9375        encoder
9376            .write_all(body)
9377            .expect("test operation should succeed");
9378        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9379        let checksum =
9380            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
9381        pack.extend_from_slice(checksum.as_bytes());
9382        pack
9383    }
9384
/// Selects how `two_object_delta_pack` encodes the delta entry's base.
#[derive(Debug, Clone, Copy)]
enum DeltaKind {
    /// Written with type code 6, followed by an offset to the base.
    Offset,
    /// Written with type code 7, followed by the base's object id.
    Ref,
}
9390
9391    fn two_object_delta_pack(
9392        format: ObjectFormat,
9393        base: &[u8],
9394        result: &[u8],
9395        delta_kind: DeltaKind,
9396    ) -> Vec<u8> {
9397        let mut pack = Vec::new();
9398        pack.extend_from_slice(b"PACK");
9399        pack.extend_from_slice(&2u32.to_be_bytes());
9400        pack.extend_from_slice(&2u32.to_be_bytes());
9401
9402        let base_offset = pack.len();
9403        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
9404        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9405        encoder
9406            .write_all(base)
9407            .expect("test operation should succeed");
9408        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9409
9410        let delta = append_suffix_delta(base, result);
9411        let delta_offset = pack.len();
9412        write_pack_entry_header_kind(
9413            &mut pack,
9414            match delta_kind {
9415                DeltaKind::Offset => 6,
9416                DeltaKind::Ref => 7,
9417            },
9418            delta.len() as u64,
9419        );
9420        match delta_kind {
9421            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
9422            DeltaKind::Ref => {
9423                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
9424                    .expect("test operation should succeed");
9425                pack.extend_from_slice(base_oid.as_bytes());
9426            }
9427        }
9428        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9429        encoder
9430            .write_all(&delta)
9431            .expect("test operation should succeed");
9432        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9433
9434        let checksum =
9435            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
9436        pack.extend_from_slice(checksum.as_bytes());
9437        pack
9438    }
9439
9440    fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
9441        let mut pack = Vec::new();
9442        pack.extend_from_slice(b"PACK");
9443        pack.extend_from_slice(&2u32.to_be_bytes());
9444        pack.extend_from_slice(&1u32.to_be_bytes());
9445
9446        let delta = append_suffix_delta(base, result);
9447        write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
9448        let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
9449            .expect("test operation should succeed");
9450        pack.extend_from_slice(base_oid.as_bytes());
9451        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9452        encoder
9453            .write_all(&delta)
9454            .expect("test operation should succeed");
9455        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9456
9457        let checksum =
9458            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
9459        pack.extend_from_slice(checksum.as_bytes());
9460        pack
9461    }
9462
/// Return a path under the system temp directory that has not been handed
/// out before by this process. The directory itself is NOT created.
///
/// The name combines `name`, the process id, the current time in nanoseconds
/// since the Unix epoch and a process-wide sequence number. The sequence
/// number is what guarantees uniqueness inside one process: tests run on
/// several threads, and two calls with the same `name` can observe the same
/// timestamp when the system clock is coarser than a nanosecond.
///
/// Panics if the system clock reports a time before the Unix epoch.
fn unique_temp_dir(name: &str) -> PathBuf {
    static SEQUENCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("test operation should succeed")
        .as_nanos();
    // Only uniqueness of the counter value matters, so relaxed ordering is
    // sufficient.
    let sequence = SEQUENCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    std::env::temp_dir().join(format!(
        "sley-{name}-{}-{nanos}-{sequence}",
        std::process::id()
    ))
}
9470
/// Run `git` with `args` inside `cwd` and panic unless it exits successfully.
///
/// The panic message includes the arguments and, when the command ran but
/// failed, its exit code plus captured stdout and stderr.
fn run_git_success(cwd: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.current_dir(cwd).args(args);

    let output = match command.output() {
        Ok(output) => output,
        Err(err) => panic!("failed to run git {args:?}: {err}"),
    };
    if !output.status.success() {
        panic!(
            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
            output.status.code(),
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        );
    }
}
9485
/// Return the path of the one entry directly inside `dir` whose extension is
/// `extension` (given without the leading dot).
///
/// Panics if the directory cannot be read or if the number of matching
/// entries is not exactly one.
fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
    let mut matching = Vec::new();
    for entry in fs::read_dir(dir).expect("test operation should succeed") {
        let path = entry.expect("test operation should succeed").path();
        let has_extension = path.extension().and_then(|ext| ext.to_str()) == Some(extension);
        if has_extension {
            matching.push(path);
        }
    }
    assert_eq!(matching.len(), 1, "expected one .{extension} file");
    matching.swap_remove(0)
}
9495
9496    fn pack_bitmap_index(
9497        format: ObjectFormat,
9498        object_count: u32,
9499        options: u16,
9500        pack_checksum: &ObjectId,
9501        entries: &[(u32, u8, u8, &[u64])],
9502        name_hash_cache: Option<&[u32]>,
9503    ) -> Vec<u8> {
9504        let mut out = Vec::new();
9505        out.extend_from_slice(b"BITM");
9506        out.extend_from_slice(&1u16.to_be_bytes());
9507        out.extend_from_slice(&options.to_be_bytes());
9508        out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
9509        out.extend_from_slice(pack_checksum.as_bytes());
9510        write_test_ewah(&mut out, object_count, &[0b001]);
9511        write_test_ewah(&mut out, object_count, &[0b010]);
9512        write_test_ewah(&mut out, object_count, &[0b100]);
9513        write_test_ewah(&mut out, object_count, &[0]);
9514        for (position, xor_offset, flags, words) in entries {
9515            out.extend_from_slice(&position.to_be_bytes());
9516            out.push(*xor_offset);
9517            out.push(*flags);
9518            write_test_ewah(&mut out, object_count, words);
9519        }
9520        if let Some(cache) = name_hash_cache {
9521            for value in cache {
9522                out.extend_from_slice(&value.to_be_bytes());
9523            }
9524        }
9525        let checksum =
9526            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
9527        out.extend_from_slice(checksum.as_bytes());
9528        out
9529    }
9530
9531    fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
9532        out.extend_from_slice(&bit_size.to_be_bytes());
9533        let words = ewah_literal_words(literals);
9534        out.extend_from_slice(&(words.len() as u32).to_be_bytes());
9535        for word in words {
9536            out.extend_from_slice(&word.to_be_bytes());
9537        }
9538        out.extend_from_slice(&0u32.to_be_bytes());
9539    }
9540
/// Return a word list made of one leading word, holding the number of
/// literals shifted left by 33 bits, followed by `literals` unchanged.
fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
    let leading_word = (literals.len() as u64) << 33;
    std::iter::once(leading_word)
        .chain(literals.iter().copied())
        .collect()
}
9547
9548    fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
9549        let checksum_offset = bytes.len() - format.raw_len();
9550        let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
9551            .expect("test operation should succeed");
9552        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
9553    }
9554
9555    fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
9556        assert!(result.starts_with(base));
9557        let suffix = &result[base.len()..];
9558        assert!(base.len() < 0x10000);
9559        assert!(suffix.len() < 0x80);
9560        let mut delta = Vec::new();
9561        write_delta_varint(&mut delta, base.len() as u64);
9562        write_delta_varint(&mut delta, result.len() as u64);
9563        delta.push(0x90);
9564        delta.push(base.len() as u8);
9565        delta.push(suffix.len() as u8);
9566        delta.extend_from_slice(suffix);
9567        delta
9568    }
9569
/// Append `value` to `out` as a little-endian base-128 varint: seven bits per
/// byte, least significant group first, with the high bit set on every byte
/// except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    // Emit continuation bytes while more than seven bits remain.
    while value >= 0x80 {
        out.push((value as u8 & 0x7f) | 0x80);
        value >>= 7;
    }
    out.push(value as u8);
}
9583
/// Append a pack entry header for a raw `type_code` and object `size`.
///
/// The first byte holds the type code in bits 4..=6 and the low four bits of
/// the size; the remaining size bits follow seven at a time, least
/// significant group first. Every byte except the last has its high bit set.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    let mut remaining = size >> 4;
    let first = (type_code << 4) | (size as u8 & 0x0f);
    out.push(if remaining == 0 { first } else { first | 0x80 });

    while remaining != 0 {
        let group = remaining as u8 & 0x7f;
        remaining >>= 7;
        out.push(if remaining == 0 { group } else { group | 0x80 });
    }
}
9600
/// Append the base offset of an ofs-delta entry, i.e. the distance
/// `relative` from the delta entry back to its base.
///
/// Uses git's offset encoding: big-endian groups of seven bits with the high
/// bit set on every byte except the last, where each continuation step also
/// subtracts one so that no value has two encodings. Values below 0x80 are
/// written as a single byte; larger distances use as many bytes as needed,
/// so the helper also works when the base entry is more than 127 bytes away.
fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
    // Bytes are produced least significant group first and reversed at the
    // end, mirroring how git fills its header buffer from the back.
    let mut remaining = relative;
    let mut reversed = vec![(remaining & 0x7f) as u8];
    remaining >>= 7;
    while remaining != 0 {
        remaining -= 1;
        reversed.push(0x80 | (remaining & 0x7f) as u8);
        remaining >>= 7;
    }
    out.extend(reversed.iter().rev());
}
9605
9606    fn single_entry_index(
9607        format: ObjectFormat,
9608        oid: ObjectId,
9609        crc32: u32,
9610        offset: u32,
9611        pack_checksum: ObjectId,
9612    ) -> Vec<u8> {
9613        let mut index = Vec::new();
9614        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
9615        index.extend_from_slice(&2u32.to_be_bytes());
9616        for idx in 0..256 {
9617            let count = if idx >= usize::from(oid.as_bytes()[0]) {
9618                1u32
9619            } else {
9620                0u32
9621            };
9622            index.extend_from_slice(&count.to_be_bytes());
9623        }
9624        index.extend_from_slice(oid.as_bytes());
9625        index.extend_from_slice(&crc32.to_be_bytes());
9626        index.extend_from_slice(&offset.to_be_bytes());
9627        index.extend_from_slice(pack_checksum.as_bytes());
9628        let checksum =
9629            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
9630        index.extend_from_slice(checksum.as_bytes());
9631        index
9632    }
9633
9634    fn single_entry_index_v1(
9635        format: ObjectFormat,
9636        oid: ObjectId,
9637        offset: u32,
9638        pack_checksum: ObjectId,
9639    ) -> Vec<u8> {
9640        let mut index = Vec::new();
9641        for idx in 0..256 {
9642            let count = if idx >= usize::from(oid.as_bytes()[0]) {
9643                1u32
9644            } else {
9645                0u32
9646            };
9647            index.extend_from_slice(&count.to_be_bytes());
9648        }
9649        index.extend_from_slice(&offset.to_be_bytes());
9650        index.extend_from_slice(oid.as_bytes());
9651        index.extend_from_slice(pack_checksum.as_bytes());
9652        let checksum =
9653            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
9654        index.extend_from_slice(checksum.as_bytes());
9655        index
9656    }
9657
9658    fn pack_reverse_index(
9659        format: ObjectFormat,
9660        positions: &[u32],
9661        pack_checksum: ObjectId,
9662    ) -> Vec<u8> {
9663        let mut reverse_index = Vec::new();
9664        reverse_index.extend_from_slice(b"RIDX");
9665        reverse_index.extend_from_slice(&1u32.to_be_bytes());
9666        reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
9667        for position in positions {
9668            reverse_index.extend_from_slice(&position.to_be_bytes());
9669        }
9670        reverse_index.extend_from_slice(pack_checksum.as_bytes());
9671        let checksum =
9672            sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
9673        reverse_index.extend_from_slice(checksum.as_bytes());
9674        reverse_index
9675    }
9676
9677    fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
9678        let mut out = Vec::new();
9679        out.extend_from_slice(b"MTME");
9680        out.extend_from_slice(&1u32.to_be_bytes());
9681        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
9682        for mtime in mtimes {
9683            out.extend_from_slice(&mtime.to_be_bytes());
9684        }
9685        out.extend_from_slice(pack_checksum.as_bytes());
9686        let checksum =
9687            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
9688        out.extend_from_slice(checksum.as_bytes());
9689        out
9690    }
9691
9692    fn midx_chunks_with_pack_names(
9693        _format: ObjectFormat,
9694        pack_names: Vec<u8>,
9695        entries: &[(ObjectId, u32, u64)],
9696    ) -> Vec<([u8; 4], Vec<u8>)> {
9697        let mut entries = entries.to_vec();
9698        entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
9699        let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
9700        let mut large_offsets = Vec::new();
9701        let mut chunks = vec![
9702            (*b"PNAM", pack_names),
9703            (*b"OIDF", midx_oid_fanout(&object_ids)),
9704            (*b"OIDL", midx_oid_lookup(&object_ids)),
9705            (
9706                *b"OOFF",
9707                midx_ooff_entries(
9708                    &entries
9709                        .iter()
9710                        .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
9711                        .collect::<Vec<_>>(),
9712                    &mut large_offsets,
9713                ),
9714            ),
9715        ];
9716        if !large_offsets.is_empty() {
9717            chunks.push((*b"LOFF", large_offsets));
9718        }
9719        chunks
9720    }
9721
9722    fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
9723        let mut counts = [0u32; 256];
9724        for oid in object_ids {
9725            counts[oid.as_bytes()[0] as usize] += 1;
9726        }
9727        let mut running = 0u32;
9728        let mut out = Vec::new();
9729        for count in counts {
9730            running += count;
9731            out.extend_from_slice(&running.to_be_bytes());
9732        }
9733        out
9734    }
9735
9736    fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
9737        let mut out = Vec::new();
9738        for oid in object_ids {
9739            out.extend_from_slice(oid.as_bytes());
9740        }
9741        out
9742    }
9743
/// Serialise `(pack_int_id, offset)` pairs as rows of two big-endian `u32`s.
///
/// Offsets below `0x8000_0000` are stored directly. Larger ones are appended
/// to `large_offsets` as big-endian `u64`s, and the row stores
/// `0x8000_0000 | index` instead, where `index` is the offset's position in
/// `large_offsets`.
fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
    let mut rows = Vec::new();
    for &(pack_int_id, offset) in entries {
        let offset_field = if offset >= 0x8000_0000 {
            // Index is derived from the table's current length in 8-byte slots.
            let large_idx = (large_offsets.len() / 8) as u32;
            large_offsets.extend_from_slice(&offset.to_be_bytes());
            0x8000_0000 | large_idx
        } else {
            offset as u32
        };
        rows.extend_from_slice(&pack_int_id.to_be_bytes());
        rows.extend_from_slice(&offset_field.to_be_bytes());
    }
    rows
}
9758
/// Serialise `values` as consecutive big-endian `u32`s.
fn midx_u32_table(values: &[u32]) -> Vec<u8> {
    values
        .iter()
        .flat_map(|value| value.to_be_bytes())
        .collect()
}
9766
/// Serialise `(bitmap_pos, bitmap_nr)` pairs as rows of two big-endian
/// `u32`s, in the order given.
fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
    entries
        .iter()
        .flat_map(|&(bitmap_pos, bitmap_nr)| [bitmap_pos, bitmap_nr])
        .flat_map(|field| field.to_be_bytes())
        .collect()
}
9775
9776    fn multi_pack_index(
9777        format: ObjectFormat,
9778        version: u8,
9779        pack_count: u32,
9780        chunks: &[([u8; 4], Vec<u8>)],
9781    ) -> Vec<u8> {
9782        let lookup_len = (chunks.len() + 1) * 12;
9783        let mut out = Vec::new();
9784        out.extend_from_slice(b"MIDX");
9785        out.push(version);
9786        out.push(hash_function_id(format) as u8);
9787        out.push(chunks.len() as u8);
9788        out.push(0);
9789        out.extend_from_slice(&pack_count.to_be_bytes());
9790        let mut chunk_offset = (12 + lookup_len) as u64;
9791        for (id, data) in chunks {
9792            out.extend_from_slice(id);
9793            out.extend_from_slice(&chunk_offset.to_be_bytes());
9794            chunk_offset += data.len() as u64;
9795        }
9796        out.extend_from_slice(&[0, 0, 0, 0]);
9797        out.extend_from_slice(&chunk_offset.to_be_bytes());
9798        for (_id, data) in chunks {
9799            out.extend_from_slice(data);
9800        }
9801        let checksum =
9802            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
9803        out.extend_from_slice(checksum.as_bytes());
9804        out
9805    }
9806
9807    // ---- EWAH encoder / bitmap writer tests ------------------------------
9808
9809    fn pack_checksum_sha1() -> ObjectId {
9810        sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
9811    }
9812
9813    fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
9814        // Wrap the EWAH body with the surrounding offset bookkeeping the parser
9815        // expects: a checksum offset that lies just past the serialised bitmap.
9816        let mut offset = 0usize;
9817        let checksum_offset = bytes.len();
9818        parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
9819            .expect("test operation should succeed")
9820    }
9821
/// A bitmap built from one literal word must hold exactly the words that the
/// `ewah_literal_words` test helper produces for that literal, with the RLW
/// at position 0 and the requested bit size.
#[test]
fn ewah_encodes_single_literal_word_matching_helper() {
    let literal = 0b101u64;
    let expected_words = ewah_literal_words(&[literal]);

    let bitmap =
        EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");

    assert_eq!(bitmap.words, expected_words);
    assert_eq!(bitmap.rlw_position, 0);
    assert_eq!(bitmap.bit_size, 64);
}
9832
/// The serialised form of a one-literal bitmap must be, field by field and
/// all big-endian: bit size, word count, the RLW, the literal, and the RLW
/// position.
#[test]
fn ewah_byte_layout_is_big_endian() {
    let literal = 0x0102_0304_0506_0708u64;
    let bitmap =
        EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");
    let bytes = bitmap.to_bytes();

    let expected = [
        &64u32.to_be_bytes()[..],         // bit_size
        &2u32.to_be_bytes()[..],          // word count: rlw + literal
        &(1u64 << 33).to_be_bytes()[..],  // rlw: literal_len = 1
        &literal.to_be_bytes()[..],       // the literal word
        &0u32.to_be_bytes()[..],          // rlw_position
    ]
    .concat();
    assert_eq!(bytes, expected);
}
9846
9847    #[test]
9848    fn ewah_empty_bitmap_serialises_like_git() {
9849        let ewah = EwahBitmap::empty();
9850        let bytes = ewah.to_bytes();
9851        // bit_size = 0, word_count = 0, rlw_position = 0.
9852        assert_eq!(bytes, vec![0u8; 12]);
9853        // It must still parse and decode to nothing.
9854        let parsed = parse_ewah_bytes(&bytes);
9855        assert_eq!(parsed, ewah);
9856        assert!(
9857            parsed
9858                .to_positions()
9859                .expect("test operation should succeed")
9860                .is_empty()
9861        );
9862    }
9863
9864    #[test]
9865    fn ewah_compresses_clean_zero_run() {
9866        // Three all-zero words followed by a literal: the encoder should emit a
9867        // single RLW carrying a run of 3 clean-zero words plus one literal.
9868        let ewah =
9869            EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
9870        assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
9871        let rlw = ewah.words[0];
9872        assert_eq!(rlw & 1, 0, "run bit should be zero");
9873        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
9874        assert_eq!(rlw >> 33, 1, "literal length should be 1");
9875        assert_eq!(ewah.words[1], 0b1);
9876    }
9877
9878    #[test]
9879    fn ewah_compresses_clean_ones_run() {
9880        let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
9881            .expect("test operation should succeed");
9882        // Pure run of ones, no literals: one RLW only.
9883        assert_eq!(ewah.words.len(), 1);
9884        let rlw = ewah.words[0];
9885        assert_eq!(rlw & 1, 1, "run bit should be one");
9886        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
9887        assert_eq!(rlw >> 33, 0, "no literals");
9888    }
9889
9890    #[test]
9891    fn ewah_run_then_literal_then_run_roundtrips() {
9892        let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
9893        let bit_size = (words.len() * 64) as u32;
9894        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
9895        assert_eq!(
9896            ewah.to_words().expect("test operation should succeed"),
9897            words
9898        );
9899    }
9900
9901    #[test]
9902    fn ewah_drops_trailing_clean_zero_words() {
9903        // Trailing all-zero words beyond a literal carry no information and git
9904        // does not serialise them, but to_words() restores them up to bit_size.
9905        let words = vec![0b1, 0, 0, 0];
9906        let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
9907        // bit_size of 1 means a single backing word.
9908        assert_eq!(ewah.bit_size, 1);
9909        assert_eq!(
9910            ewah.to_words().expect("test operation should succeed"),
9911            vec![0b1]
9912        );
9913    }
9914
9915    #[test]
9916    fn ewah_from_positions_roundtrips_via_positions() {
9917        let positions = [0u32, 1, 63, 64, 65, 200, 511];
9918        let ewah =
9919            EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
9920        let mut decoded = ewah.to_positions().expect("test operation should succeed");
9921        decoded.sort_unstable();
9922        assert_eq!(decoded, positions);
9923    }
9924
9925    #[test]
9926    fn ewah_from_positions_dedupes_and_orders() {
9927        let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
9928            .expect("test operation should succeed");
9929        assert_eq!(
9930            ewah.to_positions().expect("test operation should succeed"),
9931            vec![5, 100]
9932        );
9933    }
9934
9935    #[test]
9936    fn ewah_huge_zero_run_spans_multiple_rlws() {
9937        // A run longer than the 32-bit running-length field forces the encoder
9938        // to emit more than one RLW. Use one literal bit far out, with a bit
9939        // size large enough to exceed u32::MAX clean words is impractical, so
9940        // assert the field arithmetic via a direct builder run instead.
9941        let mut builder = EwahBuilder::new(0);
9942        builder.add_empty_words(false, 0xffff_ffff);
9943        builder.add_empty_words(false, 5);
9944        let ewah = builder.finish().expect("test operation should succeed");
9945        assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
9946        assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
9947        assert_eq!(ewah.words[1] & 1, 0);
9948        assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
9949        assert_eq!(ewah.rlw_position, 1);
9950    }
9951
9952    #[test]
9953    fn ewah_from_words_rejects_oversized_bit_size() {
9954        // bit_size demands two words but only one is supplied.
9955        assert!(EwahBitmap::from_words(65, &[0]).is_err());
9956    }
9957
9958    #[test]
9959    fn ewah_from_positions_rejects_out_of_range() {
9960        assert!(EwahBitmap::from_positions(64, &[64]).is_err());
9961    }
9962
9963    #[test]
9964    fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
9965        // Exercise the full encode -> serialise -> parse loop for a non-trivial
9966        // pattern and assert structural equality against the parser's model.
9967        let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
9968        let bit_size = (words.len() * 64) as u32;
9969        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
9970        let bytes = ewah.to_bytes();
9971        let parsed = parse_ewah_bytes(&bytes);
9972        assert_eq!(parsed, ewah);
9973        assert_eq!(
9974            parsed.to_words().expect("test operation should succeed"),
9975            words
9976        );
9977    }
9978
9979    #[test]
9980    fn pack_bitmap_index_write_parse_roundtrip_sha1() {
9981        // commit, tree, blob in pack order; one selected commit reaching all.
9982        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
9983        let bytes = write_bitmap(
9984            ObjectFormat::Sha1,
9985            pack_checksum_sha1(),
9986            &object_types,
9987            &[(0u32, 0u32, vec![1u32, 2u32])],
9988            None,
9989        )
9990        .expect("test operation should succeed");
9991        assert_eq!(&bytes[..4], b"BITM");
9992
9993        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
9994            .expect("test operation should succeed");
9995        assert_eq!(parsed.version, 1);
9996        assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
9997        assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
9998        assert_eq!(
9999            parsed
10000                .type_bitmaps
10001                .commits
10002                .to_positions()
10003                .expect("test operation should succeed"),
10004            vec![0]
10005        );
10006        assert_eq!(
10007            parsed
10008                .type_bitmaps
10009                .trees
10010                .to_positions()
10011                .expect("test operation should succeed"),
10012            vec![1]
10013        );
10014        assert_eq!(
10015            parsed
10016                .type_bitmaps
10017                .blobs
10018                .to_positions()
10019                .expect("test operation should succeed"),
10020            vec![2]
10021        );
10022        assert!(
10023            parsed
10024                .type_bitmaps
10025                .tags
10026                .to_positions()
10027                .expect("test operation should succeed")
10028                .is_empty()
10029        );
10030        assert_eq!(parsed.entries.len(), 1);
10031        let entry = parsed
10032            .entry_for_index_position(0)
10033            .expect("test operation should succeed");
10034        assert_eq!(entry.xor_offset, 0);
10035        assert_eq!(entry.flags, 0);
10036        assert_eq!(
10037            entry
10038                .bitmap
10039                .to_positions()
10040                .expect("test operation should succeed"),
10041            vec![0, 1, 2]
10042        );
10043        assert_eq!(parsed.name_hash_cache, None);
10044    }
10045
10046    #[test]
10047    fn pack_bitmap_index_write_parse_roundtrip_sha256() {
10048        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
10049            .expect("test operation should succeed");
10050        let object_types = [ObjectType::Commit, ObjectType::Tree];
10051        let bytes = write_bitmap(
10052            ObjectFormat::Sha256,
10053            pack_checksum.clone(),
10054            &object_types,
10055            &[(0u32, 0u32, vec![1u32])],
10056            None,
10057        )
10058        .expect("test operation should succeed");
10059        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
10060            .expect("test operation should succeed");
10061        assert_eq!(parsed.format, ObjectFormat::Sha256);
10062        assert_eq!(parsed.pack_checksum, pack_checksum);
10063        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
10064        assert_eq!(
10065            parsed.entries[0]
10066                .bitmap
10067                .to_positions()
10068                .expect("test operation should succeed"),
10069            vec![0, 1]
10070        );
10071    }
10072
10073    #[test]
10074    fn pack_bitmap_index_write_includes_name_hash_cache() {
10075        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
10076        let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
10077        let bytes = write_bitmap(
10078            ObjectFormat::Sha1,
10079            pack_checksum_sha1(),
10080            &object_types,
10081            &[(0u32, 0u32, vec![1u32, 2u32])],
10082            Some(cache.clone()),
10083        )
10084        .expect("test operation should succeed");
10085        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
10086            .expect("test operation should succeed");
10087        assert_eq!(
10088            parsed.options,
10089            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
10090        );
10091        assert_eq!(parsed.name_hash_cache, Some(cache));
10092    }
10093
10094    #[test]
10095    fn pack_bitmap_writer_supports_multiple_commits() {
10096        let object_types = [
10097            ObjectType::Commit,
10098            ObjectType::Commit,
10099            ObjectType::Tree,
10100            ObjectType::Blob,
10101        ];
10102        let mut writer =
10103            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
10104                .expect("test operation should succeed");
10105        writer
10106            .add_commit(0, 0, &[2, 3])
10107            .expect("test operation should succeed");
10108        writer
10109            .add_commit(1, 1, &[2])
10110            .expect("test operation should succeed");
10111        let bytes = writer.write().expect("test operation should succeed");
10112        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
10113            .expect("test operation should succeed");
10114        assert_eq!(parsed.entries.len(), 2);
10115        assert_eq!(
10116            parsed
10117                .type_bitmaps
10118                .commits
10119                .to_positions()
10120                .expect("test operation should succeed"),
10121            vec![0, 1]
10122        );
10123        let first = parsed
10124            .entry_for_index_position(0)
10125            .expect("test operation should succeed");
10126        assert_eq!(
10127            first
10128                .bitmap
10129                .to_positions()
10130                .expect("test operation should succeed"),
10131            vec![0, 2, 3]
10132        );
10133        let second = parsed
10134            .entry_for_index_position(1)
10135            .expect("test operation should succeed");
10136        assert_eq!(
10137            second
10138                .bitmap
10139                .to_positions()
10140                .expect("test operation should succeed"),
10141            vec![1, 2]
10142        );
10143    }
10144
10145    #[test]
10146    fn pack_bitmap_index_recomputes_checksum_on_write() {
10147        // The provided index_checksum field is ignored; write recomputes it so
10148        // a bogus placeholder still produces a valid, parseable file.
10149        let object_types = [ObjectType::Commit, ObjectType::Blob];
10150        let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
10151            .expect("test operation should succeed");
10152        let mut index = writer.build().expect("test operation should succeed");
10153        // build() sets an all-zero placeholder checksum.
10154        assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
10155        index.entries.clear(); // mutate the model after build
10156        index.entries.push(PackBitmapEntry {
10157            object_position: 0,
10158            xor_offset: 0,
10159            flags: 0,
10160            bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
10161        });
10162        let bytes = index.write().expect("test operation should succeed");
10163        // Parsing validates the trailing checksum, so a wrong checksum fails.
10164        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
10165            .expect("test operation should succeed");
10166        assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
10167    }
10168
10169    #[test]
10170    fn pack_bitmap_writer_rejects_non_commit_selection() {
10171        let object_types = [ObjectType::Commit, ObjectType::Blob];
10172        let mut writer =
10173            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
10174                .expect("test operation should succeed");
10175        // Position 1 is a blob, not a commit.
10176        assert!(writer.add_commit(1, 1, &[]).is_err());
10177        // Position 5 is out of range entirely.
10178        assert!(writer.add_commit(5, 5, &[]).is_err());
10179        // Index position out of range.
10180        assert!(writer.add_commit(0, 5, &[]).is_err());
10181        // Reachable position out of range.
10182        assert!(writer.add_commit(0, 0, &[9]).is_err());
10183    }
10184
10185    #[test]
10186    fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
10187        let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
10188            .expect("test operation should succeed");
10189        assert!(
10190            PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
10191                .is_err()
10192        );
10193    }
10194
10195    #[test]
10196    fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
10197        let writer = PackBitmapWriter::new(
10198            ObjectFormat::Sha1,
10199            pack_checksum_sha1(),
10200            &[ObjectType::Commit],
10201        )
10202        .expect("test operation should succeed");
10203        assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
10204    }
10205
10206    #[test]
10207    fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
10208        let mut index = PackBitmapWriter::new(
10209            ObjectFormat::Sha1,
10210            pack_checksum_sha1(),
10211            &[ObjectType::Commit],
10212        )
10213        .expect("test operation should succeed")
10214        .build()
10215        .expect("test operation should succeed");
10216        // Flag set but no cache present.
10217        index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
10218        assert!(index.write().is_err());
10219        // Cache present but flag missing.
10220        index.options = PackBitmapIndex::OPTION_FULL_DAG;
10221        index.name_hash_cache = Some(vec![0]);
10222        assert!(index.write().is_err());
10223    }
10224
10225    #[test]
10226    fn write_bitmap_roundtrips_through_upstream_git_parser() {
10227        // Build a real pack with git, then overwrite reachability with our own
10228        // writer using the real pack checksum and object types, and confirm our
10229        // bytes parse under the same parser that reads upstream bitmaps.
10230        let root = unique_temp_dir("git-pack-bitmap-writer");
10231        fs::create_dir_all(&root).expect("test operation should succeed");
10232        {
10233            run_git_success(&root, &["init", "-q", "-b", "main"]);
10234            run_git_success(
10235                &root,
10236                &[
10237                    "-c",
10238                    "user.name=Example User",
10239                    "-c",
10240                    "user.email=example@example.invalid",
10241                    "commit",
10242                    "--allow-empty",
10243                    "-q",
10244                    "-m",
10245                    "one",
10246                ],
10247            );
10248            run_git_success(&root, &["repack", "-adb"]);
10249            let pack_dir = root.join(".git").join("objects").join("pack");
10250            let idx_path = single_path_with_extension(&pack_dir, "idx");
10251            let index = PackIndex::parse(
10252                &fs::read(idx_path).expect("test operation should succeed"),
10253                ObjectFormat::Sha1,
10254            )
10255            .expect("test operation should succeed");
10256            // Read object types from the pack so the type bitmaps are accurate.
10257            let pack_path = single_path_with_extension(&pack_dir, "pack");
10258            let pack =
10259                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
10260                    .expect("test operation should succeed");
10261            // Map each index entry (sorted by oid) to its pack offset, then to a
10262            // pack-order position so positions line up with the index ordering.
10263            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
10264            offsets.sort_unstable();
10265            let position_of = |offset: u64| -> u32 {
10266                offsets
10267                    .iter()
10268                    .position(|value| *value == offset)
10269                    .expect("test operation should succeed") as u32
10270            };
10271            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
10272            for entry in &index.entries {
10273                let position = position_of(entry.offset) as usize;
10274                // Find the parsed object at this pack offset to read its type.
10275                if let Some(parsed) = pack
10276                    .entries
10277                    .iter()
10278                    .find(|po| po.entry.offset == entry.offset)
10279                {
10280                    object_types[position] = parsed.object.object_type;
10281                }
10282            }
10283            // Select the first commit position we find and reach everything.
10284            let commit_position = object_types
10285                .iter()
10286                .position(|ty| *ty == ObjectType::Commit)
10287                .expect("test operation should succeed") as u32;
10288            // The entry records the commit's position in the oid-sorted index.
10289            let commit_index_position = index
10290                .entries
10291                .iter()
10292                .position(|entry| position_of(entry.offset) == commit_position)
10293                .expect("test operation should succeed")
10294                as u32;
10295            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
10296            let bytes = write_bitmap(
10297                ObjectFormat::Sha1,
10298                index.pack_checksum.clone(),
10299                &object_types,
10300                &[(commit_position, commit_index_position, reachable)],
10301                None,
10302            )
10303            .expect("test operation should succeed");
10304            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
10305                .expect("test operation should succeed");
10306            assert_eq!(parsed.pack_checksum, index.pack_checksum);
10307            assert_eq!(parsed.entries.len(), 1);
10308            assert_eq!(
10309                parsed.entries[0]
10310                    .bitmap
10311                    .to_positions()
10312                    .expect("test operation should succeed")
10313                    .len(),
10314                index.entries.len()
10315            );
10316        };
10317        let _ = fs::remove_dir_all(&root);
10318    }
10319}