Skip to main content

sley_pack/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result, StreamingDigest};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet, VecDeque};
12use std::fmt;
13use std::fs::File;
14use std::io::{Read, Seek, SeekFrom, Write};
15use std::ops::Range;
16use std::path::Path;
17use std::sync::Arc;
18
/// Location and size metadata for one object stored in a pack.
///
/// The field notes describe how the parser fills them for undeltified entries;
/// how resolved deltas populate them is decided by `resolve_pack_entries`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Object id computed from the decoded object.
    pub oid: ObjectId,
    /// Number of compressed bytes the entry's zlib stream occupies in the pack.
    pub compressed_size: u64,
    /// Size declared in the entry header; the parser rejects entries whose
    /// inflated length differs from it.
    pub uncompressed_size: u64,
    /// Byte offset in the pack at which the entry header begins.
    pub offset: u64,
}
26
/// Default sliding-window size used by [`PackFile::write_packed`].
///
/// Each object is compared against up to this many previously emitted
/// candidates of the same type when searching for a small delta. Matches git's
/// default `pack.window`. [`PackWriteOptions::new`] uses this value as its
/// initial `window`.
pub const DEFAULT_PACK_WINDOW: usize = 10;
33
/// Default maximum delta chain depth used by [`PackFile::write_packed`].
///
/// A delta may reference a base that is itself a delta; this bounds how long
/// such chains may grow so that reconstructing any object stays cheap and the
/// reader's recursion stays shallow. Matches git's default `pack.depth`.
/// [`PackWriteOptions::new`] uses this value as its initial `depth`.
pub const DEFAULT_PACK_DEPTH: usize = 50;
40
/// Object-count threshold before pack payload compression is fanned out across
/// worker threads. For packs below this threshold, thread setup and the extra
/// buffering cost more than they save.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;
45
/// Upper bound on the number of compression worker threads. Git gets much of
/// its wall-clock win from using several cores, but unbounded threads can
/// steal cache from delta planning and inflate peak memory on large packs.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
50
/// Options controlling sliding-window delta selection during pack generation.
///
/// Construct with [`PackWriteOptions::new`] (sensible defaults) and adjust with
/// the builder-style setters, or build one directly. Used by
/// [`PackFile::write_packed_with_options`] and [`PackFile::write_thin`].
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Number of previous same-type candidates each object is deltified
    /// against. Larger windows find better deltas at higher cost.
    pub window: usize,
    /// Maximum delta chain depth. A value of `0` disables deltification.
    pub depth: usize,
    /// When `true`, in-pack deltas are encoded as ofs-deltas (the default and
    /// git's preference). When `false`, in-pack deltas use ref-deltas. Deltas
    /// against external thin-pack bases always use ref-deltas regardless.
    pub prefer_ofs_delta: bool,
    /// External base objects, keyed by object id, that are *not* written into
    /// the pack but may be used as delta bases. Supplying any entries here
    /// produces a thin pack (see [`PackFile::write_thin`]). Empty by default,
    /// yielding a self-contained pack.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// When `true` (the default), objects are reordered by type and size for
    /// better delta locality. When `false`, the input order is preserved (the
    /// emitted pack lists objects in the order supplied); deltas then only
    /// reference earlier input objects. Reordering is always skipped when
    /// deltification is disabled (`depth == 0`), since it has no effect there.
    pub reorder: bool,
    /// Zlib compression level for pack entry payloads.
    ///
    /// [`PackWriteOptions::new`] sets this to `6`, and
    /// [`PackWriteOptions::with_compression_level`] clamps its argument to at
    /// most `9`. Assigning the field directly bypasses that clamp.
    pub compression_level: u32,
}
81
82impl Default for PackWriteOptions {
83    fn default() -> Self {
84        Self::new()
85    }
86}
87
88impl PackWriteOptions {
89    /// Options with git-compatible defaults: window
90    /// [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`], ofs-deltas, and
91    /// no external thin bases.
92    pub fn new() -> Self {
93        Self {
94            window: DEFAULT_PACK_WINDOW,
95            depth: DEFAULT_PACK_DEPTH,
96            prefer_ofs_delta: true,
97            thin_bases: HashMap::new(),
98            reorder: true,
99            compression_level: 6,
100        }
101    }
102
103    /// Set the sliding-window size.
104    pub fn with_window(mut self, window: usize) -> Self {
105        self.window = window;
106        self
107    }
108
109    /// Set the maximum delta chain depth (`0` disables deltas).
110    pub fn with_depth(mut self, depth: usize) -> Self {
111        self.depth = depth;
112        self
113    }
114
115    /// Choose whether in-pack deltas use ofs-delta (`true`) or ref-delta
116    /// (`false`) base references.
117    pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
118        self.prefer_ofs_delta = prefer_ofs_delta;
119        self
120    }
121
122    /// Provide the set of external base objects permitted for a thin pack.
123    pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
124        self.thin_bases = thin_bases;
125        self
126    }
127
128    /// Choose whether objects may be reordered for delta locality (`true`) or
129    /// emitted in input order (`false`).
130    pub fn with_reorder(mut self, reorder: bool) -> Self {
131        self.reorder = reorder;
132        self
133    }
134
135    /// Set the zlib compression level used for pack entry payloads.
136    pub fn with_compression_level(mut self, level: u32) -> Self {
137        self.compression_level = level.min(9);
138        self
139    }
140}
141
/// Switches describing how a repack should behave.
///
/// NOTE(review): the field notes below are inferred from the field names and
/// git terminology; confirm against the code that consumes this policy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably whether bitmap indexes are written.
    pub write_bitmaps: bool,
    /// Presumably whether cruft packs are produced.
    pub cruft_packs: bool,
    /// Presumably the factor for geometric repacking; `None` when unset.
    pub geometric_factor: Option<u8>,
}
148
/// A pack that has been parsed from bytes with every entry resolved to an
/// object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version read from the header; the parser accepts 2 and 3.
    pub version: u32,
    /// Resolved objects produced by the parser.
    pub entries: Vec<PackObject>,
    /// Digest of all pack bytes before the trailer, verified by the parser
    /// against the trailing checksum.
    pub checksum: ObjectId,
}
155
/// One object of a parsed pack: its in-pack metadata plus the decoded object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes and in-pack offset of the entry.
    pub entry: PackEntry,
    /// The decoded object (type and body).
    pub object: EncodedObject,
}
161
/// Per-object statistics for one entry of a verified pack, in the shape
/// `git verify-pack -v` reports.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Resolved object id.
    pub oid: ObjectId,
    /// Resolved object type (the delta's *result* type, not `ofs-delta`).
    pub object_type: ObjectType,
    /// Size declared in the entry header, in bytes: the object body size for
    /// an undeltified entry, the delta instruction stream size for a delta
    /// entry (not the size of the object the delta resolves to).
    pub size: u64,
    /// Bytes this object occupies in the pack: the offset delta to the next
    /// object, or to the trailing checksum for the last object.
    pub size_in_pack: u64,
    /// In-pack byte offset where this object's entry begins.
    pub offset: u64,
    /// Delta chain depth: `0` for undeltified objects, base-depth + 1 otherwise.
    pub delta_depth: u32,
    /// For delta objects, the id of the *immediate* base object (which may
    /// itself be a delta). `None` for undeltified objects.
    pub base_oid: Option<ObjectId>,
}
183
/// Result of [`PackFile::verify_pack_stats`]: per-object stats in pack offset
/// order plus the pack's trailing checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, ordered by ascending in-pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack, as reported by the parser.
    pub checksum: ObjectId,
}
191
/// A pack held in memory together with the index built for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// The complete pack bytes.
    pub pack: Vec<u8>,
    /// The serialized pack index bytes.
    pub index: Vec<u8>,
    /// The pack's checksum.
    pub checksum: ObjectId,
    /// The index entries describing the pack's objects.
    pub entries: Vec<PackIndexEntry>,
    /// Number of delta entries. NOTE(review): [`PackFile::index_pack`] always
    /// stores `0` here; the writers presumably store the real count — confirm.
    pub delta_count: u32,
}
200
/// Outcome of writing a pack, without the pack bytes themselves.
///
/// Carries the same fields as [`PackWrite`] except that the pack is
/// represented only by its size. NOTE(review): presumably returned by writers
/// that stream the pack to a sink — confirm against the producer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWriteSummary {
    /// The serialized pack index bytes.
    pub index: Vec<u8>,
    /// The pack's checksum.
    pub checksum: ObjectId,
    /// The index entries describing the pack's objects.
    pub entries: Vec<PackIndexEntry>,
    /// Number of delta entries.
    pub delta_count: u32,
    /// Presumably the total pack size in bytes.
    pub pack_size: u64,
}
209
/// A borrowed object paired with its already-known id, as accepted by
/// [`PackFile::write_packed_with_known_ids`], which uses the supplied id
/// instead of re-hashing the object body.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// The caller-supplied id of `object`.
    pub oid: &'a ObjectId,
    /// The object to pack.
    pub object: &'a EncodedObject,
}
215
/// Result of building an index for a pack, as returned by
/// `PackIndex::write_v2_for_pack` and consumed by [`PackFile::index_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// The serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index was built for.
    pub pack_checksum: ObjectId,
    /// The entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
}
222
/// Like [`PackIndexBuild`], with an additional per-object list.
///
/// NOTE(review): presumably produced when a pack is indexed from a stream —
/// confirm against the producer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackStreamIndexBuild {
    /// The serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index was built for.
    pub pack_checksum: ObjectId,
    /// The entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
    /// Id, type, size and offset of each indexed object.
    pub objects: Vec<PackIndexedObject>,
}
230
/// Summary of one indexed object, as listed in
/// [`PackStreamIndexBuild::objects`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexedObject {
    /// The object's id.
    pub oid: ObjectId,
    /// The object's type.
    pub object_type: ObjectType,
    /// The object's size. NOTE(review): presumably the inflated size in bytes —
    /// confirm against the producer.
    pub size: u64,
    /// Presumably the object's byte offset within the pack.
    pub offset: u64,
}
238
/// A pack index held as owned, decoded data.
///
/// NOTE(review): the field notes are inferred from the names and the git
/// `.idx` layout; confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// The 256-slot fan-out table.
    pub fanout: [u32; 256],
    /// The decoded index entries.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
247
/// A pack index that borrows the underlying index bytes for lifetime `'a`
/// instead of owning a decoded entry list.
///
/// NOTE(review): the field notes are inferred from the names; confirm against
/// the constructor.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// The 256-slot fan-out table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    /// The borrowed index bytes.
    bytes: &'a [u8],
    /// Object id format of the index.
    format: ObjectFormat,
    /// Table ranges; presumably byte ranges into `bytes`.
    tables: PackIndexViewTables,
}
259
/// Backing storage that can expose index data as one contiguous byte slice.
///
/// Held as `Arc<dyn PackIndexByteSource>` by [`PackIndexViewData`] and
/// [`MultiPackIndexOidLookup`]; implementors must be `Debug`, `Send` and
/// `Sync`.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Borrow the underlying bytes.
    fn as_bytes(&self) -> &[u8];
}
263
264impl<T> PackIndexByteSource for T
265where
266    T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
267{
268    fn as_bytes(&self) -> &[u8] {
269        self.as_ref()
270    }
271}
272
/// Reference-counted, immutable index bytes wrapped in a local type so they
/// can serve as a [`PackIndexByteSource`].
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
275
276impl PackIndexByteSource for SharedIndexBytes {
277    fn as_bytes(&self) -> &[u8] {
278        self.0.as_ref()
279    }
280}
281
/// Counterpart of [`PackIndexView`] that keeps its bytes alive through a shared
/// [`PackIndexByteSource`] instead of borrowing them.
///
/// NOTE(review): the field notes are inferred from the names; confirm against
/// the constructor.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// The 256-slot fan-out table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    /// Shared owner of the index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
    /// Object id format of the index.
    format: ObjectFormat,
    /// Table ranges; presumably byte ranges into the shared bytes.
    tables: PackIndexViewTables,
}
293
/// One record of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the indexed object.
    pub oid: ObjectId,
    /// CRC-32 recorded for the entry. NOTE(review): presumably computed over
    /// the entry's on-disk bytes, as in git's `.idx` v2 — confirm.
    pub crc32: u32,
    /// Presumably the byte offset of the entry within the pack.
    pub offset: u64,
}
300
/// The CRC and offset of an index record, without the object id.
///
/// NOTE(review): presumably the result of looking an id up in an index view —
/// confirm against the lookup methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 recorded for the entry.
    pub crc32: u32,
    /// Presumably the byte offset of the entry within the pack.
    pub offset: u64,
}
306
/// Table locations for the two index layouts an index view can sit on.
///
/// NOTE(review): the ranges are presumably byte ranges into the index bytes,
/// following git's `.idx` v1/v2 layouts — confirm against the view parser.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version-1 layout: a single table of entries.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version-2 layout: separate tables for ids, CRCs, small offsets and
    /// large offsets.
    V2 {
        oid_table: Range<usize>,
        crc_table: Range<usize>,
        small_offset_table: Range<usize>,
        large_offset_table: Range<usize>,
    },
}
319
/// Decoded pack reverse index.
///
/// NOTE(review): the field notes are inferred from the names and git's `.rev`
/// format; confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Format version.
    pub version: u32,
    /// Object id format.
    pub format: ObjectFormat,
    /// Presumably index positions listed in pack (offset) order.
    pub positions: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Presumably the checksum of this file itself, despite the name.
    pub index_checksum: ObjectId,
}
328
/// Decoded per-object modification times for a pack.
///
/// NOTE(review): the field notes are inferred from the names and git's
/// `.mtimes` format; confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version.
    pub version: u32,
    /// Object id format.
    pub format: ObjectFormat,
    /// One modification time per object; ordering and unit to be confirmed.
    pub mtimes: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Presumably the checksum of this file itself, despite the name.
    pub index_checksum: ObjectId,
}
337
/// Decoded pack bitmap index.
///
/// NOTE(review): the field notes are inferred from the names and git's bitmap
/// format; confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Format version.
    pub version: u16,
    /// Object id format.
    pub format: ObjectFormat,
    /// Option flags from the header.
    pub options: u16,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Presumably the checksum of this file itself.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// The stored bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache; `None` when absent.
    pub name_hash_cache: Option<Vec<u32>>,
}
349
/// The four per-type bitmaps of a [`PackBitmapIndex`], one for each object
/// type. NOTE(review): presumably a set bit marks an object of that type —
/// confirm against the reader.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    pub commits: EwahBitmap,
    pub trees: EwahBitmap,
    pub blobs: EwahBitmap,
    pub tags: EwahBitmap,
}
357
/// One stored bitmap of a [`PackBitmapIndex`], together with the position of
/// the commit it belongs to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// The commit's position in the *oid-sorted* pack index (`.idx` order),
    /// NOT the pack-order position used for the bitmap's bit numbering.
    /// Upstream writes `oid_pos(...)` here (pack-bitmap-write.c) and reads it
    /// back via `nth_packed_object_id` (pack-bitmap.c).
    pub object_position: u32,
    /// NOTE(review): presumably the distance back to the entry this bitmap is
    /// XOR-compressed against, as in git's bitmap format — confirm.
    pub xor_offset: u8,
    /// Per-entry flag bits; meaning to be confirmed against the reader.
    pub flags: u8,
    /// Reachability bitmap; bit `i` refers to the `i`-th object in *pack
    /// order* (offset order), as mapped by the pack's reverse index.
    pub bitmap: EwahBitmap,
}
371
/// A bitmap kept in its stored, word-based form.
///
/// NOTE(review): the field notes follow the EWAH serialization used by git's
/// bitmap files; confirm against the encoder and decoder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Presumably the number of bits the bitmap represents.
    pub bit_size: u32,
    /// The 64-bit words of the bitmap.
    pub words: Vec<u64>,
    /// Presumably the position of the last running-length word in `words`.
    pub rlw_position: u32,
}
378
/// Decoded multi-pack index.
///
/// NOTE(review): the field notes are inferred from the names and git's
/// multi-pack-index format; confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object id format.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// The 256-slot fan-out table.
    pub fanout: [u32; 256],
    /// One record per object.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index; `None` when absent.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges; `None` when absent.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// The chunks found in the file.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum of the multi-pack index.
    pub checksum: ObjectId,
}
393
/// Lookup handle over multi-pack-index data that keeps the raw bytes shared
/// rather than decoding every object record.
///
/// NOTE(review): the field notes are inferred from the names; the offsets are
/// presumably byte positions into `bytes` — confirm against the constructor.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    /// Object id format.
    format: ObjectFormat,
    /// Number of packs covered.
    pack_count: u32,
    /// Names of the covered packs.
    pack_names: Vec<String>,
    /// The 256-slot fan-out table.
    fanout: [u32; 256],
    /// Number of objects covered.
    object_count: usize,
    /// Start of the object id lookup table.
    oid_lookup_offset: usize,
    /// Start of the object offsets table.
    object_offsets_offset: usize,
    /// Start of the large offsets table, when present.
    large_offsets_offset: Option<usize>,
    /// Length of the large offsets table.
    large_offsets_len: usize,
    /// Shared owner of the raw bytes.
    bytes: Arc<dyn PackIndexByteSource>,
}
407
/// One object record of a [`MultiPackIndex`].
///
/// NOTE(review): the field notes are inferred from the names; confirm against
/// the parser and writer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Presumably the index of the containing pack within `pack_names`.
    pub pack_int_id: u32,
    /// Presumably the object's byte offset within that pack.
    pub offset: u64,
    /// Presumably forces the offset into the large-offset table when written.
    pub force_large_offset: bool,
}
415
/// Bitmap range recorded for one pack, as listed in
/// [`MultiPackIndex::bitmapped_packs`].
///
/// NOTE(review): presumably the first bit position and the number of bits the
/// pack occupies in the multi-pack bitmap — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    pub bitmap_pos: u32,
    pub bitmap_nr: u32,
}
421
/// One chunk of a multi-pack index, as listed in [`MultiPackIndex::chunks`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Presumably the chunk's byte offset within the file.
    pub offset: u64,
    /// Presumably the chunk's length in bytes.
    pub len: u64,
}
428
/// The kind a pack entry header declares.
///
/// The first four variants map one-to-one onto [`ObjectType`]; the two delta
/// variants mean the payload is a delta and a base reference follows the
/// header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    Commit,
    Tree,
    Blob,
    Tag,
    /// Delta whose base is given as an offset (see [`DeltaBase::Offset`]).
    OfsDelta,
    /// Delta whose base is given as an object id (see [`DeltaBase::Ref`]).
    RefDelta,
}
438
/// A pack entry after its payload has been inflated but before deltas are
/// resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// An undeltified entry, already hashed into a full [`PackObject`].
    Resolved(PackObject),
    /// A delta entry awaiting resolution against its base.
    Delta {
        /// Where the base object is to be found.
        base: DeltaBase,
        /// Number of compressed bytes the delta's zlib stream occupied.
        compressed_size: u64,
        /// Size of the inflated delta, as declared in the entry header.
        delta_size: u64,
        /// Byte offset in the pack at which the entry header begins.
        offset: u64,
        /// The inflated delta bytes.
        delta: Vec<u8>,
    },
}
450
/// Reference from a delta entry to its base object.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Base identified by pack offset, as produced by
    /// `parse_ofs_delta_base_offset`; it is compared against entry start
    /// offsets when looking the base up.
    Offset(u64),
    /// Base identified by the object id stored after the entry header.
    Ref(ObjectId),
}
456
/// One pack entry as stored on disk, used by [`PackFile::verify_pack_stats`] to
/// recover the delta structure and on-disk stream size that resolved
/// [`PackObject`]s no longer carry.
struct OnDiskEntry {
    /// Byte offset in the pack at which the entry header begins.
    offset: u64,
    /// Base reference for a delta entry; `None` for an undeltified entry.
    base: Option<DeltaBase>,
    /// Size declared in the entry header (the delta stream size for deltas).
    stream_size: u64,
}
465
466impl PackFile {
467    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
468        Self::parse(bytes, ObjectFormat::Sha1)
469    }
470
471    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
472        Self::parse_with_base(bytes, format, |_| Ok(None))
473    }
474
475    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
476        Self::parse(&bundle.pack, bundle.format)
477    }
478
479    pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
480        let PackIndexBuild {
481            index,
482            pack_checksum,
483            entries,
484        } = PackIndex::write_v2_for_pack(bytes, format)?;
485        Ok(PackWrite {
486            pack: bytes.to_vec(),
487            index,
488            checksum: pack_checksum,
489            entries,
490            delta_count: 0,
491        })
492    }
493
494    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
495    where
496        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
497    {
498        Self::parse_with_base(bytes, format, external_base)
499    }
500
501    fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
502    where
503        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
504    {
505        let trailer_len = format.raw_len();
506        if bytes.len() < 12 + trailer_len {
507            return Err(GitError::InvalidFormat("pack file too short".into()));
508        }
509        let trailer_offset = bytes.len() - trailer_len;
510        let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
511        let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
512        if checksum != expected {
513            return Err(GitError::InvalidFormat(format!(
514                "pack checksum mismatch: expected {expected}, got {checksum}"
515            )));
516        }
517
518        if &bytes[..4] != b"PACK" {
519            return Err(GitError::InvalidFormat("missing PACK signature".into()));
520        }
521        let version = u32_be(&bytes[4..8]);
522        if version != 2 && version != 3 {
523            return Err(GitError::Unsupported(format!("pack version {version}")));
524        }
525        let count = u32_be(&bytes[8..12]) as usize;
526        let mut offset = 12usize;
527        let mut entries = Vec::with_capacity(count);
528        for _ in 0..count {
529            let entry_offset = offset;
530            let header = parse_entry_header(bytes, &mut offset)?;
531            let base =
532                match header.kind {
533                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
534                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
535                    )),
536                    PackObjectKind::RefDelta => {
537                        let hash_len = format.raw_len();
538                        if offset + hash_len > trailer_offset {
539                            return Err(GitError::InvalidFormat(
540                                "truncated ref-delta base object id".into(),
541                            ));
542                        }
543                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
544                        offset += hash_len;
545                        Some(DeltaBase::Ref(oid))
546                    }
547                    _ => None,
548                };
549            let mut body = Vec::new();
550            let consumed = inflate_into(
551                &bytes[offset..trailer_offset],
552                &mut body,
553                header.size.min(usize::MAX as u64) as usize,
554            )?;
555            if body.len() as u64 != header.size {
556                return Err(GitError::InvalidObject(format!(
557                    "pack object declared {} bytes, decoded {}",
558                    header.size,
559                    body.len()
560                )));
561            }
562            if consumed == 0 {
563                return Err(GitError::InvalidFormat(
564                    "empty compressed pack entry".into(),
565                ));
566            }
567            offset = offset
568                .checked_add(consumed)
569                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
570            if offset > trailer_offset {
571                return Err(GitError::InvalidFormat(
572                    "pack entry extends past checksum".into(),
573                ));
574            }
575            if let Some(base) = base {
576                entries.push(ParsedPackEntry::Delta {
577                    base,
578                    compressed_size: consumed as u64,
579                    delta_size: header.size,
580                    offset: entry_offset as u64,
581                    delta: body,
582                });
583            } else {
584                let object_type = match header.kind {
585                    PackObjectKind::Commit => ObjectType::Commit,
586                    PackObjectKind::Tree => ObjectType::Tree,
587                    PackObjectKind::Blob => ObjectType::Blob,
588                    PackObjectKind::Tag => ObjectType::Tag,
589                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
590                };
591                let object = EncodedObject::new(object_type, body);
592                let oid = object.object_id(format)?;
593                entries.push(ParsedPackEntry::Resolved(PackObject {
594                    entry: PackEntry {
595                        oid,
596                        compressed_size: consumed as u64,
597                        uncompressed_size: header.size,
598                        offset: entry_offset as u64,
599                    },
600                    object,
601                }));
602            }
603        }
604        if offset != trailer_offset {
605            return Err(GitError::InvalidFormat(format!(
606                "pack has {} trailing bytes before checksum",
607                trailer_offset - offset
608            )));
609        }
610        Ok(Self {
611            version,
612            entries: resolve_pack_entries(entries, format, &mut external_base)?,
613            checksum,
614        })
615    }
616
617    /// Walk the pack and produce per-object statistics matching the output of
618    /// `git verify-pack -v` / `git index-pack --verify-stat`.
619    ///
620    /// Objects are returned in pack offset order (the order `git verify-pack -v`
621    /// prints them). Each entry carries the *resolved* object id, type and size,
622    /// the in-pack byte span (`size_in_pack` = the offset delta to the next
623    /// object, or to the trailing checksum for the last object), the in-pack
624    /// offset, the delta chain depth (`0` for undeltified objects), and — for
625    /// deltas — the object id of the *immediate* base (which may itself be a
626    /// delta). This mirrors `builtin/index-pack.c`'s `show_pack_info`.
627    pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
628        // Resolve the whole pack first: this validates the trailing checksum,
629        // every object's inflate, and yields the resolved oid/type/size keyed by
630        // offset. `verify-pack` is exactly this validation plus the stat report.
631        let pack = Self::parse(bytes, format)?;
632
633        // Independently walk the on-disk entries to recover each object's stored
634        // kind and (for deltas) its base reference — information `PackFile`
635        // discards once deltas are resolved.
636        let trailer_len = format.raw_len();
637        let trailer_offset = bytes.len() - trailer_len;
638        let count = u32_be(&bytes[8..12]) as usize;
639        let mut offset = 12usize;
640        // Per entry in read (offset) order: (offset, base, on-disk stream size).
641        // The stream size is what git prints in the size column: it is the
642        // resolved object size for an undeltified entry, but the *delta
643        // instruction stream* length for a delta entry (builtin/index-pack.c sets
644        // `obj->size` from the entry header, before any delta is applied).
645        let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
646        for _ in 0..count {
647            let entry_offset = offset as u64;
648            let header = parse_entry_header(bytes, &mut offset)?;
649            let stream_size = header.size;
650            let base =
651                match header.kind {
652                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
653                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
654                    )),
655                    PackObjectKind::RefDelta => {
656                        let hash_len = format.raw_len();
657                        if offset + hash_len > trailer_offset {
658                            return Err(GitError::InvalidFormat(
659                                "truncated ref-delta base object id".into(),
660                            ));
661                        }
662                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
663                        offset += hash_len;
664                        Some(DeltaBase::Ref(oid))
665                    }
666                    _ => None,
667                };
668            // Skip the compressed body to reach the next entry header.
669            let mut body = Vec::new();
670            let consumed = inflate_into(
671                &bytes[offset..trailer_offset],
672                &mut body,
673                header.size.min(usize::MAX as u64) as usize,
674            )?;
675            offset = offset
676                .checked_add(consumed)
677                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
678            on_disk.push(OnDiskEntry {
679                offset: entry_offset,
680                base,
681                stream_size,
682            });
683        }
684
685        // Map offset -> resolved object so the on-disk walk can join in oid/type.
686        let mut resolved_by_offset: HashMap<u64, &PackObject> =
687            HashMap::with_capacity(pack.entries.len());
688        for object in &pack.entries {
689            resolved_by_offset.insert(object.entry.offset, object);
690        }
691        // Map offset -> resolved oid, for ofs-delta base lookups.
692        let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
693        for entry in &on_disk {
694            if let Some(object) = resolved_by_offset.get(&entry.offset) {
695                oid_by_offset.insert(entry.offset, object.entry.oid);
696            }
697        }
698        // Map base offset -> index in `on_disk`, for delta-depth propagation.
699        let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
700        for (idx, entry) in on_disk.iter().enumerate() {
701            index_by_offset.insert(entry.offset, idx);
702        }
703
704        // Sorted offsets give the size-in-pack span (next offset - this offset),
705        // with the trailing checksum offset as the final sentinel.
706        let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
707        sorted_offsets.sort_unstable();
708        let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
709        for window in sorted_offsets.windows(2) {
710            next_offset.insert(window[0], window[1]);
711        }
712        if let Some(last) = sorted_offsets.last() {
713            next_offset.insert(*last, trailer_offset as u64);
714        }
715
716        // Compute delta depth by following base offsets. Depth of a non-delta is
717        // 0; a delta's depth is its base's depth + 1. `index_by_offset` lets an
718        // ofs-delta find its base's index; a ref-delta resolves its base oid to
719        // an in-pack offset when present (thin-pack external bases are not stored
720        // in this pack, but verify-pack only ever runs on self-contained packs).
721        let mut depth = vec![None; on_disk.len()];
722        fn resolve_depth(
723            idx: usize,
724            on_disk: &[OnDiskEntry],
725            index_by_offset: &HashMap<u64, usize>,
726            offset_of_oid: &HashMap<ObjectId, u64>,
727            depth: &mut [Option<u32>],
728        ) -> u32 {
729            if let Some(d) = depth[idx] {
730                return d;
731            }
732            let computed = match &on_disk[idx].base {
733                None => 0,
734                Some(base) => {
735                    let base_idx = match base {
736                        DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
737                        DeltaBase::Ref(oid) => offset_of_oid
738                            .get(oid)
739                            .and_then(|off| index_by_offset.get(off).copied()),
740                    };
741                    match base_idx {
742                        Some(bi) => {
743                            resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
744                        }
745                        // Base not in this pack (thin pack); treat as depth 1.
746                        None => 1,
747                    }
748                }
749            };
750            depth[idx] = Some(computed);
751            computed
752        }
753        let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
754        for (off, oid) in &oid_by_offset {
755            offset_of_oid.insert(*oid, *off);
756        }
757        for idx in 0..on_disk.len() {
758            resolve_depth(idx, &on_disk, &index_by_offset, &offset_of_oid, &mut depth);
759        }
760
761        let mut stats = Vec::with_capacity(on_disk.len());
762        for (idx, entry) in on_disk.iter().enumerate() {
763            let off = entry.offset;
764            let object = resolved_by_offset.get(&off).ok_or_else(|| {
765                GitError::InvalidFormat("pack offset missing from resolved set".into())
766            })?;
767            let size_in_pack = next_offset
768                .get(&off)
769                .copied()
770                .unwrap_or(trailer_offset as u64)
771                .saturating_sub(off);
772            let base_oid = match &entry.base {
773                None => None,
774                Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
775                Some(DeltaBase::Ref(oid)) => Some(*oid),
776            };
777            stats.push(PackVerifyStat {
778                oid: object.entry.oid,
779                object_type: object.object.object_type,
780                // git prints the on-disk stream size: object body size for an
781                // undeltified entry, delta-instruction stream size for a delta.
782                size: entry.stream_size,
783                size_in_pack,
784                offset: off,
785                delta_depth: depth[idx].unwrap_or(0),
786                base_oid,
787            });
788        }
789        // Emit in pack offset order, matching git's read order.
790        stats.sort_by_key(|stat| stat.offset);
791
792        Ok(PackVerifyStats {
793            objects: stats,
794            checksum: pack.checksum,
795        })
796    }
797
798    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
799    where
800        T: Borrow<EncodedObject>,
801    {
802        Self::write_undeltified(objects, ObjectFormat::Sha1)
803    }
804
805    /// Write a pack with every object stored undeltified (no delta entries).
806    ///
807    /// This is the simple, self-contained encoding; objects appear in the given
808    /// order. For smaller output that exploits similarity between objects, use
809    /// [`PackFile::write_packed`].
810    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
811    where
812        T: Borrow<EncodedObject>,
813    {
814        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
815        Self::write_packed_impl(objects, format, &options)
816    }
817
818    /// Write a pack using sliding-window delta selection with git-compatible
819    /// defaults (window [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`],
820    /// ofs-deltas, self-contained).
821    ///
822    /// Objects are grouped by type and ordered for good deltas, then each is
823    /// compared against a window of previously emitted candidates; the smallest
824    /// acceptable delta is kept, otherwise the object is stored undeltified. The
825    /// result round-trips through [`PackFile::parse`].
826    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
827    where
828        T: Borrow<EncodedObject>,
829    {
830        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
831    }
832
833    /// Like [`PackFile::write_packed`] but with caller-supplied
834    /// [`PackWriteOptions`] (window, depth, base-reference style, and optional
835    /// external thin bases).
836    pub fn write_packed_with_options<T>(
837        objects: &[T],
838        format: ObjectFormat,
839        options: &PackWriteOptions,
840    ) -> Result<PackWrite>
841    where
842        T: Borrow<EncodedObject>,
843    {
844        Self::write_packed_impl(objects, format, options)
845    }
846
847    /// Like [`PackFile::write_packed`], but uses caller-supplied object ids
848    /// instead of re-hashing each object before pack planning.
849    ///
850    /// This is intended for object-database paths that reached each object by
851    /// its id and already trust that id/object mapping. The function validates
852    /// id formats and duplicate ids, but it does not re-hash object bodies; use
853    /// [`PackFile::write_packed`] when the ids are not already known to be
854    /// canonical.
855    pub fn write_packed_with_known_ids(
856        inputs: &[PackInput<'_>],
857        format: ObjectFormat,
858    ) -> Result<PackWrite> {
859        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
860    }
861
862    /// Like [`PackFile::write_packed_with_known_ids`] but with caller-supplied
863    /// [`PackWriteOptions`].
864    pub fn write_packed_with_known_ids_and_options(
865        inputs: &[PackInput<'_>],
866        format: ObjectFormat,
867        options: &PackWriteOptions,
868    ) -> Result<PackWrite> {
869        if inputs.len() > u32::MAX as usize {
870            return Err(GitError::InvalidFormat("too many pack objects".into()));
871        }
872        let mut objects = Vec::with_capacity(inputs.len());
873        let mut object_ids = Vec::with_capacity(inputs.len());
874        for input in inputs {
875            if input.oid.format() != format {
876                return Err(GitError::InvalidObjectId(format!(
877                    "pack object id {} uses {}, pack uses {}",
878                    input.oid,
879                    input.oid.format().name(),
880                    format.name()
881                )));
882            }
883            objects.push(input.object);
884            object_ids.push(*input.oid);
885        }
886        Self::write_packed_from_parts(objects, object_ids, format, options)
887    }
888
889    pub fn write_packed_with_known_ids_to_writer<W>(
890        inputs: &[PackInput<'_>],
891        format: ObjectFormat,
892        options: &PackWriteOptions,
893        writer: &mut W,
894    ) -> Result<PackWriteSummary>
895    where
896        W: Write,
897    {
898        if inputs.len() > u32::MAX as usize {
899            return Err(GitError::InvalidFormat("too many pack objects".into()));
900        }
901        let mut objects = Vec::with_capacity(inputs.len());
902        let mut object_ids = Vec::with_capacity(inputs.len());
903        for input in inputs {
904            if input.oid.format() != format {
905                return Err(GitError::InvalidObjectId(format!(
906                    "pack object id {} uses {}, pack uses {}",
907                    input.oid,
908                    input.oid.format().name(),
909                    format.name()
910                )));
911            }
912            objects.push(input.object);
913            object_ids.push(*input.oid);
914        }
915        Self::write_packed_from_parts_to_writer(objects, object_ids, format, options, writer)
916    }
917
918    /// Write a thin pack: objects may be deltified against `external_bases`
919    /// that are *not* included in the pack, referenced by ref-delta to their
920    /// object id.
921    ///
922    /// The receiver must already have (or otherwise obtain) those base objects
923    /// and resolve the pack with [`PackFile::parse_thin`]. Window and depth use
924    /// the defaults; pass options via [`PackFile::write_packed_with_options`]
925    /// with [`PackWriteOptions::with_thin_bases`] for finer control.
926    pub fn write_thin<T>(
927        objects: &[T],
928        format: ObjectFormat,
929        external_bases: HashMap<ObjectId, EncodedObject>,
930    ) -> Result<PackWrite>
931    where
932        T: Borrow<EncodedObject>,
933    {
934        let options = PackWriteOptions::new().with_thin_bases(external_bases);
935        Self::write_packed_impl(objects, format, &options)
936    }
937
938    fn write_packed_impl<T>(
939        objects: &[T],
940        format: ObjectFormat,
941        options: &PackWriteOptions,
942    ) -> Result<PackWrite>
943    where
944        T: Borrow<EncodedObject>,
945    {
946        if objects.len() > u32::MAX as usize {
947            return Err(GitError::InvalidFormat("too many pack objects".into()));
948        }
949        let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
950
951        // Compute object ids up front; they are needed both for the index and,
952        // for ref-deltas, inside the pack entries themselves.
953        let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
954        for object in &objects {
955            object_ids.push(object.object_id(format)?);
956        }
957        Self::write_packed_from_parts(objects, object_ids, format, options)
958    }
959
960    fn write_packed_from_parts(
961        objects: Vec<&EncodedObject>,
962        object_ids: Vec<ObjectId>,
963        format: ObjectFormat,
964        options: &PackWriteOptions,
965    ) -> Result<PackWrite> {
966        let mut seen = HashSet::with_capacity(object_ids.len());
967        for oid in &object_ids {
968            if !seen.insert(oid) {
969                return Err(GitError::InvalidFormat(format!(
970                    "pack contains duplicate object id {oid}"
971                )));
972            }
973        }
974
975        // Validate external thin bases share the pack's hash format.
976        for oid in options.thin_bases.keys() {
977            if oid.format() != format {
978                return Err(GitError::InvalidObjectId(
979                    "thin pack base object id format does not match pack format".into(),
980                ));
981            }
982        }
983
984        // Decide, for each object, whether it is stored undeltified or as a
985        // delta against another object (in-pack or an external thin base), and
986        // obtain the emit order. In-pack deltas only ever reference candidates
987        // that appear earlier in `order`, so emitting in `order` guarantees a
988        // base is always written before any object that deltas against it.
989        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
990
991        let mut pack = Vec::new();
992        pack.extend_from_slice(b"PACK");
993        pack.extend_from_slice(&2u32.to_be_bytes());
994        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
995
996        let mut index_entries = Vec::with_capacity(objects.len());
997        let mut delta_count = 0u32;
998        // Pack offset at which each original object index was written, or
999        // `None` until it has been emitted.
1000        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1001
1002        let compressed_payloads =
1003            compress_planned_payloads(&objects, &plan, &order, options.compression_level)?;
1004
1005        for (order_pos, &idx) in order.iter().enumerate() {
1006            let offset = pack.len() as u64;
1007            let mut entry_bytes = Vec::new();
1008            match &plan[idx].base {
1009                PlannedBase::None => {
1010                    write_entry_header(
1011                        &mut entry_bytes,
1012                        objects[idx].object_type,
1013                        objects[idx].body.len() as u64,
1014                    );
1015                }
1016                PlannedBase::InPack { base_idx, delta } => {
1017                    delta_count += 1;
1018                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1019                        GitError::InvalidFormat(
1020                            "in-pack delta base emitted after dependent object".into(),
1021                        )
1022                    })?;
1023                    if options.prefer_ofs_delta {
1024                        write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
1025                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1026                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
1027                        })?;
1028                        write_ofs_delta_offset(&mut entry_bytes, relative)?;
1029                    } else {
1030                        write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1031                        entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
1032                    }
1033                }
1034                PlannedBase::External { base_oid, delta } => {
1035                    delta_count += 1;
1036                    write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1037                    entry_bytes.extend_from_slice(base_oid.as_bytes());
1038                }
1039            }
1040            entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
1041            let crc32 = crc32fast::hash(&entry_bytes);
1042            pack.extend_from_slice(&entry_bytes);
1043            written_offsets[idx] = Some(offset);
1044            index_entries.push(PackIndexEntry {
1045                oid: object_ids[idx].clone(),
1046                crc32,
1047                offset,
1048            });
1049        }
1050
1051        let checksum = sley_core::digest_bytes(format, &pack)?;
1052        pack.extend_from_slice(checksum.as_bytes());
1053        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1054        Ok(PackWrite {
1055            pack,
1056            index,
1057            checksum,
1058            entries: index_entries,
1059            delta_count,
1060        })
1061    }
1062
1063    fn write_packed_from_parts_to_writer<W>(
1064        objects: Vec<&EncodedObject>,
1065        object_ids: Vec<ObjectId>,
1066        format: ObjectFormat,
1067        options: &PackWriteOptions,
1068        writer: &mut W,
1069    ) -> Result<PackWriteSummary>
1070    where
1071        W: Write,
1072    {
1073        let mut seen = HashSet::with_capacity(object_ids.len());
1074        for oid in &object_ids {
1075            if !seen.insert(oid) {
1076                return Err(GitError::InvalidFormat(format!(
1077                    "pack contains duplicate object id {oid}"
1078                )));
1079            }
1080        }
1081
1082        for oid in options.thin_bases.keys() {
1083            if oid.format() != format {
1084                return Err(GitError::InvalidObjectId(
1085                    "thin pack base object id format does not match pack format".into(),
1086                ));
1087            }
1088        }
1089
1090        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
1091        let mut output = PackDigestWriter::new(writer, format);
1092        output.write_pack_bytes(b"PACK")?;
1093        output.write_pack_bytes(&2u32.to_be_bytes())?;
1094        output.write_pack_bytes(&(objects.len() as u32).to_be_bytes())?;
1095
1096        let mut index_entries = Vec::with_capacity(objects.len());
1097        let mut delta_count = 0u32;
1098        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1099
1100        for &idx in &order {
1101            let offset = output.position();
1102            let mut entry_header = Vec::new();
1103            match &plan[idx].base {
1104                PlannedBase::None => {
1105                    write_entry_header(
1106                        &mut entry_header,
1107                        objects[idx].object_type,
1108                        objects[idx].body.len() as u64,
1109                    );
1110                }
1111                PlannedBase::InPack { base_idx, delta } => {
1112                    delta_count += 1;
1113                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1114                        GitError::InvalidFormat(
1115                            "in-pack delta base emitted after dependent object".into(),
1116                        )
1117                    })?;
1118                    if options.prefer_ofs_delta {
1119                        write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
1120                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1121                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
1122                        })?;
1123                        write_ofs_delta_offset(&mut entry_header, relative)?;
1124                    } else {
1125                        write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1126                        entry_header.extend_from_slice(object_ids[*base_idx].as_bytes());
1127                    }
1128                }
1129                PlannedBase::External { base_oid, delta } => {
1130                    delta_count += 1;
1131                    write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1132                    entry_header.extend_from_slice(base_oid.as_bytes());
1133                }
1134            }
1135            let compressed_payload = compressed_payload(
1136                planned_payload(&objects, &plan, idx),
1137                options.compression_level,
1138            )?;
1139            let mut crc32 = crc32fast::Hasher::new();
1140            crc32.update(&entry_header);
1141            crc32.update(&compressed_payload);
1142            output.write_pack_bytes(&entry_header)?;
1143            output.write_pack_bytes(&compressed_payload)?;
1144            written_offsets[idx] = Some(offset);
1145            index_entries.push(PackIndexEntry {
1146                oid: object_ids[idx],
1147                crc32: crc32.finalize(),
1148                offset,
1149            });
1150        }
1151
1152        let (checksum, pack_size) = output.finish()?;
1153        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1154        Ok(PackWriteSummary {
1155            index,
1156            checksum,
1157            entries: index_entries,
1158            delta_count,
1159            pack_size,
1160        })
1161    }
1162}
1163
1164struct PackDigestWriter<'a, W> {
1165    writer: &'a mut W,
1166    digest: StreamingDigest,
1167    position: u64,
1168}
1169
1170impl<'a, W> PackDigestWriter<'a, W>
1171where
1172    W: Write,
1173{
1174    fn new(writer: &'a mut W, format: ObjectFormat) -> Self {
1175        Self {
1176            writer,
1177            digest: StreamingDigest::new(format),
1178            position: 0,
1179        }
1180    }
1181
1182    fn position(&self) -> u64 {
1183        self.position
1184    }
1185
1186    fn write_pack_bytes(&mut self, bytes: &[u8]) -> Result<()> {
1187        self.writer.write_all(bytes)?;
1188        self.digest.update(bytes);
1189        self.position = self
1190            .position
1191            .checked_add(bytes.len() as u64)
1192            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1193        Ok(())
1194    }
1195
1196    fn finish(mut self) -> Result<(ObjectId, u64)> {
1197        let checksum = self.digest.finalize()?;
1198        self.writer.write_all(checksum.as_bytes())?;
1199        self.position = self
1200            .position
1201            .checked_add(checksum.as_bytes().len() as u64)
1202            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1203        Ok((checksum, self.position))
1204    }
1205}
1206
1207impl<'a> PackIndexView<'a> {
1208    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
1209        Self::parse(bytes, ObjectFormat::Sha1)
1210    }
1211
1212    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1213        Self::parse_impl(bytes, format, true, true)
1214    }
1215
1216    /// Parse and validate the index layout without recomputing the trailing
1217    /// index checksum. The checksum stored in the file is still exposed via
1218    /// [`PackIndexView::index_checksum`].
1219    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1220        Self::parse_impl(bytes, format, false, true)
1221    }
1222
1223    /// Parse a local/trusted pack index without recomputing the trailing index
1224    /// checksum or walking every entry for canonical-order validation.
1225    ///
1226    /// This still validates the table layout and all lookup paths remain
1227    /// bounds-checked, but it avoids O(number-of-objects) startup validation for
1228    /// repository-owned `.idx` files in hot read paths.
1229    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1230        Self::parse_impl(bytes, format, false, false)
1231    }
1232
    /// Number of objects in the index (the last fanout entry, as read at parse
    /// time).
    pub fn count(&self) -> usize {
        self.count
    }
1236
    /// The 256-entry fanout table read from the index. `find` uses entry `b`
    /// as the end, and entry `b - 1` as the start, of the range of ids whose
    /// first byte is `b`.
    pub fn fanout(&self) -> &[u32; 256] {
        &self.fanout
    }
1240
1241    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1242        if oid.format() != self.format {
1243            return None;
1244        }
1245        let bucket = usize::from(oid.as_bytes()[0]);
1246        let mut start = if bucket == 0 {
1247            0
1248        } else {
1249            self.fanout[bucket - 1] as usize
1250        };
1251        let mut end = self.fanout[bucket] as usize;
1252        let target = oid.as_bytes();
1253
1254        while start < end {
1255            let mid = start + (end - start) / 2;
1256            match self.oid_bytes_at(mid).cmp(target) {
1257                std::cmp::Ordering::Less => start = mid + 1,
1258                std::cmp::Ordering::Equal => return self.lookup_at(mid),
1259                std::cmp::Ordering::Greater => end = mid,
1260            }
1261        }
1262        None
1263    }
1264
1265    fn parse_impl(
1266        bytes: &'a [u8],
1267        format: ObjectFormat,
1268        verify_checksum: bool,
1269        validate_entries: bool,
1270    ) -> Result<Self> {
1271        let hash_len = format.raw_len();
1272        if bytes.len() < 4 {
1273            return Err(GitError::InvalidFormat("pack index too short".into()));
1274        }
1275        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1276            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1277        }
1278        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1279            return Err(GitError::InvalidFormat("pack index too short".into()));
1280        }
1281        let version = u32_be(&bytes[4..8]);
1282        if version != 2 {
1283            return Err(GitError::Unsupported(format!(
1284                "pack index version {version}"
1285            )));
1286        }
1287        let index_checksum_offset = bytes.len() - hash_len;
1288        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1289        if verify_checksum {
1290            let actual_index_checksum =
1291                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1292            if actual_index_checksum != index_checksum {
1293                return Err(GitError::InvalidFormat(format!(
1294                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1295                )));
1296            }
1297        }
1298
1299        let mut offset = 8usize;
1300        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1301        let count = fanout[255] as usize;
1302        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1303        offset = oid_table.end;
1304        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1305        offset = crc_table.end;
1306        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1307        offset = small_offset_table.end;
1308
1309        let large_offset_count = (0..count)
1310            .filter(|idx| {
1311                let start = small_offset_table.start + idx * 4;
1312                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1313            })
1314            .count();
1315        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1316        offset = large_offset_table.end;
1317
1318        let expected_trailer_offset = bytes.len() - hash_len * 2;
1319        if offset != expected_trailer_offset {
1320            if !verify_checksum && offset < expected_trailer_offset {
1321                large_offset_table = large_offset_table.start..expected_trailer_offset;
1322                offset = expected_trailer_offset;
1323            } else {
1324                return Err(GitError::InvalidFormat(format!(
1325                    "pack index has {} unexpected bytes before trailer",
1326                    expected_trailer_offset.saturating_sub(offset)
1327                )));
1328            }
1329        }
1330        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1331
1332        let view = Self {
1333            version,
1334            count,
1335            fanout,
1336            pack_checksum,
1337            index_checksum,
1338            bytes,
1339            format,
1340            tables: PackIndexViewTables::V2 {
1341                oid_table,
1342                crc_table,
1343                small_offset_table,
1344                large_offset_table,
1345            },
1346        };
1347        if validate_entries {
1348            view.validate_v2_entries()?;
1349        }
1350        Ok(view)
1351    }
1352
1353    fn parse_v1_impl(
1354        bytes: &'a [u8],
1355        format: ObjectFormat,
1356        verify_checksum: bool,
1357        validate_entries: bool,
1358    ) -> Result<Self> {
1359        let hash_len = format.raw_len();
1360        if bytes.len() < 256 * 4 + 2 * hash_len {
1361            return Err(GitError::InvalidFormat("pack index too short".into()));
1362        }
1363        let index_checksum_offset = bytes.len() - hash_len;
1364        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1365        if verify_checksum {
1366            let actual_index_checksum =
1367                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1368            if actual_index_checksum != index_checksum {
1369                return Err(GitError::InvalidFormat(format!(
1370                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1371                )));
1372            }
1373        }
1374
1375        let mut offset = 0usize;
1376        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1377        let count = fanout[255] as usize;
1378        let entry_len = hash_len
1379            .checked_add(4)
1380            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1381        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1382        offset = entry_table.end;
1383        let expected_trailer_offset = bytes.len() - hash_len * 2;
1384        if offset != expected_trailer_offset {
1385            return Err(GitError::InvalidFormat(format!(
1386                "pack index has {} unexpected bytes before trailer",
1387                expected_trailer_offset.saturating_sub(offset)
1388            )));
1389        }
1390        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1391
1392        let view = Self {
1393            version: 1,
1394            count,
1395            fanout,
1396            pack_checksum,
1397            index_checksum,
1398            bytes,
1399            format,
1400            tables: PackIndexViewTables::V1 { entry_table },
1401        };
1402        if validate_entries {
1403            view.validate_v1_entries()?;
1404        }
1405        Ok(view)
1406    }
1407
1408    fn validate_v2_entries(&self) -> Result<()> {
1409        let PackIndexViewTables::V2 {
1410            oid_table,
1411            small_offset_table,
1412            large_offset_table,
1413            ..
1414        } = &self.tables
1415        else {
1416            unreachable!("v2 validation only runs for v2 views");
1417        };
1418        let oid_table = self.slice(oid_table.clone());
1419        let small_offset_table = self.slice(small_offset_table.clone());
1420        let large_offset_table = self.slice(large_offset_table.clone());
1421        let hash_len = self.format.raw_len();
1422        for idx in 0..self.count {
1423            let oid_start = idx * hash_len;
1424            let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1425            if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1426                return Err(GitError::InvalidFormat(
1427                    "pack index object ids are not strictly ascending".into(),
1428                ));
1429            }
1430            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1431
1432            let offset_start = idx * 4;
1433            let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1434            pack_index_v2_offset(raw_offset, large_offset_table)?;
1435        }
1436        Ok(())
1437    }
1438
1439    fn validate_v1_entries(&self) -> Result<()> {
1440        let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1441            unreachable!("v1 validation only runs for v1 views");
1442        };
1443        let entry_table = self.slice(entry_table.clone());
1444        let hash_len = self.format.raw_len();
1445        let entry_len = hash_len
1446            .checked_add(4)
1447            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1448        for idx in 0..self.count {
1449            let start = idx * entry_len;
1450            let oid_start = start + 4;
1451            let oid_bytes = &entry_table[oid_start..start + entry_len];
1452            if idx > 0 {
1453                let previous_oid_start = oid_start - entry_len;
1454                let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1455                if previous_oid >= oid_bytes {
1456                    return Err(GitError::InvalidFormat(
1457                        "pack index object ids are not strictly sorted".into(),
1458                    ));
1459                }
1460            }
1461            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1462        }
1463        Ok(())
1464    }
1465
1466    fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1467        let hash_len = self.format.raw_len();
1468        match &self.tables {
1469            PackIndexViewTables::V1 { entry_table } => {
1470                let entry_table = self.slice(entry_table.clone());
1471                let entry_len = hash_len + 4;
1472                let start = idx * entry_len + 4;
1473                &entry_table[start..start + hash_len]
1474            }
1475            PackIndexViewTables::V2 { oid_table, .. } => {
1476                let oid_table = self.slice(oid_table.clone());
1477                let start = idx * hash_len;
1478                &oid_table[start..start + hash_len]
1479            }
1480        }
1481    }
1482
1483    fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1484        if idx >= self.count {
1485            return None;
1486        }
1487        let hash_len = self.format.raw_len();
1488        match &self.tables {
1489            PackIndexViewTables::V1 { entry_table } => {
1490                let entry_table = self.slice(entry_table.clone());
1491                let entry_len = hash_len + 4;
1492                let start = idx * entry_len;
1493                Some(PackIndexLookup {
1494                    crc32: 0,
1495                    offset: u64::from(u32_be(&entry_table[start..start + 4])),
1496                })
1497            }
1498            PackIndexViewTables::V2 {
1499                crc_table,
1500                small_offset_table,
1501                large_offset_table,
1502                ..
1503            } => {
1504                let crc_table = self.slice(crc_table.clone());
1505                let small_offset_table = self.slice(small_offset_table.clone());
1506                let large_offset_table = self.slice(large_offset_table.clone());
1507                let crc_start = idx * 4;
1508                let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1509                Some(PackIndexLookup {
1510                    crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1511                    offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1512                })
1513            }
1514        }
1515    }
1516
    /// Borrow `range` of the underlying index bytes for the view's full
    /// lifetime. Uses plain slice indexing, so an out-of-bounds range panics.
    fn slice(&self, range: Range<usize>) -> &'a [u8] {
        &self.bytes[range]
    }
1520}
1521
1522impl PackIndexViewData {
1523    pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1524        Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1525    }
1526
1527    /// Parse and validate an owned index view without recomputing the trailing
1528    /// index checksum. The stored checksum is still exposed via
1529    /// [`PackIndexViewData::index_checksum`].
1530    pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1531        Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1532    }
1533
1534    /// Parse a local/trusted owned index view without the checksum or full-entry
1535    /// validation passes.
1536    pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1537        Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1538    }
1539
1540    pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
1541        Self::parse_impl(bytes, format, true, true)
1542    }
1543
1544    pub fn parse_source_without_checksum(
1545        bytes: Arc<dyn PackIndexByteSource>,
1546        format: ObjectFormat,
1547    ) -> Result<Self> {
1548        Self::parse_impl(bytes, format, false, true)
1549    }
1550
1551    pub fn parse_trusted_source_without_checksum(
1552        bytes: Arc<dyn PackIndexByteSource>,
1553        format: ObjectFormat,
1554    ) -> Result<Self> {
1555        Self::parse_impl(bytes, format, false, false)
1556    }
1557
1558    pub fn count(&self) -> usize {
1559        self.count
1560    }
1561
1562    pub fn fanout(&self) -> &[u32; 256] {
1563        &self.fanout
1564    }
1565
1566    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1567        self.as_view().find(oid)
1568    }
1569
1570    pub fn as_view(&self) -> PackIndexView<'_> {
1571        PackIndexView {
1572            version: self.version,
1573            count: self.count,
1574            fanout: self.fanout,
1575            pack_checksum: self.pack_checksum,
1576            index_checksum: self.index_checksum,
1577            bytes: self.bytes.as_bytes(),
1578            format: self.format,
1579            tables: self.tables.clone(),
1580        }
1581    }
1582
1583    fn parse_impl(
1584        bytes: Arc<dyn PackIndexByteSource>,
1585        format: ObjectFormat,
1586        verify_checksum: bool,
1587        validate_entries: bool,
1588    ) -> Result<Self> {
1589        let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1590            let view = PackIndexView::parse_impl(
1591                bytes.as_bytes(),
1592                format,
1593                verify_checksum,
1594                validate_entries,
1595            )?;
1596            (
1597                view.version,
1598                view.count,
1599                view.fanout,
1600                view.pack_checksum,
1601                view.index_checksum,
1602                view.tables,
1603            )
1604        };
1605        Ok(Self {
1606            version,
1607            count,
1608            fanout,
1609            pack_checksum,
1610            index_checksum,
1611            bytes,
1612            format,
1613            tables,
1614        })
1615    }
1616}
1617
1618impl PackIndex {
1619    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1620        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1621    }
1622
1623    pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1624        let trailer_len = format.raw_len();
1625        if pack_bytes.len() < 12 + trailer_len {
1626            return Err(GitError::InvalidFormat("pack file too short".into()));
1627        }
1628        let trailer_offset = pack_bytes.len() - trailer_len;
1629        let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1630        let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1631        if pack_checksum != expected {
1632            return Err(GitError::InvalidFormat(format!(
1633                "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1634            )));
1635        }
1636
1637        if &pack_bytes[..4] != b"PACK" {
1638            return Err(GitError::InvalidFormat("missing PACK signature".into()));
1639        }
1640        let version = u32_be(&pack_bytes[4..8]);
1641        if version != 2 && version != 3 {
1642            return Err(GitError::Unsupported(format!("pack version {version}")));
1643        }
1644        let count = u32_be(&pack_bytes[8..12]) as usize;
1645        let mut offset = 12usize;
1646        let mut parsed_entries = Vec::with_capacity(count);
1647        let mut raw_entries = Vec::with_capacity(count);
1648        for _ in 0..count {
1649            let entry_offset = offset;
1650            let header = parse_entry_header(pack_bytes, &mut offset)?;
1651            let base = match header.kind {
1652                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1653                    pack_bytes,
1654                    &mut offset,
1655                    entry_offset as u64,
1656                )?)),
1657                PackObjectKind::RefDelta => {
1658                    let hash_len = format.raw_len();
1659                    if offset + hash_len > trailer_offset {
1660                        return Err(GitError::InvalidFormat(
1661                            "truncated ref-delta base object id".into(),
1662                        ));
1663                    }
1664                    let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1665                    offset += hash_len;
1666                    Some(DeltaBase::Ref(oid))
1667                }
1668                _ => None,
1669            };
1670            let mut body = Vec::new();
1671            let consumed = inflate_into(
1672                &pack_bytes[offset..trailer_offset],
1673                &mut body,
1674                header.size.min(usize::MAX as u64) as usize,
1675            )?;
1676            if body.len() as u64 != header.size {
1677                return Err(GitError::InvalidObject(format!(
1678                    "pack object declared {} bytes, decoded {}",
1679                    header.size,
1680                    body.len()
1681                )));
1682            }
1683            if consumed == 0 {
1684                return Err(GitError::InvalidFormat(
1685                    "empty compressed pack entry".into(),
1686                ));
1687            }
1688            offset = offset
1689                .checked_add(consumed)
1690                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1691            if offset > trailer_offset {
1692                return Err(GitError::InvalidFormat(
1693                    "pack entry extends past checksum".into(),
1694                ));
1695            }
1696            raw_entries.push((
1697                entry_offset as u64,
1698                crc32fast::hash(&pack_bytes[entry_offset..offset]),
1699            ));
1700            if let Some(base) = base {
1701                parsed_entries.push(ParsedPackEntry::Delta {
1702                    base,
1703                    compressed_size: consumed as u64,
1704                    delta_size: header.size,
1705                    offset: entry_offset as u64,
1706                    delta: body,
1707                });
1708            } else {
1709                let object_type = match header.kind {
1710                    PackObjectKind::Commit => ObjectType::Commit,
1711                    PackObjectKind::Tree => ObjectType::Tree,
1712                    PackObjectKind::Blob => ObjectType::Blob,
1713                    PackObjectKind::Tag => ObjectType::Tag,
1714                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1715                };
1716                let object = EncodedObject::new(object_type, body);
1717                let oid = object.object_id(format)?;
1718                parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1719                    entry: PackEntry {
1720                        oid,
1721                        compressed_size: consumed as u64,
1722                        uncompressed_size: header.size,
1723                        offset: entry_offset as u64,
1724                    },
1725                    object,
1726                }));
1727            }
1728        }
1729        if offset != trailer_offset {
1730            return Err(GitError::InvalidFormat(format!(
1731                "pack has {} trailing bytes before checksum",
1732                trailer_offset - offset
1733            )));
1734        }
1735
1736        let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1737        let entries = resolved
1738            .iter()
1739            .zip(raw_entries)
1740            .map(|(object, (offset, crc32))| PackIndexEntry {
1741                oid: object.entry.oid,
1742                crc32,
1743                offset,
1744            })
1745            .collect::<Vec<_>>();
1746        let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1747        Ok(PackIndexBuild {
1748            index,
1749            pack_checksum,
1750            entries,
1751        })
1752    }
1753
1754    /// Validate and index a pack from the reader's current position to EOF.
1755    ///
1756    /// This produces the same v2 `.idx` bytes and object metadata as
1757    /// [`PackIndex::write_v2_for_pack`] without requiring the caller to provide
1758    /// the pack as one contiguous byte slice. The reader is left positioned at
1759    /// EOF on success.
1760    pub fn write_v2_for_pack_reader<R>(
1761        reader: &mut R,
1762        format: ObjectFormat,
1763    ) -> Result<PackStreamIndexBuild>
1764    where
1765        R: Read + Seek,
1766    {
1767        let start = reader.stream_position()?;
1768        let end = reader.seek(SeekFrom::End(0))?;
1769        let pack_len = end
1770            .checked_sub(start)
1771            .ok_or_else(|| GitError::InvalidFormat("pack stream position overflow".into()))?;
1772        reader.seek(SeekFrom::Start(start))?;
1773        index_pack_from_reader(reader, format, pack_len)
1774    }
1775
1776    /// Validate and index a pack from the reader's current position, stopping
1777    /// after the pack trailer checksum.
1778    ///
1779    /// This is for transports where the pack length is not known in advance but
1780    /// the stream is expected to contain exactly one pack. It avoids forcing the
1781    /// caller to first materialize the pack only to learn its length.
1782    pub fn write_v2_for_pack_reader_to_trailer<R>(
1783        reader: &mut R,
1784        format: ObjectFormat,
1785    ) -> Result<PackStreamIndexBuild>
1786    where
1787        R: Read,
1788    {
1789        index_pack_from_reader_to_trailer(reader, format)
1790    }
1791
1792    pub fn write_v2_for_pack_reader_with_len<R>(
1793        reader: &mut R,
1794        format: ObjectFormat,
1795        pack_len: u64,
1796    ) -> Result<PackStreamIndexBuild>
1797    where
1798        R: Read,
1799    {
1800        index_pack_from_reader(reader, format, pack_len)
1801    }
1802
1803    /// Validate and index a pack from a filesystem path without loading the
1804    /// entire pack file into memory.
1805    pub fn write_v2_for_pack_path(
1806        path: impl AsRef<Path>,
1807        format: ObjectFormat,
1808    ) -> Result<PackStreamIndexBuild> {
1809        let mut file = File::open(path)?;
1810        Self::write_v2_for_pack_reader(&mut file, format)
1811    }
1812
1813    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1814        Self::parse(bytes, ObjectFormat::Sha1)
1815    }
1816
1817    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1818        Self::parse_impl(bytes, format, true)
1819    }
1820
1821    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1822        Self::parse_impl(bytes, format, false)
1823    }
1824
1825    fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1826        let hash_len = format.raw_len();
1827        if bytes.len() < 4 {
1828            return Err(GitError::InvalidFormat("pack index too short".into()));
1829        }
1830        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1831            return Self::parse_v1_impl(bytes, format, verify_checksum);
1832        }
1833        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1834            return Err(GitError::InvalidFormat("pack index too short".into()));
1835        }
1836        let version = u32_be(&bytes[4..8]);
1837        if version != 2 {
1838            return Err(GitError::Unsupported(format!(
1839                "pack index version {version}"
1840            )));
1841        }
1842        let index_checksum_offset = bytes.len() - hash_len;
1843        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1844        if verify_checksum {
1845            let actual_index_checksum =
1846                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1847            if actual_index_checksum != index_checksum {
1848                return Err(GitError::InvalidFormat(format!(
1849                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1850                )));
1851            }
1852        }
1853
1854        let mut offset = 8usize;
1855        let mut fanout = [0u32; 256];
1856        let mut previous = 0u32;
1857        for slot in &mut fanout {
1858            *slot = u32_be(&bytes[offset..offset + 4]);
1859            if *slot < previous {
1860                return Err(GitError::InvalidFormat(
1861                    "pack index fanout is not monotonic".into(),
1862                ));
1863            }
1864            previous = *slot;
1865            offset += 4;
1866        }
1867        let count = fanout[255] as usize;
1868        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1869        offset = oid_table.end;
1870        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1871        offset = crc_table.end;
1872        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1873        offset = small_offset_table.end;
1874
1875        let large_offset_count = (0..count)
1876            .filter(|idx| {
1877                let start = small_offset_table.start + idx * 4;
1878                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1879            })
1880            .count();
1881        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1882        offset = large_offset_table.end;
1883
1884        let expected_trailer_offset = bytes.len() - hash_len * 2;
1885        if offset != expected_trailer_offset {
1886            if !verify_checksum && offset < expected_trailer_offset {
1887                large_offset_table = large_offset_table.start..expected_trailer_offset;
1888                offset = expected_trailer_offset;
1889            } else {
1890                return Err(GitError::InvalidFormat(format!(
1891                    "pack index has {} unexpected bytes before trailer",
1892                    expected_trailer_offset.saturating_sub(offset)
1893                )));
1894            }
1895        }
1896        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1897
1898        let mut entries = Vec::with_capacity(count);
1899        for idx in 0..count {
1900            let oid_start = oid_table.start + idx * hash_len;
1901            let crc_start = crc_table.start + idx * 4;
1902            let offset_start = small_offset_table.start + idx * 4;
1903            let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1904            // Object ids must be strictly ascending: lookup binary-searches them,
1905            // and the fanout must match the first byte. A malformed/forged index
1906            // (e.g. from a received pack) would otherwise yield silent misses.
1907            if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1908                return Err(GitError::InvalidFormat(
1909                    "pack index object ids are not strictly ascending".into(),
1910                ));
1911            }
1912            let expected_min = if oid_bytes[0] == 0 {
1913                0
1914            } else {
1915                fanout[usize::from(oid_bytes[0] - 1)]
1916            };
1917            if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1918                return Err(GitError::InvalidFormat(
1919                    "pack index object id is outside its fanout bucket".into(),
1920                ));
1921            }
1922            let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1923            let offset = if raw_offset & 0x8000_0000 == 0 {
1924                u64::from(raw_offset)
1925            } else {
1926                let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1927                let large_start = large_offset_table.start + large_idx * 8;
1928                if large_idx >= large_offset_table.len() / 8 {
1929                    return Err(GitError::InvalidFormat(
1930                        "pack index large offset points past table".into(),
1931                    ));
1932                }
1933                u64_be(&bytes[large_start..large_start + 8])
1934            };
1935            entries.push(PackIndexEntry {
1936                oid: ObjectId::from_raw(format, oid_bytes)?,
1937                crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1938                offset,
1939            });
1940        }
1941        Ok(Self {
1942            version,
1943            fanout,
1944            entries,
1945            pack_checksum,
1946            index_checksum,
1947        })
1948    }
1949
1950    fn parse_v1_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1951        let hash_len = format.raw_len();
1952        if bytes.len() < 256 * 4 + 2 * hash_len {
1953            return Err(GitError::InvalidFormat("pack index too short".into()));
1954        }
1955        let index_checksum_offset = bytes.len() - hash_len;
1956        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1957        if verify_checksum {
1958            let actual_index_checksum =
1959                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1960            if actual_index_checksum != index_checksum {
1961                return Err(GitError::InvalidFormat(format!(
1962                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1963                )));
1964            }
1965        }
1966
1967        let mut offset = 0usize;
1968        let mut fanout = [0u32; 256];
1969        let mut previous = 0u32;
1970        for slot in &mut fanout {
1971            *slot = u32_be(&bytes[offset..offset + 4]);
1972            if *slot < previous {
1973                return Err(GitError::InvalidFormat(
1974                    "pack index fanout is not monotonic".into(),
1975                ));
1976            }
1977            previous = *slot;
1978            offset += 4;
1979        }
1980        let count = fanout[255] as usize;
1981        let entry_len = hash_len
1982            .checked_add(4)
1983            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1984        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1985        offset = entry_table.end;
1986        let expected_trailer_offset = bytes.len() - hash_len * 2;
1987        if offset != expected_trailer_offset {
1988            return Err(GitError::InvalidFormat(format!(
1989                "pack index has {} unexpected bytes before trailer",
1990                expected_trailer_offset.saturating_sub(offset)
1991            )));
1992        }
1993        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1994
1995        let mut entries = Vec::with_capacity(count);
1996        let mut previous_oid: Option<ObjectId> = None;
1997        for idx in 0..count {
1998            let start = entry_table.start + idx * entry_len;
1999            let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
2000            if let Some(previous) = &previous_oid
2001                && previous.as_bytes() >= oid.as_bytes()
2002            {
2003                return Err(GitError::InvalidFormat(
2004                    "pack index object ids are not strictly sorted".into(),
2005                ));
2006            }
2007            previous_oid = Some(oid);
2008            entries.push(PackIndexEntry {
2009                oid,
2010                crc32: 0,
2011                offset: u64::from(u32_be(&bytes[start..start + 4])),
2012            });
2013        }
2014        Ok(Self {
2015            version: 1,
2016            fanout,
2017            entries,
2018            pack_checksum,
2019            index_checksum,
2020        })
2021    }
2022
2023    pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
2024        self.entries
2025            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2026            .ok()
2027            .map(|idx| &self.entries[idx])
2028    }
2029
2030    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
2031        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
2032    }
2033
2034    pub fn write_v2(
2035        format: ObjectFormat,
2036        entries: &[PackIndexEntry],
2037        pack_checksum: &ObjectId,
2038    ) -> Result<Vec<u8>> {
2039        if pack_checksum.format() != format {
2040            return Err(GitError::InvalidObjectId(
2041                "pack checksum format does not match index format".into(),
2042            ));
2043        }
2044        let mut entries = entries.iter().collect::<Vec<_>>();
2045        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2046        for pair in entries.windows(2) {
2047            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2048                return Err(GitError::InvalidFormat(format!(
2049                    "pack index contains duplicate object id {}",
2050                    pair[0].oid
2051                )));
2052            }
2053        }
2054        let mut fanout = [0u32; 256];
2055        for entry in &entries {
2056            if entry.oid.format() != format {
2057                return Err(GitError::InvalidObjectId(
2058                    "pack index entry format does not match index format".into(),
2059                ));
2060            }
2061            let first = entry.oid.as_bytes()[0] as usize;
2062            fanout[first] = fanout[first]
2063                .checked_add(1)
2064                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2065        }
2066        let mut running = 0u32;
2067        for slot in &mut fanout {
2068            running = running
2069                .checked_add(*slot)
2070                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2071            *slot = running;
2072        }
2073
2074        let mut index = Vec::new();
2075        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
2076        index.extend_from_slice(&2u32.to_be_bytes());
2077        for count in fanout {
2078            index.extend_from_slice(&count.to_be_bytes());
2079        }
2080        for entry in &entries {
2081            index.extend_from_slice(entry.oid.as_bytes());
2082        }
2083        for entry in &entries {
2084            index.extend_from_slice(&entry.crc32.to_be_bytes());
2085        }
2086
2087        let mut large_offsets = Vec::new();
2088        for entry in &entries {
2089            if entry.offset < 0x8000_0000 {
2090                index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2091            } else {
2092                if large_offsets.len() > 0x7fff_ffff {
2093                    return Err(GitError::InvalidFormat(
2094                        "too many large pack offsets".into(),
2095                    ));
2096                }
2097                let large_idx = large_offsets.len() as u32;
2098                index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
2099                large_offsets.push(entry.offset);
2100            }
2101        }
2102        for offset in large_offsets {
2103            index.extend_from_slice(&offset.to_be_bytes());
2104        }
2105        index.extend_from_slice(pack_checksum.as_bytes());
2106        let index_checksum = sley_core::digest_bytes(format, &index)?;
2107        index.extend_from_slice(index_checksum.as_bytes());
2108        Ok(index)
2109    }
2110
2111    /// Serialise a version-1 pack `.idx`: a 256-entry fanout, then for each
2112    /// object an inline 4-byte big-endian pack offset immediately followed by
2113    /// its object id (sorted by oid), then the pack checksum and a trailing
2114    /// index checksum. v1 has no CRC table and cannot represent offsets that
2115    /// do not fit in 32 bits.
2116    pub fn write_v1(
2117        format: ObjectFormat,
2118        entries: &[PackIndexEntry],
2119        pack_checksum: &ObjectId,
2120    ) -> Result<Vec<u8>> {
2121        if pack_checksum.format() != format {
2122            return Err(GitError::InvalidObjectId(
2123                "pack checksum format does not match index format".into(),
2124            ));
2125        }
2126        let mut entries = entries.iter().collect::<Vec<_>>();
2127        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2128        for pair in entries.windows(2) {
2129            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2130                return Err(GitError::InvalidFormat(format!(
2131                    "pack index contains duplicate object id {}",
2132                    pair[0].oid
2133                )));
2134            }
2135        }
2136        let mut fanout = [0u32; 256];
2137        for entry in &entries {
2138            if entry.oid.format() != format {
2139                return Err(GitError::InvalidObjectId(
2140                    "pack index entry format does not match index format".into(),
2141                ));
2142            }
2143            if entry.offset > 0xffff_ffff {
2144                return Err(GitError::InvalidFormat(
2145                    "pack offset too large for a version-1 index".into(),
2146                ));
2147            }
2148            let first = entry.oid.as_bytes()[0] as usize;
2149            fanout[first] = fanout[first]
2150                .checked_add(1)
2151                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2152        }
2153        let mut running = 0u32;
2154        for slot in &mut fanout {
2155            running = running
2156                .checked_add(*slot)
2157                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2158            *slot = running;
2159        }
2160
2161        let mut index = Vec::new();
2162        for count in fanout {
2163            index.extend_from_slice(&count.to_be_bytes());
2164        }
2165        for entry in &entries {
2166            index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2167            index.extend_from_slice(entry.oid.as_bytes());
2168        }
2169        index.extend_from_slice(pack_checksum.as_bytes());
2170        let index_checksum = sley_core::digest_bytes(format, &index)?;
2171        index.extend_from_slice(index_checksum.as_bytes());
2172        Ok(index)
2173    }
2174}
2175
2176fn index_pack_from_reader<R>(
2177    reader: &mut R,
2178    format: ObjectFormat,
2179    pack_len: u64,
2180) -> Result<PackStreamIndexBuild>
2181where
2182    R: Read,
2183{
2184    index_pack_from_stream(PackReadStream::new(reader, format, Some(pack_len))?, format)
2185}
2186
2187fn index_pack_from_reader_to_trailer<R>(
2188    reader: &mut R,
2189    format: ObjectFormat,
2190) -> Result<PackStreamIndexBuild>
2191where
2192    R: Read,
2193{
2194    index_pack_from_stream(PackReadStream::new(reader, format, None)?, format)
2195}
2196
2197fn index_pack_from_stream<R>(
2198    mut stream: PackReadStream<'_, R>,
2199    format: ObjectFormat,
2200) -> Result<PackStreamIndexBuild>
2201where
2202    R: Read,
2203{
2204    let mut header = [0u8; 12];
2205    stream.read_pack_bytes(&mut header)?;
2206    if &header[..4] != b"PACK" {
2207        return Err(GitError::InvalidFormat("missing PACK signature".into()));
2208    }
2209    let version = u32_be(&header[4..8]);
2210    if version != 2 && version != 3 {
2211        return Err(GitError::Unsupported(format!("pack version {version}")));
2212    }
2213    let count = u32_be(&header[8..12]) as usize;
2214    let mut parsed_entries = Vec::with_capacity(count);
2215    let mut raw_entries = Vec::with_capacity(count);
2216    for _ in 0..count {
2217        let entry_offset = stream.pack_offset();
2218        let mut entry_crc = crc32fast::Hasher::new();
2219        let header = parse_entry_header_from_stream(&mut stream, &mut entry_crc)?;
2220        let base = match header.kind {
2221            PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
2222                parse_ofs_delta_base_offset_from_stream(&mut stream, &mut entry_crc, entry_offset)?,
2223            )),
2224            PackObjectKind::RefDelta => {
2225                let mut raw = vec![0u8; format.raw_len()];
2226                stream.read_entry_bytes(&mut raw, &mut entry_crc)?;
2227                Some(DeltaBase::Ref(ObjectId::from_raw(format, &raw)?))
2228            }
2229            _ => None,
2230        };
2231        let (body, consumed) = inflate_entry_from_stream(
2232            &mut stream,
2233            &mut entry_crc,
2234            header.size.min(usize::MAX as u64) as usize,
2235        )?;
2236        if body.len() as u64 != header.size {
2237            return Err(GitError::InvalidObject(format!(
2238                "pack object declared {} bytes, decoded {}",
2239                header.size,
2240                body.len()
2241            )));
2242        }
2243        if consumed == 0 {
2244            return Err(GitError::InvalidFormat(
2245                "empty compressed pack entry".into(),
2246            ));
2247        }
2248        raw_entries.push((entry_offset, entry_crc.finalize()));
2249        if let Some(base) = base {
2250            parsed_entries.push(ParsedPackEntry::Delta {
2251                base,
2252                compressed_size: consumed as u64,
2253                delta_size: header.size,
2254                offset: entry_offset,
2255                delta: body,
2256            });
2257        } else {
2258            let object_type = pack_object_kind_to_object_type(header.kind)?;
2259            let object = EncodedObject::new(object_type, body);
2260            let oid = object.object_id(format)?;
2261            parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
2262                entry: PackEntry {
2263                    oid,
2264                    compressed_size: consumed as u64,
2265                    uncompressed_size: header.size,
2266                    offset: entry_offset,
2267                },
2268                object,
2269            }));
2270        }
2271    }
2272    if stream.pack_offset() != stream.trailer_pack_offset() {
2273        return Err(GitError::InvalidFormat(format!(
2274            "pack has {} trailing bytes before checksum",
2275            stream.trailer_pack_offset() - stream.pack_offset()
2276        )));
2277    }
2278    let expected = stream.read_trailer_oid()?;
2279    let pack_checksum = stream.finish_digest()?;
2280    if pack_checksum != expected {
2281        return Err(GitError::InvalidFormat(format!(
2282            "pack checksum mismatch: expected {expected}, got {pack_checksum}"
2283        )));
2284    }
2285
2286    let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
2287    let entries = resolved
2288        .iter()
2289        .zip(raw_entries)
2290        .map(|(object, (offset, crc32))| PackIndexEntry {
2291            oid: object.entry.oid,
2292            crc32,
2293            offset,
2294        })
2295        .collect::<Vec<_>>();
2296    let objects = resolved
2297        .iter()
2298        .map(|object| PackIndexedObject {
2299            oid: object.entry.oid,
2300            object_type: object.object.object_type,
2301            size: object.object.body.len() as u64,
2302            offset: object.entry.offset,
2303        })
2304        .collect::<Vec<_>>();
2305    let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
2306    Ok(PackStreamIndexBuild {
2307        index,
2308        pack_checksum,
2309        entries,
2310        objects,
2311    })
2312}
2313
2314fn pack_object_kind_to_object_type(kind: PackObjectKind) -> Result<ObjectType> {
2315    match kind {
2316        PackObjectKind::Commit => Ok(ObjectType::Commit),
2317        PackObjectKind::Tree => Ok(ObjectType::Tree),
2318        PackObjectKind::Blob => Ok(ObjectType::Blob),
2319        PackObjectKind::Tag => Ok(ObjectType::Tag),
2320        PackObjectKind::OfsDelta | PackObjectKind::RefDelta => Err(GitError::InvalidFormat(
2321            "delta entry cannot be used as an object type".into(),
2322        )),
2323    }
2324}
2325
2326struct PackReadStream<'a, R> {
2327    reader: &'a mut R,
2328    position: u64,
2329    pack_len: Option<u64>,
2330    trailer_position: Option<u64>,
2331    digest: StreamingDigest,
2332    format: ObjectFormat,
2333    pending: VecDeque<u8>,
2334}
2335
2336impl<'a, R> PackReadStream<'a, R>
2337where
2338    R: Read,
2339{
2340    fn new(reader: &'a mut R, format: ObjectFormat, pack_len: Option<u64>) -> Result<Self> {
2341        let trailer_len = format.raw_len() as u64;
2342        let trailer_position = pack_len
2343            .map(|pack_len| {
2344                if pack_len < 12 + trailer_len {
2345                    return Err(GitError::InvalidFormat("pack file too short".into()));
2346                }
2347                Ok(pack_len - trailer_len)
2348            })
2349            .transpose()?;
2350        Ok(Self {
2351            reader,
2352            position: 0,
2353            pack_len,
2354            trailer_position,
2355            digest: StreamingDigest::new(format),
2356            format,
2357            pending: VecDeque::new(),
2358        })
2359    }
2360
2361    fn pack_offset(&self) -> u64 {
2362        self.position
2363    }
2364
2365    fn trailer_pack_offset(&self) -> u64 {
2366        self.trailer_position.unwrap_or(self.position)
2367    }
2368
2369    fn read_pack_bytes(&mut self, bytes: &mut [u8]) -> Result<()> {
2370        let end = self
2371            .position
2372            .checked_add(bytes.len() as u64)
2373            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2374        if self
2375            .trailer_position
2376            .is_some_and(|trailer_position| end > trailer_position)
2377        {
2378            return Err(GitError::InvalidFormat(
2379                "pack entry extends past checksum".into(),
2380            ));
2381        }
2382        self.read_exact_raw(bytes)?;
2383        self.position = end;
2384        self.digest.update(bytes);
2385        Ok(())
2386    }
2387
2388    fn read_exact_raw(&mut self, bytes: &mut [u8]) -> Result<()> {
2389        let mut written = 0usize;
2390        while written < bytes.len() {
2391            if let Some(byte) = self.pending.pop_front() {
2392                bytes[written] = byte;
2393                written += 1;
2394                continue;
2395            }
2396            self.reader.read_exact(&mut bytes[written..])?;
2397            break;
2398        }
2399        Ok(())
2400    }
2401
2402    fn read_entry_bytes(&mut self, bytes: &mut [u8], crc: &mut crc32fast::Hasher) -> Result<()> {
2403        self.read_pack_bytes(bytes)?;
2404        crc.update(bytes);
2405        Ok(())
2406    }
2407
2408    fn read_entry_byte(&mut self, crc: &mut crc32fast::Hasher) -> Result<u8> {
2409        let mut byte = [0u8; 1];
2410        self.read_entry_bytes(&mut byte, crc)?;
2411        Ok(byte[0])
2412    }
2413
2414    fn read_compressed_chunk(&mut self, bytes: &mut [u8]) -> Result<usize> {
2415        let len = if let Some(trailer_position) = self.trailer_position {
2416            if self.position >= trailer_position {
2417                return Ok(0);
2418            }
2419            let remaining = trailer_position - self.position;
2420            if remaining < bytes.len() as u64 {
2421                remaining as usize
2422            } else {
2423                bytes.len()
2424            }
2425        } else {
2426            bytes.len()
2427        };
2428        let mut read = 0usize;
2429        while read < len {
2430            let Some(byte) = self.pending.pop_front() else {
2431                break;
2432            };
2433            bytes[read] = byte;
2434            read += 1;
2435        }
2436        if read < len {
2437            read += self.reader.read(&mut bytes[read..len])?;
2438        }
2439        self.position = self
2440            .position
2441            .checked_add(read as u64)
2442            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2443        Ok(read)
2444    }
2445
2446    fn accept_compressed_bytes(&mut self, bytes: &[u8], crc: &mut crc32fast::Hasher) {
2447        self.digest.update(bytes);
2448        crc.update(bytes);
2449    }
2450
2451    fn push_back_compressed_bytes(&mut self, bytes: &[u8]) -> Result<()> {
2452        if bytes.is_empty() {
2453            return Ok(());
2454        }
2455        self.position = self
2456            .position
2457            .checked_sub(bytes.len() as u64)
2458            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2459        for byte in bytes.iter().rev() {
2460            self.pending.push_front(*byte);
2461        }
2462        Ok(())
2463    }
2464
2465    fn read_trailer_oid(&mut self) -> Result<ObjectId> {
2466        let mut raw = vec![0u8; self.format.raw_len()];
2467        self.read_exact_raw(&mut raw)?;
2468        self.position = self
2469            .position
2470            .checked_add(raw.len() as u64)
2471            .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2472        if let Some(pack_len) = self.pack_len
2473            && self.position != pack_len
2474        {
2475            return Err(GitError::InvalidFormat(format!(
2476                "pack has {} trailing bytes after checksum",
2477                pack_len - self.position
2478            )));
2479        }
2480        if self.pack_len.is_none() && !self.pending.is_empty() {
2481            return Err(GitError::InvalidFormat(
2482                "pack has trailing bytes after checksum".into(),
2483            ));
2484        }
2485        ObjectId::from_raw(self.format, &raw)
2486    }
2487
2488    fn finish_digest(self) -> Result<ObjectId> {
2489        self.digest.finalize()
2490    }
2491}
2492
2493const STREAM_INFLATE_CHUNK: usize = 32 * 1024;
2494
2495fn inflate_entry_from_stream<R>(
2496    stream: &mut PackReadStream<'_, R>,
2497    crc: &mut crc32fast::Hasher,
2498    size_hint: usize,
2499) -> Result<(Vec<u8>, usize)>
2500where
2501    R: Read,
2502{
2503    INFLATE.with(|cell| {
2504        let mut decompress = cell.borrow_mut();
2505        decompress.reset(true);
2506        let mut out = Vec::with_capacity(bounded_inflate_reserve(size_hint, STREAM_INFLATE_CHUNK));
2507        let mut compressed_total = 0usize;
2508        let mut input = [0u8; STREAM_INFLATE_CHUNK];
2509        loop {
2510            let read = stream.read_compressed_chunk(&mut input)?;
2511            if read == 0 {
2512                return Err(GitError::InvalidObject("truncated zlib stream".into()));
2513            }
2514            let mut cursor = 0usize;
2515            while cursor < read {
2516                if out.len() == out.capacity() {
2517                    out.reserve(out.len().max(64));
2518                }
2519                let before_in = decompress.total_in();
2520                let before_out = decompress.total_out();
2521                let status = decompress
2522                    .decompress_vec(
2523                        &input[cursor..read],
2524                        &mut out,
2525                        flate2::FlushDecompress::None,
2526                    )
2527                    .map_err(|err| {
2528                        GitError::InvalidObject(format!("zlib inflate failed: {err}"))
2529                    })?;
2530                let consumed = (decompress.total_in() - before_in) as usize;
2531                let produced = decompress.total_out() - before_out;
2532                if consumed > 0 {
2533                    let consumed_end = cursor + consumed;
2534                    stream.accept_compressed_bytes(&input[cursor..consumed_end], crc);
2535                    compressed_total = compressed_total
2536                        .checked_add(consumed)
2537                        .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2538                    cursor = consumed_end;
2539                }
2540                match status {
2541                    flate2::Status::StreamEnd => {
2542                        stream.push_back_compressed_bytes(&input[cursor..read])?;
2543                        return Ok((out, compressed_total));
2544                    }
2545                    _ if consumed == 0 && produced == 0 => {
2546                        return Err(GitError::InvalidObject("truncated zlib stream".into()));
2547                    }
2548                    _ => {}
2549                }
2550            }
2551        }
2552    })
2553}
2554
2555fn parse_entry_header_from_stream<R>(
2556    stream: &mut PackReadStream<'_, R>,
2557    crc: &mut crc32fast::Hasher,
2558) -> Result<EntryHeader>
2559where
2560    R: Read,
2561{
2562    let first = stream.read_entry_byte(crc)?;
2563    let mut size = u64::from(first & 0x0f);
2564    let kind = match (first >> 4) & 0x07 {
2565        1 => PackObjectKind::Commit,
2566        2 => PackObjectKind::Tree,
2567        3 => PackObjectKind::Blob,
2568        4 => PackObjectKind::Tag,
2569        6 => PackObjectKind::OfsDelta,
2570        7 => PackObjectKind::RefDelta,
2571        other => {
2572            return Err(GitError::InvalidFormat(format!(
2573                "invalid pack object type {other}"
2574            )));
2575        }
2576    };
2577    let mut shift = 4;
2578    let mut byte = first;
2579    while byte & 0x80 != 0 {
2580        byte = stream.read_entry_byte(crc)?;
2581        let part = u64::from(byte & 0x7f);
2582        size = size
2583            .checked_add(
2584                part.checked_shl(shift)
2585                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
2586            )
2587            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
2588        shift += 7;
2589    }
2590    Ok(EntryHeader { kind, size })
2591}
2592
2593fn parse_ofs_delta_base_offset_from_stream<R>(
2594    stream: &mut PackReadStream<'_, R>,
2595    crc: &mut crc32fast::Hasher,
2596    entry_offset: u64,
2597) -> Result<u64>
2598where
2599    R: Read,
2600{
2601    let mut byte = stream.read_entry_byte(crc)?;
2602    let mut relative = u64::from(byte & 0x7f);
2603    while byte & 0x80 != 0 {
2604        byte = stream.read_entry_byte(crc)?;
2605        relative = relative
2606            .checked_add(1)
2607            .and_then(|value| value.checked_shl(7))
2608            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2609            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2610    }
2611    entry_offset
2612        .checked_sub(relative)
2613        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2614}
2615
2616/// The `.rev` table for a pack: index positions (the rank of each object in
2617/// the oid-sorted `.idx`) listed in pack order (ascending pack offset), as
2618/// upstream `write_rev_file` lays them out. Accepts `entries` in any order;
2619/// the result feeds [`PackReverseIndex::write`].
2620pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
2621    let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
2622    oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
2623    let mut index_position = vec![0u32; entries.len()];
2624    for (position, &entry) in oid_sorted.iter().enumerate() {
2625        index_position[entry] = position as u32;
2626    }
2627    let mut by_offset: Vec<usize> = (0..entries.len()).collect();
2628    by_offset.sort_by_key(|&entry| entries[entry].offset);
2629    by_offset
2630        .into_iter()
2631        .map(|entry| index_position[entry])
2632        .collect()
2633}
2634
2635impl PackReverseIndex {
2636    pub fn write(
2637        format: ObjectFormat,
2638        positions: &[u32],
2639        pack_checksum: &ObjectId,
2640    ) -> Result<Vec<u8>> {
2641        if pack_checksum.format() != format {
2642            return Err(GitError::InvalidObjectId(
2643                "pack checksum format does not match reverse index format".into(),
2644            ));
2645        }
2646        validate_position_permutation(positions)?;
2647
2648        let mut out = Vec::new();
2649        out.extend_from_slice(b"RIDX");
2650        out.extend_from_slice(&1u32.to_be_bytes());
2651        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2652        for position in positions {
2653            out.extend_from_slice(&position.to_be_bytes());
2654        }
2655        out.extend_from_slice(pack_checksum.as_bytes());
2656        let checksum = sley_core::digest_bytes(format, &out)?;
2657        out.extend_from_slice(checksum.as_bytes());
2658        Ok(out)
2659    }
2660
2661    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2662        let hash_len = format.raw_len();
2663        let table_len = object_count
2664            .checked_mul(4)
2665            .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
2666        let min_len = 12usize
2667            .checked_add(table_len)
2668            .and_then(|len| len.checked_add(hash_len * 2))
2669            .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
2670        if bytes.len() < min_len {
2671            return Err(GitError::InvalidFormat("reverse index too short".into()));
2672        }
2673        if bytes.len() != min_len {
2674            return Err(GitError::InvalidFormat(format!(
2675                "reverse index has {} trailing bytes",
2676                bytes.len() - min_len
2677            )));
2678        }
2679        if &bytes[..4] != b"RIDX" {
2680            return Err(GitError::InvalidFormat(
2681                "missing reverse index signature".into(),
2682            ));
2683        }
2684        let version = u32_be(&bytes[4..8]);
2685        if version != 1 {
2686            return Err(GitError::Unsupported(format!(
2687                "reverse index version {version}"
2688            )));
2689        }
2690        let hash_id = u32_be(&bytes[8..12]);
2691        if hash_id != hash_function_id(format) {
2692            return Err(GitError::InvalidFormat(format!(
2693                "reverse index hash id {hash_id} does not match {}",
2694                format.name()
2695            )));
2696        }
2697
2698        let index_checksum_offset = bytes.len() - hash_len;
2699        let actual_index_checksum =
2700            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2701        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2702        if actual_index_checksum != index_checksum {
2703            return Err(GitError::InvalidFormat(format!(
2704                "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2705            )));
2706        }
2707
2708        let pack_checksum_offset = index_checksum_offset - hash_len;
2709        let pack_checksum =
2710            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2711        let mut positions = Vec::with_capacity(object_count);
2712        let mut offset = 12usize;
2713        for _ in 0..object_count {
2714            let position = u32_be(&bytes[offset..offset + 4]);
2715            positions.push(position);
2716            offset += 4;
2717        }
2718        validate_position_permutation(&positions)?;
2719
2720        Ok(Self {
2721            version,
2722            format,
2723            positions,
2724            pack_checksum,
2725            index_checksum,
2726        })
2727    }
2728}
2729
2730impl PackMtimes {
2731    pub fn write(
2732        format: ObjectFormat,
2733        mtimes: &[u32],
2734        pack_checksum: &ObjectId,
2735    ) -> Result<Vec<u8>> {
2736        if pack_checksum.format() != format {
2737            return Err(GitError::InvalidObjectId(
2738                "pack checksum format does not match mtimes format".into(),
2739            ));
2740        }
2741
2742        let mut out = Vec::new();
2743        out.extend_from_slice(b"MTME");
2744        out.extend_from_slice(&1u32.to_be_bytes());
2745        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2746        for mtime in mtimes {
2747            out.extend_from_slice(&mtime.to_be_bytes());
2748        }
2749        out.extend_from_slice(pack_checksum.as_bytes());
2750        let checksum = sley_core::digest_bytes(format, &out)?;
2751        out.extend_from_slice(checksum.as_bytes());
2752        Ok(out)
2753    }
2754
2755    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2756        let hash_len = format.raw_len();
2757        let table_len = object_count
2758            .checked_mul(4)
2759            .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2760        let expected_len = 12usize
2761            .checked_add(table_len)
2762            .and_then(|len| len.checked_add(hash_len * 2))
2763            .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2764        if bytes.len() < expected_len {
2765            return Err(GitError::InvalidFormat("mtimes file too short".into()));
2766        }
2767        if bytes.len() != expected_len {
2768            return Err(GitError::InvalidFormat(format!(
2769                "mtimes file has {} trailing bytes",
2770                bytes.len() - expected_len
2771            )));
2772        }
2773        if &bytes[..4] != b"MTME" {
2774            return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2775        }
2776        let version = u32_be(&bytes[4..8]);
2777        if version != 1 {
2778            return Err(GitError::Unsupported(format!("mtimes version {version}")));
2779        }
2780        let hash_id = u32_be(&bytes[8..12]);
2781        if hash_id != hash_function_id(format) {
2782            return Err(GitError::InvalidFormat(format!(
2783                "mtimes hash id {hash_id} does not match {}",
2784                format.name()
2785            )));
2786        }
2787
2788        let index_checksum_offset = bytes.len() - hash_len;
2789        let actual_index_checksum =
2790            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2791        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2792        if actual_index_checksum != index_checksum {
2793            return Err(GitError::InvalidFormat(format!(
2794                "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2795            )));
2796        }
2797
2798        let pack_checksum_offset = index_checksum_offset - hash_len;
2799        let pack_checksum =
2800            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2801        let mut mtimes = Vec::with_capacity(object_count);
2802        let mut offset = 12usize;
2803        for _ in 0..object_count {
2804            mtimes.push(u32_be(&bytes[offset..offset + 4]));
2805            offset += 4;
2806        }
2807
2808        Ok(Self {
2809            version,
2810            format,
2811            mtimes,
2812            pack_checksum,
2813            index_checksum,
2814        })
2815    }
2816}
2817
2818impl PackBitmapIndex {
2819    pub const OPTION_FULL_DAG: u16 = 0x0001;
2820    pub const OPTION_HASH_CACHE: u16 = 0x0004;
2821
2822    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2823        let hash_len = format.raw_len();
2824        let min_len = 12usize
2825            .checked_add(hash_len * 2)
2826            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2827        if bytes.len() < min_len {
2828            return Err(GitError::InvalidFormat("bitmap index too short".into()));
2829        }
2830        if &bytes[..4] != b"BITM" {
2831            return Err(GitError::InvalidFormat(
2832                "missing bitmap index signature".into(),
2833            ));
2834        }
2835        let version = u16_be(&bytes[4..6]);
2836        if version != 1 {
2837            return Err(GitError::Unsupported(format!(
2838                "bitmap index version {version}"
2839            )));
2840        }
2841        let options = u16_be(&bytes[6..8]);
2842        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2843        if options & !known_options != 0 {
2844            return Err(GitError::Unsupported(format!(
2845                "bitmap index options {:#06x}",
2846                options & !known_options
2847            )));
2848        }
2849        let entry_count = u32_be(&bytes[8..12]) as usize;
2850        let checksum_offset = bytes.len() - hash_len;
2851        let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2852        let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2853        if actual_index_checksum != index_checksum {
2854            return Err(GitError::InvalidFormat(format!(
2855                "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2856            )));
2857        }
2858
2859        let pack_checksum_end = 12usize
2860            .checked_add(hash_len)
2861            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2862        let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2863        let mut offset = pack_checksum_end;
2864        let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2865        let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2866        let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2867        let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2868
2869        let mut entries = Vec::with_capacity(entry_count);
2870        for idx in 0..entry_count {
2871            if checksum_offset.saturating_sub(offset) < 6 {
2872                return Err(GitError::InvalidFormat(
2873                    "truncated bitmap index entry".into(),
2874                ));
2875            }
2876            let object_position = u32_be(&bytes[offset..offset + 4]);
2877            offset += 4;
2878            if object_position as usize >= object_count {
2879                return Err(GitError::InvalidFormat(
2880                    "bitmap index entry points past object table".into(),
2881                ));
2882            }
2883            let xor_offset = bytes[offset];
2884            offset += 1;
2885            if xor_offset as usize > idx || xor_offset > 160 {
2886                return Err(GitError::InvalidFormat(
2887                    "bitmap index entry has invalid XOR offset".into(),
2888                ));
2889            }
2890            let flags = bytes[offset];
2891            offset += 1;
2892            let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2893            entries.push(PackBitmapEntry {
2894                object_position,
2895                xor_offset,
2896                flags,
2897                bitmap,
2898            });
2899        }
2900
2901        let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2902            let cache_len = object_count
2903                .checked_mul(4)
2904                .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2905            if checksum_offset.saturating_sub(offset) < cache_len {
2906                return Err(GitError::InvalidFormat(
2907                    "truncated bitmap hash cache".into(),
2908                ));
2909            }
2910            let mut cache = Vec::with_capacity(object_count);
2911            for _ in 0..object_count {
2912                cache.push(u32_be(&bytes[offset..offset + 4]));
2913                offset += 4;
2914            }
2915            Some(cache)
2916        } else {
2917            None
2918        };
2919
2920        if offset != checksum_offset {
2921            return Err(GitError::InvalidFormat(format!(
2922                "bitmap index has {} trailing bytes",
2923                checksum_offset - offset
2924            )));
2925        }
2926
2927        Ok(Self {
2928            version,
2929            format,
2930            options,
2931            pack_checksum,
2932            index_checksum,
2933            type_bitmaps: PackBitmapTypeBitmaps {
2934                commits,
2935                trees,
2936                blobs,
2937                tags,
2938            },
2939            entries,
2940            name_hash_cache,
2941        })
2942    }
2943
2944    /// Looks up the stored entry whose commit sits at `position` in the
2945    /// oid-sorted pack index (`.idx` order; see [`PackBitmapEntry::object_position`]).
2946    pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2947        self.entries
2948            .iter()
2949            .find(|entry| entry.object_position == position)
2950    }
2951}
2952
2953fn parse_bitmap_ewah(
2954    bytes: &[u8],
2955    offset: &mut usize,
2956    checksum_offset: usize,
2957    _object_count: usize,
2958) -> Result<EwahBitmap> {
2959    if checksum_offset.saturating_sub(*offset) < 12 {
2960        return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2961    }
2962    let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2963    *offset += 4;
2964    let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2965    *offset += 4;
2966    let words_len = word_count
2967        .checked_mul(8)
2968        .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2969    if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2970        return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2971    }
2972    let mut words = Vec::with_capacity(word_count);
2973    for _ in 0..word_count {
2974        words.push(u64_be(&bytes[*offset..*offset + 8]));
2975        *offset += 8;
2976    }
2977    let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2978    *offset += 4;
2979    validate_ewah_words(bit_size, &words, rlw_position)?;
2980    Ok(EwahBitmap {
2981        bit_size,
2982        words,
2983        rlw_position,
2984    })
2985}
2986
2987fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2988    if words.is_empty() {
2989        if rlw_position != 0 || bit_size != 0 {
2990            return Err(GitError::InvalidFormat(
2991                "EWAH bitmap has invalid empty RLW".into(),
2992            ));
2993        }
2994        return Ok(());
2995    }
2996    if rlw_position as usize >= words.len() {
2997        return Err(GitError::InvalidFormat(
2998            "EWAH RLW position points past word table".into(),
2999        ));
3000    }
3001    let mut word_idx = 0usize;
3002    let mut decoded_words = 0u64;
3003    while word_idx < words.len() {
3004        let rlw = words[word_idx];
3005        let run_words = (rlw >> 1) & 0xffff_ffff;
3006        let literal_words = (rlw >> 33) as usize;
3007        word_idx += 1;
3008        word_idx = word_idx
3009            .checked_add(literal_words)
3010            .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
3011        if word_idx > words.len() {
3012            return Err(GitError::InvalidFormat(
3013                "EWAH literal words extend past word table".into(),
3014            ));
3015        }
3016        decoded_words = decoded_words
3017            .checked_add(run_words)
3018            .and_then(|value| value.checked_add(literal_words as u64))
3019            .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
3020    }
3021    let decoded_bits = decoded_words
3022        .checked_mul(64)
3023        .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
3024    if decoded_bits < u64::from(bit_size) {
3025        return Err(GitError::InvalidFormat(
3026            "EWAH bitmap decodes fewer bits than declared".into(),
3027        ));
3028    }
3029    Ok(())
3030}
3031
3032impl MultiPackIndex {
3033    pub fn write(
3034        format: ObjectFormat,
3035        version: u8,
3036        pack_names: &[String],
3037        objects: &[MultiPackIndexEntry],
3038    ) -> Result<Vec<u8>> {
3039        Self::write_with_reverse_index(format, version, pack_names, objects, None)
3040    }
3041
3042    /// Like [`MultiPackIndex::write`], but when `preferred_pack` is `Some`,
3043    /// additionally emits the `RIDX` chunk: the object order a multi-pack
3044    /// `.bitmap` numbers its bits in ("pseudo-pack order" — every object of
3045    /// the preferred pack first, then the rest by pack id, each pack's slice
3046    /// in offset order), stored as one u32 midx position per object.
3047    ///
3048    /// `preferred_pack` is the pack-int-id receiving pseudo-pack priority; it
3049    /// must be in range.
3050    pub fn write_with_reverse_index(
3051        format: ObjectFormat,
3052        version: u8,
3053        pack_names: &[String],
3054        objects: &[MultiPackIndexEntry],
3055        preferred_pack: Option<u32>,
3056    ) -> Result<Vec<u8>> {
3057        Self::write_with_bitmap_packs(format, version, pack_names, objects, preferred_pack, None)
3058    }
3059
3060    pub fn write_with_bitmap_packs(
3061        format: ObjectFormat,
3062        version: u8,
3063        pack_names: &[String],
3064        objects: &[MultiPackIndexEntry],
3065        preferred_pack: Option<u32>,
3066        bitmapped_packs: Option<&[MultiPackBitmapPack]>,
3067    ) -> Result<Vec<u8>> {
3068        if let Some(preferred) = preferred_pack
3069            && preferred as usize >= pack_names.len()
3070        {
3071            return Err(GitError::InvalidFormat(format!(
3072                "preferred pack {preferred} out of range for {} packs",
3073                pack_names.len()
3074            )));
3075        }
3076        if version != 1 && version != 2 {
3077            return Err(GitError::Unsupported(format!(
3078                "multi-pack-index version {version}"
3079            )));
3080        }
3081        if pack_names.len() > u32::MAX as usize {
3082            return Err(GitError::InvalidFormat(
3083                "too many multi-pack-index packs".into(),
3084            ));
3085        }
3086        if objects.len() > u32::MAX as usize {
3087            return Err(GitError::InvalidFormat(
3088                "too many multi-pack-index objects".into(),
3089            ));
3090        }
3091        if let Some(bitmapped_packs) = bitmapped_packs {
3092            if bitmapped_packs.len() != pack_names.len() {
3093                return Err(GitError::InvalidFormat(
3094                    "multi-pack-index BTMP pack count mismatch".into(),
3095                ));
3096            }
3097            for pack in bitmapped_packs {
3098                let bitmap_end = u64::from(pack.bitmap_pos)
3099                    .checked_add(u64::from(pack.bitmap_nr))
3100                    .ok_or_else(|| {
3101                        GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
3102                    })?;
3103                if bitmap_end > objects.len() as u64 {
3104                    return Err(GitError::InvalidFormat(
3105                        "multi-pack-index BTMP range points past object table".into(),
3106                    ));
3107                }
3108            }
3109        }
3110        validate_midx_pack_names(pack_names)?;
3111        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
3112            return Err(GitError::InvalidFormat(
3113                "multi-pack-index v1 pack names must be sorted".into(),
3114            ));
3115        }
3116
3117        let mut objects = objects.iter().collect::<Vec<_>>();
3118        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
3119        let mut previous_oid: Option<&ObjectId> = None;
3120        for object in &objects {
3121            if object.oid.format() != format {
3122                return Err(GitError::InvalidObjectId(
3123                    "multi-pack-index object format does not match index format".into(),
3124                ));
3125            }
3126            if let Some(previous) = previous_oid
3127                && previous.as_bytes() == object.oid.as_bytes()
3128            {
3129                return Err(GitError::InvalidFormat(
3130                    "multi-pack-index contains duplicate object ids".into(),
3131                ));
3132            }
3133            if object.pack_int_id as usize >= pack_names.len() {
3134                return Err(GitError::InvalidFormat(
3135                    "multi-pack-index object points past pack table".into(),
3136                ));
3137            }
3138            previous_oid = Some(&object.oid);
3139        }
3140
3141        let mut large_offsets = Vec::new();
3142        let mut chunks = vec![
3143            (*b"PNAM", write_midx_pack_names(pack_names)),
3144            (*b"OIDF", write_midx_oid_fanout(&objects)?),
3145            (*b"OIDL", write_midx_oid_lookup(&objects)),
3146            (
3147                *b"OOFF",
3148                write_midx_object_offsets(&objects, &mut large_offsets)?,
3149            ),
3150        ];
3151        if !large_offsets.is_empty() {
3152            chunks.push((*b"LOFF", large_offsets));
3153        }
3154        if let Some(preferred) = preferred_pack {
3155            // `objects` is already in midx (oid-sorted) order here; the chunk
3156            // lists each object's midx position in pseudo-pack order.
3157            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
3158            pseudo.sort_by_key(|&midx_pos| {
3159                let object = objects[midx_pos as usize];
3160                (
3161                    object.pack_int_id != preferred,
3162                    object.pack_int_id,
3163                    object.offset,
3164                )
3165            });
3166            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
3167            for midx_pos in pseudo {
3168                ridx.extend_from_slice(&midx_pos.to_be_bytes());
3169            }
3170            chunks.push((*b"RIDX", ridx));
3171        }
3172        if let Some(bitmapped_packs) = bitmapped_packs {
3173            let mut btmp = Vec::with_capacity(bitmapped_packs.len() * 8);
3174            for pack in bitmapped_packs {
3175                btmp.extend_from_slice(&pack.bitmap_pos.to_be_bytes());
3176                btmp.extend_from_slice(&pack.bitmap_nr.to_be_bytes());
3177            }
3178            chunks.push((*b"BTMP", btmp));
3179        }
3180        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
3181    }
3182
    /// Parses a multi-pack-index image from `bytes`, verifying its trailing
    /// checksum.
    ///
    /// Delegates to `parse_impl` with checksum verification enabled, so a file
    /// whose trailing hash does not match the digest of the preceding bytes is
    /// rejected. Use [`Self::parse_without_checksum`] to skip that digest pass.
    ///
    /// # Errors
    ///
    /// Propagates every error produced by `parse_impl`.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true)
    }
3186
    /// Parses a multi-pack-index image from `bytes` without recomputing the
    /// trailing checksum.
    ///
    /// Delegates to `parse_impl` with checksum verification disabled. The
    /// trailing hash is still decoded and stored; it is just not compared
    /// against a digest of the file contents. All structural validation still
    /// runs.
    ///
    /// # Errors
    ///
    /// Propagates every error produced by `parse_impl`.
    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false)
    }
3190
    /// Shared implementation behind [`Self::parse`] and
    /// [`Self::parse_without_checksum`]: decodes a complete multi-pack-index
    /// image held in `bytes`.
    ///
    /// Validation runs in a fixed order, and the first failing check decides
    /// which error is returned: fixed 12-byte header, size of the chunk lookup
    /// table, trailing checksum (only when `verify_checksum` is `true`), the
    /// lookup entries and their terminator, per-chunk bounds, and finally the
    /// individual chunks via the `parse_midx_*` helpers.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::InvalidFormat`] for a file that is too short, a bad
    /// signature, a hash id that does not match `format`, a malformed chunk
    /// lookup table, or a checksum mismatch. Returns [`GitError::Unsupported`]
    /// for a version other than 1 or 2, or a non-zero base count. Errors from
    /// `ObjectId::from_raw`, `sley_core::digest_bytes` and the chunk helpers are
    /// propagated unchanged.
    fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
        let hash_len = format.raw_len();
        // Smallest acceptable image: 12-byte header, one 12-byte lookup entry
        // (the terminator) and the trailing hash. This also makes the fixed
        // header indexing below and `bytes.len() - hash_len` safe.
        if bytes.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &bytes[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        // Header byte 4: format version; only 1 and 2 are accepted.
        let version = bytes[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        // Header byte 5: hash function id, which must agree with `format`.
        let hash_id = bytes[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        // Header byte 6: number of chunks; byte 7: number of base indexes,
        // which must be zero here.
        let chunk_count = bytes[6] as usize;
        let base_midx_count = bytes[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        // Header bytes 8..12: pack count.
        let pack_count = u32_be(&bytes[8..12]);
        // The lookup table has one 12-byte entry per chunk plus a terminator.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        // The final `hash_len` bytes are the checksum; everything before it is
        // header, lookup table and chunk data.
        let checksum_offset = bytes.len() - hash_len;
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        // The stored checksum is always decoded; it is only compared against a
        // freshly computed digest when the caller asked for verification.
        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
        if verify_checksum {
            let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
            if actual_checksum != checksum {
                return Err(GitError::InvalidFormat(format!(
                    "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
                )));
            }
        }

        // Read every lookup entry (4-byte chunk id followed by an 8-byte file
        // offset). The reads stay in bounds because `data_start` was checked
        // against `checksum_offset` above.
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ];
            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        // The last entry is the terminator: an all-zero id whose offset must be
        // exactly where the checksum begins.
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // Turn adjacent lookup entries into (id, offset, len) chunk records.
        // A chunk's length is the distance to the next entry's offset.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        let mut reported_unaligned = false;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            // Chunks must start at or after the end of the lookup table and
            // must not move backwards relative to the previous chunk.
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            // A misaligned chunk is reported on stderr (at most once per parse)
            // but parsing continues.
            if chunk_offset % 4 != 0 && !reported_unaligned {
                eprintln!(
                    "error: chunk id {:08x} not 4-byte aligned",
                    u32::from_be_bytes(id)
                );
                reported_unaligned = true;
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        // Decode the individual chunks. Each helper receives the whole image
        // plus the validated chunk directory; later helpers consume the results
        // of earlier ones (object count, object ids).
        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
        let bitmapped_packs =
            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;

        Ok(Self {
            version,
            format,
            pack_count,
            pack_names,
            object_count: object_count as u32,
            fanout,
            objects,
            reverse_index,
            bitmapped_packs,
            chunks,
            checksum,
        })
    }
3334
3335    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
3336        self.objects
3337            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
3338            .ok()
3339            .map(|idx| &self.objects[idx])
3340    }
3341}
3342
/// Lookup-only view over a multi-pack-index image.
///
/// `parse` validates the header and chunk directory, then records where the
/// OIDL, OOFF and (optional) LOFF chunks sit inside the byte source. The byte
/// source itself is retained, and `find` decodes individual entries from it on
/// demand.
impl MultiPackIndexOidLookup {
    /// Validates a multi-pack-index image and builds a lookup view over it.
    ///
    /// Checks the fixed header, the chunk lookup table and the sizes of the
    /// OIDL, OOFF and LOFF chunks. This method does not recompute the trailing
    /// checksum, and it does not decode individual object entries; those are
    /// validated lazily by [`Self::find`].
    ///
    /// # Errors
    ///
    /// Returns [`GitError::InvalidFormat`] for a short file, bad signature,
    /// mismatched hash id, malformed chunk lookup table, or a missing or
    /// wrongly sized OIDL / OOFF / LOFF chunk, and [`GitError::Unsupported`]
    /// for an unknown version or a non-zero base count. Errors from the
    /// `parse_midx_*` / `midx_chunk_data` helpers are propagated unchanged.
    pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
        let raw = bytes.as_bytes();
        let hash_len = format.raw_len();
        // Smallest acceptable image: 12-byte header, one 12-byte lookup entry
        // (the terminator) and the trailing hash.
        if raw.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &raw[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        // Header byte 4: format version; only 1 and 2 are accepted.
        let version = raw[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        // Header byte 5: hash function id, which must agree with `format`.
        let hash_id = raw[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        // Header byte 6: number of chunks; byte 7: number of base indexes,
        // which must be zero here.
        let chunk_count = raw[6] as usize;
        let base_midx_count = raw[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        // Header bytes 8..12: pack count.
        let pack_count = u32_be(&raw[8..12]);
        // The lookup table has one 12-byte entry per chunk plus a terminator.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        // The final `hash_len` bytes are the checksum; chunk data must end there.
        let checksum_offset = raw.len() - hash_len;
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        // Read every lookup entry (4-byte chunk id followed by an 8-byte file
        // offset). The reads stay in bounds because `data_start` was checked
        // against `checksum_offset` above.
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                raw[offset],
                raw[offset + 1],
                raw[offset + 2],
                raw[offset + 3],
            ];
            let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        // The last entry is the terminator: an all-zero id whose offset must be
        // exactly where the checksum begins.
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // Turn adjacent lookup entries into (id, offset, len) chunk records.
        // A chunk's length is the distance to the next entry's offset.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        let mut reported_unaligned = false;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            // Chunks must start at or after the end of the lookup table and
            // must not move backwards relative to the previous chunk.
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            // A misaligned chunk is reported on stderr (at most once per parse)
            // but parsing continues.
            if chunk_offset % 4 != 0 && !reported_unaligned {
                eprintln!(
                    "error: chunk id {:08x} not 4-byte aligned",
                    u32::from_be_bytes(id)
                );
                reported_unaligned = true;
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
        // OIDL must hold exactly one `hash_len`-byte object id per object.
        let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
        let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
            GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
        })?;
        if oid_lookup.len() != expected_len {
            return Err(GitError::InvalidFormat(
                "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
            ));
        }
        // OOFF must hold exactly one 8-byte record per object.
        let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
        let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
            GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
        })?;
        if object_offsets.len() != expected_offsets_len {
            return Err(GitError::InvalidFormat(
                "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
            ));
        }
        // LOFF may be absent; when present it must be a whole number of 8-byte
        // entries.
        let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
        if let Some(large_offsets) = large_offsets
            && large_offsets.len() % 8 != 0
        {
            return Err(GitError::InvalidFormat(
                "multi-pack-index LOFF chunk has invalid length".into(),
            ));
        }
        // Convert the borrowed chunk slices into plain offsets relative to the
        // start of `raw`, so the struct can own `bytes` without borrowing from it.
        // NOTE(review): this assumes `midx_chunk_data` returns sub-slices of
        // `raw` (not copies); confirm against that helper.
        let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
        let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
        let (large_offsets_offset, large_offsets_len) = match large_offsets {
            Some(large_offsets) => (
                Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
                large_offsets.len(),
            ),
            None => (None, 0),
        };
        Ok(Self {
            format,
            pack_count,
            pack_names,
            fanout,
            object_count,
            oid_lookup_offset,
            object_offsets_offset,
            large_offsets_offset,
            large_offsets_len,
            bytes,
        })
    }

    /// Returns `true` when `oid` is present in the OID lookup table.
    ///
    /// Only the position search runs; the entry's pack id and offset are not
    /// decoded or validated.
    pub fn contains(&self, oid: &ObjectId) -> bool {
        self.find_position(oid).is_some()
    }

    /// Looks up `oid` and decodes its entry from the retained bytes.
    ///
    /// Returns `Ok(None)` when the id is not in the table.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::InvalidFormat`] when the entry's pack id is not below
    /// the pack count from the header, or when it refers to a large offset that
    /// is missing (no LOFF chunk) or lies outside the LOFF chunk. Errors from
    /// `ObjectId::from_raw` are propagated.
    pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
        let Some(position) = self.find_position(oid) else {
            return Ok(None);
        };
        let bytes = self.bytes.as_bytes();
        let hash_len = self.format.raw_len();
        // `position` is below `object_count`, and `parse` verified that the OIDL
        // chunk is exactly `object_count * hash_len` bytes long.
        let oid_start = self
            .oid_lookup_offset
            .checked_add(position * hash_len)
            .ok_or_else(|| {
                GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
            })?;
        let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
        // Each OOFF record is 8 bytes: a 4-byte pack id followed by a 4-byte
        // offset word.
        let offset_start = self
            .object_offsets_offset
            .checked_add(position * 8)
            .ok_or_else(|| {
                GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
            })?;
        let data = &bytes[offset_start..offset_start + 8];
        let pack_int_id = u32_be(&data[..4]);
        if pack_int_id >= self.pack_count {
            return Err(GitError::InvalidFormat(
                "multi-pack-index object points past pack table".into(),
            ));
        }
        // With the top bit clear the word is the pack offset itself; with it set
        // the low 31 bits index an 8-byte entry in the LOFF chunk.
        let raw_offset = u32_be(&data[4..8]);
        let offset = if raw_offset & 0x8000_0000 == 0 {
            u64::from(raw_offset)
        } else {
            let Some(large_offsets_offset) = self.large_offsets_offset else {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index large offset missing LOFF chunk".into(),
                ));
            };
            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
            })?;
            let large_end = large_start.checked_add(8).ok_or_else(|| {
                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
            })?;
            // Bounds are checked relative to the LOFF chunk, not the whole file.
            if large_end > self.large_offsets_len {
                return Err(GitError::InvalidFormat(
                    "fatal: multi-pack-index large offset out of bounds".into(),
                ));
            }
            let start = large_offsets_offset + large_start;
            u64_be(&bytes[start..start + 8])
        };
        Ok(Some(MultiPackIndexEntry {
            oid,
            pack_int_id,
            offset,
            // Records whether the entry was stored through the LOFF chunk.
            force_large_offset: raw_offset & 0x8000_0000 != 0,
        }))
    }

    /// Returns the pack name stored at index `pack_int_id`, or `None` when the
    /// index is past the end of the parsed name list.
    pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
        self.pack_names
            .get(pack_int_id as usize)
            .map(String::as_str)
    }

    /// Returns the index of `oid` within the OID lookup table, if present.
    ///
    /// The fanout table narrows the search to the entries sharing `oid`'s first
    /// byte; a binary search over the raw id bytes then runs inside that range.
    /// The search relies on the table being sorted by raw id bytes. Returns
    /// `None` for an id of a different object format, an empty index, an empty
    /// fanout bucket, or a bucket whose end exceeds the object count.
    fn find_position(&self, oid: &ObjectId) -> Option<usize> {
        if oid.format() != self.format || self.object_count == 0 {
            return None;
        }
        // fanout[b] is the number of entries whose first byte is <= b, so the
        // bucket for `first` is fanout[first - 1]..fanout[first].
        let first = oid.as_bytes()[0] as usize;
        let start = if first == 0 {
            0
        } else {
            self.fanout[first - 1] as usize
        };
        let end = self.fanout[first] as usize;
        if start >= end || end > self.object_count {
            return None;
        }
        let hash_len = self.format.raw_len();
        let table_start = self.oid_lookup_offset;
        let table_end = table_start + self.object_count * hash_len;
        let bytes = self.bytes.as_bytes();
        let table = &bytes[table_start..table_end];
        let needle = oid.as_bytes();
        // Half-open binary search over [low, high).
        let mut low = start;
        let mut high = end;
        while low < high {
            let mid = low + (high - low) / 2;
            let raw = &table[mid * hash_len..(mid + 1) * hash_len];
            match raw.cmp(needle) {
                std::cmp::Ordering::Less => low = mid + 1,
                std::cmp::Ordering::Equal => return Some(mid),
                std::cmp::Ordering::Greater => high = mid,
            }
        }
        None
    }
}
3611
3612fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
3613    for name in pack_names {
3614        if name.is_empty() {
3615            return Err(GitError::InvalidFormat(
3616                "multi-pack-index pack name is empty".into(),
3617            ));
3618        }
3619        if name
3620            .bytes()
3621            .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
3622        {
3623            return Err(GitError::InvalidFormat(
3624                "multi-pack-index pack name contains an invalid byte".into(),
3625            ));
3626        }
3627    }
3628    Ok(())
3629}
3630
/// Serialises the pack-name chunk payload.
///
/// Each name is written as its UTF-8 bytes followed by a single NUL byte, in
/// the order given. The result is then padded with further NUL bytes until its
/// length is a multiple of four. An empty name list produces an empty payload.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0)))
        .collect();
    // Zero-fill up to the next 4-byte boundary (no-op when already aligned).
    let padding = (4 - chunk.len() % 4) % 4;
    chunk.resize(chunk.len() + padding, 0);
    chunk
}
3642
3643fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
3644    let mut counts = [0u32; 256];
3645    for object in objects {
3646        let first = object.oid.as_bytes()[0] as usize;
3647        counts[first] = counts[first]
3648            .checked_add(1)
3649            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3650    }
3651    let mut running = 0u32;
3652    let mut out = Vec::with_capacity(256 * 4);
3653    for count in counts {
3654        running = running
3655            .checked_add(count)
3656            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
3657        out.extend_from_slice(&running.to_be_bytes());
3658    }
3659    Ok(out)
3660}
3661
3662fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
3663    let mut out = Vec::new();
3664    for object in objects {
3665        out.extend_from_slice(object.oid.as_bytes());
3666    }
3667    out
3668}
3669
3670fn write_midx_object_offsets(
3671    objects: &[&MultiPackIndexEntry],
3672    large_offsets: &mut Vec<u8>,
3673) -> Result<Vec<u8>> {
3674    let mut out = Vec::new();
3675    for object in objects {
3676        out.extend_from_slice(&object.pack_int_id.to_be_bytes());
3677        if object.offset < 0x8000_0000 && !object.force_large_offset {
3678            out.extend_from_slice(&(object.offset as u32).to_be_bytes());
3679        } else {
3680            let large_idx = large_offsets.len() / 8;
3681            if large_idx > 0x7fff_ffff {
3682                return Err(GitError::InvalidFormat(
3683                    "too many multi-pack-index large offsets".into(),
3684                ));
3685            }
3686            out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
3687            large_offsets.extend_from_slice(&object.offset.to_be_bytes());
3688        }
3689    }
3690    Ok(out)
3691}
3692
3693fn write_multi_pack_index_chunks(
3694    format: ObjectFormat,
3695    version: u8,
3696    pack_count: u32,
3697    chunks: &[([u8; 4], Vec<u8>)],
3698) -> Result<Vec<u8>> {
3699    if chunks.len() > u8::MAX as usize {
3700        return Err(GitError::InvalidFormat(
3701            "too many multi-pack-index chunks".into(),
3702        ));
3703    }
3704    let lookup_len = (chunks.len() + 1)
3705        .checked_mul(12)
3706        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3707    let mut out = Vec::new();
3708    out.extend_from_slice(b"MIDX");
3709    out.push(version);
3710    out.push(hash_function_id(format) as u8);
3711    out.push(chunks.len() as u8);
3712    out.push(0);
3713    out.extend_from_slice(&pack_count.to_be_bytes());
3714    let mut chunk_offset = (12usize)
3715        .checked_add(lookup_len)
3716        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
3717        as u64;
3718    for (id, data) in chunks {
3719        out.extend_from_slice(id);
3720        out.extend_from_slice(&chunk_offset.to_be_bytes());
3721        chunk_offset = chunk_offset
3722            .checked_add(data.len() as u64)
3723            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
3724    }
3725    out.extend_from_slice(&[0, 0, 0, 0]);
3726    out.extend_from_slice(&chunk_offset.to_be_bytes());
3727    for (_id, data) in chunks {
3728        out.extend_from_slice(data);
3729    }
3730    let checksum = sley_core::digest_bytes(format, &out)?;
3731    out.extend_from_slice(checksum.as_bytes());
3732    Ok(out)
3733}
3734
/// Kind and size pair describing one entry.
///
/// NOTE(review): presumably the decoded header of a single pack entry; the code
/// that produces and consumes it lies outside this excerpt — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Which kind of pack object the entry holds.
    kind: PackObjectKind,
    /// Size recorded for the entry.
    /// NOTE(review): presumably the inflated size in bytes — confirm.
    size: u64,
}
3740
/// A cache of objects already decoded from one specific pack, keyed by the
/// in-pack byte offset at which each object's entry begins.
///
/// Delta resolution within a pack walks a chain of base objects by offset; the
/// same base is the parent of many deltas, so without a cache the entire chain
/// is re-inflated and re-applied on every read. Implementors let
/// [`read_object_at_with_cache`] reuse a warm base instead.
///
/// Correctness contract: a given `offset` within a given pack's bytes always
/// decodes to exactly one object, so caching by offset can never serve the wrong
/// object **provided the same cache is only ever used with one pack's bytes**.
/// Callers must therefore scope a cache to a single pack (e.g. key it by pack
/// path). The default [`read_object_at`] uses a no-op cache and is unaffected.
///
/// Both methods take `&self`, so an implementor that actually stores entries
/// needs some form of interior mutability. Objects are exchanged as
/// `Arc<EncodedObject>`, so a hit can be handed out without copying the
/// object itself.
pub trait PackDeltaCache {
    /// Return the decoded object whose entry begins at `offset`, if cached.
    ///
    /// Returning `None` is always permitted.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Record that the entry beginning at `offset` decodes to `object`.
    ///
    /// An implementation may ignore the call.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
3760
/// A [`PackDeltaCache`] that stores nothing; used by [`read_object_at`] to keep
/// the original, allocation-free behavior for callers that do not opt in.
///
/// It is a unit struct with no state: every lookup misses and every insert is
/// discarded.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always a miss.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Drops `_object` without recording it.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
3771
// Reused zlib inflate state. Resetting and reusing one `Decompress` avoids
// allocating a fresh (~10 KiB) `InflateState` for every object and delta decoded —
// an allocation that dominated bulk reads. Borrowed only for the duration of a
// single inflate; the recursive pack reader fully inflates each entry's data before
// recursing to its base, so the borrow never nests.
//
// `thread_local!` gives every thread its own instance. The `RefCell` turns the
// "never nests" rule into a runtime check: a second `borrow_mut` while one is
// still live panics. `Decompress::new(true)` creates a decoder that expects a
// zlib header in front of the DEFLATE data.
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
3780
/// The largest ratio by which a single DEFLATE/zlib member can expand its input.
/// The theoretical worst case for raw DEFLATE is ~1032:1 (a maximally efficient
/// run of back-references). We pre-reserve no more than this multiple of the
/// available compressed input, so an attacker who declares a huge `size_hint`
/// (e.g. `u64::MAX`) cannot make us reserve — and thus commit — gigabytes of
/// memory before the inflate has produced a single byte. The stream's *actual*
/// output is still verified against the declared size by the caller; this only
/// bounds the speculative allocation. git never pre-allocates an attacker's
/// declared size beyond a streaming buffer either (see index-pack.c's
/// `unpack_entry_data`).
///
/// Used by [`bounded_inflate_reserve`], which multiplies the compressed length
/// by this factor with saturating arithmetic.
const MAX_INFLATE_EXPANSION: usize = 1032;
3792
/// An absolute ceiling on the speculative pre-reservation, independent of the
/// input length, so even a large legitimate-looking compressed input can't be
/// turned into a multi-gigabyte up-front allocation. Inflate still grows the
/// output buffer organically past this when a real stream genuinely produces
/// that much — this only caps the *speculative* reserve.
///
/// The value is 64 MiB; [`bounded_inflate_reserve`] uses it as the upper bound
/// of its clamp.
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;
3799
3800/// Bound a caller-supplied (possibly attacker-controlled) decompressed-size hint
3801/// to something safe to reserve up front: no larger than what `compressed_len`
3802/// input bytes could plausibly inflate to, and never above a fixed ceiling. The
3803/// returned value is only used to size the initial allocation; the inflate loop
3804/// grows the buffer as the real stream produces output, so legitimate large
3805/// objects still decode correctly — they just don't get the whole allocation at
3806/// once.
3807fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
3808    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
3809    // 64 (floor) <= MAX_INFLATE_RESERVE (ceiling) always, so `clamp` cannot panic.
3810    size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
3811}
3812
/// Inflate the entire zlib stream at the front of `compressed`, appending the
/// decoded bytes to `out`, reusing the thread-local inflate state. `size_hint`
/// is the caller's expectation for the decompressed length, but it is treated as
/// untrusted: the up-front reservation is bounded by [`bounded_inflate_reserve`]
/// so a crafted hint can never drive an out-of-memory pre-allocation. Returns the
/// number of *compressed* bytes consumed (so callers stepping through a pack can
/// advance to the next entry). Byte-for-byte equivalent to
/// `ZlibDecoder::read_to_end` + `total_in`.
///
/// Bytes in `compressed` after the end of the zlib stream are left unread and
/// are not counted in the return value.
///
/// # Errors
///
/// Returns [`GitError::InvalidObject`] when the decoder reports an error, or
/// when a step neither consumes input nor produces output before the stream has
/// ended (reported as a truncated stream).
fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
    INFLATE.with(|cell| {
        let mut decompress = cell.borrow_mut();
        // Start a fresh zlib stream on the reused decoder state.
        decompress.reset(true);
        out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
        let mut input = compressed;
        let mut consumed_total = 0usize;
        loop {
            // Always leave output room so a zero-progress result means the input
            // (not the buffer) is exhausted.
            if out.len() == out.capacity() {
                // Grow by at least the current length (and never less than 64 bytes).
                out.reserve(out.len().max(64));
            }
            // Snapshot the running totals so this step's progress can be derived
            // from the difference afterwards.
            let before_in = decompress.total_in();
            let before_out = decompress.total_out();
            // `decompress_vec` writes into the spare capacity of `out` and
            // extends its length by the amount produced.
            let status = decompress
                .decompress_vec(input, out, flate2::FlushDecompress::None)
                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
            let consumed = (decompress.total_in() - before_in) as usize;
            let produced = decompress.total_out() - before_out;
            // Drop the bytes the decoder has already eaten.
            input = &input[consumed..];
            consumed_total += consumed;
            match status {
                flate2::Status::StreamEnd => return Ok(consumed_total),
                // No progress in either direction while output room was
                // available: the stream cannot complete from this input.
                _ if consumed == 0 && produced == 0 => {
                    return Err(GitError::InvalidObject("truncated zlib stream".into()));
                }
                _ => {}
            }
        }
    })
}
3853
3854/// Inflate at least `max_out` bytes (or until the stream ends) from `compressed`
3855/// into `out`, reusing the thread-local state. Used to read a delta's leading
3856/// base-size / result-size varints without inflating the whole instruction stream.
3857fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3858    INFLATE.with(|cell| {
3859        let mut decompress = cell.borrow_mut();
3860        decompress.reset(true);
3861        out.reserve(max_out.max(16));
3862        let mut input = compressed;
3863        while out.len() < max_out {
3864            if out.len() == out.capacity() {
3865                out.reserve(out.len().max(16));
3866            }
3867            let before_in = decompress.total_in();
3868            let before_out = decompress.total_out();
3869            let status = decompress
3870                .decompress_vec(input, out, flate2::FlushDecompress::None)
3871                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3872            let consumed = (decompress.total_in() - before_in) as usize;
3873            let produced = decompress.total_out() - before_out;
3874            input = &input[consumed..];
3875            if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3876                break;
3877            }
3878        }
3879        Ok(())
3880    })
3881}
3882
3883/// Decode the single object stored at byte `offset` within `pack_bytes`, reading
3884/// only that object and its delta-base chain instead of parsing the whole pack.
3885///
3886/// Ofs-delta bases are followed by offset (recursively, within this pack);
3887/// ref-delta bases are obtained from `resolve_ref_base`, which the caller backs
3888/// with the surrounding object store (so a base in another pack or loose still
3889/// resolves). The pack trailer checksum is the final `format.raw_len()` bytes.
3890pub fn read_object_at_arc<F>(
3891    pack_bytes: &[u8],
3892    offset: u64,
3893    format: ObjectFormat,
3894    resolve_ref_base: F,
3895) -> Result<Arc<EncodedObject>>
3896where
3897    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3898{
3899    read_object_at_with_cache_arc(
3900        pack_bytes,
3901        offset,
3902        format,
3903        resolve_ref_base,
3904        &NoopDeltaCache,
3905    )
3906}
3907
3908/// Like [`read_object_at_arc`], but reuses already-decoded objects from `cache`
3909/// (keyed by in-pack offset) and records every object it decodes.
3910///
3911/// This turns repeated reads from the same pack — where many deltas share a base
3912/// chain — from re-inflating each chain per read into resolving each base once.
3913/// `cache` must be scoped to the pack `pack_bytes` belongs to (see
3914/// [`PackDeltaCache`]). The decoded object is returned behind an [`Arc`] so
3915/// callers can reuse cache handles without cloning full object bodies.
3916pub fn read_object_at_with_cache_arc<F, C>(
3917    pack_bytes: &[u8],
3918    offset: u64,
3919    format: ObjectFormat,
3920    mut resolve_ref_base: F,
3921    cache: &C,
3922) -> Result<Arc<EncodedObject>>
3923where
3924    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3925    C: PackDeltaCache + ?Sized,
3926{
3927    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
3928}
3929
3930fn read_object_at_inner<F, C>(
3931    pack_bytes: &[u8],
3932    offset: u64,
3933    format: ObjectFormat,
3934    resolve_ref_base: &mut F,
3935    cache: &C,
3936) -> Result<Arc<EncodedObject>>
3937where
3938    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3939    C: PackDeltaCache + ?Sized,
3940{
3941    // A warm cache entry for this exact offset is already the fully resolved
3942    // object, so the whole base chain below can be skipped.
3943    if let Some(object) = cache.get(offset) {
3944        return Ok(object);
3945    }
3946    let trailer_offset = pack_bytes
3947        .len()
3948        .checked_sub(format.raw_len())
3949        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3950    let mut cursor = usize::try_from(offset)
3951        .ok()
3952        .filter(|&value| value < trailer_offset)
3953        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3954    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3955    let base = match header.kind {
3956        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3957            pack_bytes,
3958            &mut cursor,
3959            offset,
3960        )?)),
3961        PackObjectKind::RefDelta => {
3962            let hash_len = format.raw_len();
3963            if cursor + hash_len > trailer_offset {
3964                return Err(GitError::InvalidFormat(
3965                    "truncated ref-delta base object id".into(),
3966                ));
3967            }
3968            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3969            cursor += hash_len;
3970            Some(DeltaBase::Ref(oid))
3971        }
3972        _ => None,
3973    };
3974    let mut body = Vec::new();
3975    inflate_into(
3976        &pack_bytes[cursor..trailer_offset],
3977        &mut body,
3978        header.size.min(usize::MAX as u64) as usize,
3979    )?;
3980    if body.len() as u64 != header.size {
3981        return Err(GitError::InvalidObject(format!(
3982            "pack object declared {} bytes, decoded {}",
3983            header.size,
3984            body.len()
3985        )));
3986    }
3987    let object = match base {
3988        None => {
3989            let object_type = match header.kind {
3990                PackObjectKind::Commit => ObjectType::Commit,
3991                PackObjectKind::Tree => ObjectType::Tree,
3992                PackObjectKind::Blob => ObjectType::Blob,
3993                PackObjectKind::Tag => ObjectType::Tag,
3994                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3995                    return Err(GitError::InvalidFormat(
3996                        "delta pack entry decoded without a base".into(),
3997                    ));
3998                }
3999            };
4000            Arc::new(EncodedObject::new(object_type, body))
4001        }
4002        Some(DeltaBase::Offset(base_offset)) => {
4003            let base =
4004                read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
4005            let resolved = apply_pack_delta(&base.body, &body)?;
4006            Arc::new(EncodedObject::new(base.object_type, resolved))
4007        }
4008        Some(DeltaBase::Ref(base_oid)) => {
4009            let base = resolve_ref_base(&base_oid)?
4010                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
4011            let resolved = apply_pack_delta(&base.body, &body)?;
4012            Arc::new(EncodedObject::new(base.object_type, resolved))
4013        }
4014    };
4015    // Record the fully resolved object so any later read that walks through this
4016    // offset (as a delta base or directly) reuses it. Bases are inserted as the
4017    // recursion unwinds, so a chain is decoded at most once across reads.
4018    cache.insert(offset, Arc::clone(&object));
4019    Ok(object)
4020}
4021
4022/// The object type and final (inflated) size of the entry at `offset`, *without*
4023/// materializing the object body — git's `cat-file --batch-check` fast path.
4024///
4025/// A base object's size is already in its pack entry header, and a delta's result
4026/// size is the second varint at the front of its (small) delta stream, so neither
4027/// inflates the full content. The reported type is the type at the end of the
4028/// delta chain (deltas inherit their base's type). `resolve_ref_base_type` supplies
4029/// the type of a ref-delta base that lives outside this pack (resolved through the
4030/// wider object store); ofs-delta bases are followed within `pack_bytes` directly.
4031pub fn read_object_header_at<F>(
4032    pack_bytes: &[u8],
4033    offset: u64,
4034    format: ObjectFormat,
4035    mut resolve_ref_base_type: F,
4036) -> Result<(ObjectType, u64)>
4037where
4038    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4039{
4040    read_object_header_at_inner(
4041        pack_bytes,
4042        offset,
4043        format,
4044        &mut resolve_ref_base_type,
4045        &mut NoopHeaderTypeCache,
4046    )
4047}
4048
/// Memo of `pack offset -> resolved header (end-of-chain type, result size)` for
/// the `cat-file --batch-check` header fast path.
///
/// Without it, resolving the *type* of an ofs-delta walks the whole delta chain
/// to its base on every header read, re-inflating each link's leading varints
/// from scratch — so reading every object in a deeply-deltified pack costs
/// O(objects x chain-depth) and goes super-linear (sley#26). Two reuses fall out
/// of memoizing `offset -> (type, size)`:
///
/// * a chain's end-of-chain type is resolved at most once, so later objects on
///   the same chain skip the walk; and
/// * a repeated lookup of the same object (common in batch input) returns from
///   the memo without re-inflating its delta header at all.
///
/// The size stored is the object's final (inflated) result size — read from its
/// own pack/delta header, never by materializing the body.
///
/// Entries are keyed purely by in-pack byte offset, so one cache instance must
/// only ever be used with a single pack's bytes.
pub trait HeaderTypeCache {
    /// The previously resolved header at `pack_offset`, if any.
    ///
    /// Returns `None` on a miss; the reader then resolves the header itself and
    /// hands the result to [`HeaderTypeCache::put`].
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Record the resolved header at `pack_offset` for reuse by later reads.
    ///
    /// `header` is the `(end-of-chain type, result size)` pair for that entry.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
4071
4072struct NoopHeaderTypeCache;
4073
4074impl HeaderTypeCache for NoopHeaderTypeCache {
4075    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
4076        None
4077    }
4078    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
4079}
4080
4081/// Like [`read_object_header_at`] but threads a caller-owned [`HeaderTypeCache`]
4082/// through the read so (a) the ofs-delta chain's end-of-chain type is resolved at
4083/// most once per chain and (b) a repeated lookup of the same offset returns from
4084/// the memo without re-inflating (sley#26). The cache is keyed by in-pack offset,
4085/// so it must be scoped to a single pack's bytes by the caller.
4086pub fn read_object_header_at_with_cache<F, C>(
4087    pack_bytes: &[u8],
4088    offset: u64,
4089    format: ObjectFormat,
4090    mut resolve_ref_base_type: F,
4091    type_cache: &mut C,
4092) -> Result<(ObjectType, u64)>
4093where
4094    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4095    C: HeaderTypeCache + ?Sized,
4096{
4097    if let Some(header) = type_cache.get(offset) {
4098        return Ok(header);
4099    }
4100    read_object_header_at_inner(
4101        pack_bytes,
4102        offset,
4103        format,
4104        &mut resolve_ref_base_type,
4105        type_cache,
4106    )
4107}
4108
4109fn read_object_header_at_inner<F, C>(
4110    pack_bytes: &[u8],
4111    offset: u64,
4112    format: ObjectFormat,
4113    resolve_ref_base_type: &mut F,
4114    type_cache: &mut C,
4115) -> Result<(ObjectType, u64)>
4116where
4117    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4118    C: HeaderTypeCache + ?Sized,
4119{
4120    let trailer_offset = pack_bytes
4121        .len()
4122        .checked_sub(format.raw_len())
4123        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
4124    let mut cursor = usize::try_from(offset)
4125        .ok()
4126        .filter(|&value| value < trailer_offset)
4127        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
4128    let header = parse_entry_header(pack_bytes, &mut cursor)?;
4129    let resolved = match header.kind {
4130        PackObjectKind::Commit => (ObjectType::Commit, header.size),
4131        PackObjectKind::Tree => (ObjectType::Tree, header.size),
4132        PackObjectKind::Blob => (ObjectType::Blob, header.size),
4133        PackObjectKind::Tag => (ObjectType::Tag, header.size),
4134        PackObjectKind::OfsDelta => {
4135            let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
4136            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4137            // The end-of-chain type only depends on the base, so reuse it across
4138            // reads instead of re-walking the chain per object (sley#26).
4139            let base_type = match type_cache.get(base_offset) {
4140                Some((base_type, _)) => base_type,
4141                None => {
4142                    let (base_type, _) = read_object_header_at_inner(
4143                        pack_bytes,
4144                        base_offset,
4145                        format,
4146                        resolve_ref_base_type,
4147                        type_cache,
4148                    )?;
4149                    base_type
4150                }
4151            };
4152            (base_type, size)
4153        }
4154        PackObjectKind::RefDelta => {
4155            let hash_len = format.raw_len();
4156            if cursor + hash_len > trailer_offset {
4157                return Err(GitError::InvalidFormat(
4158                    "truncated ref-delta base object id".into(),
4159                ));
4160            }
4161            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
4162            cursor += hash_len;
4163            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4164            let base_type = resolve_ref_base_type(&oid)?
4165                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
4166            (base_type, size)
4167        }
4168    };
4169    // Memoize the fully resolved header so a repeated lookup of this offset (or a
4170    // chain that bases on it) returns without re-inflating (sley#26).
4171    type_cache.put(offset, resolved);
4172    Ok(resolved)
4173}
4174
4175/// Number of inflated delta-stream bytes to read when only the leading base-size
4176/// and result-size varints are needed. Each varint is at most 10 bytes, so a short
4177/// prefix always covers both without inflating the delta instructions.
4178const DELTA_HEADER_PREFIX_LEN: usize = 32;
4179
4180/// Result size of a delta whose zlib-compressed stream starts at `compressed`,
4181/// inflating only the short prefix that holds its two leading varints.
4182fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
4183    let mut prefix = Vec::new();
4184    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
4185    decoded_delta_result_size(&prefix)
4186}
4187
4188fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
4189    let first = next_byte(bytes, offset)?;
4190    let mut size = u64::from(first & 0x0f);
4191    let kind = match (first >> 4) & 0x07 {
4192        1 => PackObjectKind::Commit,
4193        2 => PackObjectKind::Tree,
4194        3 => PackObjectKind::Blob,
4195        4 => PackObjectKind::Tag,
4196        6 => PackObjectKind::OfsDelta,
4197        7 => PackObjectKind::RefDelta,
4198        other => {
4199            return Err(GitError::InvalidFormat(format!(
4200                "invalid pack object type {other}"
4201            )));
4202        }
4203    };
4204    let mut shift = 4;
4205    let mut byte = first;
4206    while byte & 0x80 != 0 {
4207        byte = next_byte(bytes, offset)?;
4208        let part = u64::from(byte & 0x7f);
4209        size = size
4210            .checked_add(
4211                part.checked_shl(shift)
4212                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
4213            )
4214            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
4215        shift += 7;
4216    }
4217    Ok(EntryHeader { kind, size })
4218}
4219
4220fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
4221    let mut byte = next_byte(bytes, offset)?;
4222    let mut relative = u64::from(byte & 0x7f);
4223    while byte & 0x80 != 0 {
4224        byte = next_byte(bytes, offset)?;
4225        relative = relative
4226            .checked_add(1)
4227            .and_then(|value| value.checked_shl(7))
4228            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
4229            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
4230    }
4231    entry_offset
4232        .checked_sub(relative)
4233        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
4234}
4235
4236fn resolve_pack_entries<F>(
4237    parsed: Vec<ParsedPackEntry>,
4238    format: ObjectFormat,
4239    external_base: &mut F,
4240) -> Result<Vec<PackObject>>
4241where
4242    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
4243{
4244    let mut offset_to_index = HashMap::with_capacity(parsed.len());
4245    for (idx, entry) in parsed.iter().enumerate() {
4246        offset_to_index.insert(parsed_entry_offset(entry), idx);
4247    }
4248
4249    let mut resolved = vec![None; parsed.len()];
4250    let mut oid_to_index = HashMap::new();
4251    let mut unresolved = 0usize;
4252    for (idx, entry) in parsed.iter().enumerate() {
4253        match entry {
4254            ParsedPackEntry::Resolved(object) => {
4255                oid_to_index.insert(object.entry.oid, idx);
4256                resolved[idx] = Some(object.clone());
4257            }
4258            ParsedPackEntry::Delta { .. } => unresolved += 1,
4259        }
4260    }
4261
4262    while unresolved != 0 {
4263        let mut progress = false;
4264        for idx in 0..parsed.len() {
4265            if resolved[idx].is_some() {
4266                continue;
4267            }
4268            let ParsedPackEntry::Delta {
4269                base,
4270                compressed_size,
4271                delta_size,
4272                offset,
4273                delta,
4274            } = &parsed[idx]
4275            else {
4276                continue;
4277            };
4278            let Some(base_object) = delta_base_object(
4279                base,
4280                &offset_to_index,
4281                &oid_to_index,
4282                &resolved,
4283                external_base,
4284            )?
4285            else {
4286                continue;
4287            };
4288            let body = apply_pack_delta(base_object.body(), delta)?;
4289            let object = EncodedObject::new(base_object.object_type(), body);
4290            let oid = object.object_id(format)?;
4291            let pack_object = PackObject {
4292                entry: PackEntry {
4293                    oid,
4294                    compressed_size: *compressed_size,
4295                    uncompressed_size: object.body.len() as u64,
4296                    offset: *offset,
4297                },
4298                object,
4299            };
4300            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
4301                return Err(GitError::InvalidObject(
4302                    "resolved delta size does not match delta header".into(),
4303                ));
4304            }
4305            if *delta_size != delta.len() as u64 {
4306                return Err(GitError::InvalidObject(format!(
4307                    "pack delta declared {delta_size} bytes, decoded {}",
4308                    delta.len()
4309                )));
4310            }
4311            oid_to_index.insert(oid, idx);
4312            resolved[idx] = Some(pack_object);
4313            unresolved -= 1;
4314            progress = true;
4315        }
4316        if !progress {
4317            return Err(GitError::Unsupported("unresolved delta base".into()));
4318        }
4319    }
4320
4321    resolved
4322        .into_iter()
4323        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
4324        .collect()
4325}
4326
4327fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
4328    match entry {
4329        ParsedPackEntry::Resolved(object) => object.entry.offset,
4330        ParsedPackEntry::Delta { offset, .. } => *offset,
4331    }
4332}
4333
4334enum DeltaBaseObject<'a> {
4335    Borrowed(&'a EncodedObject),
4336    Owned(EncodedObject),
4337}
4338
4339impl DeltaBaseObject<'_> {
4340    fn object_type(&self) -> ObjectType {
4341        match self {
4342            Self::Borrowed(object) => object.object_type,
4343            Self::Owned(object) => object.object_type,
4344        }
4345    }
4346
4347    fn body(&self) -> &[u8] {
4348        match self {
4349            Self::Borrowed(object) => &object.body,
4350            Self::Owned(object) => &object.body,
4351        }
4352    }
4353}
4354
4355fn delta_base_object<'a, F>(
4356    base: &DeltaBase,
4357    offset_to_index: &HashMap<u64, usize>,
4358    oid_to_index: &HashMap<ObjectId, usize>,
4359    resolved: &'a [Option<PackObject>],
4360    external_base: &mut F,
4361) -> Result<Option<DeltaBaseObject<'a>>>
4362where
4363    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
4364{
4365    match base {
4366        DeltaBase::Offset(offset) => {
4367            let Some(index) = offset_to_index.get(offset).copied() else {
4368                return Err(GitError::InvalidFormat(format!(
4369                    "ofs-delta base offset {offset} not found"
4370                )));
4371            };
4372            Ok(resolved[index]
4373                .as_ref()
4374                .map(|object| DeltaBaseObject::Borrowed(&object.object)))
4375        }
4376        DeltaBase::Ref(oid) => {
4377            if let Some(index) = oid_to_index.get(oid).copied() {
4378                return Ok(resolved[index]
4379                    .as_ref()
4380                    .map(|object| DeltaBaseObject::Borrowed(&object.object)));
4381            }
4382            external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
4383        }
4384    }
4385}
4386
4387fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
4388    let mut cursor = 0usize;
4389    let base_size = read_delta_varint(delta, &mut cursor)?;
4390    if base_size != base.len() as u64 {
4391        return Err(GitError::InvalidObject(format!(
4392            "delta base size mismatch: expected {base_size}, got {}",
4393            base.len()
4394        )));
4395    }
4396    let result_size = read_delta_varint(delta, &mut cursor)?;
4397    // `result_size` is an attacker-controlled delta varint from a network pack
4398    // (install_raw_pack -> sley-fetch). On 64-bit a naive `result_size as usize`
4399    // (or `.min(usize::MAX)`, a no-op there) lets a tiny delta declare
4400    // `u64::MAX`/1 TiB and drive `with_capacity` to abort the process before the
4401    // size-mismatch check below can fire. Route the up-front reservation through
4402    // the sley#2 bound so the speculative allocation is capped; `result.extend`
4403    // still grows the buffer organically and the post-decode length check
4404    // (`result.len() != result_size`) rejects the lie cleanly.
4405    let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
4406    let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
4407    while cursor < delta.len() {
4408        let command = delta[cursor];
4409        cursor += 1;
4410        if command & 0x80 != 0 {
4411            let copy_offset =
4412                read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
4413            let mut copy_size =
4414                read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
4415            if copy_size == 0 {
4416                copy_size = 0x10000;
4417            }
4418            let start = usize::try_from(copy_offset)
4419                .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
4420            let len = usize::try_from(copy_size)
4421                .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
4422            let end = start
4423                .checked_add(len)
4424                .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
4425            let Some(slice) = base.get(start..end) else {
4426                return Err(GitError::InvalidObject(
4427                    "delta copy range exceeds base object".into(),
4428                ));
4429            };
4430            result.extend_from_slice(slice);
4431        } else if command != 0 {
4432            let len = usize::from(command);
4433            let end = cursor
4434                .checked_add(len)
4435                .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
4436            let Some(slice) = delta.get(cursor..end) else {
4437                return Err(GitError::InvalidObject(
4438                    "delta insert range exceeds delta data".into(),
4439                ));
4440            };
4441            result.extend_from_slice(slice);
4442            cursor = end;
4443        } else {
4444            return Err(GitError::InvalidObject(
4445                "delta contains reserved zero command".into(),
4446            ));
4447        }
4448    }
4449    if result.len() as u64 != result_size {
4450        return Err(GitError::InvalidObject(format!(
4451            "delta result size mismatch: expected {result_size}, got {}",
4452            result.len()
4453        )));
4454    }
4455    Ok(result)
4456}
4457
4458fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
4459    let mut cursor = 0usize;
4460    let _ = read_delta_varint(delta, &mut cursor)?;
4461    read_delta_varint(delta, &mut cursor)
4462}
4463
/// Size, in bytes, of the fixed blocks used to index a base object for delta
/// compression. Matches git's `diff-delta.c` block size.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance between indexed base anchors. Delta generation still scans target
/// objects byte-by-byte once there is evidence of shared content; anchoring the
/// base at block boundaries keeps the index compact and avoids per-object
/// hash-table allocation storms on unrelated blobs.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Number of hash buckets used by [`DeltaIndex`]. Bucketing avoids sorting each
/// base object's anchors while keeping exact-hash candidate scans short.
const DELTA_BUCKET_BITS: usize = 12;
/// Total bucket count: two to the power of [`DELTA_BUCKET_BITS`].
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask selecting the low [`DELTA_BUCKET_BITS`] bits of a block hash, i.e. the
/// bucket index (see `delta_bucket`).
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
4479
4480/// An index over a base object's content used to generate deltas against it.
4481///
4482/// The index hashes block-sized anchors of the base, groups them into fixed
4483/// buckets, and verifies exact byte matches before copying. This avoids both
4484/// per-bucket allocation storms and the per-object sort needed by a single
4485/// sorted vector.
4486struct DeltaIndex<'a> {
4487    base: &'a [u8],
4488    blocks: Vec<DeltaBlock>,
4489    buckets: Vec<usize>,
4490}
4491
4492#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4493struct DeltaBlock {
4494    hash: u32,
4495    offset: usize,
4496}
4497
4498impl<'a> DeltaIndex<'a> {
4499    fn new(base: &'a [u8]) -> Self {
4500        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
4501        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
4502        for_each_delta_anchor(base.len(), |offset| {
4503            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
4504            buckets[delta_bucket(hash) + 1] += 1;
4505            anchors.push(DeltaBlock { hash, offset });
4506        });
4507        for idx in 1..buckets.len() {
4508            buckets[idx] += buckets[idx - 1];
4509        }
4510
4511        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
4512        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
4513        for anchor in anchors {
4514            let bucket = delta_bucket(anchor.hash);
4515            let next = &mut next_offsets[bucket];
4516            blocks[*next] = anchor;
4517            *next += 1;
4518        }
4519
4520        Self {
4521            base,
4522            blocks,
4523            buckets,
4524        }
4525    }
4526
4527    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
4528        let bucket = delta_bucket(hash);
4529        let start = self.buckets[bucket];
4530        let end = self.buckets[bucket + 1];
4531        self.blocks[start..end]
4532            .iter()
4533            .filter(move |block| block.hash == hash)
4534    }
4535
4536    fn has_hash(&self, hash: u32) -> bool {
4537        self.candidate_blocks(hash).next().is_some()
4538    }
4539
4540    fn has_shared_anchor(&self, target: &[u8]) -> bool {
4541        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
4542            return false;
4543        }
4544        let last = target.len() - DELTA_BLOCK_SIZE;
4545        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
4546            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
4547            if self.has_hash(hash) {
4548                return true;
4549            }
4550        }
4551        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
4552            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
4553            if self.has_hash(hash) {
4554                return true;
4555            }
4556        }
4557        false
4558    }
4559
4560    /// Generate a delta that reconstructs `target` from this index's base.
4561    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
4562        if !self.has_shared_anchor(target) {
4563            return None;
4564        }
4565        let base = self.base;
4566        let mut delta = Vec::new();
4567        write_delta_varint(&mut delta, base.len() as u64);
4568        write_delta_varint(&mut delta, target.len() as u64);
4569
4570        let mut pending_insert_start = 0usize;
4571        let mut pos = 0usize;
4572        while pos < target.len() {
4573            let mut best_len = 0usize;
4574            let mut best_offset = 0usize;
4575            if pos + DELTA_BLOCK_SIZE <= target.len() {
4576                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
4577                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
4578                    // Confirm the block actually matches (hash collisions are
4579                    // possible) before measuring how far it extends.
4580                    let candidate = candidate.offset;
4581                    let max_len = (base.len() - candidate).min(target.len() - pos);
4582                    let mut len = 0usize;
4583                    while len < max_len && base[candidate + len] == target[pos + len] {
4584                        len += 1;
4585                    }
4586                    if len > best_len {
4587                        best_len = len;
4588                        best_offset = candidate;
4589                    }
4590                }
4591            }
4592
4593            if best_len >= DELTA_BLOCK_SIZE {
4594                if pending_insert_start < pos {
4595                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
4596                }
4597                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
4598                pos += best_len;
4599                pending_insert_start = pos;
4600            } else {
4601                pos += 1;
4602            }
4603        }
4604        if pending_insert_start < target.len() {
4605            write_delta_insert(&mut delta, &target[pending_insert_start..]);
4606        }
4607        Some(delta)
4608    }
4609}
4610
4611fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
4612    if len < DELTA_BLOCK_SIZE {
4613        return;
4614    }
4615    len -= DELTA_BLOCK_SIZE;
4616    for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
4617        visit(offset);
4618    }
4619    if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
4620        visit(len);
4621    }
4622}
4623
4624fn delta_anchor_count(len: usize) -> usize {
4625    if len < DELTA_BLOCK_SIZE {
4626        return 0;
4627    }
4628    let last = len - DELTA_BLOCK_SIZE;
4629    (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
4630}
4631
4632fn delta_bucket(hash: u32) -> usize {
4633    (hash as usize) & DELTA_BUCKET_MASK
4634}
4635
/// Maximum number of base offsets retained per block-hash bucket. Caps the work
/// done extending candidate matches for inputs with many repeated blocks.
///
/// `DeltaIndex::delta` also stops after examining this many candidates for a
/// single target position.
const DELTA_MAX_CHAIN: usize = 64;
4639
/// Hash a block of base/target bytes into a 32-bit bucket key.
///
/// Each byte is folded in as `hash * 0x0100_0193 ^ byte` with wrapping
/// arithmetic, starting from zero. Matches are always verified byte-for-byte
/// before use, so collisions only cost extra comparison work and never affect
/// correctness.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |hash, &byte| {
        hash.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
4652
/// The chosen storage form for a single object during pack generation.
///
/// `plan_pack_deltas` fills this in per object; `planned_payload` then selects
/// either the object's own body or the stored `delta` bytes from it.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored undeltified (a base for others, or no good delta was found).
    None,
    /// Delta against another object in this pack, identified by its original
    /// index. The pre-computed `delta` bytes reconstruct the object from that
    /// base's body.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Delta against an external (thin-pack) base, referenced by object id.
    /// The `delta` bytes reconstruct the object from that external base's body.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
4665
/// Planning result for one object; the plan vector is indexed by the object's
/// original position in the input slice.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// How the object is stored: undeltified, or as a delta against a base.
    base: PlannedBase,
}
4670
4671fn compress_planned_payloads(
4672    objects: &[&EncodedObject],
4673    plan: &[PlannedEntry],
4674    order: &[usize],
4675    compression_level: u32,
4676) -> Result<Vec<Vec<u8>>> {
4677    if order.is_empty() {
4678        return Ok(Vec::new());
4679    }
4680
4681    let worker_count = std::thread::available_parallelism()
4682        .map(|threads| threads.get())
4683        .unwrap_or(1)
4684        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
4685        .min(order.len());
4686    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
4687        let mut payloads = Vec::with_capacity(order.len());
4688        for &idx in order {
4689            payloads.push(compressed_payload(
4690                planned_payload(objects, plan, idx),
4691                compression_level,
4692            )?);
4693        }
4694        return Ok(payloads);
4695    }
4696
4697    let chunk_len = order.len().div_ceil(worker_count);
4698    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
4699    std::thread::scope(|scope| {
4700        let mut handles = Vec::new();
4701        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
4702            let chunk_start = chunk_idx * chunk_len;
4703            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
4704                let mut chunk_payloads = Vec::with_capacity(chunk.len());
4705                for (offset, &idx) in chunk.iter().enumerate() {
4706                    chunk_payloads.push((
4707                        chunk_start + offset,
4708                        compressed_payload(planned_payload(objects, plan, idx), compression_level)?,
4709                    ));
4710                }
4711                Ok(chunk_payloads)
4712            }));
4713        }
4714
4715        let mut first_error = None;
4716        for handle in handles {
4717            match handle.join() {
4718                Ok(Ok(chunk_payloads)) => {
4719                    if first_error.is_none() {
4720                        for (pos, payload) in chunk_payloads {
4721                            payloads[pos] = payload;
4722                        }
4723                    }
4724                }
4725                Ok(Err(err)) => {
4726                    first_error.get_or_insert(err);
4727                }
4728                Err(_) => {
4729                    first_error.get_or_insert_with(|| {
4730                        GitError::InvalidObject("pack compression worker panicked".into())
4731                    });
4732                }
4733            }
4734        }
4735
4736        match first_error {
4737            Some(err) => Err(err),
4738            None => Ok(()),
4739        }
4740    })?;
4741    Ok(payloads)
4742}
4743
4744fn planned_payload<'a>(
4745    objects: &'a [&'a EncodedObject],
4746    plan: &'a [PlannedEntry],
4747    idx: usize,
4748) -> &'a [u8] {
4749    match &plan[idx].base {
4750        PlannedBase::None => &objects[idx].body,
4751        PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
4752    }
4753}
4754
4755fn compressed_payload(body: &[u8], compression_level: u32) -> Result<Vec<u8>> {
4756    let mut out = Vec::new();
4757    write_compressed_payload(&mut out, body, compression_level)?;
4758    Ok(out)
4759}
4760
/// Maximum number of external thin-pack bases compared against any single
/// object. Bounds the work of the thin path when a large base set is supplied.
///
/// Applied by `plan_pack_deltas` when it scans the bases from
/// `options.thin_bases`.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
4764
/// One slot of the sliding candidate window used by `plan_pack_deltas`.
struct DeltaWindowEntry<'a> {
    /// Original index of the candidate object in the input slice.
    idx: usize,
    /// Delta index built over that object's body.
    index: DeltaIndex<'a>,
}
4769
4770/// Rank object types for delta grouping. Objects of the same type are far more
4771/// likely to delta well, so the sort groups by this rank first.
4772fn delta_type_rank(object_type: ObjectType) -> u8 {
4773    match object_type {
4774        ObjectType::Commit => 0,
4775        ObjectType::Tree => 1,
4776        ObjectType::Blob => 2,
4777        ObjectType::Tag => 3,
4778    }
4779}
4780
4781/// Decide how each object is stored (undeltified or deltified) and the order in
4782/// which objects are emitted into the pack.
4783///
4784/// # Ordering
4785///
4786/// Candidates are sorted by `(type, size descending, object id)`:
4787/// * **type** — only same-type objects are deltified against one another, so
4788///   grouping by type keeps the sliding window full of viable bases. Type rank
4789///   follows [`delta_type_rank`] (commit, tree, blob, tag).
4790/// * **size descending** — larger objects come first so smaller, later objects
4791///   delta against larger bases (git's heuristic). Raw [`EncodedObject`]s carry
4792///   no path/name, so the usual path-hash key is unavailable; size is the next
4793///   best locality signal.
4794/// * **object id** — a deterministic tiebreaker for reproducible packs.
4795///
4796/// # Selection
4797///
4798/// Each object is compared against the previous up to `window` same-type
4799/// candidates (and, for thin packs, up to [`DELTA_MAX_EXTERNAL_BASES`] external
4800/// bases of the same type). The smallest delta whose encoded length is strictly
4801/// less than the object's own body is kept; otherwise the object is stored
4802/// undeltified. Delta chain depth is bounded by `options.depth` (a base may
4803/// only be used if doing so keeps the resulting chain within the bound); a depth
4804/// of `0` disables deltification entirely.
4805///
4806/// Returns the per-object plan (indexed by original object index) together with
4807/// the emit order. Every in-pack delta references a candidate that is earlier in
4808/// the emit order, so emitting in that order writes each base before any object
4809/// that depends on it.
4810fn plan_pack_deltas(
4811    objects: &[&EncodedObject],
4812    object_ids: &[ObjectId],
4813    options: &PackWriteOptions,
4814) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4815    let count = objects.len();
4816    let mut plan: Vec<PlannedEntry> = (0..count)
4817        .map(|_| PlannedEntry {
4818            base: PlannedBase::None,
4819        })
4820        .collect();
4821
4822    // Processing order. Deltas only point backwards within this order, which is
4823    // therefore also a valid emit order. Reordering by type/size improves delta
4824    // locality but is skipped when disabled or when deltification is off.
4825    let mut order: Vec<usize> = (0..count).collect();
4826    if options.reorder && options.depth > 0 {
4827        order.sort_by(|&left, &right| {
4828            delta_type_rank(objects[left].object_type)
4829                .cmp(&delta_type_rank(objects[right].object_type))
4830                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4831                .then_with(|| {
4832                    object_ids[left]
4833                        .as_bytes()
4834                        .cmp(object_ids[right].as_bytes())
4835                })
4836        });
4837    }
4838
4839    if options.depth == 0 {
4840        return Ok((plan, order));
4841    }
4842
4843    // Pre-build delta indexes for external thin-pack bases, grouped by type so
4844    // an object only compares against compatible bases.
4845    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4846        Vec::with_capacity(options.thin_bases.len());
4847    for (oid, object) in &options.thin_bases {
4848        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4849    }
4850
4851    // Chain depth ending at each object (0 = undeltified). Used to keep delta
4852    // chains within `options.depth`.
4853    let mut depth = vec![0usize; count];
4854    // Sliding window of recently processed original indices, most recent last.
4855    let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4856        std::collections::VecDeque::new();
4857
4858    for &idx in &order {
4859        let target = &objects[idx].body;
4860        let target_type = objects[idx].object_type;
4861
4862        let mut best_delta: Option<Vec<u8>> = None;
4863        let mut best_base = PlannedBase::None;
4864
4865        // Try in-pack candidates from the window (same type only).
4866        for base_entry in window.iter().rev() {
4867            let base_idx = base_entry.idx;
4868            if objects[base_idx].object_type != target_type {
4869                continue;
4870            }
4871            // Using this base would make the new chain depth + 1; skip if that
4872            // would exceed the configured maximum.
4873            if depth[base_idx] + 1 > options.depth {
4874                continue;
4875            }
4876            let Some(delta) = base_entry.index.delta(target) else {
4877                continue;
4878            };
4879            if !delta_is_acceptable(&delta, target.len()) {
4880                continue;
4881            }
4882            if best_delta
4883                .as_ref()
4884                .is_none_or(|current| delta.len() < current.len())
4885            {
4886                best_delta = Some(delta);
4887                best_base = PlannedBase::InPack {
4888                    base_idx,
4889                    delta: Vec::new(),
4890                };
4891            }
4892        }
4893
4894        // Try external thin-pack bases (ref-delta; external base is depth 0, so
4895        // the resulting chain depth is 1, always within a non-zero bound).
4896        for (base_oid, base_type, base_index) in
4897            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4898        {
4899            if *base_type != target_type {
4900                continue;
4901            }
4902            let Some(delta) = base_index.delta(target) else {
4903                continue;
4904            };
4905            if !delta_is_acceptable(&delta, target.len()) {
4906                continue;
4907            }
4908            if best_delta
4909                .as_ref()
4910                .is_none_or(|current| delta.len() < current.len())
4911            {
4912                best_delta = Some(delta);
4913                best_base = PlannedBase::External {
4914                    base_oid: *base_oid,
4915                    delta: Vec::new(),
4916                };
4917            }
4918        }
4919
4920        if let Some(delta) = best_delta {
4921            match best_base {
4922                PlannedBase::InPack { base_idx, .. } => {
4923                    depth[idx] = depth[base_idx] + 1;
4924                    plan[idx].base = PlannedBase::InPack { base_idx, delta };
4925                }
4926                PlannedBase::External { base_oid, .. } => {
4927                    depth[idx] = 1;
4928                    plan[idx].base = PlannedBase::External { base_oid, delta };
4929                }
4930                PlannedBase::None => {}
4931            }
4932        }
4933
4934        // Add this object to the window for subsequent candidates.
4935        window.push_back(DeltaWindowEntry {
4936            idx,
4937            index: DeltaIndex::new(&objects[idx].body),
4938        });
4939        while window.len() > options.window {
4940            window.pop_front();
4941        }
4942    }
4943
4944    Ok((plan, order))
4945}
4946
/// Whether a generated delta should replace the undeltified object.
///
/// The delta must be non-empty and strictly shorter than the object's own
/// body; at equal size the self-contained undeltified form is preferred.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4954
/// Append `value` as a little-endian base-128 varint: seven payload bits per
/// byte, with the high bit set on every byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, value: u64) {
    let mut rest = value;
    while rest >= 0x80 {
        out.push(((rest as u8) & 0x7f) | 0x80);
        rest >>= 7;
    }
    // What remains fits in seven bits and terminates the sequence.
    out.push(rest as u8);
}
4968
/// Append copy instructions that copy `size` bytes starting at base `offset`.
///
/// The range is split into chunks of at most `0x10000` bytes. Each instruction
/// is a command byte with the high bit set, followed by the non-zero bytes of
/// the offset (low four bytes, command bits 0-3) and of the chunk size (low
/// three bytes, command bits 4-6), least significant first. A full `0x10000`
/// chunk is written with size zero, i.e. without any size bytes.
///
/// Only the low 32 bits of `offset` are encoded; callers are expected to keep
/// every chunk offset within that range.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_CHUNK: u64 = 0x10000;
    while size != 0 {
        let chunk = size.min(MAX_CHUNK);
        let encoded_size = if chunk == MAX_CHUNK { 0 } else { chunk };
        let offset_bytes = offset.to_le_bytes();
        let size_bytes = encoded_size.to_le_bytes();

        // Reserve the command byte, then patch it once the payload is known.
        let command_at = out.len();
        let mut command = 0x80u8;
        out.push(command);
        let fields = offset_bytes[..4].iter().chain(&size_bytes[..3]);
        for (bit, &byte) in fields.enumerate() {
            if byte != 0 {
                command |= 1 << bit;
                out.push(byte);
            }
        }
        out[command_at] = command;

        offset += chunk;
        size -= chunk;
    }
}
4998
/// Append insert instructions carrying `bytes` literally.
///
/// Each instruction is a length byte (1 to `0x7f`) followed by that many
/// literal bytes, so longer input is split into several instructions. Empty
/// input writes nothing.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for literal in bytes.chunks(0x7f) {
        out.push(literal.len() as u8);
        out.extend_from_slice(literal);
    }
}
5007
5008fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
5009    let mut value = 0u64;
5010    let mut shift = 0u32;
5011    loop {
5012        let Some(byte) = delta.get(*cursor).copied() else {
5013            return Err(GitError::InvalidObject("truncated delta size".into()));
5014        };
5015        *cursor += 1;
5016        value = value
5017            .checked_add(
5018                u64::from(byte & 0x7f)
5019                    .checked_shl(shift)
5020                    .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
5021            )
5022            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5023        if byte & 0x80 == 0 {
5024            return Ok(value);
5025        }
5026        shift = shift
5027            .checked_add(7)
5028            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5029    }
5030}
5031
5032fn read_delta_copy_value(
5033    delta: &[u8],
5034    cursor: &mut usize,
5035    command: u8,
5036    masks: &[u8],
5037) -> Result<u64> {
5038    let mut value = 0u64;
5039    for (shift, mask) in masks.iter().enumerate() {
5040        if command & mask != 0 {
5041            let Some(byte) = delta.get(*cursor).copied() else {
5042                return Err(GitError::InvalidObject(
5043                    "truncated delta copy command".into(),
5044                ));
5045            };
5046            *cursor += 1;
5047            value |= u64::from(byte) << (shift * 8);
5048        }
5049    }
5050    Ok(value)
5051}
5052
5053fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8], compression_level: u32) -> Result<()> {
5054    let mut compressor = Compress::new(Compression::new(compression_level.min(9)), true);
5055    out.reserve(zlib_compress_bound(body.len()));
5056    let status = compressor
5057        .compress_vec(body, out, FlushCompress::Finish)
5058        .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
5059    if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
5060        return Err(GitError::InvalidObject(
5061            "zlib compression did not finish pack entry".into(),
5062        ));
5063    }
5064    Ok(())
5065}
5066
/// Upper bound on the zlib output size for `len` input bytes:
/// `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    [len >> 12, len >> 14, len >> 25, 13]
        .iter()
        .fold(len, |bound, &extra| bound.saturating_add(extra))
}
5073
5074fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
5075    let type_code = match object_type {
5076        ObjectType::Commit => 1,
5077        ObjectType::Tree => 2,
5078        ObjectType::Blob => 3,
5079        ObjectType::Tag => 4,
5080    };
5081    write_pack_entry_header_kind(out, type_code, size);
5082}
5083
/// Append a pack entry header for `type_code` and `size`.
///
/// The first byte holds the type code in bits 4-6 and the low four size bits;
/// the remaining size bits follow seven per byte, least significant first.
/// Every byte except the last has its high bit set.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    // Continuation flag for a byte that is followed by more size bits.
    let continuation = |remaining: u64| if remaining != 0 { 0x80u8 } else { 0 };

    let mut rest = size >> 4;
    out.push((type_code << 4) | ((size as u8) & 0x0f) | continuation(rest));
    while rest != 0 {
        let low = (rest as u8) & 0x7f;
        rest >>= 7;
        out.push(low | continuation(rest));
    }
}
5100
5101fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
5102    if relative == 0 {
5103        return Err(GitError::InvalidFormat(
5104            "ofs-delta relative offset cannot be zero".into(),
5105        ));
5106    }
5107    let mut value = relative;
5108    let mut bytes = vec![(value & 0x7f) as u8];
5109    value >>= 7;
5110    while value != 0 {
5111        value -= 1;
5112        bytes.push(((value & 0x7f) as u8) | 0x80);
5113        value >>= 7;
5114    }
5115    bytes.reverse();
5116    out.extend_from_slice(&bytes);
5117    Ok(())
5118}
5119
5120fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
5121    let Some(byte) = bytes.get(*offset).copied() else {
5122        return Err(GitError::InvalidFormat(
5123            "truncated pack entry header".into(),
5124        ));
5125    };
5126    *offset += 1;
5127    Ok(byte)
5128}
5129
/// Decode the first two bytes of `bytes` as a big-endian `u16`.
///
/// Panics when `bytes` holds fewer than two bytes; callers pass pre-sized
/// slices.
fn u16_be(bytes: &[u8]) -> u16 {
    let high = u16::from(bytes[0]);
    let low = u16::from(bytes[1]);
    (high << 8) | low
}
5133
/// Decode the first four bytes of `bytes` as a big-endian `u32`.
///
/// Panics when `bytes` holds fewer than four bytes; callers pass pre-sized
/// slices.
fn u32_be(bytes: &[u8]) -> u32 {
    bytes[..4]
        .iter()
        .fold(0u32, |value, &byte| (value << 8) | u32::from(byte))
}
5137
/// Decode the first eight bytes of `bytes` as a big-endian `u64`.
///
/// Panics when `bytes` holds fewer than eight bytes; callers pass pre-sized
/// slices.
fn u64_be(bytes: &[u8]) -> u64 {
    bytes[..8]
        .iter()
        .fold(0u64, |value, &byte| (value << 8) | u64::from(byte))
}
5143
5144fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
5145    let mut fanout = [0u32; 256];
5146    let mut previous = 0u32;
5147    for slot in &mut fanout {
5148        *slot = u32_be(&bytes[*offset..*offset + 4]);
5149        if *slot < previous {
5150            return Err(GitError::InvalidFormat(
5151                "pack index fanout is not monotonic".into(),
5152            ));
5153        }
5154        previous = *slot;
5155        *offset += 4;
5156    }
5157    Ok(fanout)
5158}
5159
5160fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
5161    let expected_min = if oid_bytes[0] == 0 {
5162        0
5163    } else {
5164        fanout[usize::from(oid_bytes[0] - 1)]
5165    };
5166    if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
5167        return Err(GitError::InvalidFormat(
5168            "pack index object id is outside its fanout bucket".into(),
5169        ));
5170    }
5171    Ok(())
5172}
5173
5174fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
5175    if raw_offset & 0x8000_0000 == 0 {
5176        return Ok(u64::from(raw_offset));
5177    }
5178    let large_idx = (raw_offset & 0x7fff_ffff) as usize;
5179    let large_start = large_idx
5180        .checked_mul(8)
5181        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5182    let large_end = large_start
5183        .checked_add(8)
5184        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5185    if large_end > large_offset_table.len() {
5186        return Err(GitError::InvalidFormat(
5187            "pack index large offset points past table".into(),
5188        ));
5189    }
5190    Ok(u64_be(&large_offset_table[large_start..large_end]))
5191}
5192
5193fn checked_range(
5194    start: usize,
5195    count: usize,
5196    width: usize,
5197    total: usize,
5198) -> Result<std::ops::Range<usize>> {
5199    let len = count
5200        .checked_mul(width)
5201        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
5202    let end = start
5203        .checked_add(len)
5204        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
5205    if end > total {
5206        return Err(GitError::InvalidFormat("truncated pack index table".into()));
5207    }
5208    Ok(start..end)
5209}
5210
5211fn validate_position_permutation(positions: &[u32]) -> Result<()> {
5212    let mut seen = vec![false; positions.len()];
5213    for position in positions {
5214        let idx = *position as usize;
5215        if idx >= positions.len() {
5216            return Err(GitError::InvalidFormat(
5217                "reverse index position points past object table".into(),
5218            ));
5219        }
5220        if seen[idx] {
5221            return Err(GitError::InvalidFormat(
5222                "reverse index position is duplicated".into(),
5223            ));
5224        }
5225        seen[idx] = true;
5226    }
5227    Ok(())
5228}
5229
5230fn parse_midx_pack_names(
5231    bytes: &[u8],
5232    chunks: &[MultiPackIndexChunk],
5233    pack_count: usize,
5234    version: u8,
5235) -> Result<Vec<String>> {
5236    let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
5237        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
5238    let mut names = Vec::with_capacity(pack_count);
5239    let mut offset = 0usize;
5240    while names.len() < pack_count {
5241        let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
5242            return Err(GitError::InvalidFormat(
5243                "fatal: multi-pack-index pack-name chunk is too short".into(),
5244            ));
5245        };
5246        let name_bytes = &data[offset..offset + relative_end];
5247        if name_bytes.is_empty() {
5248            return Err(GitError::InvalidFormat(
5249                "multi-pack-index PNAM entry is empty".into(),
5250            ));
5251        }
5252        let name = std::str::from_utf8(name_bytes)
5253            .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
5254        if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
5255            return Err(GitError::InvalidFormat(
5256                "multi-pack-index PNAM entry contains a path separator".into(),
5257            ));
5258        }
5259        names.push(name.to_string());
5260        offset += relative_end + 1;
5261    }
5262    let padding = &data[offset..];
5263    if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
5264        return Err(GitError::InvalidFormat(
5265            "multi-pack-index PNAM padding is invalid".into(),
5266        ));
5267    }
5268    if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
5269        return Err(GitError::InvalidFormat(
5270            "multi-pack-index v1 PNAM entries are not sorted".into(),
5271        ));
5272    }
5273    Ok(names)
5274}
5275
5276fn parse_midx_oid_fanout(
5277    bytes: &[u8],
5278    chunks: &[MultiPackIndexChunk],
5279) -> Result<([u32; 256], usize)> {
5280    let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
5281        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
5282    if data.len() != 256 * 4 {
5283        return Err(GitError::InvalidFormat(
5284            "error: multi-pack-index OID fanout is of the wrong size\nfatal: multi-pack-index required OID fanout chunk missing or corrupted".into(),
5285        ));
5286    }
5287    let mut fanout = [0u32; 256];
5288    let mut previous = 0u32;
5289    for (idx, slot) in fanout.iter_mut().enumerate() {
5290        let start = idx * 4;
5291        *slot = u32_be(&data[start..start + 4]);
5292        if *slot < previous {
5293            return Err(GitError::InvalidFormat(format!(
5294                "error: oid fanout out of order: fanout[{}] = {:x} > {:x} = fanout[{idx}]\nfatal: multi-pack-index required OID fanout chunk missing or corrupted",
5295                idx - 1,
5296                previous,
5297                *slot
5298            )));
5299        }
5300        previous = *slot;
5301    }
5302    Ok((fanout, fanout[255] as usize))
5303}
5304
5305fn parse_midx_object_ids(
5306    bytes: &[u8],
5307    chunks: &[MultiPackIndexChunk],
5308    format: ObjectFormat,
5309    object_count: usize,
5310    fanout: &[u32; 256],
5311) -> Result<Vec<ObjectId>> {
5312    let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
5313        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
5314    let expected_len = object_count
5315        .checked_mul(format.raw_len())
5316        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
5317    if data.len() != expected_len {
5318        return Err(GitError::InvalidFormat(
5319            "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
5320        ));
5321    }
5322
5323    let mut ids = Vec::with_capacity(object_count);
5324    let mut counts = [0u32; 256];
5325    let mut previous_oid: Option<ObjectId> = None;
5326    for idx in 0..object_count {
5327        let start = idx * format.raw_len();
5328        let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
5329        if let Some(previous) = &previous_oid
5330            && previous.as_bytes() >= oid.as_bytes()
5331        {
5332            return Err(GitError::InvalidFormat(
5333                "multi-pack-index OIDL object ids are not strictly sorted".into(),
5334            ));
5335        }
5336        counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
5337            .checked_add(1)
5338            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
5339        previous_oid = Some(oid);
5340        ids.push(oid);
5341    }
5342
5343    let mut running = 0u32;
5344    for (idx, count) in counts.iter().enumerate() {
5345        running = running
5346            .checked_add(*count)
5347            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
5348        if fanout[idx] != running {
5349            return Err(GitError::InvalidFormat(
5350                "multi-pack-index OIDF fanout does not match OIDL".into(),
5351            ));
5352        }
5353    }
5354    Ok(ids)
5355}
5356
5357fn parse_midx_object_offsets(
5358    bytes: &[u8],
5359    chunks: &[MultiPackIndexChunk],
5360    object_ids: Vec<ObjectId>,
5361    pack_count: u32,
5362) -> Result<Vec<MultiPackIndexEntry>> {
5363    let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
5364        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
5365    let expected_len = object_ids
5366        .len()
5367        .checked_mul(8)
5368        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
5369    if data.len() != expected_len {
5370        return Err(GitError::InvalidFormat(
5371            "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
5372        ));
5373    }
5374    let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
5375    if let Some(large_offsets) = large_offsets
5376        && large_offsets.len() % 8 != 0
5377    {
5378        return Err(GitError::InvalidFormat(
5379            "multi-pack-index LOFF chunk has invalid length".into(),
5380        ));
5381    }
5382
5383    let mut entries = Vec::with_capacity(object_ids.len());
5384    for (idx, oid) in object_ids.into_iter().enumerate() {
5385        let start = idx * 8;
5386        let pack_int_id = u32_be(&data[start..start + 4]);
5387        if pack_int_id >= pack_count {
5388            return Err(GitError::InvalidFormat(
5389                "multi-pack-index object points past pack table".into(),
5390            ));
5391        }
5392        let raw_offset = u32_be(&data[start + 4..start + 8]);
5393        let offset = if raw_offset & 0x8000_0000 == 0 {
5394            u64::from(raw_offset)
5395        } else {
5396            let Some(large_offsets) = large_offsets else {
5397                return Err(GitError::InvalidFormat(
5398                    "multi-pack-index large offset missing LOFF chunk".into(),
5399                ));
5400            };
5401            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
5402            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
5403                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
5404            })?;
5405            let large_end = large_start.checked_add(8).ok_or_else(|| {
5406                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
5407            })?;
5408            if large_end > large_offsets.len() {
5409                return Err(GitError::InvalidFormat(
5410                    "fatal: multi-pack-index large offset out of bounds".into(),
5411                ));
5412            }
5413            u64_be(&large_offsets[large_start..large_end])
5414        };
5415        entries.push(MultiPackIndexEntry {
5416            oid,
5417            pack_int_id,
5418            offset,
5419            force_large_offset: raw_offset & 0x8000_0000 != 0,
5420        });
5421    }
5422    Ok(entries)
5423}
5424
5425fn parse_midx_reverse_index(
5426    bytes: &[u8],
5427    chunks: &[MultiPackIndexChunk],
5428    object_count: usize,
5429) -> Result<Option<Vec<u32>>> {
5430    let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
5431        return Ok(None);
5432    };
5433    let expected_len = object_count
5434        .checked_mul(4)
5435        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
5436    if data.len() != expected_len {
5437        return Err(GitError::InvalidFormat(
5438            "multi-pack-index reverse-index chunk is the wrong size".into(),
5439        ));
5440    }
5441    let mut positions = Vec::with_capacity(object_count);
5442    for idx in 0..object_count {
5443        let start = idx * 4;
5444        positions.push(u32_be(&data[start..start + 4]));
5445    }
5446    validate_position_permutation(&positions)?;
5447    Ok(Some(positions))
5448}
5449
5450fn parse_midx_bitmapped_packs(
5451    bytes: &[u8],
5452    chunks: &[MultiPackIndexChunk],
5453    pack_count: usize,
5454    object_count: usize,
5455) -> Result<Option<Vec<MultiPackBitmapPack>>> {
5456    let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
5457        return Ok(None);
5458    };
5459    let expected_len = pack_count
5460        .checked_mul(8)
5461        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
5462    if data.len() != expected_len {
5463        return Err(GitError::InvalidFormat(
5464            "multi-pack-index BTMP chunk has invalid length".into(),
5465        ));
5466    }
5467    let mut entries = Vec::with_capacity(pack_count);
5468    for idx in 0..pack_count {
5469        let start = idx * 8;
5470        let bitmap_pos = u32_be(&data[start..start + 4]);
5471        let bitmap_nr = u32_be(&data[start + 4..start + 8]);
5472        let bitmap_end = u64::from(bitmap_pos)
5473            .checked_add(u64::from(bitmap_nr))
5474            .ok_or_else(|| {
5475                GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
5476            })?;
5477        if bitmap_end > object_count as u64 {
5478            return Err(GitError::InvalidFormat(
5479                "multi-pack-index BTMP range points past object table".into(),
5480            ));
5481        }
5482        entries.push(MultiPackBitmapPack {
5483            bitmap_pos,
5484            bitmap_nr,
5485        });
5486    }
5487    Ok(Some(entries))
5488}
5489
5490fn midx_chunk_data<'a>(
5491    bytes: &'a [u8],
5492    chunks: &[MultiPackIndexChunk],
5493    id: [u8; 4],
5494    required: bool,
5495) -> Result<Option<&'a [u8]>> {
5496    let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
5497        if required {
5498            return Err(GitError::InvalidFormat(format!(
5499                "multi-pack-index missing {} chunk",
5500                std::str::from_utf8(&id).unwrap_or("required")
5501            )));
5502        }
5503        return Ok(None);
5504    };
5505    let start = usize::try_from(chunk.offset)
5506        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
5507    let len = usize::try_from(chunk.len)
5508        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
5509    let end = start
5510        .checked_add(len)
5511        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
5512    let Some(data) = bytes.get(start..end) else {
5513        return Err(GitError::InvalidFormat(
5514            "multi-pack-index chunk extends past file".into(),
5515        ));
5516    };
5517    Ok(Some(data))
5518}
5519
5520fn hash_function_id(format: ObjectFormat) -> u32 {
5521    match format {
5522        ObjectFormat::Sha1 => 1,
5523        ObjectFormat::Sha256 => 2,
5524    }
5525}
5526
/// Maximum number of clean (run) words that a single EWAH running-length word
/// can describe. The field is 32 bits wide (bits 1..=32 of the RLW).
///
/// The same value doubles as the mask applied to the RLW after shifting it
/// right by one bit to extract the run length.
const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;

/// Maximum number of literal (dirty) words that can trail a single EWAH
/// running-length word. The field is 31 bits wide (bits 33..=63 of the RLW).
///
/// The same value doubles as the mask applied to a literal count before it is
/// shifted left by 33 bits into the RLW.
const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;

/// All-ones 64-bit word, used to recognise a "clean" run of set bits and as
/// the fill word when such a run is expanded again.
const EWAH_ALL_ONES: u64 = u64::MAX;
5537
5538impl EwahBitmap {
5539    /// Constructs an [`EwahBitmap`] in git's canonical EWAH compressed form
5540    /// from a slice of raw uncompressed 64-bit words.
5541    ///
5542    /// Within each word bit `i` corresponds to position `word_index * 64 + i`,
5543    /// matching git's on-disk convention. `bit_size` records the number of
5544    /// logical bits the bitmap spans; it must not exceed `words.len() * 64`.
5545    ///
5546    /// This mirrors libgit's `ewah_add`/`ewah_add_empty_words` incremental
5547    /// encoder: consecutive all-zero or all-one words collapse into a run, and
5548    /// any other word is stored verbatim as a literal. Only the first
5549    /// `bit_size.div_ceil(64)` words back the declared bits; any extra trailing
5550    /// words supplied by the caller are ignored, just as git encodes a bitmap
5551    /// sized to its highest set bit.
5552    pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
5553        let required_words = bit_size.div_ceil(64) as usize;
5554        if required_words > words.len() {
5555            return Err(GitError::InvalidFormat(format!(
5556                "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
5557                words.len()
5558            )));
5559        }
5560        // Only the words that actually back the declared bits matter; libgit
5561        // never emits clean trailing zero words for the unused tail.
5562        let significant = &words[..required_words];
5563        let mut builder = EwahBuilder::new(bit_size);
5564        for &word in significant {
5565            if word == 0 {
5566                builder.add_empty_words(false, 1);
5567            } else if word == EWAH_ALL_ONES {
5568                builder.add_empty_words(true, 1);
5569            } else {
5570                builder.add_literal(word);
5571            }
5572        }
5573        builder.finish()
5574    }
5575
5576    /// Constructs an [`EwahBitmap`] from a set of bit positions.
5577    ///
5578    /// `bit_size` is the number of logical bits (typically the pack object
5579    /// count). Every position in `positions` must be strictly less than
5580    /// `bit_size`. Positions may be given in any order and may repeat.
5581    pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
5582        let word_count = bit_size.div_ceil(64) as usize;
5583        let mut words = vec![0u64; word_count];
5584        for &position in positions {
5585            if position >= bit_size {
5586                return Err(GitError::InvalidFormat(format!(
5587                    "EWAH bit position {position} out of range for bit_size {bit_size}"
5588                )));
5589            }
5590            let word_index = (position / 64) as usize;
5591            let bit_index = position % 64;
5592            words[word_index] |= 1u64 << bit_index;
5593        }
5594        Self::from_words(bit_size, &words)
5595    }
5596
5597    /// An empty EWAH bitmap (no bits, no words). This is what git writes for an
5598    /// all-zero type bitmap (e.g. when a pack has no tags).
5599    pub fn empty() -> Self {
5600        Self {
5601            bit_size: 0,
5602            words: Vec::new(),
5603            rlw_position: 0,
5604        }
5605    }
5606
5607    /// Decodes the compressed EWAH back into raw 64-bit words, LSB-first within
5608    /// each word. The returned vector has `bit_size.div_ceil(64)` entries.
5609    ///
5610    /// This is the inverse of [`EwahBitmap::from_words`] for the bits the
5611    /// bitmap actually covers and is primarily used to validate roundtrips.
5612    pub fn to_words(&self) -> Result<Vec<u64>> {
5613        let mut out = Vec::new();
5614        let mut word_idx = 0usize;
5615        while word_idx < self.words.len() {
5616            let rlw = self.words[word_idx];
5617            let run_bit = rlw & 1;
5618            let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
5619            let literal_words = (rlw >> 33) as usize;
5620            word_idx += 1;
5621            let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
5622            for _ in 0..run_words {
5623                out.push(fill);
5624            }
5625            let literal_end = word_idx
5626                .checked_add(literal_words)
5627                .filter(|end| *end <= self.words.len())
5628                .ok_or_else(|| {
5629                    GitError::InvalidFormat("EWAH literal words extend past word table".into())
5630                })?;
5631            out.extend_from_slice(&self.words[word_idx..literal_end]);
5632            word_idx = literal_end;
5633        }
5634        let required_words = (self.bit_size as usize).div_ceil(64);
5635        if out.len() < required_words {
5636            out.resize(required_words, 0);
5637        }
5638        out.truncate(required_words);
5639        Ok(out)
5640    }
5641
5642    /// Returns the sorted set bit positions covered by this bitmap.
5643    pub fn to_positions(&self) -> Result<Vec<u32>> {
5644        let words = self.to_words()?;
5645        let mut positions = Vec::new();
5646        for (word_index, word) in words.iter().enumerate() {
5647            let mut remaining = *word;
5648            while remaining != 0 {
5649                let bit = remaining.trailing_zeros();
5650                let position = (word_index as u64) * 64 + u64::from(bit);
5651                if position < u64::from(self.bit_size) {
5652                    // position always fits in u32 because bit_size is u32.
5653                    positions.push(position as u32);
5654                }
5655                remaining &= remaining - 1;
5656            }
5657        }
5658        Ok(positions)
5659    }
5660
5661    /// Serialises the bitmap to git's on-disk EWAH byte layout: `bit_size`
5662    /// (u32 BE), word count (u32 BE), each compressed word (u64 BE), then the
5663    /// running-length-word position (u32 BE).
5664    pub fn to_bytes(&self) -> Vec<u8> {
5665        let mut out = Vec::with_capacity(12 + self.words.len() * 8);
5666        self.append_bytes(&mut out);
5667        out
5668    }
5669
5670    fn append_bytes(&self, out: &mut Vec<u8>) {
5671        out.extend_from_slice(&self.bit_size.to_be_bytes());
5672        out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
5673        for word in &self.words {
5674            out.extend_from_slice(&word.to_be_bytes());
5675        }
5676        out.extend_from_slice(&self.rlw_position.to_be_bytes());
5677    }
5678}
5679
5680/// Incremental EWAH compressed-buffer builder mirroring libgit's `ewah_add`.
5681///
5682/// The buffer is a sequence of blocks. Each block begins with a running-length
5683/// word (RLW) and is followed by zero or more literal words:
5684///   * bit 0      => value of the clean run words (0 or 1)
5685///   * bits 1..=32 => number of clean run words (32-bit field)
5686///   * bits 33..=63 => number of trailing literal words (31-bit field)
5687struct EwahBuilder {
5688    bit_size: u32,
5689    words: Vec<u64>,
5690    rlw_position: usize,
5691}
5692
5693impl EwahBuilder {
5694    fn new(bit_size: u32) -> Self {
5695        // Every EWAH buffer begins with an RLW, even an empty one.
5696        Self {
5697            bit_size,
5698            words: vec![0u64],
5699            rlw_position: 0,
5700        }
5701    }
5702
5703    fn rlw(&self) -> u64 {
5704        self.words[self.rlw_position]
5705    }
5706
5707    fn set_rlw(&mut self, value: u64) {
5708        self.words[self.rlw_position] = value;
5709    }
5710
5711    fn rlw_running_len(&self) -> u64 {
5712        (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
5713    }
5714
5715    fn rlw_running_bit(&self) -> bool {
5716        self.rlw() & 1 == 1
5717    }
5718
5719    fn rlw_literal_len(&self) -> u64 {
5720        self.rlw() >> 33
5721    }
5722
5723    fn set_running_bit(&mut self, bit: bool) {
5724        let mut value = self.rlw();
5725        value &= !1;
5726        value |= u64::from(bit);
5727        self.set_rlw(value);
5728    }
5729
5730    fn set_running_len(&mut self, len: u64) {
5731        let mut value = self.rlw();
5732        value &= !(EWAH_MAX_RUNNING_LEN << 1);
5733        value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
5734        self.set_rlw(value);
5735    }
5736
5737    fn set_literal_len(&mut self, len: u64) {
5738        let mut value = self.rlw();
5739        value &= (1u64 << 33) - 1;
5740        value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
5741        self.set_rlw(value);
5742    }
5743
5744    /// Begins a fresh RLW block at the end of the buffer.
5745    fn push_rlw(&mut self) {
5746        self.rlw_position = self.words.len();
5747        self.words.push(0);
5748    }
5749
5750    /// Appends `number` clean words whose bits are all `value`, mirroring
5751    /// libgit's `ewah_add_empty_words`.
5752    ///
5753    /// A run can only be merged into the current RLW when that RLW has not yet
5754    /// emitted any literal words and its run either is empty or already carries
5755    /// the same fill value. Otherwise a fresh RLW block must be started, because
5756    /// every block stores its run strictly before its literals.
5757    fn add_empty_words(&mut self, value: bool, mut number: u64) {
5758        while number > 0 {
5759            // The current RLW can absorb more run words only when it has no
5760            // literals yet, its run is either empty or already the right fill
5761            // value, and the 32-bit run-length field is not already saturated.
5762            let can_extend = self.rlw_literal_len() == 0
5763                && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
5764                && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
5765            if !can_extend {
5766                self.push_rlw();
5767            }
5768            if self.rlw_running_len() == 0 {
5769                self.set_running_bit(value);
5770            }
5771            let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
5772            let take = available.min(number);
5773            self.set_running_len(self.rlw_running_len() + take);
5774            number -= take;
5775        }
5776    }
5777
5778    /// Appends a single literal (dirty) word verbatim, mirroring libgit's
5779    /// `ewah_add_dirty_words` for a count of one.
5780    fn add_literal(&mut self, word: u64) {
5781        if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
5782            self.push_rlw();
5783        }
5784        let literal_len = self.rlw_literal_len();
5785        self.set_literal_len(literal_len + 1);
5786        self.words.push(word);
5787    }
5788
5789    fn finish(self) -> Result<EwahBitmap> {
5790        let rlw_position = u32::try_from(self.rlw_position)
5791            .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
5792        if self.words.len() > u32::MAX as usize {
5793            return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
5794        }
5795        Ok(EwahBitmap {
5796            bit_size: self.bit_size,
5797            words: self.words,
5798            rlw_position,
5799        })
5800    }
5801}
5802
5803/// Builder that assembles a reachability bitmap (`.bitmap`) for a pack.
5804///
5805/// The writer is constructed from the object layout of a pack (one
5806/// [`ObjectType`] per object, in pack order) and the pack's trailing checksum.
5807/// Callers then register one selected commit per [`add_commit`] call, supplying
5808/// the set of pack positions reachable from that commit. [`build`]/[`write`]
5809/// produce a [`PackBitmapIndex`] / serialised `.bitmap` bytes matching git's
5810/// on-disk format (signature `BITM`, version 1).
5811///
5812/// [`add_commit`]: PackBitmapWriter::add_commit
5813/// [`build`]: PackBitmapWriter::build
5814/// [`write`]: PackBitmapWriter::write
5815#[derive(Debug, Clone)]
5816pub struct PackBitmapWriter {
5817    format: ObjectFormat,
5818    pack_checksum: ObjectId,
5819    object_count: u32,
5820    commit_positions: Vec<u32>,
5821    tree_positions: Vec<u32>,
5822    blob_positions: Vec<u32>,
5823    tag_positions: Vec<u32>,
5824    name_hash_cache: Option<Vec<u32>>,
5825    selected: Vec<SelectedCommit>,
5826}
5827
5828#[derive(Debug, Clone)]
5829struct SelectedCommit {
5830    /// Oid-sorted `.idx` position (what the on-disk entry records). The
5831    /// commit's pack-order position lives in `reachable` with the rest of the
5832    /// bits.
5833    commit_index_position: u32,
5834    flags: u8,
5835    reachable: Vec<u32>,
5836}
5837
5838impl PackBitmapWriter {
5839    /// `OBJ_NONE` selection flag: this commit's bitmap is stored in full (no XOR
5840    /// compression against a previously selected commit). This is the only flag
5841    /// value this writer emits.
5842    pub const FLAG_NONE: u8 = 0;
5843
5844    /// Creates a writer for a pack whose objects (in pack order) have the given
5845    /// [`ObjectType`]s and whose trailing checksum is `pack_checksum`.
5846    ///
5847    /// Returns an error if the pack contains more than `u32::MAX` objects, if
5848    /// `pack_checksum`'s format does not match `format`, or if any object type
5849    /// is not one of the four reachable git object kinds.
5850    pub fn new(
5851        format: ObjectFormat,
5852        pack_checksum: ObjectId,
5853        object_types: &[ObjectType],
5854    ) -> Result<Self> {
5855        if object_types.len() > u32::MAX as usize {
5856            return Err(GitError::InvalidFormat(
5857                "too many objects for a pack bitmap".into(),
5858            ));
5859        }
5860        if pack_checksum.format() != format {
5861            return Err(GitError::InvalidObjectId(
5862                "pack checksum format does not match bitmap format".into(),
5863            ));
5864        }
5865        let object_count = object_types.len() as u32;
5866        let mut commit_positions = Vec::new();
5867        let mut tree_positions = Vec::new();
5868        let mut blob_positions = Vec::new();
5869        let mut tag_positions = Vec::new();
5870        for (index, object_type) in object_types.iter().enumerate() {
5871            let position = index as u32;
5872            match object_type {
5873                ObjectType::Commit => commit_positions.push(position),
5874                ObjectType::Tree => tree_positions.push(position),
5875                ObjectType::Blob => blob_positions.push(position),
5876                ObjectType::Tag => tag_positions.push(position),
5877            }
5878        }
5879        Ok(Self {
5880            format,
5881            pack_checksum,
5882            object_count,
5883            commit_positions,
5884            tree_positions,
5885            blob_positions,
5886            tag_positions,
5887            name_hash_cache: None,
5888            selected: Vec::new(),
5889        })
5890    }
5891
5892    /// Attaches a name-hash cache (one `u32` per object, in pack order). When
5893    /// set, the written bitmap advertises [`PackBitmapIndex::OPTION_HASH_CACHE`]
5894    /// and appends the cache after the bitmap entries, exactly as git does.
5895    ///
5896    /// Returns an error if the cache length does not equal the object count.
5897    pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5898        if cache.len() != self.object_count as usize {
5899            return Err(GitError::InvalidFormat(format!(
5900                "name hash cache has {} entries but pack has {} objects",
5901                cache.len(),
5902                self.object_count
5903            )));
5904        }
5905        self.name_hash_cache = Some(cache);
5906        Ok(self)
5907    }
5908
5909    /// Registers a selected commit and the pack positions reachable from it.
5910    ///
5911    /// `commit_position` is the *pack-order* position of the commit itself (the
5912    /// bit-number space); it must reference a commit object and is implicitly
5913    /// part of the reachable set. `commit_index_position` is the commit's
5914    /// position in the *oid-sorted* pack index — this is what the on-disk entry
5915    /// records (upstream `oid_pos`); bits and entry positions live in different
5916    /// spaces. `reachable` lists the pack-order positions of every object
5917    /// reachable from the commit (it may include or omit `commit_position`;
5918    /// duplicates are fine). All positions must be in range. The commit's full
5919    /// (non-XORed) bitmap is stored.
5920    pub fn add_commit(
5921        &mut self,
5922        commit_position: u32,
5923        commit_index_position: u32,
5924        reachable: &[u32],
5925    ) -> Result<()> {
5926        if commit_position >= self.object_count {
5927            return Err(GitError::InvalidFormat(format!(
5928                "commit position {commit_position} out of range for {} objects",
5929                self.object_count
5930            )));
5931        }
5932        if commit_index_position >= self.object_count {
5933            return Err(GitError::InvalidFormat(format!(
5934                "commit index position {commit_index_position} out of range for {} objects",
5935                self.object_count
5936            )));
5937        }
5938        if !self.commit_positions.contains(&commit_position) {
5939            return Err(GitError::InvalidFormat(format!(
5940                "bitmap commit position {commit_position} is not a commit object"
5941            )));
5942        }
5943        for &position in reachable {
5944            if position >= self.object_count {
5945                return Err(GitError::InvalidFormat(format!(
5946                    "reachable position {position} out of range for {} objects",
5947                    self.object_count
5948                )));
5949            }
5950        }
5951        let mut reachable = reachable.to_vec();
5952        reachable.push(commit_position);
5953        self.selected.push(SelectedCommit {
5954            commit_index_position,
5955            flags: Self::FLAG_NONE,
5956            reachable,
5957        });
5958        Ok(())
5959    }
5960
5961    /// Builds the in-memory [`PackBitmapIndex`] without serialising it.
5962    ///
5963    /// The resulting index always advertises
5964    /// [`PackBitmapIndex::OPTION_FULL_DAG`] (the four type bitmaps fully cover
5965    /// the pack) and, when a name-hash cache was attached,
5966    /// [`PackBitmapIndex::OPTION_HASH_CACHE`].
5967    pub fn build(&self) -> Result<PackBitmapIndex> {
5968        let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5969        let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5970        let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5971        let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5972
5973        let mut entries = Vec::with_capacity(self.selected.len());
5974        for selected in &self.selected {
5975            let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5976            entries.push(PackBitmapEntry {
5977                object_position: selected.commit_index_position,
5978                xor_offset: 0,
5979                flags: selected.flags,
5980                bitmap,
5981            });
5982        }
5983
5984        let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5985        if self.name_hash_cache.is_some() {
5986            options |= PackBitmapIndex::OPTION_HASH_CACHE;
5987        }
5988
5989        // The index checksum is only known once the body is serialised; the
5990        // dedicated `write` path fills it in. `build` reports a placeholder of
5991        // the correct format so the struct is self-consistent for callers that
5992        // only need the decoded bitmaps.
5993        let placeholder_checksum = ObjectId::null(self.format);
5994        Ok(PackBitmapIndex {
5995            version: 1,
5996            format: self.format,
5997            options,
5998            pack_checksum: self.pack_checksum.clone(),
5999            index_checksum: placeholder_checksum,
6000            type_bitmaps: PackBitmapTypeBitmaps {
6001                commits,
6002                trees,
6003                blobs,
6004                tags,
6005            },
6006            entries,
6007            name_hash_cache: self.name_hash_cache.clone(),
6008        })
6009    }
6010
6011    /// Builds and serialises the `.bitmap` file, returning the on-disk bytes
6012    /// (including the trailing index checksum).
6013    pub fn write(&self) -> Result<Vec<u8>> {
6014        self.build()?.write()
6015    }
6016}
6017
6018impl PackBitmapIndex {
6019    /// Serialises this index into git's on-disk `.bitmap` byte layout.
6020    ///
6021    /// This is the exact inverse of [`PackBitmapIndex::parse`]: signature
6022    /// `BITM`, version (u16 BE), options (u16 BE), entry count (u32 BE), the
6023    /// pack checksum, the four type bitmaps (commits, trees, blobs, tags), each
6024    /// commit entry (object position, XOR offset, flags, EWAH bitmap), the
6025    /// optional name-hash cache, and finally the trailing index checksum over
6026    /// everything written so far.
6027    ///
6028    /// The `index_checksum` field of `self` is ignored and recomputed from the
6029    /// serialised body. Returns an error for unsupported versions, mismatched
6030    /// object-id formats, an oversized entry table, or an inconsistent name-hash
6031    /// cache.
6032    pub fn write(&self) -> Result<Vec<u8>> {
6033        if self.version != 1 {
6034            return Err(GitError::Unsupported(format!(
6035                "bitmap index version {}",
6036                self.version
6037            )));
6038        }
6039        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
6040        if self.options & !known_options != 0 {
6041            return Err(GitError::Unsupported(format!(
6042                "bitmap index options {:#06x}",
6043                self.options & !known_options
6044            )));
6045        }
6046        if self.pack_checksum.format() != self.format {
6047            return Err(GitError::InvalidObjectId(
6048                "bitmap pack checksum format does not match index format".into(),
6049            ));
6050        }
6051        if self.entries.len() > u32::MAX as usize {
6052            return Err(GitError::InvalidFormat(
6053                "too many bitmap index entries".into(),
6054            ));
6055        }
6056        let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
6057        match (&self.name_hash_cache, want_cache) {
6058            (Some(_), false) => {
6059                return Err(GitError::InvalidFormat(
6060                    "name hash cache present without OPTION_HASH_CACHE".into(),
6061                ));
6062            }
6063            (None, true) => {
6064                return Err(GitError::InvalidFormat(
6065                    "OPTION_HASH_CACHE set without a name hash cache".into(),
6066                ));
6067            }
6068            _ => {}
6069        }
6070
6071        let mut out = Vec::new();
6072        out.extend_from_slice(b"BITM");
6073        out.extend_from_slice(&self.version.to_be_bytes());
6074        out.extend_from_slice(&self.options.to_be_bytes());
6075        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
6076        out.extend_from_slice(self.pack_checksum.as_bytes());
6077
6078        self.type_bitmaps.commits.append_bytes(&mut out);
6079        self.type_bitmaps.trees.append_bytes(&mut out);
6080        self.type_bitmaps.blobs.append_bytes(&mut out);
6081        self.type_bitmaps.tags.append_bytes(&mut out);
6082
6083        for (idx, entry) in self.entries.iter().enumerate() {
6084            if entry.xor_offset as usize > idx {
6085                return Err(GitError::InvalidFormat(
6086                    "bitmap index entry has invalid XOR offset".into(),
6087                ));
6088            }
6089            out.extend_from_slice(&entry.object_position.to_be_bytes());
6090            out.push(entry.xor_offset);
6091            out.push(entry.flags);
6092            entry.bitmap.append_bytes(&mut out);
6093        }
6094
6095        if let Some(cache) = &self.name_hash_cache {
6096            for value in cache {
6097                out.extend_from_slice(&value.to_be_bytes());
6098            }
6099        }
6100
6101        let checksum = sley_core::digest_bytes(self.format, &out)?;
6102        out.extend_from_slice(checksum.as_bytes());
6103        Ok(out)
6104    }
6105}
6106
6107/// Convenience wrapper that builds a `.bitmap` file in one call.
6108///
6109/// `object_types` lists the [`ObjectType`] of every pack object in pack order,
6110/// `pack_checksum` is the pack's trailing checksum, and `commits` carries, per
6111/// selected commit, `(pack_position, index_position, reachable_pack_positions)`
6112/// (see [`PackBitmapWriter::add_commit`] for the two position spaces). An
6113/// optional `name_hash_cache` (one entry per object) may be supplied to emit
6114/// the hash-cache extension.
6115pub fn write_bitmap(
6116    format: ObjectFormat,
6117    pack_checksum: ObjectId,
6118    object_types: &[ObjectType],
6119    commits: &[(u32, u32, Vec<u32>)],
6120    name_hash_cache: Option<Vec<u32>>,
6121) -> Result<Vec<u8>> {
6122    let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
6123    if let Some(cache) = name_hash_cache {
6124        writer = writer.with_name_hash_cache(cache)?;
6125    }
6126    for (commit_position, commit_index_position, reachable) in commits {
6127        writer.add_commit(*commit_position, *commit_index_position, reachable)?;
6128    }
6129    writer.write()
6130}
6131
6132#[cfg(test)]
6133mod tests {
6134    use super::*;
6135    use flate2::Compression;
6136    use flate2::read::ZlibDecoder;
6137    use flate2::write::ZlibEncoder;
6138    use std::fs;
6139    use std::io::Read;
6140    use std::io::Write;
6141    use std::path::{Path, PathBuf};
6142    use std::process::Command;
6143    use std::time::{SystemTime, UNIX_EPOCH};
6144
6145    fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
6146        PackWriteOptions::new()
6147            .with_prefer_ofs_delta(prefer_ofs_delta)
6148            .with_reorder(false)
6149    }
6150
6151    #[test]
6152    fn parses_single_blob_pack() {
6153        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6154        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6155        assert_eq!(parsed.version, 2);
6156        assert_eq!(parsed.entries.len(), 1);
6157        let object = &parsed.entries[0].object;
6158        assert_eq!(object.object_type, ObjectType::Blob);
6159        assert_eq!(object.body, b"hello\n");
6160        assert_eq!(
6161            parsed.entries[0].entry.oid.to_hex(),
6162            "ce013625030ba8dba906f756967f9e9ca394464a"
6163        );
6164    }
6165
6166    #[test]
6167    fn parses_single_blob_pack_sha256() {
6168        let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
6169        let parsed =
6170            PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
6171        assert_eq!(parsed.version, 2);
6172        assert_eq!(parsed.entries.len(), 1);
6173        let object = &parsed.entries[0].object;
6174        assert_eq!(object.object_type, ObjectType::Blob);
6175        assert_eq!(object.body, b"hello\n");
6176        assert_eq!(
6177            parsed.entries[0].entry.oid,
6178            object
6179                .object_id(ObjectFormat::Sha256)
6180                .expect("test operation should succeed")
6181        );
6182    }
6183
6184    #[test]
6185    fn parses_bundle_pack_payload_with_bundle_format() {
6186        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
6187        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
6188            .expect("test operation should succeed");
6189        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6190            .into_bytes()
6191            .into_iter()
6192            .chain(pack)
6193            .collect::<Vec<_>>();
6194        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6195            .expect("test operation should succeed");
6196
6197        let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
6198        assert_eq!(parsed.entries.len(), 1);
6199        assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
6200        assert_eq!(parsed.entries[0].object.body, b"bundle\n");
6201    }
6202
6203    /// Build a pack whose single blob entry header LIES about its decompressed
6204    /// size: it declares `declared_size` while the actual zlib payload only
6205    /// inflates to `real_body`. A short `real_body` plus a `declared_size` of
6206    /// `u64::MAX` is the decompression-bomb shape — the header claims terabytes
6207    /// from a handful of compressed bytes.
6208    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
6209        let mut pack = Vec::new();
6210        pack.extend_from_slice(b"PACK");
6211        pack.extend_from_slice(&2u32.to_be_bytes());
6212        pack.extend_from_slice(&1u32.to_be_bytes());
6213        // Object type 3 == blob; size varint encodes the *attacker-declared* size.
6214        write_pack_entry_header_kind(&mut pack, 3, declared_size);
6215        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6216        encoder
6217            .write_all(real_body)
6218            .expect("test operation should succeed");
6219        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6220        let checksum =
6221            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6222        pack.extend_from_slice(checksum.as_bytes());
6223        pack
6224    }
6225
6226    /// Regression: a crafted pack object header declaring a gigantic decompressed
6227    /// size with a tiny compressed payload must NOT drive an up-front
6228    /// reservation/allocation of that declared size (OOM/abort). sley#2: the
6229    /// header `size` is attacker-controlled over the network (install_raw_pack →
6230    /// sley-fetch), so it must be validated/bounded before any `Vec::reserve`.
6231    ///
6232    /// On the unfixed code, `inflate_into` did `out.reserve(header.size as usize)`
6233    /// with `header.size == u64::MAX`, which panics with "capacity overflow" (or
6234    /// aborts on alloc failure) *before* the size-mismatch check could fire. We
6235    /// run parse on a worker thread so that panic surfaces as a `join()` error
6236    /// rather than killing the test process; the fix turns this into a clean
6237    /// `Err` returned normally.
6238    #[test]
6239    fn rejects_decompression_bomb_header_without_oom() {
6240        for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
6241            let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
6242            let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
6243            let result = handle.join();
6244            // The parse thread must not have panicked/aborted on a huge reserve.
6245            assert!(
6246                result.is_ok(),
6247                "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
6248            );
6249            // And parsing must reject the lie (decoded len != declared size).
6250            let parse_result = result.expect("parse thread should not panic on a bomb header");
6251            assert!(
6252                parse_result.is_err(),
6253                "bomb header (declared={declared}) should be rejected as invalid"
6254            );
6255        }
6256    }
6257
6258    /// Build a 2-object pack: a real base blob followed by a delta (ref or ofs)
6259    /// whose *result-size* varint lies, declaring `declared_result_size`, while
6260    /// carrying a tiny real instruction stream. The delta's base-size varint is
6261    /// set correctly (so the base-size check at the top of `apply_pack_delta`
6262    /// passes and we reach the result reservation). Used to drive the sley#35
6263    /// delta-result-size bomb.
6264    fn lying_result_size_delta_pack(
6265        format: ObjectFormat,
6266        declared_result_size: u64,
6267        delta_kind: DeltaKind,
6268    ) -> Vec<u8> {
6269        let base = b"hello";
6270        let result = b"hello world"; // real produced length = 11
6271
6272        // Hand-build a delta with a truthful base-size and a LYING result-size.
6273        let mut delta = Vec::new();
6274        write_delta_varint(&mut delta, base.len() as u64);
6275        write_delta_varint(&mut delta, declared_result_size);
6276        // Real instructions: copy `base` then insert " world".
6277        let suffix = &result[base.len()..];
6278        delta.push(0x90); // copy, 1 size byte present (bit 0x10)
6279        delta.push(base.len() as u8);
6280        delta.push(suffix.len() as u8);
6281        delta.extend_from_slice(suffix);
6282
6283        let mut pack = Vec::new();
6284        pack.extend_from_slice(b"PACK");
6285        pack.extend_from_slice(&2u32.to_be_bytes());
6286        pack.extend_from_slice(&2u32.to_be_bytes());
6287
6288        let base_offset = pack.len();
6289        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
6290        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6291        encoder
6292            .write_all(base)
6293            .expect("test operation should succeed");
6294        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6295
6296        let delta_offset = pack.len();
6297        write_pack_entry_header_kind(
6298            &mut pack,
6299            match delta_kind {
6300                DeltaKind::Offset => 6,
6301                DeltaKind::Ref => 7,
6302            },
6303            delta.len() as u64,
6304        );
6305        match delta_kind {
6306            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
6307            DeltaKind::Ref => {
6308                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
6309                    .expect("test operation should succeed");
6310                pack.extend_from_slice(base_oid.as_bytes());
6311            }
6312        }
6313        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6314        encoder
6315            .write_all(&delta)
6316            .expect("test operation should succeed");
6317        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
6318
6319        let checksum =
6320            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
6321        pack.extend_from_slice(checksum.as_bytes());
6322        pack
6323    }
6324
6325    /// Regression (sley#35): the 2nd instance of the sley#2 decompression-bomb
6326    /// class. `apply_pack_delta` read an attacker-controlled `result_size` varint
6327    /// from a network delta and fed it straight to `Vec::with_capacity`. A tiny
6328    /// delta declaring `result_size == u64::MAX` (or ~1 TiB) aborts the process
6329    /// ("capacity overflow"/alloc failure, SIGABRT) BEFORE the post-decode
6330    /// size-mismatch check can reject the lie. Both ref-delta and ofs-delta paths
6331    /// reach the same reservation, so both must be safe. We resolve the pack on a
6332    /// worker thread so an abort/panic surfaces as a `join()` error rather than
6333    /// killing the whole test binary; the fix turns the bomb into a clean `Err`.
6334    #[test]
6335    fn rejects_delta_result_size_bomb_without_oom() {
6336        let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
6337        for &declared in bombs {
6338            for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
6339                let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
6340                let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
6341                let join_result = handle.join();
6342                assert!(
6343                    join_result.is_ok(),
6344                    "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
6345                     instead of erroring cleanly"
6346                );
6347                let parse_result =
6348                    join_result.expect("parse thread should not panic on a delta bomb");
6349                assert!(
6350                    parse_result.is_err(),
6351                    "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
6352                     as invalid (result.len() != declared)"
6353                );
6354            }
6355        }
6356    }
6357
6358    /// A legitimate (truthful) delta whose result-size varint matches the real
6359    /// produced length must still resolve correctly — the bound only caps the
6360    /// speculative reservation, it must not break real delta application.
6361    #[test]
6362    fn applies_legitimate_delta_after_result_size_bound() {
6363        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
6364            let base = b"hello";
6365            let result = b"hello world";
6366            let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
6367            let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
6368            assert_eq!(parsed.entries.len(), 2);
6369            assert_eq!(parsed.entries[0].object.body, base);
6370            assert_eq!(parsed.entries[1].object.body, result);
6371        }
6372    }
6373
6374    #[test]
6375    fn bounded_inflate_reserve_caps_attacker_declared_size() {
6376        // A tiny compressed input can't justify a multi-gigabyte reservation.
6377        assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
6378        // The absolute ceiling caps even a large input-justified hint.
6379        assert_eq!(
6380            bounded_inflate_reserve(usize::MAX, usize::MAX),
6381            MAX_INFLATE_RESERVE
6382        );
6383        // A modest legitimate hint is preserved unchanged (no regression for real
6384        // objects): 1000 bytes of output from 500 bytes of input is well within
6385        // both bounds.
6386        assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
6387        // Floor of 64 for tiny hints.
6388        assert_eq!(bounded_inflate_reserve(0, 0), 64);
6389    }
6390
6391    #[test]
6392    fn rejects_bundle_pack_payload_with_wrong_object_format() {
6393        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
6394        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
6395            .expect("test operation should succeed");
6396        let bundle_bytes =
6397            format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
6398                .into_bytes()
6399                .into_iter()
6400                .chain(pack)
6401                .collect::<Vec<_>>();
6402        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6403            .expect("test operation should succeed");
6404
6405        assert!(PackFile::parse_bundle(&bundle).is_err());
6406    }
6407
6408    fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
6409        let owned = PackIndex::parse(index, format).expect("test operation should succeed");
6410        let view = PackIndexView::parse(index, format).expect("test operation should succeed");
6411        let owned_view =
6412            PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
6413                .expect("test operation should succeed");
6414
6415        assert_eq!(view.version, owned.version);
6416        assert_eq!(view.count, owned.entries.len());
6417        assert_eq!(view.count(), owned.entries.len());
6418        assert_eq!(view.fanout(), &owned.fanout);
6419        assert_eq!(view.pack_checksum, owned.pack_checksum);
6420        assert_eq!(view.index_checksum, owned.index_checksum);
6421        assert_eq!(owned_view.version, owned.version);
6422        assert_eq!(owned_view.count(), owned.entries.len());
6423        assert_eq!(owned_view.fanout(), &owned.fanout);
6424        assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
6425        assert_eq!(owned_view.index_checksum, owned.index_checksum);
6426        for entry in &owned.entries {
6427            let owned_found = owned
6428                .find(&entry.oid)
6429                .expect("test operation should succeed");
6430            let expected = Some(PackIndexLookup {
6431                crc32: owned_found.crc32,
6432                offset: owned_found.offset,
6433            });
6434            assert_eq!(view.find(&entry.oid), expected);
6435            assert_eq!(owned_view.find(&entry.oid), expected);
6436        }
6437    }
6438
6439    #[test]
6440    fn writes_pack_and_index_that_round_trip() {
6441        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
6442        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6443            .expect("test operation should succeed");
6444        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6445        let index =
6446            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6447        let oid = object
6448            .object_id(ObjectFormat::Sha1)
6449            .expect("test operation should succeed");
6450        assert_eq!(pack.entries[0].object, object);
6451        assert_eq!(index.pack_checksum, pack.checksum);
6452        assert_eq!(
6453            index
6454                .find(&oid)
6455                .expect("test operation should succeed")
6456                .offset,
6457            12
6458        );
6459    }
6460
6461    #[test]
6462    fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
6463        let objects = (0..8)
6464            .map(|idx| {
6465                EncodedObject::new(
6466                    ObjectType::Blob,
6467                    format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
6468                )
6469            })
6470            .collect::<Vec<_>>();
6471        let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
6472            .expect("test operation should succeed");
6473
6474        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
6475
6476        let view =
6477            PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
6478        let missing = sley_core::object_id_for_bytes(
6479            ObjectFormat::Sha1,
6480            "blob",
6481            b"not present in borrowed index\n",
6482        )
6483        .expect("test operation should succeed");
6484        assert_eq!(view.find(&missing), None);
6485    }
6486
6487    #[test]
6488    fn writes_sha256_pack_and_index_that_round_trip() {
6489        let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
6490        let written =
6491            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6492                .expect("test operation should succeed");
6493        let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
6494            .expect("test operation should succeed");
6495        let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
6496            .expect("test operation should succeed");
6497        let oid = object
6498            .object_id(ObjectFormat::Sha256)
6499            .expect("test operation should succeed");
6500        assert_eq!(pack.entries[0].object, object);
6501        assert_eq!(index.pack_checksum, pack.checksum);
6502        assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
6503        assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
6504        assert_eq!(
6505            index
6506                .find(&oid)
6507                .expect("test operation should succeed")
6508                .offset,
6509            12
6510        );
6511    }
6512
6513    #[test]
6514    fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
6515        let objects = (0..4)
6516            .map(|idx| {
6517                EncodedObject::new(
6518                    ObjectType::Blob,
6519                    format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
6520                )
6521            })
6522            .collect::<Vec<_>>();
6523        let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
6524            .expect("test operation should succeed");
6525
6526        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
6527    }
6528
6529    #[test]
6530    fn indexes_existing_sha256_pack_bytes() {
6531        let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
6532        let written =
6533            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6534                .expect("test operation should succeed");
6535
6536        let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
6537            .expect("test operation should succeed");
6538        let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
6539            .expect("test operation should succeed");
6540
6541        assert_eq!(indexed.pack_checksum, written.checksum);
6542        assert_eq!(indexed.entries, written.entries);
6543        assert_eq!(index.pack_checksum, written.checksum);
6544        assert_eq!(index.entries, written.entries);
6545    }
6546
6547    #[test]
6548    fn indexes_existing_delta_pack_bytes() {
6549        let (base, changed) = similar_blob_objects();
6550        let options = delta_pack_options(true);
6551        let written = PackFile::write_packed_with_options(
6552            &[base, changed.clone()],
6553            ObjectFormat::Sha1,
6554            &options,
6555        )
6556        .expect("test operation should succeed");
6557
6558        let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
6559            .expect("test operation should succeed");
6560        let index =
6561            PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
6562        let changed_oid = changed
6563            .object_id(ObjectFormat::Sha1)
6564            .expect("test operation should succeed");
6565
6566        assert_eq!(indexed.pack_checksum, written.checksum);
6567        assert_eq!(indexed.entries, written.entries);
6568        assert_eq!(
6569            index
6570                .find(&changed_oid)
6571                .expect("test operation should succeed")
6572                .offset,
6573            written.entries[1].offset
6574        );
6575        assert_eq!(
6576            index
6577                .find(&changed_oid)
6578                .expect("test operation should succeed")
6579                .crc32,
6580            written.entries[1].crc32
6581        );
6582    }
6583
6584    #[test]
6585    fn writes_ref_delta_pack_and_index_that_round_trip() {
6586        let (base, changed) = similar_blob_objects();
6587        let options = delta_pack_options(false);
6588        let written = PackFile::write_packed_with_options(
6589            &[base.clone(), changed.clone()],
6590            ObjectFormat::Sha1,
6591            &options,
6592        )
6593        .expect("test operation should succeed");
6594        let mut second_offset = written.entries[1].offset as usize;
6595        let header = parse_entry_header(&written.pack, &mut second_offset)
6596            .expect("test operation should succeed");
6597        assert_eq!(header.kind, PackObjectKind::RefDelta);
6598
6599        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6600        let index =
6601            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6602        let oid = changed
6603            .object_id(ObjectFormat::Sha1)
6604            .expect("test operation should succeed");
6605        assert_eq!(pack.entries[0].object, base);
6606        assert_eq!(pack.entries[1].object, changed);
6607        assert_eq!(index.pack_checksum, pack.checksum);
6608        assert_eq!(
6609            index
6610                .find(&oid)
6611                .expect("test operation should succeed")
6612                .offset,
6613            written.entries[1].offset
6614        );
6615    }
6616
6617    #[test]
6618    fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
6619        let (base, changed) = similar_blob_objects();
6620        let options = delta_pack_options(true);
6621        let written = PackFile::write_packed_with_options(
6622            &[base, changed.clone()],
6623            ObjectFormat::Sha1,
6624            &options,
6625        )
6626        .expect("test operation should succeed");
6627        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
6628        let mut second = written.entries[1].offset as usize;
6629        assert_eq!(
6630            parse_entry_header(&written.pack, &mut second)
6631                .expect("test operation should succeed")
6632                .kind,
6633            PackObjectKind::OfsDelta
6634        );
6635        // Ground truth from a full parse; single-object decode must match at every offset.
6636        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6637        for po in &parsed.entries {
6638            let got =
6639                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
6640                    Ok(None)
6641                })
6642                .expect("test operation should succeed");
6643            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
6644        }
6645    }
6646
6647    /// A [`HeaderTypeCache`] over a plain map, for asserting the cached header
6648    /// read is byte-identical to the uncached one cold and warm (sley#26).
6649    #[derive(Default)]
6650    struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
6651
6652    impl HeaderTypeCache for MapHeaderTypeCache {
6653        fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
6654            self.0.get(&pack_offset).copied()
6655        }
6656        fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
6657            self.0.insert(pack_offset, header);
6658        }
6659    }
6660
6661    #[test]
6662    fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
6663        let (base, changed) = similar_blob_objects();
6664        let options = delta_pack_options(true);
6665        let written =
6666            PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
6667                .expect("test operation should succeed");
6668        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
6669        let mut second = written.entries[1].offset as usize;
6670        assert_eq!(
6671            parse_entry_header(&written.pack, &mut second)
6672                .expect("test operation should succeed")
6673                .kind,
6674            PackObjectKind::OfsDelta
6675        );
6676
6677        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6678        let mut cache = MapHeaderTypeCache::default();
6679        for po in &parsed.entries {
6680            let uncached =
6681                read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
6682                    Ok(None)
6683                })
6684                .expect("test operation should succeed");
6685            // Type inherited from the chain base; size is the inflated body length.
6686            assert_eq!(
6687                uncached,
6688                (po.object.object_type, po.object.body.len() as u64),
6689                "uncached header at offset {}",
6690                po.entry.offset
6691            );
6692            // Cold cache: must agree with the uncached read and populate the memo.
6693            let cold = read_object_header_at_with_cache(
6694                &written.pack,
6695                po.entry.offset,
6696                ObjectFormat::Sha1,
6697                |_| Ok(None),
6698                &mut cache,
6699            )
6700            .expect("test operation should succeed");
6701            assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
6702        }
6703        // Warm cache: every offset now resolves from the memo and is still correct,
6704        // proving the fast path does not change behavior (sley#26).
6705        for po in &parsed.entries {
6706            let warm = read_object_header_at_with_cache(
6707                &written.pack,
6708                po.entry.offset,
6709                ObjectFormat::Sha1,
6710                |_| panic!("warm cache must not re-walk the chain"),
6711                &mut cache,
6712            )
6713            .expect("test operation should succeed");
6714            assert_eq!(
6715                warm,
6716                (po.object.object_type, po.object.body.len() as u64),
6717                "warm cache at offset {}",
6718                po.entry.offset
6719            );
6720        }
6721    }
6722
6723    #[test]
6724    fn read_object_at_matches_full_parse_for_ref_delta_pack() {
6725        let (base, changed) = similar_blob_objects();
6726        let options = delta_pack_options(false);
6727        let written = PackFile::write_packed_with_options(
6728            &[base, changed.clone()],
6729            ObjectFormat::Sha1,
6730            &options,
6731        )
6732        .expect("test operation should succeed");
6733        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6734        let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
6735            .entries
6736            .iter()
6737            .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
6738            .collect();
6739        for po in &parsed.entries {
6740            let got =
6741                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
6742                    Ok(by_oid.get(oid).cloned())
6743                })
6744                .expect("test operation should succeed");
6745            assert_eq!(*got, po.object);
6746        }
6747    }
6748
6749    /// A test-only [`PackDeltaCache`] that records every decode and counts hits,
6750    /// used to prove the cached decode path is byte-identical to the uncached
6751    /// one and that bases are reused across reads.
6752    #[derive(Default)]
6753    struct CountingDeltaCache {
6754        map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
6755        hits: std::cell::Cell<usize>,
6756        inserts: std::cell::Cell<usize>,
6757    }
6758
6759    impl PackDeltaCache for CountingDeltaCache {
6760        fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
6761            let hit = self.map.borrow().get(&offset).cloned();
6762            if hit.is_some() {
6763                self.hits.set(self.hits.get() + 1);
6764            }
6765            hit
6766        }
6767        fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
6768            self.inserts.set(self.inserts.get() + 1);
6769            self.map.borrow_mut().insert(offset, object);
6770        }
6771    }
6772
6773    #[test]
6774    fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
6775        // A multi-object pack with a real ofs-delta chain so the cache has bases
6776        // to reuse. Build several similar blobs to encourage deltification.
6777        let mut objects = Vec::new();
6778        for idx in 0..8u32 {
6779            let mut body = vec![b'x'; 4096];
6780            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
6781            objects.push(EncodedObject::new(ObjectType::Blob, body));
6782        }
6783        let options = delta_pack_options(true);
6784        let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
6785            .expect("test operation should succeed");
6786        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6787
6788        let cache = CountingDeltaCache::default();
6789        // Read every object twice through the cache; each result must equal the
6790        // ground-truth from the full parse, byte for byte, both times.
6791        for _ in 0..2 {
6792            for po in &parsed.entries {
6793                let got = read_object_at_with_cache_arc(
6794                    &written.pack,
6795                    po.entry.offset,
6796                    ObjectFormat::Sha1,
6797                    |_| Ok(None),
6798                    &cache,
6799                )
6800                .expect("test operation should succeed");
6801                assert_eq!(*got, po.object, "offset {}", po.entry.offset);
6802            }
6803        }
6804        // The second pass reads everything straight from the cache, so there must
6805        // be at least one hit (proving reuse, not just correctness).
6806        assert!(cache.hits.get() > 0, "cache never served a warm object");
6807    }
6808
6809    #[test]
6810    fn writes_ofs_delta_pack_and_index_that_round_trip() {
6811        let (base, changed) = similar_blob_objects();
6812        let options = delta_pack_options(true);
6813        let written = PackFile::write_packed_with_options(
6814            &[base.clone(), changed.clone()],
6815            ObjectFormat::Sha1,
6816            &options,
6817        )
6818        .expect("test operation should succeed");
6819        let mut second_offset = written.entries[1].offset as usize;
6820        let header = parse_entry_header(&written.pack, &mut second_offset)
6821            .expect("test operation should succeed");
6822        assert_eq!(header.kind, PackObjectKind::OfsDelta);
6823
6824        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6825        let index =
6826            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6827        let oid = changed
6828            .object_id(ObjectFormat::Sha1)
6829            .expect("test operation should succeed");
6830        assert_eq!(pack.entries[0].object, base);
6831        assert_eq!(pack.entries[1].object, changed);
6832        assert_eq!(index.pack_checksum, pack.checksum);
6833        assert_eq!(
6834            index
6835                .find(&oid)
6836                .expect("test operation should succeed")
6837                .offset,
6838            written.entries[1].offset
6839        );
6840    }
6841
6842    #[test]
6843    fn resolves_ofs_delta_pack_entry() {
6844        let base = b"hello";
6845        let result = b"hello world";
6846        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
6847        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6848        assert_eq!(parsed.entries.len(), 2);
6849        assert_eq!(parsed.entries[0].object.body, base);
6850        assert_eq!(parsed.entries[1].object.body, result);
6851        assert_eq!(
6852            parsed.entries[1].entry.oid,
6853            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6854                .expect("test operation should succeed")
6855        );
6856    }
6857
6858    #[test]
6859    fn resolves_ref_delta_pack_entry() {
6860        let base = b"hello";
6861        let result = b"hello world";
6862        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
6863        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6864        assert_eq!(parsed.entries.len(), 2);
6865        assert_eq!(parsed.entries[0].object.body, base);
6866        assert_eq!(parsed.entries[1].object.body, result);
6867        assert_eq!(
6868            parsed.entries[1].entry.oid,
6869            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6870                .expect("test operation should succeed")
6871        );
6872    }
6873
6874    #[test]
6875    fn resolves_thin_ref_delta_pack_entry_with_external_base() {
6876        let base = b"hello";
6877        let result = b"hello world";
6878        let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
6879        assert!(PackFile::parse_sha1(&pack).is_err());
6880
6881        let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
6882            .expect("test operation should succeed");
6883        let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
6884            if oid == &base_oid {
6885                Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
6886            } else {
6887                Ok(None)
6888            }
6889        })
6890        .expect("test operation should succeed");
6891        assert_eq!(parsed.entries.len(), 1);
6892        assert_eq!(parsed.entries[0].object.body, result);
6893        assert_eq!(
6894            parsed.entries[0].entry.oid,
6895            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6896                .expect("test operation should succeed")
6897        );
6898    }
6899
6900    #[test]
6901    fn rejects_bad_pack_checksum() {
6902        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6903        let last = pack.len() - 1;
6904        pack[last] ^= 1;
6905        assert!(PackFile::parse_sha1(&pack).is_err());
6906    }
6907
6908    #[test]
6909    fn raw_pack_index_rejects_bad_pack_checksum() {
6910        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6911        let last = pack.len() - 1;
6912        pack[last] ^= 1;
6913        assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
6914    }
6915
6916    #[test]
6917    fn pack_index_writer_rejects_duplicate_object_ids() {
6918        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
6919            .expect("test operation should succeed");
6920        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6921            .expect("test operation should succeed");
6922        let entries = vec![
6923            PackIndexEntry {
6924                oid,
6925                crc32: 1,
6926                offset: 12,
6927            },
6928            PackIndexEntry {
6929                oid,
6930                crc32: 2,
6931                offset: 24,
6932            },
6933        ];
6934        assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
6935    }
6936
6937    #[test]
6938    fn parses_single_entry_pack_index() {
6939        let oid = ObjectId::from_hex(
6940            ObjectFormat::Sha1,
6941            "ce013625030ba8dba906f756967f9e9ca394464a",
6942        )
6943        .expect("test operation should succeed");
6944        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6945            .expect("test operation should succeed");
6946        let index = single_entry_index(
6947            ObjectFormat::Sha1,
6948            oid,
6949            0x1234_5678,
6950            12,
6951            pack_checksum.clone(),
6952        );
6953        let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
6954        assert_eq!(parsed.version, 2);
6955        assert_eq!(parsed.pack_checksum, pack_checksum);
6956        assert_eq!(parsed.entries.len(), 1);
6957        assert_eq!(
6958            parsed
6959                .find(&oid)
6960                .expect("test operation should succeed")
6961                .offset,
6962            12
6963        );
6964        assert_eq!(
6965            parsed
6966                .find(&oid)
6967                .expect("test operation should succeed")
6968                .crc32,
6969            0x1234_5678
6970        );
6971        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6972    }
6973
6974    #[test]
6975    fn parses_single_entry_pack_index_v1() {
6976        let oid = ObjectId::from_hex(
6977            ObjectFormat::Sha1,
6978            "ce013625030ba8dba906f756967f9e9ca394464a",
6979        )
6980        .expect("test operation should succeed");
6981        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6982            .expect("test operation should succeed");
6983        let index =
6984            single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
6985        let parsed =
6986            PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
6987        assert_eq!(parsed.version, 1);
6988        assert_eq!(parsed.pack_checksum, pack_checksum);
6989        assert_eq!(parsed.entries.len(), 1);
6990        assert_eq!(
6991            parsed
6992                .find(&oid)
6993                .expect("test operation should succeed")
6994                .offset,
6995            0x1234_5678
6996        );
6997        assert_eq!(
6998            parsed
6999                .find(&oid)
7000                .expect("test operation should succeed")
7001                .crc32,
7002            0
7003        );
7004        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
7005    }
7006
7007    #[test]
7008    fn rejects_bad_pack_index_v1_checksum() {
7009        let oid = ObjectId::from_hex(
7010            ObjectFormat::Sha1,
7011            "ce013625030ba8dba906f756967f9e9ca394464a",
7012        )
7013        .expect("test operation should succeed");
7014        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7015            .expect("test operation should succeed");
7016        let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
7017        let last = index.len() - 1;
7018        index[last] ^= 1;
7019        assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
7020    }
7021
7022    #[test]
7023    fn pack_index_view_reads_v2_large_offsets() {
7024        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
7025            .expect("test operation should succeed");
7026        let second =
7027            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
7028                .expect("test operation should succeed");
7029        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7030            .expect("test operation should succeed");
7031        let entries = vec![
7032            PackIndexEntry {
7033                oid: first,
7034                crc32: 0x1111_2222,
7035                offset: 0x8000_0000,
7036            },
7037            PackIndexEntry {
7038                oid: second,
7039                crc32: 0x3333_4444,
7040                offset: 0x1_0000_0042,
7041            },
7042        ];
7043        let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
7044            .expect("test operation should succeed");
7045
7046        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
7047        let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
7048            .expect("test operation should succeed");
7049        for entry in entries {
7050            assert_eq!(
7051                view.find(&entry.oid),
7052                Some(PackIndexLookup {
7053                    crc32: entry.crc32,
7054                    offset: entry.offset,
7055                })
7056            );
7057        }
7058    }
7059
7060    #[test]
7061    fn pack_index_view_default_parse_checks_index_checksum() {
7062        let oid = ObjectId::from_hex(
7063            ObjectFormat::Sha1,
7064            "ce013625030ba8dba906f756967f9e9ca394464a",
7065        )
7066        .expect("test operation should succeed");
7067        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7068            .expect("test operation should succeed");
7069        let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
7070        let last = index.len() - 1;
7071        index[last] ^= 1;
7072
7073        assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
7074        let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
7075            .expect("test operation should succeed");
7076        let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
7077            Arc::from(index.clone().into_boxed_slice()),
7078            ObjectFormat::Sha1,
7079        )
7080        .expect("test operation should succeed");
7081        assert_eq!(
7082            view.find(&oid),
7083            Some(PackIndexLookup {
7084                crc32: 0x1234_5678,
7085                offset: 12,
7086            })
7087        );
7088        assert_eq!(
7089            trusted_view.find(&oid),
7090            Some(PackIndexLookup {
7091                crc32: 0x1234_5678,
7092                offset: 12,
7093            })
7094        );
7095    }
7096
7097    #[test]
7098    fn parses_pack_reverse_index() {
7099        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7100            .expect("test operation should succeed");
7101        let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
7102            .expect("test operation should succeed");
7103        let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
7104            .expect("test operation should succeed");
7105        assert_eq!(parsed.version, 1);
7106        assert_eq!(parsed.format, ObjectFormat::Sha1);
7107        assert_eq!(parsed.positions, vec![2, 0, 1]);
7108        assert_eq!(parsed.pack_checksum, pack_checksum);
7109        assert_eq!(
7110            PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
7111                .expect("test operation should succeed"),
7112            reverse_index
7113        );
7114    }
7115
7116    #[test]
7117    fn rejects_bad_pack_reverse_index_checksum() {
7118        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7119            .expect("test operation should succeed");
7120        let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
7121            .expect("test operation should succeed");
7122        let last = reverse_index.len() - 1;
7123        reverse_index[last] ^= 1;
7124        assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
7125    }
7126
7127    #[test]
7128    fn rejects_bad_pack_reverse_index_positions() {
7129        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7130            .expect("test operation should succeed");
7131        let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
7132        assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
7133        let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
7134        assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
7135        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7136            .expect("test operation should succeed");
7137        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
7138        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
7139    }
7140
7141    #[test]
7142    fn parses_pack_mtimes() {
7143        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7144            .expect("test operation should succeed");
7145        let mtimes = PackMtimes::write(
7146            ObjectFormat::Sha1,
7147            &[1, 1_700_000_000, u32::MAX],
7148            &pack_checksum,
7149        )
7150        .expect("test operation should succeed");
7151        let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
7152            .expect("test operation should succeed");
7153        assert_eq!(parsed.version, 1);
7154        assert_eq!(parsed.format, ObjectFormat::Sha1);
7155        assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
7156        assert_eq!(parsed.pack_checksum, pack_checksum);
7157        assert_eq!(
7158            PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
7159                .expect("test operation should succeed"),
7160            mtimes
7161        );
7162    }
7163
7164    #[test]
7165    fn rejects_bad_pack_mtimes_checksum() {
7166        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7167            .expect("test operation should succeed");
7168        let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
7169            .expect("test operation should succeed");
7170        let last = mtimes.len() - 1;
7171        mtimes[last] ^= 1;
7172        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
7173    }
7174
7175    #[test]
7176    fn rejects_bad_pack_mtimes_shape() {
7177        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7178            .expect("test operation should succeed");
7179        let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
7180        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
7181
7182        let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
7183        wrong_hash[11] = 2;
7184        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
7185        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
7186            .expect("test operation should succeed");
7187        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
7188        assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
7189    }
7190
7191    #[test]
7192    fn parses_multi_pack_index_header_and_chunk_lookup() {
7193        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
7194            .expect("test operation should succeed");
7195        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
7196            .expect("test operation should succeed");
7197        let chunks = midx_chunks_with_pack_names(
7198            ObjectFormat::Sha1,
7199            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
7200            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
7201        );
7202        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
7203        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
7204            .expect("test operation should succeed");
7205        assert_eq!(parsed.version, 2);
7206        assert_eq!(parsed.format, ObjectFormat::Sha1);
7207        assert_eq!(parsed.pack_count, 2);
7208        assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
7209        assert_eq!(parsed.object_count, 2);
7210        assert_eq!(parsed.objects.len(), 2);
7211        assert_eq!(
7212            parsed
7213                .find(&first)
7214                .expect("test operation should succeed")
7215                .pack_int_id,
7216            0
7217        );
7218        assert_eq!(
7219            parsed
7220                .find(&first)
7221                .expect("test operation should succeed")
7222                .offset,
7223            12
7224        );
7225        assert_eq!(
7226            parsed
7227                .find(&second)
7228                .expect("test operation should succeed")
7229                .pack_int_id,
7230            1
7231        );
7232        assert_eq!(
7233            parsed
7234                .find(&second)
7235                .expect("test operation should succeed")
7236                .offset,
7237            0x1_0000_0000
7238        );
7239        assert_eq!(parsed.reverse_index, None);
7240        assert_eq!(parsed.bitmapped_packs, None);
7241        assert_eq!(parsed.chunks.len(), 5);
7242        assert_eq!(parsed.chunks[0].id, *b"PNAM");
7243        assert_eq!(parsed.chunks[0].offset, 84);
7244        assert_eq!(parsed.chunks[0].len, 24);
7245        assert_eq!(parsed.chunks[1].id, *b"OIDF");
7246        assert_eq!(parsed.chunks[1].offset, 108);
7247        assert_eq!(parsed.chunks[1].len, 1024);
7248    }
7249
7250    #[test]
7251    fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
7252        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
7253            .expect("test operation should succeed");
7254        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
7255            .expect("test operation should succeed");
7256        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
7257            .expect("test operation should succeed");
7258        let chunks = midx_chunks_with_pack_names(
7259            ObjectFormat::Sha1,
7260            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
7261            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
7262        );
7263        let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
7264        let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
7265            .expect("test operation should succeed");
7266
7267        assert!(lookup.contains(&first));
7268        assert!(lookup.contains(&second));
7269        assert!(!lookup.contains(&missing));
7270
7271        let first_entry = lookup
7272            .find(&first)
7273            .expect("test operation should succeed")
7274            .expect("object should be present");
7275        assert_eq!(
7276            lookup.pack_name(first_entry.pack_int_id),
7277            Some("pack-a.idx")
7278        );
7279        assert_eq!(first_entry.offset, 12);
7280
7281        let second_entry = lookup
7282            .find(&second)
7283            .expect("test operation should succeed")
7284            .expect("object should be present");
7285        assert_eq!(
7286            lookup.pack_name(second_entry.pack_int_id),
7287            Some("pack-b.idx")
7288        );
7289        assert_eq!(second_entry.offset, 0x1_0000_0000);
7290        assert!(
7291            lookup
7292                .find(&missing)
7293                .expect("test operation should succeed")
7294                .is_none()
7295        );
7296    }
7297
7298    #[test]
7299    fn rejects_bad_multi_pack_index_checksum() {
7300        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
7301        let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
7302        let last = midx.len() - 1;
7303        midx[last] ^= 1;
7304        assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
7305    }
7306
7307    #[test]
7308    fn rejects_bad_multi_pack_index_shape() {
7309        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
7310        let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
7311        wrong_hash[5] = 2;
7312        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
7313        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
7314            .expect("test operation should succeed");
7315        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
7316        assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
7317
7318        let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
7319        missing_terminator[12] = b'B';
7320        let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
7321        let checksum =
7322            sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
7323                .expect("test operation should succeed");
7324        missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
7325        assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
7326
7327        let mut bad_offset = multi_pack_index(
7328            ObjectFormat::Sha1,
7329            2,
7330            0,
7331            &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
7332        );
7333        bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
7334        let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
7335        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
7336            .expect("test operation should succeed");
7337        bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
7338        assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
7339    }
7340
7341    #[test]
7342    fn rejects_bad_multi_pack_index_pack_names() {
7343        let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
7344        assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
7345
7346        let too_few = multi_pack_index(
7347            ObjectFormat::Sha1,
7348            2,
7349            2,
7350            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
7351        );
7352        assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
7353
7354        let bad_padding = multi_pack_index(
7355            ObjectFormat::Sha1,
7356            2,
7357            1,
7358            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
7359        );
7360        assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
7361
7362        let unsorted_v1 = multi_pack_index(
7363            ObjectFormat::Sha1,
7364            1,
7365            2,
7366            &midx_chunks_with_pack_names(
7367                ObjectFormat::Sha1,
7368                b"pack-b.idx\0pack-a.idx\0".to_vec(),
7369                &[],
7370            ),
7371        );
7372        assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
7373
7374        let unsorted_v2 = multi_pack_index(
7375            ObjectFormat::Sha1,
7376            2,
7377            2,
7378            &midx_chunks_with_pack_names(
7379                ObjectFormat::Sha1,
7380                b"pack-b.idx\0pack-a.idx\0".to_vec(),
7381                &[],
7382            ),
7383        );
7384        let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
7385            .expect("test operation should succeed");
7386        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
7387    }
7388
7389    #[test]
7390    fn rejects_bad_multi_pack_index_object_tables() {
7391        let oid_a = ObjectId::from_hex(
7392            ObjectFormat::Sha1,
7393            "1111111111111111111111111111111111111111",
7394        )
7395        .expect("test operation should succeed");
7396        let oid_b = ObjectId::from_hex(
7397            ObjectFormat::Sha1,
7398            "2222222222222222222222222222222222222222",
7399        )
7400        .expect("test operation should succeed");
7401
7402        let missing_oidf = multi_pack_index(
7403            ObjectFormat::Sha1,
7404            2,
7405            1,
7406            &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
7407        );
7408        assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());
7409
7410        let bad_fanout = vec![
7411            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
7412            (*b"OIDF", vec![0; 256 * 4]),
7413            (*b"OIDL", oid_a.as_bytes().to_vec()),
7414            (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
7415        ];
7416        let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
7417        assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());
7418
7419        let mut unsorted = Vec::new();
7420        unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
7421        unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
7422        let mut oid_lookup = Vec::new();
7423        oid_lookup.extend_from_slice(oid_b.as_bytes());
7424        oid_lookup.extend_from_slice(oid_a.as_bytes());
7425        unsorted.push((*b"OIDL", oid_lookup));
7426        unsorted.push((
7427            *b"OOFF",
7428            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
7429        ));
7430        let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
7431        assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());
7432
7433        let bad_pack = multi_pack_index(
7434            ObjectFormat::Sha1,
7435            2,
7436            1,
7437            &midx_chunks_with_pack_names(
7438                ObjectFormat::Sha1,
7439                b"pack-a.idx\0\0".to_vec(),
7440                &[(oid_a.clone(), 1, 12)],
7441            ),
7442        );
7443        assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());
7444
7445        let mut large_offsets = Vec::new();
7446        let missing_loff = vec![
7447            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
7448            (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
7449            (*b"OIDL", oid_a.as_bytes().to_vec()),
7450            (
7451                *b"OOFF",
7452                midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
7453            ),
7454        ];
7455        let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
7456        assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());
7457
7458        let mut bad_loff =
7459            midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
7460        bad_loff.push((*b"LOFF", vec![0]));
7461        let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
7462        assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
7463    }
7464
7465    #[test]
7466    fn parses_multi_pack_index_bitmap_chunks() {
7467        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
7468            .expect("test operation should succeed");
7469        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
7470            .expect("test operation should succeed");
7471        let mut chunks = midx_chunks_with_pack_names(
7472            ObjectFormat::Sha1,
7473            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
7474            &[(first, 0, 12), (second, 1, 24)],
7475        );
7476        chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
7477        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
7478        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
7479
7480        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
7481            .expect("test operation should succeed");
7482        assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
7483        assert_eq!(
7484            parsed.bitmapped_packs,
7485            Some(vec![
7486                MultiPackBitmapPack {
7487                    bitmap_pos: 0,
7488                    bitmap_nr: 1,
7489                },
7490                MultiPackBitmapPack {
7491                    bitmap_pos: 1,
7492                    bitmap_nr: 1,
7493                },
7494            ])
7495        );
7496    }
7497
7498    #[test]
7499    fn writes_multi_pack_index_that_round_trips() {
7500        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
7501            .expect("test operation should succeed");
7502        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
7503            .expect("test operation should succeed");
7504        let bytes = MultiPackIndex::write(
7505            ObjectFormat::Sha1,
7506            2,
7507            &["pack-b.idx".into(), "pack-a.idx".into()],
7508            &[
7509                MultiPackIndexEntry {
7510                    oid: second.clone(),
7511                    pack_int_id: 0,
7512                    offset: 0x1_0000_0000,
7513                    force_large_offset: false,
7514                },
7515                MultiPackIndexEntry {
7516                    oid: first.clone(),
7517                    pack_int_id: 1,
7518                    offset: 12,
7519                    force_large_offset: false,
7520                },
7521            ],
7522        )
7523        .expect("test operation should succeed");
7524
7525        let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
7526            .expect("test operation should succeed");
7527        assert_eq!(parsed.version, 2);
7528        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
7529        assert_eq!(parsed.object_count, 2);
7530        assert_eq!(
7531            parsed
7532                .find(&first)
7533                .expect("test operation should succeed")
7534                .pack_int_id,
7535            1
7536        );
7537        assert_eq!(
7538            parsed
7539                .find(&first)
7540                .expect("test operation should succeed")
7541                .offset,
7542            12
7543        );
7544        assert_eq!(
7545            parsed
7546                .find(&second)
7547                .expect("test operation should succeed")
7548                .pack_int_id,
7549            0
7550        );
7551        assert_eq!(
7552            parsed
7553                .find(&second)
7554                .expect("test operation should succeed")
7555                .offset,
7556            0x1_0000_0000
7557        );
7558        assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
7559    }
7560
7561    #[test]
7562    fn write_multi_pack_index_rejects_invalid_inputs() {
7563        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
7564            .expect("test operation should succeed");
7565        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
7566        assert!(
7567            MultiPackIndex::write(
7568                ObjectFormat::Sha1,
7569                1,
7570                &["pack-b.idx".into(), "pack-a.idx".into()],
7571                &[],
7572            )
7573            .is_err()
7574        );
7575        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
7576        assert!(
7577            MultiPackIndex::write(
7578                ObjectFormat::Sha1,
7579                2,
7580                &["pack-a.idx".into()],
7581                &[MultiPackIndexEntry {
7582                    oid,
7583                    pack_int_id: 1,
7584                    offset: 12,
7585                    force_large_offset: false,
7586                }],
7587            )
7588            .is_err()
7589        );
7590        assert!(
7591            MultiPackIndex::write(
7592                ObjectFormat::Sha1,
7593                2,
7594                &["pack-a.idx".into()],
7595                &[
7596                    MultiPackIndexEntry {
7597                        oid,
7598                        pack_int_id: 0,
7599                        offset: 12,
7600                        force_large_offset: false,
7601                    },
7602                    MultiPackIndexEntry {
7603                        oid,
7604                        pack_int_id: 0,
7605                        offset: 24,
7606                        force_large_offset: false,
7607                    },
7608                ],
7609            )
7610            .is_err()
7611        );
7612    }
7613
7614    #[test]
7615    fn rejects_bad_multi_pack_index_bitmap_chunks() {
7616        let oid_a = ObjectId::from_hex(
7617            ObjectFormat::Sha1,
7618            "1111111111111111111111111111111111111111",
7619        )
7620        .expect("test operation should succeed");
7621        let oid_b = ObjectId::from_hex(
7622            ObjectFormat::Sha1,
7623            "2222222222222222222222222222222222222222",
7624        )
7625        .expect("test operation should succeed");
7626
7627        let mut duplicate_ridx = midx_chunks_with_pack_names(
7628            ObjectFormat::Sha1,
7629            b"pack-a.idx\0\0".to_vec(),
7630            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
7631        );
7632        duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
7633        let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
7634        assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
7635
7636        let mut short_btmp = midx_chunks_with_pack_names(
7637            ObjectFormat::Sha1,
7638            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
7639            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
7640        );
7641        short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
7642        let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
7643        assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
7644
7645        let mut out_of_range_btmp = midx_chunks_with_pack_names(
7646            ObjectFormat::Sha1,
7647            b"pack-a.idx\0\0".to_vec(),
7648            &[(oid_a, 0, 12), (oid_b, 0, 24)],
7649        );
7650        out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
7651        let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
7652        assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
7653    }
7654
7655    #[test]
7656    fn parses_pack_bitmap_index_with_hash_cache() {
7657        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7658            .expect("test operation should succeed");
7659        let bitmap = pack_bitmap_index(
7660            ObjectFormat::Sha1,
7661            3,
7662            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
7663            &pack_checksum,
7664            &[(2, 0, 1, &[0b101])],
7665            Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
7666        );
7667
7668        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
7669            .expect("test operation should succeed");
7670        assert_eq!(parsed.version, 1);
7671        assert_eq!(parsed.format, ObjectFormat::Sha1);
7672        assert_eq!(
7673            parsed.options,
7674            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
7675        );
7676        assert_eq!(parsed.pack_checksum, pack_checksum);
7677        assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
7678        assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
7679        assert_eq!(parsed.entries.len(), 1);
7680        let entry = parsed
7681            .entry_for_index_position(2)
7682            .expect("test operation should succeed");
7683        assert_eq!(entry.xor_offset, 0);
7684        assert_eq!(entry.flags, 1);
7685        assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
7686        assert_eq!(
7687            parsed.name_hash_cache,
7688            Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
7689        );
7690    }
7691
7692    #[test]
7693    fn parses_pack_bitmap_index_sha256() {
7694        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7695            .expect("test operation should succeed");
7696        let bitmap = pack_bitmap_index(
7697            ObjectFormat::Sha256,
7698            2,
7699            PackBitmapIndex::OPTION_FULL_DAG,
7700            &pack_checksum,
7701            &[(0, 0, 0, &[0b11])],
7702            None,
7703        );
7704
7705        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
7706            .expect("test operation should succeed");
7707        assert_eq!(parsed.version, 1);
7708        assert_eq!(parsed.format, ObjectFormat::Sha256);
7709        assert_eq!(parsed.pack_checksum, pack_checksum);
7710        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
7711        assert_eq!(parsed.entries[0].object_position, 0);
7712        assert_eq!(parsed.name_hash_cache, None);
7713    }
7714
7715    #[test]
7716    fn parses_upstream_git_written_pack_bitmap_index() {
7717        let root = unique_temp_dir("git-pack-bitmap-upstream");
7718        fs::create_dir_all(&root).expect("test operation should succeed");
7719        {
7720            run_git_success(&root, &["init", "-q", "-b", "main"]);
7721            run_git_success(
7722                &root,
7723                &[
7724                    "-c",
7725                    "user.name=Example User",
7726                    "-c",
7727                    "user.email=example@example.invalid",
7728                    "commit",
7729                    "--allow-empty",
7730                    "-q",
7731                    "-m",
7732                    "one",
7733                ],
7734            );
7735            run_git_success(
7736                &root,
7737                &[
7738                    "-c",
7739                    "user.name=Example User",
7740                    "-c",
7741                    "user.email=example@example.invalid",
7742                    "commit",
7743                    "--allow-empty",
7744                    "-q",
7745                    "-m",
7746                    "two",
7747                ],
7748            );
7749            run_git_success(&root, &["repack", "-adb"]);
7750            let pack_dir = root.join(".git").join("objects").join("pack");
7751            let idx_path = single_path_with_extension(&pack_dir, "idx");
7752            let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
7753            let index = PackIndex::parse(
7754                &fs::read(idx_path).expect("test operation should succeed"),
7755                ObjectFormat::Sha1,
7756            )
7757            .expect("test operation should succeed");
7758            let bitmap = PackBitmapIndex::parse(
7759                &fs::read(bitmap_path).expect("test operation should succeed"),
7760                ObjectFormat::Sha1,
7761                index.entries.len(),
7762            )
7763            .expect("test operation should succeed");
7764            assert_eq!(bitmap.pack_checksum, index.pack_checksum);
7765            assert!(!bitmap.entries.is_empty());
7766        };
7767        let _ = fs::remove_dir_all(&root);
7768    }
7769
7770    #[test]
7771    fn rejects_bad_pack_bitmap_index_header_and_checksum() {
7772        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7773            .expect("test operation should succeed");
7774        let bitmap = pack_bitmap_index(
7775            ObjectFormat::Sha1,
7776            1,
7777            PackBitmapIndex::OPTION_FULL_DAG,
7778            &pack_checksum,
7779            &[(0, 0, 0, &[1])],
7780            None,
7781        );
7782
7783        let mut bad_signature = bitmap.clone();
7784        bad_signature[0] = b'X';
7785        assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
7786
7787        let mut bad_version = bitmap.clone();
7788        bad_version[5] = 2;
7789        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
7790        assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
7791
7792        let mut bad_option = bitmap.clone();
7793        bad_option[7] = 0x20;
7794        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
7795        assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
7796
7797        let mut bad_checksum = bitmap;
7798        let last = bad_checksum.len() - 1;
7799        bad_checksum[last] ^= 1;
7800        assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
7801    }
7802
7803    #[test]
7804    fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
7805        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7806            .expect("test operation should succeed");
7807        let bitmap = pack_bitmap_index(
7808            ObjectFormat::Sha1,
7809            2,
7810            PackBitmapIndex::OPTION_FULL_DAG,
7811            &pack_checksum,
7812            &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
7813            None,
7814        );
7815
7816        let mut truncated = bitmap.clone();
7817        truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
7818        refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
7819        assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
7820
7821        let mut out_of_range_position = pack_bitmap_index(
7822            ObjectFormat::Sha1,
7823            2,
7824            PackBitmapIndex::OPTION_FULL_DAG,
7825            &pack_checksum,
7826            &[(2, 0, 0, &[0b01])],
7827            None,
7828        );
7829        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7830        refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
7831        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7832
7833        let invalid_xor = pack_bitmap_index(
7834            ObjectFormat::Sha1,
7835            2,
7836            PackBitmapIndex::OPTION_FULL_DAG,
7837            &pack_checksum,
7838            &[(0, 1, 0, &[0b01])],
7839            None,
7840        );
7841        assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
7842    }
7843
7844    #[test]
7845    fn parses_single_entry_pack_index_sha256() {
7846        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
7847            .expect("test operation should succeed");
7848        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7849            .expect("test operation should succeed");
7850        let index = single_entry_index(
7851            ObjectFormat::Sha256,
7852            oid,
7853            0x1234_5678,
7854            12,
7855            pack_checksum.clone(),
7856        );
7857        let parsed =
7858            PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
7859        assert_eq!(parsed.version, 2);
7860        assert_eq!(parsed.pack_checksum, pack_checksum);
7861        assert_eq!(parsed.entries.len(), 1);
7862        assert_eq!(
7863            parsed
7864                .find(&oid)
7865                .expect("test operation should succeed")
7866                .offset,
7867            12
7868        );
7869        assert_eq!(
7870            parsed
7871                .find(&oid)
7872                .expect("test operation should succeed")
7873                .crc32,
7874            0x1234_5678
7875        );
7876        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
7877        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha256);
7878    }
7879
7880    #[test]
7881    fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
7882        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
7883    }
7884
7885    #[test]
7886    fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
7887        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
7888    }
7889
7890    #[test]
7891    fn write_packed_rejects_duplicate_objects() {
7892        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7893        assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
7894    }
7895
7896    #[test]
7897    fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
7898        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7899        let sha1 = object
7900            .object_id(ObjectFormat::Sha1)
7901            .expect("test operation should succeed");
7902        let sha256 = object
7903            .object_id(ObjectFormat::Sha256)
7904            .expect("test operation should succeed");
7905        let duplicate = [
7906            PackInput {
7907                oid: &sha1,
7908                object: &object,
7909            },
7910            PackInput {
7911                oid: &sha1,
7912                object: &object,
7913            },
7914        ];
7915        assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
7916
7917        let wrong_format = [PackInput {
7918            oid: &sha256,
7919            object: &object,
7920        }];
7921        assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
7922    }
7923
7924    #[test]
7925    fn write_packed_with_known_ids_to_writer_matches_in_memory_pack() {
7926        let objects = similar_blob_family(6);
7927        let object_ids = objects
7928            .iter()
7929            .map(|object| {
7930                object
7931                    .object_id(ObjectFormat::Sha1)
7932                    .expect("test operation should succeed")
7933            })
7934            .collect::<Vec<_>>();
7935        let inputs = objects
7936            .iter()
7937            .zip(&object_ids)
7938            .map(|(object, oid)| PackInput { oid, object })
7939            .collect::<Vec<_>>();
7940        let options = PackWriteOptions::new();
7941        let in_memory = PackFile::write_packed_with_known_ids_and_options(
7942            &inputs,
7943            ObjectFormat::Sha1,
7944            &options,
7945        )
7946        .expect("test operation should succeed");
7947        let mut written = Vec::new();
7948        let streamed = PackFile::write_packed_with_known_ids_to_writer(
7949            &inputs,
7950            ObjectFormat::Sha1,
7951            &options,
7952            &mut written,
7953        )
7954        .expect("test operation should succeed");
7955
7956        assert_eq!(written, in_memory.pack);
7957        assert_eq!(streamed.index, in_memory.index);
7958        assert_eq!(streamed.checksum, in_memory.checksum);
7959        assert_eq!(streamed.entries, in_memory.entries);
7960        assert_eq!(streamed.delta_count, in_memory.delta_count);
7961        assert_eq!(streamed.pack_size, in_memory.pack.len() as u64);
7962    }
7963
7964    fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7965        let objects = similar_blob_family(8);
7966        let packed =
7967            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7968        let undeltified =
7969            PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7970
7971        // The whole point of delta selection: the packed output is smaller than
7972        // storing every object undeltified.
7973        assert!(
7974            packed.pack.len() < undeltified.pack.len(),
7975            "expected delta pack ({}) smaller than undeltified pack ({})",
7976            packed.pack.len(),
7977            undeltified.pack.len()
7978        );
7979
7980        // At least one object must actually be stored as a delta.
7981        let kinds = pack_entry_kinds(&packed.pack, format);
7982        let delta_count = kinds
7983            .iter()
7984            .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7985            .count();
7986        assert!(
7987            delta_count >= 1,
7988            "expected at least one delta entry, found kinds {kinds:?}"
7989        );
7990
7991        // Round-trip: every original object reconstructs byte-for-byte.
7992        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7993        assert_eq!(parsed.entries.len(), objects.len());
7994        for object in &objects {
7995            let oid = object
7996                .object_id(format)
7997                .expect("test operation should succeed");
7998            let found = parsed
7999                .entries
8000                .iter()
8001                .find(|entry| entry.entry.oid == oid)
8002                .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
8003            assert_eq!(&found.object, object, "object {oid} did not round-trip");
8004        }
8005
8006        // The index must agree with the pack and locate every object.
8007        let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
8008        assert_eq!(index.pack_checksum, packed.checksum);
8009        for object in &objects {
8010            let oid = object
8011                .object_id(format)
8012                .expect("test operation should succeed");
8013            assert!(index.find(&oid).is_some(), "index missing {oid}");
8014        }
8015    }
8016
8017    #[test]
8018    fn write_packed_emits_ofs_delta_by_default() {
8019        let objects = similar_blob_family(6);
8020        let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
8021            .expect("test operation should succeed");
8022        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
8023        assert!(
8024            kinds.contains(&PackObjectKind::OfsDelta),
8025            "expected an ofs-delta entry by default, found {kinds:?}"
8026        );
8027        assert!(
8028            !kinds.contains(&PackObjectKind::RefDelta),
8029            "default self-contained pack must not use ref-delta, found {kinds:?}"
8030        );
8031        // Round-trips.
8032        assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
8033    }
8034
8035    #[test]
8036    fn write_packed_can_emit_ref_delta() {
8037        let objects = similar_blob_family(6);
8038        let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
8039        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
8040            .expect("test operation should succeed");
8041        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
8042        assert!(
8043            kinds.contains(&PackObjectKind::RefDelta),
8044            "expected a ref-delta entry, found {kinds:?}"
8045        );
8046        assert!(
8047            !kinds.contains(&PackObjectKind::OfsDelta),
8048            "ref-delta mode must not emit ofs-delta, found {kinds:?}"
8049        );
8050
8051        // Ref-delta packs are still self-contained here, so they round-trip
8052        // without any external base lookup.
8053        let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
8054            .expect("test operation should succeed");
8055        assert_eq!(parsed.entries.len(), objects.len());
8056    }
8057
8058    #[test]
8059    fn write_packed_bounds_delta_chain_depth() {
8060        // A long chain of progressively-modified blobs. With a large window
8061        // every object could otherwise delta against its immediate predecessor,
8062        // forming a chain as long as the input.
8063        let objects = incremental_blob_chain(20);
8064        let format = ObjectFormat::Sha1;
8065
8066        for max_depth in [1usize, 2, 5] {
8067            let options = PackWriteOptions::new()
8068                .with_window(20)
8069                .with_depth(max_depth);
8070            let packed = PackFile::write_packed_with_options(&objects, format, &options)
8071                .expect("test operation should succeed");
8072
8073            let depths = pack_entry_depths(&packed.pack, format);
8074            let observed = depths.iter().copied().max().unwrap_or(0);
8075            assert!(
8076                observed <= max_depth,
8077                "max chain depth {observed} exceeded bound {max_depth}"
8078            );
8079
8080            // Still correct: round-trips byte-for-byte.
8081            let parsed =
8082                PackFile::parse(&packed.pack, format).expect("test operation should succeed");
8083            for object in &objects {
8084                let oid = object
8085                    .object_id(format)
8086                    .expect("test operation should succeed");
8087                let found = parsed
8088                    .entries
8089                    .iter()
8090                    .find(|entry| entry.entry.oid == oid)
8091                    .expect("test operation should succeed");
8092                assert_eq!(&found.object, object);
8093            }
8094        }
8095    }
8096
8097    #[test]
8098    fn write_packed_depth_zero_stores_everything_undeltified() {
8099        let objects = similar_blob_family(5);
8100        let options = PackWriteOptions::new().with_depth(0);
8101        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
8102            .expect("test operation should succeed");
8103        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
8104        assert!(
8105            kinds
8106                .iter()
8107                .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
8108            "depth 0 must disable deltas, found {kinds:?}"
8109        );
8110    }
8111
8112    #[test]
8113    fn write_thin_uses_external_base_and_round_trips_sha1() {
8114        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
8115    }
8116
8117    #[test]
8118    fn write_thin_uses_external_base_and_round_trips_sha256() {
8119        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
8120    }
8121
8122    fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
8123        // The base object stays OUT of the pack; only `target` is written, as a
8124        // ref-delta against the external base's object id.
8125        let base = blob_with_marker("EXTERNAL-BASE");
8126        let target = blob_with_marker("EXTERNAL-TARGET");
8127        let base_oid = base
8128            .object_id(format)
8129            .expect("test operation should succeed");
8130
8131        let mut external = HashMap::new();
8132        external.insert(base_oid, base.clone());
8133        let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
8134            .expect("test operation should succeed");
8135
8136        // Exactly one entry, encoded as a ref-delta to the external base.
8137        let kinds = pack_entry_kinds(&packed.pack, format);
8138        assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
8139
8140        // The external base reference must be the base oid.
8141        let mut offset = 12usize;
8142        let header =
8143            parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
8144        assert_eq!(header.kind, PackObjectKind::RefDelta);
8145        let referenced =
8146            ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
8147                .expect("test operation should succeed");
8148        assert_eq!(referenced, base_oid);
8149
8150        // A plain (non-thin) parse fails: the base is not present.
8151        assert!(PackFile::parse(&packed.pack, format).is_err());
8152
8153        // A thin parse that supplies the external base reconstructs the target.
8154        let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
8155            if oid == &base_oid {
8156                Ok(Some(base.clone()))
8157            } else {
8158                Ok(None)
8159            }
8160        })
8161        .expect("test operation should succeed");
8162        assert_eq!(parsed.entries.len(), 1);
8163        assert_eq!(parsed.entries[0].object, target);
8164    }
8165
8166    #[test]
8167    fn write_packed_preserves_distinct_objects_with_no_similarity() {
8168        // Unrelated objects: nothing should delta, but the pack must still be
8169        // valid and complete.
8170        let objects = vec![
8171            EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
8172            EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
8173            EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
8174        ];
8175        let format = ObjectFormat::Sha1;
8176        let packed =
8177            PackFile::write_packed(&objects, format).expect("test operation should succeed");
8178        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
8179        assert_eq!(parsed.entries.len(), objects.len());
8180        for object in &objects {
8181            let oid = object
8182                .object_id(format)
8183                .expect("test operation should succeed");
8184            assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
8185        }
8186    }
8187
8188    /// Build a family of blobs that all share a large common region but differ
8189    /// in a marker placed in the *middle*, so a good delta finds copy regions on
8190    /// both sides of the change.
8191    fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
8192        let mut common_head = Vec::new();
8193        for _ in 0..200 {
8194            common_head.extend_from_slice(b"shared header line for delta testing\n");
8195        }
8196        let mut common_tail = Vec::new();
8197        for _ in 0..200 {
8198            common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
8199        }
8200        (0..count)
8201            .map(|idx| {
8202                let mut body = common_head.clone();
8203                body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
8204                body.extend_from_slice(&common_tail);
8205                EncodedObject::new(ObjectType::Blob, body)
8206            })
8207            .collect()
8208    }
8209
8210    /// Build a chain where each blob is the previous one plus an appended line,
8211    /// so each is highly similar to its predecessor.
8212    fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
8213        let mut body = Vec::new();
8214        for _ in 0..100 {
8215            body.extend_from_slice(b"baseline content shared across the whole chain\n");
8216        }
8217        let mut objects = Vec::with_capacity(count);
8218        for idx in 0..count {
8219            body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
8220            objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
8221        }
8222        objects
8223    }
8224
8225    fn blob_with_marker(marker: &str) -> EncodedObject {
8226        let mut body = Vec::new();
8227        for _ in 0..150 {
8228            body.extend_from_slice(b"common body shared between base and target\n");
8229        }
8230        body.extend_from_slice(marker.as_bytes());
8231        body.push(b'\n');
8232        for _ in 0..150 {
8233            body.extend_from_slice(b"more common body shared between objects\n");
8234        }
8235        EncodedObject::new(ObjectType::Blob, body)
8236    }
8237
8238    /// Classify every entry in a pack (in pack order) by its on-disk kind.
8239    fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
8240        pack_entry_descriptors(pack, format)
8241            .into_iter()
8242            .map(|descriptor| descriptor.kind)
8243            .collect()
8244    }
8245
8246    /// Compute each entry's delta chain depth (0 = undeltified base), in pack
8247    /// order. Entries always appear after their in-pack bases, so a single
8248    /// forward pass suffices.
8249    fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
8250        let descriptors = pack_entry_descriptors(pack, format);
8251        let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
8252        let mut depths = Vec::with_capacity(descriptors.len());
8253        for descriptor in &descriptors {
8254            let depth = match &descriptor.base {
8255                EntryBase::None => 0,
8256                EntryBase::Offset(base_offset) => {
8257                    depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
8258                }
8259                // Ref-delta to an in-pack base: look it up by offset via oid is
8260                // unnecessary for these tests (which only use ofs-delta for the
8261                // chains), so treat as depth 1 if unknown.
8262                EntryBase::Ref => 1,
8263            };
8264            depth_by_offset.insert(descriptor.offset, depth);
8265            depths.push(depth);
8266        }
8267        depths
8268    }
8269
8270    struct EntryDescriptor {
8271        offset: u64,
8272        kind: PackObjectKind,
8273        base: EntryBase,
8274    }
8275
8276    enum EntryBase {
8277        None,
8278        Offset(u64),
8279        Ref,
8280    }
8281
8282    fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
8283        let trailer_offset = pack.len() - format.raw_len();
8284        let count = u32_be(&pack[8..12]) as usize;
8285        let mut offset = 12usize;
8286        let mut descriptors = Vec::with_capacity(count);
8287        for _ in 0..count {
8288            let entry_offset = offset as u64;
8289            let header =
8290                parse_entry_header(pack, &mut offset).expect("test operation should succeed");
8291            let base = match header.kind {
8292                PackObjectKind::OfsDelta => {
8293                    let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
8294                        .expect("test operation should succeed");
8295                    EntryBase::Offset(base_offset)
8296                }
8297                PackObjectKind::RefDelta => {
8298                    offset += format.raw_len();
8299                    EntryBase::Ref
8300                }
8301                _ => EntryBase::None,
8302            };
8303            let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
8304            let mut body = Vec::new();
8305            decoder
8306                .read_to_end(&mut body)
8307                .expect("test operation should succeed");
8308            offset += decoder.total_in() as usize;
8309            descriptors.push(EntryDescriptor {
8310                offset: entry_offset,
8311                kind: header.kind,
8312                base,
8313            });
8314        }
8315        descriptors
8316    }
8317
8318    fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
8319        let mut base = Vec::new();
8320        for _ in 0..300 {
8321            base.extend_from_slice(b"common payload\n");
8322        }
8323        base.extend_from_slice(b"base\n");
8324        let mut changed = Vec::new();
8325        for _ in 0..300 {
8326            changed.extend_from_slice(b"common payload\n");
8327        }
8328        changed.extend_from_slice(b"changed\n");
8329        (
8330            EncodedObject::new(ObjectType::Blob, base),
8331            EncodedObject::new(ObjectType::Blob, changed),
8332        )
8333    }
8334
8335    fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
8336        let mut pack = Vec::new();
8337        pack.extend_from_slice(b"PACK");
8338        pack.extend_from_slice(&2u32.to_be_bytes());
8339        pack.extend_from_slice(&1u32.to_be_bytes());
8340        write_entry_header(&mut pack, object_type, body.len() as u64);
8341        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8342        encoder
8343            .write_all(body)
8344            .expect("test operation should succeed");
8345        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8346        let checksum =
8347            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
8348        pack.extend_from_slice(checksum.as_bytes());
8349        pack
8350    }
8351
    /// Selects how `two_object_delta_pack` encodes the delta entry's reference
    /// to its base.
    #[derive(Clone, Copy, Debug)]
    enum DeltaKind {
        /// Written with pack entry type code 6, followed by the relative offset
        /// back to the base entry.
        Offset,
        /// Written with pack entry type code 7, followed by the base object id.
        Ref,
    }
8357
8358    fn two_object_delta_pack(
8359        format: ObjectFormat,
8360        base: &[u8],
8361        result: &[u8],
8362        delta_kind: DeltaKind,
8363    ) -> Vec<u8> {
8364        let mut pack = Vec::new();
8365        pack.extend_from_slice(b"PACK");
8366        pack.extend_from_slice(&2u32.to_be_bytes());
8367        pack.extend_from_slice(&2u32.to_be_bytes());
8368
8369        let base_offset = pack.len();
8370        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
8371        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8372        encoder
8373            .write_all(base)
8374            .expect("test operation should succeed");
8375        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8376
8377        let delta = append_suffix_delta(base, result);
8378        let delta_offset = pack.len();
8379        write_pack_entry_header_kind(
8380            &mut pack,
8381            match delta_kind {
8382                DeltaKind::Offset => 6,
8383                DeltaKind::Ref => 7,
8384            },
8385            delta.len() as u64,
8386        );
8387        match delta_kind {
8388            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
8389            DeltaKind::Ref => {
8390                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
8391                    .expect("test operation should succeed");
8392                pack.extend_from_slice(base_oid.as_bytes());
8393            }
8394        }
8395        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8396        encoder
8397            .write_all(&delta)
8398            .expect("test operation should succeed");
8399        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8400
8401        let checksum =
8402            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
8403        pack.extend_from_slice(checksum.as_bytes());
8404        pack
8405    }
8406
8407    fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
8408        let mut pack = Vec::new();
8409        pack.extend_from_slice(b"PACK");
8410        pack.extend_from_slice(&2u32.to_be_bytes());
8411        pack.extend_from_slice(&1u32.to_be_bytes());
8412
8413        let delta = append_suffix_delta(base, result);
8414        write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
8415        let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
8416            .expect("test operation should succeed");
8417        pack.extend_from_slice(base_oid.as_bytes());
8418        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
8419        encoder
8420            .write_all(&delta)
8421            .expect("test operation should succeed");
8422        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
8423
8424        let checksum =
8425            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
8426        pack.extend_from_slice(checksum.as_bytes());
8427        pack
8428    }
8429
8430    fn unique_temp_dir(name: &str) -> PathBuf {
8431        let nanos = SystemTime::now()
8432            .duration_since(UNIX_EPOCH)
8433            .expect("test operation should succeed")
8434            .as_nanos();
8435        std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
8436    }
8437
8438    fn run_git_success(cwd: &Path, args: &[&str]) {
8439        let output = Command::new("git")
8440            .current_dir(cwd)
8441            .args(args)
8442            .output()
8443            .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
8444        assert!(
8445            output.status.success(),
8446            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
8447            output.status.code(),
8448            String::from_utf8_lossy(&output.stdout),
8449            String::from_utf8_lossy(&output.stderr)
8450        );
8451    }
8452
8453    fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
8454        let mut paths = fs::read_dir(dir)
8455            .expect("test operation should succeed")
8456            .map(|entry| entry.expect("test operation should succeed").path())
8457            .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
8458            .collect::<Vec<_>>();
8459        assert_eq!(paths.len(), 1, "expected one .{extension} file");
8460        paths.remove(0)
8461    }
8462
8463    fn pack_bitmap_index(
8464        format: ObjectFormat,
8465        object_count: u32,
8466        options: u16,
8467        pack_checksum: &ObjectId,
8468        entries: &[(u32, u8, u8, &[u64])],
8469        name_hash_cache: Option<&[u32]>,
8470    ) -> Vec<u8> {
8471        let mut out = Vec::new();
8472        out.extend_from_slice(b"BITM");
8473        out.extend_from_slice(&1u16.to_be_bytes());
8474        out.extend_from_slice(&options.to_be_bytes());
8475        out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
8476        out.extend_from_slice(pack_checksum.as_bytes());
8477        write_test_ewah(&mut out, object_count, &[0b001]);
8478        write_test_ewah(&mut out, object_count, &[0b010]);
8479        write_test_ewah(&mut out, object_count, &[0b100]);
8480        write_test_ewah(&mut out, object_count, &[0]);
8481        for (position, xor_offset, flags, words) in entries {
8482            out.extend_from_slice(&position.to_be_bytes());
8483            out.push(*xor_offset);
8484            out.push(*flags);
8485            write_test_ewah(&mut out, object_count, words);
8486        }
8487        if let Some(cache) = name_hash_cache {
8488            for value in cache {
8489                out.extend_from_slice(&value.to_be_bytes());
8490            }
8491        }
8492        let checksum =
8493            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8494        out.extend_from_slice(checksum.as_bytes());
8495        out
8496    }
8497
8498    fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
8499        out.extend_from_slice(&bit_size.to_be_bytes());
8500        let words = ewah_literal_words(literals);
8501        out.extend_from_slice(&(words.len() as u32).to_be_bytes());
8502        for word in words {
8503            out.extend_from_slice(&word.to_be_bytes());
8504        }
8505        out.extend_from_slice(&0u32.to_be_bytes());
8506    }
8507
8508    fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
8509        let rlw = (literals.len() as u64) << 33;
8510        let mut words = vec![rlw];
8511        words.extend_from_slice(literals);
8512        words
8513    }
8514
8515    fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
8516        let checksum_offset = bytes.len() - format.raw_len();
8517        let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
8518            .expect("test operation should succeed");
8519        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
8520    }
8521
8522    fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
8523        assert!(result.starts_with(base));
8524        let suffix = &result[base.len()..];
8525        assert!(base.len() < 0x10000);
8526        assert!(suffix.len() < 0x80);
8527        let mut delta = Vec::new();
8528        write_delta_varint(&mut delta, base.len() as u64);
8529        write_delta_varint(&mut delta, result.len() as u64);
8530        delta.push(0x90);
8531        delta.push(base.len() as u8);
8532        delta.push(suffix.len() as u8);
8533        delta.extend_from_slice(suffix);
8534        delta
8535    }
8536
8537    fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
8538        loop {
8539            let mut byte = (value as u8) & 0x7f;
8540            value >>= 7;
8541            if value != 0 {
8542                byte |= 0x80;
8543            }
8544            out.push(byte);
8545            if value == 0 {
8546                break;
8547            }
8548        }
8549    }
8550
8551    fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
8552        let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
8553        size >>= 4;
8554        if size != 0 {
8555            byte |= 0x80;
8556        }
8557        out.push(byte);
8558        while size != 0 {
8559            let mut byte = (size as u8) & 0x7f;
8560            size >>= 7;
8561            if size != 0 {
8562                byte |= 0x80;
8563            }
8564            out.push(byte);
8565        }
8566    }
8567
8568    fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
8569        assert!(relative < 0x80);
8570        out.push(relative as u8);
8571    }
8572
8573    fn single_entry_index(
8574        format: ObjectFormat,
8575        oid: ObjectId,
8576        crc32: u32,
8577        offset: u32,
8578        pack_checksum: ObjectId,
8579    ) -> Vec<u8> {
8580        let mut index = Vec::new();
8581        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
8582        index.extend_from_slice(&2u32.to_be_bytes());
8583        for idx in 0..256 {
8584            let count = if idx >= usize::from(oid.as_bytes()[0]) {
8585                1u32
8586            } else {
8587                0u32
8588            };
8589            index.extend_from_slice(&count.to_be_bytes());
8590        }
8591        index.extend_from_slice(oid.as_bytes());
8592        index.extend_from_slice(&crc32.to_be_bytes());
8593        index.extend_from_slice(&offset.to_be_bytes());
8594        index.extend_from_slice(pack_checksum.as_bytes());
8595        let checksum =
8596            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
8597        index.extend_from_slice(checksum.as_bytes());
8598        index
8599    }
8600
8601    fn single_entry_index_v1(
8602        format: ObjectFormat,
8603        oid: ObjectId,
8604        offset: u32,
8605        pack_checksum: ObjectId,
8606    ) -> Vec<u8> {
8607        let mut index = Vec::new();
8608        for idx in 0..256 {
8609            let count = if idx >= usize::from(oid.as_bytes()[0]) {
8610                1u32
8611            } else {
8612                0u32
8613            };
8614            index.extend_from_slice(&count.to_be_bytes());
8615        }
8616        index.extend_from_slice(&offset.to_be_bytes());
8617        index.extend_from_slice(oid.as_bytes());
8618        index.extend_from_slice(pack_checksum.as_bytes());
8619        let checksum =
8620            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
8621        index.extend_from_slice(checksum.as_bytes());
8622        index
8623    }
8624
8625    fn pack_reverse_index(
8626        format: ObjectFormat,
8627        positions: &[u32],
8628        pack_checksum: ObjectId,
8629    ) -> Vec<u8> {
8630        let mut reverse_index = Vec::new();
8631        reverse_index.extend_from_slice(b"RIDX");
8632        reverse_index.extend_from_slice(&1u32.to_be_bytes());
8633        reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
8634        for position in positions {
8635            reverse_index.extend_from_slice(&position.to_be_bytes());
8636        }
8637        reverse_index.extend_from_slice(pack_checksum.as_bytes());
8638        let checksum =
8639            sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
8640        reverse_index.extend_from_slice(checksum.as_bytes());
8641        reverse_index
8642    }
8643
8644    fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
8645        let mut out = Vec::new();
8646        out.extend_from_slice(b"MTME");
8647        out.extend_from_slice(&1u32.to_be_bytes());
8648        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
8649        for mtime in mtimes {
8650            out.extend_from_slice(&mtime.to_be_bytes());
8651        }
8652        out.extend_from_slice(pack_checksum.as_bytes());
8653        let checksum =
8654            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8655        out.extend_from_slice(checksum.as_bytes());
8656        out
8657    }
8658
8659    fn midx_chunks_with_pack_names(
8660        _format: ObjectFormat,
8661        pack_names: Vec<u8>,
8662        entries: &[(ObjectId, u32, u64)],
8663    ) -> Vec<([u8; 4], Vec<u8>)> {
8664        let mut entries = entries.to_vec();
8665        entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
8666        let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
8667        let mut large_offsets = Vec::new();
8668        let mut chunks = vec![
8669            (*b"PNAM", pack_names),
8670            (*b"OIDF", midx_oid_fanout(&object_ids)),
8671            (*b"OIDL", midx_oid_lookup(&object_ids)),
8672            (
8673                *b"OOFF",
8674                midx_ooff_entries(
8675                    &entries
8676                        .iter()
8677                        .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
8678                        .collect::<Vec<_>>(),
8679                    &mut large_offsets,
8680                ),
8681            ),
8682        ];
8683        if !large_offsets.is_empty() {
8684            chunks.push((*b"LOFF", large_offsets));
8685        }
8686        chunks
8687    }
8688
8689    fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
8690        let mut counts = [0u32; 256];
8691        for oid in object_ids {
8692            counts[oid.as_bytes()[0] as usize] += 1;
8693        }
8694        let mut running = 0u32;
8695        let mut out = Vec::new();
8696        for count in counts {
8697            running += count;
8698            out.extend_from_slice(&running.to_be_bytes());
8699        }
8700        out
8701    }
8702
8703    fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
8704        let mut out = Vec::new();
8705        for oid in object_ids {
8706            out.extend_from_slice(oid.as_bytes());
8707        }
8708        out
8709    }
8710
8711    fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
8712        let mut out = Vec::new();
8713        for (pack_int_id, offset) in entries {
8714            out.extend_from_slice(&pack_int_id.to_be_bytes());
8715            if *offset < 0x8000_0000 {
8716                out.extend_from_slice(&(*offset as u32).to_be_bytes());
8717            } else {
8718                let large_idx = (large_offsets.len() / 8) as u32;
8719                out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
8720                large_offsets.extend_from_slice(&offset.to_be_bytes());
8721            }
8722        }
8723        out
8724    }
8725
8726    fn midx_u32_table(values: &[u32]) -> Vec<u8> {
8727        let mut out = Vec::new();
8728        for value in values {
8729            out.extend_from_slice(&value.to_be_bytes());
8730        }
8731        out
8732    }
8733
8734    fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
8735        let mut out = Vec::new();
8736        for (bitmap_pos, bitmap_nr) in entries {
8737            out.extend_from_slice(&bitmap_pos.to_be_bytes());
8738            out.extend_from_slice(&bitmap_nr.to_be_bytes());
8739        }
8740        out
8741    }
8742
8743    fn multi_pack_index(
8744        format: ObjectFormat,
8745        version: u8,
8746        pack_count: u32,
8747        chunks: &[([u8; 4], Vec<u8>)],
8748    ) -> Vec<u8> {
8749        let lookup_len = (chunks.len() + 1) * 12;
8750        let mut out = Vec::new();
8751        out.extend_from_slice(b"MIDX");
8752        out.push(version);
8753        out.push(hash_function_id(format) as u8);
8754        out.push(chunks.len() as u8);
8755        out.push(0);
8756        out.extend_from_slice(&pack_count.to_be_bytes());
8757        let mut chunk_offset = (12 + lookup_len) as u64;
8758        for (id, data) in chunks {
8759            out.extend_from_slice(id);
8760            out.extend_from_slice(&chunk_offset.to_be_bytes());
8761            chunk_offset += data.len() as u64;
8762        }
8763        out.extend_from_slice(&[0, 0, 0, 0]);
8764        out.extend_from_slice(&chunk_offset.to_be_bytes());
8765        for (_id, data) in chunks {
8766            out.extend_from_slice(data);
8767        }
8768        let checksum =
8769            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8770        out.extend_from_slice(checksum.as_bytes());
8771        out
8772    }
8773
8774    // ---- EWAH encoder / bitmap writer tests ------------------------------
8775
8776    fn pack_checksum_sha1() -> ObjectId {
8777        sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
8778    }
8779
8780    fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
8781        // Wrap the EWAH body with the surrounding offset bookkeeping the parser
8782        // expects: a checksum offset that lies just past the serialised bitmap.
8783        let mut offset = 0usize;
8784        let checksum_offset = bytes.len();
8785        parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
8786            .expect("test operation should succeed")
8787    }
8788
8789    #[test]
8790    fn ewah_encodes_single_literal_word_matching_helper() {
8791        // A bitmap whose only word is a literal must serialise as one RLW with
8792        // literal_len == 1 followed by the literal, identical to the test
8793        // helper used by the existing parser tests.
8794        let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
8795        assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
8796        assert_eq!(ewah.rlw_position, 0);
8797        assert_eq!(ewah.bit_size, 64);
8798    }
8799
8800    #[test]
8801    fn ewah_byte_layout_is_big_endian() {
8802        let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
8803            .expect("test operation should succeed");
8804        let bytes = ewah.to_bytes();
8805        let mut expected = Vec::new();
8806        expected.extend_from_slice(&64u32.to_be_bytes()); // bit_size
8807        expected.extend_from_slice(&2u32.to_be_bytes()); // word count: rlw + literal
8808        expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); // rlw: literal_len = 1
8809        expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
8810        expected.extend_from_slice(&0u32.to_be_bytes()); // rlw_position
8811        assert_eq!(bytes, expected);
8812    }
8813
8814    #[test]
8815    fn ewah_empty_bitmap_serialises_like_git() {
8816        let ewah = EwahBitmap::empty();
8817        let bytes = ewah.to_bytes();
8818        // bit_size = 0, word_count = 0, rlw_position = 0.
8819        assert_eq!(bytes, vec![0u8; 12]);
8820        // It must still parse and decode to nothing.
8821        let parsed = parse_ewah_bytes(&bytes);
8822        assert_eq!(parsed, ewah);
8823        assert!(
8824            parsed
8825                .to_positions()
8826                .expect("test operation should succeed")
8827                .is_empty()
8828        );
8829    }
8830
8831    #[test]
8832    fn ewah_compresses_clean_zero_run() {
8833        // Three all-zero words followed by a literal: the encoder should emit a
8834        // single RLW carrying a run of 3 clean-zero words plus one literal.
8835        let ewah =
8836            EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
8837        assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
8838        let rlw = ewah.words[0];
8839        assert_eq!(rlw & 1, 0, "run bit should be zero");
8840        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
8841        assert_eq!(rlw >> 33, 1, "literal length should be 1");
8842        assert_eq!(ewah.words[1], 0b1);
8843    }
8844
8845    #[test]
8846    fn ewah_compresses_clean_ones_run() {
8847        let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
8848            .expect("test operation should succeed");
8849        // Pure run of ones, no literals: one RLW only.
8850        assert_eq!(ewah.words.len(), 1);
8851        let rlw = ewah.words[0];
8852        assert_eq!(rlw & 1, 1, "run bit should be one");
8853        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
8854        assert_eq!(rlw >> 33, 0, "no literals");
8855    }
8856
8857    #[test]
8858    fn ewah_run_then_literal_then_run_roundtrips() {
8859        let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
8860        let bit_size = (words.len() * 64) as u32;
8861        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8862        assert_eq!(
8863            ewah.to_words().expect("test operation should succeed"),
8864            words
8865        );
8866    }
8867
8868    #[test]
8869    fn ewah_drops_trailing_clean_zero_words() {
8870        // Trailing all-zero words beyond a literal carry no information and git
8871        // does not serialise them, but to_words() restores them up to bit_size.
8872        let words = vec![0b1, 0, 0, 0];
8873        let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
8874        // bit_size of 1 means a single backing word.
8875        assert_eq!(ewah.bit_size, 1);
8876        assert_eq!(
8877            ewah.to_words().expect("test operation should succeed"),
8878            vec![0b1]
8879        );
8880    }
8881
8882    #[test]
8883    fn ewah_from_positions_roundtrips_via_positions() {
8884        let positions = [0u32, 1, 63, 64, 65, 200, 511];
8885        let ewah =
8886            EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
8887        let mut decoded = ewah.to_positions().expect("test operation should succeed");
8888        decoded.sort_unstable();
8889        assert_eq!(decoded, positions);
8890    }
8891
8892    #[test]
8893    fn ewah_from_positions_dedupes_and_orders() {
8894        let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
8895            .expect("test operation should succeed");
8896        assert_eq!(
8897            ewah.to_positions().expect("test operation should succeed"),
8898            vec![5, 100]
8899        );
8900    }
8901
8902    #[test]
8903    fn ewah_huge_zero_run_spans_multiple_rlws() {
8904        // A run longer than the 32-bit running-length field forces the encoder
8905        // to emit more than one RLW. Use one literal bit far out, with a bit
8906        // size large enough to exceed u32::MAX clean words is impractical, so
8907        // assert the field arithmetic via a direct builder run instead.
8908        let mut builder = EwahBuilder::new(0);
8909        builder.add_empty_words(false, 0xffff_ffff);
8910        builder.add_empty_words(false, 5);
8911        let ewah = builder.finish().expect("test operation should succeed");
8912        assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
8913        assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
8914        assert_eq!(ewah.words[1] & 1, 0);
8915        assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
8916        assert_eq!(ewah.rlw_position, 1);
8917    }
8918
8919    #[test]
8920    fn ewah_from_words_rejects_oversized_bit_size() {
8921        // bit_size demands two words but only one is supplied.
8922        assert!(EwahBitmap::from_words(65, &[0]).is_err());
8923    }
8924
8925    #[test]
8926    fn ewah_from_positions_rejects_out_of_range() {
8927        assert!(EwahBitmap::from_positions(64, &[64]).is_err());
8928    }
8929
8930    #[test]
8931    fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
8932        // Exercise the full encode -> serialise -> parse loop for a non-trivial
8933        // pattern and assert structural equality against the parser's model.
8934        let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
8935        let bit_size = (words.len() * 64) as u32;
8936        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8937        let bytes = ewah.to_bytes();
8938        let parsed = parse_ewah_bytes(&bytes);
8939        assert_eq!(parsed, ewah);
8940        assert_eq!(
8941            parsed.to_words().expect("test operation should succeed"),
8942            words
8943        );
8944    }
8945
8946    #[test]
8947    fn pack_bitmap_index_write_parse_roundtrip_sha1() {
8948        // commit, tree, blob in pack order; one selected commit reaching all.
8949        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
8950        let bytes = write_bitmap(
8951            ObjectFormat::Sha1,
8952            pack_checksum_sha1(),
8953            &object_types,
8954            &[(0u32, 0u32, vec![1u32, 2u32])],
8955            None,
8956        )
8957        .expect("test operation should succeed");
8958        assert_eq!(&bytes[..4], b"BITM");
8959
8960        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
8961            .expect("test operation should succeed");
8962        assert_eq!(parsed.version, 1);
8963        assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
8964        assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
8965        assert_eq!(
8966            parsed
8967                .type_bitmaps
8968                .commits
8969                .to_positions()
8970                .expect("test operation should succeed"),
8971            vec![0]
8972        );
8973        assert_eq!(
8974            parsed
8975                .type_bitmaps
8976                .trees
8977                .to_positions()
8978                .expect("test operation should succeed"),
8979            vec![1]
8980        );
8981        assert_eq!(
8982            parsed
8983                .type_bitmaps
8984                .blobs
8985                .to_positions()
8986                .expect("test operation should succeed"),
8987            vec![2]
8988        );
8989        assert!(
8990            parsed
8991                .type_bitmaps
8992                .tags
8993                .to_positions()
8994                .expect("test operation should succeed")
8995                .is_empty()
8996        );
8997        assert_eq!(parsed.entries.len(), 1);
8998        let entry = parsed
8999            .entry_for_index_position(0)
9000            .expect("test operation should succeed");
9001        assert_eq!(entry.xor_offset, 0);
9002        assert_eq!(entry.flags, 0);
9003        assert_eq!(
9004            entry
9005                .bitmap
9006                .to_positions()
9007                .expect("test operation should succeed"),
9008            vec![0, 1, 2]
9009        );
9010        assert_eq!(parsed.name_hash_cache, None);
9011    }
9012
9013    #[test]
9014    fn pack_bitmap_index_write_parse_roundtrip_sha256() {
9015        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
9016            .expect("test operation should succeed");
9017        let object_types = [ObjectType::Commit, ObjectType::Tree];
9018        let bytes = write_bitmap(
9019            ObjectFormat::Sha256,
9020            pack_checksum.clone(),
9021            &object_types,
9022            &[(0u32, 0u32, vec![1u32])],
9023            None,
9024        )
9025        .expect("test operation should succeed");
9026        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
9027            .expect("test operation should succeed");
9028        assert_eq!(parsed.format, ObjectFormat::Sha256);
9029        assert_eq!(parsed.pack_checksum, pack_checksum);
9030        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
9031        assert_eq!(
9032            parsed.entries[0]
9033                .bitmap
9034                .to_positions()
9035                .expect("test operation should succeed"),
9036            vec![0, 1]
9037        );
9038    }
9039
9040    #[test]
9041    fn pack_bitmap_index_write_includes_name_hash_cache() {
9042        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
9043        let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
9044        let bytes = write_bitmap(
9045            ObjectFormat::Sha1,
9046            pack_checksum_sha1(),
9047            &object_types,
9048            &[(0u32, 0u32, vec![1u32, 2u32])],
9049            Some(cache.clone()),
9050        )
9051        .expect("test operation should succeed");
9052        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
9053            .expect("test operation should succeed");
9054        assert_eq!(
9055            parsed.options,
9056            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
9057        );
9058        assert_eq!(parsed.name_hash_cache, Some(cache));
9059    }
9060
9061    #[test]
9062    fn pack_bitmap_writer_supports_multiple_commits() {
9063        let object_types = [
9064            ObjectType::Commit,
9065            ObjectType::Commit,
9066            ObjectType::Tree,
9067            ObjectType::Blob,
9068        ];
9069        let mut writer =
9070            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
9071                .expect("test operation should succeed");
9072        writer
9073            .add_commit(0, 0, &[2, 3])
9074            .expect("test operation should succeed");
9075        writer
9076            .add_commit(1, 1, &[2])
9077            .expect("test operation should succeed");
9078        let bytes = writer.write().expect("test operation should succeed");
9079        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
9080            .expect("test operation should succeed");
9081        assert_eq!(parsed.entries.len(), 2);
9082        assert_eq!(
9083            parsed
9084                .type_bitmaps
9085                .commits
9086                .to_positions()
9087                .expect("test operation should succeed"),
9088            vec![0, 1]
9089        );
9090        let first = parsed
9091            .entry_for_index_position(0)
9092            .expect("test operation should succeed");
9093        assert_eq!(
9094            first
9095                .bitmap
9096                .to_positions()
9097                .expect("test operation should succeed"),
9098            vec![0, 2, 3]
9099        );
9100        let second = parsed
9101            .entry_for_index_position(1)
9102            .expect("test operation should succeed");
9103        assert_eq!(
9104            second
9105                .bitmap
9106                .to_positions()
9107                .expect("test operation should succeed"),
9108            vec![1, 2]
9109        );
9110    }
9111
9112    #[test]
9113    fn pack_bitmap_index_recomputes_checksum_on_write() {
9114        // The provided index_checksum field is ignored; write recomputes it so
9115        // a bogus placeholder still produces a valid, parseable file.
9116        let object_types = [ObjectType::Commit, ObjectType::Blob];
9117        let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
9118            .expect("test operation should succeed");
9119        let mut index = writer.build().expect("test operation should succeed");
9120        // build() sets an all-zero placeholder checksum.
9121        assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
9122        index.entries.clear(); // mutate the model after build
9123        index.entries.push(PackBitmapEntry {
9124            object_position: 0,
9125            xor_offset: 0,
9126            flags: 0,
9127            bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
9128        });
9129        let bytes = index.write().expect("test operation should succeed");
9130        // Parsing validates the trailing checksum, so a wrong checksum fails.
9131        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
9132            .expect("test operation should succeed");
9133        assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
9134    }
9135
9136    #[test]
9137    fn pack_bitmap_writer_rejects_non_commit_selection() {
9138        let object_types = [ObjectType::Commit, ObjectType::Blob];
9139        let mut writer =
9140            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
9141                .expect("test operation should succeed");
9142        // Position 1 is a blob, not a commit.
9143        assert!(writer.add_commit(1, 1, &[]).is_err());
9144        // Position 5 is out of range entirely.
9145        assert!(writer.add_commit(5, 5, &[]).is_err());
9146        // Index position out of range.
9147        assert!(writer.add_commit(0, 5, &[]).is_err());
9148        // Reachable position out of range.
9149        assert!(writer.add_commit(0, 0, &[9]).is_err());
9150    }
9151
9152    #[test]
9153    fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
9154        let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
9155            .expect("test operation should succeed");
9156        assert!(
9157            PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
9158                .is_err()
9159        );
9160    }
9161
9162    #[test]
9163    fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
9164        let writer = PackBitmapWriter::new(
9165            ObjectFormat::Sha1,
9166            pack_checksum_sha1(),
9167            &[ObjectType::Commit],
9168        )
9169        .expect("test operation should succeed");
9170        assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
9171    }
9172
9173    #[test]
9174    fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
9175        let mut index = PackBitmapWriter::new(
9176            ObjectFormat::Sha1,
9177            pack_checksum_sha1(),
9178            &[ObjectType::Commit],
9179        )
9180        .expect("test operation should succeed")
9181        .build()
9182        .expect("test operation should succeed");
9183        // Flag set but no cache present.
9184        index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
9185        assert!(index.write().is_err());
9186        // Cache present but flag missing.
9187        index.options = PackBitmapIndex::OPTION_FULL_DAG;
9188        index.name_hash_cache = Some(vec![0]);
9189        assert!(index.write().is_err());
9190    }
9191
9192    #[test]
9193    fn write_bitmap_roundtrips_through_upstream_git_parser() {
9194        // Build a real pack with git, then overwrite reachability with our own
9195        // writer using the real pack checksum and object types, and confirm our
9196        // bytes parse under the same parser that reads upstream bitmaps.
9197        let root = unique_temp_dir("git-pack-bitmap-writer");
9198        fs::create_dir_all(&root).expect("test operation should succeed");
9199        {
9200            run_git_success(&root, &["init", "-q", "-b", "main"]);
9201            run_git_success(
9202                &root,
9203                &[
9204                    "-c",
9205                    "user.name=Example User",
9206                    "-c",
9207                    "user.email=example@example.invalid",
9208                    "commit",
9209                    "--allow-empty",
9210                    "-q",
9211                    "-m",
9212                    "one",
9213                ],
9214            );
9215            run_git_success(&root, &["repack", "-adb"]);
9216            let pack_dir = root.join(".git").join("objects").join("pack");
9217            let idx_path = single_path_with_extension(&pack_dir, "idx");
9218            let index = PackIndex::parse(
9219                &fs::read(idx_path).expect("test operation should succeed"),
9220                ObjectFormat::Sha1,
9221            )
9222            .expect("test operation should succeed");
9223            // Read object types from the pack so the type bitmaps are accurate.
9224            let pack_path = single_path_with_extension(&pack_dir, "pack");
9225            let pack =
9226                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
9227                    .expect("test operation should succeed");
9228            // Map each index entry (sorted by oid) to its pack offset, then to a
9229            // pack-order position so positions line up with the index ordering.
9230            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
9231            offsets.sort_unstable();
9232            let position_of = |offset: u64| -> u32 {
9233                offsets
9234                    .iter()
9235                    .position(|value| *value == offset)
9236                    .expect("test operation should succeed") as u32
9237            };
9238            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
9239            for entry in &index.entries {
9240                let position = position_of(entry.offset) as usize;
9241                // Find the parsed object at this pack offset to read its type.
9242                if let Some(parsed) = pack
9243                    .entries
9244                    .iter()
9245                    .find(|po| po.entry.offset == entry.offset)
9246                {
9247                    object_types[position] = parsed.object.object_type;
9248                }
9249            }
9250            // Select the first commit position we find and reach everything.
9251            let commit_position = object_types
9252                .iter()
9253                .position(|ty| *ty == ObjectType::Commit)
9254                .expect("test operation should succeed") as u32;
9255            // The entry records the commit's position in the oid-sorted index.
9256            let commit_index_position = index
9257                .entries
9258                .iter()
9259                .position(|entry| position_of(entry.offset) == commit_position)
9260                .expect("test operation should succeed")
9261                as u32;
9262            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
9263            let bytes = write_bitmap(
9264                ObjectFormat::Sha1,
9265                index.pack_checksum.clone(),
9266                &object_types,
9267                &[(commit_position, commit_index_position, reachable)],
9268                None,
9269            )
9270            .expect("test operation should succeed");
9271            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
9272                .expect("test operation should succeed");
9273            assert_eq!(parsed.pack_checksum, index.pack_checksum);
9274            assert_eq!(parsed.entries.len(), 1);
9275            assert_eq!(
9276                parsed.entries[0]
9277                    .bitmap
9278                    .to_positions()
9279                    .expect("test operation should succeed")
9280                    .len(),
9281                index.entries.len()
9282            );
9283        };
9284        let _ = fs::remove_dir_all(&root);
9285    }
9286}