Skip to main content

sley_pack/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
16#[derive(Debug, Clone, PartialEq, Eq)]
17pub struct PackEntry {
18    pub oid: ObjectId,
19    pub compressed_size: u64,
20    pub uncompressed_size: u64,
21    pub offset: u64,
22}
23
/// Sliding-window size [`PackFile::write_packed`] uses by default.
///
/// While searching for a small delta, each object is tried against at most
/// this many previously emitted candidates of the same type. Same value as
/// git's default `pack.window`.
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Maximum delta chain depth [`PackFile::write_packed`] uses by default.
///
/// A delta's base may itself be a delta; capping the chain length keeps
/// object reconstruction cheap and the reader's recursion shallow. Same value
/// as git's default `pack.depth`.
pub const DEFAULT_PACK_DEPTH: usize = 50;

/// Minimum number of objects before pack payload compression is spread over
/// worker threads. For smaller inputs the thread setup and extra buffering
/// cost more than the parallelism saves.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

/// Upper bound on compression worker threads. Several cores give most of the
/// wall-clock win, while an unbounded thread count can steal cache from delta
/// planning and inflate peak memory on large packs.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
48/// Options controlling sliding-window delta selection during pack generation.
49///
50/// Construct with [`PackWriteOptions::new`] (sensible defaults) and adjust with
51/// the builder-style setters, or build one directly. Used by
52/// [`PackFile::write_packed_with_options`] and [`PackFile::write_thin`].
53#[derive(Debug, Clone)]
54pub struct PackWriteOptions {
55    /// Number of previous same-type candidates each object is deltified
56    /// against. Larger windows find better deltas at higher cost.
57    pub window: usize,
58    /// Maximum delta chain depth. A value of `0` disables deltification.
59    pub depth: usize,
60    /// When `true`, in-pack deltas are encoded as ofs-deltas (the default and
61    /// git's preference). When `false`, in-pack deltas use ref-deltas. Deltas
62    /// against external thin-pack bases always use ref-deltas regardless.
63    pub prefer_ofs_delta: bool,
64    /// External base objects, keyed by object id, that are *not* written into
65    /// the pack but may be used as delta bases. Supplying any entries here
66    /// produces a thin pack (see [`PackFile::write_thin`]). Empty by default,
67    /// yielding a self-contained pack.
68    pub thin_bases: HashMap<ObjectId, EncodedObject>,
69    /// When `true` (the default), objects are reordered by type and size for
70    /// better delta locality. When `false`, the input order is preserved (the
71    /// emitted pack lists objects in the order supplied); deltas then only
72    /// reference earlier input objects. Reordering is always skipped when
73    /// deltification is disabled (`depth == 0`), since it has no effect there.
74    pub reorder: bool,
75    /// Zlib compression level for pack entry payloads.
76    pub compression_level: u32,
77}
78
79impl Default for PackWriteOptions {
80    fn default() -> Self {
81        Self::new()
82    }
83}
84
85impl PackWriteOptions {
86    /// Options with git-compatible defaults: window
87    /// [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`], ofs-deltas, and
88    /// no external thin bases.
89    pub fn new() -> Self {
90        Self {
91            window: DEFAULT_PACK_WINDOW,
92            depth: DEFAULT_PACK_DEPTH,
93            prefer_ofs_delta: true,
94            thin_bases: HashMap::new(),
95            reorder: true,
96            compression_level: 6,
97        }
98    }
99
100    /// Set the sliding-window size.
101    pub fn with_window(mut self, window: usize) -> Self {
102        self.window = window;
103        self
104    }
105
106    /// Set the maximum delta chain depth (`0` disables deltas).
107    pub fn with_depth(mut self, depth: usize) -> Self {
108        self.depth = depth;
109        self
110    }
111
112    /// Choose whether in-pack deltas use ofs-delta (`true`) or ref-delta
113    /// (`false`) base references.
114    pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
115        self.prefer_ofs_delta = prefer_ofs_delta;
116        self
117    }
118
119    /// Provide the set of external base objects permitted for a thin pack.
120    pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
121        self.thin_bases = thin_bases;
122        self
123    }
124
125    /// Choose whether objects may be reordered for delta locality (`true`) or
126    /// emitted in input order (`false`).
127    pub fn with_reorder(mut self, reorder: bool) -> Self {
128        self.reorder = reorder;
129        self
130    }
131
132    /// Set the zlib compression level used for pack entry payloads.
133    pub fn with_compression_level(mut self, level: u32) -> Self {
134        self.compression_level = level.min(9);
135        self
136    }
137}
138
/// Switches describing how a repack should behave.
///
/// NOTE(review): nothing in the visible part of this file reads these
/// fields; the per-field notes are inferred from the names and git's
/// `repack` options and should be confirmed against the consumer.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RepackPolicy {
    /// Presumably: also write reachability bitmaps.
    pub write_bitmaps: bool,
    /// Presumably: collect unreachable objects into cruft packs.
    pub cruft_packs: bool,
    /// Presumably: factor for geometric repacking; `None` when not requested.
    pub geometric_factor: Option<u8>,
}
145
146#[derive(Debug, Clone, PartialEq, Eq)]
147pub struct PackFile {
148    pub version: u32,
149    pub entries: Vec<PackObject>,
150    pub checksum: ObjectId,
151}
152
153#[derive(Debug, Clone, PartialEq, Eq)]
154pub struct PackObject {
155    pub entry: PackEntry,
156    pub object: EncodedObject,
157}
158
159/// Per-object statistics for one entry of a verified pack, in the shape
160/// `git verify-pack -v` reports.
161#[derive(Debug, Clone, PartialEq, Eq)]
162pub struct PackVerifyStat {
163    /// Resolved object id.
164    pub oid: ObjectId,
165    /// Resolved object type (the delta's *result* type, not `ofs-delta`).
166    pub object_type: ObjectType,
167    /// Resolved (inflated) object size in bytes.
168    pub size: u64,
169    /// Bytes this object occupies in the pack: the offset delta to the next
170    /// object, or to the trailing checksum for the last object.
171    pub size_in_pack: u64,
172    /// In-pack byte offset where this object's entry begins.
173    pub offset: u64,
174    /// Delta chain depth: `0` for undeltified objects, base-depth + 1 otherwise.
175    pub delta_depth: u32,
176    /// For delta objects, the id of the *immediate* base object (which may
177    /// itself be a delta). `None` for undeltified objects.
178    pub base_oid: Option<ObjectId>,
179}
180
181/// Result of [`PackFile::verify_pack_stats`]: per-object stats in pack offset
182/// order plus the pack's trailing checksum.
183#[derive(Debug, Clone, PartialEq, Eq)]
184pub struct PackVerifyStats {
185    pub objects: Vec<PackVerifyStat>,
186    pub checksum: ObjectId,
187}
188
189#[derive(Debug, Clone, PartialEq, Eq)]
190pub struct PackWrite {
191    pub pack: Vec<u8>,
192    pub index: Vec<u8>,
193    pub checksum: ObjectId,
194    pub entries: Vec<PackIndexEntry>,
195    pub delta_count: u32,
196}
197
198#[derive(Debug, Clone, Copy, PartialEq, Eq)]
199pub struct PackInput<'a> {
200    pub oid: &'a ObjectId,
201    pub object: &'a EncodedObject,
202}
203
204#[derive(Debug, Clone, PartialEq, Eq)]
205pub struct PackIndexBuild {
206    pub index: Vec<u8>,
207    pub pack_checksum: ObjectId,
208    pub entries: Vec<PackIndexEntry>,
209}
210
211#[derive(Debug, Clone, PartialEq, Eq)]
212pub struct PackIndex {
213    pub version: u32,
214    pub fanout: [u32; 256],
215    pub entries: Vec<PackIndexEntry>,
216    pub pack_checksum: ObjectId,
217    pub index_checksum: ObjectId,
218}
219
220#[derive(Debug, Clone, PartialEq, Eq)]
221pub struct PackIndexView<'a> {
222    pub version: u32,
223    pub count: usize,
224    pub fanout: [u32; 256],
225    pub pack_checksum: ObjectId,
226    pub index_checksum: ObjectId,
227    bytes: &'a [u8],
228    format: ObjectFormat,
229    tables: PackIndexViewTables,
230}
231
/// A thread-safe, debuggable owner of raw index bytes.
///
/// Lets index views hold their backing storage behind a trait object without
/// caring what kind of buffer it is.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Borrow the underlying bytes.
    fn as_bytes(&self) -> &[u8];
}

/// Blanket implementation: every `AsRef<[u8]>` type that is also
/// `Debug + Send + Sync` (sized or not) is a byte source.
impl<S> PackIndexByteSource for S
where
    S: ?Sized + AsRef<[u8]> + fmt::Debug + Send + Sync,
{
    fn as_bytes(&self) -> &[u8] {
        AsRef::<[u8]>::as_ref(self)
    }
}
244
245#[derive(Debug)]
246struct SharedIndexBytes(Arc<[u8]>);
247
248impl PackIndexByteSource for SharedIndexBytes {
249    fn as_bytes(&self) -> &[u8] {
250        self.0.as_ref()
251    }
252}
253
254#[derive(Debug, Clone)]
255pub struct PackIndexViewData {
256    pub version: u32,
257    pub count: usize,
258    pub fanout: [u32; 256],
259    pub pack_checksum: ObjectId,
260    pub index_checksum: ObjectId,
261    bytes: Arc<dyn PackIndexByteSource>,
262    format: ObjectFormat,
263    tables: PackIndexViewTables,
264}
265
266#[derive(Debug, Clone, PartialEq, Eq)]
267pub struct PackIndexEntry {
268    pub oid: ObjectId,
269    pub crc32: u32,
270    pub offset: u64,
271}
272
/// The CRC-32 and pack offset of a single object, without its id.
///
/// Presumably what an index lookup by object id returns; the lookup code is
/// outside this chunk.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct PackIndexLookup {
    /// CRC-32 recorded for the entry.
    pub crc32: u32,
    /// Offset of the object's entry within the pack.
    pub offset: u64,
}
278
/// Locations of the version-specific tables of a pack index, as ranges
/// (presumably byte ranges into the index buffer — confirm against the view
/// constructor, which is outside this chunk).
#[derive(Clone, Debug, Eq, PartialEq)]
enum PackIndexViewTables {
    /// Version 1: a single table of entries.
    V1 {
        /// Range of the entry table.
        entry_table: Range<usize>,
    },
    /// Version 2: separate tables per column.
    V2 {
        /// Range of the object-id table.
        oid_table: Range<usize>,
        /// Range of the CRC-32 table.
        crc_table: Range<usize>,
        /// Range of the small (presumably 32-bit) offset table.
        small_offset_table: Range<usize>,
        /// Range of the large (presumably 64-bit) offset table.
        large_offset_table: Range<usize>,
    },
}
291
292#[derive(Debug, Clone, PartialEq, Eq)]
293pub struct PackReverseIndex {
294    pub version: u32,
295    pub format: ObjectFormat,
296    pub positions: Vec<u32>,
297    pub pack_checksum: ObjectId,
298    pub index_checksum: ObjectId,
299}
300
301#[derive(Debug, Clone, PartialEq, Eq)]
302pub struct PackMtimes {
303    pub version: u32,
304    pub format: ObjectFormat,
305    pub mtimes: Vec<u32>,
306    pub pack_checksum: ObjectId,
307    pub index_checksum: ObjectId,
308}
309
310#[derive(Debug, Clone, PartialEq, Eq)]
311pub struct PackBitmapIndex {
312    pub version: u16,
313    pub format: ObjectFormat,
314    pub options: u16,
315    pub pack_checksum: ObjectId,
316    pub index_checksum: ObjectId,
317    pub type_bitmaps: PackBitmapTypeBitmaps,
318    pub entries: Vec<PackBitmapEntry>,
319    pub name_hash_cache: Option<Vec<u32>>,
320}
321
322#[derive(Debug, Clone, PartialEq, Eq)]
323pub struct PackBitmapTypeBitmaps {
324    pub commits: EwahBitmap,
325    pub trees: EwahBitmap,
326    pub blobs: EwahBitmap,
327    pub tags: EwahBitmap,
328}
329
330#[derive(Debug, Clone, PartialEq, Eq)]
331pub struct PackBitmapEntry {
332    /// The commit's position in the *oid-sorted* pack index (`.idx` order),
333    /// NOT the pack-order position used for the bitmap's bit numbering.
334    /// Upstream writes `oid_pos(...)` here (pack-bitmap-write.c) and reads it
335    /// back via `nth_packed_object_id` (pack-bitmap.c).
336    pub object_position: u32,
337    pub xor_offset: u8,
338    pub flags: u8,
339    /// Reachability bitmap; bit `i` refers to the `i`-th object in *pack
340    /// order* (offset order), as mapped by the pack's reverse index.
341    pub bitmap: EwahBitmap,
342}
343
/// An EWAH-compressed bitmap kept in its serialized component form.
///
/// NOTE(review): the codec is outside this chunk; field notes follow git's
/// EWAH serialization and should be confirmed there.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap represents.
    pub bit_size: u32,
    /// The compressed 64-bit words.
    pub words: Vec<u64>,
    /// Position of the current run-length word (presumably an index into
    /// `words`).
    pub rlw_position: u32,
}
350
351#[derive(Debug, Clone, PartialEq, Eq)]
352pub struct MultiPackIndex {
353    pub version: u8,
354    pub format: ObjectFormat,
355    pub pack_count: u32,
356    pub pack_names: Vec<String>,
357    pub object_count: u32,
358    pub fanout: [u32; 256],
359    pub objects: Vec<MultiPackIndexEntry>,
360    pub reverse_index: Option<Vec<u32>>,
361    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
362    pub chunks: Vec<MultiPackIndexChunk>,
363    pub checksum: ObjectId,
364}
365
366#[derive(Debug, Clone)]
367pub struct MultiPackIndexOidLookup {
368    format: ObjectFormat,
369    pack_count: u32,
370    pack_names: Vec<String>,
371    fanout: [u32; 256],
372    object_count: usize,
373    oid_lookup_offset: usize,
374    object_offsets_offset: usize,
375    large_offsets_offset: Option<usize>,
376    large_offsets_len: usize,
377    bytes: Arc<dyn PackIndexByteSource>,
378}
379
380#[derive(Debug, Clone, PartialEq, Eq)]
381pub struct MultiPackIndexEntry {
382    pub oid: ObjectId,
383    pub pack_int_id: u32,
384    pub offset: u64,
385    pub force_large_offset: bool,
386}
387
/// Bitmap range of one pack inside a multi-pack bitmap.
///
/// NOTE(review): field meanings follow git's bitmapped-packs chunk (first bit
/// position and number of bits for the pack) — confirm against the parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MultiPackBitmapPack {
    /// Presumably the first bitmap position belonging to the pack.
    pub bitmap_pos: u32,
    /// Presumably the number of bitmap positions belonging to the pack.
    pub bitmap_nr: u32,
}
393
/// Location of one chunk inside a multi-pack-index file.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk (presumably from the start of the file).
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
400
/// Kind of a pack entry as stored on disk: the four object types plus the
/// two delta encodings.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PackObjectKind {
    /// Undeltified commit.
    Commit,
    /// Undeltified tree.
    Tree,
    /// Undeltified blob.
    Blob,
    /// Undeltified tag.
    Tag,
    /// Delta whose base is given by an in-pack offset.
    OfsDelta,
    /// Delta whose base is given by an object id.
    RefDelta,
}
410
411#[derive(Debug, Clone, PartialEq, Eq)]
412enum ParsedPackEntry {
413    Resolved(PackObject),
414    Delta {
415        base: DeltaBase,
416        compressed_size: u64,
417        delta_size: u64,
418        offset: u64,
419        delta: Vec<u8>,
420    },
421}
422
423#[derive(Debug, Clone, PartialEq, Eq)]
424enum DeltaBase {
425    Offset(u64),
426    Ref(ObjectId),
427}
428
429/// One pack entry as stored on disk, used by [`PackFile::verify_pack_stats`] to
430/// recover the delta structure and on-disk stream size that resolved
431/// [`PackObject`]s no longer carry.
432struct OnDiskEntry {
433    offset: u64,
434    base: Option<DeltaBase>,
435    stream_size: u64,
436}
437
438impl PackFile {
439    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
440        Self::parse(bytes, ObjectFormat::Sha1)
441    }
442
443    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
444        Self::parse_with_base(bytes, format, |_| Ok(None))
445    }
446
447    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
448        Self::parse(&bundle.pack, bundle.format)
449    }
450
451    pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
452        let PackIndexBuild {
453            index,
454            pack_checksum,
455            entries,
456        } = PackIndex::write_v2_for_pack(bytes, format)?;
457        Ok(PackWrite {
458            pack: bytes.to_vec(),
459            index,
460            checksum: pack_checksum,
461            entries,
462            delta_count: 0,
463        })
464    }
465
466    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
467    where
468        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
469    {
470        Self::parse_with_base(bytes, format, external_base)
471    }
472
473    fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
474    where
475        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
476    {
477        let trailer_len = format.raw_len();
478        if bytes.len() < 12 + trailer_len {
479            return Err(GitError::InvalidFormat("pack file too short".into()));
480        }
481        let trailer_offset = bytes.len() - trailer_len;
482        let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
483        let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
484        if checksum != expected {
485            return Err(GitError::InvalidFormat(format!(
486                "pack checksum mismatch: expected {expected}, got {checksum}"
487            )));
488        }
489
490        if &bytes[..4] != b"PACK" {
491            return Err(GitError::InvalidFormat("missing PACK signature".into()));
492        }
493        let version = u32_be(&bytes[4..8]);
494        if version != 2 && version != 3 {
495            return Err(GitError::Unsupported(format!("pack version {version}")));
496        }
497        let count = u32_be(&bytes[8..12]) as usize;
498        let mut offset = 12usize;
499        let mut entries = Vec::with_capacity(count);
500        for _ in 0..count {
501            let entry_offset = offset;
502            let header = parse_entry_header(bytes, &mut offset)?;
503            let base =
504                match header.kind {
505                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
506                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
507                    )),
508                    PackObjectKind::RefDelta => {
509                        let hash_len = format.raw_len();
510                        if offset + hash_len > trailer_offset {
511                            return Err(GitError::InvalidFormat(
512                                "truncated ref-delta base object id".into(),
513                            ));
514                        }
515                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
516                        offset += hash_len;
517                        Some(DeltaBase::Ref(oid))
518                    }
519                    _ => None,
520                };
521            let mut body = Vec::new();
522            let consumed = inflate_into(
523                &bytes[offset..trailer_offset],
524                &mut body,
525                header.size.min(usize::MAX as u64) as usize,
526            )?;
527            if body.len() as u64 != header.size {
528                return Err(GitError::InvalidObject(format!(
529                    "pack object declared {} bytes, decoded {}",
530                    header.size,
531                    body.len()
532                )));
533            }
534            if consumed == 0 {
535                return Err(GitError::InvalidFormat(
536                    "empty compressed pack entry".into(),
537                ));
538            }
539            offset = offset
540                .checked_add(consumed)
541                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
542            if offset > trailer_offset {
543                return Err(GitError::InvalidFormat(
544                    "pack entry extends past checksum".into(),
545                ));
546            }
547            if let Some(base) = base {
548                entries.push(ParsedPackEntry::Delta {
549                    base,
550                    compressed_size: consumed as u64,
551                    delta_size: header.size,
552                    offset: entry_offset as u64,
553                    delta: body,
554                });
555            } else {
556                let object_type = match header.kind {
557                    PackObjectKind::Commit => ObjectType::Commit,
558                    PackObjectKind::Tree => ObjectType::Tree,
559                    PackObjectKind::Blob => ObjectType::Blob,
560                    PackObjectKind::Tag => ObjectType::Tag,
561                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
562                };
563                let object = EncodedObject::new(object_type, body);
564                let oid = object.object_id(format)?;
565                entries.push(ParsedPackEntry::Resolved(PackObject {
566                    entry: PackEntry {
567                        oid,
568                        compressed_size: consumed as u64,
569                        uncompressed_size: header.size,
570                        offset: entry_offset as u64,
571                    },
572                    object,
573                }));
574            }
575        }
576        if offset != trailer_offset {
577            return Err(GitError::InvalidFormat(format!(
578                "pack has {} trailing bytes before checksum",
579                trailer_offset - offset
580            )));
581        }
582        Ok(Self {
583            version,
584            entries: resolve_pack_entries(entries, format, &mut external_base)?,
585            checksum,
586        })
587    }
588
589    /// Walk the pack and produce per-object statistics matching the output of
590    /// `git verify-pack -v` / `git index-pack --verify-stat`.
591    ///
592    /// Objects are returned in pack offset order (the order `git verify-pack -v`
593    /// prints them). Each entry carries the *resolved* object id, type and size,
594    /// the in-pack byte span (`size_in_pack` = the offset delta to the next
595    /// object, or to the trailing checksum for the last object), the in-pack
596    /// offset, the delta chain depth (`0` for undeltified objects), and — for
597    /// deltas — the object id of the *immediate* base (which may itself be a
598    /// delta). This mirrors `builtin/index-pack.c`'s `show_pack_info`.
599    pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
600        // Resolve the whole pack first: this validates the trailing checksum,
601        // every object's inflate, and yields the resolved oid/type/size keyed by
602        // offset. `verify-pack` is exactly this validation plus the stat report.
603        let pack = Self::parse(bytes, format)?;
604
605        // Independently walk the on-disk entries to recover each object's stored
606        // kind and (for deltas) its base reference — information `PackFile`
607        // discards once deltas are resolved.
608        let trailer_len = format.raw_len();
609        let trailer_offset = bytes.len() - trailer_len;
610        let count = u32_be(&bytes[8..12]) as usize;
611        let mut offset = 12usize;
612        // Per entry in read (offset) order: (offset, base, on-disk stream size).
613        // The stream size is what git prints in the size column: it is the
614        // resolved object size for an undeltified entry, but the *delta
615        // instruction stream* length for a delta entry (builtin/index-pack.c sets
616        // `obj->size` from the entry header, before any delta is applied).
617        let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
618        for _ in 0..count {
619            let entry_offset = offset as u64;
620            let header = parse_entry_header(bytes, &mut offset)?;
621            let stream_size = header.size;
622            let base =
623                match header.kind {
624                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
625                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
626                    )),
627                    PackObjectKind::RefDelta => {
628                        let hash_len = format.raw_len();
629                        if offset + hash_len > trailer_offset {
630                            return Err(GitError::InvalidFormat(
631                                "truncated ref-delta base object id".into(),
632                            ));
633                        }
634                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
635                        offset += hash_len;
636                        Some(DeltaBase::Ref(oid))
637                    }
638                    _ => None,
639                };
640            // Skip the compressed body to reach the next entry header.
641            let mut body = Vec::new();
642            let consumed = inflate_into(
643                &bytes[offset..trailer_offset],
644                &mut body,
645                header.size.min(usize::MAX as u64) as usize,
646            )?;
647            offset = offset
648                .checked_add(consumed)
649                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
650            on_disk.push(OnDiskEntry {
651                offset: entry_offset,
652                base,
653                stream_size,
654            });
655        }
656
657        // Map offset -> resolved object so the on-disk walk can join in oid/type.
658        let mut resolved_by_offset: HashMap<u64, &PackObject> =
659            HashMap::with_capacity(pack.entries.len());
660        for object in &pack.entries {
661            resolved_by_offset.insert(object.entry.offset, object);
662        }
663        // Map offset -> resolved oid, for ofs-delta base lookups.
664        let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
665        for entry in &on_disk {
666            if let Some(object) = resolved_by_offset.get(&entry.offset) {
667                oid_by_offset.insert(entry.offset, object.entry.oid);
668            }
669        }
670        // Map base offset -> index in `on_disk`, for delta-depth propagation.
671        let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
672        for (idx, entry) in on_disk.iter().enumerate() {
673            index_by_offset.insert(entry.offset, idx);
674        }
675
676        // Sorted offsets give the size-in-pack span (next offset - this offset),
677        // with the trailing checksum offset as the final sentinel.
678        let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
679        sorted_offsets.sort_unstable();
680        let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
681        for window in sorted_offsets.windows(2) {
682            next_offset.insert(window[0], window[1]);
683        }
684        if let Some(last) = sorted_offsets.last() {
685            next_offset.insert(*last, trailer_offset as u64);
686        }
687
688        // Compute delta depth by following base offsets. Depth of a non-delta is
689        // 0; a delta's depth is its base's depth + 1. `index_by_offset` lets an
690        // ofs-delta find its base's index; a ref-delta resolves its base oid to
691        // an in-pack offset when present (thin-pack external bases are not stored
692        // in this pack, but verify-pack only ever runs on self-contained packs).
693        let mut depth = vec![None; on_disk.len()];
694        fn resolve_depth(
695            idx: usize,
696            on_disk: &[OnDiskEntry],
697            index_by_offset: &HashMap<u64, usize>,
698            offset_of_oid: &HashMap<ObjectId, u64>,
699            depth: &mut [Option<u32>],
700        ) -> u32 {
701            if let Some(d) = depth[idx] {
702                return d;
703            }
704            let computed = match &on_disk[idx].base {
705                None => 0,
706                Some(base) => {
707                    let base_idx = match base {
708                        DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
709                        DeltaBase::Ref(oid) => offset_of_oid
710                            .get(oid)
711                            .and_then(|off| index_by_offset.get(off).copied()),
712                    };
713                    match base_idx {
714                        Some(bi) => {
715                            resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
716                        }
717                        // Base not in this pack (thin pack); treat as depth 1.
718                        None => 1,
719                    }
720                }
721            };
722            depth[idx] = Some(computed);
723            computed
724        }
725        let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
726        for (off, oid) in &oid_by_offset {
727            offset_of_oid.insert(*oid, *off);
728        }
729        for idx in 0..on_disk.len() {
730            resolve_depth(idx, &on_disk, &index_by_offset, &offset_of_oid, &mut depth);
731        }
732
733        let mut stats = Vec::with_capacity(on_disk.len());
734        for (idx, entry) in on_disk.iter().enumerate() {
735            let off = entry.offset;
736            let object = resolved_by_offset.get(&off).ok_or_else(|| {
737                GitError::InvalidFormat("pack offset missing from resolved set".into())
738            })?;
739            let size_in_pack = next_offset
740                .get(&off)
741                .copied()
742                .unwrap_or(trailer_offset as u64)
743                .saturating_sub(off);
744            let base_oid = match &entry.base {
745                None => None,
746                Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
747                Some(DeltaBase::Ref(oid)) => Some(*oid),
748            };
749            stats.push(PackVerifyStat {
750                oid: object.entry.oid,
751                object_type: object.object.object_type,
752                // git prints the on-disk stream size: object body size for an
753                // undeltified entry, delta-instruction stream size for a delta.
754                size: entry.stream_size,
755                size_in_pack,
756                offset: off,
757                delta_depth: depth[idx].unwrap_or(0),
758                base_oid,
759            });
760        }
761        // Emit in pack offset order, matching git's read order.
762        stats.sort_by_key(|stat| stat.offset);
763
764        Ok(PackVerifyStats {
765            objects: stats,
766            checksum: pack.checksum,
767        })
768    }
769
770    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
771    where
772        T: Borrow<EncodedObject>,
773    {
774        Self::write_undeltified(objects, ObjectFormat::Sha1)
775    }
776
777    /// Write a pack with every object stored undeltified (no delta entries).
778    ///
779    /// This is the simple, self-contained encoding; objects appear in the given
780    /// order. For smaller output that exploits similarity between objects, use
781    /// [`PackFile::write_packed`].
782    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
783    where
784        T: Borrow<EncodedObject>,
785    {
786        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
787        Self::write_packed_impl(objects, format, &options)
788    }
789
790    /// Write a pack using sliding-window delta selection with git-compatible
791    /// defaults (window [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`],
792    /// ofs-deltas, self-contained).
793    ///
794    /// Objects are grouped by type and ordered for good deltas, then each is
795    /// compared against a window of previously emitted candidates; the smallest
796    /// acceptable delta is kept, otherwise the object is stored undeltified. The
797    /// result round-trips through [`PackFile::parse`].
798    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
799    where
800        T: Borrow<EncodedObject>,
801    {
802        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
803    }
804
805    /// Like [`PackFile::write_packed`] but with caller-supplied
806    /// [`PackWriteOptions`] (window, depth, base-reference style, and optional
807    /// external thin bases).
808    pub fn write_packed_with_options<T>(
809        objects: &[T],
810        format: ObjectFormat,
811        options: &PackWriteOptions,
812    ) -> Result<PackWrite>
813    where
814        T: Borrow<EncodedObject>,
815    {
816        Self::write_packed_impl(objects, format, options)
817    }
818
819    /// Like [`PackFile::write_packed`], but uses caller-supplied object ids
820    /// instead of re-hashing each object before pack planning.
821    ///
822    /// This is intended for object-database paths that reached each object by
823    /// its id and already trust that id/object mapping. The function validates
824    /// id formats and duplicate ids, but it does not re-hash object bodies; use
825    /// [`PackFile::write_packed`] when the ids are not already known to be
826    /// canonical.
827    pub fn write_packed_with_known_ids(
828        inputs: &[PackInput<'_>],
829        format: ObjectFormat,
830    ) -> Result<PackWrite> {
831        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
832    }
833
834    /// Like [`PackFile::write_packed_with_known_ids`] but with caller-supplied
835    /// [`PackWriteOptions`].
836    pub fn write_packed_with_known_ids_and_options(
837        inputs: &[PackInput<'_>],
838        format: ObjectFormat,
839        options: &PackWriteOptions,
840    ) -> Result<PackWrite> {
841        if inputs.len() > u32::MAX as usize {
842            return Err(GitError::InvalidFormat("too many pack objects".into()));
843        }
844        let mut objects = Vec::with_capacity(inputs.len());
845        let mut object_ids = Vec::with_capacity(inputs.len());
846        for input in inputs {
847            if input.oid.format() != format {
848                return Err(GitError::InvalidObjectId(format!(
849                    "pack object id {} uses {}, pack uses {}",
850                    input.oid,
851                    input.oid.format().name(),
852                    format.name()
853                )));
854            }
855            objects.push(input.object);
856            object_ids.push(*input.oid);
857        }
858        Self::write_packed_from_parts(objects, object_ids, format, options)
859    }
860
861    /// Write a thin pack: objects may be deltified against `external_bases`
862    /// that are *not* included in the pack, referenced by ref-delta to their
863    /// object id.
864    ///
865    /// The receiver must already have (or otherwise obtain) those base objects
866    /// and resolve the pack with [`PackFile::parse_thin`]. Window and depth use
867    /// the defaults; pass options via [`PackFile::write_packed_with_options`]
868    /// with [`PackWriteOptions::with_thin_bases`] for finer control.
869    pub fn write_thin<T>(
870        objects: &[T],
871        format: ObjectFormat,
872        external_bases: HashMap<ObjectId, EncodedObject>,
873    ) -> Result<PackWrite>
874    where
875        T: Borrow<EncodedObject>,
876    {
877        let options = PackWriteOptions::new().with_thin_bases(external_bases);
878        Self::write_packed_impl(objects, format, &options)
879    }
880
881    fn write_packed_impl<T>(
882        objects: &[T],
883        format: ObjectFormat,
884        options: &PackWriteOptions,
885    ) -> Result<PackWrite>
886    where
887        T: Borrow<EncodedObject>,
888    {
889        if objects.len() > u32::MAX as usize {
890            return Err(GitError::InvalidFormat("too many pack objects".into()));
891        }
892        let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
893
894        // Compute object ids up front; they are needed both for the index and,
895        // for ref-deltas, inside the pack entries themselves.
896        let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
897        for object in &objects {
898            object_ids.push(object.object_id(format)?);
899        }
900        Self::write_packed_from_parts(objects, object_ids, format, options)
901    }
902
903    fn write_packed_from_parts(
904        objects: Vec<&EncodedObject>,
905        object_ids: Vec<ObjectId>,
906        format: ObjectFormat,
907        options: &PackWriteOptions,
908    ) -> Result<PackWrite> {
909        let mut seen = HashSet::with_capacity(object_ids.len());
910        for oid in &object_ids {
911            if !seen.insert(oid) {
912                return Err(GitError::InvalidFormat(format!(
913                    "pack contains duplicate object id {oid}"
914                )));
915            }
916        }
917
918        // Validate external thin bases share the pack's hash format.
919        for oid in options.thin_bases.keys() {
920            if oid.format() != format {
921                return Err(GitError::InvalidObjectId(
922                    "thin pack base object id format does not match pack format".into(),
923                ));
924            }
925        }
926
927        // Decide, for each object, whether it is stored undeltified or as a
928        // delta against another object (in-pack or an external thin base), and
929        // obtain the emit order. In-pack deltas only ever reference candidates
930        // that appear earlier in `order`, so emitting in `order` guarantees a
931        // base is always written before any object that deltas against it.
932        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
933
934        let mut pack = Vec::new();
935        pack.extend_from_slice(b"PACK");
936        pack.extend_from_slice(&2u32.to_be_bytes());
937        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
938
939        let mut index_entries = Vec::with_capacity(objects.len());
940        let mut delta_count = 0u32;
941        // Pack offset at which each original object index was written, or
942        // `None` until it has been emitted.
943        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
944
945        let compressed_payloads =
946            compress_planned_payloads(&objects, &plan, &order, options.compression_level)?;
947
948        for (order_pos, &idx) in order.iter().enumerate() {
949            let offset = pack.len() as u64;
950            let mut entry_bytes = Vec::new();
951            match &plan[idx].base {
952                PlannedBase::None => {
953                    write_entry_header(
954                        &mut entry_bytes,
955                        objects[idx].object_type,
956                        objects[idx].body.len() as u64,
957                    );
958                }
959                PlannedBase::InPack { base_idx, delta } => {
960                    delta_count += 1;
961                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
962                        GitError::InvalidFormat(
963                            "in-pack delta base emitted after dependent object".into(),
964                        )
965                    })?;
966                    if options.prefer_ofs_delta {
967                        write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
968                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
969                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
970                        })?;
971                        write_ofs_delta_offset(&mut entry_bytes, relative)?;
972                    } else {
973                        write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
974                        entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
975                    }
976                }
977                PlannedBase::External { base_oid, delta } => {
978                    delta_count += 1;
979                    write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
980                    entry_bytes.extend_from_slice(base_oid.as_bytes());
981                }
982            }
983            entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
984            let crc32 = crc32fast::hash(&entry_bytes);
985            pack.extend_from_slice(&entry_bytes);
986            written_offsets[idx] = Some(offset);
987            index_entries.push(PackIndexEntry {
988                oid: object_ids[idx].clone(),
989                crc32,
990                offset,
991            });
992        }
993
994        let checksum = sley_core::digest_bytes(format, &pack)?;
995        pack.extend_from_slice(checksum.as_bytes());
996        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
997        Ok(PackWrite {
998            pack,
999            index,
1000            checksum,
1001            entries: index_entries,
1002            delta_count,
1003        })
1004    }
1005}
1006
1007impl<'a> PackIndexView<'a> {
1008    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
1009        Self::parse(bytes, ObjectFormat::Sha1)
1010    }
1011
1012    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1013        Self::parse_impl(bytes, format, true, true)
1014    }
1015
1016    /// Parse and validate the index layout without recomputing the trailing
1017    /// index checksum. The checksum stored in the file is still exposed via
1018    /// [`PackIndexView::index_checksum`].
1019    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1020        Self::parse_impl(bytes, format, false, true)
1021    }
1022
1023    /// Parse a local/trusted pack index without recomputing the trailing index
1024    /// checksum or walking every entry for canonical-order validation.
1025    ///
1026    /// This still validates the table layout and all lookup paths remain
1027    /// bounds-checked, but it avoids O(number-of-objects) startup validation for
1028    /// repository-owned `.idx` files in hot read paths.
1029    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1030        Self::parse_impl(bytes, format, false, false)
1031    }
1032
1033    pub fn count(&self) -> usize {
1034        self.count
1035    }
1036
1037    pub fn fanout(&self) -> &[u32; 256] {
1038        &self.fanout
1039    }
1040
1041    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1042        if oid.format() != self.format {
1043            return None;
1044        }
1045        let bucket = usize::from(oid.as_bytes()[0]);
1046        let mut start = if bucket == 0 {
1047            0
1048        } else {
1049            self.fanout[bucket - 1] as usize
1050        };
1051        let mut end = self.fanout[bucket] as usize;
1052        let target = oid.as_bytes();
1053
1054        while start < end {
1055            let mid = start + (end - start) / 2;
1056            match self.oid_bytes_at(mid).cmp(target) {
1057                std::cmp::Ordering::Less => start = mid + 1,
1058                std::cmp::Ordering::Equal => return self.lookup_at(mid),
1059                std::cmp::Ordering::Greater => end = mid,
1060            }
1061        }
1062        None
1063    }
1064
1065    fn parse_impl(
1066        bytes: &'a [u8],
1067        format: ObjectFormat,
1068        verify_checksum: bool,
1069        validate_entries: bool,
1070    ) -> Result<Self> {
1071        let hash_len = format.raw_len();
1072        if bytes.len() < 4 {
1073            return Err(GitError::InvalidFormat("pack index too short".into()));
1074        }
1075        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1076            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1077        }
1078        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1079            return Err(GitError::InvalidFormat("pack index too short".into()));
1080        }
1081        let version = u32_be(&bytes[4..8]);
1082        if version != 2 {
1083            return Err(GitError::Unsupported(format!(
1084                "pack index version {version}"
1085            )));
1086        }
1087        let index_checksum_offset = bytes.len() - hash_len;
1088        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1089        if verify_checksum {
1090            let actual_index_checksum =
1091                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1092            if actual_index_checksum != index_checksum {
1093                return Err(GitError::InvalidFormat(format!(
1094                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1095                )));
1096            }
1097        }
1098
1099        let mut offset = 8usize;
1100        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1101        let count = fanout[255] as usize;
1102        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1103        offset = oid_table.end;
1104        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1105        offset = crc_table.end;
1106        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1107        offset = small_offset_table.end;
1108
1109        let large_offset_count = (0..count)
1110            .filter(|idx| {
1111                let start = small_offset_table.start + idx * 4;
1112                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1113            })
1114            .count();
1115        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1116        offset = large_offset_table.end;
1117
1118        let expected_trailer_offset = bytes.len() - hash_len * 2;
1119        if offset != expected_trailer_offset {
1120            if !verify_checksum && offset < expected_trailer_offset {
1121                large_offset_table = large_offset_table.start..expected_trailer_offset;
1122                offset = expected_trailer_offset;
1123            } else {
1124                return Err(GitError::InvalidFormat(format!(
1125                    "pack index has {} unexpected bytes before trailer",
1126                    expected_trailer_offset.saturating_sub(offset)
1127                )));
1128            }
1129        }
1130        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1131
1132        let view = Self {
1133            version,
1134            count,
1135            fanout,
1136            pack_checksum,
1137            index_checksum,
1138            bytes,
1139            format,
1140            tables: PackIndexViewTables::V2 {
1141                oid_table,
1142                crc_table,
1143                small_offset_table,
1144                large_offset_table,
1145            },
1146        };
1147        if validate_entries {
1148            view.validate_v2_entries()?;
1149        }
1150        Ok(view)
1151    }
1152
1153    fn parse_v1_impl(
1154        bytes: &'a [u8],
1155        format: ObjectFormat,
1156        verify_checksum: bool,
1157        validate_entries: bool,
1158    ) -> Result<Self> {
1159        let hash_len = format.raw_len();
1160        if bytes.len() < 256 * 4 + 2 * hash_len {
1161            return Err(GitError::InvalidFormat("pack index too short".into()));
1162        }
1163        let index_checksum_offset = bytes.len() - hash_len;
1164        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1165        if verify_checksum {
1166            let actual_index_checksum =
1167                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1168            if actual_index_checksum != index_checksum {
1169                return Err(GitError::InvalidFormat(format!(
1170                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1171                )));
1172            }
1173        }
1174
1175        let mut offset = 0usize;
1176        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1177        let count = fanout[255] as usize;
1178        let entry_len = hash_len
1179            .checked_add(4)
1180            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1181        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1182        offset = entry_table.end;
1183        let expected_trailer_offset = bytes.len() - hash_len * 2;
1184        if offset != expected_trailer_offset {
1185            return Err(GitError::InvalidFormat(format!(
1186                "pack index has {} unexpected bytes before trailer",
1187                expected_trailer_offset.saturating_sub(offset)
1188            )));
1189        }
1190        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1191
1192        let view = Self {
1193            version: 1,
1194            count,
1195            fanout,
1196            pack_checksum,
1197            index_checksum,
1198            bytes,
1199            format,
1200            tables: PackIndexViewTables::V1 { entry_table },
1201        };
1202        if validate_entries {
1203            view.validate_v1_entries()?;
1204        }
1205        Ok(view)
1206    }
1207
1208    fn validate_v2_entries(&self) -> Result<()> {
1209        let PackIndexViewTables::V2 {
1210            oid_table,
1211            small_offset_table,
1212            large_offset_table,
1213            ..
1214        } = &self.tables
1215        else {
1216            unreachable!("v2 validation only runs for v2 views");
1217        };
1218        let oid_table = self.slice(oid_table.clone());
1219        let small_offset_table = self.slice(small_offset_table.clone());
1220        let large_offset_table = self.slice(large_offset_table.clone());
1221        let hash_len = self.format.raw_len();
1222        for idx in 0..self.count {
1223            let oid_start = idx * hash_len;
1224            let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1225            if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1226                return Err(GitError::InvalidFormat(
1227                    "pack index object ids are not strictly ascending".into(),
1228                ));
1229            }
1230            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1231
1232            let offset_start = idx * 4;
1233            let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1234            pack_index_v2_offset(raw_offset, large_offset_table)?;
1235        }
1236        Ok(())
1237    }
1238
1239    fn validate_v1_entries(&self) -> Result<()> {
1240        let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1241            unreachable!("v1 validation only runs for v1 views");
1242        };
1243        let entry_table = self.slice(entry_table.clone());
1244        let hash_len = self.format.raw_len();
1245        let entry_len = hash_len
1246            .checked_add(4)
1247            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1248        for idx in 0..self.count {
1249            let start = idx * entry_len;
1250            let oid_start = start + 4;
1251            let oid_bytes = &entry_table[oid_start..start + entry_len];
1252            if idx > 0 {
1253                let previous_oid_start = oid_start - entry_len;
1254                let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1255                if previous_oid >= oid_bytes {
1256                    return Err(GitError::InvalidFormat(
1257                        "pack index object ids are not strictly sorted".into(),
1258                    ));
1259                }
1260            }
1261            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1262        }
1263        Ok(())
1264    }
1265
1266    fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1267        let hash_len = self.format.raw_len();
1268        match &self.tables {
1269            PackIndexViewTables::V1 { entry_table } => {
1270                let entry_table = self.slice(entry_table.clone());
1271                let entry_len = hash_len + 4;
1272                let start = idx * entry_len + 4;
1273                &entry_table[start..start + hash_len]
1274            }
1275            PackIndexViewTables::V2 { oid_table, .. } => {
1276                let oid_table = self.slice(oid_table.clone());
1277                let start = idx * hash_len;
1278                &oid_table[start..start + hash_len]
1279            }
1280        }
1281    }
1282
1283    fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1284        if idx >= self.count {
1285            return None;
1286        }
1287        let hash_len = self.format.raw_len();
1288        match &self.tables {
1289            PackIndexViewTables::V1 { entry_table } => {
1290                let entry_table = self.slice(entry_table.clone());
1291                let entry_len = hash_len + 4;
1292                let start = idx * entry_len;
1293                Some(PackIndexLookup {
1294                    crc32: 0,
1295                    offset: u64::from(u32_be(&entry_table[start..start + 4])),
1296                })
1297            }
1298            PackIndexViewTables::V2 {
1299                crc_table,
1300                small_offset_table,
1301                large_offset_table,
1302                ..
1303            } => {
1304                let crc_table = self.slice(crc_table.clone());
1305                let small_offset_table = self.slice(small_offset_table.clone());
1306                let large_offset_table = self.slice(large_offset_table.clone());
1307                let crc_start = idx * 4;
1308                let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1309                Some(PackIndexLookup {
1310                    crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1311                    offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1312                })
1313            }
1314        }
1315    }
1316
1317    fn slice(&self, range: Range<usize>) -> &'a [u8] {
1318        &self.bytes[range]
1319    }
1320}
1321
1322impl PackIndexViewData {
1323    pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1324        Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1325    }
1326
1327    /// Parse and validate an owned index view without recomputing the trailing
1328    /// index checksum. The stored checksum is still exposed via
1329    /// [`PackIndexViewData::index_checksum`].
1330    pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1331        Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1332    }
1333
1334    /// Parse a local/trusted owned index view without the checksum or full-entry
1335    /// validation passes.
1336    pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1337        Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1338    }
1339
1340    pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
1341        Self::parse_impl(bytes, format, true, true)
1342    }
1343
1344    pub fn parse_source_without_checksum(
1345        bytes: Arc<dyn PackIndexByteSource>,
1346        format: ObjectFormat,
1347    ) -> Result<Self> {
1348        Self::parse_impl(bytes, format, false, true)
1349    }
1350
1351    pub fn parse_trusted_source_without_checksum(
1352        bytes: Arc<dyn PackIndexByteSource>,
1353        format: ObjectFormat,
1354    ) -> Result<Self> {
1355        Self::parse_impl(bytes, format, false, false)
1356    }
1357
1358    pub fn count(&self) -> usize {
1359        self.count
1360    }
1361
1362    pub fn fanout(&self) -> &[u32; 256] {
1363        &self.fanout
1364    }
1365
1366    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1367        self.as_view().find(oid)
1368    }
1369
1370    pub fn as_view(&self) -> PackIndexView<'_> {
1371        PackIndexView {
1372            version: self.version,
1373            count: self.count,
1374            fanout: self.fanout,
1375            pack_checksum: self.pack_checksum,
1376            index_checksum: self.index_checksum,
1377            bytes: self.bytes.as_bytes(),
1378            format: self.format,
1379            tables: self.tables.clone(),
1380        }
1381    }
1382
1383    fn parse_impl(
1384        bytes: Arc<dyn PackIndexByteSource>,
1385        format: ObjectFormat,
1386        verify_checksum: bool,
1387        validate_entries: bool,
1388    ) -> Result<Self> {
1389        let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1390            let view = PackIndexView::parse_impl(
1391                bytes.as_bytes(),
1392                format,
1393                verify_checksum,
1394                validate_entries,
1395            )?;
1396            (
1397                view.version,
1398                view.count,
1399                view.fanout,
1400                view.pack_checksum,
1401                view.index_checksum,
1402                view.tables,
1403            )
1404        };
1405        Ok(Self {
1406            version,
1407            count,
1408            fanout,
1409            pack_checksum,
1410            index_checksum,
1411            bytes,
1412            format,
1413            tables,
1414        })
1415    }
1416}
1417
1418impl PackIndex {
1419    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1420        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1421    }
1422
1423    pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1424        let trailer_len = format.raw_len();
1425        if pack_bytes.len() < 12 + trailer_len {
1426            return Err(GitError::InvalidFormat("pack file too short".into()));
1427        }
1428        let trailer_offset = pack_bytes.len() - trailer_len;
1429        let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1430        let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1431        if pack_checksum != expected {
1432            return Err(GitError::InvalidFormat(format!(
1433                "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1434            )));
1435        }
1436
1437        if &pack_bytes[..4] != b"PACK" {
1438            return Err(GitError::InvalidFormat("missing PACK signature".into()));
1439        }
1440        let version = u32_be(&pack_bytes[4..8]);
1441        if version != 2 && version != 3 {
1442            return Err(GitError::Unsupported(format!("pack version {version}")));
1443        }
1444        let count = u32_be(&pack_bytes[8..12]) as usize;
1445        let mut offset = 12usize;
1446        let mut parsed_entries = Vec::with_capacity(count);
1447        let mut raw_entries = Vec::with_capacity(count);
1448        for _ in 0..count {
1449            let entry_offset = offset;
1450            let header = parse_entry_header(pack_bytes, &mut offset)?;
1451            let base = match header.kind {
1452                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1453                    pack_bytes,
1454                    &mut offset,
1455                    entry_offset as u64,
1456                )?)),
1457                PackObjectKind::RefDelta => {
1458                    let hash_len = format.raw_len();
1459                    if offset + hash_len > trailer_offset {
1460                        return Err(GitError::InvalidFormat(
1461                            "truncated ref-delta base object id".into(),
1462                        ));
1463                    }
1464                    let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1465                    offset += hash_len;
1466                    Some(DeltaBase::Ref(oid))
1467                }
1468                _ => None,
1469            };
1470            let mut body = Vec::new();
1471            let consumed = inflate_into(
1472                &pack_bytes[offset..trailer_offset],
1473                &mut body,
1474                header.size.min(usize::MAX as u64) as usize,
1475            )?;
1476            if body.len() as u64 != header.size {
1477                return Err(GitError::InvalidObject(format!(
1478                    "pack object declared {} bytes, decoded {}",
1479                    header.size,
1480                    body.len()
1481                )));
1482            }
1483            if consumed == 0 {
1484                return Err(GitError::InvalidFormat(
1485                    "empty compressed pack entry".into(),
1486                ));
1487            }
1488            offset = offset
1489                .checked_add(consumed)
1490                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1491            if offset > trailer_offset {
1492                return Err(GitError::InvalidFormat(
1493                    "pack entry extends past checksum".into(),
1494                ));
1495            }
1496            raw_entries.push((
1497                entry_offset as u64,
1498                crc32fast::hash(&pack_bytes[entry_offset..offset]),
1499            ));
1500            if let Some(base) = base {
1501                parsed_entries.push(ParsedPackEntry::Delta {
1502                    base,
1503                    compressed_size: consumed as u64,
1504                    delta_size: header.size,
1505                    offset: entry_offset as u64,
1506                    delta: body,
1507                });
1508            } else {
1509                let object_type = match header.kind {
1510                    PackObjectKind::Commit => ObjectType::Commit,
1511                    PackObjectKind::Tree => ObjectType::Tree,
1512                    PackObjectKind::Blob => ObjectType::Blob,
1513                    PackObjectKind::Tag => ObjectType::Tag,
1514                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1515                };
1516                let object = EncodedObject::new(object_type, body);
1517                let oid = object.object_id(format)?;
1518                parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1519                    entry: PackEntry {
1520                        oid,
1521                        compressed_size: consumed as u64,
1522                        uncompressed_size: header.size,
1523                        offset: entry_offset as u64,
1524                    },
1525                    object,
1526                }));
1527            }
1528        }
1529        if offset != trailer_offset {
1530            return Err(GitError::InvalidFormat(format!(
1531                "pack has {} trailing bytes before checksum",
1532                trailer_offset - offset
1533            )));
1534        }
1535
1536        let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1537        let entries = resolved
1538            .iter()
1539            .zip(raw_entries)
1540            .map(|(object, (offset, crc32))| PackIndexEntry {
1541                oid: object.entry.oid,
1542                crc32,
1543                offset,
1544            })
1545            .collect::<Vec<_>>();
1546        let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1547        Ok(PackIndexBuild {
1548            index,
1549            pack_checksum,
1550            entries,
1551        })
1552    }
1553
1554    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1555        Self::parse(bytes, ObjectFormat::Sha1)
1556    }
1557
1558    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1559        Self::parse_impl(bytes, format, true)
1560    }
1561
1562    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1563        Self::parse_impl(bytes, format, false)
1564    }
1565
1566    fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1567        let hash_len = format.raw_len();
1568        if bytes.len() < 4 {
1569            return Err(GitError::InvalidFormat("pack index too short".into()));
1570        }
1571        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1572            return Self::parse_v1_impl(bytes, format, verify_checksum);
1573        }
1574        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1575            return Err(GitError::InvalidFormat("pack index too short".into()));
1576        }
1577        let version = u32_be(&bytes[4..8]);
1578        if version != 2 {
1579            return Err(GitError::Unsupported(format!(
1580                "pack index version {version}"
1581            )));
1582        }
1583        let index_checksum_offset = bytes.len() - hash_len;
1584        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1585        if verify_checksum {
1586            let actual_index_checksum =
1587                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1588            if actual_index_checksum != index_checksum {
1589                return Err(GitError::InvalidFormat(format!(
1590                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1591                )));
1592            }
1593        }
1594
1595        let mut offset = 8usize;
1596        let mut fanout = [0u32; 256];
1597        let mut previous = 0u32;
1598        for slot in &mut fanout {
1599            *slot = u32_be(&bytes[offset..offset + 4]);
1600            if *slot < previous {
1601                return Err(GitError::InvalidFormat(
1602                    "pack index fanout is not monotonic".into(),
1603                ));
1604            }
1605            previous = *slot;
1606            offset += 4;
1607        }
1608        let count = fanout[255] as usize;
1609        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1610        offset = oid_table.end;
1611        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1612        offset = crc_table.end;
1613        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1614        offset = small_offset_table.end;
1615
1616        let large_offset_count = (0..count)
1617            .filter(|idx| {
1618                let start = small_offset_table.start + idx * 4;
1619                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1620            })
1621            .count();
1622        let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1623        offset = large_offset_table.end;
1624
1625        let expected_trailer_offset = bytes.len() - hash_len * 2;
1626        if offset != expected_trailer_offset {
1627            if !verify_checksum && offset < expected_trailer_offset {
1628                large_offset_table = large_offset_table.start..expected_trailer_offset;
1629                offset = expected_trailer_offset;
1630            } else {
1631                return Err(GitError::InvalidFormat(format!(
1632                    "pack index has {} unexpected bytes before trailer",
1633                    expected_trailer_offset.saturating_sub(offset)
1634                )));
1635            }
1636        }
1637        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1638
1639        let mut entries = Vec::with_capacity(count);
1640        for idx in 0..count {
1641            let oid_start = oid_table.start + idx * hash_len;
1642            let crc_start = crc_table.start + idx * 4;
1643            let offset_start = small_offset_table.start + idx * 4;
1644            let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1645            // Object ids must be strictly ascending: lookup binary-searches them,
1646            // and the fanout must match the first byte. A malformed/forged index
1647            // (e.g. from a received pack) would otherwise yield silent misses.
1648            if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1649                return Err(GitError::InvalidFormat(
1650                    "pack index object ids are not strictly ascending".into(),
1651                ));
1652            }
1653            let expected_min = if oid_bytes[0] == 0 {
1654                0
1655            } else {
1656                fanout[usize::from(oid_bytes[0] - 1)]
1657            };
1658            if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1659                return Err(GitError::InvalidFormat(
1660                    "pack index object id is outside its fanout bucket".into(),
1661                ));
1662            }
1663            let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1664            let offset = if raw_offset & 0x8000_0000 == 0 {
1665                u64::from(raw_offset)
1666            } else {
1667                let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1668                let large_start = large_offset_table.start + large_idx * 8;
1669                if large_idx >= large_offset_table.len() / 8 {
1670                    return Err(GitError::InvalidFormat(
1671                        "pack index large offset points past table".into(),
1672                    ));
1673                }
1674                u64_be(&bytes[large_start..large_start + 8])
1675            };
1676            entries.push(PackIndexEntry {
1677                oid: ObjectId::from_raw(format, oid_bytes)?,
1678                crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1679                offset,
1680            });
1681        }
1682        Ok(Self {
1683            version,
1684            fanout,
1685            entries,
1686            pack_checksum,
1687            index_checksum,
1688        })
1689    }
1690
1691    fn parse_v1_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
1692        let hash_len = format.raw_len();
1693        if bytes.len() < 256 * 4 + 2 * hash_len {
1694            return Err(GitError::InvalidFormat("pack index too short".into()));
1695        }
1696        let index_checksum_offset = bytes.len() - hash_len;
1697        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1698        if verify_checksum {
1699            let actual_index_checksum =
1700                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1701            if actual_index_checksum != index_checksum {
1702                return Err(GitError::InvalidFormat(format!(
1703                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1704                )));
1705            }
1706        }
1707
1708        let mut offset = 0usize;
1709        let mut fanout = [0u32; 256];
1710        let mut previous = 0u32;
1711        for slot in &mut fanout {
1712            *slot = u32_be(&bytes[offset..offset + 4]);
1713            if *slot < previous {
1714                return Err(GitError::InvalidFormat(
1715                    "pack index fanout is not monotonic".into(),
1716                ));
1717            }
1718            previous = *slot;
1719            offset += 4;
1720        }
1721        let count = fanout[255] as usize;
1722        let entry_len = hash_len
1723            .checked_add(4)
1724            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1725        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1726        offset = entry_table.end;
1727        let expected_trailer_offset = bytes.len() - hash_len * 2;
1728        if offset != expected_trailer_offset {
1729            return Err(GitError::InvalidFormat(format!(
1730                "pack index has {} unexpected bytes before trailer",
1731                expected_trailer_offset.saturating_sub(offset)
1732            )));
1733        }
1734        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1735
1736        let mut entries = Vec::with_capacity(count);
1737        let mut previous_oid: Option<ObjectId> = None;
1738        for idx in 0..count {
1739            let start = entry_table.start + idx * entry_len;
1740            let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1741            if let Some(previous) = &previous_oid
1742                && previous.as_bytes() >= oid.as_bytes()
1743            {
1744                return Err(GitError::InvalidFormat(
1745                    "pack index object ids are not strictly sorted".into(),
1746                ));
1747            }
1748            previous_oid = Some(oid);
1749            entries.push(PackIndexEntry {
1750                oid,
1751                crc32: 0,
1752                offset: u64::from(u32_be(&bytes[start..start + 4])),
1753            });
1754        }
1755        Ok(Self {
1756            version: 1,
1757            fanout,
1758            entries,
1759            pack_checksum,
1760            index_checksum,
1761        })
1762    }
1763
1764    pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1765        self.entries
1766            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1767            .ok()
1768            .map(|idx| &self.entries[idx])
1769    }
1770
1771    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1772        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1773    }
1774
1775    pub fn write_v2(
1776        format: ObjectFormat,
1777        entries: &[PackIndexEntry],
1778        pack_checksum: &ObjectId,
1779    ) -> Result<Vec<u8>> {
1780        if pack_checksum.format() != format {
1781            return Err(GitError::InvalidObjectId(
1782                "pack checksum format does not match index format".into(),
1783            ));
1784        }
1785        let mut entries = entries.iter().collect::<Vec<_>>();
1786        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1787        for pair in entries.windows(2) {
1788            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1789                return Err(GitError::InvalidFormat(format!(
1790                    "pack index contains duplicate object id {}",
1791                    pair[0].oid
1792                )));
1793            }
1794        }
1795        let mut fanout = [0u32; 256];
1796        for entry in &entries {
1797            if entry.oid.format() != format {
1798                return Err(GitError::InvalidObjectId(
1799                    "pack index entry format does not match index format".into(),
1800                ));
1801            }
1802            let first = entry.oid.as_bytes()[0] as usize;
1803            fanout[first] = fanout[first]
1804                .checked_add(1)
1805                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1806        }
1807        let mut running = 0u32;
1808        for slot in &mut fanout {
1809            running = running
1810                .checked_add(*slot)
1811                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1812            *slot = running;
1813        }
1814
1815        let mut index = Vec::new();
1816        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1817        index.extend_from_slice(&2u32.to_be_bytes());
1818        for count in fanout {
1819            index.extend_from_slice(&count.to_be_bytes());
1820        }
1821        for entry in &entries {
1822            index.extend_from_slice(entry.oid.as_bytes());
1823        }
1824        for entry in &entries {
1825            index.extend_from_slice(&entry.crc32.to_be_bytes());
1826        }
1827
1828        let mut large_offsets = Vec::new();
1829        for entry in &entries {
1830            if entry.offset < 0x8000_0000 {
1831                index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1832            } else {
1833                if large_offsets.len() > 0x7fff_ffff {
1834                    return Err(GitError::InvalidFormat(
1835                        "too many large pack offsets".into(),
1836                    ));
1837                }
1838                let large_idx = large_offsets.len() as u32;
1839                index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1840                large_offsets.push(entry.offset);
1841            }
1842        }
1843        for offset in large_offsets {
1844            index.extend_from_slice(&offset.to_be_bytes());
1845        }
1846        index.extend_from_slice(pack_checksum.as_bytes());
1847        let index_checksum = sley_core::digest_bytes(format, &index)?;
1848        index.extend_from_slice(index_checksum.as_bytes());
1849        Ok(index)
1850    }
1851
1852    /// Serialise a version-1 pack `.idx`: a 256-entry fanout, then for each
1853    /// object an inline 4-byte big-endian pack offset immediately followed by
1854    /// its object id (sorted by oid), then the pack checksum and a trailing
1855    /// index checksum. v1 has no CRC table and cannot represent offsets that
1856    /// do not fit in 32 bits.
1857    pub fn write_v1(
1858        format: ObjectFormat,
1859        entries: &[PackIndexEntry],
1860        pack_checksum: &ObjectId,
1861    ) -> Result<Vec<u8>> {
1862        if pack_checksum.format() != format {
1863            return Err(GitError::InvalidObjectId(
1864                "pack checksum format does not match index format".into(),
1865            ));
1866        }
1867        let mut entries = entries.iter().collect::<Vec<_>>();
1868        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1869        for pair in entries.windows(2) {
1870            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1871                return Err(GitError::InvalidFormat(format!(
1872                    "pack index contains duplicate object id {}",
1873                    pair[0].oid
1874                )));
1875            }
1876        }
1877        let mut fanout = [0u32; 256];
1878        for entry in &entries {
1879            if entry.oid.format() != format {
1880                return Err(GitError::InvalidObjectId(
1881                    "pack index entry format does not match index format".into(),
1882                ));
1883            }
1884            if entry.offset > 0xffff_ffff {
1885                return Err(GitError::InvalidFormat(
1886                    "pack offset too large for a version-1 index".into(),
1887                ));
1888            }
1889            let first = entry.oid.as_bytes()[0] as usize;
1890            fanout[first] = fanout[first]
1891                .checked_add(1)
1892                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1893        }
1894        let mut running = 0u32;
1895        for slot in &mut fanout {
1896            running = running
1897                .checked_add(*slot)
1898                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1899            *slot = running;
1900        }
1901
1902        let mut index = Vec::new();
1903        for count in fanout {
1904            index.extend_from_slice(&count.to_be_bytes());
1905        }
1906        for entry in &entries {
1907            index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1908            index.extend_from_slice(entry.oid.as_bytes());
1909        }
1910        index.extend_from_slice(pack_checksum.as_bytes());
1911        let index_checksum = sley_core::digest_bytes(format, &index)?;
1912        index.extend_from_slice(index_checksum.as_bytes());
1913        Ok(index)
1914    }
1915}
1916
1917/// The `.rev` table for a pack: index positions (the rank of each object in
1918/// the oid-sorted `.idx`) listed in pack order (ascending pack offset), as
1919/// upstream `write_rev_file` lays them out. Accepts `entries` in any order;
1920/// the result feeds [`PackReverseIndex::write`].
1921pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1922    let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1923    oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1924    let mut index_position = vec![0u32; entries.len()];
1925    for (position, &entry) in oid_sorted.iter().enumerate() {
1926        index_position[entry] = position as u32;
1927    }
1928    let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1929    by_offset.sort_by_key(|&entry| entries[entry].offset);
1930    by_offset
1931        .into_iter()
1932        .map(|entry| index_position[entry])
1933        .collect()
1934}
1935
1936impl PackReverseIndex {
1937    pub fn write(
1938        format: ObjectFormat,
1939        positions: &[u32],
1940        pack_checksum: &ObjectId,
1941    ) -> Result<Vec<u8>> {
1942        if pack_checksum.format() != format {
1943            return Err(GitError::InvalidObjectId(
1944                "pack checksum format does not match reverse index format".into(),
1945            ));
1946        }
1947        validate_position_permutation(positions)?;
1948
1949        let mut out = Vec::new();
1950        out.extend_from_slice(b"RIDX");
1951        out.extend_from_slice(&1u32.to_be_bytes());
1952        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1953        for position in positions {
1954            out.extend_from_slice(&position.to_be_bytes());
1955        }
1956        out.extend_from_slice(pack_checksum.as_bytes());
1957        let checksum = sley_core::digest_bytes(format, &out)?;
1958        out.extend_from_slice(checksum.as_bytes());
1959        Ok(out)
1960    }
1961
1962    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1963        let hash_len = format.raw_len();
1964        let table_len = object_count
1965            .checked_mul(4)
1966            .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1967        let min_len = 12usize
1968            .checked_add(table_len)
1969            .and_then(|len| len.checked_add(hash_len * 2))
1970            .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1971        if bytes.len() < min_len {
1972            return Err(GitError::InvalidFormat("reverse index too short".into()));
1973        }
1974        if bytes.len() != min_len {
1975            return Err(GitError::InvalidFormat(format!(
1976                "reverse index has {} trailing bytes",
1977                bytes.len() - min_len
1978            )));
1979        }
1980        if &bytes[..4] != b"RIDX" {
1981            return Err(GitError::InvalidFormat(
1982                "missing reverse index signature".into(),
1983            ));
1984        }
1985        let version = u32_be(&bytes[4..8]);
1986        if version != 1 {
1987            return Err(GitError::Unsupported(format!(
1988                "reverse index version {version}"
1989            )));
1990        }
1991        let hash_id = u32_be(&bytes[8..12]);
1992        if hash_id != hash_function_id(format) {
1993            return Err(GitError::InvalidFormat(format!(
1994                "reverse index hash id {hash_id} does not match {}",
1995                format.name()
1996            )));
1997        }
1998
1999        let index_checksum_offset = bytes.len() - hash_len;
2000        let actual_index_checksum =
2001            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2002        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2003        if actual_index_checksum != index_checksum {
2004            return Err(GitError::InvalidFormat(format!(
2005                "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2006            )));
2007        }
2008
2009        let pack_checksum_offset = index_checksum_offset - hash_len;
2010        let pack_checksum =
2011            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2012        let mut positions = Vec::with_capacity(object_count);
2013        let mut offset = 12usize;
2014        for _ in 0..object_count {
2015            let position = u32_be(&bytes[offset..offset + 4]);
2016            positions.push(position);
2017            offset += 4;
2018        }
2019        validate_position_permutation(&positions)?;
2020
2021        Ok(Self {
2022            version,
2023            format,
2024            positions,
2025            pack_checksum,
2026            index_checksum,
2027        })
2028    }
2029}
2030
2031impl PackMtimes {
2032    pub fn write(
2033        format: ObjectFormat,
2034        mtimes: &[u32],
2035        pack_checksum: &ObjectId,
2036    ) -> Result<Vec<u8>> {
2037        if pack_checksum.format() != format {
2038            return Err(GitError::InvalidObjectId(
2039                "pack checksum format does not match mtimes format".into(),
2040            ));
2041        }
2042
2043        let mut out = Vec::new();
2044        out.extend_from_slice(b"MTME");
2045        out.extend_from_slice(&1u32.to_be_bytes());
2046        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2047        for mtime in mtimes {
2048            out.extend_from_slice(&mtime.to_be_bytes());
2049        }
2050        out.extend_from_slice(pack_checksum.as_bytes());
2051        let checksum = sley_core::digest_bytes(format, &out)?;
2052        out.extend_from_slice(checksum.as_bytes());
2053        Ok(out)
2054    }
2055
2056    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2057        let hash_len = format.raw_len();
2058        let table_len = object_count
2059            .checked_mul(4)
2060            .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2061        let expected_len = 12usize
2062            .checked_add(table_len)
2063            .and_then(|len| len.checked_add(hash_len * 2))
2064            .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2065        if bytes.len() < expected_len {
2066            return Err(GitError::InvalidFormat("mtimes file too short".into()));
2067        }
2068        if bytes.len() != expected_len {
2069            return Err(GitError::InvalidFormat(format!(
2070                "mtimes file has {} trailing bytes",
2071                bytes.len() - expected_len
2072            )));
2073        }
2074        if &bytes[..4] != b"MTME" {
2075            return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2076        }
2077        let version = u32_be(&bytes[4..8]);
2078        if version != 1 {
2079            return Err(GitError::Unsupported(format!("mtimes version {version}")));
2080        }
2081        let hash_id = u32_be(&bytes[8..12]);
2082        if hash_id != hash_function_id(format) {
2083            return Err(GitError::InvalidFormat(format!(
2084                "mtimes hash id {hash_id} does not match {}",
2085                format.name()
2086            )));
2087        }
2088
2089        let index_checksum_offset = bytes.len() - hash_len;
2090        let actual_index_checksum =
2091            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2092        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2093        if actual_index_checksum != index_checksum {
2094            return Err(GitError::InvalidFormat(format!(
2095                "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2096            )));
2097        }
2098
2099        let pack_checksum_offset = index_checksum_offset - hash_len;
2100        let pack_checksum =
2101            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2102        let mut mtimes = Vec::with_capacity(object_count);
2103        let mut offset = 12usize;
2104        for _ in 0..object_count {
2105            mtimes.push(u32_be(&bytes[offset..offset + 4]));
2106            offset += 4;
2107        }
2108
2109        Ok(Self {
2110            version,
2111            format,
2112            mtimes,
2113            pack_checksum,
2114            index_checksum,
2115        })
2116    }
2117}
2118
2119impl PackBitmapIndex {
2120    pub const OPTION_FULL_DAG: u16 = 0x0001;
2121    pub const OPTION_HASH_CACHE: u16 = 0x0004;
2122
2123    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2124        let hash_len = format.raw_len();
2125        let min_len = 12usize
2126            .checked_add(hash_len * 2)
2127            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2128        if bytes.len() < min_len {
2129            return Err(GitError::InvalidFormat("bitmap index too short".into()));
2130        }
2131        if &bytes[..4] != b"BITM" {
2132            return Err(GitError::InvalidFormat(
2133                "missing bitmap index signature".into(),
2134            ));
2135        }
2136        let version = u16_be(&bytes[4..6]);
2137        if version != 1 {
2138            return Err(GitError::Unsupported(format!(
2139                "bitmap index version {version}"
2140            )));
2141        }
2142        let options = u16_be(&bytes[6..8]);
2143        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2144        if options & !known_options != 0 {
2145            return Err(GitError::Unsupported(format!(
2146                "bitmap index options {:#06x}",
2147                options & !known_options
2148            )));
2149        }
2150        let entry_count = u32_be(&bytes[8..12]) as usize;
2151        let checksum_offset = bytes.len() - hash_len;
2152        let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2153        let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2154        if actual_index_checksum != index_checksum {
2155            return Err(GitError::InvalidFormat(format!(
2156                "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2157            )));
2158        }
2159
2160        let pack_checksum_end = 12usize
2161            .checked_add(hash_len)
2162            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2163        let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2164        let mut offset = pack_checksum_end;
2165        let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2166        let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2167        let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2168        let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2169
2170        let mut entries = Vec::with_capacity(entry_count);
2171        for idx in 0..entry_count {
2172            if checksum_offset.saturating_sub(offset) < 6 {
2173                return Err(GitError::InvalidFormat(
2174                    "truncated bitmap index entry".into(),
2175                ));
2176            }
2177            let object_position = u32_be(&bytes[offset..offset + 4]);
2178            offset += 4;
2179            if object_position as usize >= object_count {
2180                return Err(GitError::InvalidFormat(
2181                    "bitmap index entry points past object table".into(),
2182                ));
2183            }
2184            let xor_offset = bytes[offset];
2185            offset += 1;
2186            if xor_offset as usize > idx || xor_offset > 160 {
2187                return Err(GitError::InvalidFormat(
2188                    "bitmap index entry has invalid XOR offset".into(),
2189                ));
2190            }
2191            let flags = bytes[offset];
2192            offset += 1;
2193            let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2194            entries.push(PackBitmapEntry {
2195                object_position,
2196                xor_offset,
2197                flags,
2198                bitmap,
2199            });
2200        }
2201
2202        let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2203            let cache_len = object_count
2204                .checked_mul(4)
2205                .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2206            if checksum_offset.saturating_sub(offset) < cache_len {
2207                return Err(GitError::InvalidFormat(
2208                    "truncated bitmap hash cache".into(),
2209                ));
2210            }
2211            let mut cache = Vec::with_capacity(object_count);
2212            for _ in 0..object_count {
2213                cache.push(u32_be(&bytes[offset..offset + 4]));
2214                offset += 4;
2215            }
2216            Some(cache)
2217        } else {
2218            None
2219        };
2220
2221        if offset != checksum_offset {
2222            return Err(GitError::InvalidFormat(format!(
2223                "bitmap index has {} trailing bytes",
2224                checksum_offset - offset
2225            )));
2226        }
2227
2228        Ok(Self {
2229            version,
2230            format,
2231            options,
2232            pack_checksum,
2233            index_checksum,
2234            type_bitmaps: PackBitmapTypeBitmaps {
2235                commits,
2236                trees,
2237                blobs,
2238                tags,
2239            },
2240            entries,
2241            name_hash_cache,
2242        })
2243    }
2244
2245    /// Looks up the stored entry whose commit sits at `position` in the
2246    /// oid-sorted pack index (`.idx` order; see [`PackBitmapEntry::object_position`]).
2247    pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2248        self.entries
2249            .iter()
2250            .find(|entry| entry.object_position == position)
2251    }
2252}
2253
2254fn parse_bitmap_ewah(
2255    bytes: &[u8],
2256    offset: &mut usize,
2257    checksum_offset: usize,
2258    _object_count: usize,
2259) -> Result<EwahBitmap> {
2260    if checksum_offset.saturating_sub(*offset) < 12 {
2261        return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2262    }
2263    let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2264    *offset += 4;
2265    let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2266    *offset += 4;
2267    let words_len = word_count
2268        .checked_mul(8)
2269        .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2270    if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2271        return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2272    }
2273    let mut words = Vec::with_capacity(word_count);
2274    for _ in 0..word_count {
2275        words.push(u64_be(&bytes[*offset..*offset + 8]));
2276        *offset += 8;
2277    }
2278    let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2279    *offset += 4;
2280    validate_ewah_words(bit_size, &words, rlw_position)?;
2281    Ok(EwahBitmap {
2282        bit_size,
2283        words,
2284        rlw_position,
2285    })
2286}
2287
2288fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2289    if words.is_empty() {
2290        if rlw_position != 0 || bit_size != 0 {
2291            return Err(GitError::InvalidFormat(
2292                "EWAH bitmap has invalid empty RLW".into(),
2293            ));
2294        }
2295        return Ok(());
2296    }
2297    if rlw_position as usize >= words.len() {
2298        return Err(GitError::InvalidFormat(
2299            "EWAH RLW position points past word table".into(),
2300        ));
2301    }
2302    let mut word_idx = 0usize;
2303    let mut decoded_words = 0u64;
2304    while word_idx < words.len() {
2305        let rlw = words[word_idx];
2306        let run_words = (rlw >> 1) & 0xffff_ffff;
2307        let literal_words = (rlw >> 33) as usize;
2308        word_idx += 1;
2309        word_idx = word_idx
2310            .checked_add(literal_words)
2311            .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
2312        if word_idx > words.len() {
2313            return Err(GitError::InvalidFormat(
2314                "EWAH literal words extend past word table".into(),
2315            ));
2316        }
2317        decoded_words = decoded_words
2318            .checked_add(run_words)
2319            .and_then(|value| value.checked_add(literal_words as u64))
2320            .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2321    }
2322    let decoded_bits = decoded_words
2323        .checked_mul(64)
2324        .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2325    if decoded_bits < u64::from(bit_size) {
2326        return Err(GitError::InvalidFormat(
2327            "EWAH bitmap decodes fewer bits than declared".into(),
2328        ));
2329    }
2330    Ok(())
2331}
2332
2333impl MultiPackIndex {
2334    pub fn write(
2335        format: ObjectFormat,
2336        version: u8,
2337        pack_names: &[String],
2338        objects: &[MultiPackIndexEntry],
2339    ) -> Result<Vec<u8>> {
2340        Self::write_with_reverse_index(format, version, pack_names, objects, None)
2341    }
2342
2343    /// Like [`MultiPackIndex::write`], but when `preferred_pack` is `Some`,
2344    /// additionally emits the `RIDX` chunk: the object order a multi-pack
2345    /// `.bitmap` numbers its bits in ("pseudo-pack order" — every object of
2346    /// the preferred pack first, then the rest by pack id, each pack's slice
2347    /// in offset order), stored as one u32 midx position per object.
2348    ///
2349    /// `preferred_pack` is the pack-int-id receiving pseudo-pack priority; it
2350    /// must be in range.
2351    pub fn write_with_reverse_index(
2352        format: ObjectFormat,
2353        version: u8,
2354        pack_names: &[String],
2355        objects: &[MultiPackIndexEntry],
2356        preferred_pack: Option<u32>,
2357    ) -> Result<Vec<u8>> {
2358        Self::write_with_bitmap_packs(format, version, pack_names, objects, preferred_pack, None)
2359    }
2360
2361    pub fn write_with_bitmap_packs(
2362        format: ObjectFormat,
2363        version: u8,
2364        pack_names: &[String],
2365        objects: &[MultiPackIndexEntry],
2366        preferred_pack: Option<u32>,
2367        bitmapped_packs: Option<&[MultiPackBitmapPack]>,
2368    ) -> Result<Vec<u8>> {
2369        if let Some(preferred) = preferred_pack
2370            && preferred as usize >= pack_names.len()
2371        {
2372            return Err(GitError::InvalidFormat(format!(
2373                "preferred pack {preferred} out of range for {} packs",
2374                pack_names.len()
2375            )));
2376        }
2377        if version != 1 && version != 2 {
2378            return Err(GitError::Unsupported(format!(
2379                "multi-pack-index version {version}"
2380            )));
2381        }
2382        if pack_names.len() > u32::MAX as usize {
2383            return Err(GitError::InvalidFormat(
2384                "too many multi-pack-index packs".into(),
2385            ));
2386        }
2387        if objects.len() > u32::MAX as usize {
2388            return Err(GitError::InvalidFormat(
2389                "too many multi-pack-index objects".into(),
2390            ));
2391        }
2392        if let Some(bitmapped_packs) = bitmapped_packs {
2393            if bitmapped_packs.len() != pack_names.len() {
2394                return Err(GitError::InvalidFormat(
2395                    "multi-pack-index BTMP pack count mismatch".into(),
2396                ));
2397            }
2398            for pack in bitmapped_packs {
2399                let bitmap_end = u64::from(pack.bitmap_pos)
2400                    .checked_add(u64::from(pack.bitmap_nr))
2401                    .ok_or_else(|| {
2402                        GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
2403                    })?;
2404                if bitmap_end > objects.len() as u64 {
2405                    return Err(GitError::InvalidFormat(
2406                        "multi-pack-index BTMP range points past object table".into(),
2407                    ));
2408                }
2409            }
2410        }
2411        validate_midx_pack_names(pack_names)?;
2412        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
2413            return Err(GitError::InvalidFormat(
2414                "multi-pack-index v1 pack names must be sorted".into(),
2415            ));
2416        }
2417
2418        let mut objects = objects.iter().collect::<Vec<_>>();
2419        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2420        let mut previous_oid: Option<&ObjectId> = None;
2421        for object in &objects {
2422            if object.oid.format() != format {
2423                return Err(GitError::InvalidObjectId(
2424                    "multi-pack-index object format does not match index format".into(),
2425                ));
2426            }
2427            if let Some(previous) = previous_oid
2428                && previous.as_bytes() == object.oid.as_bytes()
2429            {
2430                return Err(GitError::InvalidFormat(
2431                    "multi-pack-index contains duplicate object ids".into(),
2432                ));
2433            }
2434            if object.pack_int_id as usize >= pack_names.len() {
2435                return Err(GitError::InvalidFormat(
2436                    "multi-pack-index object points past pack table".into(),
2437                ));
2438            }
2439            previous_oid = Some(&object.oid);
2440        }
2441
2442        let mut large_offsets = Vec::new();
2443        let mut chunks = vec![
2444            (*b"PNAM", write_midx_pack_names(pack_names)),
2445            (*b"OIDF", write_midx_oid_fanout(&objects)?),
2446            (*b"OIDL", write_midx_oid_lookup(&objects)),
2447            (
2448                *b"OOFF",
2449                write_midx_object_offsets(&objects, &mut large_offsets)?,
2450            ),
2451        ];
2452        if !large_offsets.is_empty() {
2453            chunks.push((*b"LOFF", large_offsets));
2454        }
2455        if let Some(preferred) = preferred_pack {
2456            // `objects` is already in midx (oid-sorted) order here; the chunk
2457            // lists each object's midx position in pseudo-pack order.
2458            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
2459            pseudo.sort_by_key(|&midx_pos| {
2460                let object = objects[midx_pos as usize];
2461                (
2462                    object.pack_int_id != preferred,
2463                    object.pack_int_id,
2464                    object.offset,
2465                )
2466            });
2467            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
2468            for midx_pos in pseudo {
2469                ridx.extend_from_slice(&midx_pos.to_be_bytes());
2470            }
2471            chunks.push((*b"RIDX", ridx));
2472        }
2473        if let Some(bitmapped_packs) = bitmapped_packs {
2474            let mut btmp = Vec::with_capacity(bitmapped_packs.len() * 8);
2475            for pack in bitmapped_packs {
2476                btmp.extend_from_slice(&pack.bitmap_pos.to_be_bytes());
2477                btmp.extend_from_slice(&pack.bitmap_nr.to_be_bytes());
2478            }
2479            chunks.push((*b"BTMP", btmp));
2480        }
2481        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
2482    }
2483
2484    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2485        Self::parse_impl(bytes, format, true)
2486    }
2487
2488    pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2489        Self::parse_impl(bytes, format, false)
2490    }
2491
    /// Shared parser behind [`MultiPackIndex::parse`] and
    /// [`MultiPackIndex::parse_without_checksum`].
    ///
    /// Reads the 12-byte header (`MIDX` signature, version, hash id, chunk
    /// count, base count, big-endian u32 pack count), the chunk lookup table
    /// that follows it, and then every chunk via the `parse_midx_*` helpers.
    /// The last `format.raw_len()` bytes are the checksum; it is compared
    /// against the digest of everything before it only when `verify_checksum`
    /// is `true`.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::Unsupported`] for a version other than 1 or 2 or a
    /// non-zero base count, and [`GitError::InvalidFormat`] for a short file,
    /// a bad signature or hash id, a checksum mismatch, or a malformed chunk
    /// lookup table. Errors from the chunk parsers are propagated.
    fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
        let hash_len = format.raw_len();
        // Minimum: 12-byte header + one 12-byte lookup entry + checksum.
        if bytes.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &bytes[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        let version = bytes[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        let hash_id = bytes[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        let chunk_count = bytes[6] as usize;
        let base_midx_count = bytes[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        let pack_count = u32_be(&bytes[8..12]);
        // The lookup table holds one 12-byte entry per chunk plus a terminator.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let checksum_offset = bytes.len() - hash_len;
        // Guarantees the whole lookup table lies before the checksum, so the
        // indexed reads in the loop below stay in bounds.
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
        if verify_checksum {
            let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
            if actual_checksum != checksum {
                return Err(GitError::InvalidFormat(format!(
                    "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
                )));
            }
        }

        // Each lookup entry is a 4-byte chunk id followed by a big-endian u64
        // offset; `0..=chunk_count` also reads the terminator entry.
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ];
            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        // The final entry must be the all-zero terminator pointing at the
        // checksum.
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // A chunk's length is the distance to the next entry's offset, so the
        // table is walked in adjacent pairs.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        let mut reported_unaligned = false;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            // Misalignment is only reported on stderr (at most once per
            // parse); it does not fail the parse.
            if chunk_offset % 4 != 0 && !reported_unaligned {
                eprintln!(
                    "error: chunk id {:08x} not 4-byte aligned",
                    u32::from_be_bytes(id)
                );
                reported_unaligned = true;
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        // Decode the individual chunks; later parsers consume the counts
        // produced by earlier ones.
        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
        let bitmapped_packs =
            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;

        Ok(Self {
            version,
            format,
            pack_count,
            pack_names,
            object_count: object_count as u32,
            fanout,
            objects,
            reverse_index,
            bitmapped_packs,
            chunks,
            checksum,
        })
    }
2635
2636    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
2637        self.objects
2638            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2639            .ok()
2640            .map(|idx| &self.objects[idx])
2641    }
2642}
2643
2644impl MultiPackIndexOidLookup {
    /// Parses the header and chunk table of a multi-pack-index and records
    /// where the `OIDL`, `OOFF` and (optional) `LOFF` chunks live inside
    /// `bytes`, keeping `bytes` itself instead of decoding every object.
    ///
    /// Unlike [`MultiPackIndex::parse`], this function never recomputes the
    /// trailing checksum.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::Unsupported`] for a version other than 1 or 2 or a
    /// non-zero base count, and [`GitError::InvalidFormat`] for a short file,
    /// a bad signature or hash id, a malformed chunk lookup table, a missing
    /// or wrongly sized `OIDL`/`OOFF` chunk, or an `LOFF` chunk whose length
    /// is not a multiple of 8. Errors from the chunk helpers are propagated.
    pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
        let raw = bytes.as_bytes();
        let hash_len = format.raw_len();
        // Minimum: 12-byte header + one 12-byte lookup entry + checksum.
        if raw.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &raw[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        let version = raw[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        let hash_id = raw[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        let chunk_count = raw[6] as usize;
        let base_midx_count = raw[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        let pack_count = u32_be(&raw[8..12]);
        // The lookup table holds one 12-byte entry per chunk plus a terminator.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let checksum_offset = raw.len() - hash_len;
        // Guarantees the whole lookup table lies before the checksum, so the
        // indexed reads in the loop below stay in bounds.
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        // Each lookup entry is a 4-byte chunk id followed by a big-endian u64
        // offset; `0..=chunk_count` also reads the terminator entry.
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                raw[offset],
                raw[offset + 1],
                raw[offset + 2],
                raw[offset + 3],
            ];
            let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        // The final entry must be the all-zero terminator pointing at the
        // checksum.
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // A chunk's length is the distance to the next entry's offset, so the
        // table is walked in adjacent pairs.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        let mut reported_unaligned = false;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            // Misalignment is only reported on stderr (at most once per
            // parse); it does not fail the parse.
            if chunk_offset % 4 != 0 && !reported_unaligned {
                eprintln!(
                    "error: chunk id {:08x} not 4-byte aligned",
                    u32::from_be_bytes(id)
                );
                reported_unaligned = true;
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
        // OIDL must hold exactly one raw object id per object.
        let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
        let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
            GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
        })?;
        if oid_lookup.len() != expected_len {
            return Err(GitError::InvalidFormat(
                "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
            ));
        }
        // OOFF must hold exactly one 8-byte record per object.
        let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
        let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
            GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
        })?;
        if object_offsets.len() != expected_offsets_len {
            return Err(GitError::InvalidFormat(
                "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
            ));
        }
        // LOFF is optional, but when present it must be a whole number of
        // 8-byte entries.
        let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
        if let Some(large_offsets) = large_offsets
            && large_offsets.len() % 8 != 0
        {
            return Err(GitError::InvalidFormat(
                "multi-pack-index LOFF chunk has invalid length".into(),
            ));
        }
        // Store byte offsets into `raw` rather than slices, so the struct can
        // own `bytes` without borrowing from it.
        // NOTE(review): the pointer differences assume `midx_chunk_data`
        // returns subslices of `raw` — confirm against that helper.
        let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
        let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
        let (large_offsets_offset, large_offsets_len) = match large_offsets {
            Some(large_offsets) => (
                Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
                large_offsets.len(),
            ),
            None => (None, 0),
        };
        Ok(Self {
            format,
            pack_count,
            pack_names,
            fanout,
            object_count,
            oid_lookup_offset,
            object_offsets_offset,
            large_offsets_offset,
            large_offsets_len,
            bytes,
        })
    }
2809
2810    pub fn contains(&self, oid: &ObjectId) -> bool {
2811        self.find_position(oid).is_some()
2812    }
2813
2814    pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2815        let Some(position) = self.find_position(oid) else {
2816            return Ok(None);
2817        };
2818        let bytes = self.bytes.as_bytes();
2819        let hash_len = self.format.raw_len();
2820        let oid_start = self
2821            .oid_lookup_offset
2822            .checked_add(position * hash_len)
2823            .ok_or_else(|| {
2824                GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2825            })?;
2826        let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
2827        let offset_start = self
2828            .object_offsets_offset
2829            .checked_add(position * 8)
2830            .ok_or_else(|| {
2831                GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2832            })?;
2833        let data = &bytes[offset_start..offset_start + 8];
2834        let pack_int_id = u32_be(&data[..4]);
2835        if pack_int_id >= self.pack_count {
2836            return Err(GitError::InvalidFormat(
2837                "multi-pack-index object points past pack table".into(),
2838            ));
2839        }
2840        let raw_offset = u32_be(&data[4..8]);
2841        let offset = if raw_offset & 0x8000_0000 == 0 {
2842            u64::from(raw_offset)
2843        } else {
2844            let Some(large_offsets_offset) = self.large_offsets_offset else {
2845                return Err(GitError::InvalidFormat(
2846                    "multi-pack-index large offset missing LOFF chunk".into(),
2847                ));
2848            };
2849            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2850            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2851                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2852            })?;
2853            let large_end = large_start.checked_add(8).ok_or_else(|| {
2854                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2855            })?;
2856            if large_end > self.large_offsets_len {
2857                return Err(GitError::InvalidFormat(
2858                    "fatal: multi-pack-index large offset out of bounds".into(),
2859                ));
2860            }
2861            let start = large_offsets_offset + large_start;
2862            u64_be(&bytes[start..start + 8])
2863        };
2864        Ok(Some(MultiPackIndexEntry {
2865            oid,
2866            pack_int_id,
2867            offset,
2868            force_large_offset: raw_offset & 0x8000_0000 != 0,
2869        }))
2870    }
2871
2872    pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2873        self.pack_names
2874            .get(pack_int_id as usize)
2875            .map(String::as_str)
2876    }
2877
2878    fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2879        if oid.format() != self.format || self.object_count == 0 {
2880            return None;
2881        }
2882        let first = oid.as_bytes()[0] as usize;
2883        let start = if first == 0 {
2884            0
2885        } else {
2886            self.fanout[first - 1] as usize
2887        };
2888        let end = self.fanout[first] as usize;
2889        if start >= end || end > self.object_count {
2890            return None;
2891        }
2892        let hash_len = self.format.raw_len();
2893        let table_start = self.oid_lookup_offset;
2894        let table_end = table_start + self.object_count * hash_len;
2895        let bytes = self.bytes.as_bytes();
2896        let table = &bytes[table_start..table_end];
2897        let needle = oid.as_bytes();
2898        let mut low = start;
2899        let mut high = end;
2900        while low < high {
2901            let mid = low + (high - low) / 2;
2902            let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2903            match raw.cmp(needle) {
2904                std::cmp::Ordering::Less => low = mid + 1,
2905                std::cmp::Ordering::Equal => return Some(mid),
2906                std::cmp::Ordering::Greater => high = mid,
2907            }
2908        }
2909        None
2910    }
2911}
2912
2913fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2914    for name in pack_names {
2915        if name.is_empty() {
2916            return Err(GitError::InvalidFormat(
2917                "multi-pack-index pack name is empty".into(),
2918            ));
2919        }
2920        if name
2921            .bytes()
2922            .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2923        {
2924            return Err(GitError::InvalidFormat(
2925                "multi-pack-index pack name contains an invalid byte".into(),
2926            ));
2927        }
2928    }
2929    Ok(())
2930}
2931
/// Builds the `PNAM` chunk payload: every pack name followed by a NUL byte,
/// then zero padding up to the next multiple of four bytes.
///
/// An empty `pack_names` yields an empty payload.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0u8)))
        .collect();
    // Zero when the length is already a multiple of four.
    let padding = (4 - chunk.len() % 4) % 4;
    chunk.resize(chunk.len() + padding, 0);
    chunk
}
2943
2944fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2945    let mut counts = [0u32; 256];
2946    for object in objects {
2947        let first = object.oid.as_bytes()[0] as usize;
2948        counts[first] = counts[first]
2949            .checked_add(1)
2950            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2951    }
2952    let mut running = 0u32;
2953    let mut out = Vec::with_capacity(256 * 4);
2954    for count in counts {
2955        running = running
2956            .checked_add(count)
2957            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2958        out.extend_from_slice(&running.to_be_bytes());
2959    }
2960    Ok(out)
2961}
2962
2963fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2964    let mut out = Vec::new();
2965    for object in objects {
2966        out.extend_from_slice(object.oid.as_bytes());
2967    }
2968    out
2969}
2970
2971fn write_midx_object_offsets(
2972    objects: &[&MultiPackIndexEntry],
2973    large_offsets: &mut Vec<u8>,
2974) -> Result<Vec<u8>> {
2975    let mut out = Vec::new();
2976    for object in objects {
2977        out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2978        if object.offset < 0x8000_0000 && !object.force_large_offset {
2979            out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2980        } else {
2981            let large_idx = large_offsets.len() / 8;
2982            if large_idx > 0x7fff_ffff {
2983                return Err(GitError::InvalidFormat(
2984                    "too many multi-pack-index large offsets".into(),
2985                ));
2986            }
2987            out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2988            large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2989        }
2990    }
2991    Ok(out)
2992}
2993
2994fn write_multi_pack_index_chunks(
2995    format: ObjectFormat,
2996    version: u8,
2997    pack_count: u32,
2998    chunks: &[([u8; 4], Vec<u8>)],
2999) -> Result<Vec<u8>> {
3000    if chunks.len() > u8::MAX as usize {
3001        return Err(GitError::InvalidFormat(
3002            "too many multi-pack-index chunks".into(),
3003        ));
3004    }
3005    let lookup_len = (chunks.len() + 1)
3006        .checked_mul(12)
3007        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3008    let mut out = Vec::new();
3009    out.extend_from_slice(b"MIDX");
3010    out.push(version);
3011    out.push(hash_function_id(format) as u8);
3012    out.push(chunks.len() as u8);
3013    out.push(0);
3014    out.extend_from_slice(&pack_count.to_be_bytes());
3015    let mut chunk_offset = (12usize)
3016        .checked_add(lookup_len)
3017        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
3018        as u64;
3019    for (id, data) in chunks {
3020        out.extend_from_slice(id);
3021        out.extend_from_slice(&chunk_offset.to_be_bytes());
3022        chunk_offset = chunk_offset
3023            .checked_add(data.len() as u64)
3024            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
3025    }
3026    out.extend_from_slice(&[0, 0, 0, 0]);
3027    out.extend_from_slice(&chunk_offset.to_be_bytes());
3028    for (_id, data) in chunks {
3029        out.extend_from_slice(data);
3030    }
3031    let checksum = sley_core::digest_bytes(format, &out)?;
3032    out.extend_from_slice(checksum.as_bytes());
3033    Ok(out)
3034}
3035
/// Header fields of a single pack entry: the entry's [`PackObjectKind`] and a
/// size.
///
/// NOTE(review): `size` is presumably the size recorded in the entry's
/// header (the inflated length of its payload) — confirm against the header
/// parser, which is outside this section.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Kind of the entry.
    kind: PackObjectKind,
    /// Size value carried with the kind; see the review note on the struct.
    size: u64,
}
3041
/// A cache of objects already decoded from one specific pack, keyed by the
/// in-pack byte offset at which each object's entry begins.
///
/// Delta resolution within a pack walks a chain of base objects by offset; the
/// same base is the parent of many deltas, so without a cache the entire chain
/// is re-inflated and re-applied on every read. Implementors let
/// [`read_object_at_with_cache`] reuse a warm base instead.
///
/// Correctness contract: a given `offset` within a given pack's bytes always
/// decodes to exactly one object, so caching by offset can never serve the wrong
/// object **provided the same cache is only ever used with one pack's bytes**.
/// Callers must therefore scope a cache to a single pack (e.g. key it by pack
/// path). The default [`read_object_at`] uses a no-op cache and is unaffected.
///
/// Both methods take `&self`, so an implementation that actually stores
/// entries needs some form of interior mutability.
pub trait PackDeltaCache {
    /// Return the decoded object whose entry begins at `offset`, if cached.
    ///
    /// Returning `None` is always allowed; it only means the caller has to
    /// decode the entry itself.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Record that the entry beginning at `offset` decodes to `object`.
    ///
    /// The object is handed over as an [`Arc`], so storing it does not copy
    /// the decoded data.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
3061
/// A [`PackDeltaCache`] that stores nothing; [`read_object_at_arc`] passes it so
/// callers that do not opt in to caching keep the original, cache-free behavior.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always a miss: nothing is ever stored.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards `_object`; the `Arc` handed in is simply dropped.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
3072
// Reused zlib inflate state. Resetting and reusing one `Decompress` avoids
// allocating a fresh (~10 KiB) `InflateState` for every object and delta decoded —
// an allocation that dominated bulk reads. Borrowed only for the duration of a
// single inflate; the recursive pack reader fully inflates each entry's data before
// recursing to its base, so the borrow never nests.
//
// The `true` argument asks flate2 to expect a zlib header on the stream; the
// inflate helpers below pass the same flag to `reset` before each use.
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
3081
/// Upper bound on how far a single DEFLATE/zlib member can expand its input.
///
/// Raw DEFLATE tops out at roughly 1032:1 (a maximally efficient run of
/// back-references). Speculative reservations are limited to this multiple of
/// the compressed bytes actually available, so a declared size such as
/// `u64::MAX` cannot make us commit gigabytes before the inflate has produced
/// a single byte. The real output length is still checked against the declared
/// size by the caller; this constant only limits the up-front allocation. git
/// likewise never pre-allocates an attacker's declared size beyond a streaming
/// buffer (see index-pack.c's `unpack_entry_data`).
const MAX_INFLATE_EXPANSION: usize = 1032;

/// Hard ceiling on the speculative reservation, regardless of input length.
///
/// Even a large, legitimate-looking compressed input cannot turn into a
/// multi-gigabyte up-front allocation. Output buffers still grow past this
/// organically when a real stream produces that much data.
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;

/// Turn an untrusted decompressed-size hint into a safe initial reservation.
///
/// The result is the smaller of `size_hint` and what `compressed_len` input
/// bytes could plausibly inflate to, raised to at least 64 bytes and capped at
/// [`MAX_INFLATE_RESERVE`]. It only sizes the first allocation: callers keep
/// growing the buffer as real output arrives, so large legitimate objects
/// still decode — they just do not get their whole buffer at once.
fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
    /// Smallest reservation ever returned.
    const MIN_RESERVE: usize = 64;

    let plausible_output = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
    let wanted = if size_hint < plausible_output {
        size_hint
    } else {
        plausible_output
    };
    // MIN_RESERVE is below MAX_INFLATE_RESERVE, so floor-then-cap is a clamp.
    wanted.max(MIN_RESERVE).min(MAX_INFLATE_RESERVE)
}
3113
3114/// Inflate the entire zlib stream at the front of `compressed`, appending the
3115/// decoded bytes to `out`, reusing the thread-local inflate state. `size_hint`
3116/// is the caller's expectation for the decompressed length, but it is treated as
3117/// untrusted: the up-front reservation is bounded by [`bounded_inflate_reserve`]
3118/// so a crafted hint can never drive an out-of-memory pre-allocation. Returns the
3119/// number of *compressed* bytes consumed (so callers stepping through a pack can
3120/// advance to the next entry). Byte-for-byte equivalent to
3121/// `ZlibDecoder::read_to_end` + `total_in`.
3122fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
3123    INFLATE.with(|cell| {
3124        let mut decompress = cell.borrow_mut();
3125        decompress.reset(true);
3126        out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
3127        let mut input = compressed;
3128        let mut consumed_total = 0usize;
3129        loop {
3130            // Always leave output room so a zero-progress result means the input
3131            // (not the buffer) is exhausted.
3132            if out.len() == out.capacity() {
3133                out.reserve(out.len().max(64));
3134            }
3135            let before_in = decompress.total_in();
3136            let before_out = decompress.total_out();
3137            let status = decompress
3138                .decompress_vec(input, out, flate2::FlushDecompress::None)
3139                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3140            let consumed = (decompress.total_in() - before_in) as usize;
3141            let produced = decompress.total_out() - before_out;
3142            input = &input[consumed..];
3143            consumed_total += consumed;
3144            match status {
3145                flate2::Status::StreamEnd => return Ok(consumed_total),
3146                _ if consumed == 0 && produced == 0 => {
3147                    return Err(GitError::InvalidObject("truncated zlib stream".into()));
3148                }
3149                _ => {}
3150            }
3151        }
3152    })
3153}
3154
3155/// Inflate at least `max_out` bytes (or until the stream ends) from `compressed`
3156/// into `out`, reusing the thread-local state. Used to read a delta's leading
3157/// base-size / result-size varints without inflating the whole instruction stream.
3158fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3159    INFLATE.with(|cell| {
3160        let mut decompress = cell.borrow_mut();
3161        decompress.reset(true);
3162        out.reserve(max_out.max(16));
3163        let mut input = compressed;
3164        while out.len() < max_out {
3165            if out.len() == out.capacity() {
3166                out.reserve(out.len().max(16));
3167            }
3168            let before_in = decompress.total_in();
3169            let before_out = decompress.total_out();
3170            let status = decompress
3171                .decompress_vec(input, out, flate2::FlushDecompress::None)
3172                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3173            let consumed = (decompress.total_in() - before_in) as usize;
3174            let produced = decompress.total_out() - before_out;
3175            input = &input[consumed..];
3176            if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3177                break;
3178            }
3179        }
3180        Ok(())
3181    })
3182}
3183
3184/// Decode the single object stored at byte `offset` within `pack_bytes`, reading
3185/// only that object and its delta-base chain instead of parsing the whole pack.
3186///
3187/// Ofs-delta bases are followed by offset (recursively, within this pack);
3188/// ref-delta bases are obtained from `resolve_ref_base`, which the caller backs
3189/// with the surrounding object store (so a base in another pack or loose still
3190/// resolves). The pack trailer checksum is the final `format.raw_len()` bytes.
3191pub fn read_object_at_arc<F>(
3192    pack_bytes: &[u8],
3193    offset: u64,
3194    format: ObjectFormat,
3195    resolve_ref_base: F,
3196) -> Result<Arc<EncodedObject>>
3197where
3198    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3199{
3200    read_object_at_with_cache_arc(
3201        pack_bytes,
3202        offset,
3203        format,
3204        resolve_ref_base,
3205        &NoopDeltaCache,
3206    )
3207}
3208
3209/// Like [`read_object_at_arc`], but reuses already-decoded objects from `cache`
3210/// (keyed by in-pack offset) and records every object it decodes.
3211///
3212/// This turns repeated reads from the same pack — where many deltas share a base
3213/// chain — from re-inflating each chain per read into resolving each base once.
3214/// `cache` must be scoped to the pack `pack_bytes` belongs to (see
3215/// [`PackDeltaCache`]). The decoded object is returned behind an [`Arc`] so
3216/// callers can reuse cache handles without cloning full object bodies.
3217pub fn read_object_at_with_cache_arc<F, C>(
3218    pack_bytes: &[u8],
3219    offset: u64,
3220    format: ObjectFormat,
3221    mut resolve_ref_base: F,
3222    cache: &C,
3223) -> Result<Arc<EncodedObject>>
3224where
3225    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3226    C: PackDeltaCache + ?Sized,
3227{
3228    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
3229}
3230
3231fn read_object_at_inner<F, C>(
3232    pack_bytes: &[u8],
3233    offset: u64,
3234    format: ObjectFormat,
3235    resolve_ref_base: &mut F,
3236    cache: &C,
3237) -> Result<Arc<EncodedObject>>
3238where
3239    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3240    C: PackDeltaCache + ?Sized,
3241{
3242    // A warm cache entry for this exact offset is already the fully resolved
3243    // object, so the whole base chain below can be skipped.
3244    if let Some(object) = cache.get(offset) {
3245        return Ok(object);
3246    }
3247    let trailer_offset = pack_bytes
3248        .len()
3249        .checked_sub(format.raw_len())
3250        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3251    let mut cursor = usize::try_from(offset)
3252        .ok()
3253        .filter(|&value| value < trailer_offset)
3254        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3255    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3256    let base = match header.kind {
3257        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3258            pack_bytes,
3259            &mut cursor,
3260            offset,
3261        )?)),
3262        PackObjectKind::RefDelta => {
3263            let hash_len = format.raw_len();
3264            if cursor + hash_len > trailer_offset {
3265                return Err(GitError::InvalidFormat(
3266                    "truncated ref-delta base object id".into(),
3267                ));
3268            }
3269            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3270            cursor += hash_len;
3271            Some(DeltaBase::Ref(oid))
3272        }
3273        _ => None,
3274    };
3275    let mut body = Vec::new();
3276    inflate_into(
3277        &pack_bytes[cursor..trailer_offset],
3278        &mut body,
3279        header.size.min(usize::MAX as u64) as usize,
3280    )?;
3281    if body.len() as u64 != header.size {
3282        return Err(GitError::InvalidObject(format!(
3283            "pack object declared {} bytes, decoded {}",
3284            header.size,
3285            body.len()
3286        )));
3287    }
3288    let object = match base {
3289        None => {
3290            let object_type = match header.kind {
3291                PackObjectKind::Commit => ObjectType::Commit,
3292                PackObjectKind::Tree => ObjectType::Tree,
3293                PackObjectKind::Blob => ObjectType::Blob,
3294                PackObjectKind::Tag => ObjectType::Tag,
3295                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3296                    return Err(GitError::InvalidFormat(
3297                        "delta pack entry decoded without a base".into(),
3298                    ));
3299                }
3300            };
3301            Arc::new(EncodedObject::new(object_type, body))
3302        }
3303        Some(DeltaBase::Offset(base_offset)) => {
3304            let base =
3305                read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
3306            let resolved = apply_pack_delta(&base.body, &body)?;
3307            Arc::new(EncodedObject::new(base.object_type, resolved))
3308        }
3309        Some(DeltaBase::Ref(base_oid)) => {
3310            let base = resolve_ref_base(&base_oid)?
3311                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
3312            let resolved = apply_pack_delta(&base.body, &body)?;
3313            Arc::new(EncodedObject::new(base.object_type, resolved))
3314        }
3315    };
3316    // Record the fully resolved object so any later read that walks through this
3317    // offset (as a delta base or directly) reuses it. Bases are inserted as the
3318    // recursion unwinds, so a chain is decoded at most once across reads.
3319    cache.insert(offset, Arc::clone(&object));
3320    Ok(object)
3321}
3322
3323/// The object type and final (inflated) size of the entry at `offset`, *without*
3324/// materializing the object body — git's `cat-file --batch-check` fast path.
3325///
3326/// A base object's size is already in its pack entry header, and a delta's result
3327/// size is the second varint at the front of its (small) delta stream, so neither
3328/// inflates the full content. The reported type is the type at the end of the
3329/// delta chain (deltas inherit their base's type). `resolve_ref_base_type` supplies
3330/// the type of a ref-delta base that lives outside this pack (resolved through the
3331/// wider object store); ofs-delta bases are followed within `pack_bytes` directly.
3332pub fn read_object_header_at<F>(
3333    pack_bytes: &[u8],
3334    offset: u64,
3335    format: ObjectFormat,
3336    mut resolve_ref_base_type: F,
3337) -> Result<(ObjectType, u64)>
3338where
3339    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3340{
3341    read_object_header_at_inner(
3342        pack_bytes,
3343        offset,
3344        format,
3345        &mut resolve_ref_base_type,
3346        &mut NoopHeaderTypeCache,
3347    )
3348}
3349
/// Memo of `pack offset -> resolved header (end-of-chain type, result size)` for
/// the `cat-file --batch-check` header fast path.
///
/// Without it, resolving the *type* of an ofs-delta walks the whole delta chain
/// to its base on every header read, re-inflating each link's leading varints
/// from scratch — so reading every object in a deeply-deltified pack costs
/// O(objects x chain-depth) and goes super-linear (sley#26). Two reuses fall out
/// of memoizing `offset -> (type, size)`:
///
/// * a chain's end-of-chain type is resolved at most once, so later objects on
///   the same chain skip the walk; and
/// * a repeated lookup of the same object (common in batch input) returns from
///   the memo without re-inflating its delta header at all.
///
/// The size stored is the object's final (inflated) result size — read from its
/// own pack/delta header, never by materializing the body.
///
/// Entries are keyed by in-pack offset only, so one memo must not be shared
/// between different packs.
pub trait HeaderTypeCache {
    /// The previously resolved header at `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Record the resolved header at `pack_offset` for reuse by later reads.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
3372
/// A [`HeaderTypeCache`] that remembers nothing; [`read_object_header_at`]
/// passes it so the uncached entry point shares the same worker.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always a miss.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
3381
3382/// Like [`read_object_header_at`] but threads a caller-owned [`HeaderTypeCache`]
3383/// through the read so (a) the ofs-delta chain's end-of-chain type is resolved at
3384/// most once per chain and (b) a repeated lookup of the same offset returns from
3385/// the memo without re-inflating (sley#26). The cache is keyed by in-pack offset,
3386/// so it must be scoped to a single pack's bytes by the caller.
3387pub fn read_object_header_at_with_cache<F, C>(
3388    pack_bytes: &[u8],
3389    offset: u64,
3390    format: ObjectFormat,
3391    mut resolve_ref_base_type: F,
3392    type_cache: &mut C,
3393) -> Result<(ObjectType, u64)>
3394where
3395    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3396    C: HeaderTypeCache + ?Sized,
3397{
3398    if let Some(header) = type_cache.get(offset) {
3399        return Ok(header);
3400    }
3401    read_object_header_at_inner(
3402        pack_bytes,
3403        offset,
3404        format,
3405        &mut resolve_ref_base_type,
3406        type_cache,
3407    )
3408}
3409
3410fn read_object_header_at_inner<F, C>(
3411    pack_bytes: &[u8],
3412    offset: u64,
3413    format: ObjectFormat,
3414    resolve_ref_base_type: &mut F,
3415    type_cache: &mut C,
3416) -> Result<(ObjectType, u64)>
3417where
3418    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3419    C: HeaderTypeCache + ?Sized,
3420{
3421    let trailer_offset = pack_bytes
3422        .len()
3423        .checked_sub(format.raw_len())
3424        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3425    let mut cursor = usize::try_from(offset)
3426        .ok()
3427        .filter(|&value| value < trailer_offset)
3428        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3429    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3430    let resolved = match header.kind {
3431        PackObjectKind::Commit => (ObjectType::Commit, header.size),
3432        PackObjectKind::Tree => (ObjectType::Tree, header.size),
3433        PackObjectKind::Blob => (ObjectType::Blob, header.size),
3434        PackObjectKind::Tag => (ObjectType::Tag, header.size),
3435        PackObjectKind::OfsDelta => {
3436            let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3437            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3438            // The end-of-chain type only depends on the base, so reuse it across
3439            // reads instead of re-walking the chain per object (sley#26).
3440            let base_type = match type_cache.get(base_offset) {
3441                Some((base_type, _)) => base_type,
3442                None => {
3443                    let (base_type, _) = read_object_header_at_inner(
3444                        pack_bytes,
3445                        base_offset,
3446                        format,
3447                        resolve_ref_base_type,
3448                        type_cache,
3449                    )?;
3450                    base_type
3451                }
3452            };
3453            (base_type, size)
3454        }
3455        PackObjectKind::RefDelta => {
3456            let hash_len = format.raw_len();
3457            if cursor + hash_len > trailer_offset {
3458                return Err(GitError::InvalidFormat(
3459                    "truncated ref-delta base object id".into(),
3460                ));
3461            }
3462            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3463            cursor += hash_len;
3464            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3465            let base_type = resolve_ref_base_type(&oid)?
3466                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3467            (base_type, size)
3468        }
3469    };
3470    // Memoize the fully resolved header so a repeated lookup of this offset (or a
3471    // chain that bases on it) returns without re-inflating (sley#26).
3472    type_cache.put(offset, resolved);
3473    Ok(resolved)
3474}
3475
3476/// Number of inflated delta-stream bytes to read when only the leading base-size
3477/// and result-size varints are needed. Each varint is at most 10 bytes, so a short
3478/// prefix always covers both without inflating the delta instructions.
3479const DELTA_HEADER_PREFIX_LEN: usize = 32;
3480
3481/// Result size of a delta whose zlib-compressed stream starts at `compressed`,
3482/// inflating only the short prefix that holds its two leading varints.
3483fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
3484    let mut prefix = Vec::new();
3485    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
3486    decoded_delta_result_size(&prefix)
3487}
3488
3489fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3490    let first = next_byte(bytes, offset)?;
3491    let mut size = u64::from(first & 0x0f);
3492    let kind = match (first >> 4) & 0x07 {
3493        1 => PackObjectKind::Commit,
3494        2 => PackObjectKind::Tree,
3495        3 => PackObjectKind::Blob,
3496        4 => PackObjectKind::Tag,
3497        6 => PackObjectKind::OfsDelta,
3498        7 => PackObjectKind::RefDelta,
3499        other => {
3500            return Err(GitError::InvalidFormat(format!(
3501                "invalid pack object type {other}"
3502            )));
3503        }
3504    };
3505    let mut shift = 4;
3506    let mut byte = first;
3507    while byte & 0x80 != 0 {
3508        byte = next_byte(bytes, offset)?;
3509        let part = u64::from(byte & 0x7f);
3510        size = size
3511            .checked_add(
3512                part.checked_shl(shift)
3513                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3514            )
3515            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3516        shift += 7;
3517    }
3518    Ok(EntryHeader { kind, size })
3519}
3520
3521fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3522    let mut byte = next_byte(bytes, offset)?;
3523    let mut relative = u64::from(byte & 0x7f);
3524    while byte & 0x80 != 0 {
3525        byte = next_byte(bytes, offset)?;
3526        relative = relative
3527            .checked_add(1)
3528            .and_then(|value| value.checked_shl(7))
3529            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3530            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3531    }
3532    entry_offset
3533        .checked_sub(relative)
3534        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3535}
3536
3537fn resolve_pack_entries<F>(
3538    parsed: Vec<ParsedPackEntry>,
3539    format: ObjectFormat,
3540    external_base: &mut F,
3541) -> Result<Vec<PackObject>>
3542where
3543    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3544{
3545    let mut offset_to_index = HashMap::with_capacity(parsed.len());
3546    for (idx, entry) in parsed.iter().enumerate() {
3547        offset_to_index.insert(parsed_entry_offset(entry), idx);
3548    }
3549
3550    let mut resolved = vec![None; parsed.len()];
3551    let mut oid_to_index = HashMap::new();
3552    let mut unresolved = 0usize;
3553    for (idx, entry) in parsed.iter().enumerate() {
3554        match entry {
3555            ParsedPackEntry::Resolved(object) => {
3556                oid_to_index.insert(object.entry.oid, idx);
3557                resolved[idx] = Some(object.clone());
3558            }
3559            ParsedPackEntry::Delta { .. } => unresolved += 1,
3560        }
3561    }
3562
3563    while unresolved != 0 {
3564        let mut progress = false;
3565        for idx in 0..parsed.len() {
3566            if resolved[idx].is_some() {
3567                continue;
3568            }
3569            let ParsedPackEntry::Delta {
3570                base,
3571                compressed_size,
3572                delta_size,
3573                offset,
3574                delta,
3575            } = &parsed[idx]
3576            else {
3577                continue;
3578            };
3579            let Some(base_object) = delta_base_object(
3580                base,
3581                &offset_to_index,
3582                &oid_to_index,
3583                &resolved,
3584                external_base,
3585            )?
3586            else {
3587                continue;
3588            };
3589            let body = apply_pack_delta(base_object.body(), delta)?;
3590            let object = EncodedObject::new(base_object.object_type(), body);
3591            let oid = object.object_id(format)?;
3592            let pack_object = PackObject {
3593                entry: PackEntry {
3594                    oid,
3595                    compressed_size: *compressed_size,
3596                    uncompressed_size: object.body.len() as u64,
3597                    offset: *offset,
3598                },
3599                object,
3600            };
3601            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
3602                return Err(GitError::InvalidObject(
3603                    "resolved delta size does not match delta header".into(),
3604                ));
3605            }
3606            if *delta_size != delta.len() as u64 {
3607                return Err(GitError::InvalidObject(format!(
3608                    "pack delta declared {delta_size} bytes, decoded {}",
3609                    delta.len()
3610                )));
3611            }
3612            oid_to_index.insert(oid, idx);
3613            resolved[idx] = Some(pack_object);
3614            unresolved -= 1;
3615            progress = true;
3616        }
3617        if !progress {
3618            return Err(GitError::Unsupported("unresolved delta base".into()));
3619        }
3620    }
3621
3622    resolved
3623        .into_iter()
3624        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
3625        .collect()
3626}
3627
3628fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3629    match entry {
3630        ParsedPackEntry::Resolved(object) => object.entry.offset,
3631        ParsedPackEntry::Delta { offset, .. } => *offset,
3632    }
3633}
3634
3635enum DeltaBaseObject<'a> {
3636    Borrowed(&'a EncodedObject),
3637    Owned(EncodedObject),
3638}
3639
3640impl DeltaBaseObject<'_> {
3641    fn object_type(&self) -> ObjectType {
3642        match self {
3643            Self::Borrowed(object) => object.object_type,
3644            Self::Owned(object) => object.object_type,
3645        }
3646    }
3647
3648    fn body(&self) -> &[u8] {
3649        match self {
3650            Self::Borrowed(object) => &object.body,
3651            Self::Owned(object) => &object.body,
3652        }
3653    }
3654}
3655
3656fn delta_base_object<'a, F>(
3657    base: &DeltaBase,
3658    offset_to_index: &HashMap<u64, usize>,
3659    oid_to_index: &HashMap<ObjectId, usize>,
3660    resolved: &'a [Option<PackObject>],
3661    external_base: &mut F,
3662) -> Result<Option<DeltaBaseObject<'a>>>
3663where
3664    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3665{
3666    match base {
3667        DeltaBase::Offset(offset) => {
3668            let Some(index) = offset_to_index.get(offset).copied() else {
3669                return Err(GitError::InvalidFormat(format!(
3670                    "ofs-delta base offset {offset} not found"
3671                )));
3672            };
3673            Ok(resolved[index]
3674                .as_ref()
3675                .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3676        }
3677        DeltaBase::Ref(oid) => {
3678            if let Some(index) = oid_to_index.get(oid).copied() {
3679                return Ok(resolved[index]
3680                    .as_ref()
3681                    .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3682            }
3683            external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3684        }
3685    }
3686}
3687
3688fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3689    let mut cursor = 0usize;
3690    let base_size = read_delta_varint(delta, &mut cursor)?;
3691    if base_size != base.len() as u64 {
3692        return Err(GitError::InvalidObject(format!(
3693            "delta base size mismatch: expected {base_size}, got {}",
3694            base.len()
3695        )));
3696    }
3697    let result_size = read_delta_varint(delta, &mut cursor)?;
3698    // `result_size` is an attacker-controlled delta varint from a network pack
3699    // (install_raw_pack -> sley-fetch). On 64-bit a naive `result_size as usize`
3700    // (or `.min(usize::MAX)`, a no-op there) lets a tiny delta declare
3701    // `u64::MAX`/1 TiB and drive `with_capacity` to abort the process before the
3702    // size-mismatch check below can fire. Route the up-front reservation through
3703    // the sley#2 bound so the speculative allocation is capped; `result.extend`
3704    // still grows the buffer organically and the post-decode length check
3705    // (`result.len() != result_size`) rejects the lie cleanly.
3706    let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3707    let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3708    while cursor < delta.len() {
3709        let command = delta[cursor];
3710        cursor += 1;
3711        if command & 0x80 != 0 {
3712            let copy_offset =
3713                read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3714            let mut copy_size =
3715                read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3716            if copy_size == 0 {
3717                copy_size = 0x10000;
3718            }
3719            let start = usize::try_from(copy_offset)
3720                .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3721            let len = usize::try_from(copy_size)
3722                .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3723            let end = start
3724                .checked_add(len)
3725                .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3726            let Some(slice) = base.get(start..end) else {
3727                return Err(GitError::InvalidObject(
3728                    "delta copy range exceeds base object".into(),
3729                ));
3730            };
3731            result.extend_from_slice(slice);
3732        } else if command != 0 {
3733            let len = usize::from(command);
3734            let end = cursor
3735                .checked_add(len)
3736                .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3737            let Some(slice) = delta.get(cursor..end) else {
3738                return Err(GitError::InvalidObject(
3739                    "delta insert range exceeds delta data".into(),
3740                ));
3741            };
3742            result.extend_from_slice(slice);
3743            cursor = end;
3744        } else {
3745            return Err(GitError::InvalidObject(
3746                "delta contains reserved zero command".into(),
3747            ));
3748        }
3749    }
3750    if result.len() as u64 != result_size {
3751        return Err(GitError::InvalidObject(format!(
3752            "delta result size mismatch: expected {result_size}, got {}",
3753            result.len()
3754        )));
3755    }
3756    Ok(result)
3757}
3758
3759fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3760    let mut cursor = 0usize;
3761    let _ = read_delta_varint(delta, &mut cursor)?;
3762    read_delta_varint(delta, &mut cursor)
3763}
3764
/// Size, in bytes, of the fixed blocks used to index a base object for delta
/// compression. Matches git's `diff-delta.c` block size.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance between indexed base anchors. Delta generation still scans target
/// objects byte-by-byte once there is evidence of shared content; anchoring the
/// base at block boundaries keeps the index compact and avoids per-object
/// hash-table allocation storms on unrelated blobs.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Base-2 logarithm of the number of hash buckets used by [`DeltaIndex`].
/// Bucketing avoids sorting each base object's anchors while keeping
/// exact-hash candidate scans short.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of hash buckets used by [`DeltaIndex`] (`2^DELTA_BUCKET_BITS`).
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Bit mask that reduces a block hash to its bucket number. A plain mask is
/// enough because the bucket count is a power of two.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3780
/// An index over a base object's content used to generate deltas against it.
///
/// The index hashes block-sized anchors of the base, groups them into fixed
/// buckets, and verifies exact byte matches before copying. This avoids both
/// per-bucket allocation storms and the per-object sort needed by a single
/// sorted vector.
struct DeltaIndex<'a> {
    /// The base object's bytes; anchor offsets and emitted copy commands refer
    /// to positions in this slice.
    base: &'a [u8],
    /// Every indexed anchor, grouped so that all anchors of one bucket are
    /// contiguous.
    blocks: Vec<DeltaBlock>,
    /// `DELTA_BUCKET_COUNT + 1` prefix offsets into `blocks`: bucket `b` owns
    /// `blocks[buckets[b]..buckets[b + 1]]`.
    buckets: Vec<usize>,
}
3792
/// One indexed anchor of a base object: the hash of a `DELTA_BLOCK_SIZE`-byte
/// block together with the position where that block starts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DeltaBlock {
    /// [`block_hash`] of the block's bytes.
    hash: u32,
    /// Byte offset of the block within the base object.
    offset: usize,
}
3798
3799impl<'a> DeltaIndex<'a> {
3800    fn new(base: &'a [u8]) -> Self {
3801        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3802        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3803        for_each_delta_anchor(base.len(), |offset| {
3804            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3805            buckets[delta_bucket(hash) + 1] += 1;
3806            anchors.push(DeltaBlock { hash, offset });
3807        });
3808        for idx in 1..buckets.len() {
3809            buckets[idx] += buckets[idx - 1];
3810        }
3811
3812        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3813        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3814        for anchor in anchors {
3815            let bucket = delta_bucket(anchor.hash);
3816            let next = &mut next_offsets[bucket];
3817            blocks[*next] = anchor;
3818            *next += 1;
3819        }
3820
3821        Self {
3822            base,
3823            blocks,
3824            buckets,
3825        }
3826    }
3827
3828    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3829        let bucket = delta_bucket(hash);
3830        let start = self.buckets[bucket];
3831        let end = self.buckets[bucket + 1];
3832        self.blocks[start..end]
3833            .iter()
3834            .filter(move |block| block.hash == hash)
3835    }
3836
3837    fn has_hash(&self, hash: u32) -> bool {
3838        self.candidate_blocks(hash).next().is_some()
3839    }
3840
3841    fn has_shared_anchor(&self, target: &[u8]) -> bool {
3842        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3843            return false;
3844        }
3845        let last = target.len() - DELTA_BLOCK_SIZE;
3846        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3847            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3848            if self.has_hash(hash) {
3849                return true;
3850            }
3851        }
3852        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3853            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3854            if self.has_hash(hash) {
3855                return true;
3856            }
3857        }
3858        false
3859    }
3860
3861    /// Generate a delta that reconstructs `target` from this index's base.
3862    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3863        if !self.has_shared_anchor(target) {
3864            return None;
3865        }
3866        let base = self.base;
3867        let mut delta = Vec::new();
3868        write_delta_varint(&mut delta, base.len() as u64);
3869        write_delta_varint(&mut delta, target.len() as u64);
3870
3871        let mut pending_insert_start = 0usize;
3872        let mut pos = 0usize;
3873        while pos < target.len() {
3874            let mut best_len = 0usize;
3875            let mut best_offset = 0usize;
3876            if pos + DELTA_BLOCK_SIZE <= target.len() {
3877                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3878                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3879                    // Confirm the block actually matches (hash collisions are
3880                    // possible) before measuring how far it extends.
3881                    let candidate = candidate.offset;
3882                    let max_len = (base.len() - candidate).min(target.len() - pos);
3883                    let mut len = 0usize;
3884                    while len < max_len && base[candidate + len] == target[pos + len] {
3885                        len += 1;
3886                    }
3887                    if len > best_len {
3888                        best_len = len;
3889                        best_offset = candidate;
3890                    }
3891                }
3892            }
3893
3894            if best_len >= DELTA_BLOCK_SIZE {
3895                if pending_insert_start < pos {
3896                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3897                }
3898                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3899                pos += best_len;
3900                pending_insert_start = pos;
3901            } else {
3902                pos += 1;
3903            }
3904        }
3905        if pending_insert_start < target.len() {
3906            write_delta_insert(&mut delta, &target[pending_insert_start..]);
3907        }
3908        Some(delta)
3909    }
3910}
3911
3912fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3913    if len < DELTA_BLOCK_SIZE {
3914        return;
3915    }
3916    len -= DELTA_BLOCK_SIZE;
3917    for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3918        visit(offset);
3919    }
3920    if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3921        visit(len);
3922    }
3923}
3924
3925fn delta_anchor_count(len: usize) -> usize {
3926    if len < DELTA_BLOCK_SIZE {
3927        return 0;
3928    }
3929    let last = len - DELTA_BLOCK_SIZE;
3930    (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3931}
3932
3933fn delta_bucket(hash: u32) -> usize {
3934    (hash as usize) & DELTA_BUCKET_MASK
3935}
3936
/// Maximum number of same-hash base anchors examined for each target position
/// during delta generation. The index itself keeps every anchor; this only caps
/// the work done extending candidate matches for inputs with many repeated
/// blocks.
const DELTA_MAX_CHAIN: usize = 64;
3940
/// Hash a block of base/target bytes into a 32-bit key.
///
/// Each byte is folded in as `hash * 0x0100_0193 ^ byte` with wrapping
/// multiplication (an FNV-style mix). Matches are always verified
/// byte-for-byte before use, so collisions only cost a little extra comparison
/// work and never affect correctness.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |hash, &byte| {
        hash.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
3953
/// The chosen storage form for a single object during pack generation.
///
/// The `delta` bytes carried by the deltified variants are the uncompressed
/// delta stream; they are compressed later when the payload is written.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored undeltified (a base for others, or no good delta was found).
    None,
    /// Delta against another object in this pack, identified by its original
    /// index. The pre-computed `delta` bytes reconstruct the object from that
    /// base's body.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Delta against an external (thin-pack) base, referenced by object id.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
3966
/// The pack-generation plan for one object, stored at the object's original
/// index in the plan vector.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// How the object is stored: undeltified, or as a delta against an in-pack
    /// or external base.
    base: PlannedBase,
}
3971
3972fn compress_planned_payloads(
3973    objects: &[&EncodedObject],
3974    plan: &[PlannedEntry],
3975    order: &[usize],
3976    compression_level: u32,
3977) -> Result<Vec<Vec<u8>>> {
3978    if order.is_empty() {
3979        return Ok(Vec::new());
3980    }
3981
3982    let worker_count = std::thread::available_parallelism()
3983        .map(|threads| threads.get())
3984        .unwrap_or(1)
3985        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3986        .min(order.len());
3987    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3988        let mut payloads = Vec::with_capacity(order.len());
3989        for &idx in order {
3990            payloads.push(compressed_payload(
3991                planned_payload(objects, plan, idx),
3992                compression_level,
3993            )?);
3994        }
3995        return Ok(payloads);
3996    }
3997
3998    let chunk_len = order.len().div_ceil(worker_count);
3999    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
4000    std::thread::scope(|scope| {
4001        let mut handles = Vec::new();
4002        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
4003            let chunk_start = chunk_idx * chunk_len;
4004            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
4005                let mut chunk_payloads = Vec::with_capacity(chunk.len());
4006                for (offset, &idx) in chunk.iter().enumerate() {
4007                        chunk_payloads.push((
4008                            chunk_start + offset,
4009                            compressed_payload(
4010                                planned_payload(objects, plan, idx),
4011                                compression_level,
4012                            )?,
4013                        ));
4014                }
4015                Ok(chunk_payloads)
4016            }));
4017        }
4018
4019        let mut first_error = None;
4020        for handle in handles {
4021            match handle.join() {
4022                Ok(Ok(chunk_payloads)) => {
4023                    if first_error.is_none() {
4024                        for (pos, payload) in chunk_payloads {
4025                            payloads[pos] = payload;
4026                        }
4027                    }
4028                }
4029                Ok(Err(err)) => {
4030                    first_error.get_or_insert(err);
4031                }
4032                Err(_) => {
4033                    first_error.get_or_insert_with(|| {
4034                        GitError::InvalidObject("pack compression worker panicked".into())
4035                    });
4036                }
4037            }
4038        }
4039
4040        match first_error {
4041            Some(err) => Err(err),
4042            None => Ok(()),
4043        }
4044    })?;
4045    Ok(payloads)
4046}
4047
4048fn planned_payload<'a>(
4049    objects: &'a [&'a EncodedObject],
4050    plan: &'a [PlannedEntry],
4051    idx: usize,
4052) -> &'a [u8] {
4053    match &plan[idx].base {
4054        PlannedBase::None => &objects[idx].body,
4055        PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
4056    }
4057}
4058
4059fn compressed_payload(body: &[u8], compression_level: u32) -> Result<Vec<u8>> {
4060    let mut out = Vec::new();
4061    write_compressed_payload(&mut out, body, compression_level)?;
4062    Ok(out)
4063}
4064
/// Maximum number of external thin-pack bases compared against any single
/// object. Bounds the per-object work of the thin path when a large base set
/// is supplied.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
4068
/// A previously processed object kept in the sliding delta window as a
/// candidate base for later objects.
struct DeltaWindowEntry<'a> {
    /// Original index of the object in the `objects` slice being planned.
    idx: usize,
    /// Delta index built over that object's body.
    index: DeltaIndex<'a>,
}
4073
4074/// Rank object types for delta grouping. Objects of the same type are far more
4075/// likely to delta well, so the sort groups by this rank first.
4076fn delta_type_rank(object_type: ObjectType) -> u8 {
4077    match object_type {
4078        ObjectType::Commit => 0,
4079        ObjectType::Tree => 1,
4080        ObjectType::Blob => 2,
4081        ObjectType::Tag => 3,
4082    }
4083}
4084
4085/// Decide how each object is stored (undeltified or deltified) and the order in
4086/// which objects are emitted into the pack.
4087///
4088/// # Ordering
4089///
4090/// Candidates are sorted by `(type, size descending, object id)`:
4091/// * **type** — only same-type objects are deltified against one another, so
4092///   grouping by type keeps the sliding window full of viable bases. Type rank
4093///   follows [`delta_type_rank`] (commit, tree, blob, tag).
4094/// * **size descending** — larger objects come first so smaller, later objects
4095///   delta against larger bases (git's heuristic). Raw [`EncodedObject`]s carry
4096///   no path/name, so the usual path-hash key is unavailable; size is the next
4097///   best locality signal.
4098/// * **object id** — a deterministic tiebreaker for reproducible packs.
4099///
4100/// # Selection
4101///
4102/// Each object is compared against the previous up to `window` same-type
4103/// candidates (and, for thin packs, up to [`DELTA_MAX_EXTERNAL_BASES`] external
4104/// bases of the same type). The smallest delta whose encoded length is strictly
4105/// less than the object's own body is kept; otherwise the object is stored
4106/// undeltified. Delta chain depth is bounded by `options.depth` (a base may
4107/// only be used if doing so keeps the resulting chain within the bound); a depth
4108/// of `0` disables deltification entirely.
4109///
4110/// Returns the per-object plan (indexed by original object index) together with
4111/// the emit order. Every in-pack delta references a candidate that is earlier in
4112/// the emit order, so emitting in that order writes each base before any object
4113/// that depends on it.
4114fn plan_pack_deltas(
4115    objects: &[&EncodedObject],
4116    object_ids: &[ObjectId],
4117    options: &PackWriteOptions,
4118) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4119    let count = objects.len();
4120    let mut plan: Vec<PlannedEntry> = (0..count)
4121        .map(|_| PlannedEntry {
4122            base: PlannedBase::None,
4123        })
4124        .collect();
4125
4126    // Processing order. Deltas only point backwards within this order, which is
4127    // therefore also a valid emit order. Reordering by type/size improves delta
4128    // locality but is skipped when disabled or when deltification is off.
4129    let mut order: Vec<usize> = (0..count).collect();
4130    if options.reorder && options.depth > 0 {
4131        order.sort_by(|&left, &right| {
4132            delta_type_rank(objects[left].object_type)
4133                .cmp(&delta_type_rank(objects[right].object_type))
4134                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4135                .then_with(|| {
4136                    object_ids[left]
4137                        .as_bytes()
4138                        .cmp(object_ids[right].as_bytes())
4139                })
4140        });
4141    }
4142
4143    if options.depth == 0 {
4144        return Ok((plan, order));
4145    }
4146
4147    // Pre-build delta indexes for external thin-pack bases, grouped by type so
4148    // an object only compares against compatible bases.
4149    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4150        Vec::with_capacity(options.thin_bases.len());
4151    for (oid, object) in &options.thin_bases {
4152        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4153    }
4154
4155    // Chain depth ending at each object (0 = undeltified). Used to keep delta
4156    // chains within `options.depth`.
4157    let mut depth = vec![0usize; count];
4158    // Sliding window of recently processed original indices, most recent last.
4159    let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4160        std::collections::VecDeque::new();
4161
4162    for &idx in &order {
4163        let target = &objects[idx].body;
4164        let target_type = objects[idx].object_type;
4165
4166        let mut best_delta: Option<Vec<u8>> = None;
4167        let mut best_base = PlannedBase::None;
4168
4169        // Try in-pack candidates from the window (same type only).
4170        for base_entry in window.iter().rev() {
4171            let base_idx = base_entry.idx;
4172            if objects[base_idx].object_type != target_type {
4173                continue;
4174            }
4175            // Using this base would make the new chain depth + 1; skip if that
4176            // would exceed the configured maximum.
4177            if depth[base_idx] + 1 > options.depth {
4178                continue;
4179            }
4180            let Some(delta) = base_entry.index.delta(target) else {
4181                continue;
4182            };
4183            if !delta_is_acceptable(&delta, target.len()) {
4184                continue;
4185            }
4186            if best_delta
4187                .as_ref()
4188                .is_none_or(|current| delta.len() < current.len())
4189            {
4190                best_delta = Some(delta);
4191                best_base = PlannedBase::InPack {
4192                    base_idx,
4193                    delta: Vec::new(),
4194                };
4195            }
4196        }
4197
4198        // Try external thin-pack bases (ref-delta; external base is depth 0, so
4199        // the resulting chain depth is 1, always within a non-zero bound).
4200        for (base_oid, base_type, base_index) in
4201            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4202        {
4203            if *base_type != target_type {
4204                continue;
4205            }
4206            let Some(delta) = base_index.delta(target) else {
4207                continue;
4208            };
4209            if !delta_is_acceptable(&delta, target.len()) {
4210                continue;
4211            }
4212            if best_delta
4213                .as_ref()
4214                .is_none_or(|current| delta.len() < current.len())
4215            {
4216                best_delta = Some(delta);
4217                best_base = PlannedBase::External {
4218                    base_oid: *base_oid,
4219                    delta: Vec::new(),
4220                };
4221            }
4222        }
4223
4224        if let Some(delta) = best_delta {
4225            match best_base {
4226                PlannedBase::InPack { base_idx, .. } => {
4227                    depth[idx] = depth[base_idx] + 1;
4228                    plan[idx].base = PlannedBase::InPack { base_idx, delta };
4229                }
4230                PlannedBase::External { base_oid, .. } => {
4231                    depth[idx] = 1;
4232                    plan[idx].base = PlannedBase::External { base_oid, delta };
4233                }
4234                PlannedBase::None => {}
4235            }
4236        }
4237
4238        // Add this object to the window for subsequent candidates.
4239        window.push_back(DeltaWindowEntry {
4240            idx,
4241            index: DeltaIndex::new(&objects[idx].body),
4242        });
4243        while window.len() > options.window {
4244            window.pop_front();
4245        }
4246    }
4247
4248    Ok((plan, order))
4249}
4250
/// Whether a generated delta is worth using instead of storing the object
/// undeltified.
///
/// The delta must be non-empty and strictly shorter than the object's own
/// body; otherwise the undeltified form is the same size or smaller and is
/// always self-contained.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4258
/// Append `value` to `out` as a little-endian base-128 varint: seven payload
/// bits per byte, least-significant group first, with the high bit set on every
/// byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value as u8) & 0x7f) | 0x80);
        value >>= 7;
    }
    // The final group fits in seven bits, so no continuation flag is set.
    out.push(value as u8);
}
4272
/// Append delta copy commands that copy `size` bytes starting at `offset` in
/// the base object.
///
/// Ranges longer than `0x10000` bytes are split into several commands. Each
/// command is a byte with the high bit set whose low bits flag which of the
/// four offset bytes and three size bytes follow; zero bytes are omitted. A
/// chunk of exactly `0x10000` bytes is encoded as size 0. Only the low four
/// bytes of `offset` are encoded.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_COPY: u64 = 0x10000;
    while size > 0 {
        let span = size.min(MAX_COPY);
        let size_field = if span == MAX_COPY { 0 } else { span };

        // Reserve the command byte, then set its flag bits as operand bytes
        // are appended after it.
        let command_at = out.len();
        out.push(0x80);

        let offset_bytes = offset.to_le_bytes();
        for (bit, &byte) in offset_bytes[..4].iter().enumerate() {
            if byte != 0 {
                out[command_at] |= 1u8 << bit;
                out.push(byte);
            }
        }
        let size_bytes = size_field.to_le_bytes();
        for (bit, &byte) in size_bytes[..3].iter().enumerate() {
            if byte != 0 {
                out[command_at] |= 0x10u8 << bit;
                out.push(byte);
            }
        }

        offset += span;
        size -= span;
    }
}
4302
/// Append delta insert commands carrying `bytes` literally.
///
/// Each command is a length byte (1..=0x7f) followed by that many literal
/// bytes; longer inputs are split into several commands. Empty input writes
/// nothing.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for piece in bytes.chunks(0x7f) {
        out.push(piece.len() as u8);
        out.extend_from_slice(piece);
    }
}
4311
4312fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
4313    let mut value = 0u64;
4314    let mut shift = 0u32;
4315    loop {
4316        let Some(byte) = delta.get(*cursor).copied() else {
4317            return Err(GitError::InvalidObject("truncated delta size".into()));
4318        };
4319        *cursor += 1;
4320        value = value
4321            .checked_add(
4322                u64::from(byte & 0x7f)
4323                    .checked_shl(shift)
4324                    .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
4325            )
4326            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4327        if byte & 0x80 == 0 {
4328            return Ok(value);
4329        }
4330        shift = shift
4331            .checked_add(7)
4332            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4333    }
4334}
4335
4336fn read_delta_copy_value(
4337    delta: &[u8],
4338    cursor: &mut usize,
4339    command: u8,
4340    masks: &[u8],
4341) -> Result<u64> {
4342    let mut value = 0u64;
4343    for (shift, mask) in masks.iter().enumerate() {
4344        if command & mask != 0 {
4345            let Some(byte) = delta.get(*cursor).copied() else {
4346                return Err(GitError::InvalidObject(
4347                    "truncated delta copy command".into(),
4348                ));
4349            };
4350            *cursor += 1;
4351            value |= u64::from(byte) << (shift * 8);
4352        }
4353    }
4354    Ok(value)
4355}
4356
4357fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8], compression_level: u32) -> Result<()> {
4358    let mut compressor = Compress::new(Compression::new(compression_level.min(9)), true);
4359    out.reserve(zlib_compress_bound(body.len()));
4360    let status = compressor
4361        .compress_vec(body, out, FlushCompress::Finish)
4362        .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
4363    if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
4364        return Err(GitError::InvalidObject(
4365            "zlib compression did not finish pack entry".into(),
4366        ));
4367    }
4368    Ok(())
4369}
4370
/// Upper bound used to pre-size the output buffer for compressing `len` bytes:
/// `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    [12u32, 14, 25]
        .iter()
        .fold(len, |bound, &shift| bound.saturating_add(len >> shift))
        .saturating_add(13)
}
4377
4378fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
4379    let type_code = match object_type {
4380        ObjectType::Commit => 1,
4381        ObjectType::Tree => 2,
4382        ObjectType::Blob => 3,
4383        ObjectType::Tag => 4,
4384    };
4385    write_pack_entry_header_kind(out, type_code, size);
4386}
4387
/// Append a pack entry header for the given raw `type_code` and `size`.
///
/// The first byte holds the type code in bits 4-6 and the low four bits of the
/// size. Remaining size bits follow seven at a time, least-significant first.
/// The high bit of every byte except the last marks a continuation.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    let low_nibble = (size as u8) & 0x0f;
    let mut rest = size >> 4;
    let head = (type_code << 4) | low_nibble;
    out.push(if rest != 0 { head | 0x80 } else { head });
    while rest != 0 {
        let group = (rest as u8) & 0x7f;
        rest >>= 7;
        out.push(if rest != 0 { group | 0x80 } else { group });
    }
}
4404
4405fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4406    if relative == 0 {
4407        return Err(GitError::InvalidFormat(
4408            "ofs-delta relative offset cannot be zero".into(),
4409        ));
4410    }
4411    let mut value = relative;
4412    let mut bytes = vec![(value & 0x7f) as u8];
4413    value >>= 7;
4414    while value != 0 {
4415        value -= 1;
4416        bytes.push(((value & 0x7f) as u8) | 0x80);
4417        value >>= 7;
4418    }
4419    bytes.reverse();
4420    out.extend_from_slice(&bytes);
4421    Ok(())
4422}
4423
4424fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4425    let Some(byte) = bytes.get(*offset).copied() else {
4426        return Err(GitError::InvalidFormat(
4427            "truncated pack entry header".into(),
4428        ));
4429    };
4430    *offset += 1;
4431    Ok(byte)
4432}
4433
/// Decode a big-endian `u16` from the first two bytes of `bytes`.
///
/// Panics if `bytes` holds fewer than two bytes; callers are expected to pass
/// a slice they have already bounds-checked.
fn u16_be(bytes: &[u8]) -> u16 {
    (u16::from(bytes[0]) << 8) | u16::from(bytes[1])
}
4437
/// Decode a big-endian `u32` from the first four bytes of `bytes`.
///
/// Panics if `bytes` holds fewer than four bytes; callers are expected to pass
/// a slice they have already bounds-checked.
fn u32_be(bytes: &[u8]) -> u32 {
    bytes[..4]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
4441
/// Decode a big-endian `u64` from the first eight bytes of `bytes`.
///
/// Panics if `bytes` holds fewer than eight bytes; callers are expected to
/// pass a slice they have already bounds-checked.
fn u64_be(bytes: &[u8]) -> u64 {
    bytes[..8]
        .iter()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
}
4447
4448fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4449    let mut fanout = [0u32; 256];
4450    let mut previous = 0u32;
4451    for slot in &mut fanout {
4452        *slot = u32_be(&bytes[*offset..*offset + 4]);
4453        if *slot < previous {
4454            return Err(GitError::InvalidFormat(
4455                "pack index fanout is not monotonic".into(),
4456            ));
4457        }
4458        previous = *slot;
4459        *offset += 4;
4460    }
4461    Ok(fanout)
4462}
4463
4464fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4465    let expected_min = if oid_bytes[0] == 0 {
4466        0
4467    } else {
4468        fanout[usize::from(oid_bytes[0] - 1)]
4469    };
4470    if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4471        return Err(GitError::InvalidFormat(
4472            "pack index object id is outside its fanout bucket".into(),
4473        ));
4474    }
4475    Ok(())
4476}
4477
4478fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4479    if raw_offset & 0x8000_0000 == 0 {
4480        return Ok(u64::from(raw_offset));
4481    }
4482    let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4483    let large_start = large_idx
4484        .checked_mul(8)
4485        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4486    let large_end = large_start
4487        .checked_add(8)
4488        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4489    if large_end > large_offset_table.len() {
4490        return Err(GitError::InvalidFormat(
4491            "pack index large offset points past table".into(),
4492        ));
4493    }
4494    Ok(u64_be(&large_offset_table[large_start..large_end]))
4495}
4496
4497fn checked_range(
4498    start: usize,
4499    count: usize,
4500    width: usize,
4501    total: usize,
4502) -> Result<std::ops::Range<usize>> {
4503    let len = count
4504        .checked_mul(width)
4505        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4506    let end = start
4507        .checked_add(len)
4508        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4509    if end > total {
4510        return Err(GitError::InvalidFormat("truncated pack index table".into()));
4511    }
4512    Ok(start..end)
4513}
4514
4515fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4516    let mut seen = vec![false; positions.len()];
4517    for position in positions {
4518        let idx = *position as usize;
4519        if idx >= positions.len() {
4520            return Err(GitError::InvalidFormat(
4521                "reverse index position points past object table".into(),
4522            ));
4523        }
4524        if seen[idx] {
4525            return Err(GitError::InvalidFormat(
4526                "reverse index position is duplicated".into(),
4527            ));
4528        }
4529        seen[idx] = true;
4530    }
4531    Ok(())
4532}
4533
4534fn parse_midx_pack_names(
4535    bytes: &[u8],
4536    chunks: &[MultiPackIndexChunk],
4537    pack_count: usize,
4538    version: u8,
4539) -> Result<Vec<String>> {
4540    let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4541        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4542    let mut names = Vec::with_capacity(pack_count);
4543    let mut offset = 0usize;
4544    while names.len() < pack_count {
4545        let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4546            return Err(GitError::InvalidFormat(
4547                "fatal: multi-pack-index pack-name chunk is too short".into(),
4548            ));
4549        };
4550        let name_bytes = &data[offset..offset + relative_end];
4551        if name_bytes.is_empty() {
4552            return Err(GitError::InvalidFormat(
4553                "multi-pack-index PNAM entry is empty".into(),
4554            ));
4555        }
4556        let name = std::str::from_utf8(name_bytes)
4557            .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4558        if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4559            return Err(GitError::InvalidFormat(
4560                "multi-pack-index PNAM entry contains a path separator".into(),
4561            ));
4562        }
4563        names.push(name.to_string());
4564        offset += relative_end + 1;
4565    }
4566    let padding = &data[offset..];
4567    if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4568        return Err(GitError::InvalidFormat(
4569            "multi-pack-index PNAM padding is invalid".into(),
4570        ));
4571    }
4572    if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4573        return Err(GitError::InvalidFormat(
4574            "multi-pack-index v1 PNAM entries are not sorted".into(),
4575        ));
4576    }
4577    Ok(names)
4578}
4579
4580fn parse_midx_oid_fanout(
4581    bytes: &[u8],
4582    chunks: &[MultiPackIndexChunk],
4583) -> Result<([u32; 256], usize)> {
4584    let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4585        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4586    if data.len() != 256 * 4 {
4587        return Err(GitError::InvalidFormat(
4588            "error: multi-pack-index OID fanout is of the wrong size\nfatal: multi-pack-index required OID fanout chunk missing or corrupted".into(),
4589        ));
4590    }
4591    let mut fanout = [0u32; 256];
4592    let mut previous = 0u32;
4593    for (idx, slot) in fanout.iter_mut().enumerate() {
4594        let start = idx * 4;
4595        *slot = u32_be(&data[start..start + 4]);
4596        if *slot < previous {
4597            return Err(GitError::InvalidFormat(
4598                format!(
4599                    "error: oid fanout out of order: fanout[{}] = {:x} > {:x} = fanout[{idx}]\nfatal: multi-pack-index required OID fanout chunk missing or corrupted",
4600                    idx - 1,
4601                    previous,
4602                    *slot
4603                ),
4604            ));
4605        }
4606        previous = *slot;
4607    }
4608    Ok((fanout, fanout[255] as usize))
4609}
4610
4611fn parse_midx_object_ids(
4612    bytes: &[u8],
4613    chunks: &[MultiPackIndexChunk],
4614    format: ObjectFormat,
4615    object_count: usize,
4616    fanout: &[u32; 256],
4617) -> Result<Vec<ObjectId>> {
4618    let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4619        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4620    let expected_len = object_count
4621        .checked_mul(format.raw_len())
4622        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4623    if data.len() != expected_len {
4624        return Err(GitError::InvalidFormat(
4625            "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
4626        ));
4627    }
4628
4629    let mut ids = Vec::with_capacity(object_count);
4630    let mut counts = [0u32; 256];
4631    let mut previous_oid: Option<ObjectId> = None;
4632    for idx in 0..object_count {
4633        let start = idx * format.raw_len();
4634        let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4635        if let Some(previous) = &previous_oid
4636            && previous.as_bytes() >= oid.as_bytes()
4637        {
4638            return Err(GitError::InvalidFormat(
4639                "multi-pack-index OIDL object ids are not strictly sorted".into(),
4640            ));
4641        }
4642        counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4643            .checked_add(1)
4644            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4645        previous_oid = Some(oid);
4646        ids.push(oid);
4647    }
4648
4649    let mut running = 0u32;
4650    for (idx, count) in counts.iter().enumerate() {
4651        running = running
4652            .checked_add(*count)
4653            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4654        if fanout[idx] != running {
4655            return Err(GitError::InvalidFormat(
4656                "multi-pack-index OIDF fanout does not match OIDL".into(),
4657            ));
4658        }
4659    }
4660    Ok(ids)
4661}
4662
4663fn parse_midx_object_offsets(
4664    bytes: &[u8],
4665    chunks: &[MultiPackIndexChunk],
4666    object_ids: Vec<ObjectId>,
4667    pack_count: u32,
4668) -> Result<Vec<MultiPackIndexEntry>> {
4669    let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4670        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4671    let expected_len = object_ids
4672        .len()
4673        .checked_mul(8)
4674        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4675    if data.len() != expected_len {
4676        return Err(GitError::InvalidFormat(
4677            "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
4678        ));
4679    }
4680    let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4681    if let Some(large_offsets) = large_offsets
4682        && large_offsets.len() % 8 != 0
4683    {
4684        return Err(GitError::InvalidFormat(
4685            "multi-pack-index LOFF chunk has invalid length".into(),
4686        ));
4687    }
4688
4689    let mut entries = Vec::with_capacity(object_ids.len());
4690    for (idx, oid) in object_ids.into_iter().enumerate() {
4691        let start = idx * 8;
4692        let pack_int_id = u32_be(&data[start..start + 4]);
4693        if pack_int_id >= pack_count {
4694            return Err(GitError::InvalidFormat(
4695                "multi-pack-index object points past pack table".into(),
4696            ));
4697        }
4698        let raw_offset = u32_be(&data[start + 4..start + 8]);
4699        let offset = if raw_offset & 0x8000_0000 == 0 {
4700            u64::from(raw_offset)
4701        } else {
4702            let Some(large_offsets) = large_offsets else {
4703                return Err(GitError::InvalidFormat(
4704                    "multi-pack-index large offset missing LOFF chunk".into(),
4705                ));
4706            };
4707            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4708            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4709                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4710            })?;
4711            let large_end = large_start.checked_add(8).ok_or_else(|| {
4712                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4713            })?;
4714            if large_end > large_offsets.len() {
4715                return Err(GitError::InvalidFormat(
4716                    "fatal: multi-pack-index large offset out of bounds".into(),
4717                ));
4718            }
4719            u64_be(&large_offsets[large_start..large_end])
4720        };
4721        entries.push(MultiPackIndexEntry {
4722            oid,
4723            pack_int_id,
4724            offset,
4725            force_large_offset: raw_offset & 0x8000_0000 != 0,
4726        });
4727    }
4728    Ok(entries)
4729}
4730
4731fn parse_midx_reverse_index(
4732    bytes: &[u8],
4733    chunks: &[MultiPackIndexChunk],
4734    object_count: usize,
4735) -> Result<Option<Vec<u32>>> {
4736    let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4737        return Ok(None);
4738    };
4739    let expected_len = object_count
4740        .checked_mul(4)
4741        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4742    if data.len() != expected_len {
4743        return Err(GitError::InvalidFormat(
4744            "multi-pack-index reverse-index chunk is the wrong size".into(),
4745        ));
4746    }
4747    let mut positions = Vec::with_capacity(object_count);
4748    for idx in 0..object_count {
4749        let start = idx * 4;
4750        positions.push(u32_be(&data[start..start + 4]));
4751    }
4752    validate_position_permutation(&positions)?;
4753    Ok(Some(positions))
4754}
4755
4756fn parse_midx_bitmapped_packs(
4757    bytes: &[u8],
4758    chunks: &[MultiPackIndexChunk],
4759    pack_count: usize,
4760    object_count: usize,
4761) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4762    let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4763        return Ok(None);
4764    };
4765    let expected_len = pack_count
4766        .checked_mul(8)
4767        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4768    if data.len() != expected_len {
4769        return Err(GitError::InvalidFormat(
4770            "multi-pack-index BTMP chunk has invalid length".into(),
4771        ));
4772    }
4773    let mut entries = Vec::with_capacity(pack_count);
4774    for idx in 0..pack_count {
4775        let start = idx * 8;
4776        let bitmap_pos = u32_be(&data[start..start + 4]);
4777        let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4778        let bitmap_end = u64::from(bitmap_pos)
4779            .checked_add(u64::from(bitmap_nr))
4780            .ok_or_else(|| {
4781                GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4782            })?;
4783        if bitmap_end > object_count as u64 {
4784            return Err(GitError::InvalidFormat(
4785                "multi-pack-index BTMP range points past object table".into(),
4786            ));
4787        }
4788        entries.push(MultiPackBitmapPack {
4789            bitmap_pos,
4790            bitmap_nr,
4791        });
4792    }
4793    Ok(Some(entries))
4794}
4795
4796fn midx_chunk_data<'a>(
4797    bytes: &'a [u8],
4798    chunks: &[MultiPackIndexChunk],
4799    id: [u8; 4],
4800    required: bool,
4801) -> Result<Option<&'a [u8]>> {
4802    let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4803        if required {
4804            return Err(GitError::InvalidFormat(format!(
4805                "multi-pack-index missing {} chunk",
4806                std::str::from_utf8(&id).unwrap_or("required")
4807            )));
4808        }
4809        return Ok(None);
4810    };
4811    let start = usize::try_from(chunk.offset)
4812        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4813    let len = usize::try_from(chunk.len)
4814        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4815    let end = start
4816        .checked_add(len)
4817        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4818    let Some(data) = bytes.get(start..end) else {
4819        return Err(GitError::InvalidFormat(
4820            "multi-pack-index chunk extends past file".into(),
4821        ));
4822    };
4823    Ok(Some(data))
4824}
4825
4826fn hash_function_id(format: ObjectFormat) -> u32 {
4827    match format {
4828        ObjectFormat::Sha1 => 1,
4829        ObjectFormat::Sha256 => 2,
4830    }
4831}
4832
/// Largest number of clean (run) words one EWAH running-length word (RLW) can
/// describe. The run-length field is 32 bits wide (bits 1..=32 of the RLW).
const EWAH_MAX_RUNNING_LEN: u64 = (1u64 << 32) - 1;

/// Largest number of literal (dirty) words that may follow one EWAH
/// running-length word. The literal-count field is 31 bits wide (bits 33..=63
/// of the RLW).
const EWAH_MAX_LITERAL_LEN: u64 = (1u64 << 31) - 1;

/// A 64-bit word with every bit set, used to recognise a "clean" run of set
/// bits.
const EWAH_ALL_ONES: u64 = !0u64;
4843
4844impl EwahBitmap {
4845    /// Constructs an [`EwahBitmap`] in git's canonical EWAH compressed form
4846    /// from a slice of raw uncompressed 64-bit words.
4847    ///
4848    /// Within each word bit `i` corresponds to position `word_index * 64 + i`,
4849    /// matching git's on-disk convention. `bit_size` records the number of
4850    /// logical bits the bitmap spans; it must not exceed `words.len() * 64`.
4851    ///
4852    /// This mirrors libgit's `ewah_add`/`ewah_add_empty_words` incremental
4853    /// encoder: consecutive all-zero or all-one words collapse into a run, and
4854    /// any other word is stored verbatim as a literal. Only the first
4855    /// `bit_size.div_ceil(64)` words back the declared bits; any extra trailing
4856    /// words supplied by the caller are ignored, just as git encodes a bitmap
4857    /// sized to its highest set bit.
4858    pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4859        let required_words = bit_size.div_ceil(64) as usize;
4860        if required_words > words.len() {
4861            return Err(GitError::InvalidFormat(format!(
4862                "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4863                words.len()
4864            )));
4865        }
4866        // Only the words that actually back the declared bits matter; libgit
4867        // never emits clean trailing zero words for the unused tail.
4868        let significant = &words[..required_words];
4869        let mut builder = EwahBuilder::new(bit_size);
4870        for &word in significant {
4871            if word == 0 {
4872                builder.add_empty_words(false, 1);
4873            } else if word == EWAH_ALL_ONES {
4874                builder.add_empty_words(true, 1);
4875            } else {
4876                builder.add_literal(word);
4877            }
4878        }
4879        builder.finish()
4880    }
4881
4882    /// Constructs an [`EwahBitmap`] from a set of bit positions.
4883    ///
4884    /// `bit_size` is the number of logical bits (typically the pack object
4885    /// count). Every position in `positions` must be strictly less than
4886    /// `bit_size`. Positions may be given in any order and may repeat.
4887    pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4888        let word_count = bit_size.div_ceil(64) as usize;
4889        let mut words = vec![0u64; word_count];
4890        for &position in positions {
4891            if position >= bit_size {
4892                return Err(GitError::InvalidFormat(format!(
4893                    "EWAH bit position {position} out of range for bit_size {bit_size}"
4894                )));
4895            }
4896            let word_index = (position / 64) as usize;
4897            let bit_index = position % 64;
4898            words[word_index] |= 1u64 << bit_index;
4899        }
4900        Self::from_words(bit_size, &words)
4901    }
4902
4903    /// An empty EWAH bitmap (no bits, no words). This is what git writes for an
4904    /// all-zero type bitmap (e.g. when a pack has no tags).
4905    pub fn empty() -> Self {
4906        Self {
4907            bit_size: 0,
4908            words: Vec::new(),
4909            rlw_position: 0,
4910        }
4911    }
4912
4913    /// Decodes the compressed EWAH back into raw 64-bit words, LSB-first within
4914    /// each word. The returned vector has `bit_size.div_ceil(64)` entries.
4915    ///
4916    /// This is the inverse of [`EwahBitmap::from_words`] for the bits the
4917    /// bitmap actually covers and is primarily used to validate roundtrips.
4918    pub fn to_words(&self) -> Result<Vec<u64>> {
4919        let mut out = Vec::new();
4920        let mut word_idx = 0usize;
4921        while word_idx < self.words.len() {
4922            let rlw = self.words[word_idx];
4923            let run_bit = rlw & 1;
4924            let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4925            let literal_words = (rlw >> 33) as usize;
4926            word_idx += 1;
4927            let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4928            for _ in 0..run_words {
4929                out.push(fill);
4930            }
4931            let literal_end = word_idx
4932                .checked_add(literal_words)
4933                .filter(|end| *end <= self.words.len())
4934                .ok_or_else(|| {
4935                    GitError::InvalidFormat("EWAH literal words extend past word table".into())
4936                })?;
4937            out.extend_from_slice(&self.words[word_idx..literal_end]);
4938            word_idx = literal_end;
4939        }
4940        let required_words = (self.bit_size as usize).div_ceil(64);
4941        if out.len() < required_words {
4942            out.resize(required_words, 0);
4943        }
4944        out.truncate(required_words);
4945        Ok(out)
4946    }
4947
4948    /// Returns the sorted set bit positions covered by this bitmap.
4949    pub fn to_positions(&self) -> Result<Vec<u32>> {
4950        let words = self.to_words()?;
4951        let mut positions = Vec::new();
4952        for (word_index, word) in words.iter().enumerate() {
4953            let mut remaining = *word;
4954            while remaining != 0 {
4955                let bit = remaining.trailing_zeros();
4956                let position = (word_index as u64) * 64 + u64::from(bit);
4957                if position < u64::from(self.bit_size) {
4958                    // position always fits in u32 because bit_size is u32.
4959                    positions.push(position as u32);
4960                }
4961                remaining &= remaining - 1;
4962            }
4963        }
4964        Ok(positions)
4965    }
4966
4967    /// Serialises the bitmap to git's on-disk EWAH byte layout: `bit_size`
4968    /// (u32 BE), word count (u32 BE), each compressed word (u64 BE), then the
4969    /// running-length-word position (u32 BE).
4970    pub fn to_bytes(&self) -> Vec<u8> {
4971        let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4972        self.append_bytes(&mut out);
4973        out
4974    }
4975
4976    fn append_bytes(&self, out: &mut Vec<u8>) {
4977        out.extend_from_slice(&self.bit_size.to_be_bytes());
4978        out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4979        for word in &self.words {
4980            out.extend_from_slice(&word.to_be_bytes());
4981        }
4982        out.extend_from_slice(&self.rlw_position.to_be_bytes());
4983    }
4984}
4985
4986/// Incremental EWAH compressed-buffer builder mirroring libgit's `ewah_add`.
4987///
4988/// The buffer is a sequence of blocks. Each block begins with a running-length
4989/// word (RLW) and is followed by zero or more literal words:
4990///   * bit 0      => value of the clean run words (0 or 1)
4991///   * bits 1..=32 => number of clean run words (32-bit field)
4992///   * bits 33..=63 => number of trailing literal words (31-bit field)
4993struct EwahBuilder {
4994    bit_size: u32,
4995    words: Vec<u64>,
4996    rlw_position: usize,
4997}
4998
4999impl EwahBuilder {
5000    fn new(bit_size: u32) -> Self {
5001        // Every EWAH buffer begins with an RLW, even an empty one.
5002        Self {
5003            bit_size,
5004            words: vec![0u64],
5005            rlw_position: 0,
5006        }
5007    }
5008
5009    fn rlw(&self) -> u64 {
5010        self.words[self.rlw_position]
5011    }
5012
5013    fn set_rlw(&mut self, value: u64) {
5014        self.words[self.rlw_position] = value;
5015    }
5016
5017    fn rlw_running_len(&self) -> u64 {
5018        (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
5019    }
5020
5021    fn rlw_running_bit(&self) -> bool {
5022        self.rlw() & 1 == 1
5023    }
5024
5025    fn rlw_literal_len(&self) -> u64 {
5026        self.rlw() >> 33
5027    }
5028
5029    fn set_running_bit(&mut self, bit: bool) {
5030        let mut value = self.rlw();
5031        value &= !1;
5032        value |= u64::from(bit);
5033        self.set_rlw(value);
5034    }
5035
5036    fn set_running_len(&mut self, len: u64) {
5037        let mut value = self.rlw();
5038        value &= !(EWAH_MAX_RUNNING_LEN << 1);
5039        value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
5040        self.set_rlw(value);
5041    }
5042
5043    fn set_literal_len(&mut self, len: u64) {
5044        let mut value = self.rlw();
5045        value &= (1u64 << 33) - 1;
5046        value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
5047        self.set_rlw(value);
5048    }
5049
5050    /// Begins a fresh RLW block at the end of the buffer.
5051    fn push_rlw(&mut self) {
5052        self.rlw_position = self.words.len();
5053        self.words.push(0);
5054    }
5055
5056    /// Appends `number` clean words whose bits are all `value`, mirroring
5057    /// libgit's `ewah_add_empty_words`.
5058    ///
5059    /// A run can only be merged into the current RLW when that RLW has not yet
5060    /// emitted any literal words and its run either is empty or already carries
5061    /// the same fill value. Otherwise a fresh RLW block must be started, because
5062    /// every block stores its run strictly before its literals.
5063    fn add_empty_words(&mut self, value: bool, mut number: u64) {
5064        while number > 0 {
5065            // The current RLW can absorb more run words only when it has no
5066            // literals yet, its run is either empty or already the right fill
5067            // value, and the 32-bit run-length field is not already saturated.
5068            let can_extend = self.rlw_literal_len() == 0
5069                && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
5070                && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
5071            if !can_extend {
5072                self.push_rlw();
5073            }
5074            if self.rlw_running_len() == 0 {
5075                self.set_running_bit(value);
5076            }
5077            let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
5078            let take = available.min(number);
5079            self.set_running_len(self.rlw_running_len() + take);
5080            number -= take;
5081        }
5082    }
5083
5084    /// Appends a single literal (dirty) word verbatim, mirroring libgit's
5085    /// `ewah_add_dirty_words` for a count of one.
5086    fn add_literal(&mut self, word: u64) {
5087        if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
5088            self.push_rlw();
5089        }
5090        let literal_len = self.rlw_literal_len();
5091        self.set_literal_len(literal_len + 1);
5092        self.words.push(word);
5093    }
5094
5095    fn finish(self) -> Result<EwahBitmap> {
5096        let rlw_position = u32::try_from(self.rlw_position)
5097            .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
5098        if self.words.len() > u32::MAX as usize {
5099            return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
5100        }
5101        Ok(EwahBitmap {
5102            bit_size: self.bit_size,
5103            words: self.words,
5104            rlw_position,
5105        })
5106    }
5107}
5108
5109/// Builder that assembles a reachability bitmap (`.bitmap`) for a pack.
5110///
5111/// The writer is constructed from the object layout of a pack (one
5112/// [`ObjectType`] per object, in pack order) and the pack's trailing checksum.
5113/// Callers then register one selected commit per [`add_commit`] call, supplying
5114/// the set of pack positions reachable from that commit. [`build`]/[`write`]
5115/// produce a [`PackBitmapIndex`] / serialised `.bitmap` bytes matching git's
5116/// on-disk format (signature `BITM`, version 1).
5117///
5118/// [`add_commit`]: PackBitmapWriter::add_commit
5119/// [`build`]: PackBitmapWriter::build
5120/// [`write`]: PackBitmapWriter::write
5121#[derive(Debug, Clone)]
5122pub struct PackBitmapWriter {
5123    format: ObjectFormat,
5124    pack_checksum: ObjectId,
5125    object_count: u32,
5126    commit_positions: Vec<u32>,
5127    tree_positions: Vec<u32>,
5128    blob_positions: Vec<u32>,
5129    tag_positions: Vec<u32>,
5130    name_hash_cache: Option<Vec<u32>>,
5131    selected: Vec<SelectedCommit>,
5132}
5133
5134#[derive(Debug, Clone)]
5135struct SelectedCommit {
5136    /// Oid-sorted `.idx` position (what the on-disk entry records). The
5137    /// commit's pack-order position lives in `reachable` with the rest of the
5138    /// bits.
5139    commit_index_position: u32,
5140    flags: u8,
5141    reachable: Vec<u32>,
5142}
5143
5144impl PackBitmapWriter {
5145    /// `OBJ_NONE` selection flag: this commit's bitmap is stored in full (no XOR
5146    /// compression against a previously selected commit). This is the only flag
5147    /// value this writer emits.
5148    pub const FLAG_NONE: u8 = 0;
5149
5150    /// Creates a writer for a pack whose objects (in pack order) have the given
5151    /// [`ObjectType`]s and whose trailing checksum is `pack_checksum`.
5152    ///
5153    /// Returns an error if the pack contains more than `u32::MAX` objects, if
5154    /// `pack_checksum`'s format does not match `format`, or if any object type
5155    /// is not one of the four reachable git object kinds.
5156    pub fn new(
5157        format: ObjectFormat,
5158        pack_checksum: ObjectId,
5159        object_types: &[ObjectType],
5160    ) -> Result<Self> {
5161        if object_types.len() > u32::MAX as usize {
5162            return Err(GitError::InvalidFormat(
5163                "too many objects for a pack bitmap".into(),
5164            ));
5165        }
5166        if pack_checksum.format() != format {
5167            return Err(GitError::InvalidObjectId(
5168                "pack checksum format does not match bitmap format".into(),
5169            ));
5170        }
5171        let object_count = object_types.len() as u32;
5172        let mut commit_positions = Vec::new();
5173        let mut tree_positions = Vec::new();
5174        let mut blob_positions = Vec::new();
5175        let mut tag_positions = Vec::new();
5176        for (index, object_type) in object_types.iter().enumerate() {
5177            let position = index as u32;
5178            match object_type {
5179                ObjectType::Commit => commit_positions.push(position),
5180                ObjectType::Tree => tree_positions.push(position),
5181                ObjectType::Blob => blob_positions.push(position),
5182                ObjectType::Tag => tag_positions.push(position),
5183            }
5184        }
5185        Ok(Self {
5186            format,
5187            pack_checksum,
5188            object_count,
5189            commit_positions,
5190            tree_positions,
5191            blob_positions,
5192            tag_positions,
5193            name_hash_cache: None,
5194            selected: Vec::new(),
5195        })
5196    }
5197
5198    /// Attaches a name-hash cache (one `u32` per object, in pack order). When
5199    /// set, the written bitmap advertises [`PackBitmapIndex::OPTION_HASH_CACHE`]
5200    /// and appends the cache after the bitmap entries, exactly as git does.
5201    ///
5202    /// Returns an error if the cache length does not equal the object count.
5203    pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5204        if cache.len() != self.object_count as usize {
5205            return Err(GitError::InvalidFormat(format!(
5206                "name hash cache has {} entries but pack has {} objects",
5207                cache.len(),
5208                self.object_count
5209            )));
5210        }
5211        self.name_hash_cache = Some(cache);
5212        Ok(self)
5213    }
5214
5215    /// Registers a selected commit and the pack positions reachable from it.
5216    ///
5217    /// `commit_position` is the *pack-order* position of the commit itself (the
5218    /// bit-number space); it must reference a commit object and is implicitly
5219    /// part of the reachable set. `commit_index_position` is the commit's
5220    /// position in the *oid-sorted* pack index — this is what the on-disk entry
5221    /// records (upstream `oid_pos`); bits and entry positions live in different
5222    /// spaces. `reachable` lists the pack-order positions of every object
5223    /// reachable from the commit (it may include or omit `commit_position`;
5224    /// duplicates are fine). All positions must be in range. The commit's full
5225    /// (non-XORed) bitmap is stored.
5226    pub fn add_commit(
5227        &mut self,
5228        commit_position: u32,
5229        commit_index_position: u32,
5230        reachable: &[u32],
5231    ) -> Result<()> {
5232        if commit_position >= self.object_count {
5233            return Err(GitError::InvalidFormat(format!(
5234                "commit position {commit_position} out of range for {} objects",
5235                self.object_count
5236            )));
5237        }
5238        if commit_index_position >= self.object_count {
5239            return Err(GitError::InvalidFormat(format!(
5240                "commit index position {commit_index_position} out of range for {} objects",
5241                self.object_count
5242            )));
5243        }
5244        if !self.commit_positions.contains(&commit_position) {
5245            return Err(GitError::InvalidFormat(format!(
5246                "bitmap commit position {commit_position} is not a commit object"
5247            )));
5248        }
5249        for &position in reachable {
5250            if position >= self.object_count {
5251                return Err(GitError::InvalidFormat(format!(
5252                    "reachable position {position} out of range for {} objects",
5253                    self.object_count
5254                )));
5255            }
5256        }
5257        let mut reachable = reachable.to_vec();
5258        reachable.push(commit_position);
5259        self.selected.push(SelectedCommit {
5260            commit_index_position,
5261            flags: Self::FLAG_NONE,
5262            reachable,
5263        });
5264        Ok(())
5265    }
5266
5267    /// Builds the in-memory [`PackBitmapIndex`] without serialising it.
5268    ///
5269    /// The resulting index always advertises
5270    /// [`PackBitmapIndex::OPTION_FULL_DAG`] (the four type bitmaps fully cover
5271    /// the pack) and, when a name-hash cache was attached,
5272    /// [`PackBitmapIndex::OPTION_HASH_CACHE`].
5273    pub fn build(&self) -> Result<PackBitmapIndex> {
5274        let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5275        let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5276        let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5277        let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5278
5279        let mut entries = Vec::with_capacity(self.selected.len());
5280        for selected in &self.selected {
5281            let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5282            entries.push(PackBitmapEntry {
5283                object_position: selected.commit_index_position,
5284                xor_offset: 0,
5285                flags: selected.flags,
5286                bitmap,
5287            });
5288        }
5289
5290        let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5291        if self.name_hash_cache.is_some() {
5292            options |= PackBitmapIndex::OPTION_HASH_CACHE;
5293        }
5294
5295        // The index checksum is only known once the body is serialised; the
5296        // dedicated `write` path fills it in. `build` reports a placeholder of
5297        // the correct format so the struct is self-consistent for callers that
5298        // only need the decoded bitmaps.
5299        let placeholder_checksum = ObjectId::null(self.format);
5300        Ok(PackBitmapIndex {
5301            version: 1,
5302            format: self.format,
5303            options,
5304            pack_checksum: self.pack_checksum.clone(),
5305            index_checksum: placeholder_checksum,
5306            type_bitmaps: PackBitmapTypeBitmaps {
5307                commits,
5308                trees,
5309                blobs,
5310                tags,
5311            },
5312            entries,
5313            name_hash_cache: self.name_hash_cache.clone(),
5314        })
5315    }
5316
5317    /// Builds and serialises the `.bitmap` file, returning the on-disk bytes
5318    /// (including the trailing index checksum).
5319    pub fn write(&self) -> Result<Vec<u8>> {
5320        self.build()?.write()
5321    }
5322}
5323
5324impl PackBitmapIndex {
5325    /// Serialises this index into git's on-disk `.bitmap` byte layout.
5326    ///
5327    /// This is the exact inverse of [`PackBitmapIndex::parse`]: signature
5328    /// `BITM`, version (u16 BE), options (u16 BE), entry count (u32 BE), the
5329    /// pack checksum, the four type bitmaps (commits, trees, blobs, tags), each
5330    /// commit entry (object position, XOR offset, flags, EWAH bitmap), the
5331    /// optional name-hash cache, and finally the trailing index checksum over
5332    /// everything written so far.
5333    ///
5334    /// The `index_checksum` field of `self` is ignored and recomputed from the
5335    /// serialised body. Returns an error for unsupported versions, mismatched
5336    /// object-id formats, an oversized entry table, or an inconsistent name-hash
5337    /// cache.
5338    pub fn write(&self) -> Result<Vec<u8>> {
5339        if self.version != 1 {
5340            return Err(GitError::Unsupported(format!(
5341                "bitmap index version {}",
5342                self.version
5343            )));
5344        }
5345        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
5346        if self.options & !known_options != 0 {
5347            return Err(GitError::Unsupported(format!(
5348                "bitmap index options {:#06x}",
5349                self.options & !known_options
5350            )));
5351        }
5352        if self.pack_checksum.format() != self.format {
5353            return Err(GitError::InvalidObjectId(
5354                "bitmap pack checksum format does not match index format".into(),
5355            ));
5356        }
5357        if self.entries.len() > u32::MAX as usize {
5358            return Err(GitError::InvalidFormat(
5359                "too many bitmap index entries".into(),
5360            ));
5361        }
5362        let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
5363        match (&self.name_hash_cache, want_cache) {
5364            (Some(_), false) => {
5365                return Err(GitError::InvalidFormat(
5366                    "name hash cache present without OPTION_HASH_CACHE".into(),
5367                ));
5368            }
5369            (None, true) => {
5370                return Err(GitError::InvalidFormat(
5371                    "OPTION_HASH_CACHE set without a name hash cache".into(),
5372                ));
5373            }
5374            _ => {}
5375        }
5376
5377        let mut out = Vec::new();
5378        out.extend_from_slice(b"BITM");
5379        out.extend_from_slice(&self.version.to_be_bytes());
5380        out.extend_from_slice(&self.options.to_be_bytes());
5381        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
5382        out.extend_from_slice(self.pack_checksum.as_bytes());
5383
5384        self.type_bitmaps.commits.append_bytes(&mut out);
5385        self.type_bitmaps.trees.append_bytes(&mut out);
5386        self.type_bitmaps.blobs.append_bytes(&mut out);
5387        self.type_bitmaps.tags.append_bytes(&mut out);
5388
5389        for (idx, entry) in self.entries.iter().enumerate() {
5390            if entry.xor_offset as usize > idx {
5391                return Err(GitError::InvalidFormat(
5392                    "bitmap index entry has invalid XOR offset".into(),
5393                ));
5394            }
5395            out.extend_from_slice(&entry.object_position.to_be_bytes());
5396            out.push(entry.xor_offset);
5397            out.push(entry.flags);
5398            entry.bitmap.append_bytes(&mut out);
5399        }
5400
5401        if let Some(cache) = &self.name_hash_cache {
5402            for value in cache {
5403                out.extend_from_slice(&value.to_be_bytes());
5404            }
5405        }
5406
5407        let checksum = sley_core::digest_bytes(self.format, &out)?;
5408        out.extend_from_slice(checksum.as_bytes());
5409        Ok(out)
5410    }
5411}
5412
5413/// Convenience wrapper that builds a `.bitmap` file in one call.
5414///
5415/// `object_types` lists the [`ObjectType`] of every pack object in pack order,
5416/// `pack_checksum` is the pack's trailing checksum, and `commits` carries, per
5417/// selected commit, `(pack_position, index_position, reachable_pack_positions)`
5418/// (see [`PackBitmapWriter::add_commit`] for the two position spaces). An
5419/// optional `name_hash_cache` (one entry per object) may be supplied to emit
5420/// the hash-cache extension.
5421pub fn write_bitmap(
5422    format: ObjectFormat,
5423    pack_checksum: ObjectId,
5424    object_types: &[ObjectType],
5425    commits: &[(u32, u32, Vec<u32>)],
5426    name_hash_cache: Option<Vec<u32>>,
5427) -> Result<Vec<u8>> {
5428    let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5429    if let Some(cache) = name_hash_cache {
5430        writer = writer.with_name_hash_cache(cache)?;
5431    }
5432    for (commit_position, commit_index_position, reachable) in commits {
5433        writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5434    }
5435    writer.write()
5436}
5437
5438#[cfg(test)]
5439mod tests {
5440    use super::*;
5441    use flate2::Compression;
5442    use flate2::read::ZlibDecoder;
5443    use flate2::write::ZlibEncoder;
5444    use std::fs;
5445    use std::io::Read;
5446    use std::io::Write;
5447    use std::path::{Path, PathBuf};
5448    use std::process::Command;
5449    use std::time::{SystemTime, UNIX_EPOCH};
5450
5451    fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5452        PackWriteOptions::new()
5453            .with_prefer_ofs_delta(prefer_ofs_delta)
5454            .with_reorder(false)
5455    }
5456
5457    #[test]
5458    fn parses_single_blob_pack() {
5459        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5460        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5461        assert_eq!(parsed.version, 2);
5462        assert_eq!(parsed.entries.len(), 1);
5463        let object = &parsed.entries[0].object;
5464        assert_eq!(object.object_type, ObjectType::Blob);
5465        assert_eq!(object.body, b"hello\n");
5466        assert_eq!(
5467            parsed.entries[0].entry.oid.to_hex(),
5468            "ce013625030ba8dba906f756967f9e9ca394464a"
5469        );
5470    }
5471
5472    #[test]
5473    fn parses_single_blob_pack_sha256() {
5474        let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5475        let parsed =
5476            PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5477        assert_eq!(parsed.version, 2);
5478        assert_eq!(parsed.entries.len(), 1);
5479        let object = &parsed.entries[0].object;
5480        assert_eq!(object.object_type, ObjectType::Blob);
5481        assert_eq!(object.body, b"hello\n");
5482        assert_eq!(
5483            parsed.entries[0].entry.oid,
5484            object
5485                .object_id(ObjectFormat::Sha256)
5486                .expect("test operation should succeed")
5487        );
5488    }
5489
5490    #[test]
5491    fn parses_bundle_pack_payload_with_bundle_format() {
5492        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5493        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5494            .expect("test operation should succeed");
5495        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5496            .into_bytes()
5497            .into_iter()
5498            .chain(pack)
5499            .collect::<Vec<_>>();
5500        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5501            .expect("test operation should succeed");
5502
5503        let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5504        assert_eq!(parsed.entries.len(), 1);
5505        assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5506        assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5507    }
5508
5509    /// Build a pack whose single blob entry header LIES about its decompressed
5510    /// size: it declares `declared_size` while the actual zlib payload only
5511    /// inflates to `real_body`. A short `real_body` plus a `declared_size` of
5512    /// `u64::MAX` is the decompression-bomb shape — the header claims terabytes
5513    /// from a handful of compressed bytes.
5514    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5515        let mut pack = Vec::new();
5516        pack.extend_from_slice(b"PACK");
5517        pack.extend_from_slice(&2u32.to_be_bytes());
5518        pack.extend_from_slice(&1u32.to_be_bytes());
5519        // Object type 3 == blob; size varint encodes the *attacker-declared* size.
5520        write_pack_entry_header_kind(&mut pack, 3, declared_size);
5521        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5522        encoder
5523            .write_all(real_body)
5524            .expect("test operation should succeed");
5525        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5526        let checksum =
5527            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5528        pack.extend_from_slice(checksum.as_bytes());
5529        pack
5530    }
5531
5532    /// Regression: a crafted pack object header declaring a gigantic decompressed
5533    /// size with a tiny compressed payload must NOT drive an up-front
5534    /// reservation/allocation of that declared size (OOM/abort). sley#2: the
5535    /// header `size` is attacker-controlled over the network (install_raw_pack →
5536    /// sley-fetch), so it must be validated/bounded before any `Vec::reserve`.
5537    ///
5538    /// On the unfixed code, `inflate_into` did `out.reserve(header.size as usize)`
5539    /// with `header.size == u64::MAX`, which panics with "capacity overflow" (or
5540    /// aborts on alloc failure) *before* the size-mismatch check could fire. We
5541    /// run parse on a worker thread so that panic surfaces as a `join()` error
5542    /// rather than killing the test process; the fix turns this into a clean
5543    /// `Err` returned normally.
5544    #[test]
5545    fn rejects_decompression_bomb_header_without_oom() {
5546        for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5547            let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5548            let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5549            let result = handle.join();
5550            // The parse thread must not have panicked/aborted on a huge reserve.
5551            assert!(
5552                result.is_ok(),
5553                "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5554            );
5555            // And parsing must reject the lie (decoded len != declared size).
5556            let parse_result = result.expect("parse thread should not panic on a bomb header");
5557            assert!(
5558                parse_result.is_err(),
5559                "bomb header (declared={declared}) should be rejected as invalid"
5560            );
5561        }
5562    }
5563
5564    /// Build a 2-object pack: a real base blob followed by a delta (ref or ofs)
5565    /// whose *result-size* varint lies, declaring `declared_result_size`, while
5566    /// carrying a tiny real instruction stream. The delta's base-size varint is
5567    /// set correctly (so the base-size check at the top of `apply_pack_delta`
5568    /// passes and we reach the result reservation). Used to drive the sley#35
5569    /// delta-result-size bomb.
5570    fn lying_result_size_delta_pack(
5571        format: ObjectFormat,
5572        declared_result_size: u64,
5573        delta_kind: DeltaKind,
5574    ) -> Vec<u8> {
5575        let base = b"hello";
5576        let result = b"hello world"; // real produced length = 11
5577
5578        // Hand-build a delta with a truthful base-size and a LYING result-size.
5579        let mut delta = Vec::new();
5580        write_delta_varint(&mut delta, base.len() as u64);
5581        write_delta_varint(&mut delta, declared_result_size);
5582        // Real instructions: copy `base` then insert " world".
5583        let suffix = &result[base.len()..];
5584        delta.push(0x90); // copy, 1 size byte present (bit 0x10)
5585        delta.push(base.len() as u8);
5586        delta.push(suffix.len() as u8);
5587        delta.extend_from_slice(suffix);
5588
5589        let mut pack = Vec::new();
5590        pack.extend_from_slice(b"PACK");
5591        pack.extend_from_slice(&2u32.to_be_bytes());
5592        pack.extend_from_slice(&2u32.to_be_bytes());
5593
5594        let base_offset = pack.len();
5595        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
5596        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5597        encoder
5598            .write_all(base)
5599            .expect("test operation should succeed");
5600        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5601
5602        let delta_offset = pack.len();
5603        write_pack_entry_header_kind(
5604            &mut pack,
5605            match delta_kind {
5606                DeltaKind::Offset => 6,
5607                DeltaKind::Ref => 7,
5608            },
5609            delta.len() as u64,
5610        );
5611        match delta_kind {
5612            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
5613            DeltaKind::Ref => {
5614                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
5615                    .expect("test operation should succeed");
5616                pack.extend_from_slice(base_oid.as_bytes());
5617            }
5618        }
5619        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5620        encoder
5621            .write_all(&delta)
5622            .expect("test operation should succeed");
5623        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5624
5625        let checksum =
5626            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5627        pack.extend_from_slice(checksum.as_bytes());
5628        pack
5629    }
5630
5631    /// Regression (sley#35): the 2nd instance of the sley#2 decompression-bomb
5632    /// class. `apply_pack_delta` read an attacker-controlled `result_size` varint
5633    /// from a network delta and fed it straight to `Vec::with_capacity`. A tiny
5634    /// delta declaring `result_size == u64::MAX` (or ~1 TiB) aborts the process
5635    /// ("capacity overflow"/alloc failure, SIGABRT) BEFORE the post-decode
5636    /// size-mismatch check can reject the lie. Both ref-delta and ofs-delta paths
5637    /// reach the same reservation, so both must be safe. We resolve the pack on a
5638    /// worker thread so an abort/panic surfaces as a `join()` error rather than
5639    /// killing the whole test binary; the fix turns the bomb into a clean `Err`.
5640    #[test]
5641    fn rejects_delta_result_size_bomb_without_oom() {
5642        let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
5643        for &declared in bombs {
5644            for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5645                let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
5646                let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5647                let join_result = handle.join();
5648                assert!(
5649                    join_result.is_ok(),
5650                    "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
5651                     instead of erroring cleanly"
5652                );
5653                let parse_result =
5654                    join_result.expect("parse thread should not panic on a delta bomb");
5655                assert!(
5656                    parse_result.is_err(),
5657                    "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
5658                     as invalid (result.len() != declared)"
5659                );
5660            }
5661        }
5662    }
5663
5664    /// A legitimate (truthful) delta whose result-size varint matches the real
5665    /// produced length must still resolve correctly — the bound only caps the
5666    /// speculative reservation, it must not break real delta application.
5667    #[test]
5668    fn applies_legitimate_delta_after_result_size_bound() {
5669        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5670            let base = b"hello";
5671            let result = b"hello world";
5672            let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
5673            let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
5674            assert_eq!(parsed.entries.len(), 2);
5675            assert_eq!(parsed.entries[0].object.body, base);
5676            assert_eq!(parsed.entries[1].object.body, result);
5677        }
5678    }
5679
5680    #[test]
5681    fn bounded_inflate_reserve_caps_attacker_declared_size() {
5682        // A tiny compressed input can't justify a multi-gigabyte reservation.
5683        assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
5684        // The absolute ceiling caps even a large input-justified hint.
5685        assert_eq!(
5686            bounded_inflate_reserve(usize::MAX, usize::MAX),
5687            MAX_INFLATE_RESERVE
5688        );
5689        // A modest legitimate hint is preserved unchanged (no regression for real
5690        // objects): 1000 bytes of output from 500 bytes of input is well within
5691        // both bounds.
5692        assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
5693        // Floor of 64 for tiny hints.
5694        assert_eq!(bounded_inflate_reserve(0, 0), 64);
5695    }
5696
5697    #[test]
5698    fn rejects_bundle_pack_payload_with_wrong_object_format() {
5699        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5700        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
5701            .expect("test operation should succeed");
5702        let bundle_bytes =
5703            format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
5704                .into_bytes()
5705                .into_iter()
5706                .chain(pack)
5707                .collect::<Vec<_>>();
5708        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5709            .expect("test operation should succeed");
5710
5711        assert!(PackFile::parse_bundle(&bundle).is_err());
5712    }
5713
5714    fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5715        let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5716        let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5717        let owned_view =
5718            PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5719                .expect("test operation should succeed");
5720
5721        assert_eq!(view.version, owned.version);
5722        assert_eq!(view.count, owned.entries.len());
5723        assert_eq!(view.count(), owned.entries.len());
5724        assert_eq!(view.fanout(), &owned.fanout);
5725        assert_eq!(view.pack_checksum, owned.pack_checksum);
5726        assert_eq!(view.index_checksum, owned.index_checksum);
5727        assert_eq!(owned_view.version, owned.version);
5728        assert_eq!(owned_view.count(), owned.entries.len());
5729        assert_eq!(owned_view.fanout(), &owned.fanout);
5730        assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5731        assert_eq!(owned_view.index_checksum, owned.index_checksum);
5732        for entry in &owned.entries {
5733            let owned_found = owned
5734                .find(&entry.oid)
5735                .expect("test operation should succeed");
5736            let expected = Some(PackIndexLookup {
5737                crc32: owned_found.crc32,
5738                offset: owned_found.offset,
5739            });
5740            assert_eq!(view.find(&entry.oid), expected);
5741            assert_eq!(owned_view.find(&entry.oid), expected);
5742        }
5743    }
5744
5745    #[test]
5746    fn writes_pack_and_index_that_round_trip() {
5747        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
5748        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5749            .expect("test operation should succeed");
5750        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5751        let index =
5752            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5753        let oid = object
5754            .object_id(ObjectFormat::Sha1)
5755            .expect("test operation should succeed");
5756        assert_eq!(pack.entries[0].object, object);
5757        assert_eq!(index.pack_checksum, pack.checksum);
5758        assert_eq!(
5759            index
5760                .find(&oid)
5761                .expect("test operation should succeed")
5762                .offset,
5763            12
5764        );
5765    }
5766
5767    #[test]
5768    fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
5769        let objects = (0..8)
5770            .map(|idx| {
5771                EncodedObject::new(
5772                    ObjectType::Blob,
5773                    format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
5774                )
5775            })
5776            .collect::<Vec<_>>();
5777        let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5778            .expect("test operation should succeed");
5779
5780        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
5781
5782        let view =
5783            PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
5784        let missing = sley_core::object_id_for_bytes(
5785            ObjectFormat::Sha1,
5786            "blob",
5787            b"not present in borrowed index\n",
5788        )
5789        .expect("test operation should succeed");
5790        assert_eq!(view.find(&missing), None);
5791    }
5792
5793    #[test]
5794    fn writes_sha256_pack_and_index_that_round_trip() {
5795        let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
5796        let written =
5797            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5798                .expect("test operation should succeed");
5799        let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
5800            .expect("test operation should succeed");
5801        let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
5802            .expect("test operation should succeed");
5803        let oid = object
5804            .object_id(ObjectFormat::Sha256)
5805            .expect("test operation should succeed");
5806        assert_eq!(pack.entries[0].object, object);
5807        assert_eq!(index.pack_checksum, pack.checksum);
5808        assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
5809        assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
5810        assert_eq!(
5811            index
5812                .find(&oid)
5813                .expect("test operation should succeed")
5814                .offset,
5815            12
5816        );
5817    }
5818
5819    #[test]
5820    fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
5821        let objects = (0..4)
5822            .map(|idx| {
5823                EncodedObject::new(
5824                    ObjectType::Blob,
5825                    format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
5826                )
5827            })
5828            .collect::<Vec<_>>();
5829        let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
5830            .expect("test operation should succeed");
5831
5832        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
5833    }
5834
5835    #[test]
5836    fn indexes_existing_sha256_pack_bytes() {
5837        let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
5838        let written =
5839            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5840                .expect("test operation should succeed");
5841
5842        let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
5843            .expect("test operation should succeed");
5844        let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
5845            .expect("test operation should succeed");
5846
5847        assert_eq!(indexed.pack_checksum, written.checksum);
5848        assert_eq!(indexed.entries, written.entries);
5849        assert_eq!(index.pack_checksum, written.checksum);
5850        assert_eq!(index.entries, written.entries);
5851    }
5852
5853    #[test]
5854    fn indexes_existing_delta_pack_bytes() {
5855        let (base, changed) = similar_blob_objects();
5856        let options = delta_pack_options(true);
5857        let written = PackFile::write_packed_with_options(
5858            &[base, changed.clone()],
5859            ObjectFormat::Sha1,
5860            &options,
5861        )
5862        .expect("test operation should succeed");
5863
5864        let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
5865            .expect("test operation should succeed");
5866        let index =
5867            PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
5868        let changed_oid = changed
5869            .object_id(ObjectFormat::Sha1)
5870            .expect("test operation should succeed");
5871
5872        assert_eq!(indexed.pack_checksum, written.checksum);
5873        assert_eq!(indexed.entries, written.entries);
5874        assert_eq!(
5875            index
5876                .find(&changed_oid)
5877                .expect("test operation should succeed")
5878                .offset,
5879            written.entries[1].offset
5880        );
5881        assert_eq!(
5882            index
5883                .find(&changed_oid)
5884                .expect("test operation should succeed")
5885                .crc32,
5886            written.entries[1].crc32
5887        );
5888    }
5889
5890    #[test]
5891    fn writes_ref_delta_pack_and_index_that_round_trip() {
5892        let (base, changed) = similar_blob_objects();
5893        let options = delta_pack_options(false);
5894        let written = PackFile::write_packed_with_options(
5895            &[base.clone(), changed.clone()],
5896            ObjectFormat::Sha1,
5897            &options,
5898        )
5899        .expect("test operation should succeed");
5900        let mut second_offset = written.entries[1].offset as usize;
5901        let header = parse_entry_header(&written.pack, &mut second_offset)
5902            .expect("test operation should succeed");
5903        assert_eq!(header.kind, PackObjectKind::RefDelta);
5904
5905        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5906        let index =
5907            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5908        let oid = changed
5909            .object_id(ObjectFormat::Sha1)
5910            .expect("test operation should succeed");
5911        assert_eq!(pack.entries[0].object, base);
5912        assert_eq!(pack.entries[1].object, changed);
5913        assert_eq!(index.pack_checksum, pack.checksum);
5914        assert_eq!(
5915            index
5916                .find(&oid)
5917                .expect("test operation should succeed")
5918                .offset,
5919            written.entries[1].offset
5920        );
5921    }
5922
5923    #[test]
5924    fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
5925        let (base, changed) = similar_blob_objects();
5926        let options = delta_pack_options(true);
5927        let written = PackFile::write_packed_with_options(
5928            &[base, changed.clone()],
5929            ObjectFormat::Sha1,
5930            &options,
5931        )
5932        .expect("test operation should succeed");
5933        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5934        let mut second = written.entries[1].offset as usize;
5935        assert_eq!(
5936            parse_entry_header(&written.pack, &mut second)
5937                .expect("test operation should succeed")
5938                .kind,
5939            PackObjectKind::OfsDelta
5940        );
5941        // Ground truth from a full parse; single-object decode must match at every offset.
5942        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5943        for po in &parsed.entries {
5944            let got =
5945                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5946                    Ok(None)
5947                })
5948                .expect("test operation should succeed");
5949            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
5950        }
5951    }
5952
5953    /// A [`HeaderTypeCache`] over a plain map, for asserting the cached header
5954    /// read is byte-identical to the uncached one cold and warm (sley#26).
5955    #[derive(Default)]
5956    struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5957
5958    impl HeaderTypeCache for MapHeaderTypeCache {
5959        fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5960            self.0.get(&pack_offset).copied()
5961        }
5962        fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5963            self.0.insert(pack_offset, header);
5964        }
5965    }
5966
5967    #[test]
5968    fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
5969        let (base, changed) = similar_blob_objects();
5970        let options = delta_pack_options(true);
5971        let written =
5972            PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
5973                .expect("test operation should succeed");
5974        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5975        let mut second = written.entries[1].offset as usize;
5976        assert_eq!(
5977            parse_entry_header(&written.pack, &mut second)
5978                .expect("test operation should succeed")
5979                .kind,
5980            PackObjectKind::OfsDelta
5981        );
5982
5983        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5984        let mut cache = MapHeaderTypeCache::default();
5985        for po in &parsed.entries {
5986            let uncached =
5987                read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5988                    Ok(None)
5989                })
5990                .expect("test operation should succeed");
5991            // Type inherited from the chain base; size is the inflated body length.
5992            assert_eq!(
5993                uncached,
5994                (po.object.object_type, po.object.body.len() as u64),
5995                "uncached header at offset {}",
5996                po.entry.offset
5997            );
5998            // Cold cache: must agree with the uncached read and populate the memo.
5999            let cold = read_object_header_at_with_cache(
6000                &written.pack,
6001                po.entry.offset,
6002                ObjectFormat::Sha1,
6003                |_| Ok(None),
6004                &mut cache,
6005            )
6006            .expect("test operation should succeed");
6007            assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
6008        }
6009        // Warm cache: every offset now resolves from the memo and is still correct,
6010        // proving the fast path does not change behavior (sley#26).
6011        for po in &parsed.entries {
6012            let warm = read_object_header_at_with_cache(
6013                &written.pack,
6014                po.entry.offset,
6015                ObjectFormat::Sha1,
6016                |_| panic!("warm cache must not re-walk the chain"),
6017                &mut cache,
6018            )
6019            .expect("test operation should succeed");
6020            assert_eq!(
6021                warm,
6022                (po.object.object_type, po.object.body.len() as u64),
6023                "warm cache at offset {}",
6024                po.entry.offset
6025            );
6026        }
6027    }
6028
6029    #[test]
6030    fn read_object_at_matches_full_parse_for_ref_delta_pack() {
6031        let (base, changed) = similar_blob_objects();
6032        let options = delta_pack_options(false);
6033        let written = PackFile::write_packed_with_options(
6034            &[base, changed.clone()],
6035            ObjectFormat::Sha1,
6036            &options,
6037        )
6038        .expect("test operation should succeed");
6039        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6040        let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
6041            .entries
6042            .iter()
6043            .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
6044            .collect();
6045        for po in &parsed.entries {
6046            let got =
6047                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
6048                    Ok(by_oid.get(oid).cloned())
6049                })
6050                .expect("test operation should succeed");
6051            assert_eq!(*got, po.object);
6052        }
6053    }
6054
6055    /// A test-only [`PackDeltaCache`] that records every decode and counts hits,
6056    /// used to prove the cached decode path is byte-identical to the uncached
6057    /// one and that bases are reused across reads.
6058    #[derive(Default)]
6059    struct CountingDeltaCache {
6060        map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
6061        hits: std::cell::Cell<usize>,
6062        inserts: std::cell::Cell<usize>,
6063    }
6064
6065    impl PackDeltaCache for CountingDeltaCache {
6066        fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
6067            let hit = self.map.borrow().get(&offset).cloned();
6068            if hit.is_some() {
6069                self.hits.set(self.hits.get() + 1);
6070            }
6071            hit
6072        }
6073        fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
6074            self.inserts.set(self.inserts.get() + 1);
6075            self.map.borrow_mut().insert(offset, object);
6076        }
6077    }
6078
6079    #[test]
6080    fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
6081        // A multi-object pack with a real ofs-delta chain so the cache has bases
6082        // to reuse. Build several similar blobs to encourage deltification.
6083        let mut objects = Vec::new();
6084        for idx in 0..8u32 {
6085            let mut body = vec![b'x'; 4096];
6086            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
6087            objects.push(EncodedObject::new(ObjectType::Blob, body));
6088        }
6089        let options = delta_pack_options(true);
6090        let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
6091            .expect("test operation should succeed");
6092        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6093
6094        let cache = CountingDeltaCache::default();
6095        // Read every object twice through the cache; each result must equal the
6096        // ground-truth from the full parse, byte for byte, both times.
6097        for _ in 0..2 {
6098            for po in &parsed.entries {
6099                let got = read_object_at_with_cache_arc(
6100                    &written.pack,
6101                    po.entry.offset,
6102                    ObjectFormat::Sha1,
6103                    |_| Ok(None),
6104                    &cache,
6105                )
6106                .expect("test operation should succeed");
6107                assert_eq!(*got, po.object, "offset {}", po.entry.offset);
6108            }
6109        }
6110        // The second pass reads everything straight from the cache, so there must
6111        // be at least one hit (proving reuse, not just correctness).
6112        assert!(cache.hits.get() > 0, "cache never served a warm object");
6113    }
6114
6115    #[test]
6116    fn writes_ofs_delta_pack_and_index_that_round_trip() {
6117        let (base, changed) = similar_blob_objects();
6118        let options = delta_pack_options(true);
6119        let written = PackFile::write_packed_with_options(
6120            &[base.clone(), changed.clone()],
6121            ObjectFormat::Sha1,
6122            &options,
6123        )
6124        .expect("test operation should succeed");
6125        let mut second_offset = written.entries[1].offset as usize;
6126        let header = parse_entry_header(&written.pack, &mut second_offset)
6127            .expect("test operation should succeed");
6128        assert_eq!(header.kind, PackObjectKind::OfsDelta);
6129
6130        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6131        let index =
6132            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6133        let oid = changed
6134            .object_id(ObjectFormat::Sha1)
6135            .expect("test operation should succeed");
6136        assert_eq!(pack.entries[0].object, base);
6137        assert_eq!(pack.entries[1].object, changed);
6138        assert_eq!(index.pack_checksum, pack.checksum);
6139        assert_eq!(
6140            index
6141                .find(&oid)
6142                .expect("test operation should succeed")
6143                .offset,
6144            written.entries[1].offset
6145        );
6146    }
6147
6148    #[test]
6149    fn resolves_ofs_delta_pack_entry() {
6150        let base = b"hello";
6151        let result = b"hello world";
6152        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
6153        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6154        assert_eq!(parsed.entries.len(), 2);
6155        assert_eq!(parsed.entries[0].object.body, base);
6156        assert_eq!(parsed.entries[1].object.body, result);
6157        assert_eq!(
6158            parsed.entries[1].entry.oid,
6159            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6160                .expect("test operation should succeed")
6161        );
6162    }
6163
6164    #[test]
6165    fn resolves_ref_delta_pack_entry() {
6166        let base = b"hello";
6167        let result = b"hello world";
6168        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
6169        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6170        assert_eq!(parsed.entries.len(), 2);
6171        assert_eq!(parsed.entries[0].object.body, base);
6172        assert_eq!(parsed.entries[1].object.body, result);
6173        assert_eq!(
6174            parsed.entries[1].entry.oid,
6175            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6176                .expect("test operation should succeed")
6177        );
6178    }
6179
6180    #[test]
6181    fn resolves_thin_ref_delta_pack_entry_with_external_base() {
6182        let base = b"hello";
6183        let result = b"hello world";
6184        let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
6185        assert!(PackFile::parse_sha1(&pack).is_err());
6186
6187        let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
6188            .expect("test operation should succeed");
6189        let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
6190            if oid == &base_oid {
6191                Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
6192            } else {
6193                Ok(None)
6194            }
6195        })
6196        .expect("test operation should succeed");
6197        assert_eq!(parsed.entries.len(), 1);
6198        assert_eq!(parsed.entries[0].object.body, result);
6199        assert_eq!(
6200            parsed.entries[0].entry.oid,
6201            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6202                .expect("test operation should succeed")
6203        );
6204    }
6205
6206    #[test]
6207    fn rejects_bad_pack_checksum() {
6208        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6209        let last = pack.len() - 1;
6210        pack[last] ^= 1;
6211        assert!(PackFile::parse_sha1(&pack).is_err());
6212    }
6213
6214    #[test]
6215    fn raw_pack_index_rejects_bad_pack_checksum() {
6216        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6217        let last = pack.len() - 1;
6218        pack[last] ^= 1;
6219        assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
6220    }
6221
6222    #[test]
6223    fn pack_index_writer_rejects_duplicate_object_ids() {
6224        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
6225            .expect("test operation should succeed");
6226        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6227            .expect("test operation should succeed");
6228        let entries = vec![
6229            PackIndexEntry {
6230                oid,
6231                crc32: 1,
6232                offset: 12,
6233            },
6234            PackIndexEntry {
6235                oid,
6236                crc32: 2,
6237                offset: 24,
6238            },
6239        ];
6240        assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
6241    }
6242
6243    #[test]
6244    fn parses_single_entry_pack_index() {
6245        let oid = ObjectId::from_hex(
6246            ObjectFormat::Sha1,
6247            "ce013625030ba8dba906f756967f9e9ca394464a",
6248        )
6249        .expect("test operation should succeed");
6250        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6251            .expect("test operation should succeed");
6252        let index = single_entry_index(
6253            ObjectFormat::Sha1,
6254            oid,
6255            0x1234_5678,
6256            12,
6257            pack_checksum.clone(),
6258        );
6259        let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
6260        assert_eq!(parsed.version, 2);
6261        assert_eq!(parsed.pack_checksum, pack_checksum);
6262        assert_eq!(parsed.entries.len(), 1);
6263        assert_eq!(
6264            parsed
6265                .find(&oid)
6266                .expect("test operation should succeed")
6267                .offset,
6268            12
6269        );
6270        assert_eq!(
6271            parsed
6272                .find(&oid)
6273                .expect("test operation should succeed")
6274                .crc32,
6275            0x1234_5678
6276        );
6277        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6278    }
6279
6280    #[test]
6281    fn parses_single_entry_pack_index_v1() {
6282        let oid = ObjectId::from_hex(
6283            ObjectFormat::Sha1,
6284            "ce013625030ba8dba906f756967f9e9ca394464a",
6285        )
6286        .expect("test operation should succeed");
6287        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6288            .expect("test operation should succeed");
6289        let index =
6290            single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
6291        let parsed =
6292            PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
6293        assert_eq!(parsed.version, 1);
6294        assert_eq!(parsed.pack_checksum, pack_checksum);
6295        assert_eq!(parsed.entries.len(), 1);
6296        assert_eq!(
6297            parsed
6298                .find(&oid)
6299                .expect("test operation should succeed")
6300                .offset,
6301            0x1234_5678
6302        );
6303        assert_eq!(
6304            parsed
6305                .find(&oid)
6306                .expect("test operation should succeed")
6307                .crc32,
6308            0
6309        );
6310        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6311    }
6312
6313    #[test]
6314    fn rejects_bad_pack_index_v1_checksum() {
6315        let oid = ObjectId::from_hex(
6316            ObjectFormat::Sha1,
6317            "ce013625030ba8dba906f756967f9e9ca394464a",
6318        )
6319        .expect("test operation should succeed");
6320        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6321            .expect("test operation should succeed");
6322        let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
6323        let last = index.len() - 1;
6324        index[last] ^= 1;
6325        assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
6326    }
6327
6328    #[test]
6329    fn pack_index_view_reads_v2_large_offsets() {
6330        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
6331            .expect("test operation should succeed");
6332        let second =
6333            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
6334                .expect("test operation should succeed");
6335        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6336            .expect("test operation should succeed");
6337        let entries = vec![
6338            PackIndexEntry {
6339                oid: first,
6340                crc32: 0x1111_2222,
6341                offset: 0x8000_0000,
6342            },
6343            PackIndexEntry {
6344                oid: second,
6345                crc32: 0x3333_4444,
6346                offset: 0x1_0000_0042,
6347            },
6348        ];
6349        let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
6350            .expect("test operation should succeed");
6351
6352        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6353        let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
6354            .expect("test operation should succeed");
6355        for entry in entries {
6356            assert_eq!(
6357                view.find(&entry.oid),
6358                Some(PackIndexLookup {
6359                    crc32: entry.crc32,
6360                    offset: entry.offset,
6361                })
6362            );
6363        }
6364    }
6365
6366    #[test]
6367    fn pack_index_view_default_parse_checks_index_checksum() {
6368        let oid = ObjectId::from_hex(
6369            ObjectFormat::Sha1,
6370            "ce013625030ba8dba906f756967f9e9ca394464a",
6371        )
6372        .expect("test operation should succeed");
6373        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6374            .expect("test operation should succeed");
6375        let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
6376        let last = index.len() - 1;
6377        index[last] ^= 1;
6378
6379        assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
6380        let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
6381            .expect("test operation should succeed");
6382        let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
6383            Arc::from(index.clone().into_boxed_slice()),
6384            ObjectFormat::Sha1,
6385        )
6386        .expect("test operation should succeed");
6387        assert_eq!(
6388            view.find(&oid),
6389            Some(PackIndexLookup {
6390                crc32: 0x1234_5678,
6391                offset: 12,
6392            })
6393        );
6394        assert_eq!(
6395            trusted_view.find(&oid),
6396            Some(PackIndexLookup {
6397                crc32: 0x1234_5678,
6398                offset: 12,
6399            })
6400        );
6401    }
6402
6403    #[test]
6404    fn parses_pack_reverse_index() {
6405        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6406            .expect("test operation should succeed");
6407        let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
6408            .expect("test operation should succeed");
6409        let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
6410            .expect("test operation should succeed");
6411        assert_eq!(parsed.version, 1);
6412        assert_eq!(parsed.format, ObjectFormat::Sha1);
6413        assert_eq!(parsed.positions, vec![2, 0, 1]);
6414        assert_eq!(parsed.pack_checksum, pack_checksum);
6415        assert_eq!(
6416            PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
6417                .expect("test operation should succeed"),
6418            reverse_index
6419        );
6420    }
6421
6422    #[test]
6423    fn rejects_bad_pack_reverse_index_checksum() {
6424        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6425            .expect("test operation should succeed");
6426        let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
6427            .expect("test operation should succeed");
6428        let last = reverse_index.len() - 1;
6429        reverse_index[last] ^= 1;
6430        assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
6431    }
6432
6433    #[test]
6434    fn rejects_bad_pack_reverse_index_positions() {
6435        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6436            .expect("test operation should succeed");
6437        let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
6438        assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
6439        let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
6440        assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
6441        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6442            .expect("test operation should succeed");
6443        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
6444        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
6445    }
6446
6447    #[test]
6448    fn parses_pack_mtimes() {
6449        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6450            .expect("test operation should succeed");
6451        let mtimes = PackMtimes::write(
6452            ObjectFormat::Sha1,
6453            &[1, 1_700_000_000, u32::MAX],
6454            &pack_checksum,
6455        )
6456        .expect("test operation should succeed");
6457        let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
6458            .expect("test operation should succeed");
6459        assert_eq!(parsed.version, 1);
6460        assert_eq!(parsed.format, ObjectFormat::Sha1);
6461        assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
6462        assert_eq!(parsed.pack_checksum, pack_checksum);
6463        assert_eq!(
6464            PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
6465                .expect("test operation should succeed"),
6466            mtimes
6467        );
6468    }
6469
6470    #[test]
6471    fn rejects_bad_pack_mtimes_checksum() {
6472        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6473            .expect("test operation should succeed");
6474        let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
6475            .expect("test operation should succeed");
6476        let last = mtimes.len() - 1;
6477        mtimes[last] ^= 1;
6478        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6479    }
6480
6481    #[test]
6482    fn rejects_bad_pack_mtimes_shape() {
6483        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6484            .expect("test operation should succeed");
6485        let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
6486        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6487
6488        let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
6489        wrong_hash[11] = 2;
6490        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6491        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6492            .expect("test operation should succeed");
6493        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6494        assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
6495    }
6496
6497    #[test]
6498    fn parses_multi_pack_index_header_and_chunk_lookup() {
6499        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6500            .expect("test operation should succeed");
6501        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6502            .expect("test operation should succeed");
6503        let chunks = midx_chunks_with_pack_names(
6504            ObjectFormat::Sha1,
6505            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6506            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6507        );
6508        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6509        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6510            .expect("test operation should succeed");
6511        assert_eq!(parsed.version, 2);
6512        assert_eq!(parsed.format, ObjectFormat::Sha1);
6513        assert_eq!(parsed.pack_count, 2);
6514        assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
6515        assert_eq!(parsed.object_count, 2);
6516        assert_eq!(parsed.objects.len(), 2);
6517        assert_eq!(
6518            parsed
6519                .find(&first)
6520                .expect("test operation should succeed")
6521                .pack_int_id,
6522            0
6523        );
6524        assert_eq!(
6525            parsed
6526                .find(&first)
6527                .expect("test operation should succeed")
6528                .offset,
6529            12
6530        );
6531        assert_eq!(
6532            parsed
6533                .find(&second)
6534                .expect("test operation should succeed")
6535                .pack_int_id,
6536            1
6537        );
6538        assert_eq!(
6539            parsed
6540                .find(&second)
6541                .expect("test operation should succeed")
6542                .offset,
6543            0x1_0000_0000
6544        );
6545        assert_eq!(parsed.reverse_index, None);
6546        assert_eq!(parsed.bitmapped_packs, None);
6547        assert_eq!(parsed.chunks.len(), 5);
6548        assert_eq!(parsed.chunks[0].id, *b"PNAM");
6549        assert_eq!(parsed.chunks[0].offset, 84);
6550        assert_eq!(parsed.chunks[0].len, 24);
6551        assert_eq!(parsed.chunks[1].id, *b"OIDF");
6552        assert_eq!(parsed.chunks[1].offset, 108);
6553        assert_eq!(parsed.chunks[1].len, 1024);
6554    }
6555
6556    #[test]
6557    fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
6558        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6559            .expect("test operation should succeed");
6560        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6561            .expect("test operation should succeed");
6562        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6563            .expect("test operation should succeed");
6564        let chunks = midx_chunks_with_pack_names(
6565            ObjectFormat::Sha1,
6566            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6567            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6568        );
6569        let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
6570        let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
6571            .expect("test operation should succeed");
6572
6573        assert!(lookup.contains(&first));
6574        assert!(lookup.contains(&second));
6575        assert!(!lookup.contains(&missing));
6576
6577        let first_entry = lookup
6578            .find(&first)
6579            .expect("test operation should succeed")
6580            .expect("object should be present");
6581        assert_eq!(
6582            lookup.pack_name(first_entry.pack_int_id),
6583            Some("pack-a.idx")
6584        );
6585        assert_eq!(first_entry.offset, 12);
6586
6587        let second_entry = lookup
6588            .find(&second)
6589            .expect("test operation should succeed")
6590            .expect("object should be present");
6591        assert_eq!(
6592            lookup.pack_name(second_entry.pack_int_id),
6593            Some("pack-b.idx")
6594        );
6595        assert_eq!(second_entry.offset, 0x1_0000_0000);
6596        assert!(
6597            lookup
6598                .find(&missing)
6599                .expect("test operation should succeed")
6600                .is_none()
6601        );
6602    }
6603
6604    #[test]
6605    fn rejects_bad_multi_pack_index_checksum() {
6606        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6607        let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6608        let last = midx.len() - 1;
6609        midx[last] ^= 1;
6610        assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
6611    }
6612
6613    #[test]
6614    fn rejects_bad_multi_pack_index_shape() {
6615        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6616        let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6617        wrong_hash[5] = 2;
6618        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6619        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6620            .expect("test operation should succeed");
6621        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6622        assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
6623
6624        let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6625        missing_terminator[12] = b'B';
6626        let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
6627        let checksum =
6628            sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
6629                .expect("test operation should succeed");
6630        missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
6631        assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
6632
6633        let mut bad_offset = multi_pack_index(
6634            ObjectFormat::Sha1,
6635            2,
6636            0,
6637            &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
6638        );
6639        bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
6640        let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
6641        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
6642            .expect("test operation should succeed");
6643        bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
6644        assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
6645    }
6646
6647    #[test]
6648    fn rejects_bad_multi_pack_index_pack_names() {
6649        let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
6650        assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
6651
6652        let too_few = multi_pack_index(
6653            ObjectFormat::Sha1,
6654            2,
6655            2,
6656            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
6657        );
6658        assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
6659
6660        let bad_padding = multi_pack_index(
6661            ObjectFormat::Sha1,
6662            2,
6663            1,
6664            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
6665        );
6666        assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
6667
6668        let unsorted_v1 = multi_pack_index(
6669            ObjectFormat::Sha1,
6670            1,
6671            2,
6672            &midx_chunks_with_pack_names(
6673                ObjectFormat::Sha1,
6674                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6675                &[],
6676            ),
6677        );
6678        assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
6679
6680        let unsorted_v2 = multi_pack_index(
6681            ObjectFormat::Sha1,
6682            2,
6683            2,
6684            &midx_chunks_with_pack_names(
6685                ObjectFormat::Sha1,
6686                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6687                &[],
6688            ),
6689        );
6690        let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
6691            .expect("test operation should succeed");
6692        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6693    }
6694
6695    #[test]
6696    fn rejects_bad_multi_pack_index_object_tables() {
6697        let oid_a = ObjectId::from_hex(
6698            ObjectFormat::Sha1,
6699            "1111111111111111111111111111111111111111",
6700        )
6701        .expect("test operation should succeed");
6702        let oid_b = ObjectId::from_hex(
6703            ObjectFormat::Sha1,
6704            "2222222222222222222222222222222222222222",
6705        )
6706        .expect("test operation should succeed");
6707
6708        let missing_oidf = multi_pack_index(
6709            ObjectFormat::Sha1,
6710            2,
6711            1,
6712            &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
6713        );
6714        assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());
6715
6716        let bad_fanout = vec![
6717            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
6718            (*b"OIDF", vec![0; 256 * 4]),
6719            (*b"OIDL", oid_a.as_bytes().to_vec()),
6720            (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
6721        ];
6722        let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
6723        assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());
6724
6725        let mut unsorted = Vec::new();
6726        unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
6727        unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
6728        let mut oid_lookup = Vec::new();
6729        oid_lookup.extend_from_slice(oid_b.as_bytes());
6730        oid_lookup.extend_from_slice(oid_a.as_bytes());
6731        unsorted.push((*b"OIDL", oid_lookup));
6732        unsorted.push((
6733            *b"OOFF",
6734            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
6735        ));
6736        let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
6737        assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());
6738
6739        let bad_pack = multi_pack_index(
6740            ObjectFormat::Sha1,
6741            2,
6742            1,
6743            &midx_chunks_with_pack_names(
6744                ObjectFormat::Sha1,
6745                b"pack-a.idx\0\0".to_vec(),
6746                &[(oid_a.clone(), 1, 12)],
6747            ),
6748        );
6749        assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());
6750
6751        let mut large_offsets = Vec::new();
6752        let missing_loff = vec![
6753            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
6754            (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
6755            (*b"OIDL", oid_a.as_bytes().to_vec()),
6756            (
6757                *b"OOFF",
6758                midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
6759            ),
6760        ];
6761        let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
6762        assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());
6763
6764        let mut bad_loff =
6765            midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
6766        bad_loff.push((*b"LOFF", vec![0]));
6767        let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
6768        assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
6769    }
6770
6771    #[test]
6772    fn parses_multi_pack_index_bitmap_chunks() {
6773        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6774            .expect("test operation should succeed");
6775        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6776            .expect("test operation should succeed");
6777        let mut chunks = midx_chunks_with_pack_names(
6778            ObjectFormat::Sha1,
6779            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6780            &[(first, 0, 12), (second, 1, 24)],
6781        );
6782        chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
6783        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
6784        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6785
6786        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6787            .expect("test operation should succeed");
6788        assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
6789        assert_eq!(
6790            parsed.bitmapped_packs,
6791            Some(vec![
6792                MultiPackBitmapPack {
6793                    bitmap_pos: 0,
6794                    bitmap_nr: 1,
6795                },
6796                MultiPackBitmapPack {
6797                    bitmap_pos: 1,
6798                    bitmap_nr: 1,
6799                },
6800            ])
6801        );
6802    }
6803
6804    #[test]
6805    fn writes_multi_pack_index_that_round_trips() {
6806        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6807            .expect("test operation should succeed");
6808        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6809            .expect("test operation should succeed");
6810        let bytes = MultiPackIndex::write(
6811            ObjectFormat::Sha1,
6812            2,
6813            &["pack-b.idx".into(), "pack-a.idx".into()],
6814            &[
6815                MultiPackIndexEntry {
6816                    oid: second.clone(),
6817                    pack_int_id: 0,
6818                    offset: 0x1_0000_0000,
6819                    force_large_offset: false,
6820                },
6821                MultiPackIndexEntry {
6822                    oid: first.clone(),
6823                    pack_int_id: 1,
6824                    offset: 12,
6825                    force_large_offset: false,
6826                },
6827            ],
6828        )
6829        .expect("test operation should succeed");
6830
6831        let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
6832            .expect("test operation should succeed");
6833        assert_eq!(parsed.version, 2);
6834        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6835        assert_eq!(parsed.object_count, 2);
6836        assert_eq!(
6837            parsed
6838                .find(&first)
6839                .expect("test operation should succeed")
6840                .pack_int_id,
6841            1
6842        );
6843        assert_eq!(
6844            parsed
6845                .find(&first)
6846                .expect("test operation should succeed")
6847                .offset,
6848            12
6849        );
6850        assert_eq!(
6851            parsed
6852                .find(&second)
6853                .expect("test operation should succeed")
6854                .pack_int_id,
6855            0
6856        );
6857        assert_eq!(
6858            parsed
6859                .find(&second)
6860                .expect("test operation should succeed")
6861                .offset,
6862            0x1_0000_0000
6863        );
6864        assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
6865    }
6866
6867    #[test]
6868    fn write_multi_pack_index_rejects_invalid_inputs() {
6869        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
6870            .expect("test operation should succeed");
6871        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
6872        assert!(
6873            MultiPackIndex::write(
6874                ObjectFormat::Sha1,
6875                1,
6876                &["pack-b.idx".into(), "pack-a.idx".into()],
6877                &[],
6878            )
6879            .is_err()
6880        );
6881        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
6882        assert!(
6883            MultiPackIndex::write(
6884                ObjectFormat::Sha1,
6885                2,
6886                &["pack-a.idx".into()],
6887                &[MultiPackIndexEntry {
6888                    oid,
6889                    pack_int_id: 1,
6890                    offset: 12,
6891                    force_large_offset: false,
6892                }],
6893            )
6894            .is_err()
6895        );
6896        assert!(
6897            MultiPackIndex::write(
6898                ObjectFormat::Sha1,
6899                2,
6900                &["pack-a.idx".into()],
6901                &[
6902                    MultiPackIndexEntry {
6903                        oid,
6904                        pack_int_id: 0,
6905                        offset: 12,
6906                        force_large_offset: false,
6907                    },
6908                    MultiPackIndexEntry {
6909                        oid,
6910                        pack_int_id: 0,
6911                        offset: 24,
6912                        force_large_offset: false,
6913                    },
6914                ],
6915            )
6916            .is_err()
6917        );
6918    }
6919
6920    #[test]
6921    fn rejects_bad_multi_pack_index_bitmap_chunks() {
6922        let oid_a = ObjectId::from_hex(
6923            ObjectFormat::Sha1,
6924            "1111111111111111111111111111111111111111",
6925        )
6926        .expect("test operation should succeed");
6927        let oid_b = ObjectId::from_hex(
6928            ObjectFormat::Sha1,
6929            "2222222222222222222222222222222222222222",
6930        )
6931        .expect("test operation should succeed");
6932
6933        let mut duplicate_ridx = midx_chunks_with_pack_names(
6934            ObjectFormat::Sha1,
6935            b"pack-a.idx\0\0".to_vec(),
6936            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
6937        );
6938        duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
6939        let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
6940        assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
6941
6942        let mut short_btmp = midx_chunks_with_pack_names(
6943            ObjectFormat::Sha1,
6944            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6945            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
6946        );
6947        short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
6948        let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
6949        assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
6950
6951        let mut out_of_range_btmp = midx_chunks_with_pack_names(
6952            ObjectFormat::Sha1,
6953            b"pack-a.idx\0\0".to_vec(),
6954            &[(oid_a, 0, 12), (oid_b, 0, 24)],
6955        );
6956        out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
6957        let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
6958        assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
6959    }
6960
6961    #[test]
6962    fn parses_pack_bitmap_index_with_hash_cache() {
6963        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6964            .expect("test operation should succeed");
6965        let bitmap = pack_bitmap_index(
6966            ObjectFormat::Sha1,
6967            3,
6968            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
6969            &pack_checksum,
6970            &[(2, 0, 1, &[0b101])],
6971            Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
6972        );
6973
6974        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
6975            .expect("test operation should succeed");
6976        assert_eq!(parsed.version, 1);
6977        assert_eq!(parsed.format, ObjectFormat::Sha1);
6978        assert_eq!(
6979            parsed.options,
6980            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
6981        );
6982        assert_eq!(parsed.pack_checksum, pack_checksum);
6983        assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
6984        assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
6985        assert_eq!(parsed.entries.len(), 1);
6986        let entry = parsed
6987            .entry_for_index_position(2)
6988            .expect("test operation should succeed");
6989        assert_eq!(entry.xor_offset, 0);
6990        assert_eq!(entry.flags, 1);
6991        assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
6992        assert_eq!(
6993            parsed.name_hash_cache,
6994            Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
6995        );
6996    }
6997
6998    #[test]
6999    fn parses_pack_bitmap_index_sha256() {
7000        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7001            .expect("test operation should succeed");
7002        let bitmap = pack_bitmap_index(
7003            ObjectFormat::Sha256,
7004            2,
7005            PackBitmapIndex::OPTION_FULL_DAG,
7006            &pack_checksum,
7007            &[(0, 0, 0, &[0b11])],
7008            None,
7009        );
7010
7011        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
7012            .expect("test operation should succeed");
7013        assert_eq!(parsed.version, 1);
7014        assert_eq!(parsed.format, ObjectFormat::Sha256);
7015        assert_eq!(parsed.pack_checksum, pack_checksum);
7016        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
7017        assert_eq!(parsed.entries[0].object_position, 0);
7018        assert_eq!(parsed.name_hash_cache, None);
7019    }
7020
7021    #[test]
7022    fn parses_upstream_git_written_pack_bitmap_index() {
7023        let root = unique_temp_dir("git-pack-bitmap-upstream");
7024        fs::create_dir_all(&root).expect("test operation should succeed");
7025        {
7026            run_git_success(&root, &["init", "-q", "-b", "main"]);
7027            run_git_success(
7028                &root,
7029                &[
7030                    "-c",
7031                    "user.name=Example User",
7032                    "-c",
7033                    "user.email=example@example.invalid",
7034                    "commit",
7035                    "--allow-empty",
7036                    "-q",
7037                    "-m",
7038                    "one",
7039                ],
7040            );
7041            run_git_success(
7042                &root,
7043                &[
7044                    "-c",
7045                    "user.name=Example User",
7046                    "-c",
7047                    "user.email=example@example.invalid",
7048                    "commit",
7049                    "--allow-empty",
7050                    "-q",
7051                    "-m",
7052                    "two",
7053                ],
7054            );
7055            run_git_success(&root, &["repack", "-adb"]);
7056            let pack_dir = root.join(".git").join("objects").join("pack");
7057            let idx_path = single_path_with_extension(&pack_dir, "idx");
7058            let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
7059            let index = PackIndex::parse(
7060                &fs::read(idx_path).expect("test operation should succeed"),
7061                ObjectFormat::Sha1,
7062            )
7063            .expect("test operation should succeed");
7064            let bitmap = PackBitmapIndex::parse(
7065                &fs::read(bitmap_path).expect("test operation should succeed"),
7066                ObjectFormat::Sha1,
7067                index.entries.len(),
7068            )
7069            .expect("test operation should succeed");
7070            assert_eq!(bitmap.pack_checksum, index.pack_checksum);
7071            assert!(!bitmap.entries.is_empty());
7072        };
7073        let _ = fs::remove_dir_all(&root);
7074    }
7075
7076    #[test]
7077    fn rejects_bad_pack_bitmap_index_header_and_checksum() {
7078        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7079            .expect("test operation should succeed");
7080        let bitmap = pack_bitmap_index(
7081            ObjectFormat::Sha1,
7082            1,
7083            PackBitmapIndex::OPTION_FULL_DAG,
7084            &pack_checksum,
7085            &[(0, 0, 0, &[1])],
7086            None,
7087        );
7088
7089        let mut bad_signature = bitmap.clone();
7090        bad_signature[0] = b'X';
7091        assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
7092
7093        let mut bad_version = bitmap.clone();
7094        bad_version[5] = 2;
7095        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
7096        assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
7097
7098        let mut bad_option = bitmap.clone();
7099        bad_option[7] = 0x20;
7100        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
7101        assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
7102
7103        let mut bad_checksum = bitmap;
7104        let last = bad_checksum.len() - 1;
7105        bad_checksum[last] ^= 1;
7106        assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
7107    }
7108
7109    #[test]
7110    fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
7111        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7112            .expect("test operation should succeed");
7113        let bitmap = pack_bitmap_index(
7114            ObjectFormat::Sha1,
7115            2,
7116            PackBitmapIndex::OPTION_FULL_DAG,
7117            &pack_checksum,
7118            &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
7119            None,
7120        );
7121
7122        let mut truncated = bitmap.clone();
7123        truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
7124        refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
7125        assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
7126
7127        let mut out_of_range_position = pack_bitmap_index(
7128            ObjectFormat::Sha1,
7129            2,
7130            PackBitmapIndex::OPTION_FULL_DAG,
7131            &pack_checksum,
7132            &[(2, 0, 0, &[0b01])],
7133            None,
7134        );
7135        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7136        refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
7137        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7138
7139        let invalid_xor = pack_bitmap_index(
7140            ObjectFormat::Sha1,
7141            2,
7142            PackBitmapIndex::OPTION_FULL_DAG,
7143            &pack_checksum,
7144            &[(0, 1, 0, &[0b01])],
7145            None,
7146        );
7147        assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
7148    }
7149
7150    #[test]
7151    fn parses_single_entry_pack_index_sha256() {
7152        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
7153            .expect("test operation should succeed");
7154        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7155            .expect("test operation should succeed");
7156        let index = single_entry_index(
7157            ObjectFormat::Sha256,
7158            oid,
7159            0x1234_5678,
7160            12,
7161            pack_checksum.clone(),
7162        );
7163        let parsed =
7164            PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
7165        assert_eq!(parsed.version, 2);
7166        assert_eq!(parsed.pack_checksum, pack_checksum);
7167        assert_eq!(parsed.entries.len(), 1);
7168        assert_eq!(
7169            parsed
7170                .find(&oid)
7171                .expect("test operation should succeed")
7172                .offset,
7173            12
7174        );
7175        assert_eq!(
7176            parsed
7177                .find(&oid)
7178                .expect("test operation should succeed")
7179                .crc32,
7180            0x1234_5678
7181        );
7182        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
7183        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha256);
7184    }
7185
7186    #[test]
7187    fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
7188        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
7189    }
7190
7191    #[test]
7192    fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
7193        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
7194    }
7195
7196    #[test]
7197    fn write_packed_rejects_duplicate_objects() {
7198        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7199        assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
7200    }
7201
7202    #[test]
7203    fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
7204        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7205        let sha1 = object
7206            .object_id(ObjectFormat::Sha1)
7207            .expect("test operation should succeed");
7208        let sha256 = object
7209            .object_id(ObjectFormat::Sha256)
7210            .expect("test operation should succeed");
7211        let duplicate = [
7212            PackInput {
7213                oid: &sha1,
7214                object: &object,
7215            },
7216            PackInput {
7217                oid: &sha1,
7218                object: &object,
7219            },
7220        ];
7221        assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
7222
7223        let wrong_format = [PackInput {
7224            oid: &sha256,
7225            object: &object,
7226        }];
7227        assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
7228    }
7229
7230    fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7231        let objects = similar_blob_family(8);
7232        let packed =
7233            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7234        let undeltified =
7235            PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7236
7237        // The whole point of delta selection: the packed output is smaller than
7238        // storing every object undeltified.
7239        assert!(
7240            packed.pack.len() < undeltified.pack.len(),
7241            "expected delta pack ({}) smaller than undeltified pack ({})",
7242            packed.pack.len(),
7243            undeltified.pack.len()
7244        );
7245
7246        // At least one object must actually be stored as a delta.
7247        let kinds = pack_entry_kinds(&packed.pack, format);
7248        let delta_count = kinds
7249            .iter()
7250            .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7251            .count();
7252        assert!(
7253            delta_count >= 1,
7254            "expected at least one delta entry, found kinds {kinds:?}"
7255        );
7256
7257        // Round-trip: every original object reconstructs byte-for-byte.
7258        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7259        assert_eq!(parsed.entries.len(), objects.len());
7260        for object in &objects {
7261            let oid = object
7262                .object_id(format)
7263                .expect("test operation should succeed");
7264            let found = parsed
7265                .entries
7266                .iter()
7267                .find(|entry| entry.entry.oid == oid)
7268                .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
7269            assert_eq!(&found.object, object, "object {oid} did not round-trip");
7270        }
7271
7272        // The index must agree with the pack and locate every object.
7273        let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
7274        assert_eq!(index.pack_checksum, packed.checksum);
7275        for object in &objects {
7276            let oid = object
7277                .object_id(format)
7278                .expect("test operation should succeed");
7279            assert!(index.find(&oid).is_some(), "index missing {oid}");
7280        }
7281    }
7282
7283    #[test]
7284    fn write_packed_emits_ofs_delta_by_default() {
7285        let objects = similar_blob_family(6);
7286        let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
7287            .expect("test operation should succeed");
7288        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7289        assert!(
7290            kinds.contains(&PackObjectKind::OfsDelta),
7291            "expected an ofs-delta entry by default, found {kinds:?}"
7292        );
7293        assert!(
7294            !kinds.contains(&PackObjectKind::RefDelta),
7295            "default self-contained pack must not use ref-delta, found {kinds:?}"
7296        );
7297        // Round-trips.
7298        assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
7299    }
7300
7301    #[test]
7302    fn write_packed_can_emit_ref_delta() {
7303        let objects = similar_blob_family(6);
7304        let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
7305        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7306            .expect("test operation should succeed");
7307        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7308        assert!(
7309            kinds.contains(&PackObjectKind::RefDelta),
7310            "expected a ref-delta entry, found {kinds:?}"
7311        );
7312        assert!(
7313            !kinds.contains(&PackObjectKind::OfsDelta),
7314            "ref-delta mode must not emit ofs-delta, found {kinds:?}"
7315        );
7316
7317        // Ref-delta packs are still self-contained here, so they round-trip
7318        // without any external base lookup.
7319        let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
7320            .expect("test operation should succeed");
7321        assert_eq!(parsed.entries.len(), objects.len());
7322    }
7323
7324    #[test]
7325    fn write_packed_bounds_delta_chain_depth() {
7326        // A long chain of progressively-modified blobs. With a large window
7327        // every object could otherwise delta against its immediate predecessor,
7328        // forming a chain as long as the input.
7329        let objects = incremental_blob_chain(20);
7330        let format = ObjectFormat::Sha1;
7331
7332        for max_depth in [1usize, 2, 5] {
7333            let options = PackWriteOptions::new()
7334                .with_window(20)
7335                .with_depth(max_depth);
7336            let packed = PackFile::write_packed_with_options(&objects, format, &options)
7337                .expect("test operation should succeed");
7338
7339            let depths = pack_entry_depths(&packed.pack, format);
7340            let observed = depths.iter().copied().max().unwrap_or(0);
7341            assert!(
7342                observed <= max_depth,
7343                "max chain depth {observed} exceeded bound {max_depth}"
7344            );
7345
7346            // Still correct: round-trips byte-for-byte.
7347            let parsed =
7348                PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7349            for object in &objects {
7350                let oid = object
7351                    .object_id(format)
7352                    .expect("test operation should succeed");
7353                let found = parsed
7354                    .entries
7355                    .iter()
7356                    .find(|entry| entry.entry.oid == oid)
7357                    .expect("test operation should succeed");
7358                assert_eq!(&found.object, object);
7359            }
7360        }
7361    }
7362
7363    #[test]
7364    fn write_packed_depth_zero_stores_everything_undeltified() {
7365        let objects = similar_blob_family(5);
7366        let options = PackWriteOptions::new().with_depth(0);
7367        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7368            .expect("test operation should succeed");
7369        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7370        assert!(
7371            kinds
7372                .iter()
7373                .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
7374            "depth 0 must disable deltas, found {kinds:?}"
7375        );
7376    }
7377
7378    #[test]
7379    fn write_thin_uses_external_base_and_round_trips_sha1() {
7380        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
7381    }
7382
7383    #[test]
7384    fn write_thin_uses_external_base_and_round_trips_sha256() {
7385        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
7386    }
7387
7388    fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
7389        // The base object stays OUT of the pack; only `target` is written, as a
7390        // ref-delta against the external base's object id.
7391        let base = blob_with_marker("EXTERNAL-BASE");
7392        let target = blob_with_marker("EXTERNAL-TARGET");
7393        let base_oid = base
7394            .object_id(format)
7395            .expect("test operation should succeed");
7396
7397        let mut external = HashMap::new();
7398        external.insert(base_oid, base.clone());
7399        let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
7400            .expect("test operation should succeed");
7401
7402        // Exactly one entry, encoded as a ref-delta to the external base.
7403        let kinds = pack_entry_kinds(&packed.pack, format);
7404        assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7405
7406        // The external base reference must be the base oid.
7407        let mut offset = 12usize;
7408        let header =
7409            parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7410        assert_eq!(header.kind, PackObjectKind::RefDelta);
7411        let referenced =
7412            ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7413                .expect("test operation should succeed");
7414        assert_eq!(referenced, base_oid);
7415
7416        // A plain (non-thin) parse fails: the base is not present.
7417        assert!(PackFile::parse(&packed.pack, format).is_err());
7418
7419        // A thin parse that supplies the external base reconstructs the target.
7420        let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7421            if oid == &base_oid {
7422                Ok(Some(base.clone()))
7423            } else {
7424                Ok(None)
7425            }
7426        })
7427        .expect("test operation should succeed");
7428        assert_eq!(parsed.entries.len(), 1);
7429        assert_eq!(parsed.entries[0].object, target);
7430    }
7431
7432    #[test]
7433    fn write_packed_preserves_distinct_objects_with_no_similarity() {
7434        // Unrelated objects: nothing should delta, but the pack must still be
7435        // valid and complete.
7436        let objects = vec![
7437            EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
7438            EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
7439            EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
7440        ];
7441        let format = ObjectFormat::Sha1;
7442        let packed =
7443            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7444        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7445        assert_eq!(parsed.entries.len(), objects.len());
7446        for object in &objects {
7447            let oid = object
7448                .object_id(format)
7449                .expect("test operation should succeed");
7450            assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
7451        }
7452    }
7453
7454    /// Build a family of blobs that all share a large common region but differ
7455    /// in a marker placed in the *middle*, so a good delta finds copy regions on
7456    /// both sides of the change.
7457    fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7458        let mut common_head = Vec::new();
7459        for _ in 0..200 {
7460            common_head.extend_from_slice(b"shared header line for delta testing\n");
7461        }
7462        let mut common_tail = Vec::new();
7463        for _ in 0..200 {
7464            common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7465        }
7466        (0..count)
7467            .map(|idx| {
7468                let mut body = common_head.clone();
7469                body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7470                body.extend_from_slice(&common_tail);
7471                EncodedObject::new(ObjectType::Blob, body)
7472            })
7473            .collect()
7474    }
7475
7476    /// Build a chain where each blob is the previous one plus an appended line,
7477    /// so each is highly similar to its predecessor.
7478    fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7479        let mut body = Vec::new();
7480        for _ in 0..100 {
7481            body.extend_from_slice(b"baseline content shared across the whole chain\n");
7482        }
7483        let mut objects = Vec::with_capacity(count);
7484        for idx in 0..count {
7485            body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7486            objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7487        }
7488        objects
7489    }
7490
7491    fn blob_with_marker(marker: &str) -> EncodedObject {
7492        let mut body = Vec::new();
7493        for _ in 0..150 {
7494            body.extend_from_slice(b"common body shared between base and target\n");
7495        }
7496        body.extend_from_slice(marker.as_bytes());
7497        body.push(b'\n');
7498        for _ in 0..150 {
7499            body.extend_from_slice(b"more common body shared between objects\n");
7500        }
7501        EncodedObject::new(ObjectType::Blob, body)
7502    }
7503
7504    /// Classify every entry in a pack (in pack order) by its on-disk kind.
7505    fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7506        pack_entry_descriptors(pack, format)
7507            .into_iter()
7508            .map(|descriptor| descriptor.kind)
7509            .collect()
7510    }
7511
7512    /// Compute each entry's delta chain depth (0 = undeltified base), in pack
7513    /// order. Entries always appear after their in-pack bases, so a single
7514    /// forward pass suffices.
7515    fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7516        let descriptors = pack_entry_descriptors(pack, format);
7517        let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7518        let mut depths = Vec::with_capacity(descriptors.len());
7519        for descriptor in &descriptors {
7520            let depth = match &descriptor.base {
7521                EntryBase::None => 0,
7522                EntryBase::Offset(base_offset) => {
7523                    depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7524                }
7525                // Ref-delta to an in-pack base: look it up by offset via oid is
7526                // unnecessary for these tests (which only use ofs-delta for the
7527                // chains), so treat as depth 1 if unknown.
7528                EntryBase::Ref => 1,
7529            };
7530            depth_by_offset.insert(descriptor.offset, depth);
7531            depths.push(depth);
7532        }
7533        depths
7534    }
7535
7536    struct EntryDescriptor {
7537        offset: u64,
7538        kind: PackObjectKind,
7539        base: EntryBase,
7540    }
7541
7542    enum EntryBase {
7543        None,
7544        Offset(u64),
7545        Ref,
7546    }
7547
7548    fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7549        let trailer_offset = pack.len() - format.raw_len();
7550        let count = u32_be(&pack[8..12]) as usize;
7551        let mut offset = 12usize;
7552        let mut descriptors = Vec::with_capacity(count);
7553        for _ in 0..count {
7554            let entry_offset = offset as u64;
7555            let header =
7556                parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7557            let base = match header.kind {
7558                PackObjectKind::OfsDelta => {
7559                    let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7560                        .expect("test operation should succeed");
7561                    EntryBase::Offset(base_offset)
7562                }
7563                PackObjectKind::RefDelta => {
7564                    offset += format.raw_len();
7565                    EntryBase::Ref
7566                }
7567                _ => EntryBase::None,
7568            };
7569            let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7570            let mut body = Vec::new();
7571            decoder
7572                .read_to_end(&mut body)
7573                .expect("test operation should succeed");
7574            offset += decoder.total_in() as usize;
7575            descriptors.push(EntryDescriptor {
7576                offset: entry_offset,
7577                kind: header.kind,
7578                base,
7579            });
7580        }
7581        descriptors
7582    }
7583
7584    fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7585        let mut base = Vec::new();
7586        for _ in 0..300 {
7587            base.extend_from_slice(b"common payload\n");
7588        }
7589        base.extend_from_slice(b"base\n");
7590        let mut changed = Vec::new();
7591        for _ in 0..300 {
7592            changed.extend_from_slice(b"common payload\n");
7593        }
7594        changed.extend_from_slice(b"changed\n");
7595        (
7596            EncodedObject::new(ObjectType::Blob, base),
7597            EncodedObject::new(ObjectType::Blob, changed),
7598        )
7599    }
7600
7601    fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7602        let mut pack = Vec::new();
7603        pack.extend_from_slice(b"PACK");
7604        pack.extend_from_slice(&2u32.to_be_bytes());
7605        pack.extend_from_slice(&1u32.to_be_bytes());
7606        write_entry_header(&mut pack, object_type, body.len() as u64);
7607        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7608        encoder
7609            .write_all(body)
7610            .expect("test operation should succeed");
7611        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7612        let checksum =
7613            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7614        pack.extend_from_slice(checksum.as_bytes());
7615        pack
7616    }
7617
7618    #[derive(Clone, Copy, Debug)]
7619    enum DeltaKind {
7620        Offset,
7621        Ref,
7622    }
7623
7624    fn two_object_delta_pack(
7625        format: ObjectFormat,
7626        base: &[u8],
7627        result: &[u8],
7628        delta_kind: DeltaKind,
7629    ) -> Vec<u8> {
7630        let mut pack = Vec::new();
7631        pack.extend_from_slice(b"PACK");
7632        pack.extend_from_slice(&2u32.to_be_bytes());
7633        pack.extend_from_slice(&2u32.to_be_bytes());
7634
7635        let base_offset = pack.len();
7636        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7637        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7638        encoder
7639            .write_all(base)
7640            .expect("test operation should succeed");
7641        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7642
7643        let delta = append_suffix_delta(base, result);
7644        let delta_offset = pack.len();
7645        write_pack_entry_header_kind(
7646            &mut pack,
7647            match delta_kind {
7648                DeltaKind::Offset => 6,
7649                DeltaKind::Ref => 7,
7650            },
7651            delta.len() as u64,
7652        );
7653        match delta_kind {
7654            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7655            DeltaKind::Ref => {
7656                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7657                    .expect("test operation should succeed");
7658                pack.extend_from_slice(base_oid.as_bytes());
7659            }
7660        }
7661        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7662        encoder
7663            .write_all(&delta)
7664            .expect("test operation should succeed");
7665        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7666
7667        let checksum =
7668            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7669        pack.extend_from_slice(checksum.as_bytes());
7670        pack
7671    }
7672
7673    fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7674        let mut pack = Vec::new();
7675        pack.extend_from_slice(b"PACK");
7676        pack.extend_from_slice(&2u32.to_be_bytes());
7677        pack.extend_from_slice(&1u32.to_be_bytes());
7678
7679        let delta = append_suffix_delta(base, result);
7680        write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7681        let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7682            .expect("test operation should succeed");
7683        pack.extend_from_slice(base_oid.as_bytes());
7684        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7685        encoder
7686            .write_all(&delta)
7687            .expect("test operation should succeed");
7688        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7689
7690        let checksum =
7691            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7692        pack.extend_from_slice(checksum.as_bytes());
7693        pack
7694    }
7695
7696    fn unique_temp_dir(name: &str) -> PathBuf {
7697        let nanos = SystemTime::now()
7698            .duration_since(UNIX_EPOCH)
7699            .expect("test operation should succeed")
7700            .as_nanos();
7701        std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
7702    }
7703
7704    fn run_git_success(cwd: &Path, args: &[&str]) {
7705        let output = Command::new("git")
7706            .current_dir(cwd)
7707            .args(args)
7708            .output()
7709            .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
7710        assert!(
7711            output.status.success(),
7712            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
7713            output.status.code(),
7714            String::from_utf8_lossy(&output.stdout),
7715            String::from_utf8_lossy(&output.stderr)
7716        );
7717    }
7718
7719    fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
7720        let mut paths = fs::read_dir(dir)
7721            .expect("test operation should succeed")
7722            .map(|entry| entry.expect("test operation should succeed").path())
7723            .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
7724            .collect::<Vec<_>>();
7725        assert_eq!(paths.len(), 1, "expected one .{extension} file");
7726        paths.remove(0)
7727    }
7728
7729    fn pack_bitmap_index(
7730        format: ObjectFormat,
7731        object_count: u32,
7732        options: u16,
7733        pack_checksum: &ObjectId,
7734        entries: &[(u32, u8, u8, &[u64])],
7735        name_hash_cache: Option<&[u32]>,
7736    ) -> Vec<u8> {
7737        let mut out = Vec::new();
7738        out.extend_from_slice(b"BITM");
7739        out.extend_from_slice(&1u16.to_be_bytes());
7740        out.extend_from_slice(&options.to_be_bytes());
7741        out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7742        out.extend_from_slice(pack_checksum.as_bytes());
7743        write_test_ewah(&mut out, object_count, &[0b001]);
7744        write_test_ewah(&mut out, object_count, &[0b010]);
7745        write_test_ewah(&mut out, object_count, &[0b100]);
7746        write_test_ewah(&mut out, object_count, &[0]);
7747        for (position, xor_offset, flags, words) in entries {
7748            out.extend_from_slice(&position.to_be_bytes());
7749            out.push(*xor_offset);
7750            out.push(*flags);
7751            write_test_ewah(&mut out, object_count, words);
7752        }
7753        if let Some(cache) = name_hash_cache {
7754            for value in cache {
7755                out.extend_from_slice(&value.to_be_bytes());
7756            }
7757        }
7758        let checksum =
7759            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7760        out.extend_from_slice(checksum.as_bytes());
7761        out
7762    }
7763
7764    fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
7765        out.extend_from_slice(&bit_size.to_be_bytes());
7766        let words = ewah_literal_words(literals);
7767        out.extend_from_slice(&(words.len() as u32).to_be_bytes());
7768        for word in words {
7769            out.extend_from_slice(&word.to_be_bytes());
7770        }
7771        out.extend_from_slice(&0u32.to_be_bytes());
7772    }
7773
7774    fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
7775        let rlw = (literals.len() as u64) << 33;
7776        let mut words = vec![rlw];
7777        words.extend_from_slice(literals);
7778        words
7779    }
7780
7781    fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7782        let checksum_offset = bytes.len() - format.raw_len();
7783        let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7784            .expect("test operation should succeed");
7785        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7786    }
7787
7788    fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
7789        assert!(result.starts_with(base));
7790        let suffix = &result[base.len()..];
7791        assert!(base.len() < 0x10000);
7792        assert!(suffix.len() < 0x80);
7793        let mut delta = Vec::new();
7794        write_delta_varint(&mut delta, base.len() as u64);
7795        write_delta_varint(&mut delta, result.len() as u64);
7796        delta.push(0x90);
7797        delta.push(base.len() as u8);
7798        delta.push(suffix.len() as u8);
7799        delta.extend_from_slice(suffix);
7800        delta
7801    }
7802
7803    fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
7804        loop {
7805            let mut byte = (value as u8) & 0x7f;
7806            value >>= 7;
7807            if value != 0 {
7808                byte |= 0x80;
7809            }
7810            out.push(byte);
7811            if value == 0 {
7812                break;
7813            }
7814        }
7815    }
7816
7817    fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
7818        let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
7819        size >>= 4;
7820        if size != 0 {
7821            byte |= 0x80;
7822        }
7823        out.push(byte);
7824        while size != 0 {
7825            let mut byte = (size as u8) & 0x7f;
7826            size >>= 7;
7827            if size != 0 {
7828                byte |= 0x80;
7829            }
7830            out.push(byte);
7831        }
7832    }
7833
7834    fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
7835        assert!(relative < 0x80);
7836        out.push(relative as u8);
7837    }
7838
7839    fn single_entry_index(
7840        format: ObjectFormat,
7841        oid: ObjectId,
7842        crc32: u32,
7843        offset: u32,
7844        pack_checksum: ObjectId,
7845    ) -> Vec<u8> {
7846        let mut index = Vec::new();
7847        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7848        index.extend_from_slice(&2u32.to_be_bytes());
7849        for idx in 0..256 {
7850            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7851                1u32
7852            } else {
7853                0u32
7854            };
7855            index.extend_from_slice(&count.to_be_bytes());
7856        }
7857        index.extend_from_slice(oid.as_bytes());
7858        index.extend_from_slice(&crc32.to_be_bytes());
7859        index.extend_from_slice(&offset.to_be_bytes());
7860        index.extend_from_slice(pack_checksum.as_bytes());
7861        let checksum =
7862            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7863        index.extend_from_slice(checksum.as_bytes());
7864        index
7865    }
7866
7867    fn single_entry_index_v1(
7868        format: ObjectFormat,
7869        oid: ObjectId,
7870        offset: u32,
7871        pack_checksum: ObjectId,
7872    ) -> Vec<u8> {
7873        let mut index = Vec::new();
7874        for idx in 0..256 {
7875            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7876                1u32
7877            } else {
7878                0u32
7879            };
7880            index.extend_from_slice(&count.to_be_bytes());
7881        }
7882        index.extend_from_slice(&offset.to_be_bytes());
7883        index.extend_from_slice(oid.as_bytes());
7884        index.extend_from_slice(pack_checksum.as_bytes());
7885        let checksum =
7886            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7887        index.extend_from_slice(checksum.as_bytes());
7888        index
7889    }
7890
7891    fn pack_reverse_index(
7892        format: ObjectFormat,
7893        positions: &[u32],
7894        pack_checksum: ObjectId,
7895    ) -> Vec<u8> {
7896        let mut reverse_index = Vec::new();
7897        reverse_index.extend_from_slice(b"RIDX");
7898        reverse_index.extend_from_slice(&1u32.to_be_bytes());
7899        reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7900        for position in positions {
7901            reverse_index.extend_from_slice(&position.to_be_bytes());
7902        }
7903        reverse_index.extend_from_slice(pack_checksum.as_bytes());
7904        let checksum =
7905            sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7906        reverse_index.extend_from_slice(checksum.as_bytes());
7907        reverse_index
7908    }
7909
7910    fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7911        let mut out = Vec::new();
7912        out.extend_from_slice(b"MTME");
7913        out.extend_from_slice(&1u32.to_be_bytes());
7914        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7915        for mtime in mtimes {
7916            out.extend_from_slice(&mtime.to_be_bytes());
7917        }
7918        out.extend_from_slice(pack_checksum.as_bytes());
7919        let checksum =
7920            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7921        out.extend_from_slice(checksum.as_bytes());
7922        out
7923    }
7924
7925    fn midx_chunks_with_pack_names(
7926        _format: ObjectFormat,
7927        pack_names: Vec<u8>,
7928        entries: &[(ObjectId, u32, u64)],
7929    ) -> Vec<([u8; 4], Vec<u8>)> {
7930        let mut entries = entries.to_vec();
7931        entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7932        let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7933        let mut large_offsets = Vec::new();
7934        let mut chunks = vec![
7935            (*b"PNAM", pack_names),
7936            (*b"OIDF", midx_oid_fanout(&object_ids)),
7937            (*b"OIDL", midx_oid_lookup(&object_ids)),
7938            (
7939                *b"OOFF",
7940                midx_ooff_entries(
7941                    &entries
7942                        .iter()
7943                        .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7944                        .collect::<Vec<_>>(),
7945                    &mut large_offsets,
7946                ),
7947            ),
7948        ];
7949        if !large_offsets.is_empty() {
7950            chunks.push((*b"LOFF", large_offsets));
7951        }
7952        chunks
7953    }
7954
7955    fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7956        let mut counts = [0u32; 256];
7957        for oid in object_ids {
7958            counts[oid.as_bytes()[0] as usize] += 1;
7959        }
7960        let mut running = 0u32;
7961        let mut out = Vec::new();
7962        for count in counts {
7963            running += count;
7964            out.extend_from_slice(&running.to_be_bytes());
7965        }
7966        out
7967    }
7968
7969    fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7970        let mut out = Vec::new();
7971        for oid in object_ids {
7972            out.extend_from_slice(oid.as_bytes());
7973        }
7974        out
7975    }
7976
7977    fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
7978        let mut out = Vec::new();
7979        for (pack_int_id, offset) in entries {
7980            out.extend_from_slice(&pack_int_id.to_be_bytes());
7981            if *offset < 0x8000_0000 {
7982                out.extend_from_slice(&(*offset as u32).to_be_bytes());
7983            } else {
7984                let large_idx = (large_offsets.len() / 8) as u32;
7985                out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
7986                large_offsets.extend_from_slice(&offset.to_be_bytes());
7987            }
7988        }
7989        out
7990    }
7991
7992    fn midx_u32_table(values: &[u32]) -> Vec<u8> {
7993        let mut out = Vec::new();
7994        for value in values {
7995            out.extend_from_slice(&value.to_be_bytes());
7996        }
7997        out
7998    }
7999
8000    fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
8001        let mut out = Vec::new();
8002        for (bitmap_pos, bitmap_nr) in entries {
8003            out.extend_from_slice(&bitmap_pos.to_be_bytes());
8004            out.extend_from_slice(&bitmap_nr.to_be_bytes());
8005        }
8006        out
8007    }
8008
8009    fn multi_pack_index(
8010        format: ObjectFormat,
8011        version: u8,
8012        pack_count: u32,
8013        chunks: &[([u8; 4], Vec<u8>)],
8014    ) -> Vec<u8> {
8015        let lookup_len = (chunks.len() + 1) * 12;
8016        let mut out = Vec::new();
8017        out.extend_from_slice(b"MIDX");
8018        out.push(version);
8019        out.push(hash_function_id(format) as u8);
8020        out.push(chunks.len() as u8);
8021        out.push(0);
8022        out.extend_from_slice(&pack_count.to_be_bytes());
8023        let mut chunk_offset = (12 + lookup_len) as u64;
8024        for (id, data) in chunks {
8025            out.extend_from_slice(id);
8026            out.extend_from_slice(&chunk_offset.to_be_bytes());
8027            chunk_offset += data.len() as u64;
8028        }
8029        out.extend_from_slice(&[0, 0, 0, 0]);
8030        out.extend_from_slice(&chunk_offset.to_be_bytes());
8031        for (_id, data) in chunks {
8032            out.extend_from_slice(data);
8033        }
8034        let checksum =
8035            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
8036        out.extend_from_slice(checksum.as_bytes());
8037        out
8038    }
8039
8040    // ---- EWAH encoder / bitmap writer tests ------------------------------
8041
8042    fn pack_checksum_sha1() -> ObjectId {
8043        sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
8044    }
8045
8046    fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
8047        // Wrap the EWAH body with the surrounding offset bookkeeping the parser
8048        // expects: a checksum offset that lies just past the serialised bitmap.
8049        let mut offset = 0usize;
8050        let checksum_offset = bytes.len();
8051        parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
8052            .expect("test operation should succeed")
8053    }
8054
8055    #[test]
8056    fn ewah_encodes_single_literal_word_matching_helper() {
8057        // A bitmap whose only word is a literal must serialise as one RLW with
8058        // literal_len == 1 followed by the literal, identical to the test
8059        // helper used by the existing parser tests.
8060        let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
8061        assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
8062        assert_eq!(ewah.rlw_position, 0);
8063        assert_eq!(ewah.bit_size, 64);
8064    }
8065
8066    #[test]
8067    fn ewah_byte_layout_is_big_endian() {
8068        let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
8069            .expect("test operation should succeed");
8070        let bytes = ewah.to_bytes();
8071        let mut expected = Vec::new();
8072        expected.extend_from_slice(&64u32.to_be_bytes()); // bit_size
8073        expected.extend_from_slice(&2u32.to_be_bytes()); // word count: rlw + literal
8074        expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); // rlw: literal_len = 1
8075        expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
8076        expected.extend_from_slice(&0u32.to_be_bytes()); // rlw_position
8077        assert_eq!(bytes, expected);
8078    }
8079
8080    #[test]
8081    fn ewah_empty_bitmap_serialises_like_git() {
8082        let ewah = EwahBitmap::empty();
8083        let bytes = ewah.to_bytes();
8084        // bit_size = 0, word_count = 0, rlw_position = 0.
8085        assert_eq!(bytes, vec![0u8; 12]);
8086        // It must still parse and decode to nothing.
8087        let parsed = parse_ewah_bytes(&bytes);
8088        assert_eq!(parsed, ewah);
8089        assert!(
8090            parsed
8091                .to_positions()
8092                .expect("test operation should succeed")
8093                .is_empty()
8094        );
8095    }
8096
8097    #[test]
8098    fn ewah_compresses_clean_zero_run() {
8099        // Three all-zero words followed by a literal: the encoder should emit a
8100        // single RLW carrying a run of 3 clean-zero words plus one literal.
8101        let ewah =
8102            EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
8103        assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
8104        let rlw = ewah.words[0];
8105        assert_eq!(rlw & 1, 0, "run bit should be zero");
8106        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
8107        assert_eq!(rlw >> 33, 1, "literal length should be 1");
8108        assert_eq!(ewah.words[1], 0b1);
8109    }
8110
8111    #[test]
8112    fn ewah_compresses_clean_ones_run() {
8113        let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
8114            .expect("test operation should succeed");
8115        // Pure run of ones, no literals: one RLW only.
8116        assert_eq!(ewah.words.len(), 1);
8117        let rlw = ewah.words[0];
8118        assert_eq!(rlw & 1, 1, "run bit should be one");
8119        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
8120        assert_eq!(rlw >> 33, 0, "no literals");
8121    }
8122
8123    #[test]
8124    fn ewah_run_then_literal_then_run_roundtrips() {
8125        let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
8126        let bit_size = (words.len() * 64) as u32;
8127        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8128        assert_eq!(
8129            ewah.to_words().expect("test operation should succeed"),
8130            words
8131        );
8132    }
8133
8134    #[test]
8135    fn ewah_drops_trailing_clean_zero_words() {
8136        // Trailing all-zero words beyond a literal carry no information and git
8137        // does not serialise them, but to_words() restores them up to bit_size.
8138        let words = vec![0b1, 0, 0, 0];
8139        let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
8140        // bit_size of 1 means a single backing word.
8141        assert_eq!(ewah.bit_size, 1);
8142        assert_eq!(
8143            ewah.to_words().expect("test operation should succeed"),
8144            vec![0b1]
8145        );
8146    }
8147
8148    #[test]
8149    fn ewah_from_positions_roundtrips_via_positions() {
8150        let positions = [0u32, 1, 63, 64, 65, 200, 511];
8151        let ewah =
8152            EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
8153        let mut decoded = ewah.to_positions().expect("test operation should succeed");
8154        decoded.sort_unstable();
8155        assert_eq!(decoded, positions);
8156    }
8157
8158    #[test]
8159    fn ewah_from_positions_dedupes_and_orders() {
8160        let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
8161            .expect("test operation should succeed");
8162        assert_eq!(
8163            ewah.to_positions().expect("test operation should succeed"),
8164            vec![5, 100]
8165        );
8166    }
8167
8168    #[test]
8169    fn ewah_huge_zero_run_spans_multiple_rlws() {
8170        // A run longer than the 32-bit running-length field forces the encoder
8171        // to emit more than one RLW. Use one literal bit far out, with a bit
8172        // size large enough to exceed u32::MAX clean words is impractical, so
8173        // assert the field arithmetic via a direct builder run instead.
8174        let mut builder = EwahBuilder::new(0);
8175        builder.add_empty_words(false, 0xffff_ffff);
8176        builder.add_empty_words(false, 5);
8177        let ewah = builder.finish().expect("test operation should succeed");
8178        assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
8179        assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
8180        assert_eq!(ewah.words[1] & 1, 0);
8181        assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
8182        assert_eq!(ewah.rlw_position, 1);
8183    }
8184
8185    #[test]
8186    fn ewah_from_words_rejects_oversized_bit_size() {
8187        // bit_size demands two words but only one is supplied.
8188        assert!(EwahBitmap::from_words(65, &[0]).is_err());
8189    }
8190
8191    #[test]
8192    fn ewah_from_positions_rejects_out_of_range() {
8193        assert!(EwahBitmap::from_positions(64, &[64]).is_err());
8194    }
8195
8196    #[test]
8197    fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
8198        // Exercise the full encode -> serialise -> parse loop for a non-trivial
8199        // pattern and assert structural equality against the parser's model.
8200        let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
8201        let bit_size = (words.len() * 64) as u32;
8202        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8203        let bytes = ewah.to_bytes();
8204        let parsed = parse_ewah_bytes(&bytes);
8205        assert_eq!(parsed, ewah);
8206        assert_eq!(
8207            parsed.to_words().expect("test operation should succeed"),
8208            words
8209        );
8210    }
8211
8212    #[test]
8213    fn pack_bitmap_index_write_parse_roundtrip_sha1() {
8214        // commit, tree, blob in pack order; one selected commit reaching all.
8215        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
8216        let bytes = write_bitmap(
8217            ObjectFormat::Sha1,
8218            pack_checksum_sha1(),
8219            &object_types,
8220            &[(0u32, 0u32, vec![1u32, 2u32])],
8221            None,
8222        )
8223        .expect("test operation should succeed");
8224        assert_eq!(&bytes[..4], b"BITM");
8225
8226        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
8227            .expect("test operation should succeed");
8228        assert_eq!(parsed.version, 1);
8229        assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
8230        assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
8231        assert_eq!(
8232            parsed
8233                .type_bitmaps
8234                .commits
8235                .to_positions()
8236                .expect("test operation should succeed"),
8237            vec![0]
8238        );
8239        assert_eq!(
8240            parsed
8241                .type_bitmaps
8242                .trees
8243                .to_positions()
8244                .expect("test operation should succeed"),
8245            vec![1]
8246        );
8247        assert_eq!(
8248            parsed
8249                .type_bitmaps
8250                .blobs
8251                .to_positions()
8252                .expect("test operation should succeed"),
8253            vec![2]
8254        );
8255        assert!(
8256            parsed
8257                .type_bitmaps
8258                .tags
8259                .to_positions()
8260                .expect("test operation should succeed")
8261                .is_empty()
8262        );
8263        assert_eq!(parsed.entries.len(), 1);
8264        let entry = parsed
8265            .entry_for_index_position(0)
8266            .expect("test operation should succeed");
8267        assert_eq!(entry.xor_offset, 0);
8268        assert_eq!(entry.flags, 0);
8269        assert_eq!(
8270            entry
8271                .bitmap
8272                .to_positions()
8273                .expect("test operation should succeed"),
8274            vec![0, 1, 2]
8275        );
8276        assert_eq!(parsed.name_hash_cache, None);
8277    }
8278
8279    #[test]
8280    fn pack_bitmap_index_write_parse_roundtrip_sha256() {
8281        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8282            .expect("test operation should succeed");
8283        let object_types = [ObjectType::Commit, ObjectType::Tree];
8284        let bytes = write_bitmap(
8285            ObjectFormat::Sha256,
8286            pack_checksum.clone(),
8287            &object_types,
8288            &[(0u32, 0u32, vec![1u32])],
8289            None,
8290        )
8291        .expect("test operation should succeed");
8292        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
8293            .expect("test operation should succeed");
8294        assert_eq!(parsed.format, ObjectFormat::Sha256);
8295        assert_eq!(parsed.pack_checksum, pack_checksum);
8296        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
8297        assert_eq!(
8298            parsed.entries[0]
8299                .bitmap
8300                .to_positions()
8301                .expect("test operation should succeed"),
8302            vec![0, 1]
8303        );
8304    }
8305
8306    #[test]
8307    fn pack_bitmap_index_write_includes_name_hash_cache() {
8308        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
8309        let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
8310        let bytes = write_bitmap(
8311            ObjectFormat::Sha1,
8312            pack_checksum_sha1(),
8313            &object_types,
8314            &[(0u32, 0u32, vec![1u32, 2u32])],
8315            Some(cache.clone()),
8316        )
8317        .expect("test operation should succeed");
8318        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
8319            .expect("test operation should succeed");
8320        assert_eq!(
8321            parsed.options,
8322            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
8323        );
8324        assert_eq!(parsed.name_hash_cache, Some(cache));
8325    }
8326
8327    #[test]
8328    fn pack_bitmap_writer_supports_multiple_commits() {
8329        let object_types = [
8330            ObjectType::Commit,
8331            ObjectType::Commit,
8332            ObjectType::Tree,
8333            ObjectType::Blob,
8334        ];
8335        let mut writer =
8336            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8337                .expect("test operation should succeed");
8338        writer
8339            .add_commit(0, 0, &[2, 3])
8340            .expect("test operation should succeed");
8341        writer
8342            .add_commit(1, 1, &[2])
8343            .expect("test operation should succeed");
8344        let bytes = writer.write().expect("test operation should succeed");
8345        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
8346            .expect("test operation should succeed");
8347        assert_eq!(parsed.entries.len(), 2);
8348        assert_eq!(
8349            parsed
8350                .type_bitmaps
8351                .commits
8352                .to_positions()
8353                .expect("test operation should succeed"),
8354            vec![0, 1]
8355        );
8356        let first = parsed
8357            .entry_for_index_position(0)
8358            .expect("test operation should succeed");
8359        assert_eq!(
8360            first
8361                .bitmap
8362                .to_positions()
8363                .expect("test operation should succeed"),
8364            vec![0, 2, 3]
8365        );
8366        let second = parsed
8367            .entry_for_index_position(1)
8368            .expect("test operation should succeed");
8369        assert_eq!(
8370            second
8371                .bitmap
8372                .to_positions()
8373                .expect("test operation should succeed"),
8374            vec![1, 2]
8375        );
8376    }
8377
8378    #[test]
8379    fn pack_bitmap_index_recomputes_checksum_on_write() {
8380        // The provided index_checksum field is ignored; write recomputes it so
8381        // a bogus placeholder still produces a valid, parseable file.
8382        let object_types = [ObjectType::Commit, ObjectType::Blob];
8383        let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8384            .expect("test operation should succeed");
8385        let mut index = writer.build().expect("test operation should succeed");
8386        // build() sets an all-zero placeholder checksum.
8387        assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
8388        index.entries.clear(); // mutate the model after build
8389        index.entries.push(PackBitmapEntry {
8390            object_position: 0,
8391            xor_offset: 0,
8392            flags: 0,
8393            bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
8394        });
8395        let bytes = index.write().expect("test operation should succeed");
8396        // Parsing validates the trailing checksum, so a wrong checksum fails.
8397        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
8398            .expect("test operation should succeed");
8399        assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
8400    }
8401
8402    #[test]
8403    fn pack_bitmap_writer_rejects_non_commit_selection() {
8404        let object_types = [ObjectType::Commit, ObjectType::Blob];
8405        let mut writer =
8406            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8407                .expect("test operation should succeed");
8408        // Position 1 is a blob, not a commit.
8409        assert!(writer.add_commit(1, 1, &[]).is_err());
8410        // Position 5 is out of range entirely.
8411        assert!(writer.add_commit(5, 5, &[]).is_err());
8412        // Index position out of range.
8413        assert!(writer.add_commit(0, 5, &[]).is_err());
8414        // Reachable position out of range.
8415        assert!(writer.add_commit(0, 0, &[9]).is_err());
8416    }
8417
8418    #[test]
8419    fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
8420        let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8421            .expect("test operation should succeed");
8422        assert!(
8423            PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
8424                .is_err()
8425        );
8426    }
8427
8428    #[test]
8429    fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
8430        let writer = PackBitmapWriter::new(
8431            ObjectFormat::Sha1,
8432            pack_checksum_sha1(),
8433            &[ObjectType::Commit],
8434        )
8435        .expect("test operation should succeed");
8436        assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
8437    }
8438
8439    #[test]
8440    fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
8441        let mut index = PackBitmapWriter::new(
8442            ObjectFormat::Sha1,
8443            pack_checksum_sha1(),
8444            &[ObjectType::Commit],
8445        )
8446        .expect("test operation should succeed")
8447        .build()
8448        .expect("test operation should succeed");
8449        // Flag set but no cache present.
8450        index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
8451        assert!(index.write().is_err());
8452        // Cache present but flag missing.
8453        index.options = PackBitmapIndex::OPTION_FULL_DAG;
8454        index.name_hash_cache = Some(vec![0]);
8455        assert!(index.write().is_err());
8456    }
8457
8458    #[test]
8459    fn write_bitmap_roundtrips_through_upstream_git_parser() {
8460        // Build a real pack with git, then overwrite reachability with our own
8461        // writer using the real pack checksum and object types, and confirm our
8462        // bytes parse under the same parser that reads upstream bitmaps.
8463        let root = unique_temp_dir("git-pack-bitmap-writer");
8464        fs::create_dir_all(&root).expect("test operation should succeed");
8465        {
8466            run_git_success(&root, &["init", "-q", "-b", "main"]);
8467            run_git_success(
8468                &root,
8469                &[
8470                    "-c",
8471                    "user.name=Example User",
8472                    "-c",
8473                    "user.email=example@example.invalid",
8474                    "commit",
8475                    "--allow-empty",
8476                    "-q",
8477                    "-m",
8478                    "one",
8479                ],
8480            );
8481            run_git_success(&root, &["repack", "-adb"]);
8482            let pack_dir = root.join(".git").join("objects").join("pack");
8483            let idx_path = single_path_with_extension(&pack_dir, "idx");
8484            let index = PackIndex::parse(
8485                &fs::read(idx_path).expect("test operation should succeed"),
8486                ObjectFormat::Sha1,
8487            )
8488            .expect("test operation should succeed");
8489            // Read object types from the pack so the type bitmaps are accurate.
8490            let pack_path = single_path_with_extension(&pack_dir, "pack");
8491            let pack =
8492                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
8493                    .expect("test operation should succeed");
8494            // Map each index entry (sorted by oid) to its pack offset, then to a
8495            // pack-order position so positions line up with the index ordering.
8496            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
8497            offsets.sort_unstable();
8498            let position_of = |offset: u64| -> u32 {
8499                offsets
8500                    .iter()
8501                    .position(|value| *value == offset)
8502                    .expect("test operation should succeed") as u32
8503            };
8504            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
8505            for entry in &index.entries {
8506                let position = position_of(entry.offset) as usize;
8507                // Find the parsed object at this pack offset to read its type.
8508                if let Some(parsed) = pack
8509                    .entries
8510                    .iter()
8511                    .find(|po| po.entry.offset == entry.offset)
8512                {
8513                    object_types[position] = parsed.object.object_type;
8514                }
8515            }
8516            // Select the first commit position we find and reach everything.
8517            let commit_position = object_types
8518                .iter()
8519                .position(|ty| *ty == ObjectType::Commit)
8520                .expect("test operation should succeed") as u32;
8521            // The entry records the commit's position in the oid-sorted index.
8522            let commit_index_position = index
8523                .entries
8524                .iter()
8525                .position(|entry| position_of(entry.offset) == commit_position)
8526                .expect("test operation should succeed")
8527                as u32;
8528            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
8529            let bytes = write_bitmap(
8530                ObjectFormat::Sha1,
8531                index.pack_checksum.clone(),
8532                &object_types,
8533                &[(commit_position, commit_index_position, reachable)],
8534                None,
8535            )
8536            .expect("test operation should succeed");
8537            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
8538                .expect("test operation should succeed");
8539            assert_eq!(parsed.pack_checksum, index.pack_checksum);
8540            assert_eq!(parsed.entries.len(), 1);
8541            assert_eq!(
8542                parsed.entries[0]
8543                    .bitmap
8544                    .to_positions()
8545                    .expect("test operation should succeed")
8546                    .len(),
8547                index.entries.len()
8548            );
8549        };
8550        let _ = fs::remove_dir_all(&root);
8551    }
8552}