Skip to main content

sley_pack/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
/// Location and size bookkeeping for one object stored in a pack.
///
/// For undeltified entries the parser fills these fields straight from the
/// entry header and the inflate step.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the object held by this entry.
    pub oid: ObjectId,
    /// Number of compressed bytes consumed from the pack for the entry body.
    pub compressed_size: u64,
    /// Size declared in the entry header, i.e. the length of the inflated body.
    pub uncompressed_size: u64,
    /// Byte offset in the pack at which the entry header begins.
    pub offset: u64,
}
23
/// Default sliding-window size used by [`PackFile::write_packed`].
///
/// Each object is compared against up to this many previously emitted
/// candidates of the same type when searching for a small delta. Matches git's
/// default `pack.window`.
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Default maximum delta chain depth used by [`PackFile::write_packed`].
///
/// A delta may reference a base that is itself a delta; this bounds how long
/// such chains may grow so that reconstructing any object stays cheap and the
/// reader's recursion stays shallow. Matches git's default `pack.depth`.
pub const DEFAULT_PACK_DEPTH: usize = 50;

/// Minimum object count before pack payload compression is fanned out across
/// worker threads.
///
/// Below this threshold, thread setup and extra buffering cost more than they
/// save.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

/// Upper bound on the number of threads used for parallel compression.
///
/// Git gets much of its wall-clock win from using several cores, but unbounded
/// threads can steal cache from delta planning and inflate peak memory on
/// large packs.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
/// Options controlling sliding-window delta selection during pack generation.
///
/// Construct with [`PackWriteOptions::new`] (sensible defaults) and adjust with
/// the builder-style setters, or build one directly. Used by
/// [`PackFile::write_packed_with_options`] and [`PackFile::write_thin`].
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Number of previous same-type candidates each object is deltified
    /// against. Larger windows find better deltas at higher cost. Defaults to
    /// [`DEFAULT_PACK_WINDOW`].
    pub window: usize,
    /// Maximum delta chain depth. A value of `0` disables deltification.
    /// Defaults to [`DEFAULT_PACK_DEPTH`].
    pub depth: usize,
    /// When `true`, in-pack deltas are encoded as ofs-deltas (the default and
    /// git's preference). When `false`, in-pack deltas use ref-deltas. Deltas
    /// against external thin-pack bases always use ref-deltas regardless.
    pub prefer_ofs_delta: bool,
    /// External base objects, keyed by object id, that are *not* written into
    /// the pack but may be used as delta bases. Supplying any entries here
    /// produces a thin pack (see [`PackFile::write_thin`]). Empty by default,
    /// yielding a self-contained pack.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// When `true` (the default), objects are reordered by type and size for
    /// better delta locality. When `false`, the input order is preserved (the
    /// emitted pack lists objects in the order supplied); deltas then only
    /// reference earlier input objects. Reordering is always skipped when
    /// deltification is disabled (`depth == 0`), since it has no effect there.
    pub reorder: bool,
}
76
/// `Default` yields the same git-compatible settings as
/// [`PackWriteOptions::new`].
impl Default for PackWriteOptions {
    fn default() -> Self {
        // Single source of truth for the defaults lives in `new`.
        Self::new()
    }
}
82
83impl PackWriteOptions {
84    /// Options with git-compatible defaults: window
85    /// [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`], ofs-deltas, and
86    /// no external thin bases.
87    pub fn new() -> Self {
88        Self {
89            window: DEFAULT_PACK_WINDOW,
90            depth: DEFAULT_PACK_DEPTH,
91            prefer_ofs_delta: true,
92            thin_bases: HashMap::new(),
93            reorder: true,
94        }
95    }
96
97    /// Set the sliding-window size.
98    pub fn with_window(mut self, window: usize) -> Self {
99        self.window = window;
100        self
101    }
102
103    /// Set the maximum delta chain depth (`0` disables deltas).
104    pub fn with_depth(mut self, depth: usize) -> Self {
105        self.depth = depth;
106        self
107    }
108
109    /// Choose whether in-pack deltas use ofs-delta (`true`) or ref-delta
110    /// (`false`) base references.
111    pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
112        self.prefer_ofs_delta = prefer_ofs_delta;
113        self
114    }
115
116    /// Provide the set of external base objects permitted for a thin pack.
117    pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
118        self.thin_bases = thin_bases;
119        self
120    }
121
122    /// Choose whether objects may be reordered for delta locality (`true`) or
123    /// emitted in input order (`false`).
124    pub fn with_reorder(mut self, reorder: bool) -> Self {
125        self.reorder = reorder;
126        self
127    }
128}
129
/// Knobs describing how a repack should be carried out.
///
/// NOTE(review): the consumers of this type are outside this view; the field
/// notes below are inferred from the names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably requests that bitmap indexes be written — TODO confirm.
    pub write_bitmaps: bool,
    /// Presumably enables cruft packs for unreachable objects — TODO confirm.
    pub cruft_packs: bool,
    /// Presumably the geometric repack factor; `None` when unset — TODO confirm.
    pub geometric_factor: Option<u8>,
}
136
/// A fully parsed pack: header version, resolved objects, and trailer checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version from the header; the parser accepts `2` and `3`.
    pub version: u32,
    /// Objects of the pack after delta resolution.
    pub entries: Vec<PackObject>,
    /// Digest of all pack bytes preceding the trailer, verified against the
    /// trailer during parsing.
    pub checksum: ObjectId,
}
143
/// One resolved pack object together with its in-pack bookkeeping.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes and offset of the entry inside the pack.
    pub entry: PackEntry,
    /// The object's type and body.
    pub object: EncodedObject,
}
149
/// Per-object statistics for one entry of a verified pack, in the shape
/// `git verify-pack -v` reports.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Resolved object id.
    pub oid: ObjectId,
    /// Resolved object type (the delta's *result* type, not `ofs-delta`).
    pub object_type: ObjectType,
    /// Size taken from the on-disk entry header: the inflated object size for
    /// an undeltified entry, but the length of the delta instruction stream
    /// for a delta entry (this is the value git prints in its size column).
    pub size: u64,
    /// Bytes this object occupies in the pack: the offset delta to the next
    /// object, or to the trailing checksum for the last object.
    pub size_in_pack: u64,
    /// In-pack byte offset where this object's entry begins.
    pub offset: u64,
    /// Delta chain depth: `0` for undeltified objects, base-depth + 1 otherwise.
    pub delta_depth: u32,
    /// For delta objects, the id of the *immediate* base object (which may
    /// itself be a delta). `None` for undeltified objects.
    pub base_oid: Option<ObjectId>,
}
171
/// Result of [`PackFile::verify_pack_stats`]: per-object stats in pack offset
/// order plus the pack's trailing checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, ordered by ascending in-pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack, as computed while parsing it.
    pub checksum: ObjectId,
}
179
/// A pack together with the index describing it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Raw pack bytes.
    pub pack: Vec<u8>,
    /// Raw bytes of the pack's index.
    pub index: Vec<u8>,
    /// Checksum of the pack.
    pub checksum: ObjectId,
    /// Index entries (object id, CRC-32 and offset) for the pack's objects.
    pub entries: Vec<PackIndexEntry>,
}
187
/// A borrowed object paired with an id the caller already knows, as consumed
/// by [`PackFile::write_packed_with_known_ids`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Caller-supplied id for `object`; its format must match the pack's.
    pub oid: &'a ObjectId,
    /// The object to write.
    pub object: &'a EncodedObject,
}
193
/// Output of building an index for an existing pack (see
/// [`PackFile::index_pack`], which obtains it from
/// `PackIndex::write_v2_for_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index was built for.
    pub pack_checksum: ObjectId,
    /// The index entries that were written.
    pub entries: Vec<PackIndexEntry>,
}
200
/// An owned, fully decoded pack index.
///
/// NOTE(review): the parser for this type is outside this view; field notes
/// follow the usual git `.idx` layout and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-slot fanout table — presumably cumulative object counts keyed by
    /// the first byte of the object id; TODO confirm.
    pub fanout: [u32; 256],
    /// Decoded entries of the index.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
209
/// A pack index backed by a borrowed byte slice rather than owned entries.
///
/// NOTE(review): the accessors are outside this view; presumably entries are
/// decoded on demand from `bytes` using `tables` — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-slot fanout table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Borrowed index bytes the view reads from.
    bytes: &'a [u8],
    // Object-id format of the index.
    format: ObjectFormat,
    // Locations of the version-specific tables.
    tables: PackIndexViewTables,
}
221
/// A thread-safe, debuggable owner of index bytes.
///
/// The `Send + Sync` bounds let implementors be shared as
/// `Arc<dyn PackIndexByteSource>`.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Borrow the underlying bytes.
    fn as_bytes(&self) -> &[u8];
}
225
/// Blanket implementation: anything that exposes its contents through
/// `AsRef<[u8]>` (and is `Debug + Send + Sync`) can serve as a byte source.
impl<T> PackIndexByteSource for T
where
    T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
{
    fn as_bytes(&self) -> &[u8] {
        self.as_ref()
    }
}
234
/// Byte source wrapping a reference-counted, immutable byte slice.
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
237
/// Exposes the wrapped `Arc<[u8]>` contents as the index bytes.
impl PackIndexByteSource for SharedIndexBytes {
    fn as_bytes(&self) -> &[u8] {
        self.0.as_ref()
    }
}
243
/// Owning counterpart of [`PackIndexView`]: the same header fields, but the
/// index bytes are held through a shared [`PackIndexByteSource`] instead of a
/// borrowed slice.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-slot fanout table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Shared owner of the index bytes; cloning the struct clones the `Arc`.
    bytes: Arc<dyn PackIndexByteSource>,
    // Object-id format of the index.
    format: ObjectFormat,
    // Locations of the version-specific tables.
    tables: PackIndexViewTables,
}
255
/// One record of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the indexed object.
    pub oid: ObjectId,
    /// CRC-32 recorded for the object's pack entry.
    pub crc32: u32,
    /// Byte offset of the object's entry within the pack.
    pub offset: u64,
}
262
/// The per-object data of an index record without the object id —
/// presumably the result of looking an id up in an index (TODO confirm
/// against the lookup functions, which are outside this view).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 recorded for the object's pack entry.
    pub crc32: u32,
    /// Byte offset of the object's entry within the pack.
    pub offset: u64,
}
268
/// Table locations for an index view, one variant per index format version.
///
/// NOTE(review): the ranges are presumably byte ranges into the index bytes
/// held by the view — confirm against the view constructors.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version 1 keeps everything in a single entry table.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version 2 splits ids, CRCs and offsets into separate tables, with an
    /// extra table for large offsets.
    V2 {
        oid_table: Range<usize>,
        crc_table: Range<usize>,
        small_offset_table: Range<usize>,
        large_offset_table: Range<usize>,
    },
}
281
/// Decoded pack reverse index.
///
/// NOTE(review): the parser is outside this view; per the documentation on
/// `PackBitmapEntry`, the reverse index maps pack (offset) order to index
/// positions — confirm the exact direction of `positions`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Format version of the reverse index.
    pub version: u32,
    /// Object-id format of the associated pack.
    pub format: ObjectFormat,
    /// Position table, one `u32` per object.
    pub positions: Vec<u32>,
    /// Checksum of the pack this reverse index belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of the reverse index itself — presumably; TODO confirm.
    pub index_checksum: ObjectId,
}
290
/// Decoded per-object modification-time table for a pack.
///
/// NOTE(review): the parser is outside this view; the ordering of `mtimes`
/// relative to the pack index is not visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version of the table.
    pub version: u32,
    /// Object-id format of the associated pack.
    pub format: ObjectFormat,
    /// One modification time per object, stored as `u32`.
    pub mtimes: Vec<u32>,
    /// Checksum of the pack this table belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum of the table itself — presumably; TODO confirm.
    pub index_checksum: ObjectId,
}
299
/// Decoded pack bitmap index.
///
/// NOTE(review): the parser is outside this view; field notes follow the
/// names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Format version of the bitmap index.
    pub version: u16,
    /// Object-id format of the associated pack.
    pub format: ObjectFormat,
    /// Raw option flags from the bitmap header.
    pub options: u16,
    /// Checksum of the pack the bitmaps describe.
    pub pack_checksum: ObjectId,
    /// Checksum of the bitmap index itself — presumably; TODO confirm.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Per-commit bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache; `None` when absent.
    pub name_hash_cache: Option<Vec<u32>>,
}
311
/// The four per-type bitmaps of a bitmap index, one for each object type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commit objects.
    pub commits: EwahBitmap,
    /// Bitmap for tree objects.
    pub trees: EwahBitmap,
    /// Bitmap for blob objects.
    pub blobs: EwahBitmap,
    /// Bitmap for tag objects.
    pub tags: EwahBitmap,
}
319
/// One stored commit bitmap of a bitmap index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// The commit's position in the *oid-sorted* pack index (`.idx` order),
    /// NOT the pack-order position used for the bitmap's bit numbering.
    /// Upstream writes `oid_pos(...)` here (pack-bitmap-write.c) and reads it
    /// back via `nth_packed_object_id` (pack-bitmap.c).
    pub object_position: u32,
    /// XOR offset byte of the entry — presumably how far back the bitmap this
    /// one is XOR-compressed against lies; TODO confirm.
    pub xor_offset: u8,
    /// Raw flag byte of the entry.
    pub flags: u8,
    /// Reachability bitmap; bit `i` refers to the `i`-th object in *pack
    /// order* (offset order), as mapped by the pack's reverse index.
    pub bitmap: EwahBitmap,
}
333
/// A bitmap in its serialized word form.
///
/// NOTE(review): presumably the EWAH compressed representation used by git
/// bitmap files; the codec is outside this view — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap represents.
    pub bit_size: u32,
    /// The bitmap's 64-bit words.
    pub words: Vec<u64>,
    /// Position of the running-length word — presumably an index into
    /// `words`; TODO confirm.
    pub rlw_position: u32,
}
340
/// Decoded multi-pack index.
///
/// NOTE(review): the parser is outside this view; field notes follow the
/// names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object-id format of the indexed packs.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// 256-slot fanout table.
    pub fanout: [u32; 256],
    /// Object records (id, owning pack, offset).
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse-index data; `None` when absent.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges; `None` when absent.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunk table entries of the file.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum of the multi-pack index — presumably its trailer; TODO confirm.
    pub checksum: ObjectId,
}
355
/// Lookup-oriented handle on a multi-pack index that keeps the raw bytes
/// shared instead of decoding every object record.
///
/// NOTE(review): the lookup methods are outside this view; the `*_offset`
/// fields are presumably byte offsets into `bytes` — confirm.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    // Object-id format of the indexed packs.
    format: ObjectFormat,
    // Number of packs covered.
    pack_count: u32,
    // Names of the covered packs.
    pack_names: Vec<String>,
    // 256-slot fanout table.
    fanout: [u32; 256],
    // Number of objects covered.
    object_count: usize,
    // Start of the object-id lookup table.
    oid_lookup_offset: usize,
    // Start of the object-offsets table.
    object_offsets_offset: usize,
    // Start of the large-offsets table, when the file has one.
    large_offsets_offset: Option<usize>,
    // Length of the large-offsets table.
    large_offsets_len: usize,
    // Shared owner of the index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
}
369
/// One object record of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Integer id of the pack holding the object — presumably an index into
    /// the multi-pack index's pack names; TODO confirm.
    pub pack_int_id: u32,
    /// Byte offset of the object within that pack.
    pub offset: u64,
}
376
/// Bitmap range recorded for one pack of a multi-pack index.
///
/// NOTE(review): presumably the first bit position and the number of bits
/// belonging to the pack — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Starting bitmap position for the pack.
    pub bitmap_pos: u32,
    /// Number of bitmap positions for the pack.
    pub bitmap_nr: u32,
}
382
/// One entry of a multi-pack index chunk table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk — presumably from the start of the file; TODO confirm.
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
389
/// Kind of a pack entry as declared by its on-disk header.
///
/// The first four variants are stored whole objects; the two delta variants
/// are stored as instructions against a base.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    Commit,
    Tree,
    Blob,
    Tag,
    /// Delta whose base is identified by a pack offset.
    OfsDelta,
    /// Delta whose base is identified by an object id.
    RefDelta,
}
399
/// A pack entry after inflation but before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// An undeltified object that is already complete.
    Resolved(PackObject),
    /// A delta that still has to be applied to its base.
    Delta {
        /// Where to find the base object.
        base: DeltaBase,
        /// Compressed bytes consumed from the pack for the delta body.
        compressed_size: u64,
        /// Size declared in the entry header (length of the inflated delta).
        delta_size: u64,
        /// Byte offset in the pack at which the entry header begins.
        offset: u64,
        /// Inflated delta instruction stream.
        delta: Vec<u8>,
    },
}
411
/// How a delta entry names its base object.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Base identified by the in-pack offset of its entry (ofs-delta).
    Offset(u64),
    /// Base identified by its object id (ref-delta).
    Ref(ObjectId),
}
417
/// One pack entry as stored on disk, used by [`PackFile::verify_pack_stats`] to
/// recover the delta structure and on-disk stream size that resolved
/// [`PackObject`]s no longer carry.
struct OnDiskEntry {
    /// Byte offset in the pack at which the entry header begins.
    offset: u64,
    /// Base reference for delta entries; `None` for undeltified entries.
    base: Option<DeltaBase>,
    /// Size declared in the entry header: the object size for an undeltified
    /// entry, the delta instruction stream length for a delta entry.
    stream_size: u64,
}
426
427impl PackFile {
    /// Parse a self-contained SHA-1 pack.
    ///
    /// Shorthand for [`PackFile::parse`] with [`ObjectFormat::Sha1`].
    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
431
    /// Parse a pack whose object ids use `format`, without access to any
    /// external base objects.
    ///
    /// The external-base lookup handed to the parser always answers
    /// `Ok(None)`, so only bases stored in the pack itself can be found.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_with_base(bytes, format, |_| Ok(None))
    }
435
    /// Parse the pack carried by `bundle`, using the bundle's own object
    /// format.
    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
        Self::parse(&bundle.pack, bundle.format)
    }
439
440    pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
441        let PackIndexBuild {
442            index,
443            pack_checksum,
444            entries,
445        } = PackIndex::write_v2_for_pack(bytes, format)?;
446        Ok(PackWrite {
447            pack: bytes.to_vec(),
448            index,
449            checksum: pack_checksum,
450            entries,
451        })
452    }
453
    /// Parse a thin pack, asking `external_base` for delta bases that are not
    /// stored in the pack.
    ///
    /// `external_base` receives the wanted object id and returns the object,
    /// `Ok(None)` when it does not have it, or an error.
    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
    where
        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
    {
        Self::parse_with_base(bytes, format, external_base)
    }
460
461    fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
462    where
463        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
464    {
465        let trailer_len = format.raw_len();
466        if bytes.len() < 12 + trailer_len {
467            return Err(GitError::InvalidFormat("pack file too short".into()));
468        }
469        let trailer_offset = bytes.len() - trailer_len;
470        let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
471        let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
472        if checksum != expected {
473            return Err(GitError::InvalidFormat(format!(
474                "pack checksum mismatch: expected {expected}, got {checksum}"
475            )));
476        }
477
478        if &bytes[..4] != b"PACK" {
479            return Err(GitError::InvalidFormat("missing PACK signature".into()));
480        }
481        let version = u32_be(&bytes[4..8]);
482        if version != 2 && version != 3 {
483            return Err(GitError::Unsupported(format!("pack version {version}")));
484        }
485        let count = u32_be(&bytes[8..12]) as usize;
486        let mut offset = 12usize;
487        let mut entries = Vec::with_capacity(count);
488        for _ in 0..count {
489            let entry_offset = offset;
490            let header = parse_entry_header(bytes, &mut offset)?;
491            let base =
492                match header.kind {
493                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
494                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
495                    )),
496                    PackObjectKind::RefDelta => {
497                        let hash_len = format.raw_len();
498                        if offset + hash_len > trailer_offset {
499                            return Err(GitError::InvalidFormat(
500                                "truncated ref-delta base object id".into(),
501                            ));
502                        }
503                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
504                        offset += hash_len;
505                        Some(DeltaBase::Ref(oid))
506                    }
507                    _ => None,
508                };
509            let mut body = Vec::new();
510            let consumed = inflate_into(
511                &bytes[offset..trailer_offset],
512                &mut body,
513                header.size.min(usize::MAX as u64) as usize,
514            )?;
515            if body.len() as u64 != header.size {
516                return Err(GitError::InvalidObject(format!(
517                    "pack object declared {} bytes, decoded {}",
518                    header.size,
519                    body.len()
520                )));
521            }
522            if consumed == 0 {
523                return Err(GitError::InvalidFormat(
524                    "empty compressed pack entry".into(),
525                ));
526            }
527            offset = offset
528                .checked_add(consumed)
529                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
530            if offset > trailer_offset {
531                return Err(GitError::InvalidFormat(
532                    "pack entry extends past checksum".into(),
533                ));
534            }
535            if let Some(base) = base {
536                entries.push(ParsedPackEntry::Delta {
537                    base,
538                    compressed_size: consumed as u64,
539                    delta_size: header.size,
540                    offset: entry_offset as u64,
541                    delta: body,
542                });
543            } else {
544                let object_type = match header.kind {
545                    PackObjectKind::Commit => ObjectType::Commit,
546                    PackObjectKind::Tree => ObjectType::Tree,
547                    PackObjectKind::Blob => ObjectType::Blob,
548                    PackObjectKind::Tag => ObjectType::Tag,
549                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
550                };
551                let object = EncodedObject::new(object_type, body);
552                let oid = object.object_id(format)?;
553                entries.push(ParsedPackEntry::Resolved(PackObject {
554                    entry: PackEntry {
555                        oid,
556                        compressed_size: consumed as u64,
557                        uncompressed_size: header.size,
558                        offset: entry_offset as u64,
559                    },
560                    object,
561                }));
562            }
563        }
564        if offset != trailer_offset {
565            return Err(GitError::InvalidFormat(format!(
566                "pack has {} trailing bytes before checksum",
567                trailer_offset - offset
568            )));
569        }
570        Ok(Self {
571            version,
572            entries: resolve_pack_entries(entries, format, &mut external_base)?,
573            checksum,
574        })
575    }
576
577    /// Walk the pack and produce per-object statistics matching the output of
578    /// `git verify-pack -v` / `git index-pack --verify-stat`.
579    ///
580    /// Objects are returned in pack offset order (the order `git verify-pack -v`
581    /// prints them). Each entry carries the *resolved* object id, type and size,
582    /// the in-pack byte span (`size_in_pack` = the offset delta to the next
583    /// object, or to the trailing checksum for the last object), the in-pack
584    /// offset, the delta chain depth (`0` for undeltified objects), and — for
585    /// deltas — the object id of the *immediate* base (which may itself be a
586    /// delta). This mirrors `builtin/index-pack.c`'s `show_pack_info`.
587    pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
588        // Resolve the whole pack first: this validates the trailing checksum,
589        // every object's inflate, and yields the resolved oid/type/size keyed by
590        // offset. `verify-pack` is exactly this validation plus the stat report.
591        let pack = Self::parse(bytes, format)?;
592
593        // Independently walk the on-disk entries to recover each object's stored
594        // kind and (for deltas) its base reference — information `PackFile`
595        // discards once deltas are resolved.
596        let trailer_len = format.raw_len();
597        let trailer_offset = bytes.len() - trailer_len;
598        let count = u32_be(&bytes[8..12]) as usize;
599        let mut offset = 12usize;
600        // Per entry in read (offset) order: (offset, base, on-disk stream size).
601        // The stream size is what git prints in the size column: it is the
602        // resolved object size for an undeltified entry, but the *delta
603        // instruction stream* length for a delta entry (builtin/index-pack.c sets
604        // `obj->size` from the entry header, before any delta is applied).
605        let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
606        for _ in 0..count {
607            let entry_offset = offset as u64;
608            let header = parse_entry_header(bytes, &mut offset)?;
609            let stream_size = header.size;
610            let base = match header.kind {
611                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
612                    parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
613                )),
614                PackObjectKind::RefDelta => {
615                    let hash_len = format.raw_len();
616                    if offset + hash_len > trailer_offset {
617                        return Err(GitError::InvalidFormat(
618                            "truncated ref-delta base object id".into(),
619                        ));
620                    }
621                    let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
622                    offset += hash_len;
623                    Some(DeltaBase::Ref(oid))
624                }
625                _ => None,
626            };
627            // Skip the compressed body to reach the next entry header.
628            let mut body = Vec::new();
629            let consumed = inflate_into(
630                &bytes[offset..trailer_offset],
631                &mut body,
632                header.size.min(usize::MAX as u64) as usize,
633            )?;
634            offset = offset
635                .checked_add(consumed)
636                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
637            on_disk.push(OnDiskEntry {
638                offset: entry_offset,
639                base,
640                stream_size,
641            });
642        }
643
644        // Map offset -> resolved object so the on-disk walk can join in oid/type.
645        let mut resolved_by_offset: HashMap<u64, &PackObject> =
646            HashMap::with_capacity(pack.entries.len());
647        for object in &pack.entries {
648            resolved_by_offset.insert(object.entry.offset, object);
649        }
650        // Map offset -> resolved oid, for ofs-delta base lookups.
651        let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
652        for entry in &on_disk {
653            if let Some(object) = resolved_by_offset.get(&entry.offset) {
654                oid_by_offset.insert(entry.offset, object.entry.oid);
655            }
656        }
657        // Map base offset -> index in `on_disk`, for delta-depth propagation.
658        let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
659        for (idx, entry) in on_disk.iter().enumerate() {
660            index_by_offset.insert(entry.offset, idx);
661        }
662
663        // Sorted offsets give the size-in-pack span (next offset - this offset),
664        // with the trailing checksum offset as the final sentinel.
665        let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
666        sorted_offsets.sort_unstable();
667        let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
668        for window in sorted_offsets.windows(2) {
669            next_offset.insert(window[0], window[1]);
670        }
671        if let Some(last) = sorted_offsets.last() {
672            next_offset.insert(*last, trailer_offset as u64);
673        }
674
675        // Compute delta depth by following base offsets. Depth of a non-delta is
676        // 0; a delta's depth is its base's depth + 1. `index_by_offset` lets an
677        // ofs-delta find its base's index; a ref-delta resolves its base oid to
678        // an in-pack offset when present (thin-pack external bases are not stored
679        // in this pack, but verify-pack only ever runs on self-contained packs).
680        let mut depth = vec![None; on_disk.len()];
681        fn resolve_depth(
682            idx: usize,
683            on_disk: &[OnDiskEntry],
684            index_by_offset: &HashMap<u64, usize>,
685            offset_of_oid: &HashMap<ObjectId, u64>,
686            depth: &mut [Option<u32>],
687        ) -> u32 {
688            if let Some(d) = depth[idx] {
689                return d;
690            }
691            let computed = match &on_disk[idx].base {
692                None => 0,
693                Some(base) => {
694                    let base_idx = match base {
695                        DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
696                        DeltaBase::Ref(oid) => offset_of_oid
697                            .get(oid)
698                            .and_then(|off| index_by_offset.get(off).copied()),
699                    };
700                    match base_idx {
701                        Some(bi) => {
702                            resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
703                        }
704                        // Base not in this pack (thin pack); treat as depth 1.
705                        None => 1,
706                    }
707                }
708            };
709            depth[idx] = Some(computed);
710            computed
711        }
712        let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
713        for (off, oid) in &oid_by_offset {
714            offset_of_oid.insert(*oid, *off);
715        }
716        for idx in 0..on_disk.len() {
717            resolve_depth(
718                idx,
719                &on_disk,
720                &index_by_offset,
721                &offset_of_oid,
722                &mut depth,
723            );
724        }
725
726        let mut stats = Vec::with_capacity(on_disk.len());
727        for (idx, entry) in on_disk.iter().enumerate() {
728            let off = entry.offset;
729            let object = resolved_by_offset.get(&off).ok_or_else(|| {
730                GitError::InvalidFormat("pack offset missing from resolved set".into())
731            })?;
732            let size_in_pack = next_offset
733                .get(&off)
734                .copied()
735                .unwrap_or(trailer_offset as u64)
736                .saturating_sub(off);
737            let base_oid = match &entry.base {
738                None => None,
739                Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
740                Some(DeltaBase::Ref(oid)) => Some(*oid),
741            };
742            stats.push(PackVerifyStat {
743                oid: object.entry.oid,
744                object_type: object.object.object_type,
745                // git prints the on-disk stream size: object body size for an
746                // undeltified entry, delta-instruction stream size for a delta.
747                size: entry.stream_size,
748                size_in_pack,
749                offset: off,
750                delta_depth: depth[idx].unwrap_or(0),
751                base_oid,
752            });
753        }
754        // Emit in pack offset order, matching git's read order.
755        stats.sort_by_key(|stat| stat.offset);
756
757        Ok(PackVerifyStats {
758            objects: stats,
759            checksum: pack.checksum,
760        })
761    }
762
    /// Write an undeltified SHA-1 pack.
    ///
    /// Shorthand for [`PackFile::write_undeltified`] with
    /// [`ObjectFormat::Sha1`].
    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_undeltified(objects, ObjectFormat::Sha1)
    }
769
770    /// Write a pack with every object stored undeltified (no delta entries).
771    ///
772    /// This is the simple, self-contained encoding; objects appear in the given
773    /// order. For smaller output that exploits similarity between objects, use
774    /// [`PackFile::write_packed`].
775    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
776    where
777        T: Borrow<EncodedObject>,
778    {
779        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
780        Self::write_packed_impl(objects, format, &options)
781    }
782
783    /// Write a pack using sliding-window delta selection with git-compatible
784    /// defaults (window [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`],
785    /// ofs-deltas, self-contained).
786    ///
787    /// Objects are grouped by type and ordered for good deltas, then each is
788    /// compared against a window of previously emitted candidates; the smallest
789    /// acceptable delta is kept, otherwise the object is stored undeltified. The
790    /// result round-trips through [`PackFile::parse`].
791    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
792    where
793        T: Borrow<EncodedObject>,
794    {
795        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
796    }
797
    /// Like [`PackFile::write_packed`] but with caller-supplied
    /// [`PackWriteOptions`] (window, depth, base-reference style, reordering,
    /// and optional external thin bases).
    ///
    /// This is a thin public entry point; all work happens in the shared
    /// writer implementation.
    pub fn write_packed_with_options<T>(
        objects: &[T],
        format: ObjectFormat,
        options: &PackWriteOptions,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_impl(objects, format, options)
    }
811
812    /// Like [`PackFile::write_packed`], but uses caller-supplied object ids
813    /// instead of re-hashing each object before pack planning.
814    ///
815    /// This is intended for object-database paths that reached each object by
816    /// its id and already trust that id/object mapping. The function validates
817    /// id formats and duplicate ids, but it does not re-hash object bodies; use
818    /// [`PackFile::write_packed`] when the ids are not already known to be
819    /// canonical.
820    pub fn write_packed_with_known_ids(
821        inputs: &[PackInput<'_>],
822        format: ObjectFormat,
823    ) -> Result<PackWrite> {
824        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
825    }
826
827    /// Like [`PackFile::write_packed_with_known_ids`] but with caller-supplied
828    /// [`PackWriteOptions`].
829    pub fn write_packed_with_known_ids_and_options(
830        inputs: &[PackInput<'_>],
831        format: ObjectFormat,
832        options: &PackWriteOptions,
833    ) -> Result<PackWrite> {
834        if inputs.len() > u32::MAX as usize {
835            return Err(GitError::InvalidFormat("too many pack objects".into()));
836        }
837        let mut objects = Vec::with_capacity(inputs.len());
838        let mut object_ids = Vec::with_capacity(inputs.len());
839        for input in inputs {
840            if input.oid.format() != format {
841                return Err(GitError::InvalidObjectId(format!(
842                    "pack object id {} uses {}, pack uses {}",
843                    input.oid,
844                    input.oid.format().name(),
845                    format.name()
846                )));
847            }
848            objects.push(input.object);
849            object_ids.push(*input.oid);
850        }
851        Self::write_packed_from_parts(objects, object_ids, format, options)
852    }
853
854    /// Write a thin pack: objects may be deltified against `external_bases`
855    /// that are *not* included in the pack, referenced by ref-delta to their
856    /// object id.
857    ///
858    /// The receiver must already have (or otherwise obtain) those base objects
859    /// and resolve the pack with [`PackFile::parse_thin`]. Window and depth use
860    /// the defaults; pass options via [`PackFile::write_packed_with_options`]
861    /// with [`PackWriteOptions::with_thin_bases`] for finer control.
862    pub fn write_thin<T>(
863        objects: &[T],
864        format: ObjectFormat,
865        external_bases: HashMap<ObjectId, EncodedObject>,
866    ) -> Result<PackWrite>
867    where
868        T: Borrow<EncodedObject>,
869    {
870        let options = PackWriteOptions::new().with_thin_bases(external_bases);
871        Self::write_packed_impl(objects, format, &options)
872    }
873
874    fn write_packed_impl<T>(
875        objects: &[T],
876        format: ObjectFormat,
877        options: &PackWriteOptions,
878    ) -> Result<PackWrite>
879    where
880        T: Borrow<EncodedObject>,
881    {
882        if objects.len() > u32::MAX as usize {
883            return Err(GitError::InvalidFormat("too many pack objects".into()));
884        }
885        let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
886
887        // Compute object ids up front; they are needed both for the index and,
888        // for ref-deltas, inside the pack entries themselves.
889        let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
890        for object in &objects {
891            object_ids.push(object.object_id(format)?);
892        }
893        Self::write_packed_from_parts(objects, object_ids, format, options)
894    }
895
896    fn write_packed_from_parts(
897        objects: Vec<&EncodedObject>,
898        object_ids: Vec<ObjectId>,
899        format: ObjectFormat,
900        options: &PackWriteOptions,
901    ) -> Result<PackWrite> {
902        let mut seen = HashSet::with_capacity(object_ids.len());
903        for oid in &object_ids {
904            if !seen.insert(oid) {
905                return Err(GitError::InvalidFormat(format!(
906                    "pack contains duplicate object id {oid}"
907                )));
908            }
909        }
910
911        // Validate external thin bases share the pack's hash format.
912        for oid in options.thin_bases.keys() {
913            if oid.format() != format {
914                return Err(GitError::InvalidObjectId(
915                    "thin pack base object id format does not match pack format".into(),
916                ));
917            }
918        }
919
920        // Decide, for each object, whether it is stored undeltified or as a
921        // delta against another object (in-pack or an external thin base), and
922        // obtain the emit order. In-pack deltas only ever reference candidates
923        // that appear earlier in `order`, so emitting in `order` guarantees a
924        // base is always written before any object that deltas against it.
925        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
926
927        let mut pack = Vec::new();
928        pack.extend_from_slice(b"PACK");
929        pack.extend_from_slice(&2u32.to_be_bytes());
930        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
931
932        let mut index_entries = Vec::with_capacity(objects.len());
933        // Pack offset at which each original object index was written, or
934        // `None` until it has been emitted.
935        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
936
937        let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
938
939        for (order_pos, &idx) in order.iter().enumerate() {
940            let offset = pack.len() as u64;
941            let mut entry_bytes = Vec::new();
942            match &plan[idx].base {
943                PlannedBase::None => {
944                    write_entry_header(
945                        &mut entry_bytes,
946                        objects[idx].object_type,
947                        objects[idx].body.len() as u64,
948                    );
949                }
950                PlannedBase::InPack { base_idx, delta } => {
951                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
952                        GitError::InvalidFormat(
953                            "in-pack delta base emitted after dependent object".into(),
954                        )
955                    })?;
956                    if options.prefer_ofs_delta {
957                        write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
958                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
959                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
960                        })?;
961                        write_ofs_delta_offset(&mut entry_bytes, relative)?;
962                    } else {
963                        write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
964                        entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
965                    }
966                }
967                PlannedBase::External { base_oid, delta } => {
968                    write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
969                    entry_bytes.extend_from_slice(base_oid.as_bytes());
970                }
971            }
972            entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
973            let crc32 = crc32fast::hash(&entry_bytes);
974            pack.extend_from_slice(&entry_bytes);
975            written_offsets[idx] = Some(offset);
976            index_entries.push(PackIndexEntry {
977                oid: object_ids[idx].clone(),
978                crc32,
979                offset,
980            });
981        }
982
983        let checksum = sley_core::digest_bytes(format, &pack)?;
984        pack.extend_from_slice(checksum.as_bytes());
985        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
986        Ok(PackWrite {
987            pack,
988            index,
989            checksum,
990            entries: index_entries,
991        })
992    }
993}
994
995impl<'a> PackIndexView<'a> {
996    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
997        Self::parse(bytes, ObjectFormat::Sha1)
998    }
999
1000    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1001        Self::parse_impl(bytes, format, true, true)
1002    }
1003
1004    /// Parse and validate the index layout without recomputing the trailing
1005    /// index checksum. The checksum stored in the file is still exposed via
1006    /// [`PackIndexView::index_checksum`].
1007    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1008        Self::parse_impl(bytes, format, false, true)
1009    }
1010
1011    /// Parse a local/trusted pack index without recomputing the trailing index
1012    /// checksum or walking every entry for canonical-order validation.
1013    ///
1014    /// This still validates the table layout and all lookup paths remain
1015    /// bounds-checked, but it avoids O(number-of-objects) startup validation for
1016    /// repository-owned `.idx` files in hot read paths.
1017    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1018        Self::parse_impl(bytes, format, false, false)
1019    }
1020
1021    pub fn count(&self) -> usize {
1022        self.count
1023    }
1024
1025    pub fn fanout(&self) -> &[u32; 256] {
1026        &self.fanout
1027    }
1028
1029    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1030        if oid.format() != self.format {
1031            return None;
1032        }
1033        let bucket = usize::from(oid.as_bytes()[0]);
1034        let mut start = if bucket == 0 {
1035            0
1036        } else {
1037            self.fanout[bucket - 1] as usize
1038        };
1039        let mut end = self.fanout[bucket] as usize;
1040        let target = oid.as_bytes();
1041
1042        while start < end {
1043            let mid = start + (end - start) / 2;
1044            match self.oid_bytes_at(mid).cmp(target) {
1045                std::cmp::Ordering::Less => start = mid + 1,
1046                std::cmp::Ordering::Equal => return self.lookup_at(mid),
1047                std::cmp::Ordering::Greater => end = mid,
1048            }
1049        }
1050        None
1051    }
1052
1053    fn parse_impl(
1054        bytes: &'a [u8],
1055        format: ObjectFormat,
1056        verify_checksum: bool,
1057        validate_entries: bool,
1058    ) -> Result<Self> {
1059        let hash_len = format.raw_len();
1060        if bytes.len() < 4 {
1061            return Err(GitError::InvalidFormat("pack index too short".into()));
1062        }
1063        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1064            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1065        }
1066        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1067            return Err(GitError::InvalidFormat("pack index too short".into()));
1068        }
1069        let version = u32_be(&bytes[4..8]);
1070        if version != 2 {
1071            return Err(GitError::Unsupported(format!(
1072                "pack index version {version}"
1073            )));
1074        }
1075        let index_checksum_offset = bytes.len() - hash_len;
1076        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1077        if verify_checksum {
1078            let actual_index_checksum =
1079                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1080            if actual_index_checksum != index_checksum {
1081                return Err(GitError::InvalidFormat(format!(
1082                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1083                )));
1084            }
1085        }
1086
1087        let mut offset = 8usize;
1088        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1089        let count = fanout[255] as usize;
1090        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1091        offset = oid_table.end;
1092        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1093        offset = crc_table.end;
1094        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1095        offset = small_offset_table.end;
1096
1097        let large_offset_count = (0..count)
1098            .filter(|idx| {
1099                let start = small_offset_table.start + idx * 4;
1100                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1101            })
1102            .count();
1103        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1104        offset = large_offset_table.end;
1105
1106        let expected_trailer_offset = bytes.len() - hash_len * 2;
1107        if offset != expected_trailer_offset {
1108            return Err(GitError::InvalidFormat(format!(
1109                "pack index has {} unexpected bytes before trailer",
1110                expected_trailer_offset.saturating_sub(offset)
1111            )));
1112        }
1113        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1114
1115        let view = Self {
1116            version,
1117            count,
1118            fanout,
1119            pack_checksum,
1120            index_checksum,
1121            bytes,
1122            format,
1123            tables: PackIndexViewTables::V2 {
1124                oid_table,
1125                crc_table,
1126                small_offset_table,
1127                large_offset_table,
1128            },
1129        };
1130        if validate_entries {
1131            view.validate_v2_entries()?;
1132        }
1133        Ok(view)
1134    }
1135
1136    fn parse_v1_impl(
1137        bytes: &'a [u8],
1138        format: ObjectFormat,
1139        verify_checksum: bool,
1140        validate_entries: bool,
1141    ) -> Result<Self> {
1142        let hash_len = format.raw_len();
1143        if bytes.len() < 256 * 4 + 2 * hash_len {
1144            return Err(GitError::InvalidFormat("pack index too short".into()));
1145        }
1146        let index_checksum_offset = bytes.len() - hash_len;
1147        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1148        if verify_checksum {
1149            let actual_index_checksum =
1150                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1151            if actual_index_checksum != index_checksum {
1152                return Err(GitError::InvalidFormat(format!(
1153                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1154                )));
1155            }
1156        }
1157
1158        let mut offset = 0usize;
1159        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1160        let count = fanout[255] as usize;
1161        let entry_len = hash_len
1162            .checked_add(4)
1163            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1164        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1165        offset = entry_table.end;
1166        let expected_trailer_offset = bytes.len() - hash_len * 2;
1167        if offset != expected_trailer_offset {
1168            return Err(GitError::InvalidFormat(format!(
1169                "pack index has {} unexpected bytes before trailer",
1170                expected_trailer_offset.saturating_sub(offset)
1171            )));
1172        }
1173        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1174
1175        let view = Self {
1176            version: 1,
1177            count,
1178            fanout,
1179            pack_checksum,
1180            index_checksum,
1181            bytes,
1182            format,
1183            tables: PackIndexViewTables::V1 { entry_table },
1184        };
1185        if validate_entries {
1186            view.validate_v1_entries()?;
1187        }
1188        Ok(view)
1189    }
1190
1191    fn validate_v2_entries(&self) -> Result<()> {
1192        let PackIndexViewTables::V2 {
1193            oid_table,
1194            small_offset_table,
1195            large_offset_table,
1196            ..
1197        } = &self.tables
1198        else {
1199            unreachable!("v2 validation only runs for v2 views");
1200        };
1201        let oid_table = self.slice(oid_table.clone());
1202        let small_offset_table = self.slice(small_offset_table.clone());
1203        let large_offset_table = self.slice(large_offset_table.clone());
1204        let hash_len = self.format.raw_len();
1205        for idx in 0..self.count {
1206            let oid_start = idx * hash_len;
1207            let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1208            if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1209                return Err(GitError::InvalidFormat(
1210                    "pack index object ids are not strictly ascending".into(),
1211                ));
1212            }
1213            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1214
1215            let offset_start = idx * 4;
1216            let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1217            pack_index_v2_offset(raw_offset, large_offset_table)?;
1218        }
1219        Ok(())
1220    }
1221
1222    fn validate_v1_entries(&self) -> Result<()> {
1223        let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1224            unreachable!("v1 validation only runs for v1 views");
1225        };
1226        let entry_table = self.slice(entry_table.clone());
1227        let hash_len = self.format.raw_len();
1228        let entry_len = hash_len
1229            .checked_add(4)
1230            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1231        for idx in 0..self.count {
1232            let start = idx * entry_len;
1233            let oid_start = start + 4;
1234            let oid_bytes = &entry_table[oid_start..start + entry_len];
1235            if idx > 0 {
1236                let previous_oid_start = oid_start - entry_len;
1237                let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1238                if previous_oid >= oid_bytes {
1239                    return Err(GitError::InvalidFormat(
1240                        "pack index object ids are not strictly sorted".into(),
1241                    ));
1242                }
1243            }
1244            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1245        }
1246        Ok(())
1247    }
1248
1249    fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1250        let hash_len = self.format.raw_len();
1251        match &self.tables {
1252            PackIndexViewTables::V1 { entry_table } => {
1253                let entry_table = self.slice(entry_table.clone());
1254                let entry_len = hash_len + 4;
1255                let start = idx * entry_len + 4;
1256                &entry_table[start..start + hash_len]
1257            }
1258            PackIndexViewTables::V2 { oid_table, .. } => {
1259                let oid_table = self.slice(oid_table.clone());
1260                let start = idx * hash_len;
1261                &oid_table[start..start + hash_len]
1262            }
1263        }
1264    }
1265
1266    fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1267        if idx >= self.count {
1268            return None;
1269        }
1270        let hash_len = self.format.raw_len();
1271        match &self.tables {
1272            PackIndexViewTables::V1 { entry_table } => {
1273                let entry_table = self.slice(entry_table.clone());
1274                let entry_len = hash_len + 4;
1275                let start = idx * entry_len;
1276                Some(PackIndexLookup {
1277                    crc32: 0,
1278                    offset: u64::from(u32_be(&entry_table[start..start + 4])),
1279                })
1280            }
1281            PackIndexViewTables::V2 {
1282                crc_table,
1283                small_offset_table,
1284                large_offset_table,
1285                ..
1286            } => {
1287                let crc_table = self.slice(crc_table.clone());
1288                let small_offset_table = self.slice(small_offset_table.clone());
1289                let large_offset_table = self.slice(large_offset_table.clone());
1290                let crc_start = idx * 4;
1291                let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1292                Some(PackIndexLookup {
1293                    crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1294                    offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1295                })
1296            }
1297        }
1298    }
1299
1300    fn slice(&self, range: Range<usize>) -> &'a [u8] {
1301        &self.bytes[range]
1302    }
1303}
1304
1305impl PackIndexViewData {
1306    pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1307        Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1308    }
1309
1310    /// Parse and validate an owned index view without recomputing the trailing
1311    /// index checksum. The stored checksum is still exposed via
1312    /// [`PackIndexViewData::index_checksum`].
1313    pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1314        Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1315    }
1316
1317    /// Parse a local/trusted owned index view without the checksum or full-entry
1318    /// validation passes.
1319    pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1320        Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1321    }
1322
1323    pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
1324        Self::parse_impl(bytes, format, true, true)
1325    }
1326
1327    pub fn parse_source_without_checksum(
1328        bytes: Arc<dyn PackIndexByteSource>,
1329        format: ObjectFormat,
1330    ) -> Result<Self> {
1331        Self::parse_impl(bytes, format, false, true)
1332    }
1333
1334    pub fn parse_trusted_source_without_checksum(
1335        bytes: Arc<dyn PackIndexByteSource>,
1336        format: ObjectFormat,
1337    ) -> Result<Self> {
1338        Self::parse_impl(bytes, format, false, false)
1339    }
1340
1341    pub fn count(&self) -> usize {
1342        self.count
1343    }
1344
1345    pub fn fanout(&self) -> &[u32; 256] {
1346        &self.fanout
1347    }
1348
1349    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1350        self.as_view().find(oid)
1351    }
1352
1353    pub fn as_view(&self) -> PackIndexView<'_> {
1354        PackIndexView {
1355            version: self.version,
1356            count: self.count,
1357            fanout: self.fanout,
1358            pack_checksum: self.pack_checksum,
1359            index_checksum: self.index_checksum,
1360            bytes: self.bytes.as_bytes(),
1361            format: self.format,
1362            tables: self.tables.clone(),
1363        }
1364    }
1365
1366    fn parse_impl(
1367        bytes: Arc<dyn PackIndexByteSource>,
1368        format: ObjectFormat,
1369        verify_checksum: bool,
1370        validate_entries: bool,
1371    ) -> Result<Self> {
1372        let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1373            let view = PackIndexView::parse_impl(
1374                bytes.as_bytes(),
1375                format,
1376                verify_checksum,
1377                validate_entries,
1378            )?;
1379            (
1380                view.version,
1381                view.count,
1382                view.fanout,
1383                view.pack_checksum,
1384                view.index_checksum,
1385                view.tables,
1386            )
1387        };
1388        Ok(Self {
1389            version,
1390            count,
1391            fanout,
1392            pack_checksum,
1393            index_checksum,
1394            bytes,
1395            format,
1396            tables,
1397        })
1398    }
1399}
1400
1401impl PackIndex {
1402    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1403        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1404    }
1405
1406    pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1407        let trailer_len = format.raw_len();
1408        if pack_bytes.len() < 12 + trailer_len {
1409            return Err(GitError::InvalidFormat("pack file too short".into()));
1410        }
1411        let trailer_offset = pack_bytes.len() - trailer_len;
1412        let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1413        let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1414        if pack_checksum != expected {
1415            return Err(GitError::InvalidFormat(format!(
1416                "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1417            )));
1418        }
1419
1420        if &pack_bytes[..4] != b"PACK" {
1421            return Err(GitError::InvalidFormat("missing PACK signature".into()));
1422        }
1423        let version = u32_be(&pack_bytes[4..8]);
1424        if version != 2 && version != 3 {
1425            return Err(GitError::Unsupported(format!("pack version {version}")));
1426        }
1427        let count = u32_be(&pack_bytes[8..12]) as usize;
1428        let mut offset = 12usize;
1429        let mut parsed_entries = Vec::with_capacity(count);
1430        let mut raw_entries = Vec::with_capacity(count);
1431        for _ in 0..count {
1432            let entry_offset = offset;
1433            let header = parse_entry_header(pack_bytes, &mut offset)?;
1434            let base = match header.kind {
1435                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1436                    pack_bytes,
1437                    &mut offset,
1438                    entry_offset as u64,
1439                )?)),
1440                PackObjectKind::RefDelta => {
1441                    let hash_len = format.raw_len();
1442                    if offset + hash_len > trailer_offset {
1443                        return Err(GitError::InvalidFormat(
1444                            "truncated ref-delta base object id".into(),
1445                        ));
1446                    }
1447                    let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1448                    offset += hash_len;
1449                    Some(DeltaBase::Ref(oid))
1450                }
1451                _ => None,
1452            };
1453            let mut body = Vec::new();
1454            let consumed = inflate_into(
1455                &pack_bytes[offset..trailer_offset],
1456                &mut body,
1457                header.size.min(usize::MAX as u64) as usize,
1458            )?;
1459            if body.len() as u64 != header.size {
1460                return Err(GitError::InvalidObject(format!(
1461                    "pack object declared {} bytes, decoded {}",
1462                    header.size,
1463                    body.len()
1464                )));
1465            }
1466            if consumed == 0 {
1467                return Err(GitError::InvalidFormat(
1468                    "empty compressed pack entry".into(),
1469                ));
1470            }
1471            offset = offset
1472                .checked_add(consumed)
1473                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1474            if offset > trailer_offset {
1475                return Err(GitError::InvalidFormat(
1476                    "pack entry extends past checksum".into(),
1477                ));
1478            }
1479            raw_entries.push((
1480                entry_offset as u64,
1481                crc32fast::hash(&pack_bytes[entry_offset..offset]),
1482            ));
1483            if let Some(base) = base {
1484                parsed_entries.push(ParsedPackEntry::Delta {
1485                    base,
1486                    compressed_size: consumed as u64,
1487                    delta_size: header.size,
1488                    offset: entry_offset as u64,
1489                    delta: body,
1490                });
1491            } else {
1492                let object_type = match header.kind {
1493                    PackObjectKind::Commit => ObjectType::Commit,
1494                    PackObjectKind::Tree => ObjectType::Tree,
1495                    PackObjectKind::Blob => ObjectType::Blob,
1496                    PackObjectKind::Tag => ObjectType::Tag,
1497                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1498                };
1499                let object = EncodedObject::new(object_type, body);
1500                let oid = object.object_id(format)?;
1501                parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1502                    entry: PackEntry {
1503                        oid,
1504                        compressed_size: consumed as u64,
1505                        uncompressed_size: header.size,
1506                        offset: entry_offset as u64,
1507                    },
1508                    object,
1509                }));
1510            }
1511        }
1512        if offset != trailer_offset {
1513            return Err(GitError::InvalidFormat(format!(
1514                "pack has {} trailing bytes before checksum",
1515                trailer_offset - offset
1516            )));
1517        }
1518
1519        let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1520        let entries = resolved
1521            .iter()
1522            .zip(raw_entries)
1523            .map(|(object, (offset, crc32))| PackIndexEntry {
1524                oid: object.entry.oid,
1525                crc32,
1526                offset,
1527            })
1528            .collect::<Vec<_>>();
1529        let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1530        Ok(PackIndexBuild {
1531            index,
1532            pack_checksum,
1533            entries,
1534        })
1535    }
1536
1537    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1538        Self::parse(bytes, ObjectFormat::Sha1)
1539    }
1540
1541    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1542        let hash_len = format.raw_len();
1543        if bytes.len() < 4 {
1544            return Err(GitError::InvalidFormat("pack index too short".into()));
1545        }
1546        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1547            return Self::parse_v1(bytes, format);
1548        }
1549        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1550            return Err(GitError::InvalidFormat("pack index too short".into()));
1551        }
1552        let version = u32_be(&bytes[4..8]);
1553        if version != 2 {
1554            return Err(GitError::Unsupported(format!(
1555                "pack index version {version}"
1556            )));
1557        }
1558        let index_checksum_offset = bytes.len() - hash_len;
1559        let actual_index_checksum =
1560            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1561        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1562        if actual_index_checksum != index_checksum {
1563            return Err(GitError::InvalidFormat(format!(
1564                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1565            )));
1566        }
1567
1568        let mut offset = 8usize;
1569        let mut fanout = [0u32; 256];
1570        let mut previous = 0u32;
1571        for slot in &mut fanout {
1572            *slot = u32_be(&bytes[offset..offset + 4]);
1573            if *slot < previous {
1574                return Err(GitError::InvalidFormat(
1575                    "pack index fanout is not monotonic".into(),
1576                ));
1577            }
1578            previous = *slot;
1579            offset += 4;
1580        }
1581        let count = fanout[255] as usize;
1582        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1583        offset = oid_table.end;
1584        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1585        offset = crc_table.end;
1586        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1587        offset = small_offset_table.end;
1588
1589        let large_offset_count = (0..count)
1590            .filter(|idx| {
1591                let start = small_offset_table.start + idx * 4;
1592                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1593            })
1594            .count();
1595        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1596        offset = large_offset_table.end;
1597
1598        let expected_trailer_offset = bytes.len() - hash_len * 2;
1599        if offset != expected_trailer_offset {
1600            return Err(GitError::InvalidFormat(format!(
1601                "pack index has {} unexpected bytes before trailer",
1602                expected_trailer_offset.saturating_sub(offset)
1603            )));
1604        }
1605        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1606
1607        let mut entries = Vec::with_capacity(count);
1608        for idx in 0..count {
1609            let oid_start = oid_table.start + idx * hash_len;
1610            let crc_start = crc_table.start + idx * 4;
1611            let offset_start = small_offset_table.start + idx * 4;
1612            let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1613            // Object ids must be strictly ascending: lookup binary-searches them,
1614            // and the fanout must match the first byte. A malformed/forged index
1615            // (e.g. from a received pack) would otherwise yield silent misses.
1616            if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1617                return Err(GitError::InvalidFormat(
1618                    "pack index object ids are not strictly ascending".into(),
1619                ));
1620            }
1621            let expected_min = if oid_bytes[0] == 0 {
1622                0
1623            } else {
1624                fanout[usize::from(oid_bytes[0] - 1)]
1625            };
1626            if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1627                return Err(GitError::InvalidFormat(
1628                    "pack index object id is outside its fanout bucket".into(),
1629                ));
1630            }
1631            let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1632            let offset = if raw_offset & 0x8000_0000 == 0 {
1633                u64::from(raw_offset)
1634            } else {
1635                let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1636                let large_start = large_offset_table.start + large_idx * 8;
1637                if large_idx >= large_offset_count {
1638                    return Err(GitError::InvalidFormat(
1639                        "pack index large offset points past table".into(),
1640                    ));
1641                }
1642                u64_be(&bytes[large_start..large_start + 8])
1643            };
1644            entries.push(PackIndexEntry {
1645                oid: ObjectId::from_raw(format, oid_bytes)?,
1646                crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1647                offset,
1648            });
1649        }
1650        Ok(Self {
1651            version,
1652            fanout,
1653            entries,
1654            pack_checksum,
1655            index_checksum,
1656        })
1657    }
1658
1659    fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1660        let hash_len = format.raw_len();
1661        if bytes.len() < 256 * 4 + 2 * hash_len {
1662            return Err(GitError::InvalidFormat("pack index too short".into()));
1663        }
1664        let index_checksum_offset = bytes.len() - hash_len;
1665        let actual_index_checksum =
1666            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1667        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1668        if actual_index_checksum != index_checksum {
1669            return Err(GitError::InvalidFormat(format!(
1670                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1671            )));
1672        }
1673
1674        let mut offset = 0usize;
1675        let mut fanout = [0u32; 256];
1676        let mut previous = 0u32;
1677        for slot in &mut fanout {
1678            *slot = u32_be(&bytes[offset..offset + 4]);
1679            if *slot < previous {
1680                return Err(GitError::InvalidFormat(
1681                    "pack index fanout is not monotonic".into(),
1682                ));
1683            }
1684            previous = *slot;
1685            offset += 4;
1686        }
1687        let count = fanout[255] as usize;
1688        let entry_len = hash_len
1689            .checked_add(4)
1690            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1691        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1692        offset = entry_table.end;
1693        let expected_trailer_offset = bytes.len() - hash_len * 2;
1694        if offset != expected_trailer_offset {
1695            return Err(GitError::InvalidFormat(format!(
1696                "pack index has {} unexpected bytes before trailer",
1697                expected_trailer_offset.saturating_sub(offset)
1698            )));
1699        }
1700        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1701
1702        let mut entries = Vec::with_capacity(count);
1703        let mut previous_oid: Option<ObjectId> = None;
1704        for idx in 0..count {
1705            let start = entry_table.start + idx * entry_len;
1706            let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1707            if let Some(previous) = &previous_oid
1708                && previous.as_bytes() >= oid.as_bytes()
1709            {
1710                return Err(GitError::InvalidFormat(
1711                    "pack index object ids are not strictly sorted".into(),
1712                ));
1713            }
1714            previous_oid = Some(oid);
1715            entries.push(PackIndexEntry {
1716                oid,
1717                crc32: 0,
1718                offset: u64::from(u32_be(&bytes[start..start + 4])),
1719            });
1720        }
1721        Ok(Self {
1722            version: 1,
1723            fanout,
1724            entries,
1725            pack_checksum,
1726            index_checksum,
1727        })
1728    }
1729
1730    pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1731        self.entries
1732            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1733            .ok()
1734            .map(|idx| &self.entries[idx])
1735    }
1736
    /// Serialises a version-2 pack index for SHA-1 object ids.
    ///
    /// Shorthand for [`Self::write_v2`] with [`ObjectFormat::Sha1`].
    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
    }
1740
1741    pub fn write_v2(
1742        format: ObjectFormat,
1743        entries: &[PackIndexEntry],
1744        pack_checksum: &ObjectId,
1745    ) -> Result<Vec<u8>> {
1746        if pack_checksum.format() != format {
1747            return Err(GitError::InvalidObjectId(
1748                "pack checksum format does not match index format".into(),
1749            ));
1750        }
1751        let mut entries = entries.iter().collect::<Vec<_>>();
1752        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1753        for pair in entries.windows(2) {
1754            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1755                return Err(GitError::InvalidFormat(format!(
1756                    "pack index contains duplicate object id {}",
1757                    pair[0].oid
1758                )));
1759            }
1760        }
1761        let mut fanout = [0u32; 256];
1762        for entry in &entries {
1763            if entry.oid.format() != format {
1764                return Err(GitError::InvalidObjectId(
1765                    "pack index entry format does not match index format".into(),
1766                ));
1767            }
1768            let first = entry.oid.as_bytes()[0] as usize;
1769            fanout[first] = fanout[first]
1770                .checked_add(1)
1771                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1772        }
1773        let mut running = 0u32;
1774        for slot in &mut fanout {
1775            running = running
1776                .checked_add(*slot)
1777                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1778            *slot = running;
1779        }
1780
1781        let mut index = Vec::new();
1782        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1783        index.extend_from_slice(&2u32.to_be_bytes());
1784        for count in fanout {
1785            index.extend_from_slice(&count.to_be_bytes());
1786        }
1787        for entry in &entries {
1788            index.extend_from_slice(entry.oid.as_bytes());
1789        }
1790        for entry in &entries {
1791            index.extend_from_slice(&entry.crc32.to_be_bytes());
1792        }
1793
1794        let mut large_offsets = Vec::new();
1795        for entry in &entries {
1796            if entry.offset < 0x8000_0000 {
1797                index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1798            } else {
1799                if large_offsets.len() > 0x7fff_ffff {
1800                    return Err(GitError::InvalidFormat(
1801                        "too many large pack offsets".into(),
1802                    ));
1803                }
1804                let large_idx = large_offsets.len() as u32;
1805                index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1806                large_offsets.push(entry.offset);
1807            }
1808        }
1809        for offset in large_offsets {
1810            index.extend_from_slice(&offset.to_be_bytes());
1811        }
1812        index.extend_from_slice(pack_checksum.as_bytes());
1813        let index_checksum = sley_core::digest_bytes(format, &index)?;
1814        index.extend_from_slice(index_checksum.as_bytes());
1815        Ok(index)
1816    }
1817
1818    /// Serialise a version-1 pack `.idx`: a 256-entry fanout, then for each
1819    /// object an inline 4-byte big-endian pack offset immediately followed by
1820    /// its object id (sorted by oid), then the pack checksum and a trailing
1821    /// index checksum. v1 has no CRC table and cannot represent offsets that
1822    /// do not fit in 32 bits.
1823    pub fn write_v1(
1824        format: ObjectFormat,
1825        entries: &[PackIndexEntry],
1826        pack_checksum: &ObjectId,
1827    ) -> Result<Vec<u8>> {
1828        if pack_checksum.format() != format {
1829            return Err(GitError::InvalidObjectId(
1830                "pack checksum format does not match index format".into(),
1831            ));
1832        }
1833        let mut entries = entries.iter().collect::<Vec<_>>();
1834        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1835        for pair in entries.windows(2) {
1836            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1837                return Err(GitError::InvalidFormat(format!(
1838                    "pack index contains duplicate object id {}",
1839                    pair[0].oid
1840                )));
1841            }
1842        }
1843        let mut fanout = [0u32; 256];
1844        for entry in &entries {
1845            if entry.oid.format() != format {
1846                return Err(GitError::InvalidObjectId(
1847                    "pack index entry format does not match index format".into(),
1848                ));
1849            }
1850            if entry.offset > 0xffff_ffff {
1851                return Err(GitError::InvalidFormat(
1852                    "pack offset too large for a version-1 index".into(),
1853                ));
1854            }
1855            let first = entry.oid.as_bytes()[0] as usize;
1856            fanout[first] = fanout[first]
1857                .checked_add(1)
1858                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1859        }
1860        let mut running = 0u32;
1861        for slot in &mut fanout {
1862            running = running
1863                .checked_add(*slot)
1864                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1865            *slot = running;
1866        }
1867
1868        let mut index = Vec::new();
1869        for count in fanout {
1870            index.extend_from_slice(&count.to_be_bytes());
1871        }
1872        for entry in &entries {
1873            index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1874            index.extend_from_slice(entry.oid.as_bytes());
1875        }
1876        index.extend_from_slice(pack_checksum.as_bytes());
1877        let index_checksum = sley_core::digest_bytes(format, &index)?;
1878        index.extend_from_slice(index_checksum.as_bytes());
1879        Ok(index)
1880    }
1881}
1882
1883/// The `.rev` table for a pack: index positions (the rank of each object in
1884/// the oid-sorted `.idx`) listed in pack order (ascending pack offset), as
1885/// upstream `write_rev_file` lays them out. Accepts `entries` in any order;
1886/// the result feeds [`PackReverseIndex::write`].
1887pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1888    let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1889    oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1890    let mut index_position = vec![0u32; entries.len()];
1891    for (position, &entry) in oid_sorted.iter().enumerate() {
1892        index_position[entry] = position as u32;
1893    }
1894    let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1895    by_offset.sort_by_key(|&entry| entries[entry].offset);
1896    by_offset
1897        .into_iter()
1898        .map(|entry| index_position[entry])
1899        .collect()
1900}
1901
1902impl PackReverseIndex {
1903    pub fn write(
1904        format: ObjectFormat,
1905        positions: &[u32],
1906        pack_checksum: &ObjectId,
1907    ) -> Result<Vec<u8>> {
1908        if pack_checksum.format() != format {
1909            return Err(GitError::InvalidObjectId(
1910                "pack checksum format does not match reverse index format".into(),
1911            ));
1912        }
1913        validate_position_permutation(positions)?;
1914
1915        let mut out = Vec::new();
1916        out.extend_from_slice(b"RIDX");
1917        out.extend_from_slice(&1u32.to_be_bytes());
1918        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1919        for position in positions {
1920            out.extend_from_slice(&position.to_be_bytes());
1921        }
1922        out.extend_from_slice(pack_checksum.as_bytes());
1923        let checksum = sley_core::digest_bytes(format, &out)?;
1924        out.extend_from_slice(checksum.as_bytes());
1925        Ok(out)
1926    }
1927
1928    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1929        let hash_len = format.raw_len();
1930        let table_len = object_count
1931            .checked_mul(4)
1932            .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1933        let min_len = 12usize
1934            .checked_add(table_len)
1935            .and_then(|len| len.checked_add(hash_len * 2))
1936            .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1937        if bytes.len() < min_len {
1938            return Err(GitError::InvalidFormat("reverse index too short".into()));
1939        }
1940        if bytes.len() != min_len {
1941            return Err(GitError::InvalidFormat(format!(
1942                "reverse index has {} trailing bytes",
1943                bytes.len() - min_len
1944            )));
1945        }
1946        if &bytes[..4] != b"RIDX" {
1947            return Err(GitError::InvalidFormat(
1948                "missing reverse index signature".into(),
1949            ));
1950        }
1951        let version = u32_be(&bytes[4..8]);
1952        if version != 1 {
1953            return Err(GitError::Unsupported(format!(
1954                "reverse index version {version}"
1955            )));
1956        }
1957        let hash_id = u32_be(&bytes[8..12]);
1958        if hash_id != hash_function_id(format) {
1959            return Err(GitError::InvalidFormat(format!(
1960                "reverse index hash id {hash_id} does not match {}",
1961                format.name()
1962            )));
1963        }
1964
1965        let index_checksum_offset = bytes.len() - hash_len;
1966        let actual_index_checksum =
1967            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1968        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1969        if actual_index_checksum != index_checksum {
1970            return Err(GitError::InvalidFormat(format!(
1971                "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1972            )));
1973        }
1974
1975        let pack_checksum_offset = index_checksum_offset - hash_len;
1976        let pack_checksum =
1977            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1978        let mut positions = Vec::with_capacity(object_count);
1979        let mut offset = 12usize;
1980        for _ in 0..object_count {
1981            let position = u32_be(&bytes[offset..offset + 4]);
1982            positions.push(position);
1983            offset += 4;
1984        }
1985        validate_position_permutation(&positions)?;
1986
1987        Ok(Self {
1988            version,
1989            format,
1990            positions,
1991            pack_checksum,
1992            index_checksum,
1993        })
1994    }
1995}
1996
1997impl PackMtimes {
1998    pub fn write(
1999        format: ObjectFormat,
2000        mtimes: &[u32],
2001        pack_checksum: &ObjectId,
2002    ) -> Result<Vec<u8>> {
2003        if pack_checksum.format() != format {
2004            return Err(GitError::InvalidObjectId(
2005                "pack checksum format does not match mtimes format".into(),
2006            ));
2007        }
2008
2009        let mut out = Vec::new();
2010        out.extend_from_slice(b"MTME");
2011        out.extend_from_slice(&1u32.to_be_bytes());
2012        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2013        for mtime in mtimes {
2014            out.extend_from_slice(&mtime.to_be_bytes());
2015        }
2016        out.extend_from_slice(pack_checksum.as_bytes());
2017        let checksum = sley_core::digest_bytes(format, &out)?;
2018        out.extend_from_slice(checksum.as_bytes());
2019        Ok(out)
2020    }
2021
2022    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2023        let hash_len = format.raw_len();
2024        let table_len = object_count
2025            .checked_mul(4)
2026            .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2027        let expected_len = 12usize
2028            .checked_add(table_len)
2029            .and_then(|len| len.checked_add(hash_len * 2))
2030            .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2031        if bytes.len() < expected_len {
2032            return Err(GitError::InvalidFormat("mtimes file too short".into()));
2033        }
2034        if bytes.len() != expected_len {
2035            return Err(GitError::InvalidFormat(format!(
2036                "mtimes file has {} trailing bytes",
2037                bytes.len() - expected_len
2038            )));
2039        }
2040        if &bytes[..4] != b"MTME" {
2041            return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2042        }
2043        let version = u32_be(&bytes[4..8]);
2044        if version != 1 {
2045            return Err(GitError::Unsupported(format!("mtimes version {version}")));
2046        }
2047        let hash_id = u32_be(&bytes[8..12]);
2048        if hash_id != hash_function_id(format) {
2049            return Err(GitError::InvalidFormat(format!(
2050                "mtimes hash id {hash_id} does not match {}",
2051                format.name()
2052            )));
2053        }
2054
2055        let index_checksum_offset = bytes.len() - hash_len;
2056        let actual_index_checksum =
2057            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2058        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2059        if actual_index_checksum != index_checksum {
2060            return Err(GitError::InvalidFormat(format!(
2061                "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2062            )));
2063        }
2064
2065        let pack_checksum_offset = index_checksum_offset - hash_len;
2066        let pack_checksum =
2067            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2068        let mut mtimes = Vec::with_capacity(object_count);
2069        let mut offset = 12usize;
2070        for _ in 0..object_count {
2071            mtimes.push(u32_be(&bytes[offset..offset + 4]));
2072            offset += 4;
2073        }
2074
2075        Ok(Self {
2076            version,
2077            format,
2078            mtimes,
2079            pack_checksum,
2080            index_checksum,
2081        })
2082    }
2083}
2084
2085impl PackBitmapIndex {
2086    pub const OPTION_FULL_DAG: u16 = 0x0001;
2087    pub const OPTION_HASH_CACHE: u16 = 0x0004;
2088
2089    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2090        let hash_len = format.raw_len();
2091        let min_len = 12usize
2092            .checked_add(hash_len * 2)
2093            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2094        if bytes.len() < min_len {
2095            return Err(GitError::InvalidFormat("bitmap index too short".into()));
2096        }
2097        if &bytes[..4] != b"BITM" {
2098            return Err(GitError::InvalidFormat(
2099                "missing bitmap index signature".into(),
2100            ));
2101        }
2102        let version = u16_be(&bytes[4..6]);
2103        if version != 1 {
2104            return Err(GitError::Unsupported(format!(
2105                "bitmap index version {version}"
2106            )));
2107        }
2108        let options = u16_be(&bytes[6..8]);
2109        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2110        if options & !known_options != 0 {
2111            return Err(GitError::Unsupported(format!(
2112                "bitmap index options {:#06x}",
2113                options & !known_options
2114            )));
2115        }
2116        let entry_count = u32_be(&bytes[8..12]) as usize;
2117        let checksum_offset = bytes.len() - hash_len;
2118        let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2119        let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2120        if actual_index_checksum != index_checksum {
2121            return Err(GitError::InvalidFormat(format!(
2122                "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2123            )));
2124        }
2125
2126        let pack_checksum_end = 12usize
2127            .checked_add(hash_len)
2128            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2129        let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2130        let mut offset = pack_checksum_end;
2131        let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2132        let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2133        let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2134        let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2135
2136        let mut entries = Vec::with_capacity(entry_count);
2137        for idx in 0..entry_count {
2138            if checksum_offset.saturating_sub(offset) < 6 {
2139                return Err(GitError::InvalidFormat(
2140                    "truncated bitmap index entry".into(),
2141                ));
2142            }
2143            let object_position = u32_be(&bytes[offset..offset + 4]);
2144            offset += 4;
2145            if object_position as usize >= object_count {
2146                return Err(GitError::InvalidFormat(
2147                    "bitmap index entry points past object table".into(),
2148                ));
2149            }
2150            let xor_offset = bytes[offset];
2151            offset += 1;
2152            if xor_offset as usize > idx || xor_offset > 160 {
2153                return Err(GitError::InvalidFormat(
2154                    "bitmap index entry has invalid XOR offset".into(),
2155                ));
2156            }
2157            let flags = bytes[offset];
2158            offset += 1;
2159            let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2160            entries.push(PackBitmapEntry {
2161                object_position,
2162                xor_offset,
2163                flags,
2164                bitmap,
2165            });
2166        }
2167
2168        let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2169            let cache_len = object_count
2170                .checked_mul(4)
2171                .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2172            if checksum_offset.saturating_sub(offset) < cache_len {
2173                return Err(GitError::InvalidFormat(
2174                    "truncated bitmap hash cache".into(),
2175                ));
2176            }
2177            let mut cache = Vec::with_capacity(object_count);
2178            for _ in 0..object_count {
2179                cache.push(u32_be(&bytes[offset..offset + 4]));
2180                offset += 4;
2181            }
2182            Some(cache)
2183        } else {
2184            None
2185        };
2186
2187        if offset != checksum_offset {
2188            return Err(GitError::InvalidFormat(format!(
2189                "bitmap index has {} trailing bytes",
2190                checksum_offset - offset
2191            )));
2192        }
2193
2194        Ok(Self {
2195            version,
2196            format,
2197            options,
2198            pack_checksum,
2199            index_checksum,
2200            type_bitmaps: PackBitmapTypeBitmaps {
2201                commits,
2202                trees,
2203                blobs,
2204                tags,
2205            },
2206            entries,
2207            name_hash_cache,
2208        })
2209    }
2210
2211    /// Looks up the stored entry whose commit sits at `position` in the
2212    /// oid-sorted pack index (`.idx` order; see [`PackBitmapEntry::object_position`]).
2213    pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2214        self.entries
2215            .iter()
2216            .find(|entry| entry.object_position == position)
2217    }
2218}
2219
2220fn parse_bitmap_ewah(
2221    bytes: &[u8],
2222    offset: &mut usize,
2223    checksum_offset: usize,
2224    _object_count: usize,
2225) -> Result<EwahBitmap> {
2226    if checksum_offset.saturating_sub(*offset) < 12 {
2227        return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2228    }
2229    let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2230    *offset += 4;
2231    let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2232    *offset += 4;
2233    let words_len = word_count
2234        .checked_mul(8)
2235        .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2236    if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2237        return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2238    }
2239    let mut words = Vec::with_capacity(word_count);
2240    for _ in 0..word_count {
2241        words.push(u64_be(&bytes[*offset..*offset + 8]));
2242        *offset += 8;
2243    }
2244    let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2245    *offset += 4;
2246    validate_ewah_words(bit_size, &words, rlw_position)?;
2247    Ok(EwahBitmap {
2248        bit_size,
2249        words,
2250        rlw_position,
2251    })
2252}
2253
2254fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2255    if words.is_empty() {
2256        if rlw_position != 0 || bit_size != 0 {
2257            return Err(GitError::InvalidFormat(
2258                "EWAH bitmap has invalid empty RLW".into(),
2259            ));
2260        }
2261        return Ok(());
2262    }
2263    if rlw_position as usize >= words.len() {
2264        return Err(GitError::InvalidFormat(
2265            "EWAH RLW position points past word table".into(),
2266        ));
2267    }
2268    let mut word_idx = 0usize;
2269    let mut decoded_words = 0u64;
2270    while word_idx < words.len() {
2271        let rlw = words[word_idx];
2272        let run_words = (rlw >> 1) & 0xffff_ffff;
2273        let literal_words = (rlw >> 33) as usize;
2274        word_idx += 1;
2275        word_idx = word_idx
2276            .checked_add(literal_words)
2277            .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
2278        if word_idx > words.len() {
2279            return Err(GitError::InvalidFormat(
2280                "EWAH literal words extend past word table".into(),
2281            ));
2282        }
2283        decoded_words = decoded_words
2284            .checked_add(run_words)
2285            .and_then(|value| value.checked_add(literal_words as u64))
2286            .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2287    }
2288    let decoded_bits = decoded_words
2289        .checked_mul(64)
2290        .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2291    if decoded_bits < u64::from(bit_size) {
2292        return Err(GitError::InvalidFormat(
2293            "EWAH bitmap decodes fewer bits than declared".into(),
2294        ));
2295    }
2296    Ok(())
2297}
2298
2299impl MultiPackIndex {
2300    pub fn write(
2301        format: ObjectFormat,
2302        version: u8,
2303        pack_names: &[String],
2304        objects: &[MultiPackIndexEntry],
2305    ) -> Result<Vec<u8>> {
2306        Self::write_with_reverse_index(format, version, pack_names, objects, None)
2307    }
2308
2309    /// Like [`MultiPackIndex::write`], but when `preferred_pack` is `Some`,
2310    /// additionally emits the `RIDX` chunk: the object order a multi-pack
2311    /// `.bitmap` numbers its bits in ("pseudo-pack order" — every object of
2312    /// the preferred pack first, then the rest by pack id, each pack's slice
2313    /// in offset order), stored as one u32 midx position per object.
2314    ///
2315    /// `preferred_pack` is the pack-int-id receiving pseudo-pack priority; it
2316    /// must be in range.
2317    pub fn write_with_reverse_index(
2318        format: ObjectFormat,
2319        version: u8,
2320        pack_names: &[String],
2321        objects: &[MultiPackIndexEntry],
2322        preferred_pack: Option<u32>,
2323    ) -> Result<Vec<u8>> {
2324        if let Some(preferred) = preferred_pack
2325            && preferred as usize >= pack_names.len()
2326        {
2327            return Err(GitError::InvalidFormat(format!(
2328                "preferred pack {preferred} out of range for {} packs",
2329                pack_names.len()
2330            )));
2331        }
2332        if version != 1 && version != 2 {
2333            return Err(GitError::Unsupported(format!(
2334                "multi-pack-index version {version}"
2335            )));
2336        }
2337        if pack_names.len() > u32::MAX as usize {
2338            return Err(GitError::InvalidFormat(
2339                "too many multi-pack-index packs".into(),
2340            ));
2341        }
2342        if objects.len() > u32::MAX as usize {
2343            return Err(GitError::InvalidFormat(
2344                "too many multi-pack-index objects".into(),
2345            ));
2346        }
2347        validate_midx_pack_names(pack_names)?;
2348        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
2349            return Err(GitError::InvalidFormat(
2350                "multi-pack-index v1 pack names must be sorted".into(),
2351            ));
2352        }
2353
2354        let mut objects = objects.iter().collect::<Vec<_>>();
2355        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2356        let mut previous_oid: Option<&ObjectId> = None;
2357        for object in &objects {
2358            if object.oid.format() != format {
2359                return Err(GitError::InvalidObjectId(
2360                    "multi-pack-index object format does not match index format".into(),
2361                ));
2362            }
2363            if let Some(previous) = previous_oid
2364                && previous.as_bytes() == object.oid.as_bytes()
2365            {
2366                return Err(GitError::InvalidFormat(
2367                    "multi-pack-index contains duplicate object ids".into(),
2368                ));
2369            }
2370            if object.pack_int_id as usize >= pack_names.len() {
2371                return Err(GitError::InvalidFormat(
2372                    "multi-pack-index object points past pack table".into(),
2373                ));
2374            }
2375            previous_oid = Some(&object.oid);
2376        }
2377
2378        let mut large_offsets = Vec::new();
2379        let mut chunks = vec![
2380            (*b"PNAM", write_midx_pack_names(pack_names)),
2381            (*b"OIDF", write_midx_oid_fanout(&objects)?),
2382            (*b"OIDL", write_midx_oid_lookup(&objects)),
2383            (
2384                *b"OOFF",
2385                write_midx_object_offsets(&objects, &mut large_offsets)?,
2386            ),
2387        ];
2388        if !large_offsets.is_empty() {
2389            chunks.push((*b"LOFF", large_offsets));
2390        }
2391        if let Some(preferred) = preferred_pack {
2392            // `objects` is already in midx (oid-sorted) order here; the chunk
2393            // lists each object's midx position in pseudo-pack order.
2394            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
2395            pseudo.sort_by_key(|&midx_pos| {
2396                let object = objects[midx_pos as usize];
2397                (
2398                    object.pack_int_id != preferred,
2399                    object.pack_int_id,
2400                    object.offset,
2401                )
2402            });
2403            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
2404            for midx_pos in pseudo {
2405                ridx.extend_from_slice(&midx_pos.to_be_bytes());
2406            }
2407            chunks.push((*b"RIDX", ridx));
2408        }
2409        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
2410    }
2411
2412    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2413        let hash_len = format.raw_len();
2414        if bytes.len() < 12 + 12 + hash_len {
2415            return Err(GitError::InvalidFormat(
2416                "multi-pack-index file too short".into(),
2417            ));
2418        }
2419        if &bytes[..4] != b"MIDX" {
2420            return Err(GitError::InvalidFormat(
2421                "missing multi-pack-index signature".into(),
2422            ));
2423        }
2424        let version = bytes[4];
2425        if version != 1 && version != 2 {
2426            return Err(GitError::Unsupported(format!(
2427                "multi-pack-index version {version}"
2428            )));
2429        }
2430        let hash_id = bytes[5];
2431        if u32::from(hash_id) != hash_function_id(format) {
2432            return Err(GitError::InvalidFormat(format!(
2433                "multi-pack-index hash id {hash_id} does not match {}",
2434                format.name()
2435            )));
2436        }
2437        let chunk_count = bytes[6] as usize;
2438        let base_midx_count = bytes[7];
2439        if base_midx_count != 0 {
2440            return Err(GitError::Unsupported(format!(
2441                "multi-pack-index base count {base_midx_count}"
2442            )));
2443        }
2444        let pack_count = u32_be(&bytes[8..12]);
2445        let lookup_len = (chunk_count + 1)
2446            .checked_mul(12)
2447            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2448        let data_start = 12usize
2449            .checked_add(lookup_len)
2450            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2451        let checksum_offset = bytes.len() - hash_len;
2452        if data_start > checksum_offset {
2453            return Err(GitError::InvalidFormat(
2454                "truncated multi-pack-index chunk lookup".into(),
2455            ));
2456        }
2457
2458        let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2459        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2460        if actual_checksum != checksum {
2461            return Err(GitError::InvalidFormat(format!(
2462                "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
2463            )));
2464        }
2465
2466        let mut entries = Vec::with_capacity(chunk_count + 1);
2467        let mut offset = 12usize;
2468        for _ in 0..=chunk_count {
2469            let id = [
2470                bytes[offset],
2471                bytes[offset + 1],
2472                bytes[offset + 2],
2473                bytes[offset + 3],
2474            ];
2475            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
2476            entries.push((id, chunk_offset));
2477            offset += 12;
2478        }
2479        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2480            return Err(GitError::InvalidFormat(
2481                "multi-pack-index chunk lookup is empty".into(),
2482            ));
2483        };
2484        if terminator_id != [0, 0, 0, 0] {
2485            return Err(GitError::InvalidFormat(
2486                "multi-pack-index chunk lookup missing terminator".into(),
2487            ));
2488        }
2489        if terminator_offset != checksum_offset as u64 {
2490            return Err(GitError::InvalidFormat(
2491                "multi-pack-index terminator does not point at checksum".into(),
2492            ));
2493        }
2494
2495        let mut chunks = Vec::with_capacity(chunk_count);
2496        let mut previous_offset = data_start as u64;
2497        for pair in entries.windows(2) {
2498            let (id, chunk_offset) = pair[0];
2499            let (_next_id, next_offset) = pair[1];
2500            if id == [0, 0, 0, 0] {
2501                return Err(GitError::InvalidFormat(
2502                    "multi-pack-index chunk id is zero before terminator".into(),
2503                ));
2504            }
2505            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2506                return Err(GitError::InvalidFormat(
2507                    "multi-pack-index chunk offsets are not monotonic".into(),
2508                ));
2509            }
2510            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2511                return Err(GitError::InvalidFormat(
2512                    "multi-pack-index chunk length is invalid".into(),
2513                ));
2514            }
2515            chunks.push(MultiPackIndexChunk {
2516                id,
2517                offset: chunk_offset,
2518                len: next_offset - chunk_offset,
2519            });
2520            previous_offset = chunk_offset;
2521        }
2522
2523        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
2524        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
2525        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
2526        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
2527        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
2528        let bitmapped_packs =
2529            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
2530
2531        Ok(Self {
2532            version,
2533            format,
2534            pack_count,
2535            pack_names,
2536            object_count: object_count as u32,
2537            fanout,
2538            objects,
2539            reverse_index,
2540            bitmapped_packs,
2541            chunks,
2542            checksum,
2543        })
2544    }
2545
2546    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
2547        self.objects
2548            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2549            .ok()
2550            .map(|idx| &self.objects[idx])
2551    }
2552}
2553
2554impl MultiPackIndexOidLookup {
2555    pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
2556        let raw = bytes.as_bytes();
2557        let hash_len = format.raw_len();
2558        if raw.len() < 12 + 12 + hash_len {
2559            return Err(GitError::InvalidFormat(
2560                "multi-pack-index file too short".into(),
2561            ));
2562        }
2563        if &raw[..4] != b"MIDX" {
2564            return Err(GitError::InvalidFormat(
2565                "missing multi-pack-index signature".into(),
2566            ));
2567        }
2568        let version = raw[4];
2569        if version != 1 && version != 2 {
2570            return Err(GitError::Unsupported(format!(
2571                "multi-pack-index version {version}"
2572            )));
2573        }
2574        let hash_id = raw[5];
2575        if u32::from(hash_id) != hash_function_id(format) {
2576            return Err(GitError::InvalidFormat(format!(
2577                "multi-pack-index hash id {hash_id} does not match {}",
2578                format.name()
2579            )));
2580        }
2581        let chunk_count = raw[6] as usize;
2582        let base_midx_count = raw[7];
2583        if base_midx_count != 0 {
2584            return Err(GitError::Unsupported(format!(
2585                "multi-pack-index base count {base_midx_count}"
2586            )));
2587        }
2588        let pack_count = u32_be(&raw[8..12]);
2589        let lookup_len = (chunk_count + 1)
2590            .checked_mul(12)
2591            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2592        let data_start = 12usize
2593            .checked_add(lookup_len)
2594            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2595        let checksum_offset = raw.len() - hash_len;
2596        if data_start > checksum_offset {
2597            return Err(GitError::InvalidFormat(
2598                "truncated multi-pack-index chunk lookup".into(),
2599            ));
2600        }
2601
2602        let mut entries = Vec::with_capacity(chunk_count + 1);
2603        let mut offset = 12usize;
2604        for _ in 0..=chunk_count {
2605            let id = [
2606                raw[offset],
2607                raw[offset + 1],
2608                raw[offset + 2],
2609                raw[offset + 3],
2610            ];
2611            let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
2612            entries.push((id, chunk_offset));
2613            offset += 12;
2614        }
2615        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2616            return Err(GitError::InvalidFormat(
2617                "multi-pack-index chunk lookup is empty".into(),
2618            ));
2619        };
2620        if terminator_id != [0, 0, 0, 0] {
2621            return Err(GitError::InvalidFormat(
2622                "multi-pack-index chunk lookup missing terminator".into(),
2623            ));
2624        }
2625        if terminator_offset != checksum_offset as u64 {
2626            return Err(GitError::InvalidFormat(
2627                "multi-pack-index terminator does not point at checksum".into(),
2628            ));
2629        }
2630
2631        let mut chunks = Vec::with_capacity(chunk_count);
2632        let mut previous_offset = data_start as u64;
2633        for pair in entries.windows(2) {
2634            let (id, chunk_offset) = pair[0];
2635            let (_next_id, next_offset) = pair[1];
2636            if id == [0, 0, 0, 0] {
2637                return Err(GitError::InvalidFormat(
2638                    "multi-pack-index chunk id is zero before terminator".into(),
2639                ));
2640            }
2641            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2642                return Err(GitError::InvalidFormat(
2643                    "multi-pack-index chunk offsets are not monotonic".into(),
2644                ));
2645            }
2646            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2647                return Err(GitError::InvalidFormat(
2648                    "multi-pack-index chunk length is invalid".into(),
2649                ));
2650            }
2651            chunks.push(MultiPackIndexChunk {
2652                id,
2653                offset: chunk_offset,
2654                len: next_offset - chunk_offset,
2655            });
2656            previous_offset = chunk_offset;
2657        }
2658
2659        let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
2660        let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
2661        let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
2662            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
2663        let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
2664            GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
2665        })?;
2666        if oid_lookup.len() != expected_len {
2667            return Err(GitError::InvalidFormat(
2668                "multi-pack-index OIDL chunk has invalid length".into(),
2669            ));
2670        }
2671        let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
2672            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
2673        let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
2674            GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
2675        })?;
2676        if object_offsets.len() != expected_offsets_len {
2677            return Err(GitError::InvalidFormat(
2678                "multi-pack-index OOFF chunk has invalid length".into(),
2679            ));
2680        }
2681        let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
2682        if let Some(large_offsets) = large_offsets
2683            && large_offsets.len() % 8 != 0
2684        {
2685            return Err(GitError::InvalidFormat(
2686                "multi-pack-index LOFF chunk has invalid length".into(),
2687            ));
2688        }
2689        let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
2690        let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
2691        let (large_offsets_offset, large_offsets_len) = match large_offsets {
2692            Some(large_offsets) => (
2693                Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
2694                large_offsets.len(),
2695            ),
2696            None => (None, 0),
2697        };
2698        Ok(Self {
2699            format,
2700            pack_count,
2701            pack_names,
2702            fanout,
2703            object_count,
2704            oid_lookup_offset,
2705            object_offsets_offset,
2706            large_offsets_offset,
2707            large_offsets_len,
2708            bytes,
2709        })
2710    }
2711
2712    pub fn contains(&self, oid: &ObjectId) -> bool {
2713        self.find_position(oid).is_some()
2714    }
2715
2716    pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2717        let Some(position) = self.find_position(oid) else {
2718            return Ok(None);
2719        };
2720        let bytes = self.bytes.as_bytes();
2721        let hash_len = self.format.raw_len();
2722        let oid_start = self
2723            .oid_lookup_offset
2724            .checked_add(position * hash_len)
2725            .ok_or_else(|| {
2726                GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2727            })?;
2728        let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
2729        let offset_start = self
2730            .object_offsets_offset
2731            .checked_add(position * 8)
2732            .ok_or_else(|| {
2733                GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2734            })?;
2735        let data = &bytes[offset_start..offset_start + 8];
2736        let pack_int_id = u32_be(&data[..4]);
2737        if pack_int_id >= self.pack_count {
2738            return Err(GitError::InvalidFormat(
2739                "multi-pack-index object points past pack table".into(),
2740            ));
2741        }
2742        let raw_offset = u32_be(&data[4..8]);
2743        let offset = if raw_offset & 0x8000_0000 == 0 {
2744            u64::from(raw_offset)
2745        } else {
2746            let Some(large_offsets_offset) = self.large_offsets_offset else {
2747                return Err(GitError::InvalidFormat(
2748                    "multi-pack-index large offset missing LOFF chunk".into(),
2749                ));
2750            };
2751            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2752            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2753                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2754            })?;
2755            let large_end = large_start.checked_add(8).ok_or_else(|| {
2756                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2757            })?;
2758            if large_end > self.large_offsets_len {
2759                return Err(GitError::InvalidFormat(
2760                    "multi-pack-index large offset points past LOFF chunk".into(),
2761                ));
2762            }
2763            let start = large_offsets_offset + large_start;
2764            u64_be(&bytes[start..start + 8])
2765        };
2766        Ok(Some(MultiPackIndexEntry {
2767            oid,
2768            pack_int_id,
2769            offset,
2770        }))
2771    }
2772
2773    pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2774        self.pack_names
2775            .get(pack_int_id as usize)
2776            .map(String::as_str)
2777    }
2778
2779    fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2780        if oid.format() != self.format || self.object_count == 0 {
2781            return None;
2782        }
2783        let first = oid.as_bytes()[0] as usize;
2784        let start = if first == 0 {
2785            0
2786        } else {
2787            self.fanout[first - 1] as usize
2788        };
2789        let end = self.fanout[first] as usize;
2790        if start >= end || end > self.object_count {
2791            return None;
2792        }
2793        let hash_len = self.format.raw_len();
2794        let table_start = self.oid_lookup_offset;
2795        let table_end = table_start + self.object_count * hash_len;
2796        let bytes = self.bytes.as_bytes();
2797        let table = &bytes[table_start..table_end];
2798        let needle = oid.as_bytes();
2799        let mut low = start;
2800        let mut high = end;
2801        while low < high {
2802            let mid = low + (high - low) / 2;
2803            let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2804            match raw.cmp(needle) {
2805                std::cmp::Ordering::Less => low = mid + 1,
2806                std::cmp::Ordering::Equal => return Some(mid),
2807                std::cmp::Ordering::Greater => high = mid,
2808            }
2809        }
2810        None
2811    }
2812}
2813
2814fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2815    for name in pack_names {
2816        if name.is_empty() {
2817            return Err(GitError::InvalidFormat(
2818                "multi-pack-index pack name is empty".into(),
2819            ));
2820        }
2821        if name
2822            .bytes()
2823            .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2824        {
2825            return Err(GitError::InvalidFormat(
2826                "multi-pack-index pack name contains an invalid byte".into(),
2827            ));
2828        }
2829    }
2830    Ok(())
2831}
2832
/// Build the `PNAM` chunk payload: each pack name followed by a NUL byte, in
/// the given order, with trailing NUL padding up to a multiple of four bytes.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0u8)))
        .collect();
    // Bytes still missing to reach the next 4-byte boundary (0 when aligned).
    let padding = (4 - chunk.len() % 4) % 4;
    chunk.extend(std::iter::repeat(0u8).take(padding));
    chunk
}
2844
2845fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2846    let mut counts = [0u32; 256];
2847    for object in objects {
2848        let first = object.oid.as_bytes()[0] as usize;
2849        counts[first] = counts[first]
2850            .checked_add(1)
2851            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2852    }
2853    let mut running = 0u32;
2854    let mut out = Vec::with_capacity(256 * 4);
2855    for count in counts {
2856        running = running
2857            .checked_add(count)
2858            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2859        out.extend_from_slice(&running.to_be_bytes());
2860    }
2861    Ok(out)
2862}
2863
2864fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2865    let mut out = Vec::new();
2866    for object in objects {
2867        out.extend_from_slice(object.oid.as_bytes());
2868    }
2869    out
2870}
2871
2872fn write_midx_object_offsets(
2873    objects: &[&MultiPackIndexEntry],
2874    large_offsets: &mut Vec<u8>,
2875) -> Result<Vec<u8>> {
2876    let mut out = Vec::new();
2877    for object in objects {
2878        out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2879        if object.offset < 0x8000_0000 {
2880            out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2881        } else {
2882            let large_idx = large_offsets.len() / 8;
2883            if large_idx > 0x7fff_ffff {
2884                return Err(GitError::InvalidFormat(
2885                    "too many multi-pack-index large offsets".into(),
2886                ));
2887            }
2888            out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2889            large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2890        }
2891    }
2892    Ok(out)
2893}
2894
2895fn write_multi_pack_index_chunks(
2896    format: ObjectFormat,
2897    version: u8,
2898    pack_count: u32,
2899    chunks: &[([u8; 4], Vec<u8>)],
2900) -> Result<Vec<u8>> {
2901    if chunks.len() > u8::MAX as usize {
2902        return Err(GitError::InvalidFormat(
2903            "too many multi-pack-index chunks".into(),
2904        ));
2905    }
2906    let lookup_len = (chunks.len() + 1)
2907        .checked_mul(12)
2908        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2909    let mut out = Vec::new();
2910    out.extend_from_slice(b"MIDX");
2911    out.push(version);
2912    out.push(hash_function_id(format) as u8);
2913    out.push(chunks.len() as u8);
2914    out.push(0);
2915    out.extend_from_slice(&pack_count.to_be_bytes());
2916    let mut chunk_offset = (12usize)
2917        .checked_add(lookup_len)
2918        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
2919        as u64;
2920    for (id, data) in chunks {
2921        out.extend_from_slice(id);
2922        out.extend_from_slice(&chunk_offset.to_be_bytes());
2923        chunk_offset = chunk_offset
2924            .checked_add(data.len() as u64)
2925            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
2926    }
2927    out.extend_from_slice(&[0, 0, 0, 0]);
2928    out.extend_from_slice(&chunk_offset.to_be_bytes());
2929    for (_id, data) in chunks {
2930        out.extend_from_slice(data);
2931    }
2932    let checksum = sley_core::digest_bytes(format, &out)?;
2933    out.extend_from_slice(checksum.as_bytes());
2934    Ok(out)
2935}
2936
/// A pack entry's kind paired with a size.
///
/// NOTE(review): presumably the decoded per-entry header of a pack object —
/// confirm against the header-parsing code, which is outside this chunk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Kind of the entry, as a [`PackObjectKind`].
    kind: PackObjectKind,
    /// Size associated with the entry.
    /// NOTE(review): likely the size declared in the entry header (for deltas,
    /// the size of the delta data) — confirm.
    size: u64,
}
2942
/// A cache of objects already decoded from one specific pack, keyed by the
/// in-pack byte offset at which each object's entry begins.
///
/// Delta resolution within a pack walks a chain of base objects by offset; the
/// same base is the parent of many deltas, so without a cache the entire chain
/// is re-inflated and re-applied on every read. Implementors let
/// [`read_object_at_with_cache`] reuse a warm base instead.
///
/// Correctness contract: a given `offset` within a given pack's bytes always
/// decodes to exactly one object, so caching by offset can never serve the wrong
/// object **provided the same cache is only ever used with one pack's bytes**.
/// Callers must therefore scope a cache to a single pack (e.g. key it by pack
/// path). The default [`read_object_at`] uses a no-op cache and is unaffected.
///
/// Both methods take `&self`, so an implementation that actually stores
/// entries needs interior mutability.
pub trait PackDeltaCache {
    /// Return the decoded object whose entry begins at `offset`, if cached.
    /// `None` means a miss.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Record that the entry beginning at `offset` decodes to `object`.
    /// Implementations are free to store nothing.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
2962
2963/// A [`PackDeltaCache`] that stores nothing; used by [`read_object_at`] to keep
2964/// the original, allocation-free behavior for callers that do not opt in.
2965struct NoopDeltaCache;
2966
2967impl PackDeltaCache for NoopDeltaCache {
2968    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
2969        None
2970    }
2971    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
2972}
2973
// Reused zlib inflate state, one per thread. Resetting and reusing one
// `Decompress` avoids allocating a fresh (~10 KiB) `InflateState` for every
// object and delta decoded — an allocation that dominated bulk reads.
//
// The `RefCell` is borrowed only for the duration of a single inflate; the
// recursive pack reader fully inflates each entry's data before recursing to
// its base, so the borrow never nests (a nested `borrow_mut` would panic).
//
// `Decompress::new(true)` selects zlib-wrapped input (with a zlib header)
// rather than raw DEFLATE.
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
2982
/// Upper bound on how far one DEFLATE/zlib member can expand its input.
///
/// Raw DEFLATE tops out at roughly 1032:1 (a maximally efficient run of
/// back-references). The speculative reservation is limited to this multiple
/// of the compressed bytes actually available, so a declared `size_hint` such
/// as `u64::MAX` cannot make us reserve — and thus commit — gigabytes before
/// the inflate has produced a single byte. The stream's *actual* output is
/// still checked against the declared size by the caller; this only bounds
/// the up-front allocation. git likewise never pre-allocates an attacker's
/// declared size beyond a streaming buffer (see index-pack.c's
/// `unpack_entry_data`).
const MAX_INFLATE_EXPANSION: usize = 1032;

/// Hard ceiling (64 MiB) on the speculative reservation, regardless of input
/// length, so that even a large, plausible-looking compressed input cannot be
/// turned into a multi-gigabyte up-front allocation. The inflate loop still
/// grows the output buffer past this when a real stream genuinely produces
/// that much — only the *speculative* reserve is capped.
const MAX_INFLATE_RESERVE: usize = 64 << 20;

/// Turn a caller-supplied (possibly attacker-controlled) decompressed-size
/// hint into a capacity that is safe to reserve up front.
///
/// The result is the hint, limited to what `compressed_len` input bytes could
/// plausibly inflate to, then kept within `64..=MAX_INFLATE_RESERVE`. It only
/// sizes the initial allocation: the inflate loop grows the buffer as real
/// output appears, so large legitimate objects still decode — they just do
/// not get their whole allocation at once.
fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
    // Always reserve at least this much; far below the ceiling, so applying
    // the floor and then the ceiling can never conflict.
    const FLOOR: usize = 64;
    let plausible_output = MAX_INFLATE_EXPANSION.saturating_mul(compressed_len);
    size_hint
        .min(plausible_output)
        .max(FLOOR)
        .min(MAX_INFLATE_RESERVE)
}
3014
3015/// Inflate the entire zlib stream at the front of `compressed`, appending the
3016/// decoded bytes to `out`, reusing the thread-local inflate state. `size_hint`
3017/// is the caller's expectation for the decompressed length, but it is treated as
3018/// untrusted: the up-front reservation is bounded by [`bounded_inflate_reserve`]
3019/// so a crafted hint can never drive an out-of-memory pre-allocation. Returns the
3020/// number of *compressed* bytes consumed (so callers stepping through a pack can
3021/// advance to the next entry). Byte-for-byte equivalent to
3022/// `ZlibDecoder::read_to_end` + `total_in`.
3023fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
3024    INFLATE.with(|cell| {
3025        let mut decompress = cell.borrow_mut();
3026        decompress.reset(true);
3027        out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
3028        let mut input = compressed;
3029        let mut consumed_total = 0usize;
3030        loop {
3031            // Always leave output room so a zero-progress result means the input
3032            // (not the buffer) is exhausted.
3033            if out.len() == out.capacity() {
3034                out.reserve(out.len().max(64));
3035            }
3036            let before_in = decompress.total_in();
3037            let before_out = decompress.total_out();
3038            let status = decompress
3039                .decompress_vec(input, out, flate2::FlushDecompress::None)
3040                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3041            let consumed = (decompress.total_in() - before_in) as usize;
3042            let produced = decompress.total_out() - before_out;
3043            input = &input[consumed..];
3044            consumed_total += consumed;
3045            match status {
3046                flate2::Status::StreamEnd => return Ok(consumed_total),
3047                _ if consumed == 0 && produced == 0 => {
3048                    return Err(GitError::InvalidObject("truncated zlib stream".into()));
3049                }
3050                _ => {}
3051            }
3052        }
3053    })
3054}
3055
3056/// Inflate at least `max_out` bytes (or until the stream ends) from `compressed`
3057/// into `out`, reusing the thread-local state. Used to read a delta's leading
3058/// base-size / result-size varints without inflating the whole instruction stream.
3059fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3060    INFLATE.with(|cell| {
3061        let mut decompress = cell.borrow_mut();
3062        decompress.reset(true);
3063        out.reserve(max_out.max(16));
3064        let mut input = compressed;
3065        while out.len() < max_out {
3066            if out.len() == out.capacity() {
3067                out.reserve(out.len().max(16));
3068            }
3069            let before_in = decompress.total_in();
3070            let before_out = decompress.total_out();
3071            let status = decompress
3072                .decompress_vec(input, out, flate2::FlushDecompress::None)
3073                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3074            let consumed = (decompress.total_in() - before_in) as usize;
3075            let produced = decompress.total_out() - before_out;
3076            input = &input[consumed..];
3077            if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3078                break;
3079            }
3080        }
3081        Ok(())
3082    })
3083}
3084
3085/// Decode the single object stored at byte `offset` within `pack_bytes`, reading
3086/// only that object and its delta-base chain instead of parsing the whole pack.
3087///
3088/// Ofs-delta bases are followed by offset (recursively, within this pack);
3089/// ref-delta bases are obtained from `resolve_ref_base`, which the caller backs
3090/// with the surrounding object store (so a base in another pack or loose still
3091/// resolves). The pack trailer checksum is the final `format.raw_len()` bytes.
3092pub fn read_object_at_arc<F>(
3093    pack_bytes: &[u8],
3094    offset: u64,
3095    format: ObjectFormat,
3096    resolve_ref_base: F,
3097) -> Result<Arc<EncodedObject>>
3098where
3099    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3100{
3101    read_object_at_with_cache_arc(
3102        pack_bytes,
3103        offset,
3104        format,
3105        resolve_ref_base,
3106        &NoopDeltaCache,
3107    )
3108}
3109
3110/// Like [`read_object_at_arc`], but reuses already-decoded objects from `cache`
3111/// (keyed by in-pack offset) and records every object it decodes.
3112///
3113/// This turns repeated reads from the same pack — where many deltas share a base
3114/// chain — from re-inflating each chain per read into resolving each base once.
3115/// `cache` must be scoped to the pack `pack_bytes` belongs to (see
3116/// [`PackDeltaCache`]). The decoded object is returned behind an [`Arc`] so
3117/// callers can reuse cache handles without cloning full object bodies.
3118pub fn read_object_at_with_cache_arc<F, C>(
3119    pack_bytes: &[u8],
3120    offset: u64,
3121    format: ObjectFormat,
3122    mut resolve_ref_base: F,
3123    cache: &C,
3124) -> Result<Arc<EncodedObject>>
3125where
3126    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3127    C: PackDeltaCache + ?Sized,
3128{
3129    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
3130}
3131
3132fn read_object_at_inner<F, C>(
3133    pack_bytes: &[u8],
3134    offset: u64,
3135    format: ObjectFormat,
3136    resolve_ref_base: &mut F,
3137    cache: &C,
3138) -> Result<Arc<EncodedObject>>
3139where
3140    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3141    C: PackDeltaCache + ?Sized,
3142{
3143    // A warm cache entry for this exact offset is already the fully resolved
3144    // object, so the whole base chain below can be skipped.
3145    if let Some(object) = cache.get(offset) {
3146        return Ok(object);
3147    }
3148    let trailer_offset = pack_bytes
3149        .len()
3150        .checked_sub(format.raw_len())
3151        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3152    let mut cursor = usize::try_from(offset)
3153        .ok()
3154        .filter(|&value| value < trailer_offset)
3155        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3156    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3157    let base = match header.kind {
3158        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3159            pack_bytes,
3160            &mut cursor,
3161            offset,
3162        )?)),
3163        PackObjectKind::RefDelta => {
3164            let hash_len = format.raw_len();
3165            if cursor + hash_len > trailer_offset {
3166                return Err(GitError::InvalidFormat(
3167                    "truncated ref-delta base object id".into(),
3168                ));
3169            }
3170            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3171            cursor += hash_len;
3172            Some(DeltaBase::Ref(oid))
3173        }
3174        _ => None,
3175    };
3176    let mut body = Vec::new();
3177    inflate_into(
3178        &pack_bytes[cursor..trailer_offset],
3179        &mut body,
3180        header.size.min(usize::MAX as u64) as usize,
3181    )?;
3182    if body.len() as u64 != header.size {
3183        return Err(GitError::InvalidObject(format!(
3184            "pack object declared {} bytes, decoded {}",
3185            header.size,
3186            body.len()
3187        )));
3188    }
3189    let object = match base {
3190        None => {
3191            let object_type = match header.kind {
3192                PackObjectKind::Commit => ObjectType::Commit,
3193                PackObjectKind::Tree => ObjectType::Tree,
3194                PackObjectKind::Blob => ObjectType::Blob,
3195                PackObjectKind::Tag => ObjectType::Tag,
3196                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3197                    return Err(GitError::InvalidFormat(
3198                        "delta pack entry decoded without a base".into(),
3199                    ));
3200                }
3201            };
3202            Arc::new(EncodedObject::new(object_type, body))
3203        }
3204        Some(DeltaBase::Offset(base_offset)) => {
3205            let base =
3206                read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
3207            let resolved = apply_pack_delta(&base.body, &body)?;
3208            Arc::new(EncodedObject::new(base.object_type, resolved))
3209        }
3210        Some(DeltaBase::Ref(base_oid)) => {
3211            let base = resolve_ref_base(&base_oid)?
3212                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
3213            let resolved = apply_pack_delta(&base.body, &body)?;
3214            Arc::new(EncodedObject::new(base.object_type, resolved))
3215        }
3216    };
3217    // Record the fully resolved object so any later read that walks through this
3218    // offset (as a delta base or directly) reuses it. Bases are inserted as the
3219    // recursion unwinds, so a chain is decoded at most once across reads.
3220    cache.insert(offset, Arc::clone(&object));
3221    Ok(object)
3222}
3223
3224/// The object type and final (inflated) size of the entry at `offset`, *without*
3225/// materializing the object body — git's `cat-file --batch-check` fast path.
3226///
3227/// A base object's size is already in its pack entry header, and a delta's result
3228/// size is the second varint at the front of its (small) delta stream, so neither
3229/// inflates the full content. The reported type is the type at the end of the
3230/// delta chain (deltas inherit their base's type). `resolve_ref_base_type` supplies
3231/// the type of a ref-delta base that lives outside this pack (resolved through the
3232/// wider object store); ofs-delta bases are followed within `pack_bytes` directly.
3233pub fn read_object_header_at<F>(
3234    pack_bytes: &[u8],
3235    offset: u64,
3236    format: ObjectFormat,
3237    mut resolve_ref_base_type: F,
3238) -> Result<(ObjectType, u64)>
3239where
3240    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3241{
3242    read_object_header_at_inner(
3243        pack_bytes,
3244        offset,
3245        format,
3246        &mut resolve_ref_base_type,
3247        &mut NoopHeaderTypeCache,
3248    )
3249}
3250
3251/// Memo of `pack offset -> resolved header (end-of-chain type, result size)` for
3252/// the `cat-file --batch-check` header fast path.
3253///
3254/// Without it, resolving the *type* of an ofs-delta walks the whole delta chain
3255/// to its base on every header read, re-inflating each link's leading varints
3256/// from scratch — so reading every object in a deeply-deltified pack costs
3257/// O(objects x chain-depth) and goes super-linear (sley#26). Two reuses fall out
3258/// of memoizing `offset -> (type, size)`:
3259///
3260/// * a chain's end-of-chain type is resolved at most once, so later objects on
3261///   the same chain skip the walk; and
3262/// * a repeated lookup of the same object (common in batch input) returns from
3263///   the memo without re-inflating its delta header at all.
3264///
3265/// The size stored is the object's final (inflated) result size — read from its
3266/// own pack/delta header, never by materializing the body.
3267pub trait HeaderTypeCache {
3268    /// The previously resolved header at `pack_offset`, if any.
3269    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
3270    /// Record the resolved header at `pack_offset` for reuse by later reads.
3271    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
3272}
3273
3274struct NoopHeaderTypeCache;
3275
3276impl HeaderTypeCache for NoopHeaderTypeCache {
3277    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
3278        None
3279    }
3280    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
3281}
3282
3283/// Like [`read_object_header_at`] but threads a caller-owned [`HeaderTypeCache`]
3284/// through the read so (a) the ofs-delta chain's end-of-chain type is resolved at
3285/// most once per chain and (b) a repeated lookup of the same offset returns from
3286/// the memo without re-inflating (sley#26). The cache is keyed by in-pack offset,
3287/// so it must be scoped to a single pack's bytes by the caller.
3288pub fn read_object_header_at_with_cache<F, C>(
3289    pack_bytes: &[u8],
3290    offset: u64,
3291    format: ObjectFormat,
3292    mut resolve_ref_base_type: F,
3293    type_cache: &mut C,
3294) -> Result<(ObjectType, u64)>
3295where
3296    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3297    C: HeaderTypeCache + ?Sized,
3298{
3299    if let Some(header) = type_cache.get(offset) {
3300        return Ok(header);
3301    }
3302    read_object_header_at_inner(
3303        pack_bytes,
3304        offset,
3305        format,
3306        &mut resolve_ref_base_type,
3307        type_cache,
3308    )
3309}
3310
3311fn read_object_header_at_inner<F, C>(
3312    pack_bytes: &[u8],
3313    offset: u64,
3314    format: ObjectFormat,
3315    resolve_ref_base_type: &mut F,
3316    type_cache: &mut C,
3317) -> Result<(ObjectType, u64)>
3318where
3319    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3320    C: HeaderTypeCache + ?Sized,
3321{
3322    let trailer_offset = pack_bytes
3323        .len()
3324        .checked_sub(format.raw_len())
3325        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3326    let mut cursor = usize::try_from(offset)
3327        .ok()
3328        .filter(|&value| value < trailer_offset)
3329        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3330    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3331    let resolved = match header.kind {
3332        PackObjectKind::Commit => (ObjectType::Commit, header.size),
3333        PackObjectKind::Tree => (ObjectType::Tree, header.size),
3334        PackObjectKind::Blob => (ObjectType::Blob, header.size),
3335        PackObjectKind::Tag => (ObjectType::Tag, header.size),
3336        PackObjectKind::OfsDelta => {
3337            let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3338            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3339            // The end-of-chain type only depends on the base, so reuse it across
3340            // reads instead of re-walking the chain per object (sley#26).
3341            let base_type = match type_cache.get(base_offset) {
3342                Some((base_type, _)) => base_type,
3343                None => {
3344                    let (base_type, _) = read_object_header_at_inner(
3345                        pack_bytes,
3346                        base_offset,
3347                        format,
3348                        resolve_ref_base_type,
3349                        type_cache,
3350                    )?;
3351                    base_type
3352                }
3353            };
3354            (base_type, size)
3355        }
3356        PackObjectKind::RefDelta => {
3357            let hash_len = format.raw_len();
3358            if cursor + hash_len > trailer_offset {
3359                return Err(GitError::InvalidFormat(
3360                    "truncated ref-delta base object id".into(),
3361                ));
3362            }
3363            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3364            cursor += hash_len;
3365            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3366            let base_type = resolve_ref_base_type(&oid)?
3367                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3368            (base_type, size)
3369        }
3370    };
3371    // Memoize the fully resolved header so a repeated lookup of this offset (or a
3372    // chain that bases on it) returns without re-inflating (sley#26).
3373    type_cache.put(offset, resolved);
3374    Ok(resolved)
3375}
3376
/// How many inflated bytes of a delta stream are requested when only its two
/// leading varints (base size, then result size) are of interest.
///
/// A 64-bit varint occupies at most 10 bytes, so 32 bytes comfortably covers
/// both without decoding any of the delta instructions that follow.
const DELTA_HEADER_PREFIX_LEN: usize = 32;
3381
3382/// Result size of a delta whose zlib-compressed stream starts at `compressed`,
3383/// inflating only the short prefix that holds its two leading varints.
3384fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
3385    let mut prefix = Vec::new();
3386    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
3387    decoded_delta_result_size(&prefix)
3388}
3389
3390fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3391    let first = next_byte(bytes, offset)?;
3392    let mut size = u64::from(first & 0x0f);
3393    let kind = match (first >> 4) & 0x07 {
3394        1 => PackObjectKind::Commit,
3395        2 => PackObjectKind::Tree,
3396        3 => PackObjectKind::Blob,
3397        4 => PackObjectKind::Tag,
3398        6 => PackObjectKind::OfsDelta,
3399        7 => PackObjectKind::RefDelta,
3400        other => {
3401            return Err(GitError::InvalidFormat(format!(
3402                "invalid pack object type {other}"
3403            )));
3404        }
3405    };
3406    let mut shift = 4;
3407    let mut byte = first;
3408    while byte & 0x80 != 0 {
3409        byte = next_byte(bytes, offset)?;
3410        let part = u64::from(byte & 0x7f);
3411        size = size
3412            .checked_add(
3413                part.checked_shl(shift)
3414                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3415            )
3416            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3417        shift += 7;
3418    }
3419    Ok(EntryHeader { kind, size })
3420}
3421
3422fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3423    let mut byte = next_byte(bytes, offset)?;
3424    let mut relative = u64::from(byte & 0x7f);
3425    while byte & 0x80 != 0 {
3426        byte = next_byte(bytes, offset)?;
3427        relative = relative
3428            .checked_add(1)
3429            .and_then(|value| value.checked_shl(7))
3430            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3431            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3432    }
3433    entry_offset
3434        .checked_sub(relative)
3435        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3436}
3437
3438fn resolve_pack_entries<F>(
3439    parsed: Vec<ParsedPackEntry>,
3440    format: ObjectFormat,
3441    external_base: &mut F,
3442) -> Result<Vec<PackObject>>
3443where
3444    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3445{
3446    let mut offset_to_index = HashMap::with_capacity(parsed.len());
3447    for (idx, entry) in parsed.iter().enumerate() {
3448        offset_to_index.insert(parsed_entry_offset(entry), idx);
3449    }
3450
3451    let mut resolved = vec![None; parsed.len()];
3452    let mut oid_to_index = HashMap::new();
3453    let mut unresolved = 0usize;
3454    for (idx, entry) in parsed.iter().enumerate() {
3455        match entry {
3456            ParsedPackEntry::Resolved(object) => {
3457                oid_to_index.insert(object.entry.oid, idx);
3458                resolved[idx] = Some(object.clone());
3459            }
3460            ParsedPackEntry::Delta { .. } => unresolved += 1,
3461        }
3462    }
3463
3464    while unresolved != 0 {
3465        let mut progress = false;
3466        for idx in 0..parsed.len() {
3467            if resolved[idx].is_some() {
3468                continue;
3469            }
3470            let ParsedPackEntry::Delta {
3471                base,
3472                compressed_size,
3473                delta_size,
3474                offset,
3475                delta,
3476            } = &parsed[idx]
3477            else {
3478                continue;
3479            };
3480            let Some(base_object) = delta_base_object(
3481                base,
3482                &offset_to_index,
3483                &oid_to_index,
3484                &resolved,
3485                external_base,
3486            )?
3487            else {
3488                continue;
3489            };
3490            let body = apply_pack_delta(base_object.body(), delta)?;
3491            let object = EncodedObject::new(base_object.object_type(), body);
3492            let oid = object.object_id(format)?;
3493            let pack_object = PackObject {
3494                entry: PackEntry {
3495                    oid,
3496                    compressed_size: *compressed_size,
3497                    uncompressed_size: object.body.len() as u64,
3498                    offset: *offset,
3499                },
3500                object,
3501            };
3502            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
3503                return Err(GitError::InvalidObject(
3504                    "resolved delta size does not match delta header".into(),
3505                ));
3506            }
3507            if *delta_size != delta.len() as u64 {
3508                return Err(GitError::InvalidObject(format!(
3509                    "pack delta declared {delta_size} bytes, decoded {}",
3510                    delta.len()
3511                )));
3512            }
3513            oid_to_index.insert(oid, idx);
3514            resolved[idx] = Some(pack_object);
3515            unresolved -= 1;
3516            progress = true;
3517        }
3518        if !progress {
3519            return Err(GitError::Unsupported("unresolved delta base".into()));
3520        }
3521    }
3522
3523    resolved
3524        .into_iter()
3525        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
3526        .collect()
3527}
3528
3529fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3530    match entry {
3531        ParsedPackEntry::Resolved(object) => object.entry.offset,
3532        ParsedPackEntry::Delta { offset, .. } => *offset,
3533    }
3534}
3535
3536enum DeltaBaseObject<'a> {
3537    Borrowed(&'a EncodedObject),
3538    Owned(EncodedObject),
3539}
3540
3541impl DeltaBaseObject<'_> {
3542    fn object_type(&self) -> ObjectType {
3543        match self {
3544            Self::Borrowed(object) => object.object_type,
3545            Self::Owned(object) => object.object_type,
3546        }
3547    }
3548
3549    fn body(&self) -> &[u8] {
3550        match self {
3551            Self::Borrowed(object) => &object.body,
3552            Self::Owned(object) => &object.body,
3553        }
3554    }
3555}
3556
3557fn delta_base_object<'a, F>(
3558    base: &DeltaBase,
3559    offset_to_index: &HashMap<u64, usize>,
3560    oid_to_index: &HashMap<ObjectId, usize>,
3561    resolved: &'a [Option<PackObject>],
3562    external_base: &mut F,
3563) -> Result<Option<DeltaBaseObject<'a>>>
3564where
3565    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3566{
3567    match base {
3568        DeltaBase::Offset(offset) => {
3569            let Some(index) = offset_to_index.get(offset).copied() else {
3570                return Err(GitError::InvalidFormat(format!(
3571                    "ofs-delta base offset {offset} not found"
3572                )));
3573            };
3574            Ok(resolved[index]
3575                .as_ref()
3576                .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3577        }
3578        DeltaBase::Ref(oid) => {
3579            if let Some(index) = oid_to_index.get(oid).copied() {
3580                return Ok(resolved[index]
3581                    .as_ref()
3582                    .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3583            }
3584            external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3585        }
3586    }
3587}
3588
3589fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3590    let mut cursor = 0usize;
3591    let base_size = read_delta_varint(delta, &mut cursor)?;
3592    if base_size != base.len() as u64 {
3593        return Err(GitError::InvalidObject(format!(
3594            "delta base size mismatch: expected {base_size}, got {}",
3595            base.len()
3596        )));
3597    }
3598    let result_size = read_delta_varint(delta, &mut cursor)?;
3599    // `result_size` is an attacker-controlled delta varint from a network pack
3600    // (install_raw_pack -> sley-fetch). On 64-bit a naive `result_size as usize`
3601    // (or `.min(usize::MAX)`, a no-op there) lets a tiny delta declare
3602    // `u64::MAX`/1 TiB and drive `with_capacity` to abort the process before the
3603    // size-mismatch check below can fire. Route the up-front reservation through
3604    // the sley#2 bound so the speculative allocation is capped; `result.extend`
3605    // still grows the buffer organically and the post-decode length check
3606    // (`result.len() != result_size`) rejects the lie cleanly.
3607    let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3608    let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3609    while cursor < delta.len() {
3610        let command = delta[cursor];
3611        cursor += 1;
3612        if command & 0x80 != 0 {
3613            let copy_offset =
3614                read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3615            let mut copy_size =
3616                read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3617            if copy_size == 0 {
3618                copy_size = 0x10000;
3619            }
3620            let start = usize::try_from(copy_offset)
3621                .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3622            let len = usize::try_from(copy_size)
3623                .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3624            let end = start
3625                .checked_add(len)
3626                .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3627            let Some(slice) = base.get(start..end) else {
3628                return Err(GitError::InvalidObject(
3629                    "delta copy range exceeds base object".into(),
3630                ));
3631            };
3632            result.extend_from_slice(slice);
3633        } else if command != 0 {
3634            let len = usize::from(command);
3635            let end = cursor
3636                .checked_add(len)
3637                .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3638            let Some(slice) = delta.get(cursor..end) else {
3639                return Err(GitError::InvalidObject(
3640                    "delta insert range exceeds delta data".into(),
3641                ));
3642            };
3643            result.extend_from_slice(slice);
3644            cursor = end;
3645        } else {
3646            return Err(GitError::InvalidObject(
3647                "delta contains reserved zero command".into(),
3648            ));
3649        }
3650    }
3651    if result.len() as u64 != result_size {
3652        return Err(GitError::InvalidObject(format!(
3653            "delta result size mismatch: expected {result_size}, got {}",
3654            result.len()
3655        )));
3656    }
3657    Ok(result)
3658}
3659
3660fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3661    let mut cursor = 0usize;
3662    let _ = read_delta_varint(delta, &mut cursor)?;
3663    read_delta_varint(delta, &mut cursor)
3664}
3665
/// Length, in bytes, of the fixed-size blocks that are hashed when indexing a
/// base object for delta generation. Matches git's `diff-delta.c` block size.
const DELTA_BLOCK_SIZE: usize = 16;

/// Spacing between indexed anchors in the base object. Anchoring at block
/// boundaries keeps the index compact; the target side is still scanned
/// byte-by-byte when a delta is generated.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// log2 of the number of hash buckets in a [`DeltaIndex`]. Bucketing keeps
/// candidate scans short without sorting each base object's anchors.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of hash buckets in a [`DeltaIndex`].
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Bit mask with the low `DELTA_BUCKET_BITS` bits set (`DELTA_BUCKET_COUNT - 1`).
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3681
3682/// An index over a base object's content used to generate deltas against it.
3683///
3684/// The index hashes block-sized anchors of the base, groups them into fixed
3685/// buckets, and verifies exact byte matches before copying. This avoids both
3686/// per-bucket allocation storms and the per-object sort needed by a single
3687/// sorted vector.
3688struct DeltaIndex<'a> {
3689    base: &'a [u8],
3690    blocks: Vec<DeltaBlock>,
3691    buckets: Vec<usize>,
3692}
3693
3694#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3695struct DeltaBlock {
3696    hash: u32,
3697    offset: usize,
3698}
3699
3700impl<'a> DeltaIndex<'a> {
3701    fn new(base: &'a [u8]) -> Self {
3702        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3703        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3704        for_each_delta_anchor(base.len(), |offset| {
3705            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3706            buckets[delta_bucket(hash) + 1] += 1;
3707            anchors.push(DeltaBlock { hash, offset });
3708        });
3709        for idx in 1..buckets.len() {
3710            buckets[idx] += buckets[idx - 1];
3711        }
3712
3713        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3714        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3715        for anchor in anchors {
3716            let bucket = delta_bucket(anchor.hash);
3717            let next = &mut next_offsets[bucket];
3718            blocks[*next] = anchor;
3719            *next += 1;
3720        }
3721
3722        Self {
3723            base,
3724            blocks,
3725            buckets,
3726        }
3727    }
3728
3729    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3730        let bucket = delta_bucket(hash);
3731        let start = self.buckets[bucket];
3732        let end = self.buckets[bucket + 1];
3733        self.blocks[start..end]
3734            .iter()
3735            .filter(move |block| block.hash == hash)
3736    }
3737
3738    fn has_hash(&self, hash: u32) -> bool {
3739        self.candidate_blocks(hash).next().is_some()
3740    }
3741
3742    fn has_shared_anchor(&self, target: &[u8]) -> bool {
3743        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3744            return false;
3745        }
3746        let last = target.len() - DELTA_BLOCK_SIZE;
3747        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3748            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3749            if self.has_hash(hash) {
3750                return true;
3751            }
3752        }
3753        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3754            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3755            if self.has_hash(hash) {
3756                return true;
3757            }
3758        }
3759        false
3760    }
3761
3762    /// Generate a delta that reconstructs `target` from this index's base.
3763    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3764        if !self.has_shared_anchor(target) {
3765            return None;
3766        }
3767        let base = self.base;
3768        let mut delta = Vec::new();
3769        write_delta_varint(&mut delta, base.len() as u64);
3770        write_delta_varint(&mut delta, target.len() as u64);
3771
3772        let mut pending_insert_start = 0usize;
3773        let mut pos = 0usize;
3774        while pos < target.len() {
3775            let mut best_len = 0usize;
3776            let mut best_offset = 0usize;
3777            if pos + DELTA_BLOCK_SIZE <= target.len() {
3778                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3779                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3780                    // Confirm the block actually matches (hash collisions are
3781                    // possible) before measuring how far it extends.
3782                    let candidate = candidate.offset;
3783                    let max_len = (base.len() - candidate).min(target.len() - pos);
3784                    let mut len = 0usize;
3785                    while len < max_len && base[candidate + len] == target[pos + len] {
3786                        len += 1;
3787                    }
3788                    if len > best_len {
3789                        best_len = len;
3790                        best_offset = candidate;
3791                    }
3792                }
3793            }
3794
3795            if best_len >= DELTA_BLOCK_SIZE {
3796                if pending_insert_start < pos {
3797                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3798                }
3799                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3800                pos += best_len;
3801                pending_insert_start = pos;
3802            } else {
3803                pos += 1;
3804            }
3805        }
3806        if pending_insert_start < target.len() {
3807            write_delta_insert(&mut delta, &target[pending_insert_start..]);
3808        }
3809        Some(delta)
3810    }
3811}
3812
3813fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3814    if len < DELTA_BLOCK_SIZE {
3815        return;
3816    }
3817    len -= DELTA_BLOCK_SIZE;
3818    for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3819        visit(offset);
3820    }
3821    if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3822        visit(len);
3823    }
3824}
3825
3826fn delta_anchor_count(len: usize) -> usize {
3827    if len < DELTA_BLOCK_SIZE {
3828        return 0;
3829    }
3830    let last = len - DELTA_BLOCK_SIZE;
3831    (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3832}
3833
3834fn delta_bucket(hash: u32) -> usize {
3835    (hash as usize) & DELTA_BUCKET_MASK
3836}
3837
/// Maximum number of base offsets retained per block-hash bucket. Caps the work
/// done extending candidate matches for inputs with many repeated blocks.
///
/// `DeltaIndex::delta` also applies this limit while matching: at each target
/// position it examines at most this many candidate blocks with the same hash.
const DELTA_MAX_CHAIN: usize = 64;
3841
/// Reduce a block of base/target bytes to a 32-bit key used for bucket lookup.
///
/// The hash multiplies the running value by `0x0100_0193` (wrapping) and XORs
/// in each byte, FNV-style. Collisions are harmless: callers compare the
/// actual bytes before using a match, so a collision only costs an extra
/// comparison.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |acc, &byte| {
        acc.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
3854
/// The chosen storage form for a single object during pack generation.
///
/// Both delta variants carry the already-generated delta bytes, so the payload
/// to compress for an entry is either the object's own body (`None`) or the
/// stored `delta`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored undeltified (a base for others, or no good delta was found).
    None,
    /// Delta against another object in this pack, identified by its original
    /// index. The pre-computed `delta` bytes reconstruct the object from that
    /// base's body.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Delta against an external (thin-pack) base, referenced by object id.
    /// `delta` reconstructs the object from that external base's body.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
3867
/// Per-object outcome of delta planning, stored at the object's original index
/// in the plan.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// How the object is stored: undeltified, or as a delta against an
    /// in-pack or external base.
    base: PlannedBase,
}
3872
3873fn compress_planned_payloads(
3874    objects: &[&EncodedObject],
3875    plan: &[PlannedEntry],
3876    order: &[usize],
3877) -> Result<Vec<Vec<u8>>> {
3878    if order.is_empty() {
3879        return Ok(Vec::new());
3880    }
3881
3882    let worker_count = std::thread::available_parallelism()
3883        .map(|threads| threads.get())
3884        .unwrap_or(1)
3885        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3886        .min(order.len());
3887    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3888        let mut payloads = Vec::with_capacity(order.len());
3889        for &idx in order {
3890            payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
3891        }
3892        return Ok(payloads);
3893    }
3894
3895    let chunk_len = order.len().div_ceil(worker_count);
3896    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
3897    std::thread::scope(|scope| {
3898        let mut handles = Vec::new();
3899        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
3900            let chunk_start = chunk_idx * chunk_len;
3901            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
3902                let mut chunk_payloads = Vec::with_capacity(chunk.len());
3903                for (offset, &idx) in chunk.iter().enumerate() {
3904                    chunk_payloads.push((
3905                        chunk_start + offset,
3906                        compressed_payload(planned_payload(objects, plan, idx))?,
3907                    ));
3908                }
3909                Ok(chunk_payloads)
3910            }));
3911        }
3912
3913        let mut first_error = None;
3914        for handle in handles {
3915            match handle.join() {
3916                Ok(Ok(chunk_payloads)) => {
3917                    if first_error.is_none() {
3918                        for (pos, payload) in chunk_payloads {
3919                            payloads[pos] = payload;
3920                        }
3921                    }
3922                }
3923                Ok(Err(err)) => {
3924                    first_error.get_or_insert(err);
3925                }
3926                Err(_) => {
3927                    first_error.get_or_insert_with(|| {
3928                        GitError::InvalidObject("pack compression worker panicked".into())
3929                    });
3930                }
3931            }
3932        }
3933
3934        match first_error {
3935            Some(err) => Err(err),
3936            None => Ok(()),
3937        }
3938    })?;
3939    Ok(payloads)
3940}
3941
3942fn planned_payload<'a>(
3943    objects: &'a [&'a EncodedObject],
3944    plan: &'a [PlannedEntry],
3945    idx: usize,
3946) -> &'a [u8] {
3947    match &plan[idx].base {
3948        PlannedBase::None => &objects[idx].body,
3949        PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
3950    }
3951}
3952
3953fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
3954    let mut out = Vec::new();
3955    write_compressed_payload(&mut out, body)?;
3956    Ok(out)
3957}
3958
/// Maximum number of external thin-pack bases compared against any single
/// object. Bounds the work of the thin path when a large base set is supplied.
///
/// Applied in `plan_pack_deltas` as a cap on the external-base iterator, so it
/// limits delta attempts per object, not how many bases may be supplied.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
3962
/// One slot of the sliding delta window used by `plan_pack_deltas`: a recently
/// processed object that later objects may use as a delta base.
struct DeltaWindowEntry<'a> {
    /// Original index of the object (into the `objects` slice and the plan).
    idx: usize,
    /// Delta index built over that object's body.
    index: DeltaIndex<'a>,
}
3967
3968/// Rank object types for delta grouping. Objects of the same type are far more
3969/// likely to delta well, so the sort groups by this rank first.
3970fn delta_type_rank(object_type: ObjectType) -> u8 {
3971    match object_type {
3972        ObjectType::Commit => 0,
3973        ObjectType::Tree => 1,
3974        ObjectType::Blob => 2,
3975        ObjectType::Tag => 3,
3976    }
3977}
3978
3979/// Decide how each object is stored (undeltified or deltified) and the order in
3980/// which objects are emitted into the pack.
3981///
3982/// # Ordering
3983///
3984/// Candidates are sorted by `(type, size descending, object id)`:
3985/// * **type** — only same-type objects are deltified against one another, so
3986///   grouping by type keeps the sliding window full of viable bases. Type rank
3987///   follows [`delta_type_rank`] (commit, tree, blob, tag).
3988/// * **size descending** — larger objects come first so smaller, later objects
3989///   delta against larger bases (git's heuristic). Raw [`EncodedObject`]s carry
3990///   no path/name, so the usual path-hash key is unavailable; size is the next
3991///   best locality signal.
3992/// * **object id** — a deterministic tiebreaker for reproducible packs.
3993///
3994/// # Selection
3995///
3996/// Each object is compared against the previous up to `window` same-type
3997/// candidates (and, for thin packs, up to [`DELTA_MAX_EXTERNAL_BASES`] external
3998/// bases of the same type). The smallest delta whose encoded length is strictly
3999/// less than the object's own body is kept; otherwise the object is stored
4000/// undeltified. Delta chain depth is bounded by `options.depth` (a base may
4001/// only be used if doing so keeps the resulting chain within the bound); a depth
4002/// of `0` disables deltification entirely.
4003///
4004/// Returns the per-object plan (indexed by original object index) together with
4005/// the emit order. Every in-pack delta references a candidate that is earlier in
4006/// the emit order, so emitting in that order writes each base before any object
4007/// that depends on it.
4008fn plan_pack_deltas(
4009    objects: &[&EncodedObject],
4010    object_ids: &[ObjectId],
4011    options: &PackWriteOptions,
4012) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4013    let count = objects.len();
4014    let mut plan: Vec<PlannedEntry> = (0..count)
4015        .map(|_| PlannedEntry {
4016            base: PlannedBase::None,
4017        })
4018        .collect();
4019
4020    // Processing order. Deltas only point backwards within this order, which is
4021    // therefore also a valid emit order. Reordering by type/size improves delta
4022    // locality but is skipped when disabled or when deltification is off.
4023    let mut order: Vec<usize> = (0..count).collect();
4024    if options.reorder && options.depth > 0 {
4025        order.sort_by(|&left, &right| {
4026            delta_type_rank(objects[left].object_type)
4027                .cmp(&delta_type_rank(objects[right].object_type))
4028                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4029                .then_with(|| {
4030                    object_ids[left]
4031                        .as_bytes()
4032                        .cmp(object_ids[right].as_bytes())
4033                })
4034        });
4035    }
4036
4037    if options.depth == 0 {
4038        return Ok((plan, order));
4039    }
4040
4041    // Pre-build delta indexes for external thin-pack bases, grouped by type so
4042    // an object only compares against compatible bases.
4043    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4044        Vec::with_capacity(options.thin_bases.len());
4045    for (oid, object) in &options.thin_bases {
4046        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4047    }
4048
4049    // Chain depth ending at each object (0 = undeltified). Used to keep delta
4050    // chains within `options.depth`.
4051    let mut depth = vec![0usize; count];
4052    // Sliding window of recently processed original indices, most recent last.
4053    let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4054        std::collections::VecDeque::new();
4055
4056    for &idx in &order {
4057        let target = &objects[idx].body;
4058        let target_type = objects[idx].object_type;
4059
4060        let mut best_delta: Option<Vec<u8>> = None;
4061        let mut best_base = PlannedBase::None;
4062
4063        // Try in-pack candidates from the window (same type only).
4064        for base_entry in window.iter().rev() {
4065            let base_idx = base_entry.idx;
4066            if objects[base_idx].object_type != target_type {
4067                continue;
4068            }
4069            // Using this base would make the new chain depth + 1; skip if that
4070            // would exceed the configured maximum.
4071            if depth[base_idx] + 1 > options.depth {
4072                continue;
4073            }
4074            let Some(delta) = base_entry.index.delta(target) else {
4075                continue;
4076            };
4077            if !delta_is_acceptable(&delta, target.len()) {
4078                continue;
4079            }
4080            if best_delta
4081                .as_ref()
4082                .is_none_or(|current| delta.len() < current.len())
4083            {
4084                best_delta = Some(delta);
4085                best_base = PlannedBase::InPack {
4086                    base_idx,
4087                    delta: Vec::new(),
4088                };
4089            }
4090        }
4091
4092        // Try external thin-pack bases (ref-delta; external base is depth 0, so
4093        // the resulting chain depth is 1, always within a non-zero bound).
4094        for (base_oid, base_type, base_index) in
4095            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4096        {
4097            if *base_type != target_type {
4098                continue;
4099            }
4100            let Some(delta) = base_index.delta(target) else {
4101                continue;
4102            };
4103            if !delta_is_acceptable(&delta, target.len()) {
4104                continue;
4105            }
4106            if best_delta
4107                .as_ref()
4108                .is_none_or(|current| delta.len() < current.len())
4109            {
4110                best_delta = Some(delta);
4111                best_base = PlannedBase::External {
4112                    base_oid: *base_oid,
4113                    delta: Vec::new(),
4114                };
4115            }
4116        }
4117
4118        if let Some(delta) = best_delta {
4119            match best_base {
4120                PlannedBase::InPack { base_idx, .. } => {
4121                    depth[idx] = depth[base_idx] + 1;
4122                    plan[idx].base = PlannedBase::InPack { base_idx, delta };
4123                }
4124                PlannedBase::External { base_oid, .. } => {
4125                    depth[idx] = 1;
4126                    plan[idx].base = PlannedBase::External { base_oid, delta };
4127                }
4128                PlannedBase::None => {}
4129            }
4130        }
4131
4132        // Add this object to the window for subsequent candidates.
4133        window.push_back(DeltaWindowEntry {
4134            idx,
4135            index: DeltaIndex::new(&objects[idx].body),
4136        });
4137        while window.len() > options.window {
4138            window.pop_front();
4139        }
4140    }
4141
4142    Ok((plan, order))
4143}
4144
/// Decide whether a delta should replace the undeltified form of an object.
///
/// A delta qualifies only when it is non-empty and strictly shorter than the
/// object's body (`target_len`); at equal size the self-contained undeltified
/// form is preferred.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4152
/// Append `value` as a little-endian base-128 varint: seven payload bits per
/// byte, least-significant group first, with the high bit set on every byte
/// except the last. Zero is written as a single `0x00` byte.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push((value as u8 & 0x7f) | 0x80);
        value >>= 7;
    }
    out.push(value as u8);
}
4166
/// Append copy instructions that copy `size` bytes starting at `offset` in the
/// base.
///
/// Each instruction covers at most `0x10000` bytes, so longer copies are split
/// into consecutive instructions. An instruction is an opcode byte with bit
/// `0x80` set, followed by only the non-zero bytes of the offset (low 4 bytes,
/// flagged by opcode bits `0x01..=0x08`) and of the size (low 3 bytes, flagged
/// by bits `0x10..=0x40`). A span of exactly `0x10000` is written with a size
/// field of zero.
///
/// Only the low 32 bits of `offset` are encoded.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_SPAN: u64 = 0x10000;
    while size > 0 {
        let span = size.min(MAX_SPAN);
        let size_field = if span < MAX_SPAN { span } else { 0 };

        // Reserve the opcode byte first, then set its flag bits as operand
        // bytes are appended after it.
        let opcode_at = out.len();
        out.push(0x80);
        for (bit, &byte) in offset.to_le_bytes()[..4].iter().enumerate() {
            if byte != 0 {
                out[opcode_at] |= 1 << bit;
                out.push(byte);
            }
        }
        for (bit, &byte) in size_field.to_le_bytes()[..3].iter().enumerate() {
            if byte != 0 {
                out[opcode_at] |= 0x10 << bit;
                out.push(byte);
            }
        }

        offset += span;
        size -= span;
    }
}
4196
/// Append insert instructions carrying `bytes` verbatim.
///
/// Each instruction is a length byte in `1..=0x7f` followed by that many
/// literal bytes, so longer runs are split into several instructions. Empty
/// input writes nothing.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for piece in bytes.chunks(0x7f) {
        out.push(piece.len() as u8);
        out.extend_from_slice(piece);
    }
}
4205
4206fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
4207    let mut value = 0u64;
4208    let mut shift = 0u32;
4209    loop {
4210        let Some(byte) = delta.get(*cursor).copied() else {
4211            return Err(GitError::InvalidObject("truncated delta size".into()));
4212        };
4213        *cursor += 1;
4214        value = value
4215            .checked_add(
4216                u64::from(byte & 0x7f)
4217                    .checked_shl(shift)
4218                    .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
4219            )
4220            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4221        if byte & 0x80 == 0 {
4222            return Ok(value);
4223        }
4224        shift = shift
4225            .checked_add(7)
4226            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4227    }
4228}
4229
4230fn read_delta_copy_value(
4231    delta: &[u8],
4232    cursor: &mut usize,
4233    command: u8,
4234    masks: &[u8],
4235) -> Result<u64> {
4236    let mut value = 0u64;
4237    for (shift, mask) in masks.iter().enumerate() {
4238        if command & mask != 0 {
4239            let Some(byte) = delta.get(*cursor).copied() else {
4240                return Err(GitError::InvalidObject(
4241                    "truncated delta copy command".into(),
4242                ));
4243            };
4244            *cursor += 1;
4245            value |= u64::from(byte) << (shift * 8);
4246        }
4247    }
4248    Ok(value)
4249}
4250
// Per-thread compressor reused by `write_compressed_payload`, which resets it
// before every payload so no state leaks between pack entries. It is created
// with the default compression level; the `true` argument presumably selects a
// zlib-wrapped stream (flate2's `zlib_header` flag) — confirm against flate2.
thread_local! {
    static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
}
4254
4255fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
4256    DEFLATE.with(|cell| {
4257        let mut compressor = cell.borrow_mut();
4258        compressor.reset();
4259        out.reserve(zlib_compress_bound(body.len()));
4260        let status = compressor
4261            .compress_vec(body, out, FlushCompress::Finish)
4262            .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
4263        if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
4264            return Err(GitError::InvalidObject(
4265                "zlib compression did not finish pack entry".into(),
4266            ));
4267        }
4268        Ok(())
4269    })
4270}
4271
/// Upper bound used to pre-size the output buffer for compressing `len` bytes:
/// `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    const OVERHEAD_SHIFTS: [u32; 3] = [12, 14, 25];
    const FIXED_OVERHEAD: usize = 13;

    OVERHEAD_SHIFTS
        .iter()
        .fold(len, |bound, &shift| bound.saturating_add(len >> shift))
        .saturating_add(FIXED_OVERHEAD)
}
4278
4279fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
4280    let type_code = match object_type {
4281        ObjectType::Commit => 1,
4282        ObjectType::Tree => 2,
4283        ObjectType::Blob => 3,
4284        ObjectType::Tag => 4,
4285    };
4286    write_pack_entry_header_kind(out, type_code, size);
4287}
4288
/// Append a pack entry header for the raw `type_code` and `size`.
///
/// The first byte holds the type code in bits 4..=6 and the low four bits of
/// the size; the remaining size bits follow seven at a time, least-significant
/// group first. Every byte except the last has its high bit set.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    let low_nibble = (size as u8) & 0x0f;
    let mut rest = size >> 4;
    // The byte waiting to be written; it gets the continuation bit only once
    // we know more size bits follow.
    let mut pending = (type_code << 4) | low_nibble;
    while rest != 0 {
        out.push(pending | 0x80);
        pending = (rest as u8) & 0x7f;
        rest >>= 7;
    }
    out.push(pending);
}
4305
4306fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4307    if relative == 0 {
4308        return Err(GitError::InvalidFormat(
4309            "ofs-delta relative offset cannot be zero".into(),
4310        ));
4311    }
4312    let mut value = relative;
4313    let mut bytes = vec![(value & 0x7f) as u8];
4314    value >>= 7;
4315    while value != 0 {
4316        value -= 1;
4317        bytes.push(((value & 0x7f) as u8) | 0x80);
4318        value >>= 7;
4319    }
4320    bytes.reverse();
4321    out.extend_from_slice(&bytes);
4322    Ok(())
4323}
4324
4325fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4326    let Some(byte) = bytes.get(*offset).copied() else {
4327        return Err(GitError::InvalidFormat(
4328            "truncated pack entry header".into(),
4329        ));
4330    };
4331    *offset += 1;
4332    Ok(byte)
4333}
4334
/// Decode the first two bytes of `bytes` as a big-endian `u16`.
///
/// Panics if `bytes` holds fewer than two bytes; callers pass pre-sized slices.
fn u16_be(bytes: &[u8]) -> u16 {
    let [high, low] = [bytes[0], bytes[1]];
    (u16::from(high) << 8) | u16::from(low)
}
4338
/// Decode the first four bytes of `bytes` as a big-endian `u32`.
///
/// Panics if `bytes` holds fewer than four bytes; callers pass pre-sized slices.
fn u32_be(bytes: &[u8]) -> u32 {
    bytes[..4]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
4342
/// Decode the first eight bytes of `bytes` as a big-endian `u64`.
///
/// Panics if `bytes` holds fewer than eight bytes; callers pass pre-sized
/// slices.
fn u64_be(bytes: &[u8]) -> u64 {
    bytes[..8]
        .iter()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
}
4348
4349fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4350    let mut fanout = [0u32; 256];
4351    let mut previous = 0u32;
4352    for slot in &mut fanout {
4353        *slot = u32_be(&bytes[*offset..*offset + 4]);
4354        if *slot < previous {
4355            return Err(GitError::InvalidFormat(
4356                "pack index fanout is not monotonic".into(),
4357            ));
4358        }
4359        previous = *slot;
4360        *offset += 4;
4361    }
4362    Ok(fanout)
4363}
4364
4365fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4366    let expected_min = if oid_bytes[0] == 0 {
4367        0
4368    } else {
4369        fanout[usize::from(oid_bytes[0] - 1)]
4370    };
4371    if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4372        return Err(GitError::InvalidFormat(
4373            "pack index object id is outside its fanout bucket".into(),
4374        ));
4375    }
4376    Ok(())
4377}
4378
4379fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4380    if raw_offset & 0x8000_0000 == 0 {
4381        return Ok(u64::from(raw_offset));
4382    }
4383    let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4384    let large_start = large_idx
4385        .checked_mul(8)
4386        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4387    let large_end = large_start
4388        .checked_add(8)
4389        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4390    if large_end > large_offset_table.len() {
4391        return Err(GitError::InvalidFormat(
4392            "pack index large offset points past table".into(),
4393        ));
4394    }
4395    Ok(u64_be(&large_offset_table[large_start..large_end]))
4396}
4397
4398fn checked_range(
4399    start: usize,
4400    count: usize,
4401    width: usize,
4402    total: usize,
4403) -> Result<std::ops::Range<usize>> {
4404    let len = count
4405        .checked_mul(width)
4406        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4407    let end = start
4408        .checked_add(len)
4409        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4410    if end > total {
4411        return Err(GitError::InvalidFormat("truncated pack index table".into()));
4412    }
4413    Ok(start..end)
4414}
4415
4416fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4417    let mut seen = vec![false; positions.len()];
4418    for position in positions {
4419        let idx = *position as usize;
4420        if idx >= positions.len() {
4421            return Err(GitError::InvalidFormat(
4422                "reverse index position points past object table".into(),
4423            ));
4424        }
4425        if seen[idx] {
4426            return Err(GitError::InvalidFormat(
4427                "reverse index position is duplicated".into(),
4428            ));
4429        }
4430        seen[idx] = true;
4431    }
4432    Ok(())
4433}
4434
4435fn parse_midx_pack_names(
4436    bytes: &[u8],
4437    chunks: &[MultiPackIndexChunk],
4438    pack_count: usize,
4439    version: u8,
4440) -> Result<Vec<String>> {
4441    let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4442        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4443    let mut names = Vec::with_capacity(pack_count);
4444    let mut offset = 0usize;
4445    while names.len() < pack_count {
4446        let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4447            return Err(GitError::InvalidFormat(
4448                "multi-pack-index PNAM entry is unterminated".into(),
4449            ));
4450        };
4451        let name_bytes = &data[offset..offset + relative_end];
4452        if name_bytes.is_empty() {
4453            return Err(GitError::InvalidFormat(
4454                "multi-pack-index PNAM entry is empty".into(),
4455            ));
4456        }
4457        let name = std::str::from_utf8(name_bytes)
4458            .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4459        if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4460            return Err(GitError::InvalidFormat(
4461                "multi-pack-index PNAM entry contains a path separator".into(),
4462            ));
4463        }
4464        names.push(name.to_string());
4465        offset += relative_end + 1;
4466    }
4467    let padding = &data[offset..];
4468    if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4469        return Err(GitError::InvalidFormat(
4470            "multi-pack-index PNAM padding is invalid".into(),
4471        ));
4472    }
4473    if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4474        return Err(GitError::InvalidFormat(
4475            "multi-pack-index v1 PNAM entries are not sorted".into(),
4476        ));
4477    }
4478    Ok(names)
4479}
4480
4481fn parse_midx_oid_fanout(
4482    bytes: &[u8],
4483    chunks: &[MultiPackIndexChunk],
4484) -> Result<([u32; 256], usize)> {
4485    let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4486        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4487    if data.len() != 256 * 4 {
4488        return Err(GitError::InvalidFormat(
4489            "multi-pack-index OIDF chunk has invalid length".into(),
4490        ));
4491    }
4492    let mut fanout = [0u32; 256];
4493    let mut previous = 0u32;
4494    for (idx, slot) in fanout.iter_mut().enumerate() {
4495        let start = idx * 4;
4496        *slot = u32_be(&data[start..start + 4]);
4497        if *slot < previous {
4498            return Err(GitError::InvalidFormat(
4499                "multi-pack-index OIDF fanout is not monotonic".into(),
4500            ));
4501        }
4502        previous = *slot;
4503    }
4504    Ok((fanout, fanout[255] as usize))
4505}
4506
4507fn parse_midx_object_ids(
4508    bytes: &[u8],
4509    chunks: &[MultiPackIndexChunk],
4510    format: ObjectFormat,
4511    object_count: usize,
4512    fanout: &[u32; 256],
4513) -> Result<Vec<ObjectId>> {
4514    let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4515        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4516    let expected_len = object_count
4517        .checked_mul(format.raw_len())
4518        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4519    if data.len() != expected_len {
4520        return Err(GitError::InvalidFormat(
4521            "multi-pack-index OIDL chunk has invalid length".into(),
4522        ));
4523    }
4524
4525    let mut ids = Vec::with_capacity(object_count);
4526    let mut counts = [0u32; 256];
4527    let mut previous_oid: Option<ObjectId> = None;
4528    for idx in 0..object_count {
4529        let start = idx * format.raw_len();
4530        let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4531        if let Some(previous) = &previous_oid
4532            && previous.as_bytes() >= oid.as_bytes()
4533        {
4534            return Err(GitError::InvalidFormat(
4535                "multi-pack-index OIDL object ids are not strictly sorted".into(),
4536            ));
4537        }
4538        counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4539            .checked_add(1)
4540            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4541        previous_oid = Some(oid);
4542        ids.push(oid);
4543    }
4544
4545    let mut running = 0u32;
4546    for (idx, count) in counts.iter().enumerate() {
4547        running = running
4548            .checked_add(*count)
4549            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4550        if fanout[idx] != running {
4551            return Err(GitError::InvalidFormat(
4552                "multi-pack-index OIDF fanout does not match OIDL".into(),
4553            ));
4554        }
4555    }
4556    Ok(ids)
4557}
4558
4559fn parse_midx_object_offsets(
4560    bytes: &[u8],
4561    chunks: &[MultiPackIndexChunk],
4562    object_ids: Vec<ObjectId>,
4563    pack_count: u32,
4564) -> Result<Vec<MultiPackIndexEntry>> {
4565    let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4566        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4567    let expected_len = object_ids
4568        .len()
4569        .checked_mul(8)
4570        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4571    if data.len() != expected_len {
4572        return Err(GitError::InvalidFormat(
4573            "multi-pack-index OOFF chunk has invalid length".into(),
4574        ));
4575    }
4576    let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4577    if let Some(large_offsets) = large_offsets
4578        && large_offsets.len() % 8 != 0
4579    {
4580        return Err(GitError::InvalidFormat(
4581            "multi-pack-index LOFF chunk has invalid length".into(),
4582        ));
4583    }
4584
4585    let mut entries = Vec::with_capacity(object_ids.len());
4586    for (idx, oid) in object_ids.into_iter().enumerate() {
4587        let start = idx * 8;
4588        let pack_int_id = u32_be(&data[start..start + 4]);
4589        if pack_int_id >= pack_count {
4590            return Err(GitError::InvalidFormat(
4591                "multi-pack-index object points past pack table".into(),
4592            ));
4593        }
4594        let raw_offset = u32_be(&data[start + 4..start + 8]);
4595        let offset = if raw_offset & 0x8000_0000 == 0 {
4596            u64::from(raw_offset)
4597        } else {
4598            let Some(large_offsets) = large_offsets else {
4599                return Err(GitError::InvalidFormat(
4600                    "multi-pack-index large offset missing LOFF chunk".into(),
4601                ));
4602            };
4603            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4604            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4605                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4606            })?;
4607            let large_end = large_start.checked_add(8).ok_or_else(|| {
4608                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4609            })?;
4610            if large_end > large_offsets.len() {
4611                return Err(GitError::InvalidFormat(
4612                    "multi-pack-index large offset points past LOFF chunk".into(),
4613                ));
4614            }
4615            u64_be(&large_offsets[large_start..large_end])
4616        };
4617        entries.push(MultiPackIndexEntry {
4618            oid,
4619            pack_int_id,
4620            offset,
4621        });
4622    }
4623    Ok(entries)
4624}
4625
4626fn parse_midx_reverse_index(
4627    bytes: &[u8],
4628    chunks: &[MultiPackIndexChunk],
4629    object_count: usize,
4630) -> Result<Option<Vec<u32>>> {
4631    let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4632        return Ok(None);
4633    };
4634    let expected_len = object_count
4635        .checked_mul(4)
4636        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4637    if data.len() != expected_len {
4638        return Err(GitError::InvalidFormat(
4639            "multi-pack-index RIDX chunk has invalid length".into(),
4640        ));
4641    }
4642    let mut positions = Vec::with_capacity(object_count);
4643    for idx in 0..object_count {
4644        let start = idx * 4;
4645        positions.push(u32_be(&data[start..start + 4]));
4646    }
4647    validate_position_permutation(&positions)?;
4648    Ok(Some(positions))
4649}
4650
4651fn parse_midx_bitmapped_packs(
4652    bytes: &[u8],
4653    chunks: &[MultiPackIndexChunk],
4654    pack_count: usize,
4655    object_count: usize,
4656) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4657    let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4658        return Ok(None);
4659    };
4660    let expected_len = pack_count
4661        .checked_mul(8)
4662        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4663    if data.len() != expected_len {
4664        return Err(GitError::InvalidFormat(
4665            "multi-pack-index BTMP chunk has invalid length".into(),
4666        ));
4667    }
4668    let mut entries = Vec::with_capacity(pack_count);
4669    for idx in 0..pack_count {
4670        let start = idx * 8;
4671        let bitmap_pos = u32_be(&data[start..start + 4]);
4672        let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4673        let bitmap_end = u64::from(bitmap_pos)
4674            .checked_add(u64::from(bitmap_nr))
4675            .ok_or_else(|| {
4676                GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4677            })?;
4678        if bitmap_end > object_count as u64 {
4679            return Err(GitError::InvalidFormat(
4680                "multi-pack-index BTMP range points past object table".into(),
4681            ));
4682        }
4683        entries.push(MultiPackBitmapPack {
4684            bitmap_pos,
4685            bitmap_nr,
4686        });
4687    }
4688    Ok(Some(entries))
4689}
4690
4691fn midx_chunk_data<'a>(
4692    bytes: &'a [u8],
4693    chunks: &[MultiPackIndexChunk],
4694    id: [u8; 4],
4695    required: bool,
4696) -> Result<Option<&'a [u8]>> {
4697    let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4698        if required {
4699            return Err(GitError::InvalidFormat(format!(
4700                "multi-pack-index missing {} chunk",
4701                std::str::from_utf8(&id).unwrap_or("required")
4702            )));
4703        }
4704        return Ok(None);
4705    };
4706    let start = usize::try_from(chunk.offset)
4707        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4708    let len = usize::try_from(chunk.len)
4709        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4710    let end = start
4711        .checked_add(len)
4712        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4713    let Some(data) = bytes.get(start..end) else {
4714        return Err(GitError::InvalidFormat(
4715            "multi-pack-index chunk extends past file".into(),
4716        ));
4717    };
4718    Ok(Some(data))
4719}
4720
4721fn hash_function_id(format: ObjectFormat) -> u32 {
4722    match format {
4723        ObjectFormat::Sha1 => 1,
4724        ObjectFormat::Sha256 => 2,
4725    }
4726}
4727
/// Largest clean-run length a single EWAH running-length word (RLW) can
/// record: the run-length field occupies bits 1..=32, i.e. 32 bits.
const EWAH_MAX_RUNNING_LEN: u64 = u32::MAX as u64;

/// Largest literal (dirty) word count a single EWAH running-length word can
/// record: the literal-count field occupies bits 33..=63, i.e. 31 bits.
const EWAH_MAX_LITERAL_LEN: u64 = (1u64 << 31) - 1;

/// A 64-bit word with every bit set; such a word is encoded as part of a
/// "clean" run of ones rather than as a literal.
const EWAH_ALL_ONES: u64 = !0;
4738
4739impl EwahBitmap {
4740    /// Constructs an [`EwahBitmap`] in git's canonical EWAH compressed form
4741    /// from a slice of raw uncompressed 64-bit words.
4742    ///
4743    /// Within each word bit `i` corresponds to position `word_index * 64 + i`,
4744    /// matching git's on-disk convention. `bit_size` records the number of
4745    /// logical bits the bitmap spans; it must not exceed `words.len() * 64`.
4746    ///
4747    /// This mirrors libgit's `ewah_add`/`ewah_add_empty_words` incremental
4748    /// encoder: consecutive all-zero or all-one words collapse into a run, and
4749    /// any other word is stored verbatim as a literal. Only the first
4750    /// `bit_size.div_ceil(64)` words back the declared bits; any extra trailing
4751    /// words supplied by the caller are ignored, just as git encodes a bitmap
4752    /// sized to its highest set bit.
4753    pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4754        let required_words = bit_size.div_ceil(64) as usize;
4755        if required_words > words.len() {
4756            return Err(GitError::InvalidFormat(format!(
4757                "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4758                words.len()
4759            )));
4760        }
4761        // Only the words that actually back the declared bits matter; libgit
4762        // never emits clean trailing zero words for the unused tail.
4763        let significant = &words[..required_words];
4764        let mut builder = EwahBuilder::new(bit_size);
4765        for &word in significant {
4766            if word == 0 {
4767                builder.add_empty_words(false, 1);
4768            } else if word == EWAH_ALL_ONES {
4769                builder.add_empty_words(true, 1);
4770            } else {
4771                builder.add_literal(word);
4772            }
4773        }
4774        builder.finish()
4775    }
4776
4777    /// Constructs an [`EwahBitmap`] from a set of bit positions.
4778    ///
4779    /// `bit_size` is the number of logical bits (typically the pack object
4780    /// count). Every position in `positions` must be strictly less than
4781    /// `bit_size`. Positions may be given in any order and may repeat.
4782    pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4783        let word_count = bit_size.div_ceil(64) as usize;
4784        let mut words = vec![0u64; word_count];
4785        for &position in positions {
4786            if position >= bit_size {
4787                return Err(GitError::InvalidFormat(format!(
4788                    "EWAH bit position {position} out of range for bit_size {bit_size}"
4789                )));
4790            }
4791            let word_index = (position / 64) as usize;
4792            let bit_index = position % 64;
4793            words[word_index] |= 1u64 << bit_index;
4794        }
4795        Self::from_words(bit_size, &words)
4796    }
4797
4798    /// An empty EWAH bitmap (no bits, no words). This is what git writes for an
4799    /// all-zero type bitmap (e.g. when a pack has no tags).
4800    pub fn empty() -> Self {
4801        Self {
4802            bit_size: 0,
4803            words: Vec::new(),
4804            rlw_position: 0,
4805        }
4806    }
4807
4808    /// Decodes the compressed EWAH back into raw 64-bit words, LSB-first within
4809    /// each word. The returned vector has `bit_size.div_ceil(64)` entries.
4810    ///
4811    /// This is the inverse of [`EwahBitmap::from_words`] for the bits the
4812    /// bitmap actually covers and is primarily used to validate roundtrips.
4813    pub fn to_words(&self) -> Result<Vec<u64>> {
4814        let mut out = Vec::new();
4815        let mut word_idx = 0usize;
4816        while word_idx < self.words.len() {
4817            let rlw = self.words[word_idx];
4818            let run_bit = rlw & 1;
4819            let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4820            let literal_words = (rlw >> 33) as usize;
4821            word_idx += 1;
4822            let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4823            for _ in 0..run_words {
4824                out.push(fill);
4825            }
4826            let literal_end = word_idx
4827                .checked_add(literal_words)
4828                .filter(|end| *end <= self.words.len())
4829                .ok_or_else(|| {
4830                    GitError::InvalidFormat("EWAH literal words extend past word table".into())
4831                })?;
4832            out.extend_from_slice(&self.words[word_idx..literal_end]);
4833            word_idx = literal_end;
4834        }
4835        let required_words = (self.bit_size as usize).div_ceil(64);
4836        if out.len() < required_words {
4837            out.resize(required_words, 0);
4838        }
4839        out.truncate(required_words);
4840        Ok(out)
4841    }
4842
4843    /// Returns the sorted set bit positions covered by this bitmap.
4844    pub fn to_positions(&self) -> Result<Vec<u32>> {
4845        let words = self.to_words()?;
4846        let mut positions = Vec::new();
4847        for (word_index, word) in words.iter().enumerate() {
4848            let mut remaining = *word;
4849            while remaining != 0 {
4850                let bit = remaining.trailing_zeros();
4851                let position = (word_index as u64) * 64 + u64::from(bit);
4852                if position < u64::from(self.bit_size) {
4853                    // position always fits in u32 because bit_size is u32.
4854                    positions.push(position as u32);
4855                }
4856                remaining &= remaining - 1;
4857            }
4858        }
4859        Ok(positions)
4860    }
4861
4862    /// Serialises the bitmap to git's on-disk EWAH byte layout: `bit_size`
4863    /// (u32 BE), word count (u32 BE), each compressed word (u64 BE), then the
4864    /// running-length-word position (u32 BE).
4865    pub fn to_bytes(&self) -> Vec<u8> {
4866        let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4867        self.append_bytes(&mut out);
4868        out
4869    }
4870
4871    fn append_bytes(&self, out: &mut Vec<u8>) {
4872        out.extend_from_slice(&self.bit_size.to_be_bytes());
4873        out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4874        for word in &self.words {
4875            out.extend_from_slice(&word.to_be_bytes());
4876        }
4877        out.extend_from_slice(&self.rlw_position.to_be_bytes());
4878    }
4879}
4880
4881/// Incremental EWAH compressed-buffer builder mirroring libgit's `ewah_add`.
4882///
4883/// The buffer is a sequence of blocks. Each block begins with a running-length
4884/// word (RLW) and is followed by zero or more literal words:
4885///   * bit 0      => value of the clean run words (0 or 1)
4886///   * bits 1..=32 => number of clean run words (32-bit field)
4887///   * bits 33..=63 => number of trailing literal words (31-bit field)
4888struct EwahBuilder {
4889    bit_size: u32,
4890    words: Vec<u64>,
4891    rlw_position: usize,
4892}
4893
4894impl EwahBuilder {
4895    fn new(bit_size: u32) -> Self {
4896        // Every EWAH buffer begins with an RLW, even an empty one.
4897        Self {
4898            bit_size,
4899            words: vec![0u64],
4900            rlw_position: 0,
4901        }
4902    }
4903
4904    fn rlw(&self) -> u64 {
4905        self.words[self.rlw_position]
4906    }
4907
4908    fn set_rlw(&mut self, value: u64) {
4909        self.words[self.rlw_position] = value;
4910    }
4911
4912    fn rlw_running_len(&self) -> u64 {
4913        (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
4914    }
4915
4916    fn rlw_running_bit(&self) -> bool {
4917        self.rlw() & 1 == 1
4918    }
4919
4920    fn rlw_literal_len(&self) -> u64 {
4921        self.rlw() >> 33
4922    }
4923
4924    fn set_running_bit(&mut self, bit: bool) {
4925        let mut value = self.rlw();
4926        value &= !1;
4927        value |= u64::from(bit);
4928        self.set_rlw(value);
4929    }
4930
4931    fn set_running_len(&mut self, len: u64) {
4932        let mut value = self.rlw();
4933        value &= !(EWAH_MAX_RUNNING_LEN << 1);
4934        value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
4935        self.set_rlw(value);
4936    }
4937
4938    fn set_literal_len(&mut self, len: u64) {
4939        let mut value = self.rlw();
4940        value &= (1u64 << 33) - 1;
4941        value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
4942        self.set_rlw(value);
4943    }
4944
4945    /// Begins a fresh RLW block at the end of the buffer.
4946    fn push_rlw(&mut self) {
4947        self.rlw_position = self.words.len();
4948        self.words.push(0);
4949    }
4950
4951    /// Appends `number` clean words whose bits are all `value`, mirroring
4952    /// libgit's `ewah_add_empty_words`.
4953    ///
4954    /// A run can only be merged into the current RLW when that RLW has not yet
4955    /// emitted any literal words and its run either is empty or already carries
4956    /// the same fill value. Otherwise a fresh RLW block must be started, because
4957    /// every block stores its run strictly before its literals.
4958    fn add_empty_words(&mut self, value: bool, mut number: u64) {
4959        while number > 0 {
4960            // The current RLW can absorb more run words only when it has no
4961            // literals yet, its run is either empty or already the right fill
4962            // value, and the 32-bit run-length field is not already saturated.
4963            let can_extend = self.rlw_literal_len() == 0
4964                && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
4965                && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
4966            if !can_extend {
4967                self.push_rlw();
4968            }
4969            if self.rlw_running_len() == 0 {
4970                self.set_running_bit(value);
4971            }
4972            let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
4973            let take = available.min(number);
4974            self.set_running_len(self.rlw_running_len() + take);
4975            number -= take;
4976        }
4977    }
4978
4979    /// Appends a single literal (dirty) word verbatim, mirroring libgit's
4980    /// `ewah_add_dirty_words` for a count of one.
4981    fn add_literal(&mut self, word: u64) {
4982        if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
4983            self.push_rlw();
4984        }
4985        let literal_len = self.rlw_literal_len();
4986        self.set_literal_len(literal_len + 1);
4987        self.words.push(word);
4988    }
4989
4990    fn finish(self) -> Result<EwahBitmap> {
4991        let rlw_position = u32::try_from(self.rlw_position)
4992            .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
4993        if self.words.len() > u32::MAX as usize {
4994            return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
4995        }
4996        Ok(EwahBitmap {
4997            bit_size: self.bit_size,
4998            words: self.words,
4999            rlw_position,
5000        })
5001    }
5002}
5003
5004/// Builder that assembles a reachability bitmap (`.bitmap`) for a pack.
5005///
5006/// The writer is constructed from the object layout of a pack (one
5007/// [`ObjectType`] per object, in pack order) and the pack's trailing checksum.
5008/// Callers then register one selected commit per [`add_commit`] call, supplying
5009/// the set of pack positions reachable from that commit. [`build`]/[`write`]
5010/// produce a [`PackBitmapIndex`] / serialised `.bitmap` bytes matching git's
5011/// on-disk format (signature `BITM`, version 1).
5012///
5013/// [`add_commit`]: PackBitmapWriter::add_commit
5014/// [`build`]: PackBitmapWriter::build
5015/// [`write`]: PackBitmapWriter::write
5016#[derive(Debug, Clone)]
5017pub struct PackBitmapWriter {
5018    format: ObjectFormat,
5019    pack_checksum: ObjectId,
5020    object_count: u32,
5021    commit_positions: Vec<u32>,
5022    tree_positions: Vec<u32>,
5023    blob_positions: Vec<u32>,
5024    tag_positions: Vec<u32>,
5025    name_hash_cache: Option<Vec<u32>>,
5026    selected: Vec<SelectedCommit>,
5027}
5028
5029#[derive(Debug, Clone)]
5030struct SelectedCommit {
5031    /// Oid-sorted `.idx` position (what the on-disk entry records). The
5032    /// commit's pack-order position lives in `reachable` with the rest of the
5033    /// bits.
5034    commit_index_position: u32,
5035    flags: u8,
5036    reachable: Vec<u32>,
5037}
5038
5039impl PackBitmapWriter {
5040    /// `OBJ_NONE` selection flag: this commit's bitmap is stored in full (no XOR
5041    /// compression against a previously selected commit). This is the only flag
5042    /// value this writer emits.
5043    pub const FLAG_NONE: u8 = 0;
5044
5045    /// Creates a writer for a pack whose objects (in pack order) have the given
5046    /// [`ObjectType`]s and whose trailing checksum is `pack_checksum`.
5047    ///
5048    /// Returns an error if the pack contains more than `u32::MAX` objects, if
5049    /// `pack_checksum`'s format does not match `format`, or if any object type
5050    /// is not one of the four reachable git object kinds.
5051    pub fn new(
5052        format: ObjectFormat,
5053        pack_checksum: ObjectId,
5054        object_types: &[ObjectType],
5055    ) -> Result<Self> {
5056        if object_types.len() > u32::MAX as usize {
5057            return Err(GitError::InvalidFormat(
5058                "too many objects for a pack bitmap".into(),
5059            ));
5060        }
5061        if pack_checksum.format() != format {
5062            return Err(GitError::InvalidObjectId(
5063                "pack checksum format does not match bitmap format".into(),
5064            ));
5065        }
5066        let object_count = object_types.len() as u32;
5067        let mut commit_positions = Vec::new();
5068        let mut tree_positions = Vec::new();
5069        let mut blob_positions = Vec::new();
5070        let mut tag_positions = Vec::new();
5071        for (index, object_type) in object_types.iter().enumerate() {
5072            let position = index as u32;
5073            match object_type {
5074                ObjectType::Commit => commit_positions.push(position),
5075                ObjectType::Tree => tree_positions.push(position),
5076                ObjectType::Blob => blob_positions.push(position),
5077                ObjectType::Tag => tag_positions.push(position),
5078            }
5079        }
5080        Ok(Self {
5081            format,
5082            pack_checksum,
5083            object_count,
5084            commit_positions,
5085            tree_positions,
5086            blob_positions,
5087            tag_positions,
5088            name_hash_cache: None,
5089            selected: Vec::new(),
5090        })
5091    }
5092
5093    /// Attaches a name-hash cache (one `u32` per object, in pack order). When
5094    /// set, the written bitmap advertises [`PackBitmapIndex::OPTION_HASH_CACHE`]
5095    /// and appends the cache after the bitmap entries, exactly as git does.
5096    ///
5097    /// Returns an error if the cache length does not equal the object count.
5098    pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5099        if cache.len() != self.object_count as usize {
5100            return Err(GitError::InvalidFormat(format!(
5101                "name hash cache has {} entries but pack has {} objects",
5102                cache.len(),
5103                self.object_count
5104            )));
5105        }
5106        self.name_hash_cache = Some(cache);
5107        Ok(self)
5108    }
5109
5110    /// Registers a selected commit and the pack positions reachable from it.
5111    ///
5112    /// `commit_position` is the *pack-order* position of the commit itself (the
5113    /// bit-number space); it must reference a commit object and is implicitly
5114    /// part of the reachable set. `commit_index_position` is the commit's
5115    /// position in the *oid-sorted* pack index — this is what the on-disk entry
5116    /// records (upstream `oid_pos`); bits and entry positions live in different
5117    /// spaces. `reachable` lists the pack-order positions of every object
5118    /// reachable from the commit (it may include or omit `commit_position`;
5119    /// duplicates are fine). All positions must be in range. The commit's full
5120    /// (non-XORed) bitmap is stored.
5121    pub fn add_commit(
5122        &mut self,
5123        commit_position: u32,
5124        commit_index_position: u32,
5125        reachable: &[u32],
5126    ) -> Result<()> {
5127        if commit_position >= self.object_count {
5128            return Err(GitError::InvalidFormat(format!(
5129                "commit position {commit_position} out of range for {} objects",
5130                self.object_count
5131            )));
5132        }
5133        if commit_index_position >= self.object_count {
5134            return Err(GitError::InvalidFormat(format!(
5135                "commit index position {commit_index_position} out of range for {} objects",
5136                self.object_count
5137            )));
5138        }
5139        if !self.commit_positions.contains(&commit_position) {
5140            return Err(GitError::InvalidFormat(format!(
5141                "bitmap commit position {commit_position} is not a commit object"
5142            )));
5143        }
5144        for &position in reachable {
5145            if position >= self.object_count {
5146                return Err(GitError::InvalidFormat(format!(
5147                    "reachable position {position} out of range for {} objects",
5148                    self.object_count
5149                )));
5150            }
5151        }
5152        let mut reachable = reachable.to_vec();
5153        reachable.push(commit_position);
5154        self.selected.push(SelectedCommit {
5155            commit_index_position,
5156            flags: Self::FLAG_NONE,
5157            reachable,
5158        });
5159        Ok(())
5160    }
5161
5162    /// Builds the in-memory [`PackBitmapIndex`] without serialising it.
5163    ///
5164    /// The resulting index always advertises
5165    /// [`PackBitmapIndex::OPTION_FULL_DAG`] (the four type bitmaps fully cover
5166    /// the pack) and, when a name-hash cache was attached,
5167    /// [`PackBitmapIndex::OPTION_HASH_CACHE`].
5168    pub fn build(&self) -> Result<PackBitmapIndex> {
5169        let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5170        let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5171        let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5172        let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5173
5174        let mut entries = Vec::with_capacity(self.selected.len());
5175        for selected in &self.selected {
5176            let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5177            entries.push(PackBitmapEntry {
5178                object_position: selected.commit_index_position,
5179                xor_offset: 0,
5180                flags: selected.flags,
5181                bitmap,
5182            });
5183        }
5184
5185        let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5186        if self.name_hash_cache.is_some() {
5187            options |= PackBitmapIndex::OPTION_HASH_CACHE;
5188        }
5189
5190        // The index checksum is only known once the body is serialised; the
5191        // dedicated `write` path fills it in. `build` reports a placeholder of
5192        // the correct format so the struct is self-consistent for callers that
5193        // only need the decoded bitmaps.
5194        let placeholder_checksum = ObjectId::null(self.format);
5195        Ok(PackBitmapIndex {
5196            version: 1,
5197            format: self.format,
5198            options,
5199            pack_checksum: self.pack_checksum.clone(),
5200            index_checksum: placeholder_checksum,
5201            type_bitmaps: PackBitmapTypeBitmaps {
5202                commits,
5203                trees,
5204                blobs,
5205                tags,
5206            },
5207            entries,
5208            name_hash_cache: self.name_hash_cache.clone(),
5209        })
5210    }
5211
5212    /// Builds and serialises the `.bitmap` file, returning the on-disk bytes
5213    /// (including the trailing index checksum).
5214    pub fn write(&self) -> Result<Vec<u8>> {
5215        self.build()?.write()
5216    }
5217}
5218
5219impl PackBitmapIndex {
5220    /// Serialises this index into git's on-disk `.bitmap` byte layout.
5221    ///
5222    /// This is the exact inverse of [`PackBitmapIndex::parse`]: signature
5223    /// `BITM`, version (u16 BE), options (u16 BE), entry count (u32 BE), the
5224    /// pack checksum, the four type bitmaps (commits, trees, blobs, tags), each
5225    /// commit entry (object position, XOR offset, flags, EWAH bitmap), the
5226    /// optional name-hash cache, and finally the trailing index checksum over
5227    /// everything written so far.
5228    ///
5229    /// The `index_checksum` field of `self` is ignored and recomputed from the
5230    /// serialised body. Returns an error for unsupported versions, mismatched
5231    /// object-id formats, an oversized entry table, or an inconsistent name-hash
5232    /// cache.
5233    pub fn write(&self) -> Result<Vec<u8>> {
5234        if self.version != 1 {
5235            return Err(GitError::Unsupported(format!(
5236                "bitmap index version {}",
5237                self.version
5238            )));
5239        }
5240        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
5241        if self.options & !known_options != 0 {
5242            return Err(GitError::Unsupported(format!(
5243                "bitmap index options {:#06x}",
5244                self.options & !known_options
5245            )));
5246        }
5247        if self.pack_checksum.format() != self.format {
5248            return Err(GitError::InvalidObjectId(
5249                "bitmap pack checksum format does not match index format".into(),
5250            ));
5251        }
5252        if self.entries.len() > u32::MAX as usize {
5253            return Err(GitError::InvalidFormat(
5254                "too many bitmap index entries".into(),
5255            ));
5256        }
5257        let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
5258        match (&self.name_hash_cache, want_cache) {
5259            (Some(_), false) => {
5260                return Err(GitError::InvalidFormat(
5261                    "name hash cache present without OPTION_HASH_CACHE".into(),
5262                ));
5263            }
5264            (None, true) => {
5265                return Err(GitError::InvalidFormat(
5266                    "OPTION_HASH_CACHE set without a name hash cache".into(),
5267                ));
5268            }
5269            _ => {}
5270        }
5271
5272        let mut out = Vec::new();
5273        out.extend_from_slice(b"BITM");
5274        out.extend_from_slice(&self.version.to_be_bytes());
5275        out.extend_from_slice(&self.options.to_be_bytes());
5276        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
5277        out.extend_from_slice(self.pack_checksum.as_bytes());
5278
5279        self.type_bitmaps.commits.append_bytes(&mut out);
5280        self.type_bitmaps.trees.append_bytes(&mut out);
5281        self.type_bitmaps.blobs.append_bytes(&mut out);
5282        self.type_bitmaps.tags.append_bytes(&mut out);
5283
5284        for (idx, entry) in self.entries.iter().enumerate() {
5285            if entry.xor_offset as usize > idx {
5286                return Err(GitError::InvalidFormat(
5287                    "bitmap index entry has invalid XOR offset".into(),
5288                ));
5289            }
5290            out.extend_from_slice(&entry.object_position.to_be_bytes());
5291            out.push(entry.xor_offset);
5292            out.push(entry.flags);
5293            entry.bitmap.append_bytes(&mut out);
5294        }
5295
5296        if let Some(cache) = &self.name_hash_cache {
5297            for value in cache {
5298                out.extend_from_slice(&value.to_be_bytes());
5299            }
5300        }
5301
5302        let checksum = sley_core::digest_bytes(self.format, &out)?;
5303        out.extend_from_slice(checksum.as_bytes());
5304        Ok(out)
5305    }
5306}
5307
5308/// Convenience wrapper that builds a `.bitmap` file in one call.
5309///
5310/// `object_types` lists the [`ObjectType`] of every pack object in pack order,
5311/// `pack_checksum` is the pack's trailing checksum, and `commits` carries, per
5312/// selected commit, `(pack_position, index_position, reachable_pack_positions)`
5313/// (see [`PackBitmapWriter::add_commit`] for the two position spaces). An
5314/// optional `name_hash_cache` (one entry per object) may be supplied to emit
5315/// the hash-cache extension.
5316pub fn write_bitmap(
5317    format: ObjectFormat,
5318    pack_checksum: ObjectId,
5319    object_types: &[ObjectType],
5320    commits: &[(u32, u32, Vec<u32>)],
5321    name_hash_cache: Option<Vec<u32>>,
5322) -> Result<Vec<u8>> {
5323    let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5324    if let Some(cache) = name_hash_cache {
5325        writer = writer.with_name_hash_cache(cache)?;
5326    }
5327    for (commit_position, commit_index_position, reachable) in commits {
5328        writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5329    }
5330    writer.write()
5331}
5332
5333#[cfg(test)]
5334mod tests {
5335    use super::*;
5336    use flate2::Compression;
5337    use flate2::read::ZlibDecoder;
5338    use flate2::write::ZlibEncoder;
5339    use std::fs;
5340    use std::io::Read;
5341    use std::io::Write;
5342    use std::path::{Path, PathBuf};
5343    use std::process::Command;
5344    use std::time::{SystemTime, UNIX_EPOCH};
5345
5346    fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5347        PackWriteOptions::new()
5348            .with_prefer_ofs_delta(prefer_ofs_delta)
5349            .with_reorder(false)
5350    }
5351
5352    #[test]
5353    fn parses_single_blob_pack() {
5354        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5355        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5356        assert_eq!(parsed.version, 2);
5357        assert_eq!(parsed.entries.len(), 1);
5358        let object = &parsed.entries[0].object;
5359        assert_eq!(object.object_type, ObjectType::Blob);
5360        assert_eq!(object.body, b"hello\n");
5361        assert_eq!(
5362            parsed.entries[0].entry.oid.to_hex(),
5363            "ce013625030ba8dba906f756967f9e9ca394464a"
5364        );
5365    }
5366
5367    #[test]
5368    fn parses_single_blob_pack_sha256() {
5369        let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5370        let parsed =
5371            PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5372        assert_eq!(parsed.version, 2);
5373        assert_eq!(parsed.entries.len(), 1);
5374        let object = &parsed.entries[0].object;
5375        assert_eq!(object.object_type, ObjectType::Blob);
5376        assert_eq!(object.body, b"hello\n");
5377        assert_eq!(
5378            parsed.entries[0].entry.oid,
5379            object
5380                .object_id(ObjectFormat::Sha256)
5381                .expect("test operation should succeed")
5382        );
5383    }
5384
5385    #[test]
5386    fn parses_bundle_pack_payload_with_bundle_format() {
5387        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5388        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5389            .expect("test operation should succeed");
5390        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5391            .into_bytes()
5392            .into_iter()
5393            .chain(pack)
5394            .collect::<Vec<_>>();
5395        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5396            .expect("test operation should succeed");
5397
5398        let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5399        assert_eq!(parsed.entries.len(), 1);
5400        assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5401        assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5402    }
5403
5404    /// Build a pack whose single blob entry header LIES about its decompressed
5405    /// size: it declares `declared_size` while the actual zlib payload only
5406    /// inflates to `real_body`. A short `real_body` plus a `declared_size` of
5407    /// `u64::MAX` is the decompression-bomb shape — the header claims terabytes
5408    /// from a handful of compressed bytes.
5409    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5410        let mut pack = Vec::new();
5411        pack.extend_from_slice(b"PACK");
5412        pack.extend_from_slice(&2u32.to_be_bytes());
5413        pack.extend_from_slice(&1u32.to_be_bytes());
5414        // Object type 3 == blob; size varint encodes the *attacker-declared* size.
5415        write_pack_entry_header_kind(&mut pack, 3, declared_size);
5416        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5417        encoder
5418            .write_all(real_body)
5419            .expect("test operation should succeed");
5420        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5421        let checksum =
5422            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5423        pack.extend_from_slice(checksum.as_bytes());
5424        pack
5425    }
5426
5427    /// Regression: a crafted pack object header declaring a gigantic decompressed
5428    /// size with a tiny compressed payload must NOT drive an up-front
5429    /// reservation/allocation of that declared size (OOM/abort). sley#2: the
5430    /// header `size` is attacker-controlled over the network (install_raw_pack →
5431    /// sley-fetch), so it must be validated/bounded before any `Vec::reserve`.
5432    ///
5433    /// On the unfixed code, `inflate_into` did `out.reserve(header.size as usize)`
5434    /// with `header.size == u64::MAX`, which panics with "capacity overflow" (or
5435    /// aborts on alloc failure) *before* the size-mismatch check could fire. We
5436    /// run parse on a worker thread so that panic surfaces as a `join()` error
5437    /// rather than killing the test process; the fix turns this into a clean
5438    /// `Err` returned normally.
5439    #[test]
5440    fn rejects_decompression_bomb_header_without_oom() {
5441        for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5442            let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5443            let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5444            let result = handle.join();
5445            // The parse thread must not have panicked/aborted on a huge reserve.
5446            assert!(
5447                result.is_ok(),
5448                "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5449            );
5450            // And parsing must reject the lie (decoded len != declared size).
5451            let parse_result = result.expect("parse thread should not panic on a bomb header");
5452            assert!(
5453                parse_result.is_err(),
5454                "bomb header (declared={declared}) should be rejected as invalid"
5455            );
5456        }
5457    }
5458
5459    /// Build a 2-object pack: a real base blob followed by a delta (ref or ofs)
5460    /// whose *result-size* varint lies, declaring `declared_result_size`, while
5461    /// carrying a tiny real instruction stream. The delta's base-size varint is
5462    /// set correctly (so the base-size check at the top of `apply_pack_delta`
5463    /// passes and we reach the result reservation). Used to drive the sley#35
5464    /// delta-result-size bomb.
5465    fn lying_result_size_delta_pack(
5466        format: ObjectFormat,
5467        declared_result_size: u64,
5468        delta_kind: DeltaKind,
5469    ) -> Vec<u8> {
5470        let base = b"hello";
5471        let result = b"hello world"; // real produced length = 11
5472
5473        // Hand-build a delta with a truthful base-size and a LYING result-size.
5474        let mut delta = Vec::new();
5475        write_delta_varint(&mut delta, base.len() as u64);
5476        write_delta_varint(&mut delta, declared_result_size);
5477        // Real instructions: copy `base` then insert " world".
5478        let suffix = &result[base.len()..];
5479        delta.push(0x90); // copy, 1 size byte present (bit 0x10)
5480        delta.push(base.len() as u8);
5481        delta.push(suffix.len() as u8);
5482        delta.extend_from_slice(suffix);
5483
5484        let mut pack = Vec::new();
5485        pack.extend_from_slice(b"PACK");
5486        pack.extend_from_slice(&2u32.to_be_bytes());
5487        pack.extend_from_slice(&2u32.to_be_bytes());
5488
5489        let base_offset = pack.len();
5490        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
5491        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5492        encoder
5493            .write_all(base)
5494            .expect("test operation should succeed");
5495        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5496
5497        let delta_offset = pack.len();
5498        write_pack_entry_header_kind(
5499            &mut pack,
5500            match delta_kind {
5501                DeltaKind::Offset => 6,
5502                DeltaKind::Ref => 7,
5503            },
5504            delta.len() as u64,
5505        );
5506        match delta_kind {
5507            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
5508            DeltaKind::Ref => {
5509                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
5510                    .expect("test operation should succeed");
5511                pack.extend_from_slice(base_oid.as_bytes());
5512            }
5513        }
5514        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5515        encoder
5516            .write_all(&delta)
5517            .expect("test operation should succeed");
5518        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5519
5520        let checksum =
5521            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5522        pack.extend_from_slice(checksum.as_bytes());
5523        pack
5524    }
5525
5526    /// Regression (sley#35): the 2nd instance of the sley#2 decompression-bomb
5527    /// class. `apply_pack_delta` read an attacker-controlled `result_size` varint
5528    /// from a network delta and fed it straight to `Vec::with_capacity`. A tiny
5529    /// delta declaring `result_size == u64::MAX` (or ~1 TiB) aborts the process
5530    /// ("capacity overflow"/alloc failure, SIGABRT) BEFORE the post-decode
5531    /// size-mismatch check can reject the lie. Both ref-delta and ofs-delta paths
5532    /// reach the same reservation, so both must be safe. We resolve the pack on a
5533    /// worker thread so an abort/panic surfaces as a `join()` error rather than
5534    /// killing the whole test binary; the fix turns the bomb into a clean `Err`.
5535    #[test]
5536    fn rejects_delta_result_size_bomb_without_oom() {
5537        let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
5538        for &declared in bombs {
5539            for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5540                let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
5541                let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5542                let join_result = handle.join();
5543                assert!(
5544                    join_result.is_ok(),
5545                    "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
5546                     instead of erroring cleanly"
5547                );
5548                let parse_result =
5549                    join_result.expect("parse thread should not panic on a delta bomb");
5550                assert!(
5551                    parse_result.is_err(),
5552                    "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
5553                     as invalid (result.len() != declared)"
5554                );
5555            }
5556        }
5557    }
5558
5559    /// A legitimate (truthful) delta whose result-size varint matches the real
5560    /// produced length must still resolve correctly — the bound only caps the
5561    /// speculative reservation, it must not break real delta application.
5562    #[test]
5563    fn applies_legitimate_delta_after_result_size_bound() {
5564        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5565            let base = b"hello";
5566            let result = b"hello world";
5567            let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
5568            let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
5569            assert_eq!(parsed.entries.len(), 2);
5570            assert_eq!(parsed.entries[0].object.body, base);
5571            assert_eq!(parsed.entries[1].object.body, result);
5572        }
5573    }
5574
5575    #[test]
5576    fn bounded_inflate_reserve_caps_attacker_declared_size() {
5577        // A tiny compressed input can't justify a multi-gigabyte reservation.
5578        assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
5579        // The absolute ceiling caps even a large input-justified hint.
5580        assert_eq!(
5581            bounded_inflate_reserve(usize::MAX, usize::MAX),
5582            MAX_INFLATE_RESERVE
5583        );
5584        // A modest legitimate hint is preserved unchanged (no regression for real
5585        // objects): 1000 bytes of output from 500 bytes of input is well within
5586        // both bounds.
5587        assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
5588        // Floor of 64 for tiny hints.
5589        assert_eq!(bounded_inflate_reserve(0, 0), 64);
5590    }
5591
5592    #[test]
5593    fn rejects_bundle_pack_payload_with_wrong_object_format() {
5594        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5595        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
5596            .expect("test operation should succeed");
5597        let bundle_bytes =
5598            format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
5599                .into_bytes()
5600                .into_iter()
5601                .chain(pack)
5602                .collect::<Vec<_>>();
5603        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5604            .expect("test operation should succeed");
5605
5606        assert!(PackFile::parse_bundle(&bundle).is_err());
5607    }
5608
5609    fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5610        let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5611        let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5612        let owned_view =
5613            PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5614                .expect("test operation should succeed");
5615
5616        assert_eq!(view.version, owned.version);
5617        assert_eq!(view.count, owned.entries.len());
5618        assert_eq!(view.count(), owned.entries.len());
5619        assert_eq!(view.fanout(), &owned.fanout);
5620        assert_eq!(view.pack_checksum, owned.pack_checksum);
5621        assert_eq!(view.index_checksum, owned.index_checksum);
5622        assert_eq!(owned_view.version, owned.version);
5623        assert_eq!(owned_view.count(), owned.entries.len());
5624        assert_eq!(owned_view.fanout(), &owned.fanout);
5625        assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5626        assert_eq!(owned_view.index_checksum, owned.index_checksum);
5627        for entry in &owned.entries {
5628            let owned_found = owned
5629                .find(&entry.oid)
5630                .expect("test operation should succeed");
5631            let expected = Some(PackIndexLookup {
5632                crc32: owned_found.crc32,
5633                offset: owned_found.offset,
5634            });
5635            assert_eq!(view.find(&entry.oid), expected);
5636            assert_eq!(owned_view.find(&entry.oid), expected);
5637        }
5638    }
5639
5640    #[test]
5641    fn writes_pack_and_index_that_round_trip() {
5642        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
5643        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5644            .expect("test operation should succeed");
5645        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5646        let index =
5647            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5648        let oid = object
5649            .object_id(ObjectFormat::Sha1)
5650            .expect("test operation should succeed");
5651        assert_eq!(pack.entries[0].object, object);
5652        assert_eq!(index.pack_checksum, pack.checksum);
5653        assert_eq!(
5654            index
5655                .find(&oid)
5656                .expect("test operation should succeed")
5657                .offset,
5658            12
5659        );
5660    }
5661
5662    #[test]
5663    fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
5664        let objects = (0..8)
5665            .map(|idx| {
5666                EncodedObject::new(
5667                    ObjectType::Blob,
5668                    format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
5669                )
5670            })
5671            .collect::<Vec<_>>();
5672        let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5673            .expect("test operation should succeed");
5674
5675        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
5676
5677        let view =
5678            PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
5679        let missing = sley_core::object_id_for_bytes(
5680            ObjectFormat::Sha1,
5681            "blob",
5682            b"not present in borrowed index\n",
5683        )
5684        .expect("test operation should succeed");
5685        assert_eq!(view.find(&missing), None);
5686    }
5687
5688    #[test]
5689    fn writes_sha256_pack_and_index_that_round_trip() {
5690        let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
5691        let written =
5692            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5693                .expect("test operation should succeed");
5694        let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
5695            .expect("test operation should succeed");
5696        let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
5697            .expect("test operation should succeed");
5698        let oid = object
5699            .object_id(ObjectFormat::Sha256)
5700            .expect("test operation should succeed");
5701        assert_eq!(pack.entries[0].object, object);
5702        assert_eq!(index.pack_checksum, pack.checksum);
5703        assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
5704        assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
5705        assert_eq!(
5706            index
5707                .find(&oid)
5708                .expect("test operation should succeed")
5709                .offset,
5710            12
5711        );
5712    }
5713
5714    #[test]
5715    fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
5716        let objects = (0..4)
5717            .map(|idx| {
5718                EncodedObject::new(
5719                    ObjectType::Blob,
5720                    format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
5721                )
5722            })
5723            .collect::<Vec<_>>();
5724        let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
5725            .expect("test operation should succeed");
5726
5727        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
5728    }
5729
5730    #[test]
5731    fn indexes_existing_sha256_pack_bytes() {
5732        let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
5733        let written =
5734            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5735                .expect("test operation should succeed");
5736
5737        let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
5738            .expect("test operation should succeed");
5739        let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
5740            .expect("test operation should succeed");
5741
5742        assert_eq!(indexed.pack_checksum, written.checksum);
5743        assert_eq!(indexed.entries, written.entries);
5744        assert_eq!(index.pack_checksum, written.checksum);
5745        assert_eq!(index.entries, written.entries);
5746    }
5747
5748    #[test]
5749    fn indexes_existing_delta_pack_bytes() {
5750        let (base, changed) = similar_blob_objects();
5751        let options = delta_pack_options(true);
5752        let written = PackFile::write_packed_with_options(
5753            &[base, changed.clone()],
5754            ObjectFormat::Sha1,
5755            &options,
5756        )
5757        .expect("test operation should succeed");
5758
5759        let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
5760            .expect("test operation should succeed");
5761        let index =
5762            PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
5763        let changed_oid = changed
5764            .object_id(ObjectFormat::Sha1)
5765            .expect("test operation should succeed");
5766
5767        assert_eq!(indexed.pack_checksum, written.checksum);
5768        assert_eq!(indexed.entries, written.entries);
5769        assert_eq!(
5770            index
5771                .find(&changed_oid)
5772                .expect("test operation should succeed")
5773                .offset,
5774            written.entries[1].offset
5775        );
5776        assert_eq!(
5777            index
5778                .find(&changed_oid)
5779                .expect("test operation should succeed")
5780                .crc32,
5781            written.entries[1].crc32
5782        );
5783    }
5784
5785    #[test]
5786    fn writes_ref_delta_pack_and_index_that_round_trip() {
5787        let (base, changed) = similar_blob_objects();
5788        let options = delta_pack_options(false);
5789        let written = PackFile::write_packed_with_options(
5790            &[base.clone(), changed.clone()],
5791            ObjectFormat::Sha1,
5792            &options,
5793        )
5794        .expect("test operation should succeed");
5795        let mut second_offset = written.entries[1].offset as usize;
5796        let header = parse_entry_header(&written.pack, &mut second_offset)
5797            .expect("test operation should succeed");
5798        assert_eq!(header.kind, PackObjectKind::RefDelta);
5799
5800        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5801        let index =
5802            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5803        let oid = changed
5804            .object_id(ObjectFormat::Sha1)
5805            .expect("test operation should succeed");
5806        assert_eq!(pack.entries[0].object, base);
5807        assert_eq!(pack.entries[1].object, changed);
5808        assert_eq!(index.pack_checksum, pack.checksum);
5809        assert_eq!(
5810            index
5811                .find(&oid)
5812                .expect("test operation should succeed")
5813                .offset,
5814            written.entries[1].offset
5815        );
5816    }
5817
5818    #[test]
5819    fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
5820        let (base, changed) = similar_blob_objects();
5821        let options = delta_pack_options(true);
5822        let written = PackFile::write_packed_with_options(
5823            &[base, changed.clone()],
5824            ObjectFormat::Sha1,
5825            &options,
5826        )
5827        .expect("test operation should succeed");
5828        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5829        let mut second = written.entries[1].offset as usize;
5830        assert_eq!(
5831            parse_entry_header(&written.pack, &mut second)
5832                .expect("test operation should succeed")
5833                .kind,
5834            PackObjectKind::OfsDelta
5835        );
5836        // Ground truth from a full parse; single-object decode must match at every offset.
5837        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5838        for po in &parsed.entries {
5839            let got =
5840                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5841                    Ok(None)
5842                })
5843                .expect("test operation should succeed");
5844            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
5845        }
5846    }
5847
5848    /// A [`HeaderTypeCache`] over a plain map, for asserting the cached header
5849    /// read is byte-identical to the uncached one cold and warm (sley#26).
5850    #[derive(Default)]
5851    struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5852
5853    impl HeaderTypeCache for MapHeaderTypeCache {
5854        fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5855            self.0.get(&pack_offset).copied()
5856        }
5857        fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5858            self.0.insert(pack_offset, header);
5859        }
5860    }
5861
5862    #[test]
5863    fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
5864        let (base, changed) = similar_blob_objects();
5865        let options = delta_pack_options(true);
5866        let written =
5867            PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
5868                .expect("test operation should succeed");
5869        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5870        let mut second = written.entries[1].offset as usize;
5871        assert_eq!(
5872            parse_entry_header(&written.pack, &mut second)
5873                .expect("test operation should succeed")
5874                .kind,
5875            PackObjectKind::OfsDelta
5876        );
5877
5878        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5879        let mut cache = MapHeaderTypeCache::default();
5880        for po in &parsed.entries {
5881            let uncached =
5882                read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5883                    Ok(None)
5884                })
5885                .expect("test operation should succeed");
5886            // Type inherited from the chain base; size is the inflated body length.
5887            assert_eq!(
5888                uncached,
5889                (po.object.object_type, po.object.body.len() as u64),
5890                "uncached header at offset {}",
5891                po.entry.offset
5892            );
5893            // Cold cache: must agree with the uncached read and populate the memo.
5894            let cold = read_object_header_at_with_cache(
5895                &written.pack,
5896                po.entry.offset,
5897                ObjectFormat::Sha1,
5898                |_| Ok(None),
5899                &mut cache,
5900            )
5901            .expect("test operation should succeed");
5902            assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
5903        }
5904        // Warm cache: every offset now resolves from the memo and is still correct,
5905        // proving the fast path does not change behavior (sley#26).
5906        for po in &parsed.entries {
5907            let warm = read_object_header_at_with_cache(
5908                &written.pack,
5909                po.entry.offset,
5910                ObjectFormat::Sha1,
5911                |_| panic!("warm cache must not re-walk the chain"),
5912                &mut cache,
5913            )
5914            .expect("test operation should succeed");
5915            assert_eq!(
5916                warm,
5917                (po.object.object_type, po.object.body.len() as u64),
5918                "warm cache at offset {}",
5919                po.entry.offset
5920            );
5921        }
5922    }
5923
5924    #[test]
5925    fn read_object_at_matches_full_parse_for_ref_delta_pack() {
5926        let (base, changed) = similar_blob_objects();
5927        let options = delta_pack_options(false);
5928        let written = PackFile::write_packed_with_options(
5929            &[base, changed.clone()],
5930            ObjectFormat::Sha1,
5931            &options,
5932        )
5933        .expect("test operation should succeed");
5934        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5935        let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
5936            .entries
5937            .iter()
5938            .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
5939            .collect();
5940        for po in &parsed.entries {
5941            let got =
5942                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
5943                    Ok(by_oid.get(oid).cloned())
5944                })
5945                .expect("test operation should succeed");
5946            assert_eq!(*got, po.object);
5947        }
5948    }
5949
5950    /// A test-only [`PackDeltaCache`] that records every decode and counts hits,
5951    /// used to prove the cached decode path is byte-identical to the uncached
5952    /// one and that bases are reused across reads.
5953    #[derive(Default)]
5954    struct CountingDeltaCache {
5955        map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
5956        hits: std::cell::Cell<usize>,
5957        inserts: std::cell::Cell<usize>,
5958    }
5959
5960    impl PackDeltaCache for CountingDeltaCache {
5961        fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
5962            let hit = self.map.borrow().get(&offset).cloned();
5963            if hit.is_some() {
5964                self.hits.set(self.hits.get() + 1);
5965            }
5966            hit
5967        }
5968        fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
5969            self.inserts.set(self.inserts.get() + 1);
5970            self.map.borrow_mut().insert(offset, object);
5971        }
5972    }
5973
5974    #[test]
5975    fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
5976        // A multi-object pack with a real ofs-delta chain so the cache has bases
5977        // to reuse. Build several similar blobs to encourage deltification.
5978        let mut objects = Vec::new();
5979        for idx in 0..8u32 {
5980            let mut body = vec![b'x'; 4096];
5981            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
5982            objects.push(EncodedObject::new(ObjectType::Blob, body));
5983        }
5984        let options = delta_pack_options(true);
5985        let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5986            .expect("test operation should succeed");
5987        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5988
5989        let cache = CountingDeltaCache::default();
5990        // Read every object twice through the cache; each result must equal the
5991        // ground-truth from the full parse, byte for byte, both times.
5992        for _ in 0..2 {
5993            for po in &parsed.entries {
5994                let got = read_object_at_with_cache_arc(
5995                    &written.pack,
5996                    po.entry.offset,
5997                    ObjectFormat::Sha1,
5998                    |_| Ok(None),
5999                    &cache,
6000                )
6001                .expect("test operation should succeed");
6002                assert_eq!(*got, po.object, "offset {}", po.entry.offset);
6003            }
6004        }
6005        // The second pass reads everything straight from the cache, so there must
6006        // be at least one hit (proving reuse, not just correctness).
6007        assert!(cache.hits.get() > 0, "cache never served a warm object");
6008    }
6009
6010    #[test]
6011    fn writes_ofs_delta_pack_and_index_that_round_trip() {
6012        let (base, changed) = similar_blob_objects();
6013        let options = delta_pack_options(true);
6014        let written = PackFile::write_packed_with_options(
6015            &[base.clone(), changed.clone()],
6016            ObjectFormat::Sha1,
6017            &options,
6018        )
6019        .expect("test operation should succeed");
6020        let mut second_offset = written.entries[1].offset as usize;
6021        let header = parse_entry_header(&written.pack, &mut second_offset)
6022            .expect("test operation should succeed");
6023        assert_eq!(header.kind, PackObjectKind::OfsDelta);
6024
6025        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6026        let index =
6027            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6028        let oid = changed
6029            .object_id(ObjectFormat::Sha1)
6030            .expect("test operation should succeed");
6031        assert_eq!(pack.entries[0].object, base);
6032        assert_eq!(pack.entries[1].object, changed);
6033        assert_eq!(index.pack_checksum, pack.checksum);
6034        assert_eq!(
6035            index
6036                .find(&oid)
6037                .expect("test operation should succeed")
6038                .offset,
6039            written.entries[1].offset
6040        );
6041    }
6042
6043    #[test]
6044    fn resolves_ofs_delta_pack_entry() {
6045        let base = b"hello";
6046        let result = b"hello world";
6047        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
6048        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6049        assert_eq!(parsed.entries.len(), 2);
6050        assert_eq!(parsed.entries[0].object.body, base);
6051        assert_eq!(parsed.entries[1].object.body, result);
6052        assert_eq!(
6053            parsed.entries[1].entry.oid,
6054            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6055                .expect("test operation should succeed")
6056        );
6057    }
6058
6059    #[test]
6060    fn resolves_ref_delta_pack_entry() {
6061        let base = b"hello";
6062        let result = b"hello world";
6063        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
6064        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6065        assert_eq!(parsed.entries.len(), 2);
6066        assert_eq!(parsed.entries[0].object.body, base);
6067        assert_eq!(parsed.entries[1].object.body, result);
6068        assert_eq!(
6069            parsed.entries[1].entry.oid,
6070            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6071                .expect("test operation should succeed")
6072        );
6073    }
6074
6075    #[test]
6076    fn resolves_thin_ref_delta_pack_entry_with_external_base() {
6077        let base = b"hello";
6078        let result = b"hello world";
6079        let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
6080        assert!(PackFile::parse_sha1(&pack).is_err());
6081
6082        let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
6083            .expect("test operation should succeed");
6084        let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
6085            if oid == &base_oid {
6086                Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
6087            } else {
6088                Ok(None)
6089            }
6090        })
6091        .expect("test operation should succeed");
6092        assert_eq!(parsed.entries.len(), 1);
6093        assert_eq!(parsed.entries[0].object.body, result);
6094        assert_eq!(
6095            parsed.entries[0].entry.oid,
6096            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6097                .expect("test operation should succeed")
6098        );
6099    }
6100
6101    #[test]
6102    fn rejects_bad_pack_checksum() {
6103        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6104        let last = pack.len() - 1;
6105        pack[last] ^= 1;
6106        assert!(PackFile::parse_sha1(&pack).is_err());
6107    }
6108
6109    #[test]
6110    fn raw_pack_index_rejects_bad_pack_checksum() {
6111        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6112        let last = pack.len() - 1;
6113        pack[last] ^= 1;
6114        assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
6115    }
6116
6117    #[test]
6118    fn pack_index_writer_rejects_duplicate_object_ids() {
6119        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
6120            .expect("test operation should succeed");
6121        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6122            .expect("test operation should succeed");
6123        let entries = vec![
6124            PackIndexEntry {
6125                oid,
6126                crc32: 1,
6127                offset: 12,
6128            },
6129            PackIndexEntry {
6130                oid,
6131                crc32: 2,
6132                offset: 24,
6133            },
6134        ];
6135        assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
6136    }
6137
6138    #[test]
6139    fn parses_single_entry_pack_index() {
6140        let oid = ObjectId::from_hex(
6141            ObjectFormat::Sha1,
6142            "ce013625030ba8dba906f756967f9e9ca394464a",
6143        )
6144        .expect("test operation should succeed");
6145        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6146            .expect("test operation should succeed");
6147        let index = single_entry_index(
6148            ObjectFormat::Sha1,
6149            oid,
6150            0x1234_5678,
6151            12,
6152            pack_checksum.clone(),
6153        );
6154        let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
6155        assert_eq!(parsed.version, 2);
6156        assert_eq!(parsed.pack_checksum, pack_checksum);
6157        assert_eq!(parsed.entries.len(), 1);
6158        assert_eq!(
6159            parsed
6160                .find(&oid)
6161                .expect("test operation should succeed")
6162                .offset,
6163            12
6164        );
6165        assert_eq!(
6166            parsed
6167                .find(&oid)
6168                .expect("test operation should succeed")
6169                .crc32,
6170            0x1234_5678
6171        );
6172        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6173    }
6174
6175    #[test]
6176    fn parses_single_entry_pack_index_v1() {
6177        let oid = ObjectId::from_hex(
6178            ObjectFormat::Sha1,
6179            "ce013625030ba8dba906f756967f9e9ca394464a",
6180        )
6181        .expect("test operation should succeed");
6182        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6183            .expect("test operation should succeed");
6184        let index =
6185            single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
6186        let parsed =
6187            PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
6188        assert_eq!(parsed.version, 1);
6189        assert_eq!(parsed.pack_checksum, pack_checksum);
6190        assert_eq!(parsed.entries.len(), 1);
6191        assert_eq!(
6192            parsed
6193                .find(&oid)
6194                .expect("test operation should succeed")
6195                .offset,
6196            0x1234_5678
6197        );
6198        assert_eq!(
6199            parsed
6200                .find(&oid)
6201                .expect("test operation should succeed")
6202                .crc32,
6203            0
6204        );
6205        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6206    }
6207
6208    #[test]
6209    fn rejects_bad_pack_index_v1_checksum() {
6210        let oid = ObjectId::from_hex(
6211            ObjectFormat::Sha1,
6212            "ce013625030ba8dba906f756967f9e9ca394464a",
6213        )
6214        .expect("test operation should succeed");
6215        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6216            .expect("test operation should succeed");
6217        let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
6218        let last = index.len() - 1;
6219        index[last] ^= 1;
6220        assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
6221    }
6222
6223    #[test]
6224    fn pack_index_view_reads_v2_large_offsets() {
6225        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
6226            .expect("test operation should succeed");
6227        let second =
6228            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
6229                .expect("test operation should succeed");
6230        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6231            .expect("test operation should succeed");
6232        let entries = vec![
6233            PackIndexEntry {
6234                oid: first,
6235                crc32: 0x1111_2222,
6236                offset: 0x8000_0000,
6237            },
6238            PackIndexEntry {
6239                oid: second,
6240                crc32: 0x3333_4444,
6241                offset: 0x1_0000_0042,
6242            },
6243        ];
6244        let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
6245            .expect("test operation should succeed");
6246
6247        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6248        let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
6249            .expect("test operation should succeed");
6250        for entry in entries {
6251            assert_eq!(
6252                view.find(&entry.oid),
6253                Some(PackIndexLookup {
6254                    crc32: entry.crc32,
6255                    offset: entry.offset,
6256                })
6257            );
6258        }
6259    }
6260
6261    #[test]
6262    fn pack_index_view_default_parse_checks_index_checksum() {
6263        let oid = ObjectId::from_hex(
6264            ObjectFormat::Sha1,
6265            "ce013625030ba8dba906f756967f9e9ca394464a",
6266        )
6267        .expect("test operation should succeed");
6268        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6269            .expect("test operation should succeed");
6270        let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
6271        let last = index.len() - 1;
6272        index[last] ^= 1;
6273
6274        assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
6275        let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
6276            .expect("test operation should succeed");
6277        let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
6278            Arc::from(index.clone().into_boxed_slice()),
6279            ObjectFormat::Sha1,
6280        )
6281        .expect("test operation should succeed");
6282        assert_eq!(
6283            view.find(&oid),
6284            Some(PackIndexLookup {
6285                crc32: 0x1234_5678,
6286                offset: 12,
6287            })
6288        );
6289        assert_eq!(
6290            trusted_view.find(&oid),
6291            Some(PackIndexLookup {
6292                crc32: 0x1234_5678,
6293                offset: 12,
6294            })
6295        );
6296    }
6297
6298    #[test]
6299    fn parses_pack_reverse_index() {
6300        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6301            .expect("test operation should succeed");
6302        let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
6303            .expect("test operation should succeed");
6304        let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
6305            .expect("test operation should succeed");
6306        assert_eq!(parsed.version, 1);
6307        assert_eq!(parsed.format, ObjectFormat::Sha1);
6308        assert_eq!(parsed.positions, vec![2, 0, 1]);
6309        assert_eq!(parsed.pack_checksum, pack_checksum);
6310        assert_eq!(
6311            PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
6312                .expect("test operation should succeed"),
6313            reverse_index
6314        );
6315    }
6316
6317    #[test]
6318    fn rejects_bad_pack_reverse_index_checksum() {
6319        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6320            .expect("test operation should succeed");
6321        let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
6322            .expect("test operation should succeed");
6323        let last = reverse_index.len() - 1;
6324        reverse_index[last] ^= 1;
6325        assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
6326    }
6327
6328    #[test]
6329    fn rejects_bad_pack_reverse_index_positions() {
6330        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6331            .expect("test operation should succeed");
6332        let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
6333        assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
6334        let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
6335        assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
6336        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6337            .expect("test operation should succeed");
6338        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
6339        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
6340    }
6341
6342    #[test]
6343    fn parses_pack_mtimes() {
6344        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6345            .expect("test operation should succeed");
6346        let mtimes = PackMtimes::write(
6347            ObjectFormat::Sha1,
6348            &[1, 1_700_000_000, u32::MAX],
6349            &pack_checksum,
6350        )
6351        .expect("test operation should succeed");
6352        let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
6353            .expect("test operation should succeed");
6354        assert_eq!(parsed.version, 1);
6355        assert_eq!(parsed.format, ObjectFormat::Sha1);
6356        assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
6357        assert_eq!(parsed.pack_checksum, pack_checksum);
6358        assert_eq!(
6359            PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
6360                .expect("test operation should succeed"),
6361            mtimes
6362        );
6363    }
6364
6365    #[test]
6366    fn rejects_bad_pack_mtimes_checksum() {
6367        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6368            .expect("test operation should succeed");
6369        let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
6370            .expect("test operation should succeed");
6371        let last = mtimes.len() - 1;
6372        mtimes[last] ^= 1;
6373        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6374    }
6375
    // Feeds `PackMtimes::parse` two structurally invalid inputs and expects an
    // error for each.
    #[test]
    fn rejects_bad_pack_mtimes_shape() {
        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
            .expect("test operation should succeed");
        // Case 1: the helper is given two values, but the parser is called with
        // a third argument of 1 (presumably the expected object count; confirm
        // against `PackMtimes::parse`).
        let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());

        // Case 2: overwrite byte 11 with 2.
        // NOTE(review): judging by the variable name, this byte selects the hash
        // algorithm in the header; confirm against the `pack_mtimes` helper.
        let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
        wrong_hash[11] = 2;
        // Recompute the digest of everything before the last `raw_len()` bytes and
        // write it over those bytes, so the edited buffer carries a fresh trailing
        // digest rather than a stale one.
        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
            .expect("test operation should succeed");
        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
        assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
    }
6391
6392    #[test]
6393    fn parses_multi_pack_index_header_and_chunk_lookup() {
6394        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6395            .expect("test operation should succeed");
6396        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6397            .expect("test operation should succeed");
6398        let chunks = midx_chunks_with_pack_names(
6399            ObjectFormat::Sha1,
6400            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6401            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6402        );
6403        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6404        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6405            .expect("test operation should succeed");
6406        assert_eq!(parsed.version, 2);
6407        assert_eq!(parsed.format, ObjectFormat::Sha1);
6408        assert_eq!(parsed.pack_count, 2);
6409        assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
6410        assert_eq!(parsed.object_count, 2);
6411        assert_eq!(parsed.objects.len(), 2);
6412        assert_eq!(
6413            parsed
6414                .find(&first)
6415                .expect("test operation should succeed")
6416                .pack_int_id,
6417            0
6418        );
6419        assert_eq!(
6420            parsed
6421                .find(&first)
6422                .expect("test operation should succeed")
6423                .offset,
6424            12
6425        );
6426        assert_eq!(
6427            parsed
6428                .find(&second)
6429                .expect("test operation should succeed")
6430                .pack_int_id,
6431            1
6432        );
6433        assert_eq!(
6434            parsed
6435                .find(&second)
6436                .expect("test operation should succeed")
6437                .offset,
6438            0x1_0000_0000
6439        );
6440        assert_eq!(parsed.reverse_index, None);
6441        assert_eq!(parsed.bitmapped_packs, None);
6442        assert_eq!(parsed.chunks.len(), 5);
6443        assert_eq!(parsed.chunks[0].id, *b"PNAM");
6444        assert_eq!(parsed.chunks[0].offset, 84);
6445        assert_eq!(parsed.chunks[0].len, 24);
6446        assert_eq!(parsed.chunks[1].id, *b"OIDF");
6447        assert_eq!(parsed.chunks[1].offset, 108);
6448        assert_eq!(parsed.chunks[1].len, 1024);
6449    }
6450
6451    #[test]
6452    fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
6453        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6454            .expect("test operation should succeed");
6455        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6456            .expect("test operation should succeed");
6457        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6458            .expect("test operation should succeed");
6459        let chunks = midx_chunks_with_pack_names(
6460            ObjectFormat::Sha1,
6461            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6462            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6463        );
6464        let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
6465        let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
6466            .expect("test operation should succeed");
6467
6468        assert!(lookup.contains(&first));
6469        assert!(lookup.contains(&second));
6470        assert!(!lookup.contains(&missing));
6471
6472        let first_entry = lookup
6473            .find(&first)
6474            .expect("test operation should succeed")
6475            .expect("object should be present");
6476        assert_eq!(
6477            lookup.pack_name(first_entry.pack_int_id),
6478            Some("pack-a.idx")
6479        );
6480        assert_eq!(first_entry.offset, 12);
6481
6482        let second_entry = lookup
6483            .find(&second)
6484            .expect("test operation should succeed")
6485            .expect("object should be present");
6486        assert_eq!(
6487            lookup.pack_name(second_entry.pack_int_id),
6488            Some("pack-b.idx")
6489        );
6490        assert_eq!(second_entry.offset, 0x1_0000_0000);
6491        assert!(
6492            lookup
6493                .find(&missing)
6494                .expect("test operation should succeed")
6495                .is_none()
6496        );
6497    }
6498
6499    #[test]
6500    fn rejects_bad_multi_pack_index_checksum() {
6501        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6502        let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6503        let last = midx.len() - 1;
6504        midx[last] ^= 1;
6505        assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
6506    }
6507
    // Applies three byte-level corruptions to an otherwise empty multi-pack
    // index and expects `MultiPackIndex::parse` to reject each one. After every
    // edit the digest of all bytes before the last `raw_len()` bytes is
    // recomputed and written over those bytes, so each buffer carries a fresh
    // trailing digest rather than a stale one.
    #[test]
    fn rejects_bad_multi_pack_index_shape() {
        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
        // Case 1: overwrite byte 5 with 2.
        // NOTE(review): judging by the variable name, byte 5 is the header's hash
        // identifier; confirm against the `multi_pack_index` helper.
        let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
        wrong_hash[5] = 2;
        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
            .expect("test operation should succeed");
        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
        assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());

        // Case 2: overwrite byte 12 with b'B'.
        // NOTE(review): byte 12 presumably falls inside the chunk lookup table
        // that follows the header; which entry it damages depends on the helper's
        // layout. Confirm against `multi_pack_index`.
        let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
        missing_terminator[12] = b'B';
        let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
        let checksum =
            sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
                .expect("test operation should succeed");
        missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
        assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());

        // Case 3: zero the eight bytes at 16..24, written as a big-endian u64.
        // NOTE(review): the variable name suggests these bytes hold a chunk
        // offset; confirm against the helper's table layout. This case passes 2
        // as the helper's second argument, where the first two cases pass 1.
        let mut bad_offset = multi_pack_index(
            ObjectFormat::Sha1,
            2,
            0,
            &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
        );
        bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
        let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
            .expect("test operation should succeed");
        bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
        assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
    }
6541
6542    #[test]
6543    fn rejects_bad_multi_pack_index_pack_names() {
6544        let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
6545        assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
6546
6547        let too_few = multi_pack_index(
6548            ObjectFormat::Sha1,
6549            2,
6550            2,
6551            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
6552        );
6553        assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
6554
6555        let bad_padding = multi_pack_index(
6556            ObjectFormat::Sha1,
6557            2,
6558            1,
6559            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
6560        );
6561        assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
6562
6563        let unsorted_v1 = multi_pack_index(
6564            ObjectFormat::Sha1,
6565            1,
6566            2,
6567            &midx_chunks_with_pack_names(
6568                ObjectFormat::Sha1,
6569                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6570                &[],
6571            ),
6572        );
6573        assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
6574
6575        let unsorted_v2 = multi_pack_index(
6576            ObjectFormat::Sha1,
6577            2,
6578            2,
6579            &midx_chunks_with_pack_names(
6580                ObjectFormat::Sha1,
6581                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6582                &[],
6583            ),
6584        );
6585        let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
6586            .expect("test operation should succeed");
6587        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6588    }
6589
    // Builds six multi-pack indexes whose object tables are inconsistent and
    // expects `MultiPackIndex::parse` to reject every one of them. Chunks are
    // given as (four-byte id, payload) pairs.
    #[test]
    fn rejects_bad_multi_pack_index_object_tables() {
        let oid_a = ObjectId::from_hex(
            ObjectFormat::Sha1,
            "1111111111111111111111111111111111111111",
        )
        .expect("test operation should succeed");
        let oid_b = ObjectId::from_hex(
            ObjectFormat::Sha1,
            "2222222222222222222222222222222222222222",
        )
        .expect("test operation should succeed");

        // Only a PNAM chunk is present; there is no OIDF chunk.
        let missing_oidf = multi_pack_index(
            ObjectFormat::Sha1,
            2,
            1,
            &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
        );
        assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());

        // OIDF is 256 * 4 zero bytes while OIDL holds one object id.
        let bad_fanout = vec![
            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
            (*b"OIDF", vec![0; 256 * 4]),
            (*b"OIDL", oid_a.as_bytes().to_vec()),
            (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
        ];
        let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
        assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());

        // OIDL lists oid_b (0x22...) before oid_a (0x11...).
        let mut unsorted = Vec::new();
        unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
        unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
        let mut oid_lookup = Vec::new();
        oid_lookup.extend_from_slice(oid_b.as_bytes());
        oid_lookup.extend_from_slice(oid_a.as_bytes());
        unsorted.push((*b"OIDL", oid_lookup));
        unsorted.push((
            *b"OOFF",
            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
        ));
        let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
        assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());

        // The object tuple's middle value is 1 although only one pack name is
        // supplied (presumably a pack id that is out of range; confirm against
        // `midx_chunks_with_pack_names`).
        let bad_pack = multi_pack_index(
            ObjectFormat::Sha1,
            2,
            1,
            &midx_chunks_with_pack_names(
                ObjectFormat::Sha1,
                b"pack-a.idx\0\0".to_vec(),
                &[(oid_a.clone(), 1, 12)],
            ),
        );
        assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());

        // The OOFF entry is built for offset 0x1_0000_0000. The helper is handed
        // `large_offsets` as a side buffer, but no LOFF chunk is added to the
        // chunk list.
        let mut large_offsets = Vec::new();
        let missing_loff = vec![
            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
            (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
            (*b"OIDL", oid_a.as_bytes().to_vec()),
            (
                *b"OOFF",
                midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
            ),
        ];
        let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
        assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());

        // A LOFF chunk whose payload is a single byte is appended to an index
        // that lists no objects.
        let mut bad_loff =
            midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
        bad_loff.push((*b"LOFF", vec![0]));
        let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
        assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
    }
6665
6666    #[test]
6667    fn parses_multi_pack_index_bitmap_chunks() {
6668        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6669            .expect("test operation should succeed");
6670        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6671            .expect("test operation should succeed");
6672        let mut chunks = midx_chunks_with_pack_names(
6673            ObjectFormat::Sha1,
6674            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6675            &[(first, 0, 12), (second, 1, 24)],
6676        );
6677        chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
6678        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
6679        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6680
6681        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6682            .expect("test operation should succeed");
6683        assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
6684        assert_eq!(
6685            parsed.bitmapped_packs,
6686            Some(vec![
6687                MultiPackBitmapPack {
6688                    bitmap_pos: 0,
6689                    bitmap_nr: 1,
6690                },
6691                MultiPackBitmapPack {
6692                    bitmap_pos: 1,
6693                    bitmap_nr: 1,
6694                },
6695            ])
6696        );
6697    }
6698
6699    #[test]
6700    fn writes_multi_pack_index_that_round_trips() {
6701        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6702            .expect("test operation should succeed");
6703        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6704            .expect("test operation should succeed");
6705        let bytes = MultiPackIndex::write(
6706            ObjectFormat::Sha1,
6707            2,
6708            &["pack-b.idx".into(), "pack-a.idx".into()],
6709            &[
6710                MultiPackIndexEntry {
6711                    oid: second.clone(),
6712                    pack_int_id: 0,
6713                    offset: 0x1_0000_0000,
6714                },
6715                MultiPackIndexEntry {
6716                    oid: first.clone(),
6717                    pack_int_id: 1,
6718                    offset: 12,
6719                },
6720            ],
6721        )
6722        .expect("test operation should succeed");
6723
6724        let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
6725            .expect("test operation should succeed");
6726        assert_eq!(parsed.version, 2);
6727        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6728        assert_eq!(parsed.object_count, 2);
6729        assert_eq!(
6730            parsed
6731                .find(&first)
6732                .expect("test operation should succeed")
6733                .pack_int_id,
6734            1
6735        );
6736        assert_eq!(
6737            parsed
6738                .find(&first)
6739                .expect("test operation should succeed")
6740                .offset,
6741            12
6742        );
6743        assert_eq!(
6744            parsed
6745                .find(&second)
6746                .expect("test operation should succeed")
6747                .pack_int_id,
6748            0
6749        );
6750        assert_eq!(
6751            parsed
6752                .find(&second)
6753                .expect("test operation should succeed")
6754                .offset,
6755            0x1_0000_0000
6756        );
6757        assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
6758    }
6759
6760    #[test]
6761    fn write_multi_pack_index_rejects_invalid_inputs() {
6762        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
6763            .expect("test operation should succeed");
6764        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
6765        assert!(
6766            MultiPackIndex::write(
6767                ObjectFormat::Sha1,
6768                1,
6769                &["pack-b.idx".into(), "pack-a.idx".into()],
6770                &[],
6771            )
6772            .is_err()
6773        );
6774        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
6775        assert!(
6776            MultiPackIndex::write(
6777                ObjectFormat::Sha1,
6778                2,
6779                &["pack-a.idx".into()],
6780                &[MultiPackIndexEntry {
6781                    oid,
6782                    pack_int_id: 1,
6783                    offset: 12,
6784                }],
6785            )
6786            .is_err()
6787        );
6788        assert!(
6789            MultiPackIndex::write(
6790                ObjectFormat::Sha1,
6791                2,
6792                &["pack-a.idx".into()],
6793                &[
6794                    MultiPackIndexEntry {
6795                        oid,
6796                        pack_int_id: 0,
6797                        offset: 12,
6798                    },
6799                    MultiPackIndexEntry {
6800                        oid,
6801                        pack_int_id: 0,
6802                        offset: 24,
6803                    },
6804                ],
6805            )
6806            .is_err()
6807        );
6808    }
6809
6810    #[test]
6811    fn rejects_bad_multi_pack_index_bitmap_chunks() {
6812        let oid_a = ObjectId::from_hex(
6813            ObjectFormat::Sha1,
6814            "1111111111111111111111111111111111111111",
6815        )
6816        .expect("test operation should succeed");
6817        let oid_b = ObjectId::from_hex(
6818            ObjectFormat::Sha1,
6819            "2222222222222222222222222222222222222222",
6820        )
6821        .expect("test operation should succeed");
6822
6823        let mut duplicate_ridx = midx_chunks_with_pack_names(
6824            ObjectFormat::Sha1,
6825            b"pack-a.idx\0\0".to_vec(),
6826            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
6827        );
6828        duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
6829        let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
6830        assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
6831
6832        let mut short_btmp = midx_chunks_with_pack_names(
6833            ObjectFormat::Sha1,
6834            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6835            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
6836        );
6837        short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
6838        let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
6839        assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
6840
6841        let mut out_of_range_btmp = midx_chunks_with_pack_names(
6842            ObjectFormat::Sha1,
6843            b"pack-a.idx\0\0".to_vec(),
6844            &[(oid_a, 0, 12), (oid_b, 0, 24)],
6845        );
6846        out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
6847        let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
6848        assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
6849    }
6850
6851    #[test]
6852    fn parses_pack_bitmap_index_with_hash_cache() {
6853        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6854            .expect("test operation should succeed");
6855        let bitmap = pack_bitmap_index(
6856            ObjectFormat::Sha1,
6857            3,
6858            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
6859            &pack_checksum,
6860            &[(2, 0, 1, &[0b101])],
6861            Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
6862        );
6863
6864        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
6865            .expect("test operation should succeed");
6866        assert_eq!(parsed.version, 1);
6867        assert_eq!(parsed.format, ObjectFormat::Sha1);
6868        assert_eq!(
6869            parsed.options,
6870            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
6871        );
6872        assert_eq!(parsed.pack_checksum, pack_checksum);
6873        assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
6874        assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
6875        assert_eq!(parsed.entries.len(), 1);
6876        let entry = parsed
6877            .entry_for_index_position(2)
6878            .expect("test operation should succeed");
6879        assert_eq!(entry.xor_offset, 0);
6880        assert_eq!(entry.flags, 1);
6881        assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
6882        assert_eq!(
6883            parsed.name_hash_cache,
6884            Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
6885        );
6886    }
6887
6888    #[test]
6889    fn parses_pack_bitmap_index_sha256() {
6890        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
6891            .expect("test operation should succeed");
6892        let bitmap = pack_bitmap_index(
6893            ObjectFormat::Sha256,
6894            2,
6895            PackBitmapIndex::OPTION_FULL_DAG,
6896            &pack_checksum,
6897            &[(0, 0, 0, &[0b11])],
6898            None,
6899        );
6900
6901        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
6902            .expect("test operation should succeed");
6903        assert_eq!(parsed.version, 1);
6904        assert_eq!(parsed.format, ObjectFormat::Sha256);
6905        assert_eq!(parsed.pack_checksum, pack_checksum);
6906        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
6907        assert_eq!(parsed.entries[0].object_position, 0);
6908        assert_eq!(parsed.name_hash_cache, None);
6909    }
6910
6911    #[test]
6912    fn parses_upstream_git_written_pack_bitmap_index() {
6913        let root = unique_temp_dir("git-pack-bitmap-upstream");
6914        fs::create_dir_all(&root).expect("test operation should succeed");
6915        {
6916            run_git_success(&root, &["init", "-q", "-b", "main"]);
6917            run_git_success(
6918                &root,
6919                &[
6920                    "-c",
6921                    "user.name=Example User",
6922                    "-c",
6923                    "user.email=example@example.invalid",
6924                    "commit",
6925                    "--allow-empty",
6926                    "-q",
6927                    "-m",
6928                    "one",
6929                ],
6930            );
6931            run_git_success(
6932                &root,
6933                &[
6934                    "-c",
6935                    "user.name=Example User",
6936                    "-c",
6937                    "user.email=example@example.invalid",
6938                    "commit",
6939                    "--allow-empty",
6940                    "-q",
6941                    "-m",
6942                    "two",
6943                ],
6944            );
6945            run_git_success(&root, &["repack", "-adb"]);
6946            let pack_dir = root.join(".git").join("objects").join("pack");
6947            let idx_path = single_path_with_extension(&pack_dir, "idx");
6948            let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
6949            let index = PackIndex::parse(
6950                &fs::read(idx_path).expect("test operation should succeed"),
6951                ObjectFormat::Sha1,
6952            )
6953            .expect("test operation should succeed");
6954            let bitmap = PackBitmapIndex::parse(
6955                &fs::read(bitmap_path).expect("test operation should succeed"),
6956                ObjectFormat::Sha1,
6957                index.entries.len(),
6958            )
6959            .expect("test operation should succeed");
6960            assert_eq!(bitmap.pack_checksum, index.pack_checksum);
6961            assert!(!bitmap.entries.is_empty());
6962        };
6963        let _ = fs::remove_dir_all(&root);
6964    }
6965
6966    #[test]
6967    fn rejects_bad_pack_bitmap_index_header_and_checksum() {
6968        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6969            .expect("test operation should succeed");
6970        let bitmap = pack_bitmap_index(
6971            ObjectFormat::Sha1,
6972            1,
6973            PackBitmapIndex::OPTION_FULL_DAG,
6974            &pack_checksum,
6975            &[(0, 0, 0, &[1])],
6976            None,
6977        );
6978
6979        let mut bad_signature = bitmap.clone();
6980        bad_signature[0] = b'X';
6981        assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
6982
6983        let mut bad_version = bitmap.clone();
6984        bad_version[5] = 2;
6985        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
6986        assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
6987
6988        let mut bad_option = bitmap.clone();
6989        bad_option[7] = 0x20;
6990        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
6991        assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
6992
6993        let mut bad_checksum = bitmap;
6994        let last = bad_checksum.len() - 1;
6995        bad_checksum[last] ^= 1;
6996        assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
6997    }
6998
6999    #[test]
7000    fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
7001        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7002            .expect("test operation should succeed");
7003        let bitmap = pack_bitmap_index(
7004            ObjectFormat::Sha1,
7005            2,
7006            PackBitmapIndex::OPTION_FULL_DAG,
7007            &pack_checksum,
7008            &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
7009            None,
7010        );
7011
7012        let mut truncated = bitmap.clone();
7013        truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
7014        refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
7015        assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
7016
7017        let mut out_of_range_position = pack_bitmap_index(
7018            ObjectFormat::Sha1,
7019            2,
7020            PackBitmapIndex::OPTION_FULL_DAG,
7021            &pack_checksum,
7022            &[(2, 0, 0, &[0b01])],
7023            None,
7024        );
7025        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7026        refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
7027        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7028
7029        let invalid_xor = pack_bitmap_index(
7030            ObjectFormat::Sha1,
7031            2,
7032            PackBitmapIndex::OPTION_FULL_DAG,
7033            &pack_checksum,
7034            &[(0, 1, 0, &[0b01])],
7035            None,
7036        );
7037        assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
7038    }
7039
7040    #[test]
7041    fn parses_single_entry_pack_index_sha256() {
7042        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
7043            .expect("test operation should succeed");
7044        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7045            .expect("test operation should succeed");
7046        let index = single_entry_index(
7047            ObjectFormat::Sha256,
7048            oid,
7049            0x1234_5678,
7050            12,
7051            pack_checksum.clone(),
7052        );
7053        let parsed =
7054            PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
7055        assert_eq!(parsed.version, 2);
7056        assert_eq!(parsed.pack_checksum, pack_checksum);
7057        assert_eq!(parsed.entries.len(), 1);
7058        assert_eq!(
7059            parsed
7060                .find(&oid)
7061                .expect("test operation should succeed")
7062                .offset,
7063            12
7064        );
7065        assert_eq!(
7066            parsed
7067                .find(&oid)
7068                .expect("test operation should succeed")
7069                .crc32,
7070            0x1234_5678
7071        );
7072        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
7073        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha256);
7074    }
7075
7076    #[test]
7077    fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
7078        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
7079    }
7080
7081    #[test]
7082    fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
7083        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
7084    }
7085
7086    #[test]
7087    fn write_packed_rejects_duplicate_objects() {
7088        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7089        assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
7090    }
7091
7092    #[test]
7093    fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
7094        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7095        let sha1 = object
7096            .object_id(ObjectFormat::Sha1)
7097            .expect("test operation should succeed");
7098        let sha256 = object
7099            .object_id(ObjectFormat::Sha256)
7100            .expect("test operation should succeed");
7101        let duplicate = [
7102            PackInput {
7103                oid: &sha1,
7104                object: &object,
7105            },
7106            PackInput {
7107                oid: &sha1,
7108                object: &object,
7109            },
7110        ];
7111        assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
7112
7113        let wrong_format = [PackInput {
7114            oid: &sha256,
7115            object: &object,
7116        }];
7117        assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
7118    }
7119
7120    fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7121        let objects = similar_blob_family(8);
7122        let packed =
7123            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7124        let undeltified =
7125            PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7126
7127        // The whole point of delta selection: the packed output is smaller than
7128        // storing every object undeltified.
7129        assert!(
7130            packed.pack.len() < undeltified.pack.len(),
7131            "expected delta pack ({}) smaller than undeltified pack ({})",
7132            packed.pack.len(),
7133            undeltified.pack.len()
7134        );
7135
7136        // At least one object must actually be stored as a delta.
7137        let kinds = pack_entry_kinds(&packed.pack, format);
7138        let delta_count = kinds
7139            .iter()
7140            .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7141            .count();
7142        assert!(
7143            delta_count >= 1,
7144            "expected at least one delta entry, found kinds {kinds:?}"
7145        );
7146
7147        // Round-trip: every original object reconstructs byte-for-byte.
7148        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7149        assert_eq!(parsed.entries.len(), objects.len());
7150        for object in &objects {
7151            let oid = object
7152                .object_id(format)
7153                .expect("test operation should succeed");
7154            let found = parsed
7155                .entries
7156                .iter()
7157                .find(|entry| entry.entry.oid == oid)
7158                .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
7159            assert_eq!(&found.object, object, "object {oid} did not round-trip");
7160        }
7161
7162        // The index must agree with the pack and locate every object.
7163        let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
7164        assert_eq!(index.pack_checksum, packed.checksum);
7165        for object in &objects {
7166            let oid = object
7167                .object_id(format)
7168                .expect("test operation should succeed");
7169            assert!(index.find(&oid).is_some(), "index missing {oid}");
7170        }
7171    }
7172
7173    #[test]
7174    fn write_packed_emits_ofs_delta_by_default() {
7175        let objects = similar_blob_family(6);
7176        let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
7177            .expect("test operation should succeed");
7178        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7179        assert!(
7180            kinds.contains(&PackObjectKind::OfsDelta),
7181            "expected an ofs-delta entry by default, found {kinds:?}"
7182        );
7183        assert!(
7184            !kinds.contains(&PackObjectKind::RefDelta),
7185            "default self-contained pack must not use ref-delta, found {kinds:?}"
7186        );
7187        // Round-trips.
7188        assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
7189    }
7190
7191    #[test]
7192    fn write_packed_can_emit_ref_delta() {
7193        let objects = similar_blob_family(6);
7194        let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
7195        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7196            .expect("test operation should succeed");
7197        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7198        assert!(
7199            kinds.contains(&PackObjectKind::RefDelta),
7200            "expected a ref-delta entry, found {kinds:?}"
7201        );
7202        assert!(
7203            !kinds.contains(&PackObjectKind::OfsDelta),
7204            "ref-delta mode must not emit ofs-delta, found {kinds:?}"
7205        );
7206
7207        // Ref-delta packs are still self-contained here, so they round-trip
7208        // without any external base lookup.
7209        let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
7210            .expect("test operation should succeed");
7211        assert_eq!(parsed.entries.len(), objects.len());
7212    }
7213
7214    #[test]
7215    fn write_packed_bounds_delta_chain_depth() {
7216        // A long chain of progressively-modified blobs. With a large window
7217        // every object could otherwise delta against its immediate predecessor,
7218        // forming a chain as long as the input.
7219        let objects = incremental_blob_chain(20);
7220        let format = ObjectFormat::Sha1;
7221
7222        for max_depth in [1usize, 2, 5] {
7223            let options = PackWriteOptions::new()
7224                .with_window(20)
7225                .with_depth(max_depth);
7226            let packed = PackFile::write_packed_with_options(&objects, format, &options)
7227                .expect("test operation should succeed");
7228
7229            let depths = pack_entry_depths(&packed.pack, format);
7230            let observed = depths.iter().copied().max().unwrap_or(0);
7231            assert!(
7232                observed <= max_depth,
7233                "max chain depth {observed} exceeded bound {max_depth}"
7234            );
7235
7236            // Still correct: round-trips byte-for-byte.
7237            let parsed =
7238                PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7239            for object in &objects {
7240                let oid = object
7241                    .object_id(format)
7242                    .expect("test operation should succeed");
7243                let found = parsed
7244                    .entries
7245                    .iter()
7246                    .find(|entry| entry.entry.oid == oid)
7247                    .expect("test operation should succeed");
7248                assert_eq!(&found.object, object);
7249            }
7250        }
7251    }
7252
7253    #[test]
7254    fn write_packed_depth_zero_stores_everything_undeltified() {
7255        let objects = similar_blob_family(5);
7256        let options = PackWriteOptions::new().with_depth(0);
7257        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7258            .expect("test operation should succeed");
7259        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7260        assert!(
7261            kinds
7262                .iter()
7263                .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
7264            "depth 0 must disable deltas, found {kinds:?}"
7265        );
7266    }
7267
7268    #[test]
7269    fn write_thin_uses_external_base_and_round_trips_sha1() {
7270        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
7271    }
7272
7273    #[test]
7274    fn write_thin_uses_external_base_and_round_trips_sha256() {
7275        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
7276    }
7277
7278    fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
7279        // The base object stays OUT of the pack; only `target` is written, as a
7280        // ref-delta against the external base's object id.
7281        let base = blob_with_marker("EXTERNAL-BASE");
7282        let target = blob_with_marker("EXTERNAL-TARGET");
7283        let base_oid = base
7284            .object_id(format)
7285            .expect("test operation should succeed");
7286
7287        let mut external = HashMap::new();
7288        external.insert(base_oid, base.clone());
7289        let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
7290            .expect("test operation should succeed");
7291
7292        // Exactly one entry, encoded as a ref-delta to the external base.
7293        let kinds = pack_entry_kinds(&packed.pack, format);
7294        assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7295
7296        // The external base reference must be the base oid.
7297        let mut offset = 12usize;
7298        let header =
7299            parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7300        assert_eq!(header.kind, PackObjectKind::RefDelta);
7301        let referenced =
7302            ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7303                .expect("test operation should succeed");
7304        assert_eq!(referenced, base_oid);
7305
7306        // A plain (non-thin) parse fails: the base is not present.
7307        assert!(PackFile::parse(&packed.pack, format).is_err());
7308
7309        // A thin parse that supplies the external base reconstructs the target.
7310        let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7311            if oid == &base_oid {
7312                Ok(Some(base.clone()))
7313            } else {
7314                Ok(None)
7315            }
7316        })
7317        .expect("test operation should succeed");
7318        assert_eq!(parsed.entries.len(), 1);
7319        assert_eq!(parsed.entries[0].object, target);
7320    }
7321
7322    #[test]
7323    fn write_packed_preserves_distinct_objects_with_no_similarity() {
7324        // Unrelated objects: nothing should delta, but the pack must still be
7325        // valid and complete.
7326        let objects = vec![
7327            EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
7328            EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
7329            EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
7330        ];
7331        let format = ObjectFormat::Sha1;
7332        let packed =
7333            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7334        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7335        assert_eq!(parsed.entries.len(), objects.len());
7336        for object in &objects {
7337            let oid = object
7338                .object_id(format)
7339                .expect("test operation should succeed");
7340            assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
7341        }
7342    }
7343
7344    /// Build a family of blobs that all share a large common region but differ
7345    /// in a marker placed in the *middle*, so a good delta finds copy regions on
7346    /// both sides of the change.
7347    fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7348        let mut common_head = Vec::new();
7349        for _ in 0..200 {
7350            common_head.extend_from_slice(b"shared header line for delta testing\n");
7351        }
7352        let mut common_tail = Vec::new();
7353        for _ in 0..200 {
7354            common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7355        }
7356        (0..count)
7357            .map(|idx| {
7358                let mut body = common_head.clone();
7359                body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7360                body.extend_from_slice(&common_tail);
7361                EncodedObject::new(ObjectType::Blob, body)
7362            })
7363            .collect()
7364    }
7365
7366    /// Build a chain where each blob is the previous one plus an appended line,
7367    /// so each is highly similar to its predecessor.
7368    fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7369        let mut body = Vec::new();
7370        for _ in 0..100 {
7371            body.extend_from_slice(b"baseline content shared across the whole chain\n");
7372        }
7373        let mut objects = Vec::with_capacity(count);
7374        for idx in 0..count {
7375            body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7376            objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7377        }
7378        objects
7379    }
7380
7381    fn blob_with_marker(marker: &str) -> EncodedObject {
7382        let mut body = Vec::new();
7383        for _ in 0..150 {
7384            body.extend_from_slice(b"common body shared between base and target\n");
7385        }
7386        body.extend_from_slice(marker.as_bytes());
7387        body.push(b'\n');
7388        for _ in 0..150 {
7389            body.extend_from_slice(b"more common body shared between objects\n");
7390        }
7391        EncodedObject::new(ObjectType::Blob, body)
7392    }
7393
7394    /// Classify every entry in a pack (in pack order) by its on-disk kind.
7395    fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7396        pack_entry_descriptors(pack, format)
7397            .into_iter()
7398            .map(|descriptor| descriptor.kind)
7399            .collect()
7400    }
7401
7402    /// Compute each entry's delta chain depth (0 = undeltified base), in pack
7403    /// order. Entries always appear after their in-pack bases, so a single
7404    /// forward pass suffices.
7405    fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7406        let descriptors = pack_entry_descriptors(pack, format);
7407        let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7408        let mut depths = Vec::with_capacity(descriptors.len());
7409        for descriptor in &descriptors {
7410            let depth = match &descriptor.base {
7411                EntryBase::None => 0,
7412                EntryBase::Offset(base_offset) => {
7413                    depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7414                }
7415                // Ref-delta to an in-pack base: look it up by offset via oid is
7416                // unnecessary for these tests (which only use ofs-delta for the
7417                // chains), so treat as depth 1 if unknown.
7418                EntryBase::Ref => 1,
7419            };
7420            depth_by_offset.insert(descriptor.offset, depth);
7421            depths.push(depth);
7422        }
7423        depths
7424    }
7425
7426    struct EntryDescriptor {
7427        offset: u64,
7428        kind: PackObjectKind,
7429        base: EntryBase,
7430    }
7431
7432    enum EntryBase {
7433        None,
7434        Offset(u64),
7435        Ref,
7436    }
7437
7438    fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7439        let trailer_offset = pack.len() - format.raw_len();
7440        let count = u32_be(&pack[8..12]) as usize;
7441        let mut offset = 12usize;
7442        let mut descriptors = Vec::with_capacity(count);
7443        for _ in 0..count {
7444            let entry_offset = offset as u64;
7445            let header =
7446                parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7447            let base = match header.kind {
7448                PackObjectKind::OfsDelta => {
7449                    let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7450                        .expect("test operation should succeed");
7451                    EntryBase::Offset(base_offset)
7452                }
7453                PackObjectKind::RefDelta => {
7454                    offset += format.raw_len();
7455                    EntryBase::Ref
7456                }
7457                _ => EntryBase::None,
7458            };
7459            let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7460            let mut body = Vec::new();
7461            decoder
7462                .read_to_end(&mut body)
7463                .expect("test operation should succeed");
7464            offset += decoder.total_in() as usize;
7465            descriptors.push(EntryDescriptor {
7466                offset: entry_offset,
7467                kind: header.kind,
7468                base,
7469            });
7470        }
7471        descriptors
7472    }
7473
7474    fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7475        let mut base = Vec::new();
7476        for _ in 0..300 {
7477            base.extend_from_slice(b"common payload\n");
7478        }
7479        base.extend_from_slice(b"base\n");
7480        let mut changed = Vec::new();
7481        for _ in 0..300 {
7482            changed.extend_from_slice(b"common payload\n");
7483        }
7484        changed.extend_from_slice(b"changed\n");
7485        (
7486            EncodedObject::new(ObjectType::Blob, base),
7487            EncodedObject::new(ObjectType::Blob, changed),
7488        )
7489    }
7490
7491    fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7492        let mut pack = Vec::new();
7493        pack.extend_from_slice(b"PACK");
7494        pack.extend_from_slice(&2u32.to_be_bytes());
7495        pack.extend_from_slice(&1u32.to_be_bytes());
7496        write_entry_header(&mut pack, object_type, body.len() as u64);
7497        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7498        encoder
7499            .write_all(body)
7500            .expect("test operation should succeed");
7501        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7502        let checksum =
7503            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7504        pack.extend_from_slice(checksum.as_bytes());
7505        pack
7506    }
7507
7508    #[derive(Clone, Copy, Debug)]
7509    enum DeltaKind {
7510        Offset,
7511        Ref,
7512    }
7513
7514    fn two_object_delta_pack(
7515        format: ObjectFormat,
7516        base: &[u8],
7517        result: &[u8],
7518        delta_kind: DeltaKind,
7519    ) -> Vec<u8> {
7520        let mut pack = Vec::new();
7521        pack.extend_from_slice(b"PACK");
7522        pack.extend_from_slice(&2u32.to_be_bytes());
7523        pack.extend_from_slice(&2u32.to_be_bytes());
7524
7525        let base_offset = pack.len();
7526        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7527        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7528        encoder
7529            .write_all(base)
7530            .expect("test operation should succeed");
7531        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7532
7533        let delta = append_suffix_delta(base, result);
7534        let delta_offset = pack.len();
7535        write_pack_entry_header_kind(
7536            &mut pack,
7537            match delta_kind {
7538                DeltaKind::Offset => 6,
7539                DeltaKind::Ref => 7,
7540            },
7541            delta.len() as u64,
7542        );
7543        match delta_kind {
7544            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7545            DeltaKind::Ref => {
7546                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7547                    .expect("test operation should succeed");
7548                pack.extend_from_slice(base_oid.as_bytes());
7549            }
7550        }
7551        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7552        encoder
7553            .write_all(&delta)
7554            .expect("test operation should succeed");
7555        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7556
7557        let checksum =
7558            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7559        pack.extend_from_slice(checksum.as_bytes());
7560        pack
7561    }
7562
7563    fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7564        let mut pack = Vec::new();
7565        pack.extend_from_slice(b"PACK");
7566        pack.extend_from_slice(&2u32.to_be_bytes());
7567        pack.extend_from_slice(&1u32.to_be_bytes());
7568
7569        let delta = append_suffix_delta(base, result);
7570        write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7571        let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7572            .expect("test operation should succeed");
7573        pack.extend_from_slice(base_oid.as_bytes());
7574        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7575        encoder
7576            .write_all(&delta)
7577            .expect("test operation should succeed");
7578        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7579
7580        let checksum =
7581            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7582        pack.extend_from_slice(checksum.as_bytes());
7583        pack
7584    }
7585
7586    fn unique_temp_dir(name: &str) -> PathBuf {
7587        let nanos = SystemTime::now()
7588            .duration_since(UNIX_EPOCH)
7589            .expect("test operation should succeed")
7590            .as_nanos();
7591        std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
7592    }
7593
7594    fn run_git_success(cwd: &Path, args: &[&str]) {
7595        let output = Command::new("git")
7596            .current_dir(cwd)
7597            .args(args)
7598            .output()
7599            .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
7600        assert!(
7601            output.status.success(),
7602            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
7603            output.status.code(),
7604            String::from_utf8_lossy(&output.stdout),
7605            String::from_utf8_lossy(&output.stderr)
7606        );
7607    }
7608
7609    fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
7610        let mut paths = fs::read_dir(dir)
7611            .expect("test operation should succeed")
7612            .map(|entry| entry.expect("test operation should succeed").path())
7613            .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
7614            .collect::<Vec<_>>();
7615        assert_eq!(paths.len(), 1, "expected one .{extension} file");
7616        paths.remove(0)
7617    }
7618
7619    fn pack_bitmap_index(
7620        format: ObjectFormat,
7621        object_count: u32,
7622        options: u16,
7623        pack_checksum: &ObjectId,
7624        entries: &[(u32, u8, u8, &[u64])],
7625        name_hash_cache: Option<&[u32]>,
7626    ) -> Vec<u8> {
7627        let mut out = Vec::new();
7628        out.extend_from_slice(b"BITM");
7629        out.extend_from_slice(&1u16.to_be_bytes());
7630        out.extend_from_slice(&options.to_be_bytes());
7631        out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7632        out.extend_from_slice(pack_checksum.as_bytes());
7633        write_test_ewah(&mut out, object_count, &[0b001]);
7634        write_test_ewah(&mut out, object_count, &[0b010]);
7635        write_test_ewah(&mut out, object_count, &[0b100]);
7636        write_test_ewah(&mut out, object_count, &[0]);
7637        for (position, xor_offset, flags, words) in entries {
7638            out.extend_from_slice(&position.to_be_bytes());
7639            out.push(*xor_offset);
7640            out.push(*flags);
7641            write_test_ewah(&mut out, object_count, words);
7642        }
7643        if let Some(cache) = name_hash_cache {
7644            for value in cache {
7645                out.extend_from_slice(&value.to_be_bytes());
7646            }
7647        }
7648        let checksum =
7649            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7650        out.extend_from_slice(checksum.as_bytes());
7651        out
7652    }
7653
7654    fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
7655        out.extend_from_slice(&bit_size.to_be_bytes());
7656        let words = ewah_literal_words(literals);
7657        out.extend_from_slice(&(words.len() as u32).to_be_bytes());
7658        for word in words {
7659            out.extend_from_slice(&word.to_be_bytes());
7660        }
7661        out.extend_from_slice(&0u32.to_be_bytes());
7662    }
7663
7664    fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
7665        let rlw = (literals.len() as u64) << 33;
7666        let mut words = vec![rlw];
7667        words.extend_from_slice(literals);
7668        words
7669    }
7670
7671    fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7672        let checksum_offset = bytes.len() - format.raw_len();
7673        let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7674            .expect("test operation should succeed");
7675        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7676    }
7677
7678    fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
7679        assert!(result.starts_with(base));
7680        let suffix = &result[base.len()..];
7681        assert!(base.len() < 0x10000);
7682        assert!(suffix.len() < 0x80);
7683        let mut delta = Vec::new();
7684        write_delta_varint(&mut delta, base.len() as u64);
7685        write_delta_varint(&mut delta, result.len() as u64);
7686        delta.push(0x90);
7687        delta.push(base.len() as u8);
7688        delta.push(suffix.len() as u8);
7689        delta.extend_from_slice(suffix);
7690        delta
7691    }
7692
7693    fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
7694        loop {
7695            let mut byte = (value as u8) & 0x7f;
7696            value >>= 7;
7697            if value != 0 {
7698                byte |= 0x80;
7699            }
7700            out.push(byte);
7701            if value == 0 {
7702                break;
7703            }
7704        }
7705    }
7706
7707    fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
7708        let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
7709        size >>= 4;
7710        if size != 0 {
7711            byte |= 0x80;
7712        }
7713        out.push(byte);
7714        while size != 0 {
7715            let mut byte = (size as u8) & 0x7f;
7716            size >>= 7;
7717            if size != 0 {
7718                byte |= 0x80;
7719            }
7720            out.push(byte);
7721        }
7722    }
7723
7724    fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
7725        assert!(relative < 0x80);
7726        out.push(relative as u8);
7727    }
7728
7729    fn single_entry_index(
7730        format: ObjectFormat,
7731        oid: ObjectId,
7732        crc32: u32,
7733        offset: u32,
7734        pack_checksum: ObjectId,
7735    ) -> Vec<u8> {
7736        let mut index = Vec::new();
7737        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7738        index.extend_from_slice(&2u32.to_be_bytes());
7739        for idx in 0..256 {
7740            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7741                1u32
7742            } else {
7743                0u32
7744            };
7745            index.extend_from_slice(&count.to_be_bytes());
7746        }
7747        index.extend_from_slice(oid.as_bytes());
7748        index.extend_from_slice(&crc32.to_be_bytes());
7749        index.extend_from_slice(&offset.to_be_bytes());
7750        index.extend_from_slice(pack_checksum.as_bytes());
7751        let checksum =
7752            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7753        index.extend_from_slice(checksum.as_bytes());
7754        index
7755    }
7756
7757    fn single_entry_index_v1(
7758        format: ObjectFormat,
7759        oid: ObjectId,
7760        offset: u32,
7761        pack_checksum: ObjectId,
7762    ) -> Vec<u8> {
7763        let mut index = Vec::new();
7764        for idx in 0..256 {
7765            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7766                1u32
7767            } else {
7768                0u32
7769            };
7770            index.extend_from_slice(&count.to_be_bytes());
7771        }
7772        index.extend_from_slice(&offset.to_be_bytes());
7773        index.extend_from_slice(oid.as_bytes());
7774        index.extend_from_slice(pack_checksum.as_bytes());
7775        let checksum =
7776            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7777        index.extend_from_slice(checksum.as_bytes());
7778        index
7779    }
7780
7781    fn pack_reverse_index(
7782        format: ObjectFormat,
7783        positions: &[u32],
7784        pack_checksum: ObjectId,
7785    ) -> Vec<u8> {
7786        let mut reverse_index = Vec::new();
7787        reverse_index.extend_from_slice(b"RIDX");
7788        reverse_index.extend_from_slice(&1u32.to_be_bytes());
7789        reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7790        for position in positions {
7791            reverse_index.extend_from_slice(&position.to_be_bytes());
7792        }
7793        reverse_index.extend_from_slice(pack_checksum.as_bytes());
7794        let checksum =
7795            sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7796        reverse_index.extend_from_slice(checksum.as_bytes());
7797        reverse_index
7798    }
7799
7800    fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7801        let mut out = Vec::new();
7802        out.extend_from_slice(b"MTME");
7803        out.extend_from_slice(&1u32.to_be_bytes());
7804        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7805        for mtime in mtimes {
7806            out.extend_from_slice(&mtime.to_be_bytes());
7807        }
7808        out.extend_from_slice(pack_checksum.as_bytes());
7809        let checksum =
7810            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7811        out.extend_from_slice(checksum.as_bytes());
7812        out
7813    }
7814
7815    fn midx_chunks_with_pack_names(
7816        _format: ObjectFormat,
7817        pack_names: Vec<u8>,
7818        entries: &[(ObjectId, u32, u64)],
7819    ) -> Vec<([u8; 4], Vec<u8>)> {
7820        let mut entries = entries.to_vec();
7821        entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7822        let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7823        let mut large_offsets = Vec::new();
7824        let mut chunks = vec![
7825            (*b"PNAM", pack_names),
7826            (*b"OIDF", midx_oid_fanout(&object_ids)),
7827            (*b"OIDL", midx_oid_lookup(&object_ids)),
7828            (
7829                *b"OOFF",
7830                midx_ooff_entries(
7831                    &entries
7832                        .iter()
7833                        .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7834                        .collect::<Vec<_>>(),
7835                    &mut large_offsets,
7836                ),
7837            ),
7838        ];
7839        if !large_offsets.is_empty() {
7840            chunks.push((*b"LOFF", large_offsets));
7841        }
7842        chunks
7843    }
7844
7845    fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7846        let mut counts = [0u32; 256];
7847        for oid in object_ids {
7848            counts[oid.as_bytes()[0] as usize] += 1;
7849        }
7850        let mut running = 0u32;
7851        let mut out = Vec::new();
7852        for count in counts {
7853            running += count;
7854            out.extend_from_slice(&running.to_be_bytes());
7855        }
7856        out
7857    }
7858
7859    fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7860        let mut out = Vec::new();
7861        for oid in object_ids {
7862            out.extend_from_slice(oid.as_bytes());
7863        }
7864        out
7865    }
7866
7867    fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
7868        let mut out = Vec::new();
7869        for (pack_int_id, offset) in entries {
7870            out.extend_from_slice(&pack_int_id.to_be_bytes());
7871            if *offset < 0x8000_0000 {
7872                out.extend_from_slice(&(*offset as u32).to_be_bytes());
7873            } else {
7874                let large_idx = (large_offsets.len() / 8) as u32;
7875                out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
7876                large_offsets.extend_from_slice(&offset.to_be_bytes());
7877            }
7878        }
7879        out
7880    }
7881
7882    fn midx_u32_table(values: &[u32]) -> Vec<u8> {
7883        let mut out = Vec::new();
7884        for value in values {
7885            out.extend_from_slice(&value.to_be_bytes());
7886        }
7887        out
7888    }
7889
7890    fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
7891        let mut out = Vec::new();
7892        for (bitmap_pos, bitmap_nr) in entries {
7893            out.extend_from_slice(&bitmap_pos.to_be_bytes());
7894            out.extend_from_slice(&bitmap_nr.to_be_bytes());
7895        }
7896        out
7897    }
7898
7899    fn multi_pack_index(
7900        format: ObjectFormat,
7901        version: u8,
7902        pack_count: u32,
7903        chunks: &[([u8; 4], Vec<u8>)],
7904    ) -> Vec<u8> {
7905        let lookup_len = (chunks.len() + 1) * 12;
7906        let mut out = Vec::new();
7907        out.extend_from_slice(b"MIDX");
7908        out.push(version);
7909        out.push(hash_function_id(format) as u8);
7910        out.push(chunks.len() as u8);
7911        out.push(0);
7912        out.extend_from_slice(&pack_count.to_be_bytes());
7913        let mut chunk_offset = (12 + lookup_len) as u64;
7914        for (id, data) in chunks {
7915            out.extend_from_slice(id);
7916            out.extend_from_slice(&chunk_offset.to_be_bytes());
7917            chunk_offset += data.len() as u64;
7918        }
7919        out.extend_from_slice(&[0, 0, 0, 0]);
7920        out.extend_from_slice(&chunk_offset.to_be_bytes());
7921        for (_id, data) in chunks {
7922            out.extend_from_slice(data);
7923        }
7924        let checksum =
7925            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7926        out.extend_from_slice(checksum.as_bytes());
7927        out
7928    }
7929
7930    // ---- EWAH encoder / bitmap writer tests ------------------------------
7931
7932    fn pack_checksum_sha1() -> ObjectId {
7933        sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
7934    }
7935
7936    fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
7937        // Wrap the EWAH body with the surrounding offset bookkeeping the parser
7938        // expects: a checksum offset that lies just past the serialised bitmap.
7939        let mut offset = 0usize;
7940        let checksum_offset = bytes.len();
7941        parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
7942            .expect("test operation should succeed")
7943    }
7944
7945    #[test]
7946    fn ewah_encodes_single_literal_word_matching_helper() {
7947        // A bitmap whose only word is a literal must serialise as one RLW with
7948        // literal_len == 1 followed by the literal, identical to the test
7949        // helper used by the existing parser tests.
7950        let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
7951        assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
7952        assert_eq!(ewah.rlw_position, 0);
7953        assert_eq!(ewah.bit_size, 64);
7954    }
7955
7956    #[test]
7957    fn ewah_byte_layout_is_big_endian() {
7958        let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
7959            .expect("test operation should succeed");
7960        let bytes = ewah.to_bytes();
7961        let mut expected = Vec::new();
7962        expected.extend_from_slice(&64u32.to_be_bytes()); // bit_size
7963        expected.extend_from_slice(&2u32.to_be_bytes()); // word count: rlw + literal
7964        expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); // rlw: literal_len = 1
7965        expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
7966        expected.extend_from_slice(&0u32.to_be_bytes()); // rlw_position
7967        assert_eq!(bytes, expected);
7968    }
7969
7970    #[test]
7971    fn ewah_empty_bitmap_serialises_like_git() {
7972        let ewah = EwahBitmap::empty();
7973        let bytes = ewah.to_bytes();
7974        // bit_size = 0, word_count = 0, rlw_position = 0.
7975        assert_eq!(bytes, vec![0u8; 12]);
7976        // It must still parse and decode to nothing.
7977        let parsed = parse_ewah_bytes(&bytes);
7978        assert_eq!(parsed, ewah);
7979        assert!(
7980            parsed
7981                .to_positions()
7982                .expect("test operation should succeed")
7983                .is_empty()
7984        );
7985    }
7986
7987    #[test]
7988    fn ewah_compresses_clean_zero_run() {
7989        // Three all-zero words followed by a literal: the encoder should emit a
7990        // single RLW carrying a run of 3 clean-zero words plus one literal.
7991        let ewah =
7992            EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
7993        assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
7994        let rlw = ewah.words[0];
7995        assert_eq!(rlw & 1, 0, "run bit should be zero");
7996        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
7997        assert_eq!(rlw >> 33, 1, "literal length should be 1");
7998        assert_eq!(ewah.words[1], 0b1);
7999    }
8000
8001    #[test]
8002    fn ewah_compresses_clean_ones_run() {
8003        let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
8004            .expect("test operation should succeed");
8005        // Pure run of ones, no literals: one RLW only.
8006        assert_eq!(ewah.words.len(), 1);
8007        let rlw = ewah.words[0];
8008        assert_eq!(rlw & 1, 1, "run bit should be one");
8009        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
8010        assert_eq!(rlw >> 33, 0, "no literals");
8011    }
8012
8013    #[test]
8014    fn ewah_run_then_literal_then_run_roundtrips() {
8015        let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
8016        let bit_size = (words.len() * 64) as u32;
8017        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8018        assert_eq!(
8019            ewah.to_words().expect("test operation should succeed"),
8020            words
8021        );
8022    }
8023
8024    #[test]
8025    fn ewah_drops_trailing_clean_zero_words() {
8026        // Trailing all-zero words beyond a literal carry no information and git
8027        // does not serialise them, but to_words() restores them up to bit_size.
8028        let words = vec![0b1, 0, 0, 0];
8029        let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
8030        // bit_size of 1 means a single backing word.
8031        assert_eq!(ewah.bit_size, 1);
8032        assert_eq!(
8033            ewah.to_words().expect("test operation should succeed"),
8034            vec![0b1]
8035        );
8036    }
8037
8038    #[test]
8039    fn ewah_from_positions_roundtrips_via_positions() {
8040        let positions = [0u32, 1, 63, 64, 65, 200, 511];
8041        let ewah =
8042            EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
8043        let mut decoded = ewah.to_positions().expect("test operation should succeed");
8044        decoded.sort_unstable();
8045        assert_eq!(decoded, positions);
8046    }
8047
8048    #[test]
8049    fn ewah_from_positions_dedupes_and_orders() {
8050        let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
8051            .expect("test operation should succeed");
8052        assert_eq!(
8053            ewah.to_positions().expect("test operation should succeed"),
8054            vec![5, 100]
8055        );
8056    }
8057
8058    #[test]
8059    fn ewah_huge_zero_run_spans_multiple_rlws() {
8060        // A run longer than the 32-bit running-length field forces the encoder
8061        // to emit more than one RLW. Use one literal bit far out, with a bit
8062        // size large enough to exceed u32::MAX clean words is impractical, so
8063        // assert the field arithmetic via a direct builder run instead.
8064        let mut builder = EwahBuilder::new(0);
8065        builder.add_empty_words(false, 0xffff_ffff);
8066        builder.add_empty_words(false, 5);
8067        let ewah = builder.finish().expect("test operation should succeed");
8068        assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
8069        assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
8070        assert_eq!(ewah.words[1] & 1, 0);
8071        assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
8072        assert_eq!(ewah.rlw_position, 1);
8073    }
8074
8075    #[test]
8076    fn ewah_from_words_rejects_oversized_bit_size() {
8077        // bit_size demands two words but only one is supplied.
8078        assert!(EwahBitmap::from_words(65, &[0]).is_err());
8079    }
8080
8081    #[test]
8082    fn ewah_from_positions_rejects_out_of_range() {
8083        assert!(EwahBitmap::from_positions(64, &[64]).is_err());
8084    }
8085
8086    #[test]
8087    fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
8088        // Exercise the full encode -> serialise -> parse loop for a non-trivial
8089        // pattern and assert structural equality against the parser's model.
8090        let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
8091        let bit_size = (words.len() * 64) as u32;
8092        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8093        let bytes = ewah.to_bytes();
8094        let parsed = parse_ewah_bytes(&bytes);
8095        assert_eq!(parsed, ewah);
8096        assert_eq!(
8097            parsed.to_words().expect("test operation should succeed"),
8098            words
8099        );
8100    }
8101
8102    #[test]
8103    fn pack_bitmap_index_write_parse_roundtrip_sha1() {
8104        // commit, tree, blob in pack order; one selected commit reaching all.
8105        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
8106        let bytes = write_bitmap(
8107            ObjectFormat::Sha1,
8108            pack_checksum_sha1(),
8109            &object_types,
8110            &[(0u32, 0u32, vec![1u32, 2u32])],
8111            None,
8112        )
8113        .expect("test operation should succeed");
8114        assert_eq!(&bytes[..4], b"BITM");
8115
8116        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
8117            .expect("test operation should succeed");
8118        assert_eq!(parsed.version, 1);
8119        assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
8120        assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
8121        assert_eq!(
8122            parsed
8123                .type_bitmaps
8124                .commits
8125                .to_positions()
8126                .expect("test operation should succeed"),
8127            vec![0]
8128        );
8129        assert_eq!(
8130            parsed
8131                .type_bitmaps
8132                .trees
8133                .to_positions()
8134                .expect("test operation should succeed"),
8135            vec![1]
8136        );
8137        assert_eq!(
8138            parsed
8139                .type_bitmaps
8140                .blobs
8141                .to_positions()
8142                .expect("test operation should succeed"),
8143            vec![2]
8144        );
8145        assert!(
8146            parsed
8147                .type_bitmaps
8148                .tags
8149                .to_positions()
8150                .expect("test operation should succeed")
8151                .is_empty()
8152        );
8153        assert_eq!(parsed.entries.len(), 1);
8154        let entry = parsed
8155            .entry_for_index_position(0)
8156            .expect("test operation should succeed");
8157        assert_eq!(entry.xor_offset, 0);
8158        assert_eq!(entry.flags, 0);
8159        assert_eq!(
8160            entry
8161                .bitmap
8162                .to_positions()
8163                .expect("test operation should succeed"),
8164            vec![0, 1, 2]
8165        );
8166        assert_eq!(parsed.name_hash_cache, None);
8167    }
8168
8169    #[test]
8170    fn pack_bitmap_index_write_parse_roundtrip_sha256() {
8171        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8172            .expect("test operation should succeed");
8173        let object_types = [ObjectType::Commit, ObjectType::Tree];
8174        let bytes = write_bitmap(
8175            ObjectFormat::Sha256,
8176            pack_checksum.clone(),
8177            &object_types,
8178            &[(0u32, 0u32, vec![1u32])],
8179            None,
8180        )
8181        .expect("test operation should succeed");
8182        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
8183            .expect("test operation should succeed");
8184        assert_eq!(parsed.format, ObjectFormat::Sha256);
8185        assert_eq!(parsed.pack_checksum, pack_checksum);
8186        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
8187        assert_eq!(
8188            parsed.entries[0]
8189                .bitmap
8190                .to_positions()
8191                .expect("test operation should succeed"),
8192            vec![0, 1]
8193        );
8194    }
8195
8196    #[test]
8197    fn pack_bitmap_index_write_includes_name_hash_cache() {
8198        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
8199        let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
8200        let bytes = write_bitmap(
8201            ObjectFormat::Sha1,
8202            pack_checksum_sha1(),
8203            &object_types,
8204            &[(0u32, 0u32, vec![1u32, 2u32])],
8205            Some(cache.clone()),
8206        )
8207        .expect("test operation should succeed");
8208        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
8209            .expect("test operation should succeed");
8210        assert_eq!(
8211            parsed.options,
8212            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
8213        );
8214        assert_eq!(parsed.name_hash_cache, Some(cache));
8215    }
8216
8217    #[test]
8218    fn pack_bitmap_writer_supports_multiple_commits() {
8219        let object_types = [
8220            ObjectType::Commit,
8221            ObjectType::Commit,
8222            ObjectType::Tree,
8223            ObjectType::Blob,
8224        ];
8225        let mut writer =
8226            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8227                .expect("test operation should succeed");
8228        writer
8229            .add_commit(0, 0, &[2, 3])
8230            .expect("test operation should succeed");
8231        writer
8232            .add_commit(1, 1, &[2])
8233            .expect("test operation should succeed");
8234        let bytes = writer.write().expect("test operation should succeed");
8235        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
8236            .expect("test operation should succeed");
8237        assert_eq!(parsed.entries.len(), 2);
8238        assert_eq!(
8239            parsed
8240                .type_bitmaps
8241                .commits
8242                .to_positions()
8243                .expect("test operation should succeed"),
8244            vec![0, 1]
8245        );
8246        let first = parsed
8247            .entry_for_index_position(0)
8248            .expect("test operation should succeed");
8249        assert_eq!(
8250            first
8251                .bitmap
8252                .to_positions()
8253                .expect("test operation should succeed"),
8254            vec![0, 2, 3]
8255        );
8256        let second = parsed
8257            .entry_for_index_position(1)
8258            .expect("test operation should succeed");
8259        assert_eq!(
8260            second
8261                .bitmap
8262                .to_positions()
8263                .expect("test operation should succeed"),
8264            vec![1, 2]
8265        );
8266    }
8267
8268    #[test]
8269    fn pack_bitmap_index_recomputes_checksum_on_write() {
8270        // The provided index_checksum field is ignored; write recomputes it so
8271        // a bogus placeholder still produces a valid, parseable file.
8272        let object_types = [ObjectType::Commit, ObjectType::Blob];
8273        let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8274            .expect("test operation should succeed");
8275        let mut index = writer.build().expect("test operation should succeed");
8276        // build() sets an all-zero placeholder checksum.
8277        assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
8278        index.entries.clear(); // mutate the model after build
8279        index.entries.push(PackBitmapEntry {
8280            object_position: 0,
8281            xor_offset: 0,
8282            flags: 0,
8283            bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
8284        });
8285        let bytes = index.write().expect("test operation should succeed");
8286        // Parsing validates the trailing checksum, so a wrong checksum fails.
8287        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
8288            .expect("test operation should succeed");
8289        assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
8290    }
8291
8292    #[test]
8293    fn pack_bitmap_writer_rejects_non_commit_selection() {
8294        let object_types = [ObjectType::Commit, ObjectType::Blob];
8295        let mut writer =
8296            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8297                .expect("test operation should succeed");
8298        // Position 1 is a blob, not a commit.
8299        assert!(writer.add_commit(1, 1, &[]).is_err());
8300        // Position 5 is out of range entirely.
8301        assert!(writer.add_commit(5, 5, &[]).is_err());
8302        // Index position out of range.
8303        assert!(writer.add_commit(0, 5, &[]).is_err());
8304        // Reachable position out of range.
8305        assert!(writer.add_commit(0, 0, &[9]).is_err());
8306    }
8307
8308    #[test]
8309    fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
8310        let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8311            .expect("test operation should succeed");
8312        assert!(
8313            PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
8314                .is_err()
8315        );
8316    }
8317
8318    #[test]
8319    fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
8320        let writer = PackBitmapWriter::new(
8321            ObjectFormat::Sha1,
8322            pack_checksum_sha1(),
8323            &[ObjectType::Commit],
8324        )
8325        .expect("test operation should succeed");
8326        assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
8327    }
8328
8329    #[test]
8330    fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
8331        let mut index = PackBitmapWriter::new(
8332            ObjectFormat::Sha1,
8333            pack_checksum_sha1(),
8334            &[ObjectType::Commit],
8335        )
8336        .expect("test operation should succeed")
8337        .build()
8338        .expect("test operation should succeed");
8339        // Flag set but no cache present.
8340        index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
8341        assert!(index.write().is_err());
8342        // Cache present but flag missing.
8343        index.options = PackBitmapIndex::OPTION_FULL_DAG;
8344        index.name_hash_cache = Some(vec![0]);
8345        assert!(index.write().is_err());
8346    }
8347
8348    #[test]
8349    fn write_bitmap_roundtrips_through_upstream_git_parser() {
8350        // Build a real pack with git, then overwrite reachability with our own
8351        // writer using the real pack checksum and object types, and confirm our
8352        // bytes parse under the same parser that reads upstream bitmaps.
8353        let root = unique_temp_dir("git-pack-bitmap-writer");
8354        fs::create_dir_all(&root).expect("test operation should succeed");
8355        {
8356            run_git_success(&root, &["init", "-q", "-b", "main"]);
8357            run_git_success(
8358                &root,
8359                &[
8360                    "-c",
8361                    "user.name=Example User",
8362                    "-c",
8363                    "user.email=example@example.invalid",
8364                    "commit",
8365                    "--allow-empty",
8366                    "-q",
8367                    "-m",
8368                    "one",
8369                ],
8370            );
8371            run_git_success(&root, &["repack", "-adb"]);
8372            let pack_dir = root.join(".git").join("objects").join("pack");
8373            let idx_path = single_path_with_extension(&pack_dir, "idx");
8374            let index = PackIndex::parse(
8375                &fs::read(idx_path).expect("test operation should succeed"),
8376                ObjectFormat::Sha1,
8377            )
8378            .expect("test operation should succeed");
8379            // Read object types from the pack so the type bitmaps are accurate.
8380            let pack_path = single_path_with_extension(&pack_dir, "pack");
8381            let pack =
8382                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
8383                    .expect("test operation should succeed");
8384            // Map each index entry (sorted by oid) to its pack offset, then to a
8385            // pack-order position so positions line up with the index ordering.
8386            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
8387            offsets.sort_unstable();
8388            let position_of = |offset: u64| -> u32 {
8389                offsets
8390                    .iter()
8391                    .position(|value| *value == offset)
8392                    .expect("test operation should succeed") as u32
8393            };
8394            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
8395            for entry in &index.entries {
8396                let position = position_of(entry.offset) as usize;
8397                // Find the parsed object at this pack offset to read its type.
8398                if let Some(parsed) = pack
8399                    .entries
8400                    .iter()
8401                    .find(|po| po.entry.offset == entry.offset)
8402                {
8403                    object_types[position] = parsed.object.object_type;
8404                }
8405            }
8406            // Select the first commit position we find and reach everything.
8407            let commit_position = object_types
8408                .iter()
8409                .position(|ty| *ty == ObjectType::Commit)
8410                .expect("test operation should succeed") as u32;
8411            // The entry records the commit's position in the oid-sorted index.
8412            let commit_index_position = index
8413                .entries
8414                .iter()
8415                .position(|entry| position_of(entry.offset) == commit_position)
8416                .expect("test operation should succeed")
8417                as u32;
8418            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
8419            let bytes = write_bitmap(
8420                ObjectFormat::Sha1,
8421                index.pack_checksum.clone(),
8422                &object_types,
8423                &[(commit_position, commit_index_position, reachable)],
8424                None,
8425            )
8426            .expect("test operation should succeed");
8427            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
8428                .expect("test operation should succeed");
8429            assert_eq!(parsed.pack_checksum, index.pack_checksum);
8430            assert_eq!(parsed.entries.len(), 1);
8431            assert_eq!(
8432                parsed.entries[0]
8433                    .bitmap
8434                    .to_positions()
8435                    .expect("test operation should succeed")
8436                    .len(),
8437                index.entries.len()
8438            );
8439        };
8440        let _ = fs::remove_dir_all(&root);
8441    }
8442}