Skip to main content

sley_pack/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
/// Location and size bookkeeping for one object stored in a pack.
///
/// The field descriptions below reflect how the pack parser fills this in for
/// undeltified entries; values for resolved deltas are set by code outside
/// this excerpt.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the object this entry describes.
    pub oid: ObjectId,
    /// Number of compressed input bytes consumed while inflating the entry.
    pub compressed_size: u64,
    /// Size declared in the entry header (checked against the inflated body).
    pub uncompressed_size: u64,
    /// Byte offset of the entry header, measured from the start of the pack.
    pub offset: u64,
}
23
/// Default sliding-window size used by [`PackFile::write_packed`].
///
/// Each object is compared against up to this many previously emitted
/// candidates of the same type when searching for a small delta. Matches git's
/// default `pack.window`. This is the value [`PackWriteOptions::new`] assigns
/// to [`PackWriteOptions::window`].
pub const DEFAULT_PACK_WINDOW: usize = 10;
30
/// Default maximum delta chain depth used by [`PackFile::write_packed`].
///
/// A delta may reference a base that is itself a delta; this bounds how long
/// such chains may grow so that reconstructing any object stays cheap and the
/// reader's recursion stays shallow. Matches git's default `pack.depth`. This
/// is the value [`PackWriteOptions::new`] assigns to
/// [`PackWriteOptions::depth`].
pub const DEFAULT_PACK_DEPTH: usize = 50;
37
/// Minimum object count for parallel pack compression.
///
/// Object-count threshold before pack payload compression is fanned out across
/// worker threads. Below this, thread setup and extra buffering cost more than
/// they save.
// NOTE(review): the code that consults this threshold is outside this excerpt.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;
42
/// Upper bound on worker threads for parallel pack compression.
///
/// Keep parallel compression bounded. Git gets much of its wall-clock win from
/// using several cores, but unbounded threads can steal cache from delta
/// planning and inflate peak memory on large packs.
// NOTE(review): the code that applies this cap is outside this excerpt.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
/// Options controlling sliding-window delta selection during pack generation.
///
/// Construct with [`PackWriteOptions::new`] (sensible defaults) and adjust with
/// the builder-style setters, or build one directly. Used by
/// [`PackFile::write_packed_with_options`] and [`PackFile::write_thin`].
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Number of previous same-type candidates each object is deltified
    /// against. Larger windows find better deltas at higher cost.
    /// Defaults to [`DEFAULT_PACK_WINDOW`].
    pub window: usize,
    /// Maximum delta chain depth. A value of `0` disables deltification.
    /// Defaults to [`DEFAULT_PACK_DEPTH`].
    pub depth: usize,
    /// When `true`, in-pack deltas are encoded as ofs-deltas (the default and
    /// git's preference). When `false`, in-pack deltas use ref-deltas. Deltas
    /// against external thin-pack bases always use ref-deltas regardless.
    pub prefer_ofs_delta: bool,
    /// External base objects, keyed by object id, that are *not* written into
    /// the pack but may be used as delta bases. Supplying any entries here
    /// produces a thin pack (see [`PackFile::write_thin`]). Empty by default,
    /// yielding a self-contained pack.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// When `true` (the default), objects are reordered by type and size for
    /// better delta locality. When `false`, the input order is preserved (the
    /// emitted pack lists objects in the order supplied); deltas then only
    /// reference earlier input objects. Reordering is always skipped when
    /// deltification is disabled (`depth == 0`), since it has no effect there.
    pub reorder: bool,
}
76
77impl Default for PackWriteOptions {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl PackWriteOptions {
84    /// Options with git-compatible defaults: window
85    /// [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`], ofs-deltas, and
86    /// no external thin bases.
87    pub fn new() -> Self {
88        Self {
89            window: DEFAULT_PACK_WINDOW,
90            depth: DEFAULT_PACK_DEPTH,
91            prefer_ofs_delta: true,
92            thin_bases: HashMap::new(),
93            reorder: true,
94        }
95    }
96
97    /// Set the sliding-window size.
98    pub fn with_window(mut self, window: usize) -> Self {
99        self.window = window;
100        self
101    }
102
103    /// Set the maximum delta chain depth (`0` disables deltas).
104    pub fn with_depth(mut self, depth: usize) -> Self {
105        self.depth = depth;
106        self
107    }
108
109    /// Choose whether in-pack deltas use ofs-delta (`true`) or ref-delta
110    /// (`false`) base references.
111    pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
112        self.prefer_ofs_delta = prefer_ofs_delta;
113        self
114    }
115
116    /// Provide the set of external base objects permitted for a thin pack.
117    pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
118        self.thin_bases = thin_bases;
119        self
120    }
121
122    /// Choose whether objects may be reordered for delta locality (`true`) or
123    /// emitted in input order (`false`).
124    pub fn with_reorder(mut self, reorder: bool) -> Self {
125        self.reorder = reorder;
126        self
127    }
128}
129
/// Switches describing how a repack should be carried out.
// NOTE(review): no consumer of this type is visible in this excerpt; the field
// notes below are inferred from the names and must be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably: whether bitmap indexes are written — TODO confirm.
    pub write_bitmaps: bool,
    /// Presumably: whether cruft packs are produced — TODO confirm.
    pub cruft_packs: bool,
    /// Presumably: factor for geometric repacking, `None` when unused — TODO confirm.
    pub geometric_factor: Option<u8>,
}
136
/// A fully parsed pack: header version, resolved objects, and checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack version read from the header; the parser accepts `2` and `3`.
    pub version: u32,
    /// Objects obtained after delta resolution of the parsed entries.
    pub entries: Vec<PackObject>,
    /// Digest of every byte before the trailer; parsing verifies that it
    /// equals the trailer stored in the pack.
    pub checksum: ObjectId,
}
143
/// One object from a pack together with its in-pack bookkeeping.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes and offset of the entry inside the pack.
    pub entry: PackEntry,
    /// The object itself (type plus body).
    pub object: EncodedObject,
}
149
/// Per-object statistics for one entry of a verified pack, in the shape
/// `git verify-pack -v` reports.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Resolved object id.
    pub oid: ObjectId,
    /// Resolved object type (the delta's *result* type, not `ofs-delta`).
    pub object_type: ObjectType,
    /// Size declared in the entry header, in bytes: the object body size for
    /// an undeltified entry, but the length of the *delta instruction stream*
    /// for a delta entry (this is what [`PackFile::verify_pack_stats`] stores,
    /// matching the size column git prints).
    pub size: u64,
    /// Bytes this object occupies in the pack: the offset delta to the next
    /// object, or to the trailing checksum for the last object.
    pub size_in_pack: u64,
    /// In-pack byte offset where this object's entry begins.
    pub offset: u64,
    /// Delta chain depth: `0` for undeltified objects, base-depth + 1 otherwise.
    pub delta_depth: u32,
    /// For delta objects, the id of the *immediate* base object (which may
    /// itself be a delta). `None` for undeltified objects.
    pub base_oid: Option<ObjectId>,
}
171
/// Result of [`PackFile::verify_pack_stats`]: per-object stats in pack offset
/// order plus the pack's trailing checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, ordered by ascending in-pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack, as computed while parsing it.
    pub checksum: ObjectId,
}
179
/// A pack together with the index generated for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Raw pack bytes.
    pub pack: Vec<u8>,
    /// Raw pack-index bytes describing `pack`.
    pub index: Vec<u8>,
    /// Checksum of the pack ([`PackFile::index_pack`] stores the index
    /// builder's `pack_checksum` here).
    pub checksum: ObjectId,
    /// Index entries (object id, CRC32, offset) for the pack's objects.
    pub entries: Vec<PackIndexEntry>,
}
187
/// A borrowed object paired with its already-known id, used as input to
/// [`PackFile::write_packed_with_known_ids`] so the object need not be
/// re-hashed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Id the caller asserts for `object`; its format must match the pack's.
    pub oid: &'a ObjectId,
    /// The object to write.
    pub object: &'a EncodedObject,
}
193
/// Output of building an index for an existing pack (see
/// `PackIndex::write_v2_for_pack`, as used by [`PackFile::index_pack`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Raw bytes of the generated index.
    pub index: Vec<u8>,
    /// Checksum of the pack the index was built for.
    pub pack_checksum: ObjectId,
    /// Entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
}
200
/// An owned, fully decoded pack index.
// NOTE(review): the index parser is outside this excerpt; field notes that go
// beyond the names and types are assumptions to confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-slot fanout table — presumably git's cumulative first-byte counts.
    pub fanout: [u32; 256],
    /// Decoded entries (object id, CRC32, pack offset).
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
209
/// A pack index that keeps borrowing the underlying index bytes instead of
/// owning a decoded entry list.
// NOTE(review): accessor methods are outside this excerpt; how `bytes` and
// `tables` are consulted is an assumption to confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-slot fanout table — presumably git's cumulative first-byte counts.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Borrowed index bytes backing this view.
    bytes: &'a [u8],
    // Object-id format of the index.
    format: ObjectFormat,
    // Ranges locating the index tables — presumably offsets into `bytes`.
    tables: PackIndexViewTables,
}
221
/// A thread-shareable, debuggable owner of pack-index bytes.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Borrow the index bytes held by this source.
    fn as_bytes(&self) -> &[u8];
}

/// Every `AsRef<[u8]>` type that is also `Debug + Send + Sync` (sized or not)
/// is a byte source; its bytes are whatever `as_ref` exposes.
impl<T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized> PackIndexByteSource for T {
    fn as_bytes(&self) -> &[u8] {
        AsRef::<[u8]>::as_ref(self)
    }
}
234
/// Reference-counted index bytes; a newtype over `Arc<[u8]>` that implements
/// [`PackIndexByteSource`].
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
237
238impl PackIndexByteSource for SharedIndexBytes {
239    fn as_bytes(&self) -> &[u8] {
240        self.0.as_ref()
241    }
242}
243
/// Owning counterpart of [`PackIndexView`]: the same header fields, with the
/// index bytes held behind a shared [`PackIndexByteSource`].
// NOTE(review): construction and accessors are outside this excerpt.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-slot fanout table — presumably git's cumulative first-byte counts.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Shared owner of the index bytes; cloning this struct clones the `Arc`.
    bytes: Arc<dyn PackIndexByteSource>,
    // Object-id format of the index.
    format: ObjectFormat,
    // Ranges locating the index tables — presumably offsets into the bytes.
    tables: PackIndexViewTables,
}
255
/// One pack-index record for a single object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Object id.
    pub oid: ObjectId,
    /// CRC32 recorded for the object's pack entry.
    pub crc32: u32,
    /// Offset of the object's entry in the pack.
    pub offset: u64,
}
262
/// The CRC32 and pack offset of an index record, without the object id —
/// presumably what an id lookup returns (lookup code is outside this excerpt).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC32 recorded for the object's pack entry.
    pub crc32: u32,
    /// Offset of the object's entry in the pack.
    pub offset: u64,
}
268
/// Table locations for an index view, by index format version.
// NOTE(review): the ranges are presumably byte ranges into the index buffer;
// the code that computes and reads them is outside this excerpt.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version-1 layout: a single entry table.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version-2 layout: separate object-id, CRC, small-offset and
    /// large-offset tables.
    V2 {
        oid_table: Range<usize>,
        crc_table: Range<usize>,
        small_offset_table: Range<usize>,
        large_offset_table: Range<usize>,
    },
}
281
/// Decoded pack reverse index.
// NOTE(review): parser not visible here; presumably mirrors git's `.rev`
// file, where `positions` lists index positions in pack-offset order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Format version.
    pub version: u32,
    /// Object-id format.
    pub format: ObjectFormat,
    /// Position table — TODO confirm exact meaning against the parser.
    pub positions: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of the associated index.
    pub index_checksum: ObjectId,
}
290
/// Decoded per-object modification-time table for a pack.
// NOTE(review): parser not visible here; presumably mirrors git's `.mtimes`
// file used with cruft packs — TODO confirm units and ordering of `mtimes`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version.
    pub version: u32,
    /// Object-id format.
    pub format: ObjectFormat,
    /// One modification time per object.
    pub mtimes: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of the associated index.
    pub index_checksum: ObjectId,
}
299
/// Decoded pack bitmap index.
// NOTE(review): parser not visible here; field notes beyond names and types
// are assumptions to confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Format version.
    pub version: u16,
    /// Object-id format.
    pub format: ObjectFormat,
    /// Option flags from the bitmap header.
    pub options: u16,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of the bitmap index itself.
    pub index_checksum: ObjectId,
    /// One bitmap per object type (commits, trees, blobs, tags).
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Per-commit bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache; `None` when absent.
    pub name_hash_cache: Option<Vec<u32>>,
}
311
/// The four per-type bitmaps of a pack bitmap index, one for each object type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commit objects.
    pub commits: EwahBitmap,
    /// Bitmap for tree objects.
    pub trees: EwahBitmap,
    /// Bitmap for blob objects.
    pub blobs: EwahBitmap,
    /// Bitmap for tag objects.
    pub tags: EwahBitmap,
}
319
/// One stored bitmap of a pack bitmap index, attached to a single commit.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// The commit's position in the *oid-sorted* pack index (`.idx` order),
    /// NOT the pack-order position used for the bitmap's bit numbering.
    /// Upstream writes `oid_pos(...)` here (pack-bitmap-write.c) and reads it
    /// back via `nth_packed_object_id` (pack-bitmap.c).
    pub object_position: u32,
    /// XOR offset byte of the entry — presumably how many entries back the
    /// bitmap this one is XOR-compressed against lies; TODO confirm.
    pub xor_offset: u8,
    /// Flag byte of the entry.
    pub flags: u8,
    /// Reachability bitmap; bit `i` refers to the `i`-th object in *pack
    /// order* (offset order), as mapped by the pack's reverse index.
    pub bitmap: EwahBitmap,
}
333
/// A bitmap held in its stored word form.
// NOTE(review): encode/decode code is outside this excerpt; presumably the
// EWAH run-length-compressed layout git uses — TODO confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap represents.
    pub bit_size: u32,
    /// The bitmap's 64-bit words.
    pub words: Vec<u64>,
    /// Position of a run-length word within `words` — presumably the last
    /// one; TODO confirm.
    pub rlw_position: u32,
}
340
/// Decoded multi-pack index.
// NOTE(review): parser not visible here; field notes beyond names and types
// are assumptions to confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object-id format.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// 256-slot fanout table — presumably git's cumulative first-byte counts.
    pub fanout: [u32; 256],
    /// Object records (id, owning pack, offset).
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse-index table; `None` when absent.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges; `None` when absent.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunks (id, offset, length) found in the file.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum of the multi-pack index.
    pub checksum: ObjectId,
}
355
/// Lookup structure over a multi-pack index that keeps the raw bytes and
/// table offsets rather than a decoded object list.
// NOTE(review): constructor and lookup methods are outside this excerpt; the
// `*_offset` fields are presumably byte offsets into `bytes` — TODO confirm.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    // Object-id format.
    format: ObjectFormat,
    // Number of packs covered.
    pack_count: u32,
    // Names of the covered packs.
    pack_names: Vec<String>,
    // 256-slot fanout table.
    fanout: [u32; 256],
    // Number of objects covered.
    object_count: usize,
    // Start of the object-id lookup table.
    oid_lookup_offset: usize,
    // Start of the object-offsets table.
    object_offsets_offset: usize,
    // Start of the large-offsets table, when one exists.
    large_offsets_offset: Option<usize>,
    // Length of the large-offsets table.
    large_offsets_len: usize,
    // Shared raw index bytes; cloning this struct clones the `Arc`.
    bytes: Arc<Vec<u8>>,
}
369
/// One object record of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Object id.
    pub oid: ObjectId,
    /// Integer id of the pack holding the object — presumably an index into
    /// [`MultiPackIndex::pack_names`]; TODO confirm.
    pub pack_int_id: u32,
    /// Offset of the object within that pack.
    pub offset: u64,
}
376
/// Bitmap range belonging to one pack of a multi-pack index.
// NOTE(review): presumably the first bit position and the number of bits the
// pack occupies in the multi-pack bitmap — TODO confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Starting bitmap position for the pack.
    pub bitmap_pos: u32,
    /// Number of bitmap positions for the pack.
    pub bitmap_nr: u32,
}
382
/// Location of one chunk inside a multi-pack index file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk — presumably from the start of the file; TODO confirm.
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
389
/// Kind of a pack entry as decoded from its entry header.
///
/// The first four variants are stored whole objects; the two delta variants
/// are followed in the pack by a base reference before the compressed data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    /// Undeltified commit.
    Commit,
    /// Undeltified tree.
    Tree,
    /// Undeltified blob.
    Blob,
    /// Undeltified tag.
    Tag,
    /// Delta whose base is identified by an in-pack offset.
    OfsDelta,
    /// Delta whose base is identified by a raw object id.
    RefDelta,
}
399
/// A pack entry after inflation but before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// An undeltified entry: the object and its id are already known.
    Resolved(PackObject),
    /// A delta entry awaiting application to its base.
    Delta {
        /// Where the base object is to be found.
        base: DeltaBase,
        /// Compressed bytes consumed while inflating the delta.
        compressed_size: u64,
        /// Size declared in the entry header (length of the inflated delta).
        delta_size: u64,
        /// Offset of the entry header in the pack.
        offset: u64,
        /// The inflated delta instruction stream.
        delta: Vec<u8>,
    },
}
411
/// Reference from a delta entry to its base object.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// In-pack offset of the base entry (ofs-delta); used as a key into
    /// offset-indexed maps, so it is an absolute pack offset.
    Offset(u64),
    /// Object id of the base (ref-delta); the base may live outside the pack.
    Ref(ObjectId),
}
417
/// One pack entry as stored on disk, used by [`PackFile::verify_pack_stats`] to
/// recover the delta structure and on-disk stream size that resolved
/// [`PackObject`]s no longer carry.
struct OnDiskEntry {
    /// Offset of the entry header in the pack.
    offset: u64,
    /// Base reference for a delta entry; `None` for an undeltified entry.
    base: Option<DeltaBase>,
    /// Size declared in the entry header: the object size for an undeltified
    /// entry, the delta instruction stream length for a delta.
    stream_size: u64,
}
426
427impl PackFile {
428    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
429        Self::parse(bytes, ObjectFormat::Sha1)
430    }
431
432    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
433        Self::parse_with_base(bytes, format, |_| Ok(None))
434    }
435
436    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
437        Self::parse(&bundle.pack, bundle.format)
438    }
439
440    pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
441        let PackIndexBuild {
442            index,
443            pack_checksum,
444            entries,
445        } = PackIndex::write_v2_for_pack(bytes, format)?;
446        Ok(PackWrite {
447            pack: bytes.to_vec(),
448            index,
449            checksum: pack_checksum,
450            entries,
451        })
452    }
453
454    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
455    where
456        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
457    {
458        Self::parse_with_base(bytes, format, external_base)
459    }
460
461    fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
462    where
463        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
464    {
465        let trailer_len = format.raw_len();
466        if bytes.len() < 12 + trailer_len {
467            return Err(GitError::InvalidFormat("pack file too short".into()));
468        }
469        let trailer_offset = bytes.len() - trailer_len;
470        let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
471        let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
472        if checksum != expected {
473            return Err(GitError::InvalidFormat(format!(
474                "pack checksum mismatch: expected {expected}, got {checksum}"
475            )));
476        }
477
478        if &bytes[..4] != b"PACK" {
479            return Err(GitError::InvalidFormat("missing PACK signature".into()));
480        }
481        let version = u32_be(&bytes[4..8]);
482        if version != 2 && version != 3 {
483            return Err(GitError::Unsupported(format!("pack version {version}")));
484        }
485        let count = u32_be(&bytes[8..12]) as usize;
486        let mut offset = 12usize;
487        let mut entries = Vec::with_capacity(count);
488        for _ in 0..count {
489            let entry_offset = offset;
490            let header = parse_entry_header(bytes, &mut offset)?;
491            let base =
492                match header.kind {
493                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
494                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
495                    )),
496                    PackObjectKind::RefDelta => {
497                        let hash_len = format.raw_len();
498                        if offset + hash_len > trailer_offset {
499                            return Err(GitError::InvalidFormat(
500                                "truncated ref-delta base object id".into(),
501                            ));
502                        }
503                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
504                        offset += hash_len;
505                        Some(DeltaBase::Ref(oid))
506                    }
507                    _ => None,
508                };
509            let mut body = Vec::new();
510            let consumed = inflate_into(
511                &bytes[offset..trailer_offset],
512                &mut body,
513                header.size.min(usize::MAX as u64) as usize,
514            )?;
515            if body.len() as u64 != header.size {
516                return Err(GitError::InvalidObject(format!(
517                    "pack object declared {} bytes, decoded {}",
518                    header.size,
519                    body.len()
520                )));
521            }
522            if consumed == 0 {
523                return Err(GitError::InvalidFormat(
524                    "empty compressed pack entry".into(),
525                ));
526            }
527            offset = offset
528                .checked_add(consumed)
529                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
530            if offset > trailer_offset {
531                return Err(GitError::InvalidFormat(
532                    "pack entry extends past checksum".into(),
533                ));
534            }
535            if let Some(base) = base {
536                entries.push(ParsedPackEntry::Delta {
537                    base,
538                    compressed_size: consumed as u64,
539                    delta_size: header.size,
540                    offset: entry_offset as u64,
541                    delta: body,
542                });
543            } else {
544                let object_type = match header.kind {
545                    PackObjectKind::Commit => ObjectType::Commit,
546                    PackObjectKind::Tree => ObjectType::Tree,
547                    PackObjectKind::Blob => ObjectType::Blob,
548                    PackObjectKind::Tag => ObjectType::Tag,
549                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
550                };
551                let object = EncodedObject::new(object_type, body);
552                let oid = object.object_id(format)?;
553                entries.push(ParsedPackEntry::Resolved(PackObject {
554                    entry: PackEntry {
555                        oid,
556                        compressed_size: consumed as u64,
557                        uncompressed_size: header.size,
558                        offset: entry_offset as u64,
559                    },
560                    object,
561                }));
562            }
563        }
564        if offset != trailer_offset {
565            return Err(GitError::InvalidFormat(format!(
566                "pack has {} trailing bytes before checksum",
567                trailer_offset - offset
568            )));
569        }
570        Ok(Self {
571            version,
572            entries: resolve_pack_entries(entries, format, &mut external_base)?,
573            checksum,
574        })
575    }
576
577    /// Walk the pack and produce per-object statistics matching the output of
578    /// `git verify-pack -v` / `git index-pack --verify-stat`.
579    ///
580    /// Objects are returned in pack offset order (the order `git verify-pack -v`
581    /// prints them). Each entry carries the *resolved* object id, type and size,
582    /// the in-pack byte span (`size_in_pack` = the offset delta to the next
583    /// object, or to the trailing checksum for the last object), the in-pack
584    /// offset, the delta chain depth (`0` for undeltified objects), and — for
585    /// deltas — the object id of the *immediate* base (which may itself be a
586    /// delta). This mirrors `builtin/index-pack.c`'s `show_pack_info`.
587    pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
588        // Resolve the whole pack first: this validates the trailing checksum,
589        // every object's inflate, and yields the resolved oid/type/size keyed by
590        // offset. `verify-pack` is exactly this validation plus the stat report.
591        let pack = Self::parse(bytes, format)?;
592
593        // Independently walk the on-disk entries to recover each object's stored
594        // kind and (for deltas) its base reference — information `PackFile`
595        // discards once deltas are resolved.
596        let trailer_len = format.raw_len();
597        let trailer_offset = bytes.len() - trailer_len;
598        let count = u32_be(&bytes[8..12]) as usize;
599        let mut offset = 12usize;
600        // Per entry in read (offset) order: (offset, base, on-disk stream size).
601        // The stream size is what git prints in the size column: it is the
602        // resolved object size for an undeltified entry, but the *delta
603        // instruction stream* length for a delta entry (builtin/index-pack.c sets
604        // `obj->size` from the entry header, before any delta is applied).
605        let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
606        for _ in 0..count {
607            let entry_offset = offset as u64;
608            let header = parse_entry_header(bytes, &mut offset)?;
609            let stream_size = header.size;
610            let base = match header.kind {
611                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
612                    parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
613                )),
614                PackObjectKind::RefDelta => {
615                    let hash_len = format.raw_len();
616                    if offset + hash_len > trailer_offset {
617                        return Err(GitError::InvalidFormat(
618                            "truncated ref-delta base object id".into(),
619                        ));
620                    }
621                    let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
622                    offset += hash_len;
623                    Some(DeltaBase::Ref(oid))
624                }
625                _ => None,
626            };
627            // Skip the compressed body to reach the next entry header.
628            let mut body = Vec::new();
629            let consumed = inflate_into(
630                &bytes[offset..trailer_offset],
631                &mut body,
632                header.size.min(usize::MAX as u64) as usize,
633            )?;
634            offset = offset
635                .checked_add(consumed)
636                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
637            on_disk.push(OnDiskEntry {
638                offset: entry_offset,
639                base,
640                stream_size,
641            });
642        }
643
644        // Map offset -> resolved object so the on-disk walk can join in oid/type.
645        let mut resolved_by_offset: HashMap<u64, &PackObject> =
646            HashMap::with_capacity(pack.entries.len());
647        for object in &pack.entries {
648            resolved_by_offset.insert(object.entry.offset, object);
649        }
650        // Map offset -> resolved oid, for ofs-delta base lookups.
651        let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
652        for entry in &on_disk {
653            if let Some(object) = resolved_by_offset.get(&entry.offset) {
654                oid_by_offset.insert(entry.offset, object.entry.oid);
655            }
656        }
657        // Map base offset -> index in `on_disk`, for delta-depth propagation.
658        let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
659        for (idx, entry) in on_disk.iter().enumerate() {
660            index_by_offset.insert(entry.offset, idx);
661        }
662
663        // Sorted offsets give the size-in-pack span (next offset - this offset),
664        // with the trailing checksum offset as the final sentinel.
665        let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
666        sorted_offsets.sort_unstable();
667        let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
668        for window in sorted_offsets.windows(2) {
669            next_offset.insert(window[0], window[1]);
670        }
671        if let Some(last) = sorted_offsets.last() {
672            next_offset.insert(*last, trailer_offset as u64);
673        }
674
675        // Compute delta depth by following base offsets. Depth of a non-delta is
676        // 0; a delta's depth is its base's depth + 1. `index_by_offset` lets an
677        // ofs-delta find its base's index; a ref-delta resolves its base oid to
678        // an in-pack offset when present (thin-pack external bases are not stored
679        // in this pack, but verify-pack only ever runs on self-contained packs).
680        let mut depth = vec![None; on_disk.len()];
681        fn resolve_depth(
682            idx: usize,
683            on_disk: &[OnDiskEntry],
684            index_by_offset: &HashMap<u64, usize>,
685            offset_of_oid: &HashMap<ObjectId, u64>,
686            depth: &mut [Option<u32>],
687        ) -> u32 {
688            if let Some(d) = depth[idx] {
689                return d;
690            }
691            let computed = match &on_disk[idx].base {
692                None => 0,
693                Some(base) => {
694                    let base_idx = match base {
695                        DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
696                        DeltaBase::Ref(oid) => offset_of_oid
697                            .get(oid)
698                            .and_then(|off| index_by_offset.get(off).copied()),
699                    };
700                    match base_idx {
701                        Some(bi) => {
702                            resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
703                        }
704                        // Base not in this pack (thin pack); treat as depth 1.
705                        None => 1,
706                    }
707                }
708            };
709            depth[idx] = Some(computed);
710            computed
711        }
712        let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
713        for (off, oid) in &oid_by_offset {
714            offset_of_oid.insert(*oid, *off);
715        }
716        for idx in 0..on_disk.len() {
717            resolve_depth(
718                idx,
719                &on_disk,
720                &index_by_offset,
721                &offset_of_oid,
722                &mut depth,
723            );
724        }
725
726        let mut stats = Vec::with_capacity(on_disk.len());
727        for (idx, entry) in on_disk.iter().enumerate() {
728            let off = entry.offset;
729            let object = resolved_by_offset.get(&off).ok_or_else(|| {
730                GitError::InvalidFormat("pack offset missing from resolved set".into())
731            })?;
732            let size_in_pack = next_offset
733                .get(&off)
734                .copied()
735                .unwrap_or(trailer_offset as u64)
736                .saturating_sub(off);
737            let base_oid = match &entry.base {
738                None => None,
739                Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
740                Some(DeltaBase::Ref(oid)) => Some(*oid),
741            };
742            stats.push(PackVerifyStat {
743                oid: object.entry.oid,
744                object_type: object.object.object_type,
745                // git prints the on-disk stream size: object body size for an
746                // undeltified entry, delta-instruction stream size for a delta.
747                size: entry.stream_size,
748                size_in_pack,
749                offset: off,
750                delta_depth: depth[idx].unwrap_or(0),
751                base_oid,
752            });
753        }
754        // Emit in pack offset order, matching git's read order.
755        stats.sort_by_key(|stat| stat.offset);
756
757        Ok(PackVerifyStats {
758            objects: stats,
759            checksum: pack.checksum,
760        })
761    }
762
763    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
764    where
765        T: Borrow<EncodedObject>,
766    {
767        Self::write_undeltified(objects, ObjectFormat::Sha1)
768    }
769
770    /// Write a pack with every object stored undeltified (no delta entries).
771    ///
772    /// This is the simple, self-contained encoding; objects appear in the given
773    /// order. For smaller output that exploits similarity between objects, use
774    /// [`PackFile::write_packed`].
775    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
776    where
777        T: Borrow<EncodedObject>,
778    {
779        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
780        Self::write_packed_impl(objects, format, &options)
781    }
782
783    /// Write a pack using sliding-window delta selection with git-compatible
784    /// defaults (window [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`],
785    /// ofs-deltas, self-contained).
786    ///
787    /// Objects are grouped by type and ordered for good deltas, then each is
788    /// compared against a window of previously emitted candidates; the smallest
789    /// acceptable delta is kept, otherwise the object is stored undeltified. The
790    /// result round-trips through [`PackFile::parse`].
791    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
792    where
793        T: Borrow<EncodedObject>,
794    {
795        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
796    }
797
798    /// Like [`PackFile::write_packed`] but with caller-supplied
799    /// [`PackWriteOptions`] (window, depth, base-reference style, and optional
800    /// external thin bases).
801    pub fn write_packed_with_options<T>(
802        objects: &[T],
803        format: ObjectFormat,
804        options: &PackWriteOptions,
805    ) -> Result<PackWrite>
806    where
807        T: Borrow<EncodedObject>,
808    {
809        Self::write_packed_impl(objects, format, options)
810    }
811
812    /// Like [`PackFile::write_packed`], but uses caller-supplied object ids
813    /// instead of re-hashing each object before pack planning.
814    ///
815    /// This is intended for object-database paths that reached each object by
816    /// its id and already trust that id/object mapping. The function validates
817    /// id formats and duplicate ids, but it does not re-hash object bodies; use
818    /// [`PackFile::write_packed`] when the ids are not already known to be
819    /// canonical.
820    pub fn write_packed_with_known_ids(
821        inputs: &[PackInput<'_>],
822        format: ObjectFormat,
823    ) -> Result<PackWrite> {
824        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
825    }
826
827    /// Like [`PackFile::write_packed_with_known_ids`] but with caller-supplied
828    /// [`PackWriteOptions`].
829    pub fn write_packed_with_known_ids_and_options(
830        inputs: &[PackInput<'_>],
831        format: ObjectFormat,
832        options: &PackWriteOptions,
833    ) -> Result<PackWrite> {
834        if inputs.len() > u32::MAX as usize {
835            return Err(GitError::InvalidFormat("too many pack objects".into()));
836        }
837        let mut objects = Vec::with_capacity(inputs.len());
838        let mut object_ids = Vec::with_capacity(inputs.len());
839        for input in inputs {
840            if input.oid.format() != format {
841                return Err(GitError::InvalidObjectId(format!(
842                    "pack object id {} uses {}, pack uses {}",
843                    input.oid,
844                    input.oid.format().name(),
845                    format.name()
846                )));
847            }
848            objects.push(input.object);
849            object_ids.push(*input.oid);
850        }
851        Self::write_packed_from_parts(objects, object_ids, format, options)
852    }
853
854    /// Write a thin pack: objects may be deltified against `external_bases`
855    /// that are *not* included in the pack, referenced by ref-delta to their
856    /// object id.
857    ///
858    /// The receiver must already have (or otherwise obtain) those base objects
859    /// and resolve the pack with [`PackFile::parse_thin`]. Window and depth use
860    /// the defaults; pass options via [`PackFile::write_packed_with_options`]
861    /// with [`PackWriteOptions::with_thin_bases`] for finer control.
862    pub fn write_thin<T>(
863        objects: &[T],
864        format: ObjectFormat,
865        external_bases: HashMap<ObjectId, EncodedObject>,
866    ) -> Result<PackWrite>
867    where
868        T: Borrow<EncodedObject>,
869    {
870        let options = PackWriteOptions::new().with_thin_bases(external_bases);
871        Self::write_packed_impl(objects, format, &options)
872    }
873
874    fn write_packed_impl<T>(
875        objects: &[T],
876        format: ObjectFormat,
877        options: &PackWriteOptions,
878    ) -> Result<PackWrite>
879    where
880        T: Borrow<EncodedObject>,
881    {
882        if objects.len() > u32::MAX as usize {
883            return Err(GitError::InvalidFormat("too many pack objects".into()));
884        }
885        let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
886
887        // Compute object ids up front; they are needed both for the index and,
888        // for ref-deltas, inside the pack entries themselves.
889        let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
890        for object in &objects {
891            object_ids.push(object.object_id(format)?);
892        }
893        Self::write_packed_from_parts(objects, object_ids, format, options)
894    }
895
896    fn write_packed_from_parts(
897        objects: Vec<&EncodedObject>,
898        object_ids: Vec<ObjectId>,
899        format: ObjectFormat,
900        options: &PackWriteOptions,
901    ) -> Result<PackWrite> {
902        let mut seen = HashSet::with_capacity(object_ids.len());
903        for oid in &object_ids {
904            if !seen.insert(oid) {
905                return Err(GitError::InvalidFormat(format!(
906                    "pack contains duplicate object id {oid}"
907                )));
908            }
909        }
910
911        // Validate external thin bases share the pack's hash format.
912        for oid in options.thin_bases.keys() {
913            if oid.format() != format {
914                return Err(GitError::InvalidObjectId(
915                    "thin pack base object id format does not match pack format".into(),
916                ));
917            }
918        }
919
920        // Decide, for each object, whether it is stored undeltified or as a
921        // delta against another object (in-pack or an external thin base), and
922        // obtain the emit order. In-pack deltas only ever reference candidates
923        // that appear earlier in `order`, so emitting in `order` guarantees a
924        // base is always written before any object that deltas against it.
925        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
926
927        let mut pack = Vec::new();
928        pack.extend_from_slice(b"PACK");
929        pack.extend_from_slice(&2u32.to_be_bytes());
930        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
931
932        let mut index_entries = Vec::with_capacity(objects.len());
933        // Pack offset at which each original object index was written, or
934        // `None` until it has been emitted.
935        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
936
937        let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
938
939        for (order_pos, &idx) in order.iter().enumerate() {
940            let offset = pack.len() as u64;
941            let mut entry_bytes = Vec::new();
942            match &plan[idx].base {
943                PlannedBase::None => {
944                    write_entry_header(
945                        &mut entry_bytes,
946                        objects[idx].object_type,
947                        objects[idx].body.len() as u64,
948                    );
949                }
950                PlannedBase::InPack { base_idx, delta } => {
951                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
952                        GitError::InvalidFormat(
953                            "in-pack delta base emitted after dependent object".into(),
954                        )
955                    })?;
956                    if options.prefer_ofs_delta {
957                        write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
958                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
959                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
960                        })?;
961                        write_ofs_delta_offset(&mut entry_bytes, relative)?;
962                    } else {
963                        write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
964                        entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
965                    }
966                }
967                PlannedBase::External { base_oid, delta } => {
968                    write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
969                    entry_bytes.extend_from_slice(base_oid.as_bytes());
970                }
971            }
972            entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
973            let crc32 = crc32fast::hash(&entry_bytes);
974            pack.extend_from_slice(&entry_bytes);
975            written_offsets[idx] = Some(offset);
976            index_entries.push(PackIndexEntry {
977                oid: object_ids[idx].clone(),
978                crc32,
979                offset,
980            });
981        }
982
983        let checksum = sley_core::digest_bytes(format, &pack)?;
984        pack.extend_from_slice(checksum.as_bytes());
985        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
986        Ok(PackWrite {
987            pack,
988            index,
989            checksum,
990            entries: index_entries,
991        })
992    }
993}
994
995impl<'a> PackIndexView<'a> {
996    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
997        Self::parse(bytes, ObjectFormat::Sha1)
998    }
999
1000    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1001        Self::parse_impl(bytes, format, true, true)
1002    }
1003
1004    /// Parse and validate the index layout without recomputing the trailing
1005    /// index checksum. The checksum stored in the file is still exposed via
1006    /// [`PackIndexView::index_checksum`].
1007    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1008        Self::parse_impl(bytes, format, false, true)
1009    }
1010
1011    /// Parse a local/trusted pack index without recomputing the trailing index
1012    /// checksum or walking every entry for canonical-order validation.
1013    ///
1014    /// This still validates the table layout and all lookup paths remain
1015    /// bounds-checked, but it avoids O(number-of-objects) startup validation for
1016    /// repository-owned `.idx` files in hot read paths.
1017    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1018        Self::parse_impl(bytes, format, false, false)
1019    }
1020
1021    pub fn count(&self) -> usize {
1022        self.count
1023    }
1024
1025    pub fn fanout(&self) -> &[u32; 256] {
1026        &self.fanout
1027    }
1028
1029    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1030        if oid.format() != self.format {
1031            return None;
1032        }
1033        let bucket = usize::from(oid.as_bytes()[0]);
1034        let mut start = if bucket == 0 {
1035            0
1036        } else {
1037            self.fanout[bucket - 1] as usize
1038        };
1039        let mut end = self.fanout[bucket] as usize;
1040        let target = oid.as_bytes();
1041
1042        while start < end {
1043            let mid = start + (end - start) / 2;
1044            match self.oid_bytes_at(mid).cmp(target) {
1045                std::cmp::Ordering::Less => start = mid + 1,
1046                std::cmp::Ordering::Equal => return self.lookup_at(mid),
1047                std::cmp::Ordering::Greater => end = mid,
1048            }
1049        }
1050        None
1051    }
1052
1053    fn parse_impl(
1054        bytes: &'a [u8],
1055        format: ObjectFormat,
1056        verify_checksum: bool,
1057        validate_entries: bool,
1058    ) -> Result<Self> {
1059        let hash_len = format.raw_len();
1060        if bytes.len() < 4 {
1061            return Err(GitError::InvalidFormat("pack index too short".into()));
1062        }
1063        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1064            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1065        }
1066        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1067            return Err(GitError::InvalidFormat("pack index too short".into()));
1068        }
1069        let version = u32_be(&bytes[4..8]);
1070        if version != 2 {
1071            return Err(GitError::Unsupported(format!(
1072                "pack index version {version}"
1073            )));
1074        }
1075        let index_checksum_offset = bytes.len() - hash_len;
1076        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1077        if verify_checksum {
1078            let actual_index_checksum =
1079                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1080            if actual_index_checksum != index_checksum {
1081                return Err(GitError::InvalidFormat(format!(
1082                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1083                )));
1084            }
1085        }
1086
1087        let mut offset = 8usize;
1088        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1089        let count = fanout[255] as usize;
1090        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1091        offset = oid_table.end;
1092        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1093        offset = crc_table.end;
1094        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1095        offset = small_offset_table.end;
1096
1097        let large_offset_count = (0..count)
1098            .filter(|idx| {
1099                let start = small_offset_table.start + idx * 4;
1100                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1101            })
1102            .count();
1103        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1104        offset = large_offset_table.end;
1105
1106        let expected_trailer_offset = bytes.len() - hash_len * 2;
1107        if offset != expected_trailer_offset {
1108            return Err(GitError::InvalidFormat(format!(
1109                "pack index has {} unexpected bytes before trailer",
1110                expected_trailer_offset.saturating_sub(offset)
1111            )));
1112        }
1113        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1114
1115        let view = Self {
1116            version,
1117            count,
1118            fanout,
1119            pack_checksum,
1120            index_checksum,
1121            bytes,
1122            format,
1123            tables: PackIndexViewTables::V2 {
1124                oid_table,
1125                crc_table,
1126                small_offset_table,
1127                large_offset_table,
1128            },
1129        };
1130        if validate_entries {
1131            view.validate_v2_entries()?;
1132        }
1133        Ok(view)
1134    }
1135
1136    fn parse_v1_impl(
1137        bytes: &'a [u8],
1138        format: ObjectFormat,
1139        verify_checksum: bool,
1140        validate_entries: bool,
1141    ) -> Result<Self> {
1142        let hash_len = format.raw_len();
1143        if bytes.len() < 256 * 4 + 2 * hash_len {
1144            return Err(GitError::InvalidFormat("pack index too short".into()));
1145        }
1146        let index_checksum_offset = bytes.len() - hash_len;
1147        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1148        if verify_checksum {
1149            let actual_index_checksum =
1150                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1151            if actual_index_checksum != index_checksum {
1152                return Err(GitError::InvalidFormat(format!(
1153                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1154                )));
1155            }
1156        }
1157
1158        let mut offset = 0usize;
1159        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1160        let count = fanout[255] as usize;
1161        let entry_len = hash_len
1162            .checked_add(4)
1163            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1164        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1165        offset = entry_table.end;
1166        let expected_trailer_offset = bytes.len() - hash_len * 2;
1167        if offset != expected_trailer_offset {
1168            return Err(GitError::InvalidFormat(format!(
1169                "pack index has {} unexpected bytes before trailer",
1170                expected_trailer_offset.saturating_sub(offset)
1171            )));
1172        }
1173        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1174
1175        let view = Self {
1176            version: 1,
1177            count,
1178            fanout,
1179            pack_checksum,
1180            index_checksum,
1181            bytes,
1182            format,
1183            tables: PackIndexViewTables::V1 { entry_table },
1184        };
1185        if validate_entries {
1186            view.validate_v1_entries()?;
1187        }
1188        Ok(view)
1189    }
1190
1191    fn validate_v2_entries(&self) -> Result<()> {
1192        let PackIndexViewTables::V2 {
1193            oid_table,
1194            small_offset_table,
1195            large_offset_table,
1196            ..
1197        } = &self.tables
1198        else {
1199            unreachable!("v2 validation only runs for v2 views");
1200        };
1201        let oid_table = self.slice(oid_table.clone());
1202        let small_offset_table = self.slice(small_offset_table.clone());
1203        let large_offset_table = self.slice(large_offset_table.clone());
1204        let hash_len = self.format.raw_len();
1205        for idx in 0..self.count {
1206            let oid_start = idx * hash_len;
1207            let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1208            if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1209                return Err(GitError::InvalidFormat(
1210                    "pack index object ids are not strictly ascending".into(),
1211                ));
1212            }
1213            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1214
1215            let offset_start = idx * 4;
1216            let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1217            pack_index_v2_offset(raw_offset, large_offset_table)?;
1218        }
1219        Ok(())
1220    }
1221
1222    fn validate_v1_entries(&self) -> Result<()> {
1223        let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1224            unreachable!("v1 validation only runs for v1 views");
1225        };
1226        let entry_table = self.slice(entry_table.clone());
1227        let hash_len = self.format.raw_len();
1228        let entry_len = hash_len
1229            .checked_add(4)
1230            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1231        for idx in 0..self.count {
1232            let start = idx * entry_len;
1233            let oid_start = start + 4;
1234            let oid_bytes = &entry_table[oid_start..start + entry_len];
1235            if idx > 0 {
1236                let previous_oid_start = oid_start - entry_len;
1237                let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1238                if previous_oid >= oid_bytes {
1239                    return Err(GitError::InvalidFormat(
1240                        "pack index object ids are not strictly sorted".into(),
1241                    ));
1242                }
1243            }
1244            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1245        }
1246        Ok(())
1247    }
1248
1249    fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1250        let hash_len = self.format.raw_len();
1251        match &self.tables {
1252            PackIndexViewTables::V1 { entry_table } => {
1253                let entry_table = self.slice(entry_table.clone());
1254                let entry_len = hash_len + 4;
1255                let start = idx * entry_len + 4;
1256                &entry_table[start..start + hash_len]
1257            }
1258            PackIndexViewTables::V2 { oid_table, .. } => {
1259                let oid_table = self.slice(oid_table.clone());
1260                let start = idx * hash_len;
1261                &oid_table[start..start + hash_len]
1262            }
1263        }
1264    }
1265
1266    fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1267        if idx >= self.count {
1268            return None;
1269        }
1270        let hash_len = self.format.raw_len();
1271        match &self.tables {
1272            PackIndexViewTables::V1 { entry_table } => {
1273                let entry_table = self.slice(entry_table.clone());
1274                let entry_len = hash_len + 4;
1275                let start = idx * entry_len;
1276                Some(PackIndexLookup {
1277                    crc32: 0,
1278                    offset: u64::from(u32_be(&entry_table[start..start + 4])),
1279                })
1280            }
1281            PackIndexViewTables::V2 {
1282                crc_table,
1283                small_offset_table,
1284                large_offset_table,
1285                ..
1286            } => {
1287                let crc_table = self.slice(crc_table.clone());
1288                let small_offset_table = self.slice(small_offset_table.clone());
1289                let large_offset_table = self.slice(large_offset_table.clone());
1290                let crc_start = idx * 4;
1291                let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1292                Some(PackIndexLookup {
1293                    crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1294                    offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1295                })
1296            }
1297        }
1298    }
1299
1300    fn slice(&self, range: Range<usize>) -> &'a [u8] {
1301        &self.bytes[range]
1302    }
1303}
1304
1305impl PackIndexViewData {
1306    pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1307        Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1308    }
1309
1310    /// Parse and validate an owned index view without recomputing the trailing
1311    /// index checksum. The stored checksum is still exposed via
1312    /// [`PackIndexViewData::index_checksum`].
1313    pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1314        Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1315    }
1316
1317    /// Parse a local/trusted owned index view without the checksum or full-entry
1318    /// validation passes.
1319    pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1320        Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1321    }
1322
1323    pub fn parse_source(
1324        bytes: Arc<dyn PackIndexByteSource>,
1325        format: ObjectFormat,
1326    ) -> Result<Self> {
1327        Self::parse_impl(bytes, format, true, true)
1328    }
1329
1330    pub fn parse_source_without_checksum(
1331        bytes: Arc<dyn PackIndexByteSource>,
1332        format: ObjectFormat,
1333    ) -> Result<Self> {
1334        Self::parse_impl(bytes, format, false, true)
1335    }
1336
1337    pub fn parse_trusted_source_without_checksum(
1338        bytes: Arc<dyn PackIndexByteSource>,
1339        format: ObjectFormat,
1340    ) -> Result<Self> {
1341        Self::parse_impl(bytes, format, false, false)
1342    }
1343
1344    pub fn count(&self) -> usize {
1345        self.count
1346    }
1347
1348    pub fn fanout(&self) -> &[u32; 256] {
1349        &self.fanout
1350    }
1351
1352    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1353        self.as_view().find(oid)
1354    }
1355
1356    pub fn as_view(&self) -> PackIndexView<'_> {
1357        PackIndexView {
1358            version: self.version,
1359            count: self.count,
1360            fanout: self.fanout,
1361            pack_checksum: self.pack_checksum,
1362            index_checksum: self.index_checksum,
1363            bytes: self.bytes.as_bytes(),
1364            format: self.format,
1365            tables: self.tables.clone(),
1366        }
1367    }
1368
1369    fn parse_impl(
1370        bytes: Arc<dyn PackIndexByteSource>,
1371        format: ObjectFormat,
1372        verify_checksum: bool,
1373        validate_entries: bool,
1374    ) -> Result<Self> {
1375        let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1376            let view = PackIndexView::parse_impl(
1377                bytes.as_bytes(),
1378                format,
1379                verify_checksum,
1380                validate_entries,
1381            )?;
1382            (
1383                view.version,
1384                view.count,
1385                view.fanout,
1386                view.pack_checksum,
1387                view.index_checksum,
1388                view.tables,
1389            )
1390        };
1391        Ok(Self {
1392            version,
1393            count,
1394            fanout,
1395            pack_checksum,
1396            index_checksum,
1397            bytes,
1398            format,
1399            tables,
1400        })
1401    }
1402}
1403
1404impl PackIndex {
1405    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1406        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1407    }
1408
1409    pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1410        let trailer_len = format.raw_len();
1411        if pack_bytes.len() < 12 + trailer_len {
1412            return Err(GitError::InvalidFormat("pack file too short".into()));
1413        }
1414        let trailer_offset = pack_bytes.len() - trailer_len;
1415        let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1416        let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1417        if pack_checksum != expected {
1418            return Err(GitError::InvalidFormat(format!(
1419                "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1420            )));
1421        }
1422
1423        if &pack_bytes[..4] != b"PACK" {
1424            return Err(GitError::InvalidFormat("missing PACK signature".into()));
1425        }
1426        let version = u32_be(&pack_bytes[4..8]);
1427        if version != 2 && version != 3 {
1428            return Err(GitError::Unsupported(format!("pack version {version}")));
1429        }
1430        let count = u32_be(&pack_bytes[8..12]) as usize;
1431        let mut offset = 12usize;
1432        let mut parsed_entries = Vec::with_capacity(count);
1433        let mut raw_entries = Vec::with_capacity(count);
1434        for _ in 0..count {
1435            let entry_offset = offset;
1436            let header = parse_entry_header(pack_bytes, &mut offset)?;
1437            let base = match header.kind {
1438                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1439                    pack_bytes,
1440                    &mut offset,
1441                    entry_offset as u64,
1442                )?)),
1443                PackObjectKind::RefDelta => {
1444                    let hash_len = format.raw_len();
1445                    if offset + hash_len > trailer_offset {
1446                        return Err(GitError::InvalidFormat(
1447                            "truncated ref-delta base object id".into(),
1448                        ));
1449                    }
1450                    let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1451                    offset += hash_len;
1452                    Some(DeltaBase::Ref(oid))
1453                }
1454                _ => None,
1455            };
1456            let mut body = Vec::new();
1457            let consumed = inflate_into(
1458                &pack_bytes[offset..trailer_offset],
1459                &mut body,
1460                header.size.min(usize::MAX as u64) as usize,
1461            )?;
1462            if body.len() as u64 != header.size {
1463                return Err(GitError::InvalidObject(format!(
1464                    "pack object declared {} bytes, decoded {}",
1465                    header.size,
1466                    body.len()
1467                )));
1468            }
1469            if consumed == 0 {
1470                return Err(GitError::InvalidFormat(
1471                    "empty compressed pack entry".into(),
1472                ));
1473            }
1474            offset = offset
1475                .checked_add(consumed)
1476                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1477            if offset > trailer_offset {
1478                return Err(GitError::InvalidFormat(
1479                    "pack entry extends past checksum".into(),
1480                ));
1481            }
1482            raw_entries.push((
1483                entry_offset as u64,
1484                crc32fast::hash(&pack_bytes[entry_offset..offset]),
1485            ));
1486            if let Some(base) = base {
1487                parsed_entries.push(ParsedPackEntry::Delta {
1488                    base,
1489                    compressed_size: consumed as u64,
1490                    delta_size: header.size,
1491                    offset: entry_offset as u64,
1492                    delta: body,
1493                });
1494            } else {
1495                let object_type = match header.kind {
1496                    PackObjectKind::Commit => ObjectType::Commit,
1497                    PackObjectKind::Tree => ObjectType::Tree,
1498                    PackObjectKind::Blob => ObjectType::Blob,
1499                    PackObjectKind::Tag => ObjectType::Tag,
1500                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1501                };
1502                let object = EncodedObject::new(object_type, body);
1503                let oid = object.object_id(format)?;
1504                parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1505                    entry: PackEntry {
1506                        oid,
1507                        compressed_size: consumed as u64,
1508                        uncompressed_size: header.size,
1509                        offset: entry_offset as u64,
1510                    },
1511                    object,
1512                }));
1513            }
1514        }
1515        if offset != trailer_offset {
1516            return Err(GitError::InvalidFormat(format!(
1517                "pack has {} trailing bytes before checksum",
1518                trailer_offset - offset
1519            )));
1520        }
1521
1522        let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1523        let entries = resolved
1524            .iter()
1525            .zip(raw_entries)
1526            .map(|(object, (offset, crc32))| PackIndexEntry {
1527                oid: object.entry.oid,
1528                crc32,
1529                offset,
1530            })
1531            .collect::<Vec<_>>();
1532        let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1533        Ok(PackIndexBuild {
1534            index,
1535            pack_checksum,
1536            entries,
1537        })
1538    }
1539
1540    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1541        Self::parse(bytes, ObjectFormat::Sha1)
1542    }
1543
1544    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1545        let hash_len = format.raw_len();
1546        if bytes.len() < 4 {
1547            return Err(GitError::InvalidFormat("pack index too short".into()));
1548        }
1549        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1550            return Self::parse_v1(bytes, format);
1551        }
1552        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1553            return Err(GitError::InvalidFormat("pack index too short".into()));
1554        }
1555        let version = u32_be(&bytes[4..8]);
1556        if version != 2 {
1557            return Err(GitError::Unsupported(format!(
1558                "pack index version {version}"
1559            )));
1560        }
1561        let index_checksum_offset = bytes.len() - hash_len;
1562        let actual_index_checksum =
1563            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1564        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1565        if actual_index_checksum != index_checksum {
1566            return Err(GitError::InvalidFormat(format!(
1567                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1568            )));
1569        }
1570
1571        let mut offset = 8usize;
1572        let mut fanout = [0u32; 256];
1573        let mut previous = 0u32;
1574        for slot in &mut fanout {
1575            *slot = u32_be(&bytes[offset..offset + 4]);
1576            if *slot < previous {
1577                return Err(GitError::InvalidFormat(
1578                    "pack index fanout is not monotonic".into(),
1579                ));
1580            }
1581            previous = *slot;
1582            offset += 4;
1583        }
1584        let count = fanout[255] as usize;
1585        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1586        offset = oid_table.end;
1587        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1588        offset = crc_table.end;
1589        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1590        offset = small_offset_table.end;
1591
1592        let large_offset_count = (0..count)
1593            .filter(|idx| {
1594                let start = small_offset_table.start + idx * 4;
1595                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1596            })
1597            .count();
1598        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1599        offset = large_offset_table.end;
1600
1601        let expected_trailer_offset = bytes.len() - hash_len * 2;
1602        if offset != expected_trailer_offset {
1603            return Err(GitError::InvalidFormat(format!(
1604                "pack index has {} unexpected bytes before trailer",
1605                expected_trailer_offset.saturating_sub(offset)
1606            )));
1607        }
1608        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1609
1610        let mut entries = Vec::with_capacity(count);
1611        for idx in 0..count {
1612            let oid_start = oid_table.start + idx * hash_len;
1613            let crc_start = crc_table.start + idx * 4;
1614            let offset_start = small_offset_table.start + idx * 4;
1615            let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1616            // Object ids must be strictly ascending: lookup binary-searches them,
1617            // and the fanout must match the first byte. A malformed/forged index
1618            // (e.g. from a received pack) would otherwise yield silent misses.
1619            if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1620                return Err(GitError::InvalidFormat(
1621                    "pack index object ids are not strictly ascending".into(),
1622                ));
1623            }
1624            let expected_min = if oid_bytes[0] == 0 {
1625                0
1626            } else {
1627                fanout[usize::from(oid_bytes[0] - 1)]
1628            };
1629            if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1630                return Err(GitError::InvalidFormat(
1631                    "pack index object id is outside its fanout bucket".into(),
1632                ));
1633            }
1634            let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1635            let offset = if raw_offset & 0x8000_0000 == 0 {
1636                u64::from(raw_offset)
1637            } else {
1638                let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1639                let large_start = large_offset_table.start + large_idx * 8;
1640                if large_idx >= large_offset_count {
1641                    return Err(GitError::InvalidFormat(
1642                        "pack index large offset points past table".into(),
1643                    ));
1644                }
1645                u64_be(&bytes[large_start..large_start + 8])
1646            };
1647            entries.push(PackIndexEntry {
1648                oid: ObjectId::from_raw(format, oid_bytes)?,
1649                crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1650                offset,
1651            });
1652        }
1653        Ok(Self {
1654            version,
1655            fanout,
1656            entries,
1657            pack_checksum,
1658            index_checksum,
1659        })
1660    }
1661
1662    fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1663        let hash_len = format.raw_len();
1664        if bytes.len() < 256 * 4 + 2 * hash_len {
1665            return Err(GitError::InvalidFormat("pack index too short".into()));
1666        }
1667        let index_checksum_offset = bytes.len() - hash_len;
1668        let actual_index_checksum =
1669            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1670        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1671        if actual_index_checksum != index_checksum {
1672            return Err(GitError::InvalidFormat(format!(
1673                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1674            )));
1675        }
1676
1677        let mut offset = 0usize;
1678        let mut fanout = [0u32; 256];
1679        let mut previous = 0u32;
1680        for slot in &mut fanout {
1681            *slot = u32_be(&bytes[offset..offset + 4]);
1682            if *slot < previous {
1683                return Err(GitError::InvalidFormat(
1684                    "pack index fanout is not monotonic".into(),
1685                ));
1686            }
1687            previous = *slot;
1688            offset += 4;
1689        }
1690        let count = fanout[255] as usize;
1691        let entry_len = hash_len
1692            .checked_add(4)
1693            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1694        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1695        offset = entry_table.end;
1696        let expected_trailer_offset = bytes.len() - hash_len * 2;
1697        if offset != expected_trailer_offset {
1698            return Err(GitError::InvalidFormat(format!(
1699                "pack index has {} unexpected bytes before trailer",
1700                expected_trailer_offset.saturating_sub(offset)
1701            )));
1702        }
1703        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1704
1705        let mut entries = Vec::with_capacity(count);
1706        let mut previous_oid: Option<ObjectId> = None;
1707        for idx in 0..count {
1708            let start = entry_table.start + idx * entry_len;
1709            let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1710            if let Some(previous) = &previous_oid
1711                && previous.as_bytes() >= oid.as_bytes()
1712            {
1713                return Err(GitError::InvalidFormat(
1714                    "pack index object ids are not strictly sorted".into(),
1715                ));
1716            }
1717            previous_oid = Some(oid);
1718            entries.push(PackIndexEntry {
1719                oid,
1720                crc32: 0,
1721                offset: u64::from(u32_be(&bytes[start..start + 4])),
1722            });
1723        }
1724        Ok(Self {
1725            version: 1,
1726            fanout,
1727            entries,
1728            pack_checksum,
1729            index_checksum,
1730        })
1731    }
1732
1733    pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1734        self.entries
1735            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1736            .ok()
1737            .map(|idx| &self.entries[idx])
1738    }
1739
1740    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1741        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1742    }
1743
1744    pub fn write_v2(
1745        format: ObjectFormat,
1746        entries: &[PackIndexEntry],
1747        pack_checksum: &ObjectId,
1748    ) -> Result<Vec<u8>> {
1749        if pack_checksum.format() != format {
1750            return Err(GitError::InvalidObjectId(
1751                "pack checksum format does not match index format".into(),
1752            ));
1753        }
1754        let mut entries = entries.iter().collect::<Vec<_>>();
1755        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1756        for pair in entries.windows(2) {
1757            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1758                return Err(GitError::InvalidFormat(format!(
1759                    "pack index contains duplicate object id {}",
1760                    pair[0].oid
1761                )));
1762            }
1763        }
1764        let mut fanout = [0u32; 256];
1765        for entry in &entries {
1766            if entry.oid.format() != format {
1767                return Err(GitError::InvalidObjectId(
1768                    "pack index entry format does not match index format".into(),
1769                ));
1770            }
1771            let first = entry.oid.as_bytes()[0] as usize;
1772            fanout[first] = fanout[first]
1773                .checked_add(1)
1774                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1775        }
1776        let mut running = 0u32;
1777        for slot in &mut fanout {
1778            running = running
1779                .checked_add(*slot)
1780                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1781            *slot = running;
1782        }
1783
1784        let mut index = Vec::new();
1785        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1786        index.extend_from_slice(&2u32.to_be_bytes());
1787        for count in fanout {
1788            index.extend_from_slice(&count.to_be_bytes());
1789        }
1790        for entry in &entries {
1791            index.extend_from_slice(entry.oid.as_bytes());
1792        }
1793        for entry in &entries {
1794            index.extend_from_slice(&entry.crc32.to_be_bytes());
1795        }
1796
1797        let mut large_offsets = Vec::new();
1798        for entry in &entries {
1799            if entry.offset < 0x8000_0000 {
1800                index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1801            } else {
1802                if large_offsets.len() > 0x7fff_ffff {
1803                    return Err(GitError::InvalidFormat(
1804                        "too many large pack offsets".into(),
1805                    ));
1806                }
1807                let large_idx = large_offsets.len() as u32;
1808                index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1809                large_offsets.push(entry.offset);
1810            }
1811        }
1812        for offset in large_offsets {
1813            index.extend_from_slice(&offset.to_be_bytes());
1814        }
1815        index.extend_from_slice(pack_checksum.as_bytes());
1816        let index_checksum = sley_core::digest_bytes(format, &index)?;
1817        index.extend_from_slice(index_checksum.as_bytes());
1818        Ok(index)
1819    }
1820
1821    /// Serialise a version-1 pack `.idx`: a 256-entry fanout, then for each
1822    /// object an inline 4-byte big-endian pack offset immediately followed by
1823    /// its object id (sorted by oid), then the pack checksum and a trailing
1824    /// index checksum. v1 has no CRC table and cannot represent offsets that
1825    /// do not fit in 32 bits.
1826    pub fn write_v1(
1827        format: ObjectFormat,
1828        entries: &[PackIndexEntry],
1829        pack_checksum: &ObjectId,
1830    ) -> Result<Vec<u8>> {
1831        if pack_checksum.format() != format {
1832            return Err(GitError::InvalidObjectId(
1833                "pack checksum format does not match index format".into(),
1834            ));
1835        }
1836        let mut entries = entries.iter().collect::<Vec<_>>();
1837        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1838        for pair in entries.windows(2) {
1839            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1840                return Err(GitError::InvalidFormat(format!(
1841                    "pack index contains duplicate object id {}",
1842                    pair[0].oid
1843                )));
1844            }
1845        }
1846        let mut fanout = [0u32; 256];
1847        for entry in &entries {
1848            if entry.oid.format() != format {
1849                return Err(GitError::InvalidObjectId(
1850                    "pack index entry format does not match index format".into(),
1851                ));
1852            }
1853            if entry.offset > 0xffff_ffff {
1854                return Err(GitError::InvalidFormat(
1855                    "pack offset too large for a version-1 index".into(),
1856                ));
1857            }
1858            let first = entry.oid.as_bytes()[0] as usize;
1859            fanout[first] = fanout[first]
1860                .checked_add(1)
1861                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1862        }
1863        let mut running = 0u32;
1864        for slot in &mut fanout {
1865            running = running
1866                .checked_add(*slot)
1867                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1868            *slot = running;
1869        }
1870
1871        let mut index = Vec::new();
1872        for count in fanout {
1873            index.extend_from_slice(&count.to_be_bytes());
1874        }
1875        for entry in &entries {
1876            index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1877            index.extend_from_slice(entry.oid.as_bytes());
1878        }
1879        index.extend_from_slice(pack_checksum.as_bytes());
1880        let index_checksum = sley_core::digest_bytes(format, &index)?;
1881        index.extend_from_slice(index_checksum.as_bytes());
1882        Ok(index)
1883    }
1884}
1885
1886/// The `.rev` table for a pack: index positions (the rank of each object in
1887/// the oid-sorted `.idx`) listed in pack order (ascending pack offset), as
1888/// upstream `write_rev_file` lays them out. Accepts `entries` in any order;
1889/// the result feeds [`PackReverseIndex::write`].
1890pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1891    let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1892    oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1893    let mut index_position = vec![0u32; entries.len()];
1894    for (position, &entry) in oid_sorted.iter().enumerate() {
1895        index_position[entry] = position as u32;
1896    }
1897    let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1898    by_offset.sort_by_key(|&entry| entries[entry].offset);
1899    by_offset
1900        .into_iter()
1901        .map(|entry| index_position[entry])
1902        .collect()
1903}
1904
1905impl PackReverseIndex {
1906    pub fn write(
1907        format: ObjectFormat,
1908        positions: &[u32],
1909        pack_checksum: &ObjectId,
1910    ) -> Result<Vec<u8>> {
1911        if pack_checksum.format() != format {
1912            return Err(GitError::InvalidObjectId(
1913                "pack checksum format does not match reverse index format".into(),
1914            ));
1915        }
1916        validate_position_permutation(positions)?;
1917
1918        let mut out = Vec::new();
1919        out.extend_from_slice(b"RIDX");
1920        out.extend_from_slice(&1u32.to_be_bytes());
1921        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1922        for position in positions {
1923            out.extend_from_slice(&position.to_be_bytes());
1924        }
1925        out.extend_from_slice(pack_checksum.as_bytes());
1926        let checksum = sley_core::digest_bytes(format, &out)?;
1927        out.extend_from_slice(checksum.as_bytes());
1928        Ok(out)
1929    }
1930
1931    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1932        let hash_len = format.raw_len();
1933        let table_len = object_count
1934            .checked_mul(4)
1935            .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1936        let min_len = 12usize
1937            .checked_add(table_len)
1938            .and_then(|len| len.checked_add(hash_len * 2))
1939            .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1940        if bytes.len() < min_len {
1941            return Err(GitError::InvalidFormat("reverse index too short".into()));
1942        }
1943        if bytes.len() != min_len {
1944            return Err(GitError::InvalidFormat(format!(
1945                "reverse index has {} trailing bytes",
1946                bytes.len() - min_len
1947            )));
1948        }
1949        if &bytes[..4] != b"RIDX" {
1950            return Err(GitError::InvalidFormat(
1951                "missing reverse index signature".into(),
1952            ));
1953        }
1954        let version = u32_be(&bytes[4..8]);
1955        if version != 1 {
1956            return Err(GitError::Unsupported(format!(
1957                "reverse index version {version}"
1958            )));
1959        }
1960        let hash_id = u32_be(&bytes[8..12]);
1961        if hash_id != hash_function_id(format) {
1962            return Err(GitError::InvalidFormat(format!(
1963                "reverse index hash id {hash_id} does not match {}",
1964                format.name()
1965            )));
1966        }
1967
1968        let index_checksum_offset = bytes.len() - hash_len;
1969        let actual_index_checksum =
1970            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1971        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1972        if actual_index_checksum != index_checksum {
1973            return Err(GitError::InvalidFormat(format!(
1974                "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1975            )));
1976        }
1977
1978        let pack_checksum_offset = index_checksum_offset - hash_len;
1979        let pack_checksum =
1980            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1981        let mut positions = Vec::with_capacity(object_count);
1982        let mut offset = 12usize;
1983        for _ in 0..object_count {
1984            let position = u32_be(&bytes[offset..offset + 4]);
1985            positions.push(position);
1986            offset += 4;
1987        }
1988        validate_position_permutation(&positions)?;
1989
1990        Ok(Self {
1991            version,
1992            format,
1993            positions,
1994            pack_checksum,
1995            index_checksum,
1996        })
1997    }
1998}
1999
2000impl PackMtimes {
2001    pub fn write(
2002        format: ObjectFormat,
2003        mtimes: &[u32],
2004        pack_checksum: &ObjectId,
2005    ) -> Result<Vec<u8>> {
2006        if pack_checksum.format() != format {
2007            return Err(GitError::InvalidObjectId(
2008                "pack checksum format does not match mtimes format".into(),
2009            ));
2010        }
2011
2012        let mut out = Vec::new();
2013        out.extend_from_slice(b"MTME");
2014        out.extend_from_slice(&1u32.to_be_bytes());
2015        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2016        for mtime in mtimes {
2017            out.extend_from_slice(&mtime.to_be_bytes());
2018        }
2019        out.extend_from_slice(pack_checksum.as_bytes());
2020        let checksum = sley_core::digest_bytes(format, &out)?;
2021        out.extend_from_slice(checksum.as_bytes());
2022        Ok(out)
2023    }
2024
2025    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2026        let hash_len = format.raw_len();
2027        let table_len = object_count
2028            .checked_mul(4)
2029            .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
2030        let expected_len = 12usize
2031            .checked_add(table_len)
2032            .and_then(|len| len.checked_add(hash_len * 2))
2033            .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
2034        if bytes.len() < expected_len {
2035            return Err(GitError::InvalidFormat("mtimes file too short".into()));
2036        }
2037        if bytes.len() != expected_len {
2038            return Err(GitError::InvalidFormat(format!(
2039                "mtimes file has {} trailing bytes",
2040                bytes.len() - expected_len
2041            )));
2042        }
2043        if &bytes[..4] != b"MTME" {
2044            return Err(GitError::InvalidFormat("missing mtimes signature".into()));
2045        }
2046        let version = u32_be(&bytes[4..8]);
2047        if version != 1 {
2048            return Err(GitError::Unsupported(format!("mtimes version {version}")));
2049        }
2050        let hash_id = u32_be(&bytes[8..12]);
2051        if hash_id != hash_function_id(format) {
2052            return Err(GitError::InvalidFormat(format!(
2053                "mtimes hash id {hash_id} does not match {}",
2054                format.name()
2055            )));
2056        }
2057
2058        let index_checksum_offset = bytes.len() - hash_len;
2059        let actual_index_checksum =
2060            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2061        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2062        if actual_index_checksum != index_checksum {
2063            return Err(GitError::InvalidFormat(format!(
2064                "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2065            )));
2066        }
2067
2068        let pack_checksum_offset = index_checksum_offset - hash_len;
2069        let pack_checksum =
2070            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2071        let mut mtimes = Vec::with_capacity(object_count);
2072        let mut offset = 12usize;
2073        for _ in 0..object_count {
2074            mtimes.push(u32_be(&bytes[offset..offset + 4]));
2075            offset += 4;
2076        }
2077
2078        Ok(Self {
2079            version,
2080            format,
2081            mtimes,
2082            pack_checksum,
2083            index_checksum,
2084        })
2085    }
2086}
2087
2088impl PackBitmapIndex {
2089    pub const OPTION_FULL_DAG: u16 = 0x0001;
2090    pub const OPTION_HASH_CACHE: u16 = 0x0004;
2091
2092    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2093        let hash_len = format.raw_len();
2094        let min_len = 12usize
2095            .checked_add(hash_len * 2)
2096            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2097        if bytes.len() < min_len {
2098            return Err(GitError::InvalidFormat("bitmap index too short".into()));
2099        }
2100        if &bytes[..4] != b"BITM" {
2101            return Err(GitError::InvalidFormat(
2102                "missing bitmap index signature".into(),
2103            ));
2104        }
2105        let version = u16_be(&bytes[4..6]);
2106        if version != 1 {
2107            return Err(GitError::Unsupported(format!(
2108                "bitmap index version {version}"
2109            )));
2110        }
2111        let options = u16_be(&bytes[6..8]);
2112        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
2113        if options & !known_options != 0 {
2114            return Err(GitError::Unsupported(format!(
2115                "bitmap index options {:#06x}",
2116                options & !known_options
2117            )));
2118        }
2119        let entry_count = u32_be(&bytes[8..12]) as usize;
2120        let checksum_offset = bytes.len() - hash_len;
2121        let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2122        let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2123        if actual_index_checksum != index_checksum {
2124            return Err(GitError::InvalidFormat(format!(
2125                "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2126            )));
2127        }
2128
2129        let pack_checksum_end = 12usize
2130            .checked_add(hash_len)
2131            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
2132        let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
2133        let mut offset = pack_checksum_end;
2134        let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2135        let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2136        let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2137        let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2138
2139        let mut entries = Vec::with_capacity(entry_count);
2140        for idx in 0..entry_count {
2141            if checksum_offset.saturating_sub(offset) < 6 {
2142                return Err(GitError::InvalidFormat(
2143                    "truncated bitmap index entry".into(),
2144                ));
2145            }
2146            let object_position = u32_be(&bytes[offset..offset + 4]);
2147            offset += 4;
2148            if object_position as usize >= object_count {
2149                return Err(GitError::InvalidFormat(
2150                    "bitmap index entry points past object table".into(),
2151                ));
2152            }
2153            let xor_offset = bytes[offset];
2154            offset += 1;
2155            if xor_offset as usize > idx || xor_offset > 160 {
2156                return Err(GitError::InvalidFormat(
2157                    "bitmap index entry has invalid XOR offset".into(),
2158                ));
2159            }
2160            let flags = bytes[offset];
2161            offset += 1;
2162            let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
2163            entries.push(PackBitmapEntry {
2164                object_position,
2165                xor_offset,
2166                flags,
2167                bitmap,
2168            });
2169        }
2170
2171        let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
2172            let cache_len = object_count
2173                .checked_mul(4)
2174                .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
2175            if checksum_offset.saturating_sub(offset) < cache_len {
2176                return Err(GitError::InvalidFormat(
2177                    "truncated bitmap hash cache".into(),
2178                ));
2179            }
2180            let mut cache = Vec::with_capacity(object_count);
2181            for _ in 0..object_count {
2182                cache.push(u32_be(&bytes[offset..offset + 4]));
2183                offset += 4;
2184            }
2185            Some(cache)
2186        } else {
2187            None
2188        };
2189
2190        if offset != checksum_offset {
2191            return Err(GitError::InvalidFormat(format!(
2192                "bitmap index has {} trailing bytes",
2193                checksum_offset - offset
2194            )));
2195        }
2196
2197        Ok(Self {
2198            version,
2199            format,
2200            options,
2201            pack_checksum,
2202            index_checksum,
2203            type_bitmaps: PackBitmapTypeBitmaps {
2204                commits,
2205                trees,
2206                blobs,
2207                tags,
2208            },
2209            entries,
2210            name_hash_cache,
2211        })
2212    }
2213
2214    /// Looks up the stored entry whose commit sits at `position` in the
2215    /// oid-sorted pack index (`.idx` order; see [`PackBitmapEntry::object_position`]).
2216    pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
2217        self.entries
2218            .iter()
2219            .find(|entry| entry.object_position == position)
2220    }
2221}
2222
2223fn parse_bitmap_ewah(
2224    bytes: &[u8],
2225    offset: &mut usize,
2226    checksum_offset: usize,
2227    _object_count: usize,
2228) -> Result<EwahBitmap> {
2229    if checksum_offset.saturating_sub(*offset) < 12 {
2230        return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
2231    }
2232    let bit_size = u32_be(&bytes[*offset..*offset + 4]);
2233    *offset += 4;
2234    let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
2235    *offset += 4;
2236    let words_len = word_count
2237        .checked_mul(8)
2238        .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
2239    if checksum_offset.saturating_sub(*offset) < words_len + 4 {
2240        return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
2241    }
2242    let mut words = Vec::with_capacity(word_count);
2243    for _ in 0..word_count {
2244        words.push(u64_be(&bytes[*offset..*offset + 8]));
2245        *offset += 8;
2246    }
2247    let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
2248    *offset += 4;
2249    validate_ewah_words(bit_size, &words, rlw_position)?;
2250    Ok(EwahBitmap {
2251        bit_size,
2252        words,
2253        rlw_position,
2254    })
2255}
2256
2257fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
2258    if words.is_empty() {
2259        if rlw_position != 0 || bit_size != 0 {
2260            return Err(GitError::InvalidFormat(
2261                "EWAH bitmap has invalid empty RLW".into(),
2262            ));
2263        }
2264        return Ok(());
2265    }
2266    if rlw_position as usize >= words.len() {
2267        return Err(GitError::InvalidFormat(
2268            "EWAH RLW position points past word table".into(),
2269        ));
2270    }
2271    let mut word_idx = 0usize;
2272    let mut decoded_words = 0u64;
2273    while word_idx < words.len() {
2274        let rlw = words[word_idx];
2275        let run_words = (rlw >> 1) & 0xffff_ffff;
2276        let literal_words = (rlw >> 33) as usize;
2277        word_idx += 1;
2278        word_idx = word_idx
2279            .checked_add(literal_words)
2280            .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
2281        if word_idx > words.len() {
2282            return Err(GitError::InvalidFormat(
2283                "EWAH literal words extend past word table".into(),
2284            ));
2285        }
2286        decoded_words = decoded_words
2287            .checked_add(run_words)
2288            .and_then(|value| value.checked_add(literal_words as u64))
2289            .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2290    }
2291    let decoded_bits = decoded_words
2292        .checked_mul(64)
2293        .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2294    if decoded_bits < u64::from(bit_size) {
2295        return Err(GitError::InvalidFormat(
2296            "EWAH bitmap decodes fewer bits than declared".into(),
2297        ));
2298    }
2299    Ok(())
2300}
2301
2302impl MultiPackIndex {
    /// Serialize a multi-pack-index without a reverse-index chunk.
    ///
    /// Delegates to [`MultiPackIndex::write_with_reverse_index`] with
    /// `preferred_pack` set to `None`, so validation and the returned bytes
    /// are exactly those of that function with no `RIDX` chunk.
    ///
    /// # Errors
    ///
    /// Propagates every error of
    /// [`MultiPackIndex::write_with_reverse_index`].
    pub fn write(
        format: ObjectFormat,
        version: u8,
        pack_names: &[String],
        objects: &[MultiPackIndexEntry],
    ) -> Result<Vec<u8>> {
        Self::write_with_reverse_index(format, version, pack_names, objects, None)
    }
2311
2312    /// Like [`MultiPackIndex::write`], but when `preferred_pack` is `Some`,
2313    /// additionally emits the `RIDX` chunk: the object order a multi-pack
2314    /// `.bitmap` numbers its bits in ("pseudo-pack order" — every object of
2315    /// the preferred pack first, then the rest by pack id, each pack's slice
2316    /// in offset order), stored as one u32 midx position per object.
2317    ///
2318    /// `preferred_pack` is the pack-int-id receiving pseudo-pack priority; it
2319    /// must be in range.
2320    pub fn write_with_reverse_index(
2321        format: ObjectFormat,
2322        version: u8,
2323        pack_names: &[String],
2324        objects: &[MultiPackIndexEntry],
2325        preferred_pack: Option<u32>,
2326    ) -> Result<Vec<u8>> {
2327        if let Some(preferred) = preferred_pack
2328            && preferred as usize >= pack_names.len()
2329        {
2330            return Err(GitError::InvalidFormat(format!(
2331                "preferred pack {preferred} out of range for {} packs",
2332                pack_names.len()
2333            )));
2334        }
2335        if version != 1 && version != 2 {
2336            return Err(GitError::Unsupported(format!(
2337                "multi-pack-index version {version}"
2338            )));
2339        }
2340        if pack_names.len() > u32::MAX as usize {
2341            return Err(GitError::InvalidFormat(
2342                "too many multi-pack-index packs".into(),
2343            ));
2344        }
2345        if objects.len() > u32::MAX as usize {
2346            return Err(GitError::InvalidFormat(
2347                "too many multi-pack-index objects".into(),
2348            ));
2349        }
2350        validate_midx_pack_names(pack_names)?;
2351        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
2352            return Err(GitError::InvalidFormat(
2353                "multi-pack-index v1 pack names must be sorted".into(),
2354            ));
2355        }
2356
2357        let mut objects = objects.iter().collect::<Vec<_>>();
2358        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2359        let mut previous_oid: Option<&ObjectId> = None;
2360        for object in &objects {
2361            if object.oid.format() != format {
2362                return Err(GitError::InvalidObjectId(
2363                    "multi-pack-index object format does not match index format".into(),
2364                ));
2365            }
2366            if let Some(previous) = previous_oid
2367                && previous.as_bytes() == object.oid.as_bytes()
2368            {
2369                return Err(GitError::InvalidFormat(
2370                    "multi-pack-index contains duplicate object ids".into(),
2371                ));
2372            }
2373            if object.pack_int_id as usize >= pack_names.len() {
2374                return Err(GitError::InvalidFormat(
2375                    "multi-pack-index object points past pack table".into(),
2376                ));
2377            }
2378            previous_oid = Some(&object.oid);
2379        }
2380
2381        let mut large_offsets = Vec::new();
2382        let mut chunks = vec![
2383            (*b"PNAM", write_midx_pack_names(pack_names)),
2384            (*b"OIDF", write_midx_oid_fanout(&objects)?),
2385            (*b"OIDL", write_midx_oid_lookup(&objects)),
2386            (
2387                *b"OOFF",
2388                write_midx_object_offsets(&objects, &mut large_offsets)?,
2389            ),
2390        ];
2391        if !large_offsets.is_empty() {
2392            chunks.push((*b"LOFF", large_offsets));
2393        }
2394        if let Some(preferred) = preferred_pack {
2395            // `objects` is already in midx (oid-sorted) order here; the chunk
2396            // lists each object's midx position in pseudo-pack order.
2397            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
2398            pseudo.sort_by_key(|&midx_pos| {
2399                let object = objects[midx_pos as usize];
2400                (
2401                    object.pack_int_id != preferred,
2402                    object.pack_int_id,
2403                    object.offset,
2404                )
2405            });
2406            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
2407            for midx_pos in pseudo {
2408                ridx.extend_from_slice(&midx_pos.to_be_bytes());
2409            }
2410            chunks.push((*b"RIDX", ridx));
2411        }
2412        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
2413    }
2414
    /// Parse and validate a serialized multi-pack-index into an owned
    /// [`MultiPackIndex`].
    ///
    /// Layout consumed here, in order:
    /// - a 12-byte header: `MIDX` signature, version byte (1 or 2),
    ///   hash-function id byte, chunk-count byte, base-midx-count byte (must
    ///   be 0) and a big-endian u32 pack count;
    /// - a chunk lookup table of `chunk_count + 1` rows of 12 bytes each
    ///   (4-byte id + big-endian u64 file offset), the last row being a
    ///   zero-id terminator whose offset is where the checksum starts;
    /// - the chunk payloads;
    /// - a trailing checksum of `format.raw_len()` bytes over everything that
    ///   precedes it.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::InvalidFormat`] for a short file, bad signature,
    /// hash-id mismatch, truncated or inconsistent chunk lookup table, or
    /// checksum mismatch, and [`GitError::Unsupported`] for an unknown
    /// version or a non-zero base-midx count. Errors from the per-chunk
    /// `parse_midx_*` helpers are propagated unchanged.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        let hash_len = format.raw_len();
        // Smallest possible file: header + one lookup row (the terminator) +
        // checksum. This also makes the fixed header indexing below safe.
        if bytes.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &bytes[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        let version = bytes[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        let hash_id = bytes[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        let chunk_count = bytes[6] as usize;
        let base_midx_count = bytes[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        let pack_count = u32_be(&bytes[8..12]);
        // The lookup table has one row per chunk plus the terminator row.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        // Cannot underflow: the length check above guarantees `hash_len` bytes.
        let checksum_offset = bytes.len() - hash_len;
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        // Verify the trailing checksum before looking at any chunk contents.
        let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
        if actual_checksum != checksum {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
            )));
        }

        // Read every lookup row, terminator included. All reads stay below
        // `data_start`, which was bounds-checked above.
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ];
            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        // The loop always runs at least once, so `entries` is never empty;
        // the `else` arm exists to avoid a panicking accessor.
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // Each chunk's length is the distance to the next row's offset, so
        // rows are examined pairwise. Offsets must be non-decreasing and lie
        // between the end of the lookup table and the checksum.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        // Decode the individual chunks; each helper locates its chunk in
        // `chunks` and validates it against the counts decoded so far.
        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
        let bitmapped_packs =
            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;

        Ok(Self {
            version,
            format,
            pack_count,
            pack_names,
            object_count: object_count as u32,
            fanout,
            objects,
            reverse_index,
            bitmapped_packs,
            chunks,
            checksum,
        })
    }
2548
2549    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
2550        self.objects
2551            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2552            .ok()
2553            .map(|idx| &self.objects[idx])
2554    }
2555}
2556
2557impl MultiPackIndexOidLookup {
    /// Parse the header and chunk table of a serialized multi-pack-index and
    /// build a lookup view that keeps the raw bytes instead of decoding every
    /// object entry.
    ///
    /// The header and chunk lookup table are validated with the same checks
    /// as [`MultiPackIndex::parse`]. The `PNAM` and `OIDF` chunks are decoded;
    /// for `OIDL`, `OOFF` and the optional `LOFF` chunk only the lengths are
    /// checked and their byte offsets into `bytes` are recorded, so lookups
    /// can slice the shared buffer later.
    ///
    /// Note: this function does not itself compute or compare the trailing
    /// checksum.
    ///
    /// # Errors
    ///
    /// Returns [`GitError::InvalidFormat`] for a short file, bad signature,
    /// hash-id mismatch, inconsistent chunk lookup table, or a missing or
    /// mis-sized `OIDL`/`OOFF`/`LOFF` chunk, and [`GitError::Unsupported`] for
    /// an unknown version or a non-zero base-midx count. Errors from the
    /// chunk helpers are propagated unchanged.
    pub fn parse(bytes: Arc<Vec<u8>>, format: ObjectFormat) -> Result<Self> {
        let hash_len = format.raw_len();
        // Smallest possible file: header + one lookup row (the terminator) +
        // checksum. This also makes the fixed header indexing below safe.
        if bytes.len() < 12 + 12 + hash_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index file too short".into(),
            ));
        }
        if &bytes[..4] != b"MIDX" {
            return Err(GitError::InvalidFormat(
                "missing multi-pack-index signature".into(),
            ));
        }
        let version = bytes[4];
        if version != 1 && version != 2 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index version {version}"
            )));
        }
        let hash_id = bytes[5];
        if u32::from(hash_id) != hash_function_id(format) {
            return Err(GitError::InvalidFormat(format!(
                "multi-pack-index hash id {hash_id} does not match {}",
                format.name()
            )));
        }
        let chunk_count = bytes[6] as usize;
        let base_midx_count = bytes[7];
        if base_midx_count != 0 {
            return Err(GitError::Unsupported(format!(
                "multi-pack-index base count {base_midx_count}"
            )));
        }
        let pack_count = u32_be(&bytes[8..12]);
        // The lookup table has one row per chunk plus the terminator row.
        let lookup_len = (chunk_count + 1)
            .checked_mul(12)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        let data_start = 12usize
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
        // Cannot underflow: the length check above guarantees `hash_len` bytes.
        let checksum_offset = bytes.len() - hash_len;
        if data_start > checksum_offset {
            return Err(GitError::InvalidFormat(
                "truncated multi-pack-index chunk lookup".into(),
            ));
        }

        // Read every lookup row, terminator included. All reads stay below
        // `data_start`, which was bounds-checked above.
        let mut entries = Vec::with_capacity(chunk_count + 1);
        let mut offset = 12usize;
        for _ in 0..=chunk_count {
            let id = [
                bytes[offset],
                bytes[offset + 1],
                bytes[offset + 2],
                bytes[offset + 3],
            ];
            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
            entries.push((id, chunk_offset));
            offset += 12;
        }
        // The loop always runs at least once, so `entries` is never empty;
        // the `else` arm exists to avoid a panicking accessor.
        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup is empty".into(),
            ));
        };
        if terminator_id != [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk lookup missing terminator".into(),
            ));
        }
        if terminator_offset != checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index terminator does not point at checksum".into(),
            ));
        }

        // Each chunk's length is the distance to the next row's offset, so
        // rows are examined pairwise. Offsets must be non-decreasing and lie
        // between the end of the lookup table and the checksum.
        let mut chunks = Vec::with_capacity(chunk_count);
        let mut previous_offset = data_start as u64;
        for pair in entries.windows(2) {
            let (id, chunk_offset) = pair[0];
            let (_next_id, next_offset) = pair[1];
            if id == [0, 0, 0, 0] {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk id is zero before terminator".into(),
                ));
            }
            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk offsets are not monotonic".into(),
                ));
            }
            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
                return Err(GitError::InvalidFormat(
                    "multi-pack-index chunk length is invalid".into(),
                ));
            }
            chunks.push(MultiPackIndexChunk {
                id,
                offset: chunk_offset,
                len: next_offset - chunk_offset,
            });
            previous_offset = chunk_offset;
        }

        let pack_names = parse_midx_pack_names(&bytes, &chunks, pack_count as usize, version)?;
        let (fanout, object_count) = parse_midx_oid_fanout(&bytes, &chunks)?;
        // OIDL must hold exactly one raw object id per object.
        let oid_lookup = midx_chunk_data(&bytes, &chunks, *b"OIDL", true)?
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
        let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
            GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
        })?;
        if oid_lookup.len() != expected_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index OIDL chunk has invalid length".into(),
            ));
        }
        // OOFF must hold exactly one 8-byte row (pack id + offset) per object.
        let object_offsets = midx_chunk_data(&bytes, &chunks, *b"OOFF", true)?
            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
        let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
            GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
        })?;
        if object_offsets.len() != expected_offsets_len {
            return Err(GitError::InvalidFormat(
                "multi-pack-index OOFF chunk has invalid length".into(),
            ));
        }
        // LOFF is optional; when present it is a whole number of 8-byte rows.
        let large_offsets = midx_chunk_data(&bytes, &chunks, *b"LOFF", false)?;
        if let Some(large_offsets) = large_offsets
            && large_offsets.len() % 8 != 0
        {
            return Err(GitError::InvalidFormat(
                "multi-pack-index LOFF chunk has invalid length".into(),
            ));
        }
        // Recover each chunk's byte offset within `bytes` from the slice
        // address, so the struct can own `bytes` without also holding slices
        // borrowed from it.
        // NOTE(review): assumes `midx_chunk_data` returns sub-slices of the
        // buffer it is given — confirm against its definition.
        let oid_lookup_offset = oid_lookup.as_ptr() as usize - bytes.as_ptr() as usize;
        let object_offsets_offset = object_offsets.as_ptr() as usize - bytes.as_ptr() as usize;
        let (large_offsets_offset, large_offsets_len) = match large_offsets {
            Some(large_offsets) => (
                Some(large_offsets.as_ptr() as usize - bytes.as_ptr() as usize),
                large_offsets.len(),
            ),
            None => (None, 0),
        };
        Ok(Self {
            format,
            pack_count,
            pack_names,
            fanout,
            object_count,
            oid_lookup_offset,
            object_offsets_offset,
            large_offsets_offset,
            large_offsets_len,
            bytes,
        })
    }
2713
    /// Report whether `oid` is present in the `OIDL` table.
    ///
    /// Only the object-id table is searched; the object's pack id and offset
    /// are not decoded.
    pub fn contains(&self, oid: &ObjectId) -> bool {
        self.find_position(oid).is_some()
    }
2717
2718    pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2719        let Some(position) = self.find_position(oid) else {
2720            return Ok(None);
2721        };
2722        let hash_len = self.format.raw_len();
2723        let oid_start = self
2724            .oid_lookup_offset
2725            .checked_add(position * hash_len)
2726            .ok_or_else(|| {
2727                GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2728            })?;
2729        let oid = ObjectId::from_raw(self.format, &self.bytes[oid_start..oid_start + hash_len])?;
2730        let offset_start = self
2731            .object_offsets_offset
2732            .checked_add(position * 8)
2733            .ok_or_else(|| {
2734                GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2735            })?;
2736        let data = &self.bytes[offset_start..offset_start + 8];
2737        let pack_int_id = u32_be(&data[..4]);
2738        if pack_int_id >= self.pack_count {
2739            return Err(GitError::InvalidFormat(
2740                "multi-pack-index object points past pack table".into(),
2741            ));
2742        }
2743        let raw_offset = u32_be(&data[4..8]);
2744        let offset = if raw_offset & 0x8000_0000 == 0 {
2745            u64::from(raw_offset)
2746        } else {
2747            let Some(large_offsets_offset) = self.large_offsets_offset else {
2748                return Err(GitError::InvalidFormat(
2749                    "multi-pack-index large offset missing LOFF chunk".into(),
2750                ));
2751            };
2752            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2753            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2754                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2755            })?;
2756            let large_end = large_start.checked_add(8).ok_or_else(|| {
2757                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2758            })?;
2759            if large_end > self.large_offsets_len {
2760                return Err(GitError::InvalidFormat(
2761                    "multi-pack-index large offset points past LOFF chunk".into(),
2762                ));
2763            }
2764            let start = large_offsets_offset + large_start;
2765            u64_be(&self.bytes[start..start + 8])
2766        };
2767        Ok(Some(MultiPackIndexEntry {
2768            oid,
2769            pack_int_id,
2770            offset,
2771        }))
2772    }
2773
2774    pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2775        self.pack_names
2776            .get(pack_int_id as usize)
2777            .map(String::as_str)
2778    }
2779
2780    fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2781        if oid.format() != self.format || self.object_count == 0 {
2782            return None;
2783        }
2784        let first = oid.as_bytes()[0] as usize;
2785        let start = if first == 0 {
2786            0
2787        } else {
2788            self.fanout[first - 1] as usize
2789        };
2790        let end = self.fanout[first] as usize;
2791        if start >= end || end > self.object_count {
2792            return None;
2793        }
2794        let hash_len = self.format.raw_len();
2795        let table_start = self.oid_lookup_offset;
2796        let table_end = table_start + self.object_count * hash_len;
2797        let table = &self.bytes[table_start..table_end];
2798        let needle = oid.as_bytes();
2799        let mut low = start;
2800        let mut high = end;
2801        while low < high {
2802            let mid = low + (high - low) / 2;
2803            let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2804            match raw.cmp(needle) {
2805                std::cmp::Ordering::Less => low = mid + 1,
2806                std::cmp::Ordering::Equal => return Some(mid),
2807                std::cmp::Ordering::Greater => high = mid,
2808            }
2809        }
2810        None
2811    }
2812}
2813
2814fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2815    for name in pack_names {
2816        if name.is_empty() {
2817            return Err(GitError::InvalidFormat(
2818                "multi-pack-index pack name is empty".into(),
2819            ));
2820        }
2821        if name
2822            .bytes()
2823            .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2824        {
2825            return Err(GitError::InvalidFormat(
2826                "multi-pack-index pack name contains an invalid byte".into(),
2827            ));
2828        }
2829    }
2830    Ok(())
2831}
2832
/// Build the payload of the `PNAM` chunk.
///
/// Each name is written as its bytes followed by a NUL terminator, in the
/// order given; the result is then padded with NUL bytes up to the next
/// multiple of four.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut chunk: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain(std::iter::once(0u8)))
        .collect();
    let padded_len = chunk.len().next_multiple_of(4);
    chunk.resize(padded_len, 0);
    chunk
}
2844
2845fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2846    let mut counts = [0u32; 256];
2847    for object in objects {
2848        let first = object.oid.as_bytes()[0] as usize;
2849        counts[first] = counts[first]
2850            .checked_add(1)
2851            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2852    }
2853    let mut running = 0u32;
2854    let mut out = Vec::with_capacity(256 * 4);
2855    for count in counts {
2856        running = running
2857            .checked_add(count)
2858            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2859        out.extend_from_slice(&running.to_be_bytes());
2860    }
2861    Ok(out)
2862}
2863
2864fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2865    let mut out = Vec::new();
2866    for object in objects {
2867        out.extend_from_slice(object.oid.as_bytes());
2868    }
2869    out
2870}
2871
2872fn write_midx_object_offsets(
2873    objects: &[&MultiPackIndexEntry],
2874    large_offsets: &mut Vec<u8>,
2875) -> Result<Vec<u8>> {
2876    let mut out = Vec::new();
2877    for object in objects {
2878        out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2879        if object.offset < 0x8000_0000 {
2880            out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2881        } else {
2882            let large_idx = large_offsets.len() / 8;
2883            if large_idx > 0x7fff_ffff {
2884                return Err(GitError::InvalidFormat(
2885                    "too many multi-pack-index large offsets".into(),
2886                ));
2887            }
2888            out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2889            large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2890        }
2891    }
2892    Ok(out)
2893}
2894
2895fn write_multi_pack_index_chunks(
2896    format: ObjectFormat,
2897    version: u8,
2898    pack_count: u32,
2899    chunks: &[([u8; 4], Vec<u8>)],
2900) -> Result<Vec<u8>> {
2901    if chunks.len() > u8::MAX as usize {
2902        return Err(GitError::InvalidFormat(
2903            "too many multi-pack-index chunks".into(),
2904        ));
2905    }
2906    let lookup_len = (chunks.len() + 1)
2907        .checked_mul(12)
2908        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2909    let mut out = Vec::new();
2910    out.extend_from_slice(b"MIDX");
2911    out.push(version);
2912    out.push(hash_function_id(format) as u8);
2913    out.push(chunks.len() as u8);
2914    out.push(0);
2915    out.extend_from_slice(&pack_count.to_be_bytes());
2916    let mut chunk_offset = (12usize)
2917        .checked_add(lookup_len)
2918        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
2919        as u64;
2920    for (id, data) in chunks {
2921        out.extend_from_slice(id);
2922        out.extend_from_slice(&chunk_offset.to_be_bytes());
2923        chunk_offset = chunk_offset
2924            .checked_add(data.len() as u64)
2925            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
2926    }
2927    out.extend_from_slice(&[0, 0, 0, 0]);
2928    out.extend_from_slice(&chunk_offset.to_be_bytes());
2929    for (_id, data) in chunks {
2930        out.extend_from_slice(data);
2931    }
2932    let checksum = sley_core::digest_bytes(format, &out)?;
2933    out.extend_from_slice(checksum.as_bytes());
2934    Ok(out)
2935}
2936
/// Header fields of a single pack entry: the entry's [`PackObjectKind`] and a
/// size.
///
/// NOTE(review): `size` is presumably the size recorded in the entry's
/// variable-length header — confirm against the code that builds this value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Kind of entry this header announces.
    kind: PackObjectKind,
    /// Size value carried by the header.
    size: u64,
}
2942
/// A cache of objects already decoded from one specific pack, keyed by the
/// in-pack byte offset at which each object's entry begins.
///
/// Delta resolution within a pack walks a chain of base objects by offset; the
/// same base is the parent of many deltas, so without a cache the entire chain
/// is re-inflated and re-applied on every read. Implementors let
/// [`read_object_at_with_cache`] reuse a warm base instead.
///
/// Correctness contract: a given `offset` within a given pack's bytes always
/// decodes to exactly one object, so caching by offset can never serve the wrong
/// object **provided the same cache is only ever used with one pack's bytes**.
/// Callers must therefore scope a cache to a single pack (e.g. key it by pack
/// path). The default [`read_object_at`] uses a no-op cache and is unaffected.
///
/// Both methods take `&self`, so an implementor that actually stores entries
/// needs interior mutability.
pub trait PackDeltaCache {
    /// Return the decoded object whose entry begins at `offset`, if cached.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Record that the entry beginning at `offset` decodes to `object`.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
2962
/// A [`PackDeltaCache`] that stores nothing; used by [`read_object_at`] to keep
/// the original, allocation-free behavior for callers that do not opt in.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always a miss: nothing is ever stored.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards `_object` without recording it.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
2973
// Reused zlib inflate state, one per thread. Resetting and reusing one
// `Decompress` avoids allocating a fresh (~10 KiB) `InflateState` for every
// object and delta decoded — an allocation that dominated bulk reads.
//
// The `RefCell` is borrowed only for the duration of a single inflate; the
// recursive pack reader fully inflates each entry's data before recursing to
// its base, so the borrow never nests (a nested `borrow_mut` would panic).
//
// `Decompress::new(true)` creates a decoder that expects a zlib header.
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
2982
/// The largest ratio by which a single DEFLATE/zlib member can expand its input.
///
/// The theoretical worst case for raw DEFLATE is ~1032:1 (a maximally efficient
/// run of back-references). We pre-reserve no more than this multiple of the
/// available compressed input, so an attacker who declares a huge `size_hint`
/// (e.g. `u64::MAX`) cannot make us reserve — and thus commit — gigabytes of
/// memory before the inflate has produced a single byte. The stream's *actual*
/// output is still verified against the declared size by the caller; this only
/// bounds the speculative allocation. git never pre-allocates an attacker's
/// declared size beyond a streaming buffer either (see index-pack.c's
/// `unpack_entry_data`).
///
/// Consumed by [`bounded_inflate_reserve`] as the multiplier on the compressed
/// length.
const MAX_INFLATE_EXPANSION: usize = 1032;
2994
/// An absolute ceiling (64 MiB) on the speculative pre-reservation,
/// independent of the input length, so even a large legitimate-looking
/// compressed input can't be turned into a multi-gigabyte up-front allocation.
/// Inflate still grows the output buffer organically past this when a real
/// stream genuinely produces that much — this only caps the *speculative*
/// reserve.
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;
3001
3002/// Bound a caller-supplied (possibly attacker-controlled) decompressed-size hint
3003/// to something safe to reserve up front: no larger than what `compressed_len`
3004/// input bytes could plausibly inflate to, and never above a fixed ceiling. The
3005/// returned value is only used to size the initial allocation; the inflate loop
3006/// grows the buffer as the real stream produces output, so legitimate large
3007/// objects still decode correctly — they just don't get the whole allocation at
3008/// once.
3009fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
3010    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
3011    // 64 (floor) <= MAX_INFLATE_RESERVE (ceiling) always, so `clamp` cannot panic.
3012    size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
3013}
3014
3015/// Inflate the entire zlib stream at the front of `compressed`, appending the
3016/// decoded bytes to `out`, reusing the thread-local inflate state. `size_hint`
3017/// is the caller's expectation for the decompressed length, but it is treated as
3018/// untrusted: the up-front reservation is bounded by [`bounded_inflate_reserve`]
3019/// so a crafted hint can never drive an out-of-memory pre-allocation. Returns the
3020/// number of *compressed* bytes consumed (so callers stepping through a pack can
3021/// advance to the next entry). Byte-for-byte equivalent to
3022/// `ZlibDecoder::read_to_end` + `total_in`.
3023fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
3024    INFLATE.with(|cell| {
3025        let mut decompress = cell.borrow_mut();
3026        decompress.reset(true);
3027        out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
3028        let mut input = compressed;
3029        let mut consumed_total = 0usize;
3030        loop {
3031            // Always leave output room so a zero-progress result means the input
3032            // (not the buffer) is exhausted.
3033            if out.len() == out.capacity() {
3034                out.reserve(out.len().max(64));
3035            }
3036            let before_in = decompress.total_in();
3037            let before_out = decompress.total_out();
3038            let status = decompress
3039                .decompress_vec(input, out, flate2::FlushDecompress::None)
3040                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3041            let consumed = (decompress.total_in() - before_in) as usize;
3042            let produced = decompress.total_out() - before_out;
3043            input = &input[consumed..];
3044            consumed_total += consumed;
3045            match status {
3046                flate2::Status::StreamEnd => return Ok(consumed_total),
3047                _ if consumed == 0 && produced == 0 => {
3048                    return Err(GitError::InvalidObject("truncated zlib stream".into()));
3049                }
3050                _ => {}
3051            }
3052        }
3053    })
3054}
3055
3056/// Inflate at least `max_out` bytes (or until the stream ends) from `compressed`
3057/// into `out`, reusing the thread-local state. Used to read a delta's leading
3058/// base-size / result-size varints without inflating the whole instruction stream.
3059fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
3060    INFLATE.with(|cell| {
3061        let mut decompress = cell.borrow_mut();
3062        decompress.reset(true);
3063        out.reserve(max_out.max(16));
3064        let mut input = compressed;
3065        while out.len() < max_out {
3066            if out.len() == out.capacity() {
3067                out.reserve(out.len().max(16));
3068            }
3069            let before_in = decompress.total_in();
3070            let before_out = decompress.total_out();
3071            let status = decompress
3072                .decompress_vec(input, out, flate2::FlushDecompress::None)
3073                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
3074            let consumed = (decompress.total_in() - before_in) as usize;
3075            let produced = decompress.total_out() - before_out;
3076            input = &input[consumed..];
3077            if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
3078                break;
3079            }
3080        }
3081        Ok(())
3082    })
3083}
3084
3085/// Decode the single object stored at byte `offset` within `pack_bytes`, reading
3086/// only that object and its delta-base chain instead of parsing the whole pack.
3087///
3088/// Ofs-delta bases are followed by offset (recursively, within this pack);
3089/// ref-delta bases are obtained from `resolve_ref_base`, which the caller backs
3090/// with the surrounding object store (so a base in another pack or loose still
3091/// resolves). The pack trailer checksum is the final `format.raw_len()` bytes.
3092pub fn read_object_at_arc<F>(
3093    pack_bytes: &[u8],
3094    offset: u64,
3095    format: ObjectFormat,
3096    resolve_ref_base: F,
3097) -> Result<Arc<EncodedObject>>
3098where
3099    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3100{
3101    read_object_at_with_cache_arc(
3102        pack_bytes,
3103        offset,
3104        format,
3105        resolve_ref_base,
3106        &NoopDeltaCache,
3107    )
3108}
3109
3110/// Like [`read_object_at_arc`], but reuses already-decoded objects from `cache`
3111/// (keyed by in-pack offset) and records every object it decodes.
3112///
3113/// This turns repeated reads from the same pack — where many deltas share a base
3114/// chain — from re-inflating each chain per read into resolving each base once.
3115/// `cache` must be scoped to the pack `pack_bytes` belongs to (see
3116/// [`PackDeltaCache`]). The decoded object is returned behind an [`Arc`] so
3117/// callers can reuse cache handles without cloning full object bodies.
3118pub fn read_object_at_with_cache_arc<F, C>(
3119    pack_bytes: &[u8],
3120    offset: u64,
3121    format: ObjectFormat,
3122    mut resolve_ref_base: F,
3123    cache: &C,
3124) -> Result<Arc<EncodedObject>>
3125where
3126    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3127    C: PackDeltaCache + ?Sized,
3128{
3129    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
3130}
3131
3132fn read_object_at_inner<F, C>(
3133    pack_bytes: &[u8],
3134    offset: u64,
3135    format: ObjectFormat,
3136    resolve_ref_base: &mut F,
3137    cache: &C,
3138) -> Result<Arc<EncodedObject>>
3139where
3140    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
3141    C: PackDeltaCache + ?Sized,
3142{
3143    // A warm cache entry for this exact offset is already the fully resolved
3144    // object, so the whole base chain below can be skipped.
3145    if let Some(object) = cache.get(offset) {
3146        return Ok(object);
3147    }
3148    let trailer_offset = pack_bytes
3149        .len()
3150        .checked_sub(format.raw_len())
3151        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3152    let mut cursor = usize::try_from(offset)
3153        .ok()
3154        .filter(|&value| value < trailer_offset)
3155        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3156    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3157    let base = match header.kind {
3158        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
3159            pack_bytes,
3160            &mut cursor,
3161            offset,
3162        )?)),
3163        PackObjectKind::RefDelta => {
3164            let hash_len = format.raw_len();
3165            if cursor + hash_len > trailer_offset {
3166                return Err(GitError::InvalidFormat(
3167                    "truncated ref-delta base object id".into(),
3168                ));
3169            }
3170            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3171            cursor += hash_len;
3172            Some(DeltaBase::Ref(oid))
3173        }
3174        _ => None,
3175    };
3176    let mut body = Vec::new();
3177    inflate_into(
3178        &pack_bytes[cursor..trailer_offset],
3179        &mut body,
3180        header.size.min(usize::MAX as u64) as usize,
3181    )?;
3182    if body.len() as u64 != header.size {
3183        return Err(GitError::InvalidObject(format!(
3184            "pack object declared {} bytes, decoded {}",
3185            header.size,
3186            body.len()
3187        )));
3188    }
3189    let object = match base {
3190        None => {
3191            let object_type = match header.kind {
3192                PackObjectKind::Commit => ObjectType::Commit,
3193                PackObjectKind::Tree => ObjectType::Tree,
3194                PackObjectKind::Blob => ObjectType::Blob,
3195                PackObjectKind::Tag => ObjectType::Tag,
3196                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
3197                    return Err(GitError::InvalidFormat(
3198                        "delta pack entry decoded without a base".into(),
3199                    ));
3200                }
3201            };
3202            Arc::new(EncodedObject::new(object_type, body))
3203        }
3204        Some(DeltaBase::Offset(base_offset)) => {
3205            let base =
3206                read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
3207            let resolved = apply_pack_delta(&base.body, &body)?;
3208            Arc::new(EncodedObject::new(base.object_type, resolved))
3209        }
3210        Some(DeltaBase::Ref(base_oid)) => {
3211            let base = resolve_ref_base(&base_oid)?
3212                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
3213            let resolved = apply_pack_delta(&base.body, &body)?;
3214            Arc::new(EncodedObject::new(base.object_type, resolved))
3215        }
3216    };
3217    // Record the fully resolved object so any later read that walks through this
3218    // offset (as a delta base or directly) reuses it. Bases are inserted as the
3219    // recursion unwinds, so a chain is decoded at most once across reads.
3220    cache.insert(offset, Arc::clone(&object));
3221    Ok(object)
3222}
3223
3224/// The object type and final (inflated) size of the entry at `offset`, *without*
3225/// materializing the object body — git's `cat-file --batch-check` fast path.
3226///
3227/// A base object's size is already in its pack entry header, and a delta's result
3228/// size is the second varint at the front of its (small) delta stream, so neither
3229/// inflates the full content. The reported type is the type at the end of the
3230/// delta chain (deltas inherit their base's type). `resolve_ref_base_type` supplies
3231/// the type of a ref-delta base that lives outside this pack (resolved through the
3232/// wider object store); ofs-delta bases are followed within `pack_bytes` directly.
3233pub fn read_object_header_at<F>(
3234    pack_bytes: &[u8],
3235    offset: u64,
3236    format: ObjectFormat,
3237    mut resolve_ref_base_type: F,
3238) -> Result<(ObjectType, u64)>
3239where
3240    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3241{
3242    read_object_header_at_inner(
3243        pack_bytes,
3244        offset,
3245        format,
3246        &mut resolve_ref_base_type,
3247        &mut NoopHeaderTypeCache,
3248    )
3249}
3250
3251/// Memo of `pack offset -> resolved header (end-of-chain type, result size)` for
3252/// the `cat-file --batch-check` header fast path.
3253///
3254/// Without it, resolving the *type* of an ofs-delta walks the whole delta chain
3255/// to its base on every header read, re-inflating each link's leading varints
3256/// from scratch — so reading every object in a deeply-deltified pack costs
3257/// O(objects x chain-depth) and goes super-linear (sley#26). Two reuses fall out
3258/// of memoizing `offset -> (type, size)`:
3259///
3260/// * a chain's end-of-chain type is resolved at most once, so later objects on
3261///   the same chain skip the walk; and
3262/// * a repeated lookup of the same object (common in batch input) returns from
3263///   the memo without re-inflating its delta header at all.
3264///
3265/// The size stored is the object's final (inflated) result size — read from its
3266/// own pack/delta header, never by materializing the body.
3267pub trait HeaderTypeCache {
3268    /// The previously resolved header at `pack_offset`, if any.
3269    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
3270    /// Record the resolved header at `pack_offset` for reuse by later reads.
3271    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
3272}
3273
3274struct NoopHeaderTypeCache;
3275
3276impl HeaderTypeCache for NoopHeaderTypeCache {
3277    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
3278        None
3279    }
3280    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
3281}
3282
3283/// Like [`read_object_header_at`] but threads a caller-owned [`HeaderTypeCache`]
3284/// through the read so (a) the ofs-delta chain's end-of-chain type is resolved at
3285/// most once per chain and (b) a repeated lookup of the same offset returns from
3286/// the memo without re-inflating (sley#26). The cache is keyed by in-pack offset,
3287/// so it must be scoped to a single pack's bytes by the caller.
3288pub fn read_object_header_at_with_cache<F, C>(
3289    pack_bytes: &[u8],
3290    offset: u64,
3291    format: ObjectFormat,
3292    mut resolve_ref_base_type: F,
3293    type_cache: &mut C,
3294) -> Result<(ObjectType, u64)>
3295where
3296    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3297    C: HeaderTypeCache + ?Sized,
3298{
3299    if let Some(header) = type_cache.get(offset) {
3300        return Ok(header);
3301    }
3302    read_object_header_at_inner(
3303        pack_bytes,
3304        offset,
3305        format,
3306        &mut resolve_ref_base_type,
3307        type_cache,
3308    )
3309}
3310
3311fn read_object_header_at_inner<F, C>(
3312    pack_bytes: &[u8],
3313    offset: u64,
3314    format: ObjectFormat,
3315    resolve_ref_base_type: &mut F,
3316    type_cache: &mut C,
3317) -> Result<(ObjectType, u64)>
3318where
3319    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3320    C: HeaderTypeCache + ?Sized,
3321{
3322    let trailer_offset = pack_bytes
3323        .len()
3324        .checked_sub(format.raw_len())
3325        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3326    let mut cursor = usize::try_from(offset)
3327        .ok()
3328        .filter(|&value| value < trailer_offset)
3329        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3330    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3331    let resolved = match header.kind {
3332        PackObjectKind::Commit => (ObjectType::Commit, header.size),
3333        PackObjectKind::Tree => (ObjectType::Tree, header.size),
3334        PackObjectKind::Blob => (ObjectType::Blob, header.size),
3335        PackObjectKind::Tag => (ObjectType::Tag, header.size),
3336        PackObjectKind::OfsDelta => {
3337            let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3338            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3339            // The end-of-chain type only depends on the base, so reuse it across
3340            // reads instead of re-walking the chain per object (sley#26).
3341            let base_type = match type_cache.get(base_offset) {
3342                Some((base_type, _)) => base_type,
3343                None => {
3344                    let (base_type, _) = read_object_header_at_inner(
3345                        pack_bytes,
3346                        base_offset,
3347                        format,
3348                        resolve_ref_base_type,
3349                        type_cache,
3350                    )?;
3351                    base_type
3352                }
3353            };
3354            (base_type, size)
3355        }
3356        PackObjectKind::RefDelta => {
3357            let hash_len = format.raw_len();
3358            if cursor + hash_len > trailer_offset {
3359                return Err(GitError::InvalidFormat(
3360                    "truncated ref-delta base object id".into(),
3361                ));
3362            }
3363            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3364            cursor += hash_len;
3365            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3366            let base_type = resolve_ref_base_type(&oid)?
3367                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3368            (base_type, size)
3369        }
3370    };
3371    // Memoize the fully resolved header so a repeated lookup of this offset (or a
3372    // chain that bases on it) returns without re-inflating (sley#26).
3373    type_cache.put(offset, resolved);
3374    Ok(resolved)
3375}
3376
3377/// Number of inflated delta-stream bytes to read when only the leading base-size
3378/// and result-size varints are needed. Each varint is at most 10 bytes, so a short
3379/// prefix always covers both without inflating the delta instructions.
3380const DELTA_HEADER_PREFIX_LEN: usize = 32;
3381
3382/// Result size of a delta whose zlib-compressed stream starts at `compressed`,
3383/// inflating only the short prefix that holds its two leading varints.
3384fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
3385    let mut prefix = Vec::new();
3386    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
3387    decoded_delta_result_size(&prefix)
3388}
3389
3390fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3391    let first = next_byte(bytes, offset)?;
3392    let mut size = u64::from(first & 0x0f);
3393    let kind = match (first >> 4) & 0x07 {
3394        1 => PackObjectKind::Commit,
3395        2 => PackObjectKind::Tree,
3396        3 => PackObjectKind::Blob,
3397        4 => PackObjectKind::Tag,
3398        6 => PackObjectKind::OfsDelta,
3399        7 => PackObjectKind::RefDelta,
3400        other => {
3401            return Err(GitError::InvalidFormat(format!(
3402                "invalid pack object type {other}"
3403            )));
3404        }
3405    };
3406    let mut shift = 4;
3407    let mut byte = first;
3408    while byte & 0x80 != 0 {
3409        byte = next_byte(bytes, offset)?;
3410        let part = u64::from(byte & 0x7f);
3411        size = size
3412            .checked_add(
3413                part.checked_shl(shift)
3414                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3415            )
3416            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3417        shift += 7;
3418    }
3419    Ok(EntryHeader { kind, size })
3420}
3421
3422fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3423    let mut byte = next_byte(bytes, offset)?;
3424    let mut relative = u64::from(byte & 0x7f);
3425    while byte & 0x80 != 0 {
3426        byte = next_byte(bytes, offset)?;
3427        relative = relative
3428            .checked_add(1)
3429            .and_then(|value| value.checked_shl(7))
3430            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3431            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3432    }
3433    entry_offset
3434        .checked_sub(relative)
3435        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3436}
3437
3438fn resolve_pack_entries<F>(
3439    parsed: Vec<ParsedPackEntry>,
3440    format: ObjectFormat,
3441    external_base: &mut F,
3442) -> Result<Vec<PackObject>>
3443where
3444    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3445{
3446    let mut offset_to_index = HashMap::with_capacity(parsed.len());
3447    for (idx, entry) in parsed.iter().enumerate() {
3448        offset_to_index.insert(parsed_entry_offset(entry), idx);
3449    }
3450
3451    let mut resolved = vec![None; parsed.len()];
3452    let mut oid_to_index = HashMap::new();
3453    let mut unresolved = 0usize;
3454    for (idx, entry) in parsed.iter().enumerate() {
3455        match entry {
3456            ParsedPackEntry::Resolved(object) => {
3457                oid_to_index.insert(object.entry.oid, idx);
3458                resolved[idx] = Some(object.clone());
3459            }
3460            ParsedPackEntry::Delta { .. } => unresolved += 1,
3461        }
3462    }
3463
3464    while unresolved != 0 {
3465        let mut progress = false;
3466        for idx in 0..parsed.len() {
3467            if resolved[idx].is_some() {
3468                continue;
3469            }
3470            let ParsedPackEntry::Delta {
3471                base,
3472                compressed_size,
3473                delta_size,
3474                offset,
3475                delta,
3476            } = &parsed[idx]
3477            else {
3478                continue;
3479            };
3480            let Some(base_object) = delta_base_object(
3481                base,
3482                &offset_to_index,
3483                &oid_to_index,
3484                &resolved,
3485                external_base,
3486            )?
3487            else {
3488                continue;
3489            };
3490            let body = apply_pack_delta(base_object.body(), delta)?;
3491            let object = EncodedObject::new(base_object.object_type(), body);
3492            let oid = object.object_id(format)?;
3493            let pack_object = PackObject {
3494                entry: PackEntry {
3495                    oid,
3496                    compressed_size: *compressed_size,
3497                    uncompressed_size: object.body.len() as u64,
3498                    offset: *offset,
3499                },
3500                object,
3501            };
3502            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
3503                return Err(GitError::InvalidObject(
3504                    "resolved delta size does not match delta header".into(),
3505                ));
3506            }
3507            if *delta_size != delta.len() as u64 {
3508                return Err(GitError::InvalidObject(format!(
3509                    "pack delta declared {delta_size} bytes, decoded {}",
3510                    delta.len()
3511                )));
3512            }
3513            oid_to_index.insert(oid, idx);
3514            resolved[idx] = Some(pack_object);
3515            unresolved -= 1;
3516            progress = true;
3517        }
3518        if !progress {
3519            return Err(GitError::Unsupported("unresolved delta base".into()));
3520        }
3521    }
3522
3523    resolved
3524        .into_iter()
3525        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
3526        .collect()
3527}
3528
3529fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3530    match entry {
3531        ParsedPackEntry::Resolved(object) => object.entry.offset,
3532        ParsedPackEntry::Delta { offset, .. } => *offset,
3533    }
3534}
3535
3536enum DeltaBaseObject<'a> {
3537    Borrowed(&'a EncodedObject),
3538    Owned(EncodedObject),
3539}
3540
3541impl DeltaBaseObject<'_> {
3542    fn object_type(&self) -> ObjectType {
3543        match self {
3544            Self::Borrowed(object) => object.object_type,
3545            Self::Owned(object) => object.object_type,
3546        }
3547    }
3548
3549    fn body(&self) -> &[u8] {
3550        match self {
3551            Self::Borrowed(object) => &object.body,
3552            Self::Owned(object) => &object.body,
3553        }
3554    }
3555}
3556
3557fn delta_base_object<'a, F>(
3558    base: &DeltaBase,
3559    offset_to_index: &HashMap<u64, usize>,
3560    oid_to_index: &HashMap<ObjectId, usize>,
3561    resolved: &'a [Option<PackObject>],
3562    external_base: &mut F,
3563) -> Result<Option<DeltaBaseObject<'a>>>
3564where
3565    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3566{
3567    match base {
3568        DeltaBase::Offset(offset) => {
3569            let Some(index) = offset_to_index.get(offset).copied() else {
3570                return Err(GitError::InvalidFormat(format!(
3571                    "ofs-delta base offset {offset} not found"
3572                )));
3573            };
3574            Ok(resolved[index]
3575                .as_ref()
3576                .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3577        }
3578        DeltaBase::Ref(oid) => {
3579            if let Some(index) = oid_to_index.get(oid).copied() {
3580                return Ok(resolved[index]
3581                    .as_ref()
3582                    .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3583            }
3584            external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3585        }
3586    }
3587}
3588
3589fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3590    let mut cursor = 0usize;
3591    let base_size = read_delta_varint(delta, &mut cursor)?;
3592    if base_size != base.len() as u64 {
3593        return Err(GitError::InvalidObject(format!(
3594            "delta base size mismatch: expected {base_size}, got {}",
3595            base.len()
3596        )));
3597    }
3598    let result_size = read_delta_varint(delta, &mut cursor)?;
3599    // `result_size` is an attacker-controlled delta varint from a network pack
3600    // (install_raw_pack -> sley-fetch). On 64-bit a naive `result_size as usize`
3601    // (or `.min(usize::MAX)`, a no-op there) lets a tiny delta declare
3602    // `u64::MAX`/1 TiB and drive `with_capacity` to abort the process before the
3603    // size-mismatch check below can fire. Route the up-front reservation through
3604    // the sley#2 bound so the speculative allocation is capped; `result.extend`
3605    // still grows the buffer organically and the post-decode length check
3606    // (`result.len() != result_size`) rejects the lie cleanly.
3607    let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3608    let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3609    while cursor < delta.len() {
3610        let command = delta[cursor];
3611        cursor += 1;
3612        if command & 0x80 != 0 {
3613            let copy_offset =
3614                read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3615            let mut copy_size =
3616                read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3617            if copy_size == 0 {
3618                copy_size = 0x10000;
3619            }
3620            let start = usize::try_from(copy_offset)
3621                .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3622            let len = usize::try_from(copy_size)
3623                .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3624            let end = start
3625                .checked_add(len)
3626                .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3627            let Some(slice) = base.get(start..end) else {
3628                return Err(GitError::InvalidObject(
3629                    "delta copy range exceeds base object".into(),
3630                ));
3631            };
3632            result.extend_from_slice(slice);
3633        } else if command != 0 {
3634            let len = usize::from(command);
3635            let end = cursor
3636                .checked_add(len)
3637                .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3638            let Some(slice) = delta.get(cursor..end) else {
3639                return Err(GitError::InvalidObject(
3640                    "delta insert range exceeds delta data".into(),
3641                ));
3642            };
3643            result.extend_from_slice(slice);
3644            cursor = end;
3645        } else {
3646            return Err(GitError::InvalidObject(
3647                "delta contains reserved zero command".into(),
3648            ));
3649        }
3650    }
3651    if result.len() as u64 != result_size {
3652        return Err(GitError::InvalidObject(format!(
3653            "delta result size mismatch: expected {result_size}, got {}",
3654            result.len()
3655        )));
3656    }
3657    Ok(result)
3658}
3659
3660fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3661    let mut cursor = 0usize;
3662    let _ = read_delta_varint(delta, &mut cursor)?;
3663    read_delta_varint(delta, &mut cursor)
3664}
3665
/// Length in bytes of the fixed-size blocks hashed when indexing a base object
/// for delta compression. Same value as the block size in git's `diff-delta.c`.
const DELTA_BLOCK_SIZE: usize = 16;

/// Spacing between the base offsets that get indexed. Targets are still
/// scanned byte-by-byte once shared content is suspected; indexing the base
/// only at block boundaries keeps the index small and avoids per-object
/// hash-table allocation storms on unrelated blobs.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// log2 of the number of hash buckets in a [`DeltaIndex`]. Bucketing replaces a
/// per-base sort of the anchors while keeping same-hash candidate scans short.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of hash buckets in a [`DeltaIndex`].
const DELTA_BUCKET_COUNT: usize = 1usize << DELTA_BUCKET_BITS;
/// Bit mask covering every valid bucket number (`DELTA_BUCKET_COUNT - 1`).
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3681
3682/// An index over a base object's content used to generate deltas against it.
3683///
3684/// The index hashes block-sized anchors of the base, groups them into fixed
3685/// buckets, and verifies exact byte matches before copying. This avoids both
3686/// per-bucket allocation storms and the per-object sort needed by a single
3687/// sorted vector.
3688struct DeltaIndex<'a> {
3689    base: &'a [u8],
3690    blocks: Vec<DeltaBlock>,
3691    buckets: Vec<usize>,
3692}
3693
3694#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3695struct DeltaBlock {
3696    hash: u32,
3697    offset: usize,
3698}
3699
3700impl<'a> DeltaIndex<'a> {
3701    fn new(base: &'a [u8]) -> Self {
3702        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3703        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3704        for_each_delta_anchor(base.len(), |offset| {
3705            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3706            buckets[delta_bucket(hash) + 1] += 1;
3707            anchors.push(DeltaBlock { hash, offset });
3708        });
3709        for idx in 1..buckets.len() {
3710            buckets[idx] += buckets[idx - 1];
3711        }
3712
3713        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3714        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3715        for anchor in anchors {
3716            let bucket = delta_bucket(anchor.hash);
3717            let next = &mut next_offsets[bucket];
3718            blocks[*next] = anchor;
3719            *next += 1;
3720        }
3721
3722        Self {
3723            base,
3724            blocks,
3725            buckets,
3726        }
3727    }
3728
3729    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3730        let bucket = delta_bucket(hash);
3731        let start = self.buckets[bucket];
3732        let end = self.buckets[bucket + 1];
3733        self.blocks[start..end]
3734            .iter()
3735            .filter(move |block| block.hash == hash)
3736    }
3737
3738    fn has_hash(&self, hash: u32) -> bool {
3739        self.candidate_blocks(hash).next().is_some()
3740    }
3741
3742    fn has_shared_anchor(&self, target: &[u8]) -> bool {
3743        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3744            return false;
3745        }
3746        let last = target.len() - DELTA_BLOCK_SIZE;
3747        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3748            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3749            if self.has_hash(hash) {
3750                return true;
3751            }
3752        }
3753        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3754            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3755            if self.has_hash(hash) {
3756                return true;
3757            }
3758        }
3759        false
3760    }
3761
3762    /// Generate a delta that reconstructs `target` from this index's base.
3763    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3764        if !self.has_shared_anchor(target) {
3765            return None;
3766        }
3767        let base = self.base;
3768        let mut delta = Vec::new();
3769        write_delta_varint(&mut delta, base.len() as u64);
3770        write_delta_varint(&mut delta, target.len() as u64);
3771
3772        let mut pending_insert_start = 0usize;
3773        let mut pos = 0usize;
3774        while pos < target.len() {
3775            let mut best_len = 0usize;
3776            let mut best_offset = 0usize;
3777            if pos + DELTA_BLOCK_SIZE <= target.len() {
3778                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3779                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3780                    // Confirm the block actually matches (hash collisions are
3781                    // possible) before measuring how far it extends.
3782                    let candidate = candidate.offset;
3783                    let max_len = (base.len() - candidate).min(target.len() - pos);
3784                    let mut len = 0usize;
3785                    while len < max_len && base[candidate + len] == target[pos + len] {
3786                        len += 1;
3787                    }
3788                    if len > best_len {
3789                        best_len = len;
3790                        best_offset = candidate;
3791                    }
3792                }
3793            }
3794
3795            if best_len >= DELTA_BLOCK_SIZE {
3796                if pending_insert_start < pos {
3797                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3798                }
3799                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3800                pos += best_len;
3801                pending_insert_start = pos;
3802            } else {
3803                pos += 1;
3804            }
3805        }
3806        if pending_insert_start < target.len() {
3807            write_delta_insert(&mut delta, &target[pending_insert_start..]);
3808        }
3809        Some(delta)
3810    }
3811}
3812
3813fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3814    if len < DELTA_BLOCK_SIZE {
3815        return;
3816    }
3817    len -= DELTA_BLOCK_SIZE;
3818    for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3819        visit(offset);
3820    }
3821    if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3822        visit(len);
3823    }
3824}
3825
3826fn delta_anchor_count(len: usize) -> usize {
3827    if len < DELTA_BLOCK_SIZE {
3828        return 0;
3829    }
3830    let last = len - DELTA_BLOCK_SIZE;
3831    (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3832}
3833
3834fn delta_bucket(hash: u32) -> usize {
3835    (hash as usize) & DELTA_BUCKET_MASK
3836}
3837
/// Maximum number of same-hash base candidates examined per target position
/// while generating a delta (`DeltaIndex::delta` applies it with `take`). Caps
/// the work done extending candidate matches for inputs with many repeated
/// blocks.
///
/// NOTE(review): index construction is not visible from here; whether it also
/// trims the stored blocks per bucket to this limit should be confirmed.
const DELTA_MAX_CHAIN: usize = 64;
3841
/// Hash a block of base/target bytes into a 32-bit key.
///
/// Starting from zero, each byte is folded in as `hash * 0x0100_0193 ^ byte`
/// with wrapping multiplication (an FNV-style mix). Callers verify matches
/// byte-for-byte, so collisions only cost extra comparisons and never affect
/// correctness.
fn block_hash(block: &[u8]) -> u32 {
    const MULTIPLIER: u32 = 0x0100_0193;
    block.iter().fold(0u32, |mixed, &byte| {
        mixed.wrapping_mul(MULTIPLIER) ^ u32::from(byte)
    })
}
3854
/// The chosen storage form for a single object during pack generation.
///
/// Filled in by `plan_pack_deltas` and read back by `planned_payload`, which
/// yields the object's own body for `None` and the stored `delta` bytes for
/// the two delta variants.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored undeltified (a base for others, or no good delta was found).
    None,
    /// Delta against another object in this pack, identified by its original
    /// index. The pre-computed `delta` bytes reconstruct the object from that
    /// base's body.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Delta against an external (thin-pack) base, referenced by object id.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
3867
/// Per-object planning result, stored at the object's original index in the
/// plan vector returned by `plan_pack_deltas`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// How the object is stored: undeltified, or as a delta against an
    /// in-pack or external base.
    base: PlannedBase,
}
3872
3873fn compress_planned_payloads(
3874    objects: &[&EncodedObject],
3875    plan: &[PlannedEntry],
3876    order: &[usize],
3877) -> Result<Vec<Vec<u8>>> {
3878    if order.is_empty() {
3879        return Ok(Vec::new());
3880    }
3881
3882    let worker_count = std::thread::available_parallelism()
3883        .map(|threads| threads.get())
3884        .unwrap_or(1)
3885        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3886        .min(order.len());
3887    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3888        let mut payloads = Vec::with_capacity(order.len());
3889        for &idx in order {
3890            payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
3891        }
3892        return Ok(payloads);
3893    }
3894
3895    let chunk_len = order.len().div_ceil(worker_count);
3896    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
3897    std::thread::scope(|scope| {
3898        let mut handles = Vec::new();
3899        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
3900            let chunk_start = chunk_idx * chunk_len;
3901            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
3902                let mut chunk_payloads = Vec::with_capacity(chunk.len());
3903                for (offset, &idx) in chunk.iter().enumerate() {
3904                    chunk_payloads.push((
3905                        chunk_start + offset,
3906                        compressed_payload(planned_payload(objects, plan, idx))?,
3907                    ));
3908                }
3909                Ok(chunk_payloads)
3910            }));
3911        }
3912
3913        let mut first_error = None;
3914        for handle in handles {
3915            match handle.join() {
3916                Ok(Ok(chunk_payloads)) => {
3917                    if first_error.is_none() {
3918                        for (pos, payload) in chunk_payloads {
3919                            payloads[pos] = payload;
3920                        }
3921                    }
3922                }
3923                Ok(Err(err)) => {
3924                    first_error.get_or_insert(err);
3925                }
3926                Err(_) => {
3927                    first_error.get_or_insert_with(|| {
3928                        GitError::InvalidObject("pack compression worker panicked".into())
3929                    });
3930                }
3931            }
3932        }
3933
3934        match first_error {
3935            Some(err) => Err(err),
3936            None => Ok(()),
3937        }
3938    })?;
3939    Ok(payloads)
3940}
3941
3942fn planned_payload<'a>(
3943    objects: &'a [&'a EncodedObject],
3944    plan: &'a [PlannedEntry],
3945    idx: usize,
3946) -> &'a [u8] {
3947    match &plan[idx].base {
3948        PlannedBase::None => &objects[idx].body,
3949        PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
3950    }
3951}
3952
3953fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
3954    let mut out = Vec::new();
3955    write_compressed_payload(&mut out, body)?;
3956    Ok(out)
3957}
3958
/// Maximum number of external thin-pack bases compared against any single
/// object. Bounds the work of the thin path when a large base set is supplied.
///
/// Applied in `plan_pack_deltas` with `take` on the list of external base
/// indexes.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
3962
/// One slot of the sliding delta window kept by `plan_pack_deltas`.
struct DeltaWindowEntry<'a> {
    /// Original index of the windowed object in the `objects` slice.
    idx: usize,
    /// Delta index built over that object's body, used to delta later objects
    /// against it.
    index: DeltaIndex<'a>,
}
3967
3968/// Rank object types for delta grouping. Objects of the same type are far more
3969/// likely to delta well, so the sort groups by this rank first.
3970fn delta_type_rank(object_type: ObjectType) -> u8 {
3971    match object_type {
3972        ObjectType::Commit => 0,
3973        ObjectType::Tree => 1,
3974        ObjectType::Blob => 2,
3975        ObjectType::Tag => 3,
3976    }
3977}
3978
3979/// Decide how each object is stored (undeltified or deltified) and the order in
3980/// which objects are emitted into the pack.
3981///
3982/// # Ordering
3983///
3984/// Candidates are sorted by `(type, size descending, object id)`:
3985/// * **type** — only same-type objects are deltified against one another, so
3986///   grouping by type keeps the sliding window full of viable bases. Type rank
3987///   follows [`delta_type_rank`] (commit, tree, blob, tag).
3988/// * **size descending** — larger objects come first so smaller, later objects
3989///   delta against larger bases (git's heuristic). Raw [`EncodedObject`]s carry
3990///   no path/name, so the usual path-hash key is unavailable; size is the next
3991///   best locality signal.
3992/// * **object id** — a deterministic tiebreaker for reproducible packs.
3993///
3994/// # Selection
3995///
3996/// Each object is compared against the previous up to `window` same-type
3997/// candidates (and, for thin packs, up to [`DELTA_MAX_EXTERNAL_BASES`] external
3998/// bases of the same type). The smallest delta whose encoded length is strictly
3999/// less than the object's own body is kept; otherwise the object is stored
4000/// undeltified. Delta chain depth is bounded by `options.depth` (a base may
4001/// only be used if doing so keeps the resulting chain within the bound); a depth
4002/// of `0` disables deltification entirely.
4003///
4004/// Returns the per-object plan (indexed by original object index) together with
4005/// the emit order. Every in-pack delta references a candidate that is earlier in
4006/// the emit order, so emitting in that order writes each base before any object
4007/// that depends on it.
4008fn plan_pack_deltas(
4009    objects: &[&EncodedObject],
4010    object_ids: &[ObjectId],
4011    options: &PackWriteOptions,
4012) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
4013    let count = objects.len();
4014    let mut plan: Vec<PlannedEntry> = (0..count)
4015        .map(|_| PlannedEntry {
4016            base: PlannedBase::None,
4017        })
4018        .collect();
4019
4020    // Processing order. Deltas only point backwards within this order, which is
4021    // therefore also a valid emit order. Reordering by type/size improves delta
4022    // locality but is skipped when disabled or when deltification is off.
4023    let mut order: Vec<usize> = (0..count).collect();
4024    if options.reorder && options.depth > 0 {
4025        order.sort_by(|&left, &right| {
4026            delta_type_rank(objects[left].object_type)
4027                .cmp(&delta_type_rank(objects[right].object_type))
4028                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
4029                .then_with(|| {
4030                    object_ids[left]
4031                        .as_bytes()
4032                        .cmp(object_ids[right].as_bytes())
4033                })
4034        });
4035    }
4036
4037    if options.depth == 0 {
4038        return Ok((plan, order));
4039    }
4040
4041    // Pre-build delta indexes for external thin-pack bases, grouped by type so
4042    // an object only compares against compatible bases.
4043    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
4044        Vec::with_capacity(options.thin_bases.len());
4045    for (oid, object) in &options.thin_bases {
4046        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
4047    }
4048
4049    // Chain depth ending at each object (0 = undeltified). Used to keep delta
4050    // chains within `options.depth`.
4051    let mut depth = vec![0usize; count];
4052    // Sliding window of recently processed original indices, most recent last.
4053    let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
4054        std::collections::VecDeque::new();
4055
4056    for &idx in &order {
4057        let target = &objects[idx].body;
4058        let target_type = objects[idx].object_type;
4059
4060        let mut best_delta: Option<Vec<u8>> = None;
4061        let mut best_base = PlannedBase::None;
4062
4063        // Try in-pack candidates from the window (same type only).
4064        for base_entry in window.iter().rev() {
4065            let base_idx = base_entry.idx;
4066            if objects[base_idx].object_type != target_type {
4067                continue;
4068            }
4069            // Using this base would make the new chain depth + 1; skip if that
4070            // would exceed the configured maximum.
4071            if depth[base_idx] + 1 > options.depth {
4072                continue;
4073            }
4074            let Some(delta) = base_entry.index.delta(target) else {
4075                continue;
4076            };
4077            if !delta_is_acceptable(&delta, target.len()) {
4078                continue;
4079            }
4080            if best_delta
4081                .as_ref()
4082                .is_none_or(|current| delta.len() < current.len())
4083            {
4084                best_delta = Some(delta);
4085                best_base = PlannedBase::InPack {
4086                    base_idx,
4087                    delta: Vec::new(),
4088                };
4089            }
4090        }
4091
4092        // Try external thin-pack bases (ref-delta; external base is depth 0, so
4093        // the resulting chain depth is 1, always within a non-zero bound).
4094        for (base_oid, base_type, base_index) in
4095            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
4096        {
4097            if *base_type != target_type {
4098                continue;
4099            }
4100            let Some(delta) = base_index.delta(target) else {
4101                continue;
4102            };
4103            if !delta_is_acceptable(&delta, target.len()) {
4104                continue;
4105            }
4106            if best_delta
4107                .as_ref()
4108                .is_none_or(|current| delta.len() < current.len())
4109            {
4110                best_delta = Some(delta);
4111                best_base = PlannedBase::External {
4112                    base_oid: *base_oid,
4113                    delta: Vec::new(),
4114                };
4115            }
4116        }
4117
4118        if let Some(delta) = best_delta {
4119            match best_base {
4120                PlannedBase::InPack { base_idx, .. } => {
4121                    depth[idx] = depth[base_idx] + 1;
4122                    plan[idx].base = PlannedBase::InPack { base_idx, delta };
4123                }
4124                PlannedBase::External { base_oid, .. } => {
4125                    depth[idx] = 1;
4126                    plan[idx].base = PlannedBase::External { base_oid, delta };
4127                }
4128                PlannedBase::None => {}
4129            }
4130        }
4131
4132        // Add this object to the window for subsequent candidates.
4133        window.push_back(DeltaWindowEntry {
4134            idx,
4135            index: DeltaIndex::new(&objects[idx].body),
4136        });
4137        while window.len() > options.window {
4138            window.pop_front();
4139        }
4140    }
4141
4142    Ok((plan, order))
4143}
4144
/// Decide whether a generated delta should replace the undeltified form.
///
/// A delta is accepted only when it is non-empty and strictly shorter than the
/// object's own body (`target_len`); at equal size the undeltified form is
/// preferred because it is self-contained.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
4152
/// Append `value` as a little-endian base-128 varint: seven payload bits per
/// byte, least significant group first, with the high bit set on every byte
/// except the last. Zero is encoded as a single `0x00` byte.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        // Truncating to u8 keeps the low seven payload bits; bit 7 marks
        // "more bytes follow".
        out.push((value as u8) | 0x80);
        value >>= 7;
    }
    out.push(value as u8);
}
4166
/// Append copy commands that copy `size` bytes starting at `offset` in the
/// base.
///
/// The range is split into pieces of at most `0x10000` bytes. Each piece is a
/// command byte with bit 7 set, followed by the non-zero bytes of the offset
/// (low four bytes, flagged by command bits 0-3) and of the size (low three
/// bytes, flagged by bits 4-6), least significant first. A piece of exactly
/// `0x10000` bytes is written with size `0`, i.e. no size bytes.
///
/// Only the low 32 bits of `offset` are encoded. A `size` of zero writes
/// nothing.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_PIECE: u64 = 0x10000;
    while size != 0 {
        let piece = size.min(MAX_PIECE);
        let encoded_size = if piece == MAX_PIECE { 0 } else { piece };

        // Reserve the command byte, then set its flag bits as operand bytes
        // are appended behind it.
        let command_at = out.len();
        out.push(0x80);
        let offset_bytes = offset.to_le_bytes();
        let size_bytes = encoded_size.to_le_bytes();
        let operands = offset_bytes[..4].iter().chain(&size_bytes[..3]);
        for (bit, &byte) in operands.enumerate() {
            if byte != 0 {
                out[command_at] |= 1 << bit;
                out.push(byte);
            }
        }

        offset += piece;
        size -= piece;
    }
}
4196
/// Append insert commands carrying `bytes` verbatim.
///
/// The data is split into runs of at most `0x7f` bytes, each prefixed by its
/// length in a single byte. Empty input writes nothing.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for run in bytes.chunks(0x7f) {
        out.push(run.len() as u8);
        out.extend_from_slice(run);
    }
}
4205
4206fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
4207    let mut value = 0u64;
4208    let mut shift = 0u32;
4209    loop {
4210        let Some(byte) = delta.get(*cursor).copied() else {
4211            return Err(GitError::InvalidObject("truncated delta size".into()));
4212        };
4213        *cursor += 1;
4214        value = value
4215            .checked_add(
4216                u64::from(byte & 0x7f)
4217                    .checked_shl(shift)
4218                    .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
4219            )
4220            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4221        if byte & 0x80 == 0 {
4222            return Ok(value);
4223        }
4224        shift = shift
4225            .checked_add(7)
4226            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
4227    }
4228}
4229
4230fn read_delta_copy_value(
4231    delta: &[u8],
4232    cursor: &mut usize,
4233    command: u8,
4234    masks: &[u8],
4235) -> Result<u64> {
4236    let mut value = 0u64;
4237    for (shift, mask) in masks.iter().enumerate() {
4238        if command & mask != 0 {
4239            let Some(byte) = delta.get(*cursor).copied() else {
4240                return Err(GitError::InvalidObject(
4241                    "truncated delta copy command".into(),
4242                ));
4243            };
4244            *cursor += 1;
4245            value |= u64::from(byte) << (shift * 8);
4246        }
4247    }
4248    Ok(value)
4249}
4250
thread_local! {
    /// Per-thread compressor reused by `write_compressed_payload`, which resets
    /// it before every payload. Created with the default compression level; the
    /// `true` argument is flate2's `zlib_header` flag (zlib-wrapped output
    /// rather than a raw deflate stream).
    static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
}
4254
4255fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
4256    DEFLATE.with(|cell| {
4257        let mut compressor = cell.borrow_mut();
4258        compressor.reset();
4259        out.reserve(zlib_compress_bound(body.len()));
4260        let status = compressor
4261            .compress_vec(body, out, FlushCompress::Finish)
4262            .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
4263        if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
4264            return Err(GitError::InvalidObject(
4265                "zlib compression did not finish pack entry".into(),
4266            ));
4267        }
4268        Ok(())
4269    })
4270}
4271
/// Size estimate used to pre-reserve the output buffer for a `len`-byte input:
/// `len + (len >> 12) + (len >> 14) + (len >> 25) + 13`, computed with
/// saturating additions so it never overflows.
fn zlib_compress_bound(len: usize) -> usize {
    [12u32, 14, 25]
        .into_iter()
        .fold(len, |bound, shift| bound.saturating_add(len >> shift))
        .saturating_add(13)
}
4278
4279fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
4280    let type_code = match object_type {
4281        ObjectType::Commit => 1,
4282        ObjectType::Tree => 2,
4283        ObjectType::Blob => 3,
4284        ObjectType::Tag => 4,
4285    };
4286    write_pack_entry_header_kind(out, type_code, size);
4287}
4288
/// Append a pack entry header for the raw `type_code` and `size`.
///
/// The first byte holds the type code in bits 4-6 and the low four bits of the
/// size; each following byte holds the next seven size bits. Bit 7 is set on
/// every byte except the last.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    let mut rest = size >> 4;
    let mut current = (type_code << 4) | ((size as u8) & 0x0f);
    loop {
        if rest == 0 {
            // Final byte: continuation bit stays clear.
            out.push(current);
            return;
        }
        out.push(current | 0x80);
        current = (rest as u8) & 0x7f;
        rest >>= 7;
    }
}
4305
4306fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4307    if relative == 0 {
4308        return Err(GitError::InvalidFormat(
4309            "ofs-delta relative offset cannot be zero".into(),
4310        ));
4311    }
4312    let mut value = relative;
4313    let mut bytes = vec![(value & 0x7f) as u8];
4314    value >>= 7;
4315    while value != 0 {
4316        value -= 1;
4317        bytes.push(((value & 0x7f) as u8) | 0x80);
4318        value >>= 7;
4319    }
4320    bytes.reverse();
4321    out.extend_from_slice(&bytes);
4322    Ok(())
4323}
4324
4325fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4326    let Some(byte) = bytes.get(*offset).copied() else {
4327        return Err(GitError::InvalidFormat(
4328            "truncated pack entry header".into(),
4329        ));
4330    };
4331    *offset += 1;
4332    Ok(byte)
4333}
4334
/// Decode a big-endian `u16` from the first two bytes of `bytes`.
///
/// Panics when `bytes` holds fewer than two bytes; callers bounds-check first.
fn u16_be(bytes: &[u8]) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&bytes[..2]);
    u16::from_be_bytes(raw)
}
4338
/// Decode a big-endian `u32` from the first four bytes of `bytes`.
///
/// Panics when `bytes` holds fewer than four bytes; callers bounds-check first.
fn u32_be(bytes: &[u8]) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&bytes[..4]);
    u32::from_be_bytes(raw)
}
4342
/// Decode a big-endian `u64` from the first eight bytes of `bytes`.
///
/// Panics when `bytes` holds fewer than eight bytes; callers bounds-check
/// first.
fn u64_be(bytes: &[u8]) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[..8]);
    u64::from_be_bytes(raw)
}
4348
4349fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4350    let mut fanout = [0u32; 256];
4351    let mut previous = 0u32;
4352    for slot in &mut fanout {
4353        *slot = u32_be(&bytes[*offset..*offset + 4]);
4354        if *slot < previous {
4355            return Err(GitError::InvalidFormat(
4356                "pack index fanout is not monotonic".into(),
4357            ));
4358        }
4359        previous = *slot;
4360        *offset += 4;
4361    }
4362    Ok(fanout)
4363}
4364
4365fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4366    let expected_min = if oid_bytes[0] == 0 {
4367        0
4368    } else {
4369        fanout[usize::from(oid_bytes[0] - 1)]
4370    };
4371    if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4372        return Err(GitError::InvalidFormat(
4373            "pack index object id is outside its fanout bucket".into(),
4374        ));
4375    }
4376    Ok(())
4377}
4378
4379fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4380    if raw_offset & 0x8000_0000 == 0 {
4381        return Ok(u64::from(raw_offset));
4382    }
4383    let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4384    let large_start = large_idx
4385        .checked_mul(8)
4386        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4387    let large_end = large_start
4388        .checked_add(8)
4389        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4390    if large_end > large_offset_table.len() {
4391        return Err(GitError::InvalidFormat(
4392            "pack index large offset points past table".into(),
4393        ));
4394    }
4395    Ok(u64_be(&large_offset_table[large_start..large_end]))
4396}
4397
4398fn checked_range(
4399    start: usize,
4400    count: usize,
4401    width: usize,
4402    total: usize,
4403) -> Result<std::ops::Range<usize>> {
4404    let len = count
4405        .checked_mul(width)
4406        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4407    let end = start
4408        .checked_add(len)
4409        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4410    if end > total {
4411        return Err(GitError::InvalidFormat("truncated pack index table".into()));
4412    }
4413    Ok(start..end)
4414}
4415
4416fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4417    let mut seen = vec![false; positions.len()];
4418    for position in positions {
4419        let idx = *position as usize;
4420        if idx >= positions.len() {
4421            return Err(GitError::InvalidFormat(
4422                "reverse index position points past object table".into(),
4423            ));
4424        }
4425        if seen[idx] {
4426            return Err(GitError::InvalidFormat(
4427                "reverse index position is duplicated".into(),
4428            ));
4429        }
4430        seen[idx] = true;
4431    }
4432    Ok(())
4433}
4434
4435fn parse_midx_pack_names(
4436    bytes: &[u8],
4437    chunks: &[MultiPackIndexChunk],
4438    pack_count: usize,
4439    version: u8,
4440) -> Result<Vec<String>> {
4441    let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4442        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4443    let mut names = Vec::with_capacity(pack_count);
4444    let mut offset = 0usize;
4445    while names.len() < pack_count {
4446        let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4447            return Err(GitError::InvalidFormat(
4448                "multi-pack-index PNAM entry is unterminated".into(),
4449            ));
4450        };
4451        let name_bytes = &data[offset..offset + relative_end];
4452        if name_bytes.is_empty() {
4453            return Err(GitError::InvalidFormat(
4454                "multi-pack-index PNAM entry is empty".into(),
4455            ));
4456        }
4457        let name = std::str::from_utf8(name_bytes)
4458            .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4459        if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4460            return Err(GitError::InvalidFormat(
4461                "multi-pack-index PNAM entry contains a path separator".into(),
4462            ));
4463        }
4464        names.push(name.to_string());
4465        offset += relative_end + 1;
4466    }
4467    let padding = &data[offset..];
4468    if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4469        return Err(GitError::InvalidFormat(
4470            "multi-pack-index PNAM padding is invalid".into(),
4471        ));
4472    }
4473    if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4474        return Err(GitError::InvalidFormat(
4475            "multi-pack-index v1 PNAM entries are not sorted".into(),
4476        ));
4477    }
4478    Ok(names)
4479}
4480
4481fn parse_midx_oid_fanout(
4482    bytes: &[u8],
4483    chunks: &[MultiPackIndexChunk],
4484) -> Result<([u32; 256], usize)> {
4485    let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4486        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4487    if data.len() != 256 * 4 {
4488        return Err(GitError::InvalidFormat(
4489            "multi-pack-index OIDF chunk has invalid length".into(),
4490        ));
4491    }
4492    let mut fanout = [0u32; 256];
4493    let mut previous = 0u32;
4494    for (idx, slot) in fanout.iter_mut().enumerate() {
4495        let start = idx * 4;
4496        *slot = u32_be(&data[start..start + 4]);
4497        if *slot < previous {
4498            return Err(GitError::InvalidFormat(
4499                "multi-pack-index OIDF fanout is not monotonic".into(),
4500            ));
4501        }
4502        previous = *slot;
4503    }
4504    Ok((fanout, fanout[255] as usize))
4505}
4506
4507fn parse_midx_object_ids(
4508    bytes: &[u8],
4509    chunks: &[MultiPackIndexChunk],
4510    format: ObjectFormat,
4511    object_count: usize,
4512    fanout: &[u32; 256],
4513) -> Result<Vec<ObjectId>> {
4514    let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4515        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4516    let expected_len = object_count
4517        .checked_mul(format.raw_len())
4518        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4519    if data.len() != expected_len {
4520        return Err(GitError::InvalidFormat(
4521            "multi-pack-index OIDL chunk has invalid length".into(),
4522        ));
4523    }
4524
4525    let mut ids = Vec::with_capacity(object_count);
4526    let mut counts = [0u32; 256];
4527    let mut previous_oid: Option<ObjectId> = None;
4528    for idx in 0..object_count {
4529        let start = idx * format.raw_len();
4530        let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4531        if let Some(previous) = &previous_oid
4532            && previous.as_bytes() >= oid.as_bytes()
4533        {
4534            return Err(GitError::InvalidFormat(
4535                "multi-pack-index OIDL object ids are not strictly sorted".into(),
4536            ));
4537        }
4538        counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4539            .checked_add(1)
4540            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4541        previous_oid = Some(oid);
4542        ids.push(oid);
4543    }
4544
4545    let mut running = 0u32;
4546    for (idx, count) in counts.iter().enumerate() {
4547        running = running
4548            .checked_add(*count)
4549            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4550        if fanout[idx] != running {
4551            return Err(GitError::InvalidFormat(
4552                "multi-pack-index OIDF fanout does not match OIDL".into(),
4553            ));
4554        }
4555    }
4556    Ok(ids)
4557}
4558
4559fn parse_midx_object_offsets(
4560    bytes: &[u8],
4561    chunks: &[MultiPackIndexChunk],
4562    object_ids: Vec<ObjectId>,
4563    pack_count: u32,
4564) -> Result<Vec<MultiPackIndexEntry>> {
4565    let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4566        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4567    let expected_len = object_ids
4568        .len()
4569        .checked_mul(8)
4570        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4571    if data.len() != expected_len {
4572        return Err(GitError::InvalidFormat(
4573            "multi-pack-index OOFF chunk has invalid length".into(),
4574        ));
4575    }
4576    let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4577    if let Some(large_offsets) = large_offsets
4578        && large_offsets.len() % 8 != 0
4579    {
4580        return Err(GitError::InvalidFormat(
4581            "multi-pack-index LOFF chunk has invalid length".into(),
4582        ));
4583    }
4584
4585    let mut entries = Vec::with_capacity(object_ids.len());
4586    for (idx, oid) in object_ids.into_iter().enumerate() {
4587        let start = idx * 8;
4588        let pack_int_id = u32_be(&data[start..start + 4]);
4589        if pack_int_id >= pack_count {
4590            return Err(GitError::InvalidFormat(
4591                "multi-pack-index object points past pack table".into(),
4592            ));
4593        }
4594        let raw_offset = u32_be(&data[start + 4..start + 8]);
4595        let offset = if raw_offset & 0x8000_0000 == 0 {
4596            u64::from(raw_offset)
4597        } else {
4598            let Some(large_offsets) = large_offsets else {
4599                return Err(GitError::InvalidFormat(
4600                    "multi-pack-index large offset missing LOFF chunk".into(),
4601                ));
4602            };
4603            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4604            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4605                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4606            })?;
4607            let large_end = large_start.checked_add(8).ok_or_else(|| {
4608                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4609            })?;
4610            if large_end > large_offsets.len() {
4611                return Err(GitError::InvalidFormat(
4612                    "multi-pack-index large offset points past LOFF chunk".into(),
4613                ));
4614            }
4615            u64_be(&large_offsets[large_start..large_end])
4616        };
4617        entries.push(MultiPackIndexEntry {
4618            oid,
4619            pack_int_id,
4620            offset,
4621        });
4622    }
4623    Ok(entries)
4624}
4625
4626fn parse_midx_reverse_index(
4627    bytes: &[u8],
4628    chunks: &[MultiPackIndexChunk],
4629    object_count: usize,
4630) -> Result<Option<Vec<u32>>> {
4631    let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4632        return Ok(None);
4633    };
4634    let expected_len = object_count
4635        .checked_mul(4)
4636        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4637    if data.len() != expected_len {
4638        return Err(GitError::InvalidFormat(
4639            "multi-pack-index RIDX chunk has invalid length".into(),
4640        ));
4641    }
4642    let mut positions = Vec::with_capacity(object_count);
4643    for idx in 0..object_count {
4644        let start = idx * 4;
4645        positions.push(u32_be(&data[start..start + 4]));
4646    }
4647    validate_position_permutation(&positions)?;
4648    Ok(Some(positions))
4649}
4650
4651fn parse_midx_bitmapped_packs(
4652    bytes: &[u8],
4653    chunks: &[MultiPackIndexChunk],
4654    pack_count: usize,
4655    object_count: usize,
4656) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4657    let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4658        return Ok(None);
4659    };
4660    let expected_len = pack_count
4661        .checked_mul(8)
4662        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4663    if data.len() != expected_len {
4664        return Err(GitError::InvalidFormat(
4665            "multi-pack-index BTMP chunk has invalid length".into(),
4666        ));
4667    }
4668    let mut entries = Vec::with_capacity(pack_count);
4669    for idx in 0..pack_count {
4670        let start = idx * 8;
4671        let bitmap_pos = u32_be(&data[start..start + 4]);
4672        let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4673        let bitmap_end = u64::from(bitmap_pos)
4674            .checked_add(u64::from(bitmap_nr))
4675            .ok_or_else(|| {
4676                GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4677            })?;
4678        if bitmap_end > object_count as u64 {
4679            return Err(GitError::InvalidFormat(
4680                "multi-pack-index BTMP range points past object table".into(),
4681            ));
4682        }
4683        entries.push(MultiPackBitmapPack {
4684            bitmap_pos,
4685            bitmap_nr,
4686        });
4687    }
4688    Ok(Some(entries))
4689}
4690
4691fn midx_chunk_data<'a>(
4692    bytes: &'a [u8],
4693    chunks: &[MultiPackIndexChunk],
4694    id: [u8; 4],
4695    required: bool,
4696) -> Result<Option<&'a [u8]>> {
4697    let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4698        if required {
4699            return Err(GitError::InvalidFormat(format!(
4700                "multi-pack-index missing {} chunk",
4701                std::str::from_utf8(&id).unwrap_or("required")
4702            )));
4703        }
4704        return Ok(None);
4705    };
4706    let start = usize::try_from(chunk.offset)
4707        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4708    let len = usize::try_from(chunk.len)
4709        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4710    let end = start
4711        .checked_add(len)
4712        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4713    let Some(data) = bytes.get(start..end) else {
4714        return Err(GitError::InvalidFormat(
4715            "multi-pack-index chunk extends past file".into(),
4716        ));
4717    };
4718    Ok(Some(data))
4719}
4720
4721fn hash_function_id(format: ObjectFormat) -> u32 {
4722    match format {
4723        ObjectFormat::Sha1 => 1,
4724        ObjectFormat::Sha256 => 2,
4725    }
4726}
4727
/// Largest clean-run length a single EWAH running-length word (RLW) can
/// record. The run-length field occupies 32 bits (bits 1..=32 of the RLW).
const EWAH_MAX_RUNNING_LEN: u64 = (1 << 32) - 1;

/// Largest number of literal (dirty) words that may follow a single RLW. The
/// literal-count field occupies 31 bits (bits 33..=63 of the RLW).
const EWAH_MAX_LITERAL_LEN: u64 = (1 << 31) - 1;

/// A 64-bit word with every bit set; such a word is encoded as part of a
/// clean run of ones rather than as a literal.
const EWAH_ALL_ONES: u64 = !0;
4738
4739impl EwahBitmap {
4740    /// Constructs an [`EwahBitmap`] in git's canonical EWAH compressed form
4741    /// from a slice of raw uncompressed 64-bit words.
4742    ///
4743    /// Within each word bit `i` corresponds to position `word_index * 64 + i`,
4744    /// matching git's on-disk convention. `bit_size` records the number of
4745    /// logical bits the bitmap spans; it must not exceed `words.len() * 64`.
4746    ///
4747    /// This mirrors libgit's `ewah_add`/`ewah_add_empty_words` incremental
4748    /// encoder: consecutive all-zero or all-one words collapse into a run, and
4749    /// any other word is stored verbatim as a literal. Only the first
4750    /// `bit_size.div_ceil(64)` words back the declared bits; any extra trailing
4751    /// words supplied by the caller are ignored, just as git encodes a bitmap
4752    /// sized to its highest set bit.
4753    pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4754        let required_words = bit_size.div_ceil(64) as usize;
4755        if required_words > words.len() {
4756            return Err(GitError::InvalidFormat(format!(
4757                "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4758                words.len()
4759            )));
4760        }
4761        // Only the words that actually back the declared bits matter; libgit
4762        // never emits clean trailing zero words for the unused tail.
4763        let significant = &words[..required_words];
4764        let mut builder = EwahBuilder::new(bit_size);
4765        for &word in significant {
4766            if word == 0 {
4767                builder.add_empty_words(false, 1);
4768            } else if word == EWAH_ALL_ONES {
4769                builder.add_empty_words(true, 1);
4770            } else {
4771                builder.add_literal(word);
4772            }
4773        }
4774        builder.finish()
4775    }
4776
4777    /// Constructs an [`EwahBitmap`] from a set of bit positions.
4778    ///
4779    /// `bit_size` is the number of logical bits (typically the pack object
4780    /// count). Every position in `positions` must be strictly less than
4781    /// `bit_size`. Positions may be given in any order and may repeat.
4782    pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4783        let word_count = bit_size.div_ceil(64) as usize;
4784        let mut words = vec![0u64; word_count];
4785        for &position in positions {
4786            if position >= bit_size {
4787                return Err(GitError::InvalidFormat(format!(
4788                    "EWAH bit position {position} out of range for bit_size {bit_size}"
4789                )));
4790            }
4791            let word_index = (position / 64) as usize;
4792            let bit_index = position % 64;
4793            words[word_index] |= 1u64 << bit_index;
4794        }
4795        Self::from_words(bit_size, &words)
4796    }
4797
4798    /// An empty EWAH bitmap (no bits, no words). This is what git writes for an
4799    /// all-zero type bitmap (e.g. when a pack has no tags).
4800    pub fn empty() -> Self {
4801        Self {
4802            bit_size: 0,
4803            words: Vec::new(),
4804            rlw_position: 0,
4805        }
4806    }
4807
4808    /// Decodes the compressed EWAH back into raw 64-bit words, LSB-first within
4809    /// each word. The returned vector has `bit_size.div_ceil(64)` entries.
4810    ///
4811    /// This is the inverse of [`EwahBitmap::from_words`] for the bits the
4812    /// bitmap actually covers and is primarily used to validate roundtrips.
4813    pub fn to_words(&self) -> Result<Vec<u64>> {
4814        let mut out = Vec::new();
4815        let mut word_idx = 0usize;
4816        while word_idx < self.words.len() {
4817            let rlw = self.words[word_idx];
4818            let run_bit = rlw & 1;
4819            let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4820            let literal_words = (rlw >> 33) as usize;
4821            word_idx += 1;
4822            let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4823            for _ in 0..run_words {
4824                out.push(fill);
4825            }
4826            let literal_end = word_idx
4827                .checked_add(literal_words)
4828                .filter(|end| *end <= self.words.len())
4829                .ok_or_else(|| {
4830                    GitError::InvalidFormat("EWAH literal words extend past word table".into())
4831                })?;
4832            out.extend_from_slice(&self.words[word_idx..literal_end]);
4833            word_idx = literal_end;
4834        }
4835        let required_words = (self.bit_size as usize).div_ceil(64);
4836        if out.len() < required_words {
4837            out.resize(required_words, 0);
4838        }
4839        out.truncate(required_words);
4840        Ok(out)
4841    }
4842
4843    /// Returns the sorted set bit positions covered by this bitmap.
4844    pub fn to_positions(&self) -> Result<Vec<u32>> {
4845        let words = self.to_words()?;
4846        let mut positions = Vec::new();
4847        for (word_index, word) in words.iter().enumerate() {
4848            let mut remaining = *word;
4849            while remaining != 0 {
4850                let bit = remaining.trailing_zeros();
4851                let position = (word_index as u64) * 64 + u64::from(bit);
4852                if position < u64::from(self.bit_size) {
4853                    // position always fits in u32 because bit_size is u32.
4854                    positions.push(position as u32);
4855                }
4856                remaining &= remaining - 1;
4857            }
4858        }
4859        Ok(positions)
4860    }
4861
4862    /// Serialises the bitmap to git's on-disk EWAH byte layout: `bit_size`
4863    /// (u32 BE), word count (u32 BE), each compressed word (u64 BE), then the
4864    /// running-length-word position (u32 BE).
4865    pub fn to_bytes(&self) -> Vec<u8> {
4866        let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4867        self.append_bytes(&mut out);
4868        out
4869    }
4870
4871    fn append_bytes(&self, out: &mut Vec<u8>) {
4872        out.extend_from_slice(&self.bit_size.to_be_bytes());
4873        out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4874        for word in &self.words {
4875            out.extend_from_slice(&word.to_be_bytes());
4876        }
4877        out.extend_from_slice(&self.rlw_position.to_be_bytes());
4878    }
4879}
4880
/// Incremental EWAH compressed-buffer builder mirroring libgit's `ewah_add`.
///
/// The buffer is a sequence of blocks. Each block begins with a running-length
/// word (RLW) and is followed by zero or more literal words:
///   * bit 0      => value of the clean run words (0 or 1)
///   * bits 1..=32 => number of clean run words (32-bit field)
///   * bits 33..=63 => number of trailing literal words (31-bit field)
struct EwahBuilder {
    /// Number of logical bits the finished bitmap declares; copied unchanged
    /// into the resulting `EwahBitmap`.
    bit_size: u32,
    /// Compressed buffer built so far: RLWs interleaved with the literal
    /// words that belong to them.
    words: Vec<u64>,
    /// Index in `words` of the RLW that is currently being filled.
    rlw_position: usize,
}
4893
4894impl EwahBuilder {
4895    fn new(bit_size: u32) -> Self {
4896        // Every EWAH buffer begins with an RLW, even an empty one.
4897        Self {
4898            bit_size,
4899            words: vec![0u64],
4900            rlw_position: 0,
4901        }
4902    }
4903
4904    fn rlw(&self) -> u64 {
4905        self.words[self.rlw_position]
4906    }
4907
4908    fn set_rlw(&mut self, value: u64) {
4909        self.words[self.rlw_position] = value;
4910    }
4911
4912    fn rlw_running_len(&self) -> u64 {
4913        (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
4914    }
4915
4916    fn rlw_running_bit(&self) -> bool {
4917        self.rlw() & 1 == 1
4918    }
4919
4920    fn rlw_literal_len(&self) -> u64 {
4921        self.rlw() >> 33
4922    }
4923
4924    fn set_running_bit(&mut self, bit: bool) {
4925        let mut value = self.rlw();
4926        value &= !1;
4927        value |= u64::from(bit);
4928        self.set_rlw(value);
4929    }
4930
4931    fn set_running_len(&mut self, len: u64) {
4932        let mut value = self.rlw();
4933        value &= !(EWAH_MAX_RUNNING_LEN << 1);
4934        value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
4935        self.set_rlw(value);
4936    }
4937
4938    fn set_literal_len(&mut self, len: u64) {
4939        let mut value = self.rlw();
4940        value &= (1u64 << 33) - 1;
4941        value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
4942        self.set_rlw(value);
4943    }
4944
4945    /// Begins a fresh RLW block at the end of the buffer.
4946    fn push_rlw(&mut self) {
4947        self.rlw_position = self.words.len();
4948        self.words.push(0);
4949    }
4950
4951    /// Appends `number` clean words whose bits are all `value`, mirroring
4952    /// libgit's `ewah_add_empty_words`.
4953    ///
4954    /// A run can only be merged into the current RLW when that RLW has not yet
4955    /// emitted any literal words and its run either is empty or already carries
4956    /// the same fill value. Otherwise a fresh RLW block must be started, because
4957    /// every block stores its run strictly before its literals.
4958    fn add_empty_words(&mut self, value: bool, mut number: u64) {
4959        while number > 0 {
4960            // The current RLW can absorb more run words only when it has no
4961            // literals yet, its run is either empty or already the right fill
4962            // value, and the 32-bit run-length field is not already saturated.
4963            let can_extend = self.rlw_literal_len() == 0
4964                && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
4965                && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
4966            if !can_extend {
4967                self.push_rlw();
4968            }
4969            if self.rlw_running_len() == 0 {
4970                self.set_running_bit(value);
4971            }
4972            let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
4973            let take = available.min(number);
4974            self.set_running_len(self.rlw_running_len() + take);
4975            number -= take;
4976        }
4977    }
4978
4979    /// Appends a single literal (dirty) word verbatim, mirroring libgit's
4980    /// `ewah_add_dirty_words` for a count of one.
4981    fn add_literal(&mut self, word: u64) {
4982        if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
4983            self.push_rlw();
4984        }
4985        let literal_len = self.rlw_literal_len();
4986        self.set_literal_len(literal_len + 1);
4987        self.words.push(word);
4988    }
4989
4990    fn finish(self) -> Result<EwahBitmap> {
4991        let rlw_position = u32::try_from(self.rlw_position)
4992            .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
4993        if self.words.len() > u32::MAX as usize {
4994            return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
4995        }
4996        Ok(EwahBitmap {
4997            bit_size: self.bit_size,
4998            words: self.words,
4999            rlw_position,
5000        })
5001    }
5002}
5003
/// Builder that assembles a reachability bitmap (`.bitmap`) for a pack.
///
/// The writer is constructed from the object layout of a pack (one
/// [`ObjectType`] per object, in pack order) and the pack's trailing checksum.
/// Callers then register one selected commit per [`add_commit`] call, supplying
/// the set of pack positions reachable from that commit. [`build`]/[`write`]
/// produce a [`PackBitmapIndex`] / serialised `.bitmap` bytes matching git's
/// on-disk format (signature `BITM`, version 1).
///
/// [`add_commit`]: PackBitmapWriter::add_commit
/// [`build`]: PackBitmapWriter::build
/// [`write`]: PackBitmapWriter::write
#[derive(Debug, Clone)]
pub struct PackBitmapWriter {
    /// Object-id format of the index; `new` requires `pack_checksum` to use
    /// the same format.
    format: ObjectFormat,
    /// Trailing checksum of the pack, copied into the built index.
    pack_checksum: ObjectId,
    /// Number of objects in the pack; used as the bit size of every bitmap.
    object_count: u32,
    /// Pack-order positions of commit objects, in ascending order as
    /// collected by `new`.
    commit_positions: Vec<u32>,
    /// Pack-order positions of tree objects, in ascending order.
    tree_positions: Vec<u32>,
    /// Pack-order positions of blob objects, in ascending order.
    blob_positions: Vec<u32>,
    /// Pack-order positions of tag objects, in ascending order.
    tag_positions: Vec<u32>,
    /// Optional name-hash cache with one value per object, attached through
    /// `with_name_hash_cache`.
    name_hash_cache: Option<Vec<u32>>,
    /// Commits registered through `add_commit`, in registration order.
    selected: Vec<SelectedCommit>,
}
5028
/// One commit registered with `PackBitmapWriter::add_commit`, together with
/// the data needed to emit its bitmap entry.
#[derive(Debug, Clone)]
struct SelectedCommit {
    /// Oid-sorted `.idx` position (what the on-disk entry records). The
    /// commit's pack-order position lives in `reachable` with the rest of the
    /// bits.
    commit_index_position: u32,
    /// Flags byte written with the entry; `add_commit` always stores
    /// `PackBitmapWriter::FLAG_NONE`.
    flags: u8,
    /// Pack-order positions whose bits are set in this commit's bitmap. The
    /// commit's own position is always appended by `add_commit`, so the list
    /// may contain duplicates.
    reachable: Vec<u32>,
}
5038
5039impl PackBitmapWriter {
5040    /// `OBJ_NONE` selection flag: this commit's bitmap is stored in full (no XOR
5041    /// compression against a previously selected commit). This is the only flag
5042    /// value this writer emits.
5043    pub const FLAG_NONE: u8 = 0;
5044
5045    /// Creates a writer for a pack whose objects (in pack order) have the given
5046    /// [`ObjectType`]s and whose trailing checksum is `pack_checksum`.
5047    ///
5048    /// Returns an error if the pack contains more than `u32::MAX` objects, if
5049    /// `pack_checksum`'s format does not match `format`, or if any object type
5050    /// is not one of the four reachable git object kinds.
5051    pub fn new(
5052        format: ObjectFormat,
5053        pack_checksum: ObjectId,
5054        object_types: &[ObjectType],
5055    ) -> Result<Self> {
5056        if object_types.len() > u32::MAX as usize {
5057            return Err(GitError::InvalidFormat(
5058                "too many objects for a pack bitmap".into(),
5059            ));
5060        }
5061        if pack_checksum.format() != format {
5062            return Err(GitError::InvalidObjectId(
5063                "pack checksum format does not match bitmap format".into(),
5064            ));
5065        }
5066        let object_count = object_types.len() as u32;
5067        let mut commit_positions = Vec::new();
5068        let mut tree_positions = Vec::new();
5069        let mut blob_positions = Vec::new();
5070        let mut tag_positions = Vec::new();
5071        for (index, object_type) in object_types.iter().enumerate() {
5072            let position = index as u32;
5073            match object_type {
5074                ObjectType::Commit => commit_positions.push(position),
5075                ObjectType::Tree => tree_positions.push(position),
5076                ObjectType::Blob => blob_positions.push(position),
5077                ObjectType::Tag => tag_positions.push(position),
5078            }
5079        }
5080        Ok(Self {
5081            format,
5082            pack_checksum,
5083            object_count,
5084            commit_positions,
5085            tree_positions,
5086            blob_positions,
5087            tag_positions,
5088            name_hash_cache: None,
5089            selected: Vec::new(),
5090        })
5091    }
5092
5093    /// Attaches a name-hash cache (one `u32` per object, in pack order). When
5094    /// set, the written bitmap advertises [`PackBitmapIndex::OPTION_HASH_CACHE`]
5095    /// and appends the cache after the bitmap entries, exactly as git does.
5096    ///
5097    /// Returns an error if the cache length does not equal the object count.
5098    pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
5099        if cache.len() != self.object_count as usize {
5100            return Err(GitError::InvalidFormat(format!(
5101                "name hash cache has {} entries but pack has {} objects",
5102                cache.len(),
5103                self.object_count
5104            )));
5105        }
5106        self.name_hash_cache = Some(cache);
5107        Ok(self)
5108    }
5109
5110    /// Registers a selected commit and the pack positions reachable from it.
5111    ///
5112    /// `commit_position` is the *pack-order* position of the commit itself (the
5113    /// bit-number space); it must reference a commit object and is implicitly
5114    /// part of the reachable set. `commit_index_position` is the commit's
5115    /// position in the *oid-sorted* pack index — this is what the on-disk entry
5116    /// records (upstream `oid_pos`); bits and entry positions live in different
5117    /// spaces. `reachable` lists the pack-order positions of every object
5118    /// reachable from the commit (it may include or omit `commit_position`;
5119    /// duplicates are fine). All positions must be in range. The commit's full
5120    /// (non-XORed) bitmap is stored.
5121    pub fn add_commit(
5122        &mut self,
5123        commit_position: u32,
5124        commit_index_position: u32,
5125        reachable: &[u32],
5126    ) -> Result<()> {
5127        if commit_position >= self.object_count {
5128            return Err(GitError::InvalidFormat(format!(
5129                "commit position {commit_position} out of range for {} objects",
5130                self.object_count
5131            )));
5132        }
5133        if commit_index_position >= self.object_count {
5134            return Err(GitError::InvalidFormat(format!(
5135                "commit index position {commit_index_position} out of range for {} objects",
5136                self.object_count
5137            )));
5138        }
5139        if !self.commit_positions.contains(&commit_position) {
5140            return Err(GitError::InvalidFormat(format!(
5141                "bitmap commit position {commit_position} is not a commit object"
5142            )));
5143        }
5144        for &position in reachable {
5145            if position >= self.object_count {
5146                return Err(GitError::InvalidFormat(format!(
5147                    "reachable position {position} out of range for {} objects",
5148                    self.object_count
5149                )));
5150            }
5151        }
5152        let mut reachable = reachable.to_vec();
5153        reachable.push(commit_position);
5154        self.selected.push(SelectedCommit {
5155            commit_index_position,
5156            flags: Self::FLAG_NONE,
5157            reachable,
5158        });
5159        Ok(())
5160    }
5161
5162    /// Builds the in-memory [`PackBitmapIndex`] without serialising it.
5163    ///
5164    /// The resulting index always advertises
5165    /// [`PackBitmapIndex::OPTION_FULL_DAG`] (the four type bitmaps fully cover
5166    /// the pack) and, when a name-hash cache was attached,
5167    /// [`PackBitmapIndex::OPTION_HASH_CACHE`].
5168    pub fn build(&self) -> Result<PackBitmapIndex> {
5169        let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
5170        let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
5171        let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
5172        let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
5173
5174        let mut entries = Vec::with_capacity(self.selected.len());
5175        for selected in &self.selected {
5176            let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
5177            entries.push(PackBitmapEntry {
5178                object_position: selected.commit_index_position,
5179                xor_offset: 0,
5180                flags: selected.flags,
5181                bitmap,
5182            });
5183        }
5184
5185        let mut options = PackBitmapIndex::OPTION_FULL_DAG;
5186        if self.name_hash_cache.is_some() {
5187            options |= PackBitmapIndex::OPTION_HASH_CACHE;
5188        }
5189
5190        // The index checksum is only known once the body is serialised; the
5191        // dedicated `write` path fills it in. `build` reports a placeholder of
5192        // the correct format so the struct is self-consistent for callers that
5193        // only need the decoded bitmaps.
5194        let placeholder_checksum = ObjectId::null(self.format);
5195        Ok(PackBitmapIndex {
5196            version: 1,
5197            format: self.format,
5198            options,
5199            pack_checksum: self.pack_checksum.clone(),
5200            index_checksum: placeholder_checksum,
5201            type_bitmaps: PackBitmapTypeBitmaps {
5202                commits,
5203                trees,
5204                blobs,
5205                tags,
5206            },
5207            entries,
5208            name_hash_cache: self.name_hash_cache.clone(),
5209        })
5210    }
5211
5212    /// Builds and serialises the `.bitmap` file, returning the on-disk bytes
5213    /// (including the trailing index checksum).
5214    pub fn write(&self) -> Result<Vec<u8>> {
5215        self.build()?.write()
5216    }
5217}
5218
5219impl PackBitmapIndex {
5220    /// Serialises this index into git's on-disk `.bitmap` byte layout.
5221    ///
5222    /// This is the exact inverse of [`PackBitmapIndex::parse`]: signature
5223    /// `BITM`, version (u16 BE), options (u16 BE), entry count (u32 BE), the
5224    /// pack checksum, the four type bitmaps (commits, trees, blobs, tags), each
5225    /// commit entry (object position, XOR offset, flags, EWAH bitmap), the
5226    /// optional name-hash cache, and finally the trailing index checksum over
5227    /// everything written so far.
5228    ///
5229    /// The `index_checksum` field of `self` is ignored and recomputed from the
5230    /// serialised body. Returns an error for unsupported versions, mismatched
5231    /// object-id formats, an oversized entry table, or an inconsistent name-hash
5232    /// cache.
5233    pub fn write(&self) -> Result<Vec<u8>> {
5234        if self.version != 1 {
5235            return Err(GitError::Unsupported(format!(
5236                "bitmap index version {}",
5237                self.version
5238            )));
5239        }
5240        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
5241        if self.options & !known_options != 0 {
5242            return Err(GitError::Unsupported(format!(
5243                "bitmap index options {:#06x}",
5244                self.options & !known_options
5245            )));
5246        }
5247        if self.pack_checksum.format() != self.format {
5248            return Err(GitError::InvalidObjectId(
5249                "bitmap pack checksum format does not match index format".into(),
5250            ));
5251        }
5252        if self.entries.len() > u32::MAX as usize {
5253            return Err(GitError::InvalidFormat(
5254                "too many bitmap index entries".into(),
5255            ));
5256        }
5257        let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
5258        match (&self.name_hash_cache, want_cache) {
5259            (Some(_), false) => {
5260                return Err(GitError::InvalidFormat(
5261                    "name hash cache present without OPTION_HASH_CACHE".into(),
5262                ));
5263            }
5264            (None, true) => {
5265                return Err(GitError::InvalidFormat(
5266                    "OPTION_HASH_CACHE set without a name hash cache".into(),
5267                ));
5268            }
5269            _ => {}
5270        }
5271
5272        let mut out = Vec::new();
5273        out.extend_from_slice(b"BITM");
5274        out.extend_from_slice(&self.version.to_be_bytes());
5275        out.extend_from_slice(&self.options.to_be_bytes());
5276        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
5277        out.extend_from_slice(self.pack_checksum.as_bytes());
5278
5279        self.type_bitmaps.commits.append_bytes(&mut out);
5280        self.type_bitmaps.trees.append_bytes(&mut out);
5281        self.type_bitmaps.blobs.append_bytes(&mut out);
5282        self.type_bitmaps.tags.append_bytes(&mut out);
5283
5284        for (idx, entry) in self.entries.iter().enumerate() {
5285            if entry.xor_offset as usize > idx {
5286                return Err(GitError::InvalidFormat(
5287                    "bitmap index entry has invalid XOR offset".into(),
5288                ));
5289            }
5290            out.extend_from_slice(&entry.object_position.to_be_bytes());
5291            out.push(entry.xor_offset);
5292            out.push(entry.flags);
5293            entry.bitmap.append_bytes(&mut out);
5294        }
5295
5296        if let Some(cache) = &self.name_hash_cache {
5297            for value in cache {
5298                out.extend_from_slice(&value.to_be_bytes());
5299            }
5300        }
5301
5302        let checksum = sley_core::digest_bytes(self.format, &out)?;
5303        out.extend_from_slice(checksum.as_bytes());
5304        Ok(out)
5305    }
5306}
5307
5308/// Convenience wrapper that builds a `.bitmap` file in one call.
5309///
5310/// `object_types` lists the [`ObjectType`] of every pack object in pack order,
5311/// `pack_checksum` is the pack's trailing checksum, and `commits` carries, per
5312/// selected commit, `(pack_position, index_position, reachable_pack_positions)`
5313/// (see [`PackBitmapWriter::add_commit`] for the two position spaces). An
5314/// optional `name_hash_cache` (one entry per object) may be supplied to emit
5315/// the hash-cache extension.
5316pub fn write_bitmap(
5317    format: ObjectFormat,
5318    pack_checksum: ObjectId,
5319    object_types: &[ObjectType],
5320    commits: &[(u32, u32, Vec<u32>)],
5321    name_hash_cache: Option<Vec<u32>>,
5322) -> Result<Vec<u8>> {
5323    let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5324    if let Some(cache) = name_hash_cache {
5325        writer = writer.with_name_hash_cache(cache)?;
5326    }
5327    for (commit_position, commit_index_position, reachable) in commits {
5328        writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5329    }
5330    writer.write()
5331}
5332
5333#[cfg(test)]
5334mod tests {
5335    use super::*;
5336    use flate2::Compression;
5337    use flate2::read::ZlibDecoder;
5338    use flate2::write::ZlibEncoder;
5339    use std::fs;
5340    use std::io::Read;
5341    use std::io::Write;
5342    use std::path::{Path, PathBuf};
5343    use std::process::Command;
5344    use std::time::{SystemTime, UNIX_EPOCH};
5345
5346    fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5347        PackWriteOptions::new()
5348            .with_prefer_ofs_delta(prefer_ofs_delta)
5349            .with_reorder(false)
5350    }
5351
5352    #[test]
5353    fn parses_single_blob_pack() {
5354        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5355        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5356        assert_eq!(parsed.version, 2);
5357        assert_eq!(parsed.entries.len(), 1);
5358        let object = &parsed.entries[0].object;
5359        assert_eq!(object.object_type, ObjectType::Blob);
5360        assert_eq!(object.body, b"hello\n");
5361        assert_eq!(
5362            parsed.entries[0].entry.oid.to_hex(),
5363            "ce013625030ba8dba906f756967f9e9ca394464a"
5364        );
5365    }
5366
5367    #[test]
5368    fn parses_single_blob_pack_sha256() {
5369        let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5370        let parsed =
5371            PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5372        assert_eq!(parsed.version, 2);
5373        assert_eq!(parsed.entries.len(), 1);
5374        let object = &parsed.entries[0].object;
5375        assert_eq!(object.object_type, ObjectType::Blob);
5376        assert_eq!(object.body, b"hello\n");
5377        assert_eq!(
5378            parsed.entries[0].entry.oid,
5379            object
5380                .object_id(ObjectFormat::Sha256)
5381                .expect("test operation should succeed")
5382        );
5383    }
5384
5385    #[test]
5386    fn parses_bundle_pack_payload_with_bundle_format() {
5387        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5388        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5389            .expect("test operation should succeed");
5390        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5391            .into_bytes()
5392            .into_iter()
5393            .chain(pack)
5394            .collect::<Vec<_>>();
5395        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5396            .expect("test operation should succeed");
5397
5398        let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5399        assert_eq!(parsed.entries.len(), 1);
5400        assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5401        assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5402    }
5403
5404    /// Build a pack whose single blob entry header LIES about its decompressed
5405    /// size: it declares `declared_size` while the actual zlib payload only
5406    /// inflates to `real_body`. A short `real_body` plus a `declared_size` of
5407    /// `u64::MAX` is the decompression-bomb shape — the header claims terabytes
5408    /// from a handful of compressed bytes.
5409    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5410        let mut pack = Vec::new();
5411        pack.extend_from_slice(b"PACK");
5412        pack.extend_from_slice(&2u32.to_be_bytes());
5413        pack.extend_from_slice(&1u32.to_be_bytes());
5414        // Object type 3 == blob; size varint encodes the *attacker-declared* size.
5415        write_pack_entry_header_kind(&mut pack, 3, declared_size);
5416        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5417        encoder
5418            .write_all(real_body)
5419            .expect("test operation should succeed");
5420        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5421        let checksum =
5422            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5423        pack.extend_from_slice(checksum.as_bytes());
5424        pack
5425    }
5426
5427    /// Regression: a crafted pack object header declaring a gigantic decompressed
5428    /// size with a tiny compressed payload must NOT drive an up-front
5429    /// reservation/allocation of that declared size (OOM/abort). sley#2: the
5430    /// header `size` is attacker-controlled over the network (install_raw_pack →
5431    /// sley-fetch), so it must be validated/bounded before any `Vec::reserve`.
5432    ///
5433    /// On the unfixed code, `inflate_into` did `out.reserve(header.size as usize)`
5434    /// with `header.size == u64::MAX`, which panics with "capacity overflow" (or
5435    /// aborts on alloc failure) *before* the size-mismatch check could fire. We
5436    /// run parse on a worker thread so that panic surfaces as a `join()` error
5437    /// rather than killing the test process; the fix turns this into a clean
5438    /// `Err` returned normally.
5439    #[test]
5440    fn rejects_decompression_bomb_header_without_oom() {
5441        for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5442            let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5443            let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5444            let result = handle.join();
5445            // The parse thread must not have panicked/aborted on a huge reserve.
5446            assert!(
5447                result.is_ok(),
5448                "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5449            );
5450            // And parsing must reject the lie (decoded len != declared size).
5451            let parse_result = result.expect("parse thread should not panic on a bomb header");
5452            assert!(
5453                parse_result.is_err(),
5454                "bomb header (declared={declared}) should be rejected as invalid"
5455            );
5456        }
5457    }
5458
5459    /// Build a 2-object pack: a real base blob followed by a delta (ref or ofs)
5460    /// whose *result-size* varint lies, declaring `declared_result_size`, while
5461    /// carrying a tiny real instruction stream. The delta's base-size varint is
5462    /// set correctly (so the base-size check at the top of `apply_pack_delta`
5463    /// passes and we reach the result reservation). Used to drive the sley#35
5464    /// delta-result-size bomb.
5465    fn lying_result_size_delta_pack(
5466        format: ObjectFormat,
5467        declared_result_size: u64,
5468        delta_kind: DeltaKind,
5469    ) -> Vec<u8> {
5470        let base = b"hello";
5471        let result = b"hello world"; // real produced length = 11
5472
5473        // Hand-build a delta with a truthful base-size and a LYING result-size.
5474        let mut delta = Vec::new();
5475        write_delta_varint(&mut delta, base.len() as u64);
5476        write_delta_varint(&mut delta, declared_result_size);
5477        // Real instructions: copy `base` then insert " world".
5478        let suffix = &result[base.len()..];
5479        delta.push(0x90); // copy, 1 size byte present (bit 0x10)
5480        delta.push(base.len() as u8);
5481        delta.push(suffix.len() as u8);
5482        delta.extend_from_slice(suffix);
5483
5484        let mut pack = Vec::new();
5485        pack.extend_from_slice(b"PACK");
5486        pack.extend_from_slice(&2u32.to_be_bytes());
5487        pack.extend_from_slice(&2u32.to_be_bytes());
5488
5489        let base_offset = pack.len();
5490        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
5491        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5492        encoder
5493            .write_all(base)
5494            .expect("test operation should succeed");
5495        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5496
5497        let delta_offset = pack.len();
5498        write_pack_entry_header_kind(
5499            &mut pack,
5500            match delta_kind {
5501                DeltaKind::Offset => 6,
5502                DeltaKind::Ref => 7,
5503            },
5504            delta.len() as u64,
5505        );
5506        match delta_kind {
5507            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
5508            DeltaKind::Ref => {
5509                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
5510                    .expect("test operation should succeed");
5511                pack.extend_from_slice(base_oid.as_bytes());
5512            }
5513        }
5514        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5515        encoder
5516            .write_all(&delta)
5517            .expect("test operation should succeed");
5518        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5519
5520        let checksum =
5521            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5522        pack.extend_from_slice(checksum.as_bytes());
5523        pack
5524    }
5525
5526    /// Regression (sley#35): the 2nd instance of the sley#2 decompression-bomb
5527    /// class. `apply_pack_delta` read an attacker-controlled `result_size` varint
5528    /// from a network delta and fed it straight to `Vec::with_capacity`. A tiny
5529    /// delta declaring `result_size == u64::MAX` (or ~1 TiB) aborts the process
5530    /// ("capacity overflow"/alloc failure, SIGABRT) BEFORE the post-decode
5531    /// size-mismatch check can reject the lie. Both ref-delta and ofs-delta paths
5532    /// reach the same reservation, so both must be safe. We resolve the pack on a
5533    /// worker thread so an abort/panic surfaces as a `join()` error rather than
5534    /// killing the whole test binary; the fix turns the bomb into a clean `Err`.
5535    #[test]
5536    fn rejects_delta_result_size_bomb_without_oom() {
5537        let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
5538        for &declared in bombs {
5539            for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5540                let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
5541                let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5542                let join_result = handle.join();
5543                assert!(
5544                    join_result.is_ok(),
5545                    "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
5546                     instead of erroring cleanly"
5547                );
5548                let parse_result =
5549                    join_result.expect("parse thread should not panic on a delta bomb");
5550                assert!(
5551                    parse_result.is_err(),
5552                    "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
5553                     as invalid (result.len() != declared)"
5554                );
5555            }
5556        }
5557    }
5558
5559    /// A legitimate (truthful) delta whose result-size varint matches the real
5560    /// produced length must still resolve correctly — the bound only caps the
5561    /// speculative reservation, it must not break real delta application.
5562    #[test]
5563    fn applies_legitimate_delta_after_result_size_bound() {
5564        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5565            let base = b"hello";
5566            let result = b"hello world";
5567            let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
5568            let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
5569            assert_eq!(parsed.entries.len(), 2);
5570            assert_eq!(parsed.entries[0].object.body, base);
5571            assert_eq!(parsed.entries[1].object.body, result);
5572        }
5573    }
5574
5575    #[test]
5576    fn bounded_inflate_reserve_caps_attacker_declared_size() {
5577        // A tiny compressed input can't justify a multi-gigabyte reservation.
5578        assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
5579        // The absolute ceiling caps even a large input-justified hint.
5580        assert_eq!(
5581            bounded_inflate_reserve(usize::MAX, usize::MAX),
5582            MAX_INFLATE_RESERVE
5583        );
5584        // A modest legitimate hint is preserved unchanged (no regression for real
5585        // objects): 1000 bytes of output from 500 bytes of input is well within
5586        // both bounds.
5587        assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
5588        // Floor of 64 for tiny hints.
5589        assert_eq!(bounded_inflate_reserve(0, 0), 64);
5590    }
5591
5592    #[test]
5593    fn rejects_bundle_pack_payload_with_wrong_object_format() {
5594        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5595        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
5596            .expect("test operation should succeed");
5597        let bundle_bytes =
5598            format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
5599                .into_bytes()
5600                .into_iter()
5601                .chain(pack)
5602                .collect::<Vec<_>>();
5603        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5604            .expect("test operation should succeed");
5605
5606        assert!(PackFile::parse_bundle(&bundle).is_err());
5607    }
5608
5609    fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5610        let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5611        let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5612        let owned_view =
5613            PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5614                .expect("test operation should succeed");
5615
5616        assert_eq!(view.version, owned.version);
5617        assert_eq!(view.count, owned.entries.len());
5618        assert_eq!(view.count(), owned.entries.len());
5619        assert_eq!(view.fanout(), &owned.fanout);
5620        assert_eq!(view.pack_checksum, owned.pack_checksum);
5621        assert_eq!(view.index_checksum, owned.index_checksum);
5622        assert_eq!(owned_view.version, owned.version);
5623        assert_eq!(owned_view.count(), owned.entries.len());
5624        assert_eq!(owned_view.fanout(), &owned.fanout);
5625        assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5626        assert_eq!(owned_view.index_checksum, owned.index_checksum);
5627        for entry in &owned.entries {
5628            let owned_found = owned
5629                .find(&entry.oid)
5630                .expect("test operation should succeed");
5631            let expected = Some(PackIndexLookup {
5632                crc32: owned_found.crc32,
5633                offset: owned_found.offset,
5634            });
5635            assert_eq!(view.find(&entry.oid), expected);
5636            assert_eq!(owned_view.find(&entry.oid), expected);
5637        }
5638    }
5639
5640    #[test]
5641    fn writes_pack_and_index_that_round_trip() {
5642        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
5643        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5644            .expect("test operation should succeed");
5645        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5646        let index =
5647            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5648        let oid = object
5649            .object_id(ObjectFormat::Sha1)
5650            .expect("test operation should succeed");
5651        assert_eq!(pack.entries[0].object, object);
5652        assert_eq!(index.pack_checksum, pack.checksum);
5653        assert_eq!(
5654            index
5655                .find(&oid)
5656                .expect("test operation should succeed")
5657                .offset,
5658            12
5659        );
5660    }
5661
5662    #[test]
5663    fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
5664        let objects = (0..8)
5665            .map(|idx| {
5666                EncodedObject::new(
5667                    ObjectType::Blob,
5668                    format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
5669                )
5670            })
5671            .collect::<Vec<_>>();
5672        let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5673            .expect("test operation should succeed");
5674
5675        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
5676
5677        let view =
5678            PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
5679        let missing = sley_core::object_id_for_bytes(
5680            ObjectFormat::Sha1,
5681            "blob",
5682            b"not present in borrowed index\n",
5683        )
5684        .expect("test operation should succeed");
5685        assert_eq!(view.find(&missing), None);
5686    }
5687
5688    #[test]
5689    fn writes_sha256_pack_and_index_that_round_trip() {
5690        let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
5691        let written =
5692            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5693                .expect("test operation should succeed");
5694        let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
5695            .expect("test operation should succeed");
5696        let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
5697            .expect("test operation should succeed");
5698        let oid = object
5699            .object_id(ObjectFormat::Sha256)
5700            .expect("test operation should succeed");
5701        assert_eq!(pack.entries[0].object, object);
5702        assert_eq!(index.pack_checksum, pack.checksum);
5703        assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
5704        assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
5705        assert_eq!(
5706            index
5707                .find(&oid)
5708                .expect("test operation should succeed")
5709                .offset,
5710            12
5711        );
5712    }
5713
5714    #[test]
5715    fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
5716        let objects = (0..4)
5717            .map(|idx| {
5718                EncodedObject::new(
5719                    ObjectType::Blob,
5720                    format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
5721                )
5722            })
5723            .collect::<Vec<_>>();
5724        let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
5725            .expect("test operation should succeed");
5726
5727        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
5728    }
5729
5730    #[test]
5731    fn indexes_existing_sha256_pack_bytes() {
5732        let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
5733        let written =
5734            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5735                .expect("test operation should succeed");
5736
5737        let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
5738            .expect("test operation should succeed");
5739        let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
5740            .expect("test operation should succeed");
5741
5742        assert_eq!(indexed.pack_checksum, written.checksum);
5743        assert_eq!(indexed.entries, written.entries);
5744        assert_eq!(index.pack_checksum, written.checksum);
5745        assert_eq!(index.entries, written.entries);
5746    }
5747
5748    #[test]
5749    fn indexes_existing_delta_pack_bytes() {
5750        let (base, changed) = similar_blob_objects();
5751        let options = delta_pack_options(true);
5752        let written = PackFile::write_packed_with_options(
5753            &[base, changed.clone()],
5754            ObjectFormat::Sha1,
5755            &options,
5756        )
5757        .expect("test operation should succeed");
5758
5759        let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
5760            .expect("test operation should succeed");
5761        let index =
5762            PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
5763        let changed_oid = changed
5764            .object_id(ObjectFormat::Sha1)
5765            .expect("test operation should succeed");
5766
5767        assert_eq!(indexed.pack_checksum, written.checksum);
5768        assert_eq!(indexed.entries, written.entries);
5769        assert_eq!(
5770            index
5771                .find(&changed_oid)
5772                .expect("test operation should succeed")
5773                .offset,
5774            written.entries[1].offset
5775        );
5776        assert_eq!(
5777            index
5778                .find(&changed_oid)
5779                .expect("test operation should succeed")
5780                .crc32,
5781            written.entries[1].crc32
5782        );
5783    }
5784
5785    #[test]
5786    fn writes_ref_delta_pack_and_index_that_round_trip() {
5787        let (base, changed) = similar_blob_objects();
5788        let options = delta_pack_options(false);
5789        let written = PackFile::write_packed_with_options(
5790            &[base.clone(), changed.clone()],
5791            ObjectFormat::Sha1,
5792            &options,
5793        )
5794        .expect("test operation should succeed");
5795        let mut second_offset = written.entries[1].offset as usize;
5796        let header = parse_entry_header(&written.pack, &mut second_offset)
5797            .expect("test operation should succeed");
5798        assert_eq!(header.kind, PackObjectKind::RefDelta);
5799
5800        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5801        let index =
5802            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5803        let oid = changed
5804            .object_id(ObjectFormat::Sha1)
5805            .expect("test operation should succeed");
5806        assert_eq!(pack.entries[0].object, base);
5807        assert_eq!(pack.entries[1].object, changed);
5808        assert_eq!(index.pack_checksum, pack.checksum);
5809        assert_eq!(
5810            index
5811                .find(&oid)
5812                .expect("test operation should succeed")
5813                .offset,
5814            written.entries[1].offset
5815        );
5816    }
5817
5818    #[test]
5819    fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
5820        let (base, changed) = similar_blob_objects();
5821        let options = delta_pack_options(true);
5822        let written = PackFile::write_packed_with_options(
5823            &[base, changed.clone()],
5824            ObjectFormat::Sha1,
5825            &options,
5826        )
5827        .expect("test operation should succeed");
5828        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5829        let mut second = written.entries[1].offset as usize;
5830        assert_eq!(
5831            parse_entry_header(&written.pack, &mut second)
5832                .expect("test operation should succeed")
5833                .kind,
5834            PackObjectKind::OfsDelta
5835        );
5836        // Ground truth from a full parse; single-object decode must match at every offset.
5837        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5838        for po in &parsed.entries {
5839            let got =
5840                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5841                    Ok(None)
5842                })
5843                .expect("test operation should succeed");
5844            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
5845        }
5846    }
5847
5848    /// A [`HeaderTypeCache`] over a plain map, for asserting the cached header
5849    /// read is byte-identical to the uncached one cold and warm (sley#26).
5850    #[derive(Default)]
5851    struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5852
5853    impl HeaderTypeCache for MapHeaderTypeCache {
5854        fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5855            self.0.get(&pack_offset).copied()
5856        }
5857        fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5858            self.0.insert(pack_offset, header);
5859        }
5860    }
5861
5862    #[test]
5863    fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
5864        let (base, changed) = similar_blob_objects();
5865        let options = delta_pack_options(true);
5866        let written =
5867            PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
5868                .expect("test operation should succeed");
5869        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5870        let mut second = written.entries[1].offset as usize;
5871        assert_eq!(
5872            parse_entry_header(&written.pack, &mut second)
5873                .expect("test operation should succeed")
5874                .kind,
5875            PackObjectKind::OfsDelta
5876        );
5877
5878        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5879        let mut cache = MapHeaderTypeCache::default();
5880        for po in &parsed.entries {
5881            let uncached =
5882                read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5883                    Ok(None)
5884                })
5885                .expect("test operation should succeed");
5886            // Type inherited from the chain base; size is the inflated body length.
5887            assert_eq!(
5888                uncached,
5889                (po.object.object_type, po.object.body.len() as u64),
5890                "uncached header at offset {}",
5891                po.entry.offset
5892            );
5893            // Cold cache: must agree with the uncached read and populate the memo.
5894            let cold = read_object_header_at_with_cache(
5895                &written.pack,
5896                po.entry.offset,
5897                ObjectFormat::Sha1,
5898                |_| Ok(None),
5899                &mut cache,
5900            )
5901            .expect("test operation should succeed");
5902            assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
5903        }
5904        // Warm cache: every offset now resolves from the memo and is still correct,
5905        // proving the fast path does not change behavior (sley#26).
5906        for po in &parsed.entries {
5907            let warm = read_object_header_at_with_cache(
5908                &written.pack,
5909                po.entry.offset,
5910                ObjectFormat::Sha1,
5911                |_| panic!("warm cache must not re-walk the chain"),
5912                &mut cache,
5913            )
5914            .expect("test operation should succeed");
5915            assert_eq!(
5916                warm,
5917                (po.object.object_type, po.object.body.len() as u64),
5918                "warm cache at offset {}",
5919                po.entry.offset
5920            );
5921        }
5922    }
5923
5924    #[test]
5925    fn read_object_at_matches_full_parse_for_ref_delta_pack() {
5926        let (base, changed) = similar_blob_objects();
5927        let options = delta_pack_options(false);
5928        let written = PackFile::write_packed_with_options(
5929            &[base, changed.clone()],
5930            ObjectFormat::Sha1,
5931            &options,
5932        )
5933        .expect("test operation should succeed");
5934        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5935        let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
5936            .entries
5937            .iter()
5938            .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
5939            .collect();
5940        for po in &parsed.entries {
5941            let got =
5942                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
5943                    Ok(by_oid.get(oid).cloned())
5944                })
5945                .expect("test operation should succeed");
5946            assert_eq!(*got, po.object);
5947        }
5948    }
5949
5950    /// A test-only [`PackDeltaCache`] that records every decode and counts hits,
5951    /// used to prove the cached decode path is byte-identical to the uncached
5952    /// one and that bases are reused across reads.
5953    #[derive(Default)]
5954    struct CountingDeltaCache {
5955        map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
5956        hits: std::cell::Cell<usize>,
5957        inserts: std::cell::Cell<usize>,
5958    }
5959
5960    impl PackDeltaCache for CountingDeltaCache {
5961        fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
5962            let hit = self.map.borrow().get(&offset).cloned();
5963            if hit.is_some() {
5964                self.hits.set(self.hits.get() + 1);
5965            }
5966            hit
5967        }
5968        fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
5969            self.inserts.set(self.inserts.get() + 1);
5970            self.map.borrow_mut().insert(offset, object);
5971        }
5972    }
5973
5974    #[test]
5975    fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
5976        // A multi-object pack with a real ofs-delta chain so the cache has bases
5977        // to reuse. Build several similar blobs to encourage deltification.
5978        let mut objects = Vec::new();
5979        for idx in 0..8u32 {
5980            let mut body = vec![b'x'; 4096];
5981            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
5982            objects.push(EncodedObject::new(ObjectType::Blob, body));
5983        }
5984        let options = delta_pack_options(true);
5985        let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5986            .expect("test operation should succeed");
5987        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5988
5989        let cache = CountingDeltaCache::default();
5990        // Read every object twice through the cache; each result must equal the
5991        // ground-truth from the full parse, byte for byte, both times.
5992        for _ in 0..2 {
5993            for po in &parsed.entries {
5994                let got = read_object_at_with_cache_arc(
5995                    &written.pack,
5996                    po.entry.offset,
5997                    ObjectFormat::Sha1,
5998                    |_| Ok(None),
5999                    &cache,
6000                )
6001                .expect("test operation should succeed");
6002                assert_eq!(*got, po.object, "offset {}", po.entry.offset);
6003            }
6004        }
6005        // The second pass reads everything straight from the cache, so there must
6006        // be at least one hit (proving reuse, not just correctness).
6007        assert!(cache.hits.get() > 0, "cache never served a warm object");
6008    }
6009
6010    #[test]
6011    fn writes_ofs_delta_pack_and_index_that_round_trip() {
6012        let (base, changed) = similar_blob_objects();
6013        let options = delta_pack_options(true);
6014        let written = PackFile::write_packed_with_options(
6015            &[base.clone(), changed.clone()],
6016            ObjectFormat::Sha1,
6017            &options,
6018        )
6019        .expect("test operation should succeed");
6020        let mut second_offset = written.entries[1].offset as usize;
6021        let header = parse_entry_header(&written.pack, &mut second_offset)
6022            .expect("test operation should succeed");
6023        assert_eq!(header.kind, PackObjectKind::OfsDelta);
6024
6025        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
6026        let index =
6027            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
6028        let oid = changed
6029            .object_id(ObjectFormat::Sha1)
6030            .expect("test operation should succeed");
6031        assert_eq!(pack.entries[0].object, base);
6032        assert_eq!(pack.entries[1].object, changed);
6033        assert_eq!(index.pack_checksum, pack.checksum);
6034        assert_eq!(
6035            index
6036                .find(&oid)
6037                .expect("test operation should succeed")
6038                .offset,
6039            written.entries[1].offset
6040        );
6041    }
6042
6043    #[test]
6044    fn resolves_ofs_delta_pack_entry() {
6045        let base = b"hello";
6046        let result = b"hello world";
6047        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
6048        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6049        assert_eq!(parsed.entries.len(), 2);
6050        assert_eq!(parsed.entries[0].object.body, base);
6051        assert_eq!(parsed.entries[1].object.body, result);
6052        assert_eq!(
6053            parsed.entries[1].entry.oid,
6054            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6055                .expect("test operation should succeed")
6056        );
6057    }
6058
6059    #[test]
6060    fn resolves_ref_delta_pack_entry() {
6061        let base = b"hello";
6062        let result = b"hello world";
6063        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
6064        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
6065        assert_eq!(parsed.entries.len(), 2);
6066        assert_eq!(parsed.entries[0].object.body, base);
6067        assert_eq!(parsed.entries[1].object.body, result);
6068        assert_eq!(
6069            parsed.entries[1].entry.oid,
6070            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6071                .expect("test operation should succeed")
6072        );
6073    }
6074
6075    #[test]
6076    fn resolves_thin_ref_delta_pack_entry_with_external_base() {
6077        let base = b"hello";
6078        let result = b"hello world";
6079        let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
6080        assert!(PackFile::parse_sha1(&pack).is_err());
6081
6082        let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
6083            .expect("test operation should succeed");
6084        let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
6085            if oid == &base_oid {
6086                Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
6087            } else {
6088                Ok(None)
6089            }
6090        })
6091        .expect("test operation should succeed");
6092        assert_eq!(parsed.entries.len(), 1);
6093        assert_eq!(parsed.entries[0].object.body, result);
6094        assert_eq!(
6095            parsed.entries[0].entry.oid,
6096            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
6097                .expect("test operation should succeed")
6098        );
6099    }
6100
6101    #[test]
6102    fn rejects_bad_pack_checksum() {
6103        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6104        let last = pack.len() - 1;
6105        pack[last] ^= 1;
6106        assert!(PackFile::parse_sha1(&pack).is_err());
6107    }
6108
6109    #[test]
6110    fn raw_pack_index_rejects_bad_pack_checksum() {
6111        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
6112        let last = pack.len() - 1;
6113        pack[last] ^= 1;
6114        assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
6115    }
6116
6117    #[test]
6118    fn pack_index_writer_rejects_duplicate_object_ids() {
6119        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
6120            .expect("test operation should succeed");
6121        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6122            .expect("test operation should succeed");
6123        let entries = vec![
6124            PackIndexEntry {
6125                oid,
6126                crc32: 1,
6127                offset: 12,
6128            },
6129            PackIndexEntry {
6130                oid,
6131                crc32: 2,
6132                offset: 24,
6133            },
6134        ];
6135        assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
6136    }
6137
6138    #[test]
6139    fn parses_single_entry_pack_index() {
6140        let oid = ObjectId::from_hex(
6141            ObjectFormat::Sha1,
6142            "ce013625030ba8dba906f756967f9e9ca394464a",
6143        )
6144        .expect("test operation should succeed");
6145        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6146            .expect("test operation should succeed");
6147        let index = single_entry_index(
6148            ObjectFormat::Sha1,
6149            oid,
6150            0x1234_5678,
6151            12,
6152            pack_checksum.clone(),
6153        );
6154        let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
6155        assert_eq!(parsed.version, 2);
6156        assert_eq!(parsed.pack_checksum, pack_checksum);
6157        assert_eq!(parsed.entries.len(), 1);
6158        assert_eq!(
6159            parsed
6160                .find(&oid)
6161                .expect("test operation should succeed")
6162                .offset,
6163            12
6164        );
6165        assert_eq!(
6166            parsed
6167                .find(&oid)
6168                .expect("test operation should succeed")
6169                .crc32,
6170            0x1234_5678
6171        );
6172        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6173    }
6174
6175    #[test]
6176    fn parses_single_entry_pack_index_v1() {
6177        let oid = ObjectId::from_hex(
6178            ObjectFormat::Sha1,
6179            "ce013625030ba8dba906f756967f9e9ca394464a",
6180        )
6181        .expect("test operation should succeed");
6182        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6183            .expect("test operation should succeed");
6184        let index =
6185            single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
6186        let parsed =
6187            PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
6188        assert_eq!(parsed.version, 1);
6189        assert_eq!(parsed.pack_checksum, pack_checksum);
6190        assert_eq!(parsed.entries.len(), 1);
6191        assert_eq!(
6192            parsed
6193                .find(&oid)
6194                .expect("test operation should succeed")
6195                .offset,
6196            0x1234_5678
6197        );
6198        assert_eq!(
6199            parsed
6200                .find(&oid)
6201                .expect("test operation should succeed")
6202                .crc32,
6203            0
6204        );
6205        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6206    }
6207
6208    #[test]
6209    fn rejects_bad_pack_index_v1_checksum() {
6210        let oid = ObjectId::from_hex(
6211            ObjectFormat::Sha1,
6212            "ce013625030ba8dba906f756967f9e9ca394464a",
6213        )
6214        .expect("test operation should succeed");
6215        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6216            .expect("test operation should succeed");
6217        let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
6218        let last = index.len() - 1;
6219        index[last] ^= 1;
6220        assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
6221    }
6222
6223    #[test]
6224    fn pack_index_view_reads_v2_large_offsets() {
6225        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
6226            .expect("test operation should succeed");
6227        let second =
6228            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
6229                .expect("test operation should succeed");
6230        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6231            .expect("test operation should succeed");
6232        let entries = vec![
6233            PackIndexEntry {
6234                oid: first,
6235                crc32: 0x1111_2222,
6236                offset: 0x8000_0000,
6237            },
6238            PackIndexEntry {
6239                oid: second,
6240                crc32: 0x3333_4444,
6241                offset: 0x1_0000_0042,
6242            },
6243        ];
6244        let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
6245            .expect("test operation should succeed");
6246
6247        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
6248        let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
6249            .expect("test operation should succeed");
6250        for entry in entries {
6251            assert_eq!(
6252                view.find(&entry.oid),
6253                Some(PackIndexLookup {
6254                    crc32: entry.crc32,
6255                    offset: entry.offset,
6256                })
6257            );
6258        }
6259    }
6260
6261    #[test]
6262    fn pack_index_view_default_parse_checks_index_checksum() {
6263        let oid = ObjectId::from_hex(
6264            ObjectFormat::Sha1,
6265            "ce013625030ba8dba906f756967f9e9ca394464a",
6266        )
6267        .expect("test operation should succeed");
6268        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6269            .expect("test operation should succeed");
6270        let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
6271        let last = index.len() - 1;
6272        index[last] ^= 1;
6273
6274        assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
6275        let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
6276            .expect("test operation should succeed");
6277        let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
6278            Arc::from(index.clone().into_boxed_slice()),
6279            ObjectFormat::Sha1,
6280        )
6281        .expect("test operation should succeed");
6282        assert_eq!(
6283            view.find(&oid),
6284            Some(PackIndexLookup {
6285                crc32: 0x1234_5678,
6286                offset: 12,
6287            })
6288        );
6289        assert_eq!(
6290            trusted_view.find(&oid),
6291            Some(PackIndexLookup {
6292                crc32: 0x1234_5678,
6293                offset: 12,
6294            })
6295        );
6296    }
6297
6298    #[test]
6299    fn parses_pack_reverse_index() {
6300        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6301            .expect("test operation should succeed");
6302        let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
6303            .expect("test operation should succeed");
6304        let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
6305            .expect("test operation should succeed");
6306        assert_eq!(parsed.version, 1);
6307        assert_eq!(parsed.format, ObjectFormat::Sha1);
6308        assert_eq!(parsed.positions, vec![2, 0, 1]);
6309        assert_eq!(parsed.pack_checksum, pack_checksum);
6310        assert_eq!(
6311            PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
6312                .expect("test operation should succeed"),
6313            reverse_index
6314        );
6315    }
6316
6317    #[test]
6318    fn rejects_bad_pack_reverse_index_checksum() {
6319        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6320            .expect("test operation should succeed");
6321        let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
6322            .expect("test operation should succeed");
6323        let last = reverse_index.len() - 1;
6324        reverse_index[last] ^= 1;
6325        assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
6326    }
6327
6328    #[test]
6329    fn rejects_bad_pack_reverse_index_positions() {
6330        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6331            .expect("test operation should succeed");
6332        let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
6333        assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
6334        let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
6335        assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
6336        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6337            .expect("test operation should succeed");
6338        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
6339        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
6340    }
6341
6342    #[test]
6343    fn parses_pack_mtimes() {
6344        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6345            .expect("test operation should succeed");
6346        let mtimes = PackMtimes::write(
6347            ObjectFormat::Sha1,
6348            &[1, 1_700_000_000, u32::MAX],
6349            &pack_checksum,
6350        )
6351        .expect("test operation should succeed");
6352        let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
6353            .expect("test operation should succeed");
6354        assert_eq!(parsed.version, 1);
6355        assert_eq!(parsed.format, ObjectFormat::Sha1);
6356        assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
6357        assert_eq!(parsed.pack_checksum, pack_checksum);
6358        assert_eq!(
6359            PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
6360                .expect("test operation should succeed"),
6361            mtimes
6362        );
6363    }
6364
6365    #[test]
6366    fn rejects_bad_pack_mtimes_checksum() {
6367        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6368            .expect("test operation should succeed");
6369        let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
6370            .expect("test operation should succeed");
6371        let last = mtimes.len() - 1;
6372        mtimes[last] ^= 1;
6373        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6374    }
6375
6376    #[test]
6377    fn rejects_bad_pack_mtimes_shape() {
6378        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6379            .expect("test operation should succeed");
6380        let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
6381        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6382
6383        let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
6384        wrong_hash[11] = 2;
6385        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6386        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6387            .expect("test operation should succeed");
6388        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6389        assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
6390    }
6391
6392    #[test]
6393    fn parses_multi_pack_index_header_and_chunk_lookup() {
6394        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6395            .expect("test operation should succeed");
6396        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6397            .expect("test operation should succeed");
6398        let chunks = midx_chunks_with_pack_names(
6399            ObjectFormat::Sha1,
6400            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6401            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6402        );
6403        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6404        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6405            .expect("test operation should succeed");
6406        assert_eq!(parsed.version, 2);
6407        assert_eq!(parsed.format, ObjectFormat::Sha1);
6408        assert_eq!(parsed.pack_count, 2);
6409        assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
6410        assert_eq!(parsed.object_count, 2);
6411        assert_eq!(parsed.objects.len(), 2);
6412        assert_eq!(
6413            parsed
6414                .find(&first)
6415                .expect("test operation should succeed")
6416                .pack_int_id,
6417            0
6418        );
6419        assert_eq!(
6420            parsed
6421                .find(&first)
6422                .expect("test operation should succeed")
6423                .offset,
6424            12
6425        );
6426        assert_eq!(
6427            parsed
6428                .find(&second)
6429                .expect("test operation should succeed")
6430                .pack_int_id,
6431            1
6432        );
6433        assert_eq!(
6434            parsed
6435                .find(&second)
6436                .expect("test operation should succeed")
6437                .offset,
6438            0x1_0000_0000
6439        );
6440        assert_eq!(parsed.reverse_index, None);
6441        assert_eq!(parsed.bitmapped_packs, None);
6442        assert_eq!(parsed.chunks.len(), 5);
6443        assert_eq!(parsed.chunks[0].id, *b"PNAM");
6444        assert_eq!(parsed.chunks[0].offset, 84);
6445        assert_eq!(parsed.chunks[0].len, 24);
6446        assert_eq!(parsed.chunks[1].id, *b"OIDF");
6447        assert_eq!(parsed.chunks[1].offset, 108);
6448        assert_eq!(parsed.chunks[1].len, 1024);
6449    }
6450
6451    #[test]
6452    fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
6453        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6454            .expect("test operation should succeed");
6455        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6456            .expect("test operation should succeed");
6457        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6458            .expect("test operation should succeed");
6459        let chunks = midx_chunks_with_pack_names(
6460            ObjectFormat::Sha1,
6461            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6462            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6463        );
6464        let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
6465        let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
6466            .expect("test operation should succeed");
6467
6468        assert!(lookup.contains(&first));
6469        assert!(lookup.contains(&second));
6470        assert!(!lookup.contains(&missing));
6471
6472        let first_entry = lookup
6473            .find(&first)
6474            .expect("test operation should succeed")
6475            .expect("object should be present");
6476        assert_eq!(lookup.pack_name(first_entry.pack_int_id), Some("pack-a.idx"));
6477        assert_eq!(first_entry.offset, 12);
6478
6479        let second_entry = lookup
6480            .find(&second)
6481            .expect("test operation should succeed")
6482            .expect("object should be present");
6483        assert_eq!(lookup.pack_name(second_entry.pack_int_id), Some("pack-b.idx"));
6484        assert_eq!(second_entry.offset, 0x1_0000_0000);
6485        assert!(
6486            lookup
6487                .find(&missing)
6488                .expect("test operation should succeed")
6489                .is_none()
6490        );
6491    }
6492
6493    #[test]
6494    fn rejects_bad_multi_pack_index_checksum() {
6495        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6496        let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6497        let last = midx.len() - 1;
6498        midx[last] ^= 1;
6499        assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
6500    }
6501
6502    #[test]
6503    fn rejects_bad_multi_pack_index_shape() {
6504        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6505        let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6506        wrong_hash[5] = 2;
6507        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6508        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6509            .expect("test operation should succeed");
6510        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6511        assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
6512
6513        let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6514        missing_terminator[12] = b'B';
6515        let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
6516        let checksum =
6517            sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
6518                .expect("test operation should succeed");
6519        missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
6520        assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
6521
6522        let mut bad_offset = multi_pack_index(
6523            ObjectFormat::Sha1,
6524            2,
6525            0,
6526            &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
6527        );
6528        bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
6529        let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
6530        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
6531            .expect("test operation should succeed");
6532        bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
6533        assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
6534    }
6535
6536    #[test]
6537    fn rejects_bad_multi_pack_index_pack_names() {
6538        let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
6539        assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
6540
6541        let too_few = multi_pack_index(
6542            ObjectFormat::Sha1,
6543            2,
6544            2,
6545            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
6546        );
6547        assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
6548
6549        let bad_padding = multi_pack_index(
6550            ObjectFormat::Sha1,
6551            2,
6552            1,
6553            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
6554        );
6555        assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
6556
6557        let unsorted_v1 = multi_pack_index(
6558            ObjectFormat::Sha1,
6559            1,
6560            2,
6561            &midx_chunks_with_pack_names(
6562                ObjectFormat::Sha1,
6563                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6564                &[],
6565            ),
6566        );
6567        assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
6568
6569        let unsorted_v2 = multi_pack_index(
6570            ObjectFormat::Sha1,
6571            2,
6572            2,
6573            &midx_chunks_with_pack_names(
6574                ObjectFormat::Sha1,
6575                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6576                &[],
6577            ),
6578        );
6579        let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
6580            .expect("test operation should succeed");
6581        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6582    }
6583
6584    #[test]
6585    fn rejects_bad_multi_pack_index_object_tables() {
6586        let oid_a = ObjectId::from_hex(
6587            ObjectFormat::Sha1,
6588            "1111111111111111111111111111111111111111",
6589        )
6590        .expect("test operation should succeed");
6591        let oid_b = ObjectId::from_hex(
6592            ObjectFormat::Sha1,
6593            "2222222222222222222222222222222222222222",
6594        )
6595        .expect("test operation should succeed");
6596
6597        let missing_oidf = multi_pack_index(
6598            ObjectFormat::Sha1,
6599            2,
6600            1,
6601            &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
6602        );
6603        assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());
6604
6605        let bad_fanout = vec![
6606            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
6607            (*b"OIDF", vec![0; 256 * 4]),
6608            (*b"OIDL", oid_a.as_bytes().to_vec()),
6609            (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
6610        ];
6611        let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
6612        assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());
6613
6614        let mut unsorted = Vec::new();
6615        unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
6616        unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
6617        let mut oid_lookup = Vec::new();
6618        oid_lookup.extend_from_slice(oid_b.as_bytes());
6619        oid_lookup.extend_from_slice(oid_a.as_bytes());
6620        unsorted.push((*b"OIDL", oid_lookup));
6621        unsorted.push((
6622            *b"OOFF",
6623            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
6624        ));
6625        let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
6626        assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());
6627
6628        let bad_pack = multi_pack_index(
6629            ObjectFormat::Sha1,
6630            2,
6631            1,
6632            &midx_chunks_with_pack_names(
6633                ObjectFormat::Sha1,
6634                b"pack-a.idx\0\0".to_vec(),
6635                &[(oid_a.clone(), 1, 12)],
6636            ),
6637        );
6638        assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());
6639
6640        let mut large_offsets = Vec::new();
6641        let missing_loff = vec![
6642            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
6643            (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
6644            (*b"OIDL", oid_a.as_bytes().to_vec()),
6645            (
6646                *b"OOFF",
6647                midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
6648            ),
6649        ];
6650        let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
6651        assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());
6652
6653        let mut bad_loff =
6654            midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
6655        bad_loff.push((*b"LOFF", vec![0]));
6656        let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
6657        assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
6658    }
6659
6660    #[test]
6661    fn parses_multi_pack_index_bitmap_chunks() {
6662        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6663            .expect("test operation should succeed");
6664        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6665            .expect("test operation should succeed");
6666        let mut chunks = midx_chunks_with_pack_names(
6667            ObjectFormat::Sha1,
6668            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6669            &[(first, 0, 12), (second, 1, 24)],
6670        );
6671        chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
6672        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
6673        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6674
6675        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6676            .expect("test operation should succeed");
6677        assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
6678        assert_eq!(
6679            parsed.bitmapped_packs,
6680            Some(vec![
6681                MultiPackBitmapPack {
6682                    bitmap_pos: 0,
6683                    bitmap_nr: 1,
6684                },
6685                MultiPackBitmapPack {
6686                    bitmap_pos: 1,
6687                    bitmap_nr: 1,
6688                },
6689            ])
6690        );
6691    }
6692
6693    #[test]
6694    fn writes_multi_pack_index_that_round_trips() {
6695        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6696            .expect("test operation should succeed");
6697        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6698            .expect("test operation should succeed");
6699        let bytes = MultiPackIndex::write(
6700            ObjectFormat::Sha1,
6701            2,
6702            &["pack-b.idx".into(), "pack-a.idx".into()],
6703            &[
6704                MultiPackIndexEntry {
6705                    oid: second.clone(),
6706                    pack_int_id: 0,
6707                    offset: 0x1_0000_0000,
6708                },
6709                MultiPackIndexEntry {
6710                    oid: first.clone(),
6711                    pack_int_id: 1,
6712                    offset: 12,
6713                },
6714            ],
6715        )
6716        .expect("test operation should succeed");
6717
6718        let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
6719            .expect("test operation should succeed");
6720        assert_eq!(parsed.version, 2);
6721        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6722        assert_eq!(parsed.object_count, 2);
6723        assert_eq!(
6724            parsed
6725                .find(&first)
6726                .expect("test operation should succeed")
6727                .pack_int_id,
6728            1
6729        );
6730        assert_eq!(
6731            parsed
6732                .find(&first)
6733                .expect("test operation should succeed")
6734                .offset,
6735            12
6736        );
6737        assert_eq!(
6738            parsed
6739                .find(&second)
6740                .expect("test operation should succeed")
6741                .pack_int_id,
6742            0
6743        );
6744        assert_eq!(
6745            parsed
6746                .find(&second)
6747                .expect("test operation should succeed")
6748                .offset,
6749            0x1_0000_0000
6750        );
6751        assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
6752    }
6753
6754    #[test]
6755    fn write_multi_pack_index_rejects_invalid_inputs() {
6756        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
6757            .expect("test operation should succeed");
6758        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
6759        assert!(
6760            MultiPackIndex::write(
6761                ObjectFormat::Sha1,
6762                1,
6763                &["pack-b.idx".into(), "pack-a.idx".into()],
6764                &[],
6765            )
6766            .is_err()
6767        );
6768        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
6769        assert!(
6770            MultiPackIndex::write(
6771                ObjectFormat::Sha1,
6772                2,
6773                &["pack-a.idx".into()],
6774                &[MultiPackIndexEntry {
6775                    oid,
6776                    pack_int_id: 1,
6777                    offset: 12,
6778                }],
6779            )
6780            .is_err()
6781        );
6782        assert!(
6783            MultiPackIndex::write(
6784                ObjectFormat::Sha1,
6785                2,
6786                &["pack-a.idx".into()],
6787                &[
6788                    MultiPackIndexEntry {
6789                        oid,
6790                        pack_int_id: 0,
6791                        offset: 12,
6792                    },
6793                    MultiPackIndexEntry {
6794                        oid,
6795                        pack_int_id: 0,
6796                        offset: 24,
6797                    },
6798                ],
6799            )
6800            .is_err()
6801        );
6802    }
6803
6804    #[test]
6805    fn rejects_bad_multi_pack_index_bitmap_chunks() {
6806        let oid_a = ObjectId::from_hex(
6807            ObjectFormat::Sha1,
6808            "1111111111111111111111111111111111111111",
6809        )
6810        .expect("test operation should succeed");
6811        let oid_b = ObjectId::from_hex(
6812            ObjectFormat::Sha1,
6813            "2222222222222222222222222222222222222222",
6814        )
6815        .expect("test operation should succeed");
6816
6817        let mut duplicate_ridx = midx_chunks_with_pack_names(
6818            ObjectFormat::Sha1,
6819            b"pack-a.idx\0\0".to_vec(),
6820            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
6821        );
6822        duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
6823        let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
6824        assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
6825
6826        let mut short_btmp = midx_chunks_with_pack_names(
6827            ObjectFormat::Sha1,
6828            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6829            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
6830        );
6831        short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
6832        let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
6833        assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
6834
6835        let mut out_of_range_btmp = midx_chunks_with_pack_names(
6836            ObjectFormat::Sha1,
6837            b"pack-a.idx\0\0".to_vec(),
6838            &[(oid_a, 0, 12), (oid_b, 0, 24)],
6839        );
6840        out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
6841        let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
6842        assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
6843    }
6844
6845    #[test]
6846    fn parses_pack_bitmap_index_with_hash_cache() {
6847        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6848            .expect("test operation should succeed");
6849        let bitmap = pack_bitmap_index(
6850            ObjectFormat::Sha1,
6851            3,
6852            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
6853            &pack_checksum,
6854            &[(2, 0, 1, &[0b101])],
6855            Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
6856        );
6857
6858        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
6859            .expect("test operation should succeed");
6860        assert_eq!(parsed.version, 1);
6861        assert_eq!(parsed.format, ObjectFormat::Sha1);
6862        assert_eq!(
6863            parsed.options,
6864            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
6865        );
6866        assert_eq!(parsed.pack_checksum, pack_checksum);
6867        assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
6868        assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
6869        assert_eq!(parsed.entries.len(), 1);
6870        let entry = parsed
6871            .entry_for_index_position(2)
6872            .expect("test operation should succeed");
6873        assert_eq!(entry.xor_offset, 0);
6874        assert_eq!(entry.flags, 1);
6875        assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
6876        assert_eq!(
6877            parsed.name_hash_cache,
6878            Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
6879        );
6880    }
6881
6882    #[test]
6883    fn parses_pack_bitmap_index_sha256() {
6884        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
6885            .expect("test operation should succeed");
6886        let bitmap = pack_bitmap_index(
6887            ObjectFormat::Sha256,
6888            2,
6889            PackBitmapIndex::OPTION_FULL_DAG,
6890            &pack_checksum,
6891            &[(0, 0, 0, &[0b11])],
6892            None,
6893        );
6894
6895        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
6896            .expect("test operation should succeed");
6897        assert_eq!(parsed.version, 1);
6898        assert_eq!(parsed.format, ObjectFormat::Sha256);
6899        assert_eq!(parsed.pack_checksum, pack_checksum);
6900        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
6901        assert_eq!(parsed.entries[0].object_position, 0);
6902        assert_eq!(parsed.name_hash_cache, None);
6903    }
6904
6905    #[test]
6906    fn parses_upstream_git_written_pack_bitmap_index() {
6907        let root = unique_temp_dir("git-pack-bitmap-upstream");
6908        fs::create_dir_all(&root).expect("test operation should succeed");
6909        {
6910            run_git_success(&root, &["init", "-q", "-b", "main"]);
6911            run_git_success(
6912                &root,
6913                &[
6914                    "-c",
6915                    "user.name=Example User",
6916                    "-c",
6917                    "user.email=example@example.invalid",
6918                    "commit",
6919                    "--allow-empty",
6920                    "-q",
6921                    "-m",
6922                    "one",
6923                ],
6924            );
6925            run_git_success(
6926                &root,
6927                &[
6928                    "-c",
6929                    "user.name=Example User",
6930                    "-c",
6931                    "user.email=example@example.invalid",
6932                    "commit",
6933                    "--allow-empty",
6934                    "-q",
6935                    "-m",
6936                    "two",
6937                ],
6938            );
6939            run_git_success(&root, &["repack", "-adb"]);
6940            let pack_dir = root.join(".git").join("objects").join("pack");
6941            let idx_path = single_path_with_extension(&pack_dir, "idx");
6942            let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
6943            let index = PackIndex::parse(
6944                &fs::read(idx_path).expect("test operation should succeed"),
6945                ObjectFormat::Sha1,
6946            )
6947            .expect("test operation should succeed");
6948            let bitmap = PackBitmapIndex::parse(
6949                &fs::read(bitmap_path).expect("test operation should succeed"),
6950                ObjectFormat::Sha1,
6951                index.entries.len(),
6952            )
6953            .expect("test operation should succeed");
6954            assert_eq!(bitmap.pack_checksum, index.pack_checksum);
6955            assert!(!bitmap.entries.is_empty());
6956        };
6957        let _ = fs::remove_dir_all(&root);
6958    }
6959
6960    #[test]
6961    fn rejects_bad_pack_bitmap_index_header_and_checksum() {
6962        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6963            .expect("test operation should succeed");
6964        let bitmap = pack_bitmap_index(
6965            ObjectFormat::Sha1,
6966            1,
6967            PackBitmapIndex::OPTION_FULL_DAG,
6968            &pack_checksum,
6969            &[(0, 0, 0, &[1])],
6970            None,
6971        );
6972
6973        let mut bad_signature = bitmap.clone();
6974        bad_signature[0] = b'X';
6975        assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
6976
6977        let mut bad_version = bitmap.clone();
6978        bad_version[5] = 2;
6979        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
6980        assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
6981
6982        let mut bad_option = bitmap.clone();
6983        bad_option[7] = 0x20;
6984        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
6985        assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
6986
6987        let mut bad_checksum = bitmap;
6988        let last = bad_checksum.len() - 1;
6989        bad_checksum[last] ^= 1;
6990        assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
6991    }
6992
6993    #[test]
6994    fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
6995        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6996            .expect("test operation should succeed");
6997        let bitmap = pack_bitmap_index(
6998            ObjectFormat::Sha1,
6999            2,
7000            PackBitmapIndex::OPTION_FULL_DAG,
7001            &pack_checksum,
7002            &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
7003            None,
7004        );
7005
7006        let mut truncated = bitmap.clone();
7007        truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
7008        refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
7009        assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
7010
7011        let mut out_of_range_position = pack_bitmap_index(
7012            ObjectFormat::Sha1,
7013            2,
7014            PackBitmapIndex::OPTION_FULL_DAG,
7015            &pack_checksum,
7016            &[(2, 0, 0, &[0b01])],
7017            None,
7018        );
7019        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7020        refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
7021        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
7022
7023        let invalid_xor = pack_bitmap_index(
7024            ObjectFormat::Sha1,
7025            2,
7026            PackBitmapIndex::OPTION_FULL_DAG,
7027            &pack_checksum,
7028            &[(0, 1, 0, &[0b01])],
7029            None,
7030        );
7031        assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
7032    }
7033
7034    #[test]
7035    fn parses_single_entry_pack_index_sha256() {
7036        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
7037            .expect("test operation should succeed");
7038        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7039            .expect("test operation should succeed");
7040        let index = single_entry_index(
7041            ObjectFormat::Sha256,
7042            oid,
7043            0x1234_5678,
7044            12,
7045            pack_checksum.clone(),
7046        );
7047        let parsed =
7048            PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
7049        assert_eq!(parsed.version, 2);
7050        assert_eq!(parsed.pack_checksum, pack_checksum);
7051        assert_eq!(parsed.entries.len(), 1);
7052        assert_eq!(
7053            parsed
7054                .find(&oid)
7055                .expect("test operation should succeed")
7056                .offset,
7057            12
7058        );
7059        assert_eq!(
7060            parsed
7061                .find(&oid)
7062                .expect("test operation should succeed")
7063                .crc32,
7064            0x1234_5678
7065        );
7066        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
7067        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha256);
7068    }
7069
7070    #[test]
7071    fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
7072        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
7073    }
7074
7075    #[test]
7076    fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
7077        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
7078    }
7079
7080    #[test]
7081    fn write_packed_rejects_duplicate_objects() {
7082        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7083        assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
7084    }
7085
7086    #[test]
7087    fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
7088        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7089        let sha1 = object
7090            .object_id(ObjectFormat::Sha1)
7091            .expect("test operation should succeed");
7092        let sha256 = object
7093            .object_id(ObjectFormat::Sha256)
7094            .expect("test operation should succeed");
7095        let duplicate = [
7096            PackInput {
7097                oid: &sha1,
7098                object: &object,
7099            },
7100            PackInput {
7101                oid: &sha1,
7102                object: &object,
7103            },
7104        ];
7105        assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
7106
7107        let wrong_format = [PackInput {
7108            oid: &sha256,
7109            object: &object,
7110        }];
7111        assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
7112    }
7113
7114    fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
7115        let objects = similar_blob_family(8);
7116        let packed =
7117            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7118        let undeltified =
7119            PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
7120
7121        // The whole point of delta selection: the packed output is smaller than
7122        // storing every object undeltified.
7123        assert!(
7124            packed.pack.len() < undeltified.pack.len(),
7125            "expected delta pack ({}) smaller than undeltified pack ({})",
7126            packed.pack.len(),
7127            undeltified.pack.len()
7128        );
7129
7130        // At least one object must actually be stored as a delta.
7131        let kinds = pack_entry_kinds(&packed.pack, format);
7132        let delta_count = kinds
7133            .iter()
7134            .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
7135            .count();
7136        assert!(
7137            delta_count >= 1,
7138            "expected at least one delta entry, found kinds {kinds:?}"
7139        );
7140
7141        // Round-trip: every original object reconstructs byte-for-byte.
7142        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7143        assert_eq!(parsed.entries.len(), objects.len());
7144        for object in &objects {
7145            let oid = object
7146                .object_id(format)
7147                .expect("test operation should succeed");
7148            let found = parsed
7149                .entries
7150                .iter()
7151                .find(|entry| entry.entry.oid == oid)
7152                .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
7153            assert_eq!(&found.object, object, "object {oid} did not round-trip");
7154        }
7155
7156        // The index must agree with the pack and locate every object.
7157        let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
7158        assert_eq!(index.pack_checksum, packed.checksum);
7159        for object in &objects {
7160            let oid = object
7161                .object_id(format)
7162                .expect("test operation should succeed");
7163            assert!(index.find(&oid).is_some(), "index missing {oid}");
7164        }
7165    }
7166
7167    #[test]
7168    fn write_packed_emits_ofs_delta_by_default() {
7169        let objects = similar_blob_family(6);
7170        let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
7171            .expect("test operation should succeed");
7172        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7173        assert!(
7174            kinds.contains(&PackObjectKind::OfsDelta),
7175            "expected an ofs-delta entry by default, found {kinds:?}"
7176        );
7177        assert!(
7178            !kinds.contains(&PackObjectKind::RefDelta),
7179            "default self-contained pack must not use ref-delta, found {kinds:?}"
7180        );
7181        // Round-trips.
7182        assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
7183    }
7184
7185    #[test]
7186    fn write_packed_can_emit_ref_delta() {
7187        let objects = similar_blob_family(6);
7188        let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
7189        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7190            .expect("test operation should succeed");
7191        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7192        assert!(
7193            kinds.contains(&PackObjectKind::RefDelta),
7194            "expected a ref-delta entry, found {kinds:?}"
7195        );
7196        assert!(
7197            !kinds.contains(&PackObjectKind::OfsDelta),
7198            "ref-delta mode must not emit ofs-delta, found {kinds:?}"
7199        );
7200
7201        // Ref-delta packs are still self-contained here, so they round-trip
7202        // without any external base lookup.
7203        let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
7204            .expect("test operation should succeed");
7205        assert_eq!(parsed.entries.len(), objects.len());
7206    }
7207
7208    #[test]
7209    fn write_packed_bounds_delta_chain_depth() {
7210        // A long chain of progressively-modified blobs. With a large window
7211        // every object could otherwise delta against its immediate predecessor,
7212        // forming a chain as long as the input.
7213        let objects = incremental_blob_chain(20);
7214        let format = ObjectFormat::Sha1;
7215
7216        for max_depth in [1usize, 2, 5] {
7217            let options = PackWriteOptions::new()
7218                .with_window(20)
7219                .with_depth(max_depth);
7220            let packed = PackFile::write_packed_with_options(&objects, format, &options)
7221                .expect("test operation should succeed");
7222
7223            let depths = pack_entry_depths(&packed.pack, format);
7224            let observed = depths.iter().copied().max().unwrap_or(0);
7225            assert!(
7226                observed <= max_depth,
7227                "max chain depth {observed} exceeded bound {max_depth}"
7228            );
7229
7230            // Still correct: round-trips byte-for-byte.
7231            let parsed =
7232                PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7233            for object in &objects {
7234                let oid = object
7235                    .object_id(format)
7236                    .expect("test operation should succeed");
7237                let found = parsed
7238                    .entries
7239                    .iter()
7240                    .find(|entry| entry.entry.oid == oid)
7241                    .expect("test operation should succeed");
7242                assert_eq!(&found.object, object);
7243            }
7244        }
7245    }
7246
7247    #[test]
7248    fn write_packed_depth_zero_stores_everything_undeltified() {
7249        let objects = similar_blob_family(5);
7250        let options = PackWriteOptions::new().with_depth(0);
7251        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7252            .expect("test operation should succeed");
7253        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
7254        assert!(
7255            kinds
7256                .iter()
7257                .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
7258            "depth 0 must disable deltas, found {kinds:?}"
7259        );
7260    }
7261
7262    #[test]
7263    fn write_thin_uses_external_base_and_round_trips_sha1() {
7264        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
7265    }
7266
7267    #[test]
7268    fn write_thin_uses_external_base_and_round_trips_sha256() {
7269        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
7270    }
7271
7272    fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
7273        // The base object stays OUT of the pack; only `target` is written, as a
7274        // ref-delta against the external base's object id.
7275        let base = blob_with_marker("EXTERNAL-BASE");
7276        let target = blob_with_marker("EXTERNAL-TARGET");
7277        let base_oid = base
7278            .object_id(format)
7279            .expect("test operation should succeed");
7280
7281        let mut external = HashMap::new();
7282        external.insert(base_oid, base.clone());
7283        let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
7284            .expect("test operation should succeed");
7285
7286        // Exactly one entry, encoded as a ref-delta to the external base.
7287        let kinds = pack_entry_kinds(&packed.pack, format);
7288        assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7289
7290        // The external base reference must be the base oid.
7291        let mut offset = 12usize;
7292        let header =
7293            parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7294        assert_eq!(header.kind, PackObjectKind::RefDelta);
7295        let referenced =
7296            ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7297                .expect("test operation should succeed");
7298        assert_eq!(referenced, base_oid);
7299
7300        // A plain (non-thin) parse fails: the base is not present.
7301        assert!(PackFile::parse(&packed.pack, format).is_err());
7302
7303        // A thin parse that supplies the external base reconstructs the target.
7304        let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7305            if oid == &base_oid {
7306                Ok(Some(base.clone()))
7307            } else {
7308                Ok(None)
7309            }
7310        })
7311        .expect("test operation should succeed");
7312        assert_eq!(parsed.entries.len(), 1);
7313        assert_eq!(parsed.entries[0].object, target);
7314    }
7315
7316    #[test]
7317    fn write_packed_preserves_distinct_objects_with_no_similarity() {
7318        // Unrelated objects: nothing should delta, but the pack must still be
7319        // valid and complete.
7320        let objects = vec![
7321            EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
7322            EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
7323            EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
7324        ];
7325        let format = ObjectFormat::Sha1;
7326        let packed =
7327            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7328        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7329        assert_eq!(parsed.entries.len(), objects.len());
7330        for object in &objects {
7331            let oid = object
7332                .object_id(format)
7333                .expect("test operation should succeed");
7334            assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
7335        }
7336    }
7337
7338    /// Build a family of blobs that all share a large common region but differ
7339    /// in a marker placed in the *middle*, so a good delta finds copy regions on
7340    /// both sides of the change.
7341    fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7342        let mut common_head = Vec::new();
7343        for _ in 0..200 {
7344            common_head.extend_from_slice(b"shared header line for delta testing\n");
7345        }
7346        let mut common_tail = Vec::new();
7347        for _ in 0..200 {
7348            common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7349        }
7350        (0..count)
7351            .map(|idx| {
7352                let mut body = common_head.clone();
7353                body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7354                body.extend_from_slice(&common_tail);
7355                EncodedObject::new(ObjectType::Blob, body)
7356            })
7357            .collect()
7358    }
7359
7360    /// Build a chain where each blob is the previous one plus an appended line,
7361    /// so each is highly similar to its predecessor.
7362    fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7363        let mut body = Vec::new();
7364        for _ in 0..100 {
7365            body.extend_from_slice(b"baseline content shared across the whole chain\n");
7366        }
7367        let mut objects = Vec::with_capacity(count);
7368        for idx in 0..count {
7369            body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7370            objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7371        }
7372        objects
7373    }
7374
7375    fn blob_with_marker(marker: &str) -> EncodedObject {
7376        let mut body = Vec::new();
7377        for _ in 0..150 {
7378            body.extend_from_slice(b"common body shared between base and target\n");
7379        }
7380        body.extend_from_slice(marker.as_bytes());
7381        body.push(b'\n');
7382        for _ in 0..150 {
7383            body.extend_from_slice(b"more common body shared between objects\n");
7384        }
7385        EncodedObject::new(ObjectType::Blob, body)
7386    }
7387
7388    /// Classify every entry in a pack (in pack order) by its on-disk kind.
7389    fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7390        pack_entry_descriptors(pack, format)
7391            .into_iter()
7392            .map(|descriptor| descriptor.kind)
7393            .collect()
7394    }
7395
7396    /// Compute each entry's delta chain depth (0 = undeltified base), in pack
7397    /// order. Entries always appear after their in-pack bases, so a single
7398    /// forward pass suffices.
7399    fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7400        let descriptors = pack_entry_descriptors(pack, format);
7401        let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7402        let mut depths = Vec::with_capacity(descriptors.len());
7403        for descriptor in &descriptors {
7404            let depth = match &descriptor.base {
7405                EntryBase::None => 0,
7406                EntryBase::Offset(base_offset) => {
7407                    depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7408                }
7409                // Ref-delta to an in-pack base: look it up by offset via oid is
7410                // unnecessary for these tests (which only use ofs-delta for the
7411                // chains), so treat as depth 1 if unknown.
7412                EntryBase::Ref => 1,
7413            };
7414            depth_by_offset.insert(descriptor.offset, depth);
7415            depths.push(depth);
7416        }
7417        depths
7418    }
7419
    /// Summary of one pack entry as collected by `pack_entry_descriptors`.
    struct EntryDescriptor {
        /// Byte position within the pack at which the entry's header starts.
        offset: u64,
        /// On-disk kind taken from the parsed entry header.
        kind: PackObjectKind,
        /// How (and whether) the entry refers to a delta base.
        base: EntryBase,
    }
7425
    /// Delta-base reference of a pack entry, as classified by
    /// `pack_entry_descriptors`.
    enum EntryBase {
        /// The entry is neither an ofs-delta nor a ref-delta.
        None,
        /// Ofs-delta entry; holds the value returned by
        /// `parse_ofs_delta_base_offset`. NOTE(review): presumably the
        /// absolute pack offset of the base, since `pack_entry_depths` looks
        /// it up in a map keyed by entry offsets — confirm.
        Offset(u64),
        /// Ref-delta entry; the base object id is skipped, not recorded.
        Ref,
    }
7431
7432    fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7433        let trailer_offset = pack.len() - format.raw_len();
7434        let count = u32_be(&pack[8..12]) as usize;
7435        let mut offset = 12usize;
7436        let mut descriptors = Vec::with_capacity(count);
7437        for _ in 0..count {
7438            let entry_offset = offset as u64;
7439            let header =
7440                parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7441            let base = match header.kind {
7442                PackObjectKind::OfsDelta => {
7443                    let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7444                        .expect("test operation should succeed");
7445                    EntryBase::Offset(base_offset)
7446                }
7447                PackObjectKind::RefDelta => {
7448                    offset += format.raw_len();
7449                    EntryBase::Ref
7450                }
7451                _ => EntryBase::None,
7452            };
7453            let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7454            let mut body = Vec::new();
7455            decoder
7456                .read_to_end(&mut body)
7457                .expect("test operation should succeed");
7458            offset += decoder.total_in() as usize;
7459            descriptors.push(EntryDescriptor {
7460                offset: entry_offset,
7461                kind: header.kind,
7462                base,
7463            });
7464        }
7465        descriptors
7466    }
7467
7468    fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7469        let mut base = Vec::new();
7470        for _ in 0..300 {
7471            base.extend_from_slice(b"common payload\n");
7472        }
7473        base.extend_from_slice(b"base\n");
7474        let mut changed = Vec::new();
7475        for _ in 0..300 {
7476            changed.extend_from_slice(b"common payload\n");
7477        }
7478        changed.extend_from_slice(b"changed\n");
7479        (
7480            EncodedObject::new(ObjectType::Blob, base),
7481            EncodedObject::new(ObjectType::Blob, changed),
7482        )
7483    }
7484
7485    fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7486        let mut pack = Vec::new();
7487        pack.extend_from_slice(b"PACK");
7488        pack.extend_from_slice(&2u32.to_be_bytes());
7489        pack.extend_from_slice(&1u32.to_be_bytes());
7490        write_entry_header(&mut pack, object_type, body.len() as u64);
7491        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7492        encoder
7493            .write_all(body)
7494            .expect("test operation should succeed");
7495        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7496        let checksum =
7497            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7498        pack.extend_from_slice(checksum.as_bytes());
7499        pack
7500    }
7501
7502    #[derive(Clone, Copy, Debug)]
7503    enum DeltaKind {
7504        Offset,
7505        Ref,
7506    }
7507
7508    fn two_object_delta_pack(
7509        format: ObjectFormat,
7510        base: &[u8],
7511        result: &[u8],
7512        delta_kind: DeltaKind,
7513    ) -> Vec<u8> {
7514        let mut pack = Vec::new();
7515        pack.extend_from_slice(b"PACK");
7516        pack.extend_from_slice(&2u32.to_be_bytes());
7517        pack.extend_from_slice(&2u32.to_be_bytes());
7518
7519        let base_offset = pack.len();
7520        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7521        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7522        encoder
7523            .write_all(base)
7524            .expect("test operation should succeed");
7525        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7526
7527        let delta = append_suffix_delta(base, result);
7528        let delta_offset = pack.len();
7529        write_pack_entry_header_kind(
7530            &mut pack,
7531            match delta_kind {
7532                DeltaKind::Offset => 6,
7533                DeltaKind::Ref => 7,
7534            },
7535            delta.len() as u64,
7536        );
7537        match delta_kind {
7538            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7539            DeltaKind::Ref => {
7540                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7541                    .expect("test operation should succeed");
7542                pack.extend_from_slice(base_oid.as_bytes());
7543            }
7544        }
7545        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7546        encoder
7547            .write_all(&delta)
7548            .expect("test operation should succeed");
7549        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7550
7551        let checksum =
7552            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7553        pack.extend_from_slice(checksum.as_bytes());
7554        pack
7555    }
7556
7557    fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7558        let mut pack = Vec::new();
7559        pack.extend_from_slice(b"PACK");
7560        pack.extend_from_slice(&2u32.to_be_bytes());
7561        pack.extend_from_slice(&1u32.to_be_bytes());
7562
7563        let delta = append_suffix_delta(base, result);
7564        write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7565        let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7566            .expect("test operation should succeed");
7567        pack.extend_from_slice(base_oid.as_bytes());
7568        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7569        encoder
7570            .write_all(&delta)
7571            .expect("test operation should succeed");
7572        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7573
7574        let checksum =
7575            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7576        pack.extend_from_slice(checksum.as_bytes());
7577        pack
7578    }
7579
7580    fn unique_temp_dir(name: &str) -> PathBuf {
7581        let nanos = SystemTime::now()
7582            .duration_since(UNIX_EPOCH)
7583            .expect("test operation should succeed")
7584            .as_nanos();
7585        std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
7586    }
7587
7588    fn run_git_success(cwd: &Path, args: &[&str]) {
7589        let output = Command::new("git")
7590            .current_dir(cwd)
7591            .args(args)
7592            .output()
7593            .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
7594        assert!(
7595            output.status.success(),
7596            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
7597            output.status.code(),
7598            String::from_utf8_lossy(&output.stdout),
7599            String::from_utf8_lossy(&output.stderr)
7600        );
7601    }
7602
7603    fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
7604        let mut paths = fs::read_dir(dir)
7605            .expect("test operation should succeed")
7606            .map(|entry| entry.expect("test operation should succeed").path())
7607            .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
7608            .collect::<Vec<_>>();
7609        assert_eq!(paths.len(), 1, "expected one .{extension} file");
7610        paths.remove(0)
7611    }
7612
7613    fn pack_bitmap_index(
7614        format: ObjectFormat,
7615        object_count: u32,
7616        options: u16,
7617        pack_checksum: &ObjectId,
7618        entries: &[(u32, u8, u8, &[u64])],
7619        name_hash_cache: Option<&[u32]>,
7620    ) -> Vec<u8> {
7621        let mut out = Vec::new();
7622        out.extend_from_slice(b"BITM");
7623        out.extend_from_slice(&1u16.to_be_bytes());
7624        out.extend_from_slice(&options.to_be_bytes());
7625        out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7626        out.extend_from_slice(pack_checksum.as_bytes());
7627        write_test_ewah(&mut out, object_count, &[0b001]);
7628        write_test_ewah(&mut out, object_count, &[0b010]);
7629        write_test_ewah(&mut out, object_count, &[0b100]);
7630        write_test_ewah(&mut out, object_count, &[0]);
7631        for (position, xor_offset, flags, words) in entries {
7632            out.extend_from_slice(&position.to_be_bytes());
7633            out.push(*xor_offset);
7634            out.push(*flags);
7635            write_test_ewah(&mut out, object_count, words);
7636        }
7637        if let Some(cache) = name_hash_cache {
7638            for value in cache {
7639                out.extend_from_slice(&value.to_be_bytes());
7640            }
7641        }
7642        let checksum =
7643            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7644        out.extend_from_slice(checksum.as_bytes());
7645        out
7646    }
7647
7648    fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
7649        out.extend_from_slice(&bit_size.to_be_bytes());
7650        let words = ewah_literal_words(literals);
7651        out.extend_from_slice(&(words.len() as u32).to_be_bytes());
7652        for word in words {
7653            out.extend_from_slice(&word.to_be_bytes());
7654        }
7655        out.extend_from_slice(&0u32.to_be_bytes());
7656    }
7657
7658    fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
7659        let rlw = (literals.len() as u64) << 33;
7660        let mut words = vec![rlw];
7661        words.extend_from_slice(literals);
7662        words
7663    }
7664
7665    fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7666        let checksum_offset = bytes.len() - format.raw_len();
7667        let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7668            .expect("test operation should succeed");
7669        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7670    }
7671
7672    fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
7673        assert!(result.starts_with(base));
7674        let suffix = &result[base.len()..];
7675        assert!(base.len() < 0x10000);
7676        assert!(suffix.len() < 0x80);
7677        let mut delta = Vec::new();
7678        write_delta_varint(&mut delta, base.len() as u64);
7679        write_delta_varint(&mut delta, result.len() as u64);
7680        delta.push(0x90);
7681        delta.push(base.len() as u8);
7682        delta.push(suffix.len() as u8);
7683        delta.extend_from_slice(suffix);
7684        delta
7685    }
7686
7687    fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
7688        loop {
7689            let mut byte = (value as u8) & 0x7f;
7690            value >>= 7;
7691            if value != 0 {
7692                byte |= 0x80;
7693            }
7694            out.push(byte);
7695            if value == 0 {
7696                break;
7697            }
7698        }
7699    }
7700
7701    fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
7702        let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
7703        size >>= 4;
7704        if size != 0 {
7705            byte |= 0x80;
7706        }
7707        out.push(byte);
7708        while size != 0 {
7709            let mut byte = (size as u8) & 0x7f;
7710            size >>= 7;
7711            if size != 0 {
7712                byte |= 0x80;
7713            }
7714            out.push(byte);
7715        }
7716    }
7717
7718    fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
7719        assert!(relative < 0x80);
7720        out.push(relative as u8);
7721    }
7722
7723    fn single_entry_index(
7724        format: ObjectFormat,
7725        oid: ObjectId,
7726        crc32: u32,
7727        offset: u32,
7728        pack_checksum: ObjectId,
7729    ) -> Vec<u8> {
7730        let mut index = Vec::new();
7731        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7732        index.extend_from_slice(&2u32.to_be_bytes());
7733        for idx in 0..256 {
7734            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7735                1u32
7736            } else {
7737                0u32
7738            };
7739            index.extend_from_slice(&count.to_be_bytes());
7740        }
7741        index.extend_from_slice(oid.as_bytes());
7742        index.extend_from_slice(&crc32.to_be_bytes());
7743        index.extend_from_slice(&offset.to_be_bytes());
7744        index.extend_from_slice(pack_checksum.as_bytes());
7745        let checksum =
7746            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7747        index.extend_from_slice(checksum.as_bytes());
7748        index
7749    }
7750
7751    fn single_entry_index_v1(
7752        format: ObjectFormat,
7753        oid: ObjectId,
7754        offset: u32,
7755        pack_checksum: ObjectId,
7756    ) -> Vec<u8> {
7757        let mut index = Vec::new();
7758        for idx in 0..256 {
7759            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7760                1u32
7761            } else {
7762                0u32
7763            };
7764            index.extend_from_slice(&count.to_be_bytes());
7765        }
7766        index.extend_from_slice(&offset.to_be_bytes());
7767        index.extend_from_slice(oid.as_bytes());
7768        index.extend_from_slice(pack_checksum.as_bytes());
7769        let checksum =
7770            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7771        index.extend_from_slice(checksum.as_bytes());
7772        index
7773    }
7774
7775    fn pack_reverse_index(
7776        format: ObjectFormat,
7777        positions: &[u32],
7778        pack_checksum: ObjectId,
7779    ) -> Vec<u8> {
7780        let mut reverse_index = Vec::new();
7781        reverse_index.extend_from_slice(b"RIDX");
7782        reverse_index.extend_from_slice(&1u32.to_be_bytes());
7783        reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7784        for position in positions {
7785            reverse_index.extend_from_slice(&position.to_be_bytes());
7786        }
7787        reverse_index.extend_from_slice(pack_checksum.as_bytes());
7788        let checksum =
7789            sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7790        reverse_index.extend_from_slice(checksum.as_bytes());
7791        reverse_index
7792    }
7793
7794    fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7795        let mut out = Vec::new();
7796        out.extend_from_slice(b"MTME");
7797        out.extend_from_slice(&1u32.to_be_bytes());
7798        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7799        for mtime in mtimes {
7800            out.extend_from_slice(&mtime.to_be_bytes());
7801        }
7802        out.extend_from_slice(pack_checksum.as_bytes());
7803        let checksum =
7804            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7805        out.extend_from_slice(checksum.as_bytes());
7806        out
7807    }
7808
7809    fn midx_chunks_with_pack_names(
7810        _format: ObjectFormat,
7811        pack_names: Vec<u8>,
7812        entries: &[(ObjectId, u32, u64)],
7813    ) -> Vec<([u8; 4], Vec<u8>)> {
7814        let mut entries = entries.to_vec();
7815        entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7816        let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7817        let mut large_offsets = Vec::new();
7818        let mut chunks = vec![
7819            (*b"PNAM", pack_names),
7820            (*b"OIDF", midx_oid_fanout(&object_ids)),
7821            (*b"OIDL", midx_oid_lookup(&object_ids)),
7822            (
7823                *b"OOFF",
7824                midx_ooff_entries(
7825                    &entries
7826                        .iter()
7827                        .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7828                        .collect::<Vec<_>>(),
7829                    &mut large_offsets,
7830                ),
7831            ),
7832        ];
7833        if !large_offsets.is_empty() {
7834            chunks.push((*b"LOFF", large_offsets));
7835        }
7836        chunks
7837    }
7838
7839    fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7840        let mut counts = [0u32; 256];
7841        for oid in object_ids {
7842            counts[oid.as_bytes()[0] as usize] += 1;
7843        }
7844        let mut running = 0u32;
7845        let mut out = Vec::new();
7846        for count in counts {
7847            running += count;
7848            out.extend_from_slice(&running.to_be_bytes());
7849        }
7850        out
7851    }
7852
7853    fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7854        let mut out = Vec::new();
7855        for oid in object_ids {
7856            out.extend_from_slice(oid.as_bytes());
7857        }
7858        out
7859    }
7860
7861    fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
7862        let mut out = Vec::new();
7863        for (pack_int_id, offset) in entries {
7864            out.extend_from_slice(&pack_int_id.to_be_bytes());
7865            if *offset < 0x8000_0000 {
7866                out.extend_from_slice(&(*offset as u32).to_be_bytes());
7867            } else {
7868                let large_idx = (large_offsets.len() / 8) as u32;
7869                out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
7870                large_offsets.extend_from_slice(&offset.to_be_bytes());
7871            }
7872        }
7873        out
7874    }
7875
7876    fn midx_u32_table(values: &[u32]) -> Vec<u8> {
7877        let mut out = Vec::new();
7878        for value in values {
7879            out.extend_from_slice(&value.to_be_bytes());
7880        }
7881        out
7882    }
7883
7884    fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
7885        let mut out = Vec::new();
7886        for (bitmap_pos, bitmap_nr) in entries {
7887            out.extend_from_slice(&bitmap_pos.to_be_bytes());
7888            out.extend_from_slice(&bitmap_nr.to_be_bytes());
7889        }
7890        out
7891    }
7892
7893    fn multi_pack_index(
7894        format: ObjectFormat,
7895        version: u8,
7896        pack_count: u32,
7897        chunks: &[([u8; 4], Vec<u8>)],
7898    ) -> Vec<u8> {
7899        let lookup_len = (chunks.len() + 1) * 12;
7900        let mut out = Vec::new();
7901        out.extend_from_slice(b"MIDX");
7902        out.push(version);
7903        out.push(hash_function_id(format) as u8);
7904        out.push(chunks.len() as u8);
7905        out.push(0);
7906        out.extend_from_slice(&pack_count.to_be_bytes());
7907        let mut chunk_offset = (12 + lookup_len) as u64;
7908        for (id, data) in chunks {
7909            out.extend_from_slice(id);
7910            out.extend_from_slice(&chunk_offset.to_be_bytes());
7911            chunk_offset += data.len() as u64;
7912        }
7913        out.extend_from_slice(&[0, 0, 0, 0]);
7914        out.extend_from_slice(&chunk_offset.to_be_bytes());
7915        for (_id, data) in chunks {
7916            out.extend_from_slice(data);
7917        }
7918        let checksum =
7919            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7920        out.extend_from_slice(checksum.as_bytes());
7921        out
7922    }
7923
7924    // ---- EWAH encoder / bitmap writer tests ------------------------------
7925
7926    fn pack_checksum_sha1() -> ObjectId {
7927        sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
7928    }
7929
7930    fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
7931        // Wrap the EWAH body with the surrounding offset bookkeeping the parser
7932        // expects: a checksum offset that lies just past the serialised bitmap.
7933        let mut offset = 0usize;
7934        let checksum_offset = bytes.len();
7935        parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
7936            .expect("test operation should succeed")
7937    }
7938
7939    #[test]
7940    fn ewah_encodes_single_literal_word_matching_helper() {
7941        // A bitmap whose only word is a literal must serialise as one RLW with
7942        // literal_len == 1 followed by the literal, identical to the test
7943        // helper used by the existing parser tests.
7944        let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
7945        assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
7946        assert_eq!(ewah.rlw_position, 0);
7947        assert_eq!(ewah.bit_size, 64);
7948    }
7949
7950    #[test]
7951    fn ewah_byte_layout_is_big_endian() {
7952        let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
7953            .expect("test operation should succeed");
7954        let bytes = ewah.to_bytes();
7955        let mut expected = Vec::new();
7956        expected.extend_from_slice(&64u32.to_be_bytes()); // bit_size
7957        expected.extend_from_slice(&2u32.to_be_bytes()); // word count: rlw + literal
7958        expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); // rlw: literal_len = 1
7959        expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
7960        expected.extend_from_slice(&0u32.to_be_bytes()); // rlw_position
7961        assert_eq!(bytes, expected);
7962    }
7963
7964    #[test]
7965    fn ewah_empty_bitmap_serialises_like_git() {
7966        let ewah = EwahBitmap::empty();
7967        let bytes = ewah.to_bytes();
7968        // bit_size = 0, word_count = 0, rlw_position = 0.
7969        assert_eq!(bytes, vec![0u8; 12]);
7970        // It must still parse and decode to nothing.
7971        let parsed = parse_ewah_bytes(&bytes);
7972        assert_eq!(parsed, ewah);
7973        assert!(
7974            parsed
7975                .to_positions()
7976                .expect("test operation should succeed")
7977                .is_empty()
7978        );
7979    }
7980
7981    #[test]
7982    fn ewah_compresses_clean_zero_run() {
7983        // Three all-zero words followed by a literal: the encoder should emit a
7984        // single RLW carrying a run of 3 clean-zero words plus one literal.
7985        let ewah =
7986            EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
7987        assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
7988        let rlw = ewah.words[0];
7989        assert_eq!(rlw & 1, 0, "run bit should be zero");
7990        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
7991        assert_eq!(rlw >> 33, 1, "literal length should be 1");
7992        assert_eq!(ewah.words[1], 0b1);
7993    }
7994
7995    #[test]
7996    fn ewah_compresses_clean_ones_run() {
7997        let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
7998            .expect("test operation should succeed");
7999        // Pure run of ones, no literals: one RLW only.
8000        assert_eq!(ewah.words.len(), 1);
8001        let rlw = ewah.words[0];
8002        assert_eq!(rlw & 1, 1, "run bit should be one");
8003        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
8004        assert_eq!(rlw >> 33, 0, "no literals");
8005    }
8006
8007    #[test]
8008    fn ewah_run_then_literal_then_run_roundtrips() {
8009        let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
8010        let bit_size = (words.len() * 64) as u32;
8011        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8012        assert_eq!(
8013            ewah.to_words().expect("test operation should succeed"),
8014            words
8015        );
8016    }
8017
8018    #[test]
8019    fn ewah_drops_trailing_clean_zero_words() {
8020        // Trailing all-zero words beyond a literal carry no information and git
8021        // does not serialise them, but to_words() restores them up to bit_size.
8022        let words = vec![0b1, 0, 0, 0];
8023        let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
8024        // bit_size of 1 means a single backing word.
8025        assert_eq!(ewah.bit_size, 1);
8026        assert_eq!(
8027            ewah.to_words().expect("test operation should succeed"),
8028            vec![0b1]
8029        );
8030    }
8031
8032    #[test]
8033    fn ewah_from_positions_roundtrips_via_positions() {
8034        let positions = [0u32, 1, 63, 64, 65, 200, 511];
8035        let ewah =
8036            EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
8037        let mut decoded = ewah.to_positions().expect("test operation should succeed");
8038        decoded.sort_unstable();
8039        assert_eq!(decoded, positions);
8040    }
8041
8042    #[test]
8043    fn ewah_from_positions_dedupes_and_orders() {
8044        let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
8045            .expect("test operation should succeed");
8046        assert_eq!(
8047            ewah.to_positions().expect("test operation should succeed"),
8048            vec![5, 100]
8049        );
8050    }
8051
8052    #[test]
8053    fn ewah_huge_zero_run_spans_multiple_rlws() {
8054        // A run longer than the 32-bit running-length field forces the encoder
8055        // to emit more than one RLW. Use one literal bit far out, with a bit
8056        // size large enough to exceed u32::MAX clean words is impractical, so
8057        // assert the field arithmetic via a direct builder run instead.
8058        let mut builder = EwahBuilder::new(0);
8059        builder.add_empty_words(false, 0xffff_ffff);
8060        builder.add_empty_words(false, 5);
8061        let ewah = builder.finish().expect("test operation should succeed");
8062        assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
8063        assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
8064        assert_eq!(ewah.words[1] & 1, 0);
8065        assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
8066        assert_eq!(ewah.rlw_position, 1);
8067    }
8068
8069    #[test]
8070    fn ewah_from_words_rejects_oversized_bit_size() {
8071        // bit_size demands two words but only one is supplied.
8072        assert!(EwahBitmap::from_words(65, &[0]).is_err());
8073    }
8074
8075    #[test]
8076    fn ewah_from_positions_rejects_out_of_range() {
8077        assert!(EwahBitmap::from_positions(64, &[64]).is_err());
8078    }
8079
8080    #[test]
8081    fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
8082        // Exercise the full encode -> serialise -> parse loop for a non-trivial
8083        // pattern and assert structural equality against the parser's model.
8084        let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
8085        let bit_size = (words.len() * 64) as u32;
8086        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
8087        let bytes = ewah.to_bytes();
8088        let parsed = parse_ewah_bytes(&bytes);
8089        assert_eq!(parsed, ewah);
8090        assert_eq!(
8091            parsed.to_words().expect("test operation should succeed"),
8092            words
8093        );
8094    }
8095
8096    #[test]
8097    fn pack_bitmap_index_write_parse_roundtrip_sha1() {
8098        // commit, tree, blob in pack order; one selected commit reaching all.
8099        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
8100        let bytes = write_bitmap(
8101            ObjectFormat::Sha1,
8102            pack_checksum_sha1(),
8103            &object_types,
8104            &[(0u32, 0u32, vec![1u32, 2u32])],
8105            None,
8106        )
8107        .expect("test operation should succeed");
8108        assert_eq!(&bytes[..4], b"BITM");
8109
8110        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
8111            .expect("test operation should succeed");
8112        assert_eq!(parsed.version, 1);
8113        assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
8114        assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
8115        assert_eq!(
8116            parsed
8117                .type_bitmaps
8118                .commits
8119                .to_positions()
8120                .expect("test operation should succeed"),
8121            vec![0]
8122        );
8123        assert_eq!(
8124            parsed
8125                .type_bitmaps
8126                .trees
8127                .to_positions()
8128                .expect("test operation should succeed"),
8129            vec![1]
8130        );
8131        assert_eq!(
8132            parsed
8133                .type_bitmaps
8134                .blobs
8135                .to_positions()
8136                .expect("test operation should succeed"),
8137            vec![2]
8138        );
8139        assert!(
8140            parsed
8141                .type_bitmaps
8142                .tags
8143                .to_positions()
8144                .expect("test operation should succeed")
8145                .is_empty()
8146        );
8147        assert_eq!(parsed.entries.len(), 1);
8148        let entry = parsed
8149            .entry_for_index_position(0)
8150            .expect("test operation should succeed");
8151        assert_eq!(entry.xor_offset, 0);
8152        assert_eq!(entry.flags, 0);
8153        assert_eq!(
8154            entry
8155                .bitmap
8156                .to_positions()
8157                .expect("test operation should succeed"),
8158            vec![0, 1, 2]
8159        );
8160        assert_eq!(parsed.name_hash_cache, None);
8161    }
8162
8163    #[test]
8164    fn pack_bitmap_index_write_parse_roundtrip_sha256() {
8165        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8166            .expect("test operation should succeed");
8167        let object_types = [ObjectType::Commit, ObjectType::Tree];
8168        let bytes = write_bitmap(
8169            ObjectFormat::Sha256,
8170            pack_checksum.clone(),
8171            &object_types,
8172            &[(0u32, 0u32, vec![1u32])],
8173            None,
8174        )
8175        .expect("test operation should succeed");
8176        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
8177            .expect("test operation should succeed");
8178        assert_eq!(parsed.format, ObjectFormat::Sha256);
8179        assert_eq!(parsed.pack_checksum, pack_checksum);
8180        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
8181        assert_eq!(
8182            parsed.entries[0]
8183                .bitmap
8184                .to_positions()
8185                .expect("test operation should succeed"),
8186            vec![0, 1]
8187        );
8188    }
8189
8190    #[test]
8191    fn pack_bitmap_index_write_includes_name_hash_cache() {
8192        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
8193        let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
8194        let bytes = write_bitmap(
8195            ObjectFormat::Sha1,
8196            pack_checksum_sha1(),
8197            &object_types,
8198            &[(0u32, 0u32, vec![1u32, 2u32])],
8199            Some(cache.clone()),
8200        )
8201        .expect("test operation should succeed");
8202        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
8203            .expect("test operation should succeed");
8204        assert_eq!(
8205            parsed.options,
8206            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
8207        );
8208        assert_eq!(parsed.name_hash_cache, Some(cache));
8209    }
8210
8211    #[test]
8212    fn pack_bitmap_writer_supports_multiple_commits() {
8213        let object_types = [
8214            ObjectType::Commit,
8215            ObjectType::Commit,
8216            ObjectType::Tree,
8217            ObjectType::Blob,
8218        ];
8219        let mut writer =
8220            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8221                .expect("test operation should succeed");
8222        writer
8223            .add_commit(0, 0, &[2, 3])
8224            .expect("test operation should succeed");
8225        writer
8226            .add_commit(1, 1, &[2])
8227            .expect("test operation should succeed");
8228        let bytes = writer.write().expect("test operation should succeed");
8229        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
8230            .expect("test operation should succeed");
8231        assert_eq!(parsed.entries.len(), 2);
8232        assert_eq!(
8233            parsed
8234                .type_bitmaps
8235                .commits
8236                .to_positions()
8237                .expect("test operation should succeed"),
8238            vec![0, 1]
8239        );
8240        let first = parsed
8241            .entry_for_index_position(0)
8242            .expect("test operation should succeed");
8243        assert_eq!(
8244            first
8245                .bitmap
8246                .to_positions()
8247                .expect("test operation should succeed"),
8248            vec![0, 2, 3]
8249        );
8250        let second = parsed
8251            .entry_for_index_position(1)
8252            .expect("test operation should succeed");
8253        assert_eq!(
8254            second
8255                .bitmap
8256                .to_positions()
8257                .expect("test operation should succeed"),
8258            vec![1, 2]
8259        );
8260    }
8261
8262    #[test]
8263    fn pack_bitmap_index_recomputes_checksum_on_write() {
8264        // The provided index_checksum field is ignored; write recomputes it so
8265        // a bogus placeholder still produces a valid, parseable file.
8266        let object_types = [ObjectType::Commit, ObjectType::Blob];
8267        let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8268            .expect("test operation should succeed");
8269        let mut index = writer.build().expect("test operation should succeed");
8270        // build() sets an all-zero placeholder checksum.
8271        assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
8272        index.entries.clear(); // mutate the model after build
8273        index.entries.push(PackBitmapEntry {
8274            object_position: 0,
8275            xor_offset: 0,
8276            flags: 0,
8277            bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
8278        });
8279        let bytes = index.write().expect("test operation should succeed");
8280        // Parsing validates the trailing checksum, so a wrong checksum fails.
8281        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
8282            .expect("test operation should succeed");
8283        assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
8284    }
8285
8286    #[test]
8287    fn pack_bitmap_writer_rejects_non_commit_selection() {
8288        let object_types = [ObjectType::Commit, ObjectType::Blob];
8289        let mut writer =
8290            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8291                .expect("test operation should succeed");
8292        // Position 1 is a blob, not a commit.
8293        assert!(writer.add_commit(1, 1, &[]).is_err());
8294        // Position 5 is out of range entirely.
8295        assert!(writer.add_commit(5, 5, &[]).is_err());
8296        // Index position out of range.
8297        assert!(writer.add_commit(0, 5, &[]).is_err());
8298        // Reachable position out of range.
8299        assert!(writer.add_commit(0, 0, &[9]).is_err());
8300    }
8301
8302    #[test]
8303    fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
8304        let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8305            .expect("test operation should succeed");
8306        assert!(
8307            PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
8308                .is_err()
8309        );
8310    }
8311
8312    #[test]
8313    fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
8314        let writer = PackBitmapWriter::new(
8315            ObjectFormat::Sha1,
8316            pack_checksum_sha1(),
8317            &[ObjectType::Commit],
8318        )
8319        .expect("test operation should succeed");
8320        assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
8321    }
8322
8323    #[test]
8324    fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
8325        let mut index = PackBitmapWriter::new(
8326            ObjectFormat::Sha1,
8327            pack_checksum_sha1(),
8328            &[ObjectType::Commit],
8329        )
8330        .expect("test operation should succeed")
8331        .build()
8332        .expect("test operation should succeed");
8333        // Flag set but no cache present.
8334        index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
8335        assert!(index.write().is_err());
8336        // Cache present but flag missing.
8337        index.options = PackBitmapIndex::OPTION_FULL_DAG;
8338        index.name_hash_cache = Some(vec![0]);
8339        assert!(index.write().is_err());
8340    }
8341
8342    #[test]
8343    fn write_bitmap_roundtrips_through_upstream_git_parser() {
8344        // Build a real pack with git, then overwrite reachability with our own
8345        // writer using the real pack checksum and object types, and confirm our
8346        // bytes parse under the same parser that reads upstream bitmaps.
8347        let root = unique_temp_dir("git-pack-bitmap-writer");
8348        fs::create_dir_all(&root).expect("test operation should succeed");
8349        {
8350            run_git_success(&root, &["init", "-q", "-b", "main"]);
8351            run_git_success(
8352                &root,
8353                &[
8354                    "-c",
8355                    "user.name=Example User",
8356                    "-c",
8357                    "user.email=example@example.invalid",
8358                    "commit",
8359                    "--allow-empty",
8360                    "-q",
8361                    "-m",
8362                    "one",
8363                ],
8364            );
8365            run_git_success(&root, &["repack", "-adb"]);
8366            let pack_dir = root.join(".git").join("objects").join("pack");
8367            let idx_path = single_path_with_extension(&pack_dir, "idx");
8368            let index = PackIndex::parse(
8369                &fs::read(idx_path).expect("test operation should succeed"),
8370                ObjectFormat::Sha1,
8371            )
8372            .expect("test operation should succeed");
8373            // Read object types from the pack so the type bitmaps are accurate.
8374            let pack_path = single_path_with_extension(&pack_dir, "pack");
8375            let pack =
8376                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
8377                    .expect("test operation should succeed");
8378            // Map each index entry (sorted by oid) to its pack offset, then to a
8379            // pack-order position so positions line up with the index ordering.
8380            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
8381            offsets.sort_unstable();
8382            let position_of = |offset: u64| -> u32 {
8383                offsets
8384                    .iter()
8385                    .position(|value| *value == offset)
8386                    .expect("test operation should succeed") as u32
8387            };
8388            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
8389            for entry in &index.entries {
8390                let position = position_of(entry.offset) as usize;
8391                // Find the parsed object at this pack offset to read its type.
8392                if let Some(parsed) = pack
8393                    .entries
8394                    .iter()
8395                    .find(|po| po.entry.offset == entry.offset)
8396                {
8397                    object_types[position] = parsed.object.object_type;
8398                }
8399            }
8400            // Select the first commit position we find and reach everything.
8401            let commit_position = object_types
8402                .iter()
8403                .position(|ty| *ty == ObjectType::Commit)
8404                .expect("test operation should succeed") as u32;
8405            // The entry records the commit's position in the oid-sorted index.
8406            let commit_index_position = index
8407                .entries
8408                .iter()
8409                .position(|entry| position_of(entry.offset) == commit_position)
8410                .expect("test operation should succeed")
8411                as u32;
8412            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
8413            let bytes = write_bitmap(
8414                ObjectFormat::Sha1,
8415                index.pack_checksum.clone(),
8416                &object_types,
8417                &[(commit_position, commit_index_position, reachable)],
8418                None,
8419            )
8420            .expect("test operation should succeed");
8421            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
8422                .expect("test operation should succeed");
8423            assert_eq!(parsed.pack_checksum, index.pack_checksum);
8424            assert_eq!(parsed.entries.len(), 1);
8425            assert_eq!(
8426                parsed.entries[0]
8427                    .bitmap
8428                    .to_positions()
8429                    .expect("test operation should succeed")
8430                    .len(),
8431                index.entries.len()
8432            );
8433        };
8434        let _ = fs::remove_dir_all(&root);
8435    }
8436}