Skip to main content

sley_pack/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
/// Size and location bookkeeping for one object stored in a pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the object held by this entry.
    pub oid: ObjectId,
    /// Compressed bytes the entry's payload occupies in the pack; the parser
    /// records the byte count consumed while inflating it.
    pub compressed_size: u64,
    /// Payload size declared in the entry header. For undeltified entries the
    /// parser checks that it equals the inflated length.
    pub uncompressed_size: u64,
    /// Byte offset of the entry's header from the start of the pack.
    pub offset: u64,
}
23
/// Default sliding-window size used by [`PackFile::write_packed`] and assigned
/// to [`PackWriteOptions::window`] by [`PackWriteOptions::new`].
///
/// Each object is compared against up to this many previously emitted
/// candidates of the same type when searching for a small delta. Matches git's
/// default `pack.window`.
pub const DEFAULT_PACK_WINDOW: usize = 10;
30
/// Default maximum delta chain depth used by [`PackFile::write_packed`] and
/// assigned to [`PackWriteOptions::depth`] by [`PackWriteOptions::new`].
///
/// A delta may reference a base that is itself a delta; this bounds how long
/// such chains may grow so that reconstructing any object stays cheap and the
/// reader's recursion stays shallow. Matches git's default `pack.depth`.
pub const DEFAULT_PACK_DEPTH: usize = 50;
37
/// Object-count threshold before pack payload compression is fanned out across
/// worker threads. Below this, thread setup and extra buffering cost more than
/// they save, so compression is expected to stay on the calling thread.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;
42
/// Upper bound on the number of compression worker threads.
///
/// Git gets much of its wall-clock win from using several cores, but
/// unbounded threads can steal cache from delta planning and inflate peak
/// memory on large packs, so parallel compression is kept bounded.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
/// Options controlling sliding-window delta selection during pack generation.
///
/// Construct with [`PackWriteOptions::new`] (sensible defaults) and adjust with
/// the builder-style setters, or build one directly. Used by
/// [`PackFile::write_packed_with_options`] and [`PackFile::write_thin`].
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Number of previous same-type candidates each object is deltified
    /// against. Larger windows find better deltas at higher cost. Defaults to
    /// [`DEFAULT_PACK_WINDOW`].
    pub window: usize,
    /// Maximum delta chain depth. A value of `0` disables deltification.
    /// Defaults to [`DEFAULT_PACK_DEPTH`].
    pub depth: usize,
    /// When `true`, in-pack deltas are encoded as ofs-deltas (the default and
    /// git's preference). When `false`, in-pack deltas use ref-deltas. Deltas
    /// against external thin-pack bases always use ref-deltas regardless.
    pub prefer_ofs_delta: bool,
    /// External base objects, keyed by object id, that are *not* written into
    /// the pack but may be used as delta bases. Supplying any entries here
    /// produces a thin pack (see [`PackFile::write_thin`]). Empty by default,
    /// yielding a self-contained pack. Every key must use the pack's hash
    /// format or writing fails.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// When `true` (the default), objects are reordered by type and size for
    /// better delta locality. When `false`, the input order is preserved (the
    /// emitted pack lists objects in the order supplied); deltas then only
    /// reference earlier input objects. Reordering is always skipped when
    /// deltification is disabled (`depth == 0`), since it has no effect there.
    pub reorder: bool,
}
76
/// `Default` delegates to [`PackWriteOptions::new`], so both constructors
/// produce the same settings.
impl Default for PackWriteOptions {
    fn default() -> Self {
        Self::new()
    }
}
82
83impl PackWriteOptions {
84    /// Options with git-compatible defaults: window
85    /// [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`], ofs-deltas, and
86    /// no external thin bases.
87    pub fn new() -> Self {
88        Self {
89            window: DEFAULT_PACK_WINDOW,
90            depth: DEFAULT_PACK_DEPTH,
91            prefer_ofs_delta: true,
92            thin_bases: HashMap::new(),
93            reorder: true,
94        }
95    }
96
97    /// Set the sliding-window size.
98    pub fn with_window(mut self, window: usize) -> Self {
99        self.window = window;
100        self
101    }
102
103    /// Set the maximum delta chain depth (`0` disables deltas).
104    pub fn with_depth(mut self, depth: usize) -> Self {
105        self.depth = depth;
106        self
107    }
108
109    /// Choose whether in-pack deltas use ofs-delta (`true`) or ref-delta
110    /// (`false`) base references.
111    pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
112        self.prefer_ofs_delta = prefer_ofs_delta;
113        self
114    }
115
116    /// Provide the set of external base objects permitted for a thin pack.
117    pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
118        self.thin_bases = thin_bases;
119        self
120    }
121
122    /// Choose whether objects may be reordered for delta locality (`true`) or
123    /// emitted in input order (`false`).
124    pub fn with_reorder(mut self, reorder: bool) -> Self {
125        self.reorder = reorder;
126        self
127    }
128}
129
/// Settings describing how a repack should be carried out.
///
/// NOTE(review): nothing in this part of the file reads these fields; the
/// per-field notes are inferred from the names and should be confirmed against
/// the code that consumes the policy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably requests that bitmap indexes be written — confirm.
    pub write_bitmaps: bool,
    /// Presumably requests cruft packs for unreachable objects — confirm.
    pub cruft_packs: bool,
    /// Presumably the factor for geometric repacking, `None` when that mode is
    /// not requested — confirm.
    pub geometric_factor: Option<u8>,
}
136
/// A parsed pack: its header version, its resolved objects, and its checksum.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version from the header; the parser accepts 2 and 3.
    pub version: u32,
    /// Objects produced by resolving the pack's entries.
    pub entries: Vec<PackObject>,
    /// Digest of everything before the trailer, verified against the trailer
    /// while parsing.
    pub checksum: ObjectId,
}
143
/// One object read from a pack, paired with its entry metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes, and pack offset of the entry.
    pub entry: PackEntry,
    /// The object's type and body.
    pub object: EncodedObject,
}
149
/// Result of writing (or indexing) a pack: the pack bytes plus its index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Complete pack bytes, including the trailing checksum.
    pub pack: Vec<u8>,
    /// Serialized version 2 pack index for `pack`.
    pub index: Vec<u8>,
    /// Checksum of the pack (the value stored in its trailer).
    pub checksum: ObjectId,
    /// Index entries for the pack's objects. The pack writers push them in the
    /// order objects were emitted; the order for [`PackFile::index_pack`] is
    /// decided by the index builder.
    pub entries: Vec<PackIndexEntry>,
}
157
/// Borrowed object plus its already-known id, as accepted by
/// [`PackFile::write_packed_with_known_ids`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Id the caller asserts belongs to `object`; the writer checks its hash
    /// format but does not re-hash the body.
    pub oid: &'a ObjectId,
    /// The object to store.
    pub object: &'a EncodedObject,
}
163
/// Output of building an index for existing pack bytes, as returned by
/// `PackIndex::write_v2_for_pack`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack that was indexed.
    pub pack_checksum: ObjectId,
    /// One entry per indexed object.
    pub entries: Vec<PackIndexEntry>,
}
170
/// Fully decoded pack index with owned entries.
///
/// NOTE(review): the parser that fills this struct is outside this view; the
/// field notes follow the names and the equivalent [`PackIndexView`] fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-slot fanout table. In [`PackIndexView`] slot `b` bounds the entries
    /// whose first id byte is `b`, and slot 255 is the entry count; presumably
    /// the same holds here — confirm.
    pub fanout: [u32; 256],
    /// Decoded index entries.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum stored for the index itself.
    pub index_checksum: ObjectId,
}
179
/// Borrowing view over serialized pack index bytes.
///
/// The tables are located once at parse time and entries are then read from
/// `bytes` on demand instead of being decoded into owned values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version.
    pub version: u32,
    /// Number of entries; for version 2 this is the last fanout slot.
    pub count: usize,
    /// 256-slot fanout table: lookups search between slot `b - 1` (or 0) and
    /// slot `b` for an id whose first byte is `b`.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes, read from the index trailer.
    pub pack_checksum: ObjectId,
    /// Checksum stored in the final bytes of the index.
    pub index_checksum: ObjectId,
    /// The raw index bytes this view borrows.
    bytes: &'a [u8],
    /// Hash format the index was parsed with; lookups for ids of another
    /// format return `None`.
    format: ObjectFormat,
    /// Byte ranges of the tables inside `bytes`.
    tables: PackIndexViewTables,
}
191
/// Anything that can hand out the raw bytes of a pack index.
///
/// Implementors must be `Debug + Send + Sync`, which lets a source be held
/// behind `Arc<dyn PackIndexByteSource>` (as [`PackIndexViewData`] does).
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Borrow the index bytes.
    fn as_bytes(&self) -> &[u8];
}
195
/// Blanket implementation: every `AsRef<[u8]>` type that is also
/// `Debug + Send + Sync` (sized or not) is a byte source.
impl<T> PackIndexByteSource for T
where
    T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
{
    fn as_bytes(&self) -> &[u8] {
        self.as_ref()
    }
}
204
/// Private newtype wrapping reference-counted index bytes so they can serve
/// as a [`PackIndexByteSource`].
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
207
/// Exposes the wrapped `Arc<[u8]>` contents as the index bytes.
impl PackIndexByteSource for SharedIndexBytes {
    fn as_bytes(&self) -> &[u8] {
        self.0.as_ref()
    }
}
213
/// Owning counterpart of [`PackIndexView`]: the same header fields and table
/// layout, but the index bytes are held through a shared
/// [`PackIndexByteSource`] instead of a borrowed slice.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of entries in the index.
    pub count: usize,
    /// 256-slot fanout table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum stored for the index itself.
    pub index_checksum: ObjectId,
    /// Shared handle to the raw index bytes; cloning the struct clones the
    /// `Arc`, not the bytes.
    bytes: Arc<dyn PackIndexByteSource>,
    /// Hash format of the object ids in the index.
    format: ObjectFormat,
    /// Byte ranges of the tables inside the index bytes.
    tables: PackIndexViewTables,
}
225
/// One row of a pack index: where an object lives in the pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// CRC-32 of the object's pack entry; the pack writer computes it over the
    /// entry header together with the compressed payload.
    pub crc32: u32,
    /// Byte offset of the entry from the start of the pack.
    pub offset: u64,
}
232
/// Result of a successful [`PackIndexView::find`]: the index data for one
/// object, without repeating its id.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 recorded for the object's pack entry.
    pub crc32: u32,
    /// Byte offset of the entry from the start of the pack.
    pub offset: u64,
}
238
/// Byte ranges, within the raw index bytes, of the tables a view reads from.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Layout used for indexes without the version 2 magic: a single table.
    V1 {
        /// Range of the entry table.
        entry_table: Range<usize>,
    },
    /// Version 2 layout: separate tables laid out back to back in this order.
    V2 {
        /// Object ids, one hash-length record per entry.
        oid_table: Range<usize>,
        /// CRC-32 values, 4 bytes per entry.
        crc_table: Range<usize>,
        /// 4-byte offsets; entries with the top bit set are counted as
        /// needing a slot in the large-offset table.
        small_offset_table: Range<usize>,
        /// 8-byte offsets for the entries flagged in the small-offset table.
        large_offset_table: Range<usize>,
    },
}
251
/// Decoded pack reverse index.
///
/// NOTE(review): the parser is outside this view; field notes follow the
/// names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Reverse index format version.
    pub version: u32,
    /// Hash format of the checksums.
    pub format: ObjectFormat,
    /// Presumably index positions listed in pack-offset order — confirm.
    pub positions: Vec<u32>,
    /// Checksum of the pack this file belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum stored for this file itself.
    pub index_checksum: ObjectId,
}
260
/// Decoded pack mtimes table.
///
/// NOTE(review): the parser is outside this view; field notes follow the
/// names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Mtimes file format version.
    pub version: u32,
    /// Hash format of the checksums.
    pub format: ObjectFormat,
    /// Presumably one modification time per object in the pack — confirm the
    /// ordering and units against the parser.
    pub mtimes: Vec<u32>,
    /// Checksum of the pack this file belongs to.
    pub pack_checksum: ObjectId,
    /// Checksum stored for this file itself.
    pub index_checksum: ObjectId,
}
269
/// Decoded pack bitmap index.
///
/// NOTE(review): the parser is outside this view; field notes follow the
/// names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Bitmap index format version.
    pub version: u16,
    /// Hash format of the checksums.
    pub format: ObjectFormat,
    /// Option flags from the bitmap header.
    pub options: u16,
    /// Checksum of the pack the bitmaps describe.
    pub pack_checksum: ObjectId,
    /// Checksum stored for the bitmap index itself.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Per-commit bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache; `None` when the index carries none.
    pub name_hash_cache: Option<Vec<u32>>,
}
281
/// The four per-type bitmaps of a bitmap index, one for each object type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commit objects.
    pub commits: EwahBitmap,
    /// Bitmap for tree objects.
    pub trees: EwahBitmap,
    /// Bitmap for blob objects.
    pub blobs: EwahBitmap,
    /// Bitmap for tag objects.
    pub tags: EwahBitmap,
}
289
/// One commit's entry in a pack bitmap index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// The commit's position in the *oid-sorted* pack index (`.idx` order),
    /// NOT the pack-order position used for the bitmap's bit numbering.
    /// Upstream writes `oid_pos(...)` here (pack-bitmap-write.c) and reads it
    /// back via `nth_packed_object_id` (pack-bitmap.c).
    pub object_position: u32,
    /// XOR offset byte stored with the entry.
    /// NOTE(review): presumably how many entries back the XOR base bitmap
    /// lies, with 0 meaning none — confirm against the decoder.
    pub xor_offset: u8,
    /// Flag byte stored with the entry.
    pub flags: u8,
    /// Reachability bitmap; bit `i` refers to the `i`-th object in *pack
    /// order* (offset order), as mapped by the pack's reverse index.
    pub bitmap: EwahBitmap,
}
303
/// A bitmap in its serialized EWAH word form.
///
/// NOTE(review): the codec is outside this view; field notes follow the names
/// and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap covers.
    pub bit_size: u32,
    /// The compressed 64-bit words.
    pub words: Vec<u64>,
    /// Presumably the position within `words` of the last run-length word —
    /// confirm.
    pub rlw_position: u32,
}
310
/// Fully decoded multi-pack index.
///
/// NOTE(review): the parser is outside this view; field notes follow the
/// names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Multi-pack index format version.
    pub version: u8,
    /// Hash format of the object ids.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects listed.
    pub object_count: u32,
    /// 256-slot fanout table over the object ids.
    pub fanout: [u32; 256],
    /// Decoded object entries.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index data; `None` when absent.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges; `None` when absent.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunks found in the file, each with its id, offset, and length.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum of the multi-pack index.
    pub checksum: ObjectId,
}
325
/// Lookup structure over shared multi-pack index bytes.
///
/// It stores offsets into `bytes` rather than decoded entries.
/// NOTE(review): the lookup methods are outside this view; field notes follow
/// the names and should be confirmed.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    /// Hash format of the object ids.
    format: ObjectFormat,
    /// Number of packs covered.
    pack_count: u32,
    /// Names of the covered packs.
    pack_names: Vec<String>,
    /// 256-slot fanout table over the object ids.
    fanout: [u32; 256],
    /// Number of objects listed.
    object_count: usize,
    /// Presumably the byte offset of the oid lookup table in `bytes`.
    oid_lookup_offset: usize,
    /// Presumably the byte offset of the object offsets table in `bytes`.
    object_offsets_offset: usize,
    /// Presumably the byte offset of the large offsets table, `None` when the
    /// file has none.
    large_offsets_offset: Option<usize>,
    /// Presumably the size of the large offsets table — confirm whether this
    /// counts bytes or entries.
    large_offsets_len: usize,
    /// Shared raw bytes of the multi-pack index.
    bytes: Arc<Vec<u8>>,
}
339
/// One object listed in a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Integer id of the pack that holds the object.
    /// NOTE(review): presumably an index into `MultiPackIndex::pack_names` —
    /// confirm.
    pub pack_int_id: u32,
    /// Byte offset of the object within that pack.
    pub offset: u64,
}
346
/// Bitmap range recorded for one pack of a multi-pack index.
///
/// NOTE(review): field meanings are inferred from the names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Presumably the first bitmap position belonging to the pack.
    pub bitmap_pos: u32,
    /// Presumably how many bitmap positions the pack occupies.
    pub bitmap_nr: u32,
}
352
/// Location of one chunk inside a multi-pack index file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk (presumably in bytes from the start of the file).
    pub offset: u64,
    /// Length of the chunk (presumably in bytes).
    pub len: u64,
}
359
/// Kind announced by a pack entry header: one of the four object types, or
/// one of the two delta encodings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    /// A full commit object.
    Commit,
    /// A full tree object.
    Tree,
    /// A full blob object.
    Blob,
    /// A full tag object.
    Tag,
    /// Delta whose base is identified by a pack offset.
    OfsDelta,
    /// Delta whose base is identified by object id.
    RefDelta,
}
369
/// Intermediate parse result for one pack entry, before deltas are resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// An undeltified entry that was already decoded into a full object.
    Resolved(PackObject),
    /// A delta entry still waiting for its base.
    Delta {
        /// How the base object is identified.
        base: DeltaBase,
        /// Compressed bytes consumed while inflating the delta.
        compressed_size: u64,
        /// Size declared in the entry header, equal to `delta.len()`.
        delta_size: u64,
        /// Byte offset of the entry header from the start of the pack.
        offset: u64,
        /// The inflated delta instructions.
        delta: Vec<u8>,
    },
}
381
/// How a delta entry identifies the object it is based on.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Base identified by a pack offset, as decoded from an ofs-delta header.
    /// NOTE(review): presumably the absolute offset of the base entry (the
    /// decoder is given the delta's own offset) — confirm.
    Offset(u64),
    /// Base identified by object id (ref-delta); it may live outside the pack.
    Ref(ObjectId),
}
387
388impl PackFile {
    /// Parse a SHA-1 pack.
    ///
    /// Shorthand for [`PackFile::parse`] with [`ObjectFormat::Sha1`].
    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
392
    /// Parse a pack whose ids and trailer use the hash `format`.
    ///
    /// No external delta bases are offered: the base lookup handed to the
    /// parser always answers `Ok(None)`. Use [`PackFile::parse_thin`] when
    /// bases may live outside the pack.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_with_base(bytes, format, |_| Ok(None))
    }
396
    /// Parse the pack carried by `bundle`, using the bundle's own hash format.
    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
        Self::parse(&bundle.pack, bundle.format)
    }
400
401    pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
402        let PackIndexBuild {
403            index,
404            pack_checksum,
405            entries,
406        } = PackIndex::write_v2_for_pack(bytes, format)?;
407        Ok(PackWrite {
408            pack: bytes.to_vec(),
409            index,
410            checksum: pack_checksum,
411            entries,
412        })
413    }
414
    /// Parse a pack that may reference delta bases stored outside of it.
    ///
    /// `external_base` is handed to the delta resolver, which can call it with
    /// an object id to obtain a base; the callback returns `Ok(Some(object))`
    /// when it has the object and `Ok(None)` when it does not.
    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
    where
        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
    {
        Self::parse_with_base(bytes, format, external_base)
    }
421
422    fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
423    where
424        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
425    {
426        let trailer_len = format.raw_len();
427        if bytes.len() < 12 + trailer_len {
428            return Err(GitError::InvalidFormat("pack file too short".into()));
429        }
430        let trailer_offset = bytes.len() - trailer_len;
431        let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
432        let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
433        if checksum != expected {
434            return Err(GitError::InvalidFormat(format!(
435                "pack checksum mismatch: expected {expected}, got {checksum}"
436            )));
437        }
438
439        if &bytes[..4] != b"PACK" {
440            return Err(GitError::InvalidFormat("missing PACK signature".into()));
441        }
442        let version = u32_be(&bytes[4..8]);
443        if version != 2 && version != 3 {
444            return Err(GitError::Unsupported(format!("pack version {version}")));
445        }
446        let count = u32_be(&bytes[8..12]) as usize;
447        let mut offset = 12usize;
448        let mut entries = Vec::with_capacity(count);
449        for _ in 0..count {
450            let entry_offset = offset;
451            let header = parse_entry_header(bytes, &mut offset)?;
452            let base =
453                match header.kind {
454                    PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
455                        parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
456                    )),
457                    PackObjectKind::RefDelta => {
458                        let hash_len = format.raw_len();
459                        if offset + hash_len > trailer_offset {
460                            return Err(GitError::InvalidFormat(
461                                "truncated ref-delta base object id".into(),
462                            ));
463                        }
464                        let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
465                        offset += hash_len;
466                        Some(DeltaBase::Ref(oid))
467                    }
468                    _ => None,
469                };
470            let mut body = Vec::new();
471            let consumed = inflate_into(
472                &bytes[offset..trailer_offset],
473                &mut body,
474                header.size.min(usize::MAX as u64) as usize,
475            )?;
476            if body.len() as u64 != header.size {
477                return Err(GitError::InvalidObject(format!(
478                    "pack object declared {} bytes, decoded {}",
479                    header.size,
480                    body.len()
481                )));
482            }
483            if consumed == 0 {
484                return Err(GitError::InvalidFormat(
485                    "empty compressed pack entry".into(),
486                ));
487            }
488            offset = offset
489                .checked_add(consumed)
490                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
491            if offset > trailer_offset {
492                return Err(GitError::InvalidFormat(
493                    "pack entry extends past checksum".into(),
494                ));
495            }
496            if let Some(base) = base {
497                entries.push(ParsedPackEntry::Delta {
498                    base,
499                    compressed_size: consumed as u64,
500                    delta_size: header.size,
501                    offset: entry_offset as u64,
502                    delta: body,
503                });
504            } else {
505                let object_type = match header.kind {
506                    PackObjectKind::Commit => ObjectType::Commit,
507                    PackObjectKind::Tree => ObjectType::Tree,
508                    PackObjectKind::Blob => ObjectType::Blob,
509                    PackObjectKind::Tag => ObjectType::Tag,
510                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
511                };
512                let object = EncodedObject::new(object_type, body);
513                let oid = object.object_id(format)?;
514                entries.push(ParsedPackEntry::Resolved(PackObject {
515                    entry: PackEntry {
516                        oid,
517                        compressed_size: consumed as u64,
518                        uncompressed_size: header.size,
519                        offset: entry_offset as u64,
520                    },
521                    object,
522                }));
523            }
524        }
525        if offset != trailer_offset {
526            return Err(GitError::InvalidFormat(format!(
527                "pack has {} trailing bytes before checksum",
528                trailer_offset - offset
529            )));
530        }
531        Ok(Self {
532            version,
533            entries: resolve_pack_entries(entries, format, &mut external_base)?,
534            checksum,
535        })
536    }
537
    /// Write a SHA-1 pack with every object stored undeltified.
    ///
    /// Shorthand for [`PackFile::write_undeltified`] with
    /// [`ObjectFormat::Sha1`].
    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_undeltified(objects, ObjectFormat::Sha1)
    }
544
    /// Write a pack with every object stored undeltified (no delta entries).
    ///
    /// This is the simple, self-contained encoding; objects appear in the given
    /// order. For smaller output that exploits similarity between objects, use
    /// [`PackFile::write_packed`].
    ///
    /// Implemented as the regular writer with a delta depth of `0` and
    /// reordering turned off.
    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        // Depth 0 disables deltification; `reorder = false` keeps input order.
        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
        Self::write_packed_impl(objects, format, &options)
    }
557
    /// Write a pack using sliding-window delta selection with git-compatible
    /// defaults (window [`DEFAULT_PACK_WINDOW`], depth [`DEFAULT_PACK_DEPTH`],
    /// ofs-deltas, self-contained).
    ///
    /// Objects are grouped by type and ordered for good deltas, then each is
    /// compared against a window of previously emitted candidates; the smallest
    /// acceptable delta is kept, otherwise the object is stored undeltified. The
    /// result round-trips through [`PackFile::parse`].
    ///
    /// Equivalent to [`PackFile::write_packed_with_options`] with
    /// [`PackWriteOptions::new`].
    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
    }
572
    /// Like [`PackFile::write_packed`] but with caller-supplied
    /// [`PackWriteOptions`] (window, depth, base-reference style, reordering,
    /// and optional external thin bases).
    ///
    /// Object ids are computed by hashing each object in `format` before the
    /// pack is planned.
    pub fn write_packed_with_options<T>(
        objects: &[T],
        format: ObjectFormat,
        options: &PackWriteOptions,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_impl(objects, format, options)
    }
586
    /// Like [`PackFile::write_packed`], but uses caller-supplied object ids
    /// instead of re-hashing each object before pack planning.
    ///
    /// This is intended for object-database paths that reached each object by
    /// its id and already trust that id/object mapping. The function validates
    /// id formats and duplicate ids, but it does not re-hash object bodies; use
    /// [`PackFile::write_packed`] when the ids are not already known to be
    /// canonical.
    ///
    /// Uses the default [`PackWriteOptions`]; see
    /// [`PackFile::write_packed_with_known_ids_and_options`] to override them.
    pub fn write_packed_with_known_ids(
        inputs: &[PackInput<'_>],
        format: ObjectFormat,
    ) -> Result<PackWrite> {
        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
    }
601
602    /// Like [`PackFile::write_packed_with_known_ids`] but with caller-supplied
603    /// [`PackWriteOptions`].
604    pub fn write_packed_with_known_ids_and_options(
605        inputs: &[PackInput<'_>],
606        format: ObjectFormat,
607        options: &PackWriteOptions,
608    ) -> Result<PackWrite> {
609        if inputs.len() > u32::MAX as usize {
610            return Err(GitError::InvalidFormat("too many pack objects".into()));
611        }
612        let mut objects = Vec::with_capacity(inputs.len());
613        let mut object_ids = Vec::with_capacity(inputs.len());
614        for input in inputs {
615            if input.oid.format() != format {
616                return Err(GitError::InvalidObjectId(format!(
617                    "pack object id {} uses {}, pack uses {}",
618                    input.oid,
619                    input.oid.format().name(),
620                    format.name()
621                )));
622            }
623            objects.push(input.object);
624            object_ids.push(*input.oid);
625        }
626        Self::write_packed_from_parts(objects, object_ids, format, options)
627    }
628
    /// Write a thin pack: objects may be deltified against `external_bases`
    /// that are *not* included in the pack, referenced by ref-delta to their
    /// object id.
    ///
    /// The receiver must already have (or otherwise obtain) those base objects
    /// and resolve the pack with [`PackFile::parse_thin`]. Window and depth use
    /// the defaults; pass options via [`PackFile::write_packed_with_options`]
    /// with [`PackWriteOptions::with_thin_bases`] for finer control.
    ///
    /// Fails if any key of `external_bases` uses a hash format other than
    /// `format`.
    pub fn write_thin<T>(
        objects: &[T],
        format: ObjectFormat,
        external_bases: HashMap<ObjectId, EncodedObject>,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        let options = PackWriteOptions::new().with_thin_bases(external_bases);
        Self::write_packed_impl(objects, format, &options)
    }
648
649    fn write_packed_impl<T>(
650        objects: &[T],
651        format: ObjectFormat,
652        options: &PackWriteOptions,
653    ) -> Result<PackWrite>
654    where
655        T: Borrow<EncodedObject>,
656    {
657        if objects.len() > u32::MAX as usize {
658            return Err(GitError::InvalidFormat("too many pack objects".into()));
659        }
660        let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
661
662        // Compute object ids up front; they are needed both for the index and,
663        // for ref-deltas, inside the pack entries themselves.
664        let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
665        for object in &objects {
666            object_ids.push(object.object_id(format)?);
667        }
668        Self::write_packed_from_parts(objects, object_ids, format, options)
669    }
670
671    fn write_packed_from_parts(
672        objects: Vec<&EncodedObject>,
673        object_ids: Vec<ObjectId>,
674        format: ObjectFormat,
675        options: &PackWriteOptions,
676    ) -> Result<PackWrite> {
677        let mut seen = HashSet::with_capacity(object_ids.len());
678        for oid in &object_ids {
679            if !seen.insert(oid) {
680                return Err(GitError::InvalidFormat(format!(
681                    "pack contains duplicate object id {oid}"
682                )));
683            }
684        }
685
686        // Validate external thin bases share the pack's hash format.
687        for oid in options.thin_bases.keys() {
688            if oid.format() != format {
689                return Err(GitError::InvalidObjectId(
690                    "thin pack base object id format does not match pack format".into(),
691                ));
692            }
693        }
694
695        // Decide, for each object, whether it is stored undeltified or as a
696        // delta against another object (in-pack or an external thin base), and
697        // obtain the emit order. In-pack deltas only ever reference candidates
698        // that appear earlier in `order`, so emitting in `order` guarantees a
699        // base is always written before any object that deltas against it.
700        let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
701
702        let mut pack = Vec::new();
703        pack.extend_from_slice(b"PACK");
704        pack.extend_from_slice(&2u32.to_be_bytes());
705        pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
706
707        let mut index_entries = Vec::with_capacity(objects.len());
708        // Pack offset at which each original object index was written, or
709        // `None` until it has been emitted.
710        let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
711
712        let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
713
714        for (order_pos, &idx) in order.iter().enumerate() {
715            let offset = pack.len() as u64;
716            let mut entry_bytes = Vec::new();
717            match &plan[idx].base {
718                PlannedBase::None => {
719                    write_entry_header(
720                        &mut entry_bytes,
721                        objects[idx].object_type,
722                        objects[idx].body.len() as u64,
723                    );
724                }
725                PlannedBase::InPack { base_idx, delta } => {
726                    let base_offset = written_offsets[*base_idx].ok_or_else(|| {
727                        GitError::InvalidFormat(
728                            "in-pack delta base emitted after dependent object".into(),
729                        )
730                    })?;
731                    if options.prefer_ofs_delta {
732                        write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
733                        let relative = offset.checked_sub(base_offset).ok_or_else(|| {
734                            GitError::InvalidFormat("ofs-delta base offset is after delta".into())
735                        })?;
736                        write_ofs_delta_offset(&mut entry_bytes, relative)?;
737                    } else {
738                        write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
739                        entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
740                    }
741                }
742                PlannedBase::External { base_oid, delta } => {
743                    write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
744                    entry_bytes.extend_from_slice(base_oid.as_bytes());
745                }
746            }
747            entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
748            let crc32 = crc32fast::hash(&entry_bytes);
749            pack.extend_from_slice(&entry_bytes);
750            written_offsets[idx] = Some(offset);
751            index_entries.push(PackIndexEntry {
752                oid: object_ids[idx].clone(),
753                crc32,
754                offset,
755            });
756        }
757
758        let checksum = sley_core::digest_bytes(format, &pack)?;
759        pack.extend_from_slice(checksum.as_bytes());
760        let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
761        Ok(PackWrite {
762            pack,
763            index,
764            checksum,
765            entries: index_entries,
766        })
767    }
768}
769
770impl<'a> PackIndexView<'a> {
    /// Parse a SHA-1 pack index with full validation.
    ///
    /// Shorthand for [`PackIndexView::parse`] with [`ObjectFormat::Sha1`].
    /// Despite the name, input without the version 2 magic is not rejected
    /// here: the shared parser hands it to the version 1 parser.
    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
774
    /// Parse a pack index in the given hash `format`, verifying the trailing
    /// index checksum and validating the entries.
    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true, true)
    }
778
    /// Parse and validate the index layout without recomputing the trailing
    /// index checksum. The checksum stored in the file is still exposed via
    /// [`PackIndexView::index_checksum`]. Entry validation stays enabled.
    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, true)
    }
785
    /// Parse a local/trusted pack index without recomputing the trailing index
    /// checksum or walking every entry for canonical-order validation.
    ///
    /// This still validates the table layout and all lookup paths remain
    /// bounds-checked, but it avoids O(number-of-objects) startup validation for
    /// repository-owned `.idx` files in hot read paths.
    ///
    /// Do not use this for indexes received from an untrusted source; prefer
    /// [`PackIndexView::parse`] there.
    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, false)
    }
795
    /// Number of entries in the index.
    pub fn count(&self) -> usize {
        self.count
    }
799
    /// The 256-slot fanout table read from the index.
    ///
    /// [`PackIndexView::find`] uses slots `b - 1` and `b` as the search bounds
    /// for ids whose first byte is `b`.
    pub fn fanout(&self) -> &[u32; 256] {
        &self.fanout
    }
803
804    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
805        if oid.format() != self.format {
806            return None;
807        }
808        let bucket = usize::from(oid.as_bytes()[0]);
809        let mut start = if bucket == 0 {
810            0
811        } else {
812            self.fanout[bucket - 1] as usize
813        };
814        let mut end = self.fanout[bucket] as usize;
815        let target = oid.as_bytes();
816
817        while start < end {
818            let mid = start + (end - start) / 2;
819            match self.oid_bytes_at(mid).cmp(target) {
820                std::cmp::Ordering::Less => start = mid + 1,
821                std::cmp::Ordering::Equal => return self.lookup_at(mid),
822                std::cmp::Ordering::Greater => end = mid,
823            }
824        }
825        None
826    }
827
828    fn parse_impl(
829        bytes: &'a [u8],
830        format: ObjectFormat,
831        verify_checksum: bool,
832        validate_entries: bool,
833    ) -> Result<Self> {
834        let hash_len = format.raw_len();
835        if bytes.len() < 4 {
836            return Err(GitError::InvalidFormat("pack index too short".into()));
837        }
838        if bytes[..4] != [0xff, b't', b'O', b'c'] {
839            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
840        }
841        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
842            return Err(GitError::InvalidFormat("pack index too short".into()));
843        }
844        let version = u32_be(&bytes[4..8]);
845        if version != 2 {
846            return Err(GitError::Unsupported(format!(
847                "pack index version {version}"
848            )));
849        }
850        let index_checksum_offset = bytes.len() - hash_len;
851        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
852        if verify_checksum {
853            let actual_index_checksum =
854                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
855            if actual_index_checksum != index_checksum {
856                return Err(GitError::InvalidFormat(format!(
857                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
858                )));
859            }
860        }
861
862        let mut offset = 8usize;
863        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
864        let count = fanout[255] as usize;
865        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
866        offset = oid_table.end;
867        let crc_table = checked_range(offset, count, 4, bytes.len())?;
868        offset = crc_table.end;
869        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
870        offset = small_offset_table.end;
871
872        let large_offset_count = (0..count)
873            .filter(|idx| {
874                let start = small_offset_table.start + idx * 4;
875                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
876            })
877            .count();
878        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
879        offset = large_offset_table.end;
880
881        let expected_trailer_offset = bytes.len() - hash_len * 2;
882        if offset != expected_trailer_offset {
883            return Err(GitError::InvalidFormat(format!(
884                "pack index has {} unexpected bytes before trailer",
885                expected_trailer_offset.saturating_sub(offset)
886            )));
887        }
888        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
889
890        let view = Self {
891            version,
892            count,
893            fanout,
894            pack_checksum,
895            index_checksum,
896            bytes,
897            format,
898            tables: PackIndexViewTables::V2 {
899                oid_table,
900                crc_table,
901                small_offset_table,
902                large_offset_table,
903            },
904        };
905        if validate_entries {
906            view.validate_v2_entries()?;
907        }
908        Ok(view)
909    }
910
911    fn parse_v1_impl(
912        bytes: &'a [u8],
913        format: ObjectFormat,
914        verify_checksum: bool,
915        validate_entries: bool,
916    ) -> Result<Self> {
917        let hash_len = format.raw_len();
918        if bytes.len() < 256 * 4 + 2 * hash_len {
919            return Err(GitError::InvalidFormat("pack index too short".into()));
920        }
921        let index_checksum_offset = bytes.len() - hash_len;
922        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
923        if verify_checksum {
924            let actual_index_checksum =
925                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
926            if actual_index_checksum != index_checksum {
927                return Err(GitError::InvalidFormat(format!(
928                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
929                )));
930            }
931        }
932
933        let mut offset = 0usize;
934        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
935        let count = fanout[255] as usize;
936        let entry_len = hash_len
937            .checked_add(4)
938            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
939        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
940        offset = entry_table.end;
941        let expected_trailer_offset = bytes.len() - hash_len * 2;
942        if offset != expected_trailer_offset {
943            return Err(GitError::InvalidFormat(format!(
944                "pack index has {} unexpected bytes before trailer",
945                expected_trailer_offset.saturating_sub(offset)
946            )));
947        }
948        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
949
950        let view = Self {
951            version: 1,
952            count,
953            fanout,
954            pack_checksum,
955            index_checksum,
956            bytes,
957            format,
958            tables: PackIndexViewTables::V1 { entry_table },
959        };
960        if validate_entries {
961            view.validate_v1_entries()?;
962        }
963        Ok(view)
964    }
965
966    fn validate_v2_entries(&self) -> Result<()> {
967        let PackIndexViewTables::V2 {
968            oid_table,
969            small_offset_table,
970            large_offset_table,
971            ..
972        } = &self.tables
973        else {
974            unreachable!("v2 validation only runs for v2 views");
975        };
976        let oid_table = self.slice(oid_table.clone());
977        let small_offset_table = self.slice(small_offset_table.clone());
978        let large_offset_table = self.slice(large_offset_table.clone());
979        let hash_len = self.format.raw_len();
980        for idx in 0..self.count {
981            let oid_start = idx * hash_len;
982            let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
983            if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
984                return Err(GitError::InvalidFormat(
985                    "pack index object ids are not strictly ascending".into(),
986                ));
987            }
988            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
989
990            let offset_start = idx * 4;
991            let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
992            pack_index_v2_offset(raw_offset, large_offset_table)?;
993        }
994        Ok(())
995    }
996
997    fn validate_v1_entries(&self) -> Result<()> {
998        let PackIndexViewTables::V1 { entry_table } = &self.tables else {
999            unreachable!("v1 validation only runs for v1 views");
1000        };
1001        let entry_table = self.slice(entry_table.clone());
1002        let hash_len = self.format.raw_len();
1003        let entry_len = hash_len
1004            .checked_add(4)
1005            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1006        for idx in 0..self.count {
1007            let start = idx * entry_len;
1008            let oid_start = start + 4;
1009            let oid_bytes = &entry_table[oid_start..start + entry_len];
1010            if idx > 0 {
1011                let previous_oid_start = oid_start - entry_len;
1012                let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1013                if previous_oid >= oid_bytes {
1014                    return Err(GitError::InvalidFormat(
1015                        "pack index object ids are not strictly sorted".into(),
1016                    ));
1017                }
1018            }
1019            validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1020        }
1021        Ok(())
1022    }
1023
1024    fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1025        let hash_len = self.format.raw_len();
1026        match &self.tables {
1027            PackIndexViewTables::V1 { entry_table } => {
1028                let entry_table = self.slice(entry_table.clone());
1029                let entry_len = hash_len + 4;
1030                let start = idx * entry_len + 4;
1031                &entry_table[start..start + hash_len]
1032            }
1033            PackIndexViewTables::V2 { oid_table, .. } => {
1034                let oid_table = self.slice(oid_table.clone());
1035                let start = idx * hash_len;
1036                &oid_table[start..start + hash_len]
1037            }
1038        }
1039    }
1040
1041    fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1042        if idx >= self.count {
1043            return None;
1044        }
1045        let hash_len = self.format.raw_len();
1046        match &self.tables {
1047            PackIndexViewTables::V1 { entry_table } => {
1048                let entry_table = self.slice(entry_table.clone());
1049                let entry_len = hash_len + 4;
1050                let start = idx * entry_len;
1051                Some(PackIndexLookup {
1052                    crc32: 0,
1053                    offset: u64::from(u32_be(&entry_table[start..start + 4])),
1054                })
1055            }
1056            PackIndexViewTables::V2 {
1057                crc_table,
1058                small_offset_table,
1059                large_offset_table,
1060                ..
1061            } => {
1062                let crc_table = self.slice(crc_table.clone());
1063                let small_offset_table = self.slice(small_offset_table.clone());
1064                let large_offset_table = self.slice(large_offset_table.clone());
1065                let crc_start = idx * 4;
1066                let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1067                Some(PackIndexLookup {
1068                    crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1069                    offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1070                })
1071            }
1072        }
1073    }
1074
1075    fn slice(&self, range: Range<usize>) -> &'a [u8] {
1076        &self.bytes[range]
1077    }
1078}
1079
1080impl PackIndexViewData {
1081    pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1082        Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1083    }
1084
1085    /// Parse and validate an owned index view without recomputing the trailing
1086    /// index checksum. The stored checksum is still exposed via
1087    /// [`PackIndexViewData::index_checksum`].
1088    pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1089        Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1090    }
1091
1092    /// Parse a local/trusted owned index view without the checksum or full-entry
1093    /// validation passes.
1094    pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1095        Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1096    }
1097
1098    pub fn parse_source(
1099        bytes: Arc<dyn PackIndexByteSource>,
1100        format: ObjectFormat,
1101    ) -> Result<Self> {
1102        Self::parse_impl(bytes, format, true, true)
1103    }
1104
1105    pub fn parse_source_without_checksum(
1106        bytes: Arc<dyn PackIndexByteSource>,
1107        format: ObjectFormat,
1108    ) -> Result<Self> {
1109        Self::parse_impl(bytes, format, false, true)
1110    }
1111
1112    pub fn parse_trusted_source_without_checksum(
1113        bytes: Arc<dyn PackIndexByteSource>,
1114        format: ObjectFormat,
1115    ) -> Result<Self> {
1116        Self::parse_impl(bytes, format, false, false)
1117    }
1118
1119    pub fn count(&self) -> usize {
1120        self.count
1121    }
1122
1123    pub fn fanout(&self) -> &[u32; 256] {
1124        &self.fanout
1125    }
1126
1127    pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1128        self.as_view().find(oid)
1129    }
1130
1131    pub fn as_view(&self) -> PackIndexView<'_> {
1132        PackIndexView {
1133            version: self.version,
1134            count: self.count,
1135            fanout: self.fanout,
1136            pack_checksum: self.pack_checksum,
1137            index_checksum: self.index_checksum,
1138            bytes: self.bytes.as_bytes(),
1139            format: self.format,
1140            tables: self.tables.clone(),
1141        }
1142    }
1143
1144    fn parse_impl(
1145        bytes: Arc<dyn PackIndexByteSource>,
1146        format: ObjectFormat,
1147        verify_checksum: bool,
1148        validate_entries: bool,
1149    ) -> Result<Self> {
1150        let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1151            let view = PackIndexView::parse_impl(
1152                bytes.as_bytes(),
1153                format,
1154                verify_checksum,
1155                validate_entries,
1156            )?;
1157            (
1158                view.version,
1159                view.count,
1160                view.fanout,
1161                view.pack_checksum,
1162                view.index_checksum,
1163                view.tables,
1164            )
1165        };
1166        Ok(Self {
1167            version,
1168            count,
1169            fanout,
1170            pack_checksum,
1171            index_checksum,
1172            bytes,
1173            format,
1174            tables,
1175        })
1176    }
1177}
1178
1179impl PackIndex {
1180    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1181        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1182    }
1183
1184    pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1185        let trailer_len = format.raw_len();
1186        if pack_bytes.len() < 12 + trailer_len {
1187            return Err(GitError::InvalidFormat("pack file too short".into()));
1188        }
1189        let trailer_offset = pack_bytes.len() - trailer_len;
1190        let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1191        let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1192        if pack_checksum != expected {
1193            return Err(GitError::InvalidFormat(format!(
1194                "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1195            )));
1196        }
1197
1198        if &pack_bytes[..4] != b"PACK" {
1199            return Err(GitError::InvalidFormat("missing PACK signature".into()));
1200        }
1201        let version = u32_be(&pack_bytes[4..8]);
1202        if version != 2 && version != 3 {
1203            return Err(GitError::Unsupported(format!("pack version {version}")));
1204        }
1205        let count = u32_be(&pack_bytes[8..12]) as usize;
1206        let mut offset = 12usize;
1207        let mut parsed_entries = Vec::with_capacity(count);
1208        let mut raw_entries = Vec::with_capacity(count);
1209        for _ in 0..count {
1210            let entry_offset = offset;
1211            let header = parse_entry_header(pack_bytes, &mut offset)?;
1212            let base = match header.kind {
1213                PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1214                    pack_bytes,
1215                    &mut offset,
1216                    entry_offset as u64,
1217                )?)),
1218                PackObjectKind::RefDelta => {
1219                    let hash_len = format.raw_len();
1220                    if offset + hash_len > trailer_offset {
1221                        return Err(GitError::InvalidFormat(
1222                            "truncated ref-delta base object id".into(),
1223                        ));
1224                    }
1225                    let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1226                    offset += hash_len;
1227                    Some(DeltaBase::Ref(oid))
1228                }
1229                _ => None,
1230            };
1231            let mut body = Vec::new();
1232            let consumed = inflate_into(
1233                &pack_bytes[offset..trailer_offset],
1234                &mut body,
1235                header.size.min(usize::MAX as u64) as usize,
1236            )?;
1237            if body.len() as u64 != header.size {
1238                return Err(GitError::InvalidObject(format!(
1239                    "pack object declared {} bytes, decoded {}",
1240                    header.size,
1241                    body.len()
1242                )));
1243            }
1244            if consumed == 0 {
1245                return Err(GitError::InvalidFormat(
1246                    "empty compressed pack entry".into(),
1247                ));
1248            }
1249            offset = offset
1250                .checked_add(consumed)
1251                .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1252            if offset > trailer_offset {
1253                return Err(GitError::InvalidFormat(
1254                    "pack entry extends past checksum".into(),
1255                ));
1256            }
1257            raw_entries.push((
1258                entry_offset as u64,
1259                crc32fast::hash(&pack_bytes[entry_offset..offset]),
1260            ));
1261            if let Some(base) = base {
1262                parsed_entries.push(ParsedPackEntry::Delta {
1263                    base,
1264                    compressed_size: consumed as u64,
1265                    delta_size: header.size,
1266                    offset: entry_offset as u64,
1267                    delta: body,
1268                });
1269            } else {
1270                let object_type = match header.kind {
1271                    PackObjectKind::Commit => ObjectType::Commit,
1272                    PackObjectKind::Tree => ObjectType::Tree,
1273                    PackObjectKind::Blob => ObjectType::Blob,
1274                    PackObjectKind::Tag => ObjectType::Tag,
1275                    PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1276                };
1277                let object = EncodedObject::new(object_type, body);
1278                let oid = object.object_id(format)?;
1279                parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1280                    entry: PackEntry {
1281                        oid,
1282                        compressed_size: consumed as u64,
1283                        uncompressed_size: header.size,
1284                        offset: entry_offset as u64,
1285                    },
1286                    object,
1287                }));
1288            }
1289        }
1290        if offset != trailer_offset {
1291            return Err(GitError::InvalidFormat(format!(
1292                "pack has {} trailing bytes before checksum",
1293                trailer_offset - offset
1294            )));
1295        }
1296
1297        let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1298        let entries = resolved
1299            .iter()
1300            .zip(raw_entries)
1301            .map(|(object, (offset, crc32))| PackIndexEntry {
1302                oid: object.entry.oid,
1303                crc32,
1304                offset,
1305            })
1306            .collect::<Vec<_>>();
1307        let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1308        Ok(PackIndexBuild {
1309            index,
1310            pack_checksum,
1311            entries,
1312        })
1313    }
1314
1315    pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1316        Self::parse(bytes, ObjectFormat::Sha1)
1317    }
1318
1319    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1320        let hash_len = format.raw_len();
1321        if bytes.len() < 4 {
1322            return Err(GitError::InvalidFormat("pack index too short".into()));
1323        }
1324        if bytes[..4] != [0xff, b't', b'O', b'c'] {
1325            return Self::parse_v1(bytes, format);
1326        }
1327        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1328            return Err(GitError::InvalidFormat("pack index too short".into()));
1329        }
1330        let version = u32_be(&bytes[4..8]);
1331        if version != 2 {
1332            return Err(GitError::Unsupported(format!(
1333                "pack index version {version}"
1334            )));
1335        }
1336        let index_checksum_offset = bytes.len() - hash_len;
1337        let actual_index_checksum =
1338            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1339        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1340        if actual_index_checksum != index_checksum {
1341            return Err(GitError::InvalidFormat(format!(
1342                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1343            )));
1344        }
1345
1346        let mut offset = 8usize;
1347        let mut fanout = [0u32; 256];
1348        let mut previous = 0u32;
1349        for slot in &mut fanout {
1350            *slot = u32_be(&bytes[offset..offset + 4]);
1351            if *slot < previous {
1352                return Err(GitError::InvalidFormat(
1353                    "pack index fanout is not monotonic".into(),
1354                ));
1355            }
1356            previous = *slot;
1357            offset += 4;
1358        }
1359        let count = fanout[255] as usize;
1360        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1361        offset = oid_table.end;
1362        let crc_table = checked_range(offset, count, 4, bytes.len())?;
1363        offset = crc_table.end;
1364        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1365        offset = small_offset_table.end;
1366
1367        let large_offset_count = (0..count)
1368            .filter(|idx| {
1369                let start = small_offset_table.start + idx * 4;
1370                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1371            })
1372            .count();
1373        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1374        offset = large_offset_table.end;
1375
1376        let expected_trailer_offset = bytes.len() - hash_len * 2;
1377        if offset != expected_trailer_offset {
1378            return Err(GitError::InvalidFormat(format!(
1379                "pack index has {} unexpected bytes before trailer",
1380                expected_trailer_offset.saturating_sub(offset)
1381            )));
1382        }
1383        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1384
1385        let mut entries = Vec::with_capacity(count);
1386        for idx in 0..count {
1387            let oid_start = oid_table.start + idx * hash_len;
1388            let crc_start = crc_table.start + idx * 4;
1389            let offset_start = small_offset_table.start + idx * 4;
1390            let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1391            // Object ids must be strictly ascending: lookup binary-searches them,
1392            // and the fanout must match the first byte. A malformed/forged index
1393            // (e.g. from a received pack) would otherwise yield silent misses.
1394            if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1395                return Err(GitError::InvalidFormat(
1396                    "pack index object ids are not strictly ascending".into(),
1397                ));
1398            }
1399            let expected_min = if oid_bytes[0] == 0 {
1400                0
1401            } else {
1402                fanout[usize::from(oid_bytes[0] - 1)]
1403            };
1404            if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1405                return Err(GitError::InvalidFormat(
1406                    "pack index object id is outside its fanout bucket".into(),
1407                ));
1408            }
1409            let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1410            let offset = if raw_offset & 0x8000_0000 == 0 {
1411                u64::from(raw_offset)
1412            } else {
1413                let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1414                let large_start = large_offset_table.start + large_idx * 8;
1415                if large_idx >= large_offset_count {
1416                    return Err(GitError::InvalidFormat(
1417                        "pack index large offset points past table".into(),
1418                    ));
1419                }
1420                u64_be(&bytes[large_start..large_start + 8])
1421            };
1422            entries.push(PackIndexEntry {
1423                oid: ObjectId::from_raw(format, oid_bytes)?,
1424                crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1425                offset,
1426            });
1427        }
1428        Ok(Self {
1429            version,
1430            fanout,
1431            entries,
1432            pack_checksum,
1433            index_checksum,
1434        })
1435    }
1436
1437    fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1438        let hash_len = format.raw_len();
1439        if bytes.len() < 256 * 4 + 2 * hash_len {
1440            return Err(GitError::InvalidFormat("pack index too short".into()));
1441        }
1442        let index_checksum_offset = bytes.len() - hash_len;
1443        let actual_index_checksum =
1444            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1445        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1446        if actual_index_checksum != index_checksum {
1447            return Err(GitError::InvalidFormat(format!(
1448                "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1449            )));
1450        }
1451
1452        let mut offset = 0usize;
1453        let mut fanout = [0u32; 256];
1454        let mut previous = 0u32;
1455        for slot in &mut fanout {
1456            *slot = u32_be(&bytes[offset..offset + 4]);
1457            if *slot < previous {
1458                return Err(GitError::InvalidFormat(
1459                    "pack index fanout is not monotonic".into(),
1460                ));
1461            }
1462            previous = *slot;
1463            offset += 4;
1464        }
1465        let count = fanout[255] as usize;
1466        let entry_len = hash_len
1467            .checked_add(4)
1468            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1469        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1470        offset = entry_table.end;
1471        let expected_trailer_offset = bytes.len() - hash_len * 2;
1472        if offset != expected_trailer_offset {
1473            return Err(GitError::InvalidFormat(format!(
1474                "pack index has {} unexpected bytes before trailer",
1475                expected_trailer_offset.saturating_sub(offset)
1476            )));
1477        }
1478        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1479
1480        let mut entries = Vec::with_capacity(count);
1481        let mut previous_oid: Option<ObjectId> = None;
1482        for idx in 0..count {
1483            let start = entry_table.start + idx * entry_len;
1484            let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1485            if let Some(previous) = &previous_oid
1486                && previous.as_bytes() >= oid.as_bytes()
1487            {
1488                return Err(GitError::InvalidFormat(
1489                    "pack index object ids are not strictly sorted".into(),
1490                ));
1491            }
1492            previous_oid = Some(oid);
1493            entries.push(PackIndexEntry {
1494                oid,
1495                crc32: 0,
1496                offset: u64::from(u32_be(&bytes[start..start + 4])),
1497            });
1498        }
1499        Ok(Self {
1500            version: 1,
1501            fanout,
1502            entries,
1503            pack_checksum,
1504            index_checksum,
1505        })
1506    }
1507
1508    pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1509        self.entries
1510            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1511            .ok()
1512            .map(|idx| &self.entries[idx])
1513    }
1514
1515    pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1516        Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1517    }
1518
1519    pub fn write_v2(
1520        format: ObjectFormat,
1521        entries: &[PackIndexEntry],
1522        pack_checksum: &ObjectId,
1523    ) -> Result<Vec<u8>> {
1524        if pack_checksum.format() != format {
1525            return Err(GitError::InvalidObjectId(
1526                "pack checksum format does not match index format".into(),
1527            ));
1528        }
1529        let mut entries = entries.iter().collect::<Vec<_>>();
1530        entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1531        for pair in entries.windows(2) {
1532            if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1533                return Err(GitError::InvalidFormat(format!(
1534                    "pack index contains duplicate object id {}",
1535                    pair[0].oid
1536                )));
1537            }
1538        }
1539        let mut fanout = [0u32; 256];
1540        for entry in &entries {
1541            if entry.oid.format() != format {
1542                return Err(GitError::InvalidObjectId(
1543                    "pack index entry format does not match index format".into(),
1544                ));
1545            }
1546            let first = entry.oid.as_bytes()[0] as usize;
1547            fanout[first] = fanout[first]
1548                .checked_add(1)
1549                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1550        }
1551        let mut running = 0u32;
1552        for slot in &mut fanout {
1553            running = running
1554                .checked_add(*slot)
1555                .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1556            *slot = running;
1557        }
1558
1559        let mut index = Vec::new();
1560        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1561        index.extend_from_slice(&2u32.to_be_bytes());
1562        for count in fanout {
1563            index.extend_from_slice(&count.to_be_bytes());
1564        }
1565        for entry in &entries {
1566            index.extend_from_slice(entry.oid.as_bytes());
1567        }
1568        for entry in &entries {
1569            index.extend_from_slice(&entry.crc32.to_be_bytes());
1570        }
1571
1572        let mut large_offsets = Vec::new();
1573        for entry in &entries {
1574            if entry.offset < 0x8000_0000 {
1575                index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1576            } else {
1577                if large_offsets.len() > 0x7fff_ffff {
1578                    return Err(GitError::InvalidFormat(
1579                        "too many large pack offsets".into(),
1580                    ));
1581                }
1582                let large_idx = large_offsets.len() as u32;
1583                index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1584                large_offsets.push(entry.offset);
1585            }
1586        }
1587        for offset in large_offsets {
1588            index.extend_from_slice(&offset.to_be_bytes());
1589        }
1590        index.extend_from_slice(pack_checksum.as_bytes());
1591        let index_checksum = sley_core::digest_bytes(format, &index)?;
1592        index.extend_from_slice(index_checksum.as_bytes());
1593        Ok(index)
1594    }
1595}
1596
1597/// The `.rev` table for a pack: index positions (the rank of each object in
1598/// the oid-sorted `.idx`) listed in pack order (ascending pack offset), as
1599/// upstream `write_rev_file` lays them out. Accepts `entries` in any order;
1600/// the result feeds [`PackReverseIndex::write`].
1601pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1602    let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1603    oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1604    let mut index_position = vec![0u32; entries.len()];
1605    for (position, &entry) in oid_sorted.iter().enumerate() {
1606        index_position[entry] = position as u32;
1607    }
1608    let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1609    by_offset.sort_by_key(|&entry| entries[entry].offset);
1610    by_offset
1611        .into_iter()
1612        .map(|entry| index_position[entry])
1613        .collect()
1614}
1615
1616impl PackReverseIndex {
1617    pub fn write(
1618        format: ObjectFormat,
1619        positions: &[u32],
1620        pack_checksum: &ObjectId,
1621    ) -> Result<Vec<u8>> {
1622        if pack_checksum.format() != format {
1623            return Err(GitError::InvalidObjectId(
1624                "pack checksum format does not match reverse index format".into(),
1625            ));
1626        }
1627        validate_position_permutation(positions)?;
1628
1629        let mut out = Vec::new();
1630        out.extend_from_slice(b"RIDX");
1631        out.extend_from_slice(&1u32.to_be_bytes());
1632        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1633        for position in positions {
1634            out.extend_from_slice(&position.to_be_bytes());
1635        }
1636        out.extend_from_slice(pack_checksum.as_bytes());
1637        let checksum = sley_core::digest_bytes(format, &out)?;
1638        out.extend_from_slice(checksum.as_bytes());
1639        Ok(out)
1640    }
1641
1642    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1643        let hash_len = format.raw_len();
1644        let table_len = object_count
1645            .checked_mul(4)
1646            .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1647        let min_len = 12usize
1648            .checked_add(table_len)
1649            .and_then(|len| len.checked_add(hash_len * 2))
1650            .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1651        if bytes.len() < min_len {
1652            return Err(GitError::InvalidFormat("reverse index too short".into()));
1653        }
1654        if bytes.len() != min_len {
1655            return Err(GitError::InvalidFormat(format!(
1656                "reverse index has {} trailing bytes",
1657                bytes.len() - min_len
1658            )));
1659        }
1660        if &bytes[..4] != b"RIDX" {
1661            return Err(GitError::InvalidFormat(
1662                "missing reverse index signature".into(),
1663            ));
1664        }
1665        let version = u32_be(&bytes[4..8]);
1666        if version != 1 {
1667            return Err(GitError::Unsupported(format!(
1668                "reverse index version {version}"
1669            )));
1670        }
1671        let hash_id = u32_be(&bytes[8..12]);
1672        if hash_id != hash_function_id(format) {
1673            return Err(GitError::InvalidFormat(format!(
1674                "reverse index hash id {hash_id} does not match {}",
1675                format.name()
1676            )));
1677        }
1678
1679        let index_checksum_offset = bytes.len() - hash_len;
1680        let actual_index_checksum =
1681            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1682        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1683        if actual_index_checksum != index_checksum {
1684            return Err(GitError::InvalidFormat(format!(
1685                "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1686            )));
1687        }
1688
1689        let pack_checksum_offset = index_checksum_offset - hash_len;
1690        let pack_checksum =
1691            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1692        let mut positions = Vec::with_capacity(object_count);
1693        let mut offset = 12usize;
1694        for _ in 0..object_count {
1695            let position = u32_be(&bytes[offset..offset + 4]);
1696            positions.push(position);
1697            offset += 4;
1698        }
1699        validate_position_permutation(&positions)?;
1700
1701        Ok(Self {
1702            version,
1703            format,
1704            positions,
1705            pack_checksum,
1706            index_checksum,
1707        })
1708    }
1709}
1710
1711impl PackMtimes {
1712    pub fn write(
1713        format: ObjectFormat,
1714        mtimes: &[u32],
1715        pack_checksum: &ObjectId,
1716    ) -> Result<Vec<u8>> {
1717        if pack_checksum.format() != format {
1718            return Err(GitError::InvalidObjectId(
1719                "pack checksum format does not match mtimes format".into(),
1720            ));
1721        }
1722
1723        let mut out = Vec::new();
1724        out.extend_from_slice(b"MTME");
1725        out.extend_from_slice(&1u32.to_be_bytes());
1726        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1727        for mtime in mtimes {
1728            out.extend_from_slice(&mtime.to_be_bytes());
1729        }
1730        out.extend_from_slice(pack_checksum.as_bytes());
1731        let checksum = sley_core::digest_bytes(format, &out)?;
1732        out.extend_from_slice(checksum.as_bytes());
1733        Ok(out)
1734    }
1735
1736    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1737        let hash_len = format.raw_len();
1738        let table_len = object_count
1739            .checked_mul(4)
1740            .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
1741        let expected_len = 12usize
1742            .checked_add(table_len)
1743            .and_then(|len| len.checked_add(hash_len * 2))
1744            .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
1745        if bytes.len() < expected_len {
1746            return Err(GitError::InvalidFormat("mtimes file too short".into()));
1747        }
1748        if bytes.len() != expected_len {
1749            return Err(GitError::InvalidFormat(format!(
1750                "mtimes file has {} trailing bytes",
1751                bytes.len() - expected_len
1752            )));
1753        }
1754        if &bytes[..4] != b"MTME" {
1755            return Err(GitError::InvalidFormat("missing mtimes signature".into()));
1756        }
1757        let version = u32_be(&bytes[4..8]);
1758        if version != 1 {
1759            return Err(GitError::Unsupported(format!("mtimes version {version}")));
1760        }
1761        let hash_id = u32_be(&bytes[8..12]);
1762        if hash_id != hash_function_id(format) {
1763            return Err(GitError::InvalidFormat(format!(
1764                "mtimes hash id {hash_id} does not match {}",
1765                format.name()
1766            )));
1767        }
1768
1769        let index_checksum_offset = bytes.len() - hash_len;
1770        let actual_index_checksum =
1771            sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1772        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1773        if actual_index_checksum != index_checksum {
1774            return Err(GitError::InvalidFormat(format!(
1775                "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1776            )));
1777        }
1778
1779        let pack_checksum_offset = index_checksum_offset - hash_len;
1780        let pack_checksum =
1781            ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1782        let mut mtimes = Vec::with_capacity(object_count);
1783        let mut offset = 12usize;
1784        for _ in 0..object_count {
1785            mtimes.push(u32_be(&bytes[offset..offset + 4]));
1786            offset += 4;
1787        }
1788
1789        Ok(Self {
1790            version,
1791            format,
1792            mtimes,
1793            pack_checksum,
1794            index_checksum,
1795        })
1796    }
1797}
1798
1799impl PackBitmapIndex {
1800    pub const OPTION_FULL_DAG: u16 = 0x0001;
1801    pub const OPTION_HASH_CACHE: u16 = 0x0004;
1802
1803    pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1804        let hash_len = format.raw_len();
1805        let min_len = 12usize
1806            .checked_add(hash_len * 2)
1807            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1808        if bytes.len() < min_len {
1809            return Err(GitError::InvalidFormat("bitmap index too short".into()));
1810        }
1811        if &bytes[..4] != b"BITM" {
1812            return Err(GitError::InvalidFormat(
1813                "missing bitmap index signature".into(),
1814            ));
1815        }
1816        let version = u16_be(&bytes[4..6]);
1817        if version != 1 {
1818            return Err(GitError::Unsupported(format!(
1819                "bitmap index version {version}"
1820            )));
1821        }
1822        let options = u16_be(&bytes[6..8]);
1823        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
1824        if options & !known_options != 0 {
1825            return Err(GitError::Unsupported(format!(
1826                "bitmap index options {:#06x}",
1827                options & !known_options
1828            )));
1829        }
1830        let entry_count = u32_be(&bytes[8..12]) as usize;
1831        let checksum_offset = bytes.len() - hash_len;
1832        let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1833        let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1834        if actual_index_checksum != index_checksum {
1835            return Err(GitError::InvalidFormat(format!(
1836                "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1837            )));
1838        }
1839
1840        let pack_checksum_end = 12usize
1841            .checked_add(hash_len)
1842            .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1843        let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
1844        let mut offset = pack_checksum_end;
1845        let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1846        let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1847        let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1848        let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1849
1850        let mut entries = Vec::with_capacity(entry_count);
1851        for idx in 0..entry_count {
1852            if checksum_offset.saturating_sub(offset) < 6 {
1853                return Err(GitError::InvalidFormat(
1854                    "truncated bitmap index entry".into(),
1855                ));
1856            }
1857            let object_position = u32_be(&bytes[offset..offset + 4]);
1858            offset += 4;
1859            if object_position as usize >= object_count {
1860                return Err(GitError::InvalidFormat(
1861                    "bitmap index entry points past object table".into(),
1862                ));
1863            }
1864            let xor_offset = bytes[offset];
1865            offset += 1;
1866            if xor_offset as usize > idx || xor_offset > 160 {
1867                return Err(GitError::InvalidFormat(
1868                    "bitmap index entry has invalid XOR offset".into(),
1869                ));
1870            }
1871            let flags = bytes[offset];
1872            offset += 1;
1873            let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1874            entries.push(PackBitmapEntry {
1875                object_position,
1876                xor_offset,
1877                flags,
1878                bitmap,
1879            });
1880        }
1881
1882        let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
1883            let cache_len = object_count
1884                .checked_mul(4)
1885                .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
1886            if checksum_offset.saturating_sub(offset) < cache_len {
1887                return Err(GitError::InvalidFormat(
1888                    "truncated bitmap hash cache".into(),
1889                ));
1890            }
1891            let mut cache = Vec::with_capacity(object_count);
1892            for _ in 0..object_count {
1893                cache.push(u32_be(&bytes[offset..offset + 4]));
1894                offset += 4;
1895            }
1896            Some(cache)
1897        } else {
1898            None
1899        };
1900
1901        if offset != checksum_offset {
1902            return Err(GitError::InvalidFormat(format!(
1903                "bitmap index has {} trailing bytes",
1904                checksum_offset - offset
1905            )));
1906        }
1907
1908        Ok(Self {
1909            version,
1910            format,
1911            options,
1912            pack_checksum,
1913            index_checksum,
1914            type_bitmaps: PackBitmapTypeBitmaps {
1915                commits,
1916                trees,
1917                blobs,
1918                tags,
1919            },
1920            entries,
1921            name_hash_cache,
1922        })
1923    }
1924
1925    /// Looks up the stored entry whose commit sits at `position` in the
1926    /// oid-sorted pack index (`.idx` order; see [`PackBitmapEntry::object_position`]).
1927    pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
1928        self.entries
1929            .iter()
1930            .find(|entry| entry.object_position == position)
1931    }
1932}
1933
1934fn parse_bitmap_ewah(
1935    bytes: &[u8],
1936    offset: &mut usize,
1937    checksum_offset: usize,
1938    _object_count: usize,
1939) -> Result<EwahBitmap> {
1940    if checksum_offset.saturating_sub(*offset) < 12 {
1941        return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
1942    }
1943    let bit_size = u32_be(&bytes[*offset..*offset + 4]);
1944    *offset += 4;
1945    let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
1946    *offset += 4;
1947    let words_len = word_count
1948        .checked_mul(8)
1949        .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
1950    if checksum_offset.saturating_sub(*offset) < words_len + 4 {
1951        return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
1952    }
1953    let mut words = Vec::with_capacity(word_count);
1954    for _ in 0..word_count {
1955        words.push(u64_be(&bytes[*offset..*offset + 8]));
1956        *offset += 8;
1957    }
1958    let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
1959    *offset += 4;
1960    validate_ewah_words(bit_size, &words, rlw_position)?;
1961    Ok(EwahBitmap {
1962        bit_size,
1963        words,
1964        rlw_position,
1965    })
1966}
1967
1968fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
1969    if words.is_empty() {
1970        if rlw_position != 0 || bit_size != 0 {
1971            return Err(GitError::InvalidFormat(
1972                "EWAH bitmap has invalid empty RLW".into(),
1973            ));
1974        }
1975        return Ok(());
1976    }
1977    if rlw_position as usize >= words.len() {
1978        return Err(GitError::InvalidFormat(
1979            "EWAH RLW position points past word table".into(),
1980        ));
1981    }
1982    let mut word_idx = 0usize;
1983    let mut decoded_words = 0u64;
1984    while word_idx < words.len() {
1985        let rlw = words[word_idx];
1986        let run_words = (rlw >> 1) & 0xffff_ffff;
1987        let literal_words = (rlw >> 33) as usize;
1988        word_idx += 1;
1989        word_idx = word_idx
1990            .checked_add(literal_words)
1991            .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
1992        if word_idx > words.len() {
1993            return Err(GitError::InvalidFormat(
1994                "EWAH literal words extend past word table".into(),
1995            ));
1996        }
1997        decoded_words = decoded_words
1998            .checked_add(run_words)
1999            .and_then(|value| value.checked_add(literal_words as u64))
2000            .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2001    }
2002    let decoded_bits = decoded_words
2003        .checked_mul(64)
2004        .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2005    if decoded_bits < u64::from(bit_size) {
2006        return Err(GitError::InvalidFormat(
2007            "EWAH bitmap decodes fewer bits than declared".into(),
2008        ));
2009    }
2010    Ok(())
2011}
2012
2013impl MultiPackIndex {
2014    pub fn write(
2015        format: ObjectFormat,
2016        version: u8,
2017        pack_names: &[String],
2018        objects: &[MultiPackIndexEntry],
2019    ) -> Result<Vec<u8>> {
2020        Self::write_with_reverse_index(format, version, pack_names, objects, None)
2021    }
2022
2023    /// Like [`MultiPackIndex::write`], but when `preferred_pack` is `Some`,
2024    /// additionally emits the `RIDX` chunk: the object order a multi-pack
2025    /// `.bitmap` numbers its bits in ("pseudo-pack order" — every object of
2026    /// the preferred pack first, then the rest by pack id, each pack's slice
2027    /// in offset order), stored as one u32 midx position per object.
2028    ///
2029    /// `preferred_pack` is the pack-int-id receiving pseudo-pack priority; it
2030    /// must be in range.
2031    pub fn write_with_reverse_index(
2032        format: ObjectFormat,
2033        version: u8,
2034        pack_names: &[String],
2035        objects: &[MultiPackIndexEntry],
2036        preferred_pack: Option<u32>,
2037    ) -> Result<Vec<u8>> {
2038        if let Some(preferred) = preferred_pack
2039            && preferred as usize >= pack_names.len()
2040        {
2041            return Err(GitError::InvalidFormat(format!(
2042                "preferred pack {preferred} out of range for {} packs",
2043                pack_names.len()
2044            )));
2045        }
2046        if version != 1 && version != 2 {
2047            return Err(GitError::Unsupported(format!(
2048                "multi-pack-index version {version}"
2049            )));
2050        }
2051        if pack_names.len() > u32::MAX as usize {
2052            return Err(GitError::InvalidFormat(
2053                "too many multi-pack-index packs".into(),
2054            ));
2055        }
2056        if objects.len() > u32::MAX as usize {
2057            return Err(GitError::InvalidFormat(
2058                "too many multi-pack-index objects".into(),
2059            ));
2060        }
2061        validate_midx_pack_names(pack_names)?;
2062        if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
2063            return Err(GitError::InvalidFormat(
2064                "multi-pack-index v1 pack names must be sorted".into(),
2065            ));
2066        }
2067
2068        let mut objects = objects.iter().collect::<Vec<_>>();
2069        objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2070        let mut previous_oid: Option<&ObjectId> = None;
2071        for object in &objects {
2072            if object.oid.format() != format {
2073                return Err(GitError::InvalidObjectId(
2074                    "multi-pack-index object format does not match index format".into(),
2075                ));
2076            }
2077            if let Some(previous) = previous_oid
2078                && previous.as_bytes() == object.oid.as_bytes()
2079            {
2080                return Err(GitError::InvalidFormat(
2081                    "multi-pack-index contains duplicate object ids".into(),
2082                ));
2083            }
2084            if object.pack_int_id as usize >= pack_names.len() {
2085                return Err(GitError::InvalidFormat(
2086                    "multi-pack-index object points past pack table".into(),
2087                ));
2088            }
2089            previous_oid = Some(&object.oid);
2090        }
2091
2092        let mut large_offsets = Vec::new();
2093        let mut chunks = vec![
2094            (*b"PNAM", write_midx_pack_names(pack_names)),
2095            (*b"OIDF", write_midx_oid_fanout(&objects)?),
2096            (*b"OIDL", write_midx_oid_lookup(&objects)),
2097            (
2098                *b"OOFF",
2099                write_midx_object_offsets(&objects, &mut large_offsets)?,
2100            ),
2101        ];
2102        if !large_offsets.is_empty() {
2103            chunks.push((*b"LOFF", large_offsets));
2104        }
2105        if let Some(preferred) = preferred_pack {
2106            // `objects` is already in midx (oid-sorted) order here; the chunk
2107            // lists each object's midx position in pseudo-pack order.
2108            let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
2109            pseudo.sort_by_key(|&midx_pos| {
2110                let object = objects[midx_pos as usize];
2111                (
2112                    object.pack_int_id != preferred,
2113                    object.pack_int_id,
2114                    object.offset,
2115                )
2116            });
2117            let mut ridx = Vec::with_capacity(pseudo.len() * 4);
2118            for midx_pos in pseudo {
2119                ridx.extend_from_slice(&midx_pos.to_be_bytes());
2120            }
2121            chunks.push((*b"RIDX", ridx));
2122        }
2123        write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
2124    }
2125
2126    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2127        let hash_len = format.raw_len();
2128        if bytes.len() < 12 + 12 + hash_len {
2129            return Err(GitError::InvalidFormat(
2130                "multi-pack-index file too short".into(),
2131            ));
2132        }
2133        if &bytes[..4] != b"MIDX" {
2134            return Err(GitError::InvalidFormat(
2135                "missing multi-pack-index signature".into(),
2136            ));
2137        }
2138        let version = bytes[4];
2139        if version != 1 && version != 2 {
2140            return Err(GitError::Unsupported(format!(
2141                "multi-pack-index version {version}"
2142            )));
2143        }
2144        let hash_id = bytes[5];
2145        if u32::from(hash_id) != hash_function_id(format) {
2146            return Err(GitError::InvalidFormat(format!(
2147                "multi-pack-index hash id {hash_id} does not match {}",
2148                format.name()
2149            )));
2150        }
2151        let chunk_count = bytes[6] as usize;
2152        let base_midx_count = bytes[7];
2153        if base_midx_count != 0 {
2154            return Err(GitError::Unsupported(format!(
2155                "multi-pack-index base count {base_midx_count}"
2156            )));
2157        }
2158        let pack_count = u32_be(&bytes[8..12]);
2159        let lookup_len = (chunk_count + 1)
2160            .checked_mul(12)
2161            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2162        let data_start = 12usize
2163            .checked_add(lookup_len)
2164            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2165        let checksum_offset = bytes.len() - hash_len;
2166        if data_start > checksum_offset {
2167            return Err(GitError::InvalidFormat(
2168                "truncated multi-pack-index chunk lookup".into(),
2169            ));
2170        }
2171
2172        let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2173        let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2174        if actual_checksum != checksum {
2175            return Err(GitError::InvalidFormat(format!(
2176                "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
2177            )));
2178        }
2179
2180        let mut entries = Vec::with_capacity(chunk_count + 1);
2181        let mut offset = 12usize;
2182        for _ in 0..=chunk_count {
2183            let id = [
2184                bytes[offset],
2185                bytes[offset + 1],
2186                bytes[offset + 2],
2187                bytes[offset + 3],
2188            ];
2189            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
2190            entries.push((id, chunk_offset));
2191            offset += 12;
2192        }
2193        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2194            return Err(GitError::InvalidFormat(
2195                "multi-pack-index chunk lookup is empty".into(),
2196            ));
2197        };
2198        if terminator_id != [0, 0, 0, 0] {
2199            return Err(GitError::InvalidFormat(
2200                "multi-pack-index chunk lookup missing terminator".into(),
2201            ));
2202        }
2203        if terminator_offset != checksum_offset as u64 {
2204            return Err(GitError::InvalidFormat(
2205                "multi-pack-index terminator does not point at checksum".into(),
2206            ));
2207        }
2208
2209        let mut chunks = Vec::with_capacity(chunk_count);
2210        let mut previous_offset = data_start as u64;
2211        for pair in entries.windows(2) {
2212            let (id, chunk_offset) = pair[0];
2213            let (_next_id, next_offset) = pair[1];
2214            if id == [0, 0, 0, 0] {
2215                return Err(GitError::InvalidFormat(
2216                    "multi-pack-index chunk id is zero before terminator".into(),
2217                ));
2218            }
2219            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2220                return Err(GitError::InvalidFormat(
2221                    "multi-pack-index chunk offsets are not monotonic".into(),
2222                ));
2223            }
2224            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2225                return Err(GitError::InvalidFormat(
2226                    "multi-pack-index chunk length is invalid".into(),
2227                ));
2228            }
2229            chunks.push(MultiPackIndexChunk {
2230                id,
2231                offset: chunk_offset,
2232                len: next_offset - chunk_offset,
2233            });
2234            previous_offset = chunk_offset;
2235        }
2236
2237        let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
2238        let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
2239        let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
2240        let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
2241        let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
2242        let bitmapped_packs =
2243            parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
2244
2245        Ok(Self {
2246            version,
2247            format,
2248            pack_count,
2249            pack_names,
2250            object_count: object_count as u32,
2251            fanout,
2252            objects,
2253            reverse_index,
2254            bitmapped_packs,
2255            chunks,
2256            checksum,
2257        })
2258    }
2259
2260    pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
2261        self.objects
2262            .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2263            .ok()
2264            .map(|idx| &self.objects[idx])
2265    }
2266}
2267
2268impl MultiPackIndexOidLookup {
2269    pub fn parse(bytes: Arc<Vec<u8>>, format: ObjectFormat) -> Result<Self> {
2270        let hash_len = format.raw_len();
2271        if bytes.len() < 12 + 12 + hash_len {
2272            return Err(GitError::InvalidFormat(
2273                "multi-pack-index file too short".into(),
2274            ));
2275        }
2276        if &bytes[..4] != b"MIDX" {
2277            return Err(GitError::InvalidFormat(
2278                "missing multi-pack-index signature".into(),
2279            ));
2280        }
2281        let version = bytes[4];
2282        if version != 1 && version != 2 {
2283            return Err(GitError::Unsupported(format!(
2284                "multi-pack-index version {version}"
2285            )));
2286        }
2287        let hash_id = bytes[5];
2288        if u32::from(hash_id) != hash_function_id(format) {
2289            return Err(GitError::InvalidFormat(format!(
2290                "multi-pack-index hash id {hash_id} does not match {}",
2291                format.name()
2292            )));
2293        }
2294        let chunk_count = bytes[6] as usize;
2295        let base_midx_count = bytes[7];
2296        if base_midx_count != 0 {
2297            return Err(GitError::Unsupported(format!(
2298                "multi-pack-index base count {base_midx_count}"
2299            )));
2300        }
2301        let pack_count = u32_be(&bytes[8..12]);
2302        let lookup_len = (chunk_count + 1)
2303            .checked_mul(12)
2304            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2305        let data_start = 12usize
2306            .checked_add(lookup_len)
2307            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2308        let checksum_offset = bytes.len() - hash_len;
2309        if data_start > checksum_offset {
2310            return Err(GitError::InvalidFormat(
2311                "truncated multi-pack-index chunk lookup".into(),
2312            ));
2313        }
2314
2315        let mut entries = Vec::with_capacity(chunk_count + 1);
2316        let mut offset = 12usize;
2317        for _ in 0..=chunk_count {
2318            let id = [
2319                bytes[offset],
2320                bytes[offset + 1],
2321                bytes[offset + 2],
2322                bytes[offset + 3],
2323            ];
2324            let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
2325            entries.push((id, chunk_offset));
2326            offset += 12;
2327        }
2328        let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2329            return Err(GitError::InvalidFormat(
2330                "multi-pack-index chunk lookup is empty".into(),
2331            ));
2332        };
2333        if terminator_id != [0, 0, 0, 0] {
2334            return Err(GitError::InvalidFormat(
2335                "multi-pack-index chunk lookup missing terminator".into(),
2336            ));
2337        }
2338        if terminator_offset != checksum_offset as u64 {
2339            return Err(GitError::InvalidFormat(
2340                "multi-pack-index terminator does not point at checksum".into(),
2341            ));
2342        }
2343
2344        let mut chunks = Vec::with_capacity(chunk_count);
2345        let mut previous_offset = data_start as u64;
2346        for pair in entries.windows(2) {
2347            let (id, chunk_offset) = pair[0];
2348            let (_next_id, next_offset) = pair[1];
2349            if id == [0, 0, 0, 0] {
2350                return Err(GitError::InvalidFormat(
2351                    "multi-pack-index chunk id is zero before terminator".into(),
2352                ));
2353            }
2354            if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2355                return Err(GitError::InvalidFormat(
2356                    "multi-pack-index chunk offsets are not monotonic".into(),
2357                ));
2358            }
2359            if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2360                return Err(GitError::InvalidFormat(
2361                    "multi-pack-index chunk length is invalid".into(),
2362                ));
2363            }
2364            chunks.push(MultiPackIndexChunk {
2365                id,
2366                offset: chunk_offset,
2367                len: next_offset - chunk_offset,
2368            });
2369            previous_offset = chunk_offset;
2370        }
2371
2372        let pack_names = parse_midx_pack_names(&bytes, &chunks, pack_count as usize, version)?;
2373        let (fanout, object_count) = parse_midx_oid_fanout(&bytes, &chunks)?;
2374        let oid_lookup = midx_chunk_data(&bytes, &chunks, *b"OIDL", true)?
2375            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
2376        let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
2377            GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
2378        })?;
2379        if oid_lookup.len() != expected_len {
2380            return Err(GitError::InvalidFormat(
2381                "multi-pack-index OIDL chunk has invalid length".into(),
2382            ));
2383        }
2384        let object_offsets = midx_chunk_data(&bytes, &chunks, *b"OOFF", true)?
2385            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
2386        let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
2387            GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
2388        })?;
2389        if object_offsets.len() != expected_offsets_len {
2390            return Err(GitError::InvalidFormat(
2391                "multi-pack-index OOFF chunk has invalid length".into(),
2392            ));
2393        }
2394        let large_offsets = midx_chunk_data(&bytes, &chunks, *b"LOFF", false)?;
2395        if let Some(large_offsets) = large_offsets
2396            && large_offsets.len() % 8 != 0
2397        {
2398            return Err(GitError::InvalidFormat(
2399                "multi-pack-index LOFF chunk has invalid length".into(),
2400            ));
2401        }
2402        let oid_lookup_offset = oid_lookup.as_ptr() as usize - bytes.as_ptr() as usize;
2403        let object_offsets_offset = object_offsets.as_ptr() as usize - bytes.as_ptr() as usize;
2404        let (large_offsets_offset, large_offsets_len) = match large_offsets {
2405            Some(large_offsets) => (
2406                Some(large_offsets.as_ptr() as usize - bytes.as_ptr() as usize),
2407                large_offsets.len(),
2408            ),
2409            None => (None, 0),
2410        };
2411        Ok(Self {
2412            format,
2413            pack_count,
2414            pack_names,
2415            fanout,
2416            object_count,
2417            oid_lookup_offset,
2418            object_offsets_offset,
2419            large_offsets_offset,
2420            large_offsets_len,
2421            bytes,
2422        })
2423    }
2424
2425    pub fn contains(&self, oid: &ObjectId) -> bool {
2426        self.find_position(oid).is_some()
2427    }
2428
2429    pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2430        let Some(position) = self.find_position(oid) else {
2431            return Ok(None);
2432        };
2433        let hash_len = self.format.raw_len();
2434        let oid_start = self
2435            .oid_lookup_offset
2436            .checked_add(position * hash_len)
2437            .ok_or_else(|| {
2438                GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2439            })?;
2440        let oid = ObjectId::from_raw(self.format, &self.bytes[oid_start..oid_start + hash_len])?;
2441        let offset_start = self
2442            .object_offsets_offset
2443            .checked_add(position * 8)
2444            .ok_or_else(|| {
2445                GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2446            })?;
2447        let data = &self.bytes[offset_start..offset_start + 8];
2448        let pack_int_id = u32_be(&data[..4]);
2449        if pack_int_id >= self.pack_count {
2450            return Err(GitError::InvalidFormat(
2451                "multi-pack-index object points past pack table".into(),
2452            ));
2453        }
2454        let raw_offset = u32_be(&data[4..8]);
2455        let offset = if raw_offset & 0x8000_0000 == 0 {
2456            u64::from(raw_offset)
2457        } else {
2458            let Some(large_offsets_offset) = self.large_offsets_offset else {
2459                return Err(GitError::InvalidFormat(
2460                    "multi-pack-index large offset missing LOFF chunk".into(),
2461                ));
2462            };
2463            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2464            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2465                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2466            })?;
2467            let large_end = large_start.checked_add(8).ok_or_else(|| {
2468                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2469            })?;
2470            if large_end > self.large_offsets_len {
2471                return Err(GitError::InvalidFormat(
2472                    "multi-pack-index large offset points past LOFF chunk".into(),
2473                ));
2474            }
2475            let start = large_offsets_offset + large_start;
2476            u64_be(&self.bytes[start..start + 8])
2477        };
2478        Ok(Some(MultiPackIndexEntry {
2479            oid,
2480            pack_int_id,
2481            offset,
2482        }))
2483    }
2484
2485    pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2486        self.pack_names
2487            .get(pack_int_id as usize)
2488            .map(String::as_str)
2489    }
2490
2491    fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2492        if oid.format() != self.format || self.object_count == 0 {
2493            return None;
2494        }
2495        let first = oid.as_bytes()[0] as usize;
2496        let start = if first == 0 {
2497            0
2498        } else {
2499            self.fanout[first - 1] as usize
2500        };
2501        let end = self.fanout[first] as usize;
2502        if start >= end || end > self.object_count {
2503            return None;
2504        }
2505        let hash_len = self.format.raw_len();
2506        let table_start = self.oid_lookup_offset;
2507        let table_end = table_start + self.object_count * hash_len;
2508        let table = &self.bytes[table_start..table_end];
2509        let needle = oid.as_bytes();
2510        let mut low = start;
2511        let mut high = end;
2512        while low < high {
2513            let mid = low + (high - low) / 2;
2514            let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2515            match raw.cmp(needle) {
2516                std::cmp::Ordering::Less => low = mid + 1,
2517                std::cmp::Ordering::Equal => return Some(mid),
2518                std::cmp::Ordering::Greater => high = mid,
2519            }
2520        }
2521        None
2522    }
2523}
2524
2525fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2526    for name in pack_names {
2527        if name.is_empty() {
2528            return Err(GitError::InvalidFormat(
2529                "multi-pack-index pack name is empty".into(),
2530            ));
2531        }
2532        if name
2533            .bytes()
2534            .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2535        {
2536            return Err(GitError::InvalidFormat(
2537                "multi-pack-index pack name contains an invalid byte".into(),
2538            ));
2539        }
2540    }
2541    Ok(())
2542}
2543
/// Serialize pack names as consecutive NUL-terminated strings, then zero-pad
/// the result so its length is a multiple of four bytes.
///
/// An empty name list produces an empty buffer.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    const ALIGNMENT: usize = 4;

    let mut encoded = Vec::new();
    for pack_name in pack_names.iter() {
        encoded.extend_from_slice(pack_name.as_bytes());
        encoded.push(0);
    }

    let misaligned = encoded.len() % ALIGNMENT;
    if misaligned != 0 {
        let padded_len = encoded.len() + (ALIGNMENT - misaligned);
        encoded.resize(padded_len, 0);
    }
    encoded
}
2555
2556fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2557    let mut counts = [0u32; 256];
2558    for object in objects {
2559        let first = object.oid.as_bytes()[0] as usize;
2560        counts[first] = counts[first]
2561            .checked_add(1)
2562            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2563    }
2564    let mut running = 0u32;
2565    let mut out = Vec::with_capacity(256 * 4);
2566    for count in counts {
2567        running = running
2568            .checked_add(count)
2569            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2570        out.extend_from_slice(&running.to_be_bytes());
2571    }
2572    Ok(out)
2573}
2574
2575fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2576    let mut out = Vec::new();
2577    for object in objects {
2578        out.extend_from_slice(object.oid.as_bytes());
2579    }
2580    out
2581}
2582
2583fn write_midx_object_offsets(
2584    objects: &[&MultiPackIndexEntry],
2585    large_offsets: &mut Vec<u8>,
2586) -> Result<Vec<u8>> {
2587    let mut out = Vec::new();
2588    for object in objects {
2589        out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2590        if object.offset < 0x8000_0000 {
2591            out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2592        } else {
2593            let large_idx = large_offsets.len() / 8;
2594            if large_idx > 0x7fff_ffff {
2595                return Err(GitError::InvalidFormat(
2596                    "too many multi-pack-index large offsets".into(),
2597                ));
2598            }
2599            out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2600            large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2601        }
2602    }
2603    Ok(out)
2604}
2605
2606fn write_multi_pack_index_chunks(
2607    format: ObjectFormat,
2608    version: u8,
2609    pack_count: u32,
2610    chunks: &[([u8; 4], Vec<u8>)],
2611) -> Result<Vec<u8>> {
2612    if chunks.len() > u8::MAX as usize {
2613        return Err(GitError::InvalidFormat(
2614            "too many multi-pack-index chunks".into(),
2615        ));
2616    }
2617    let lookup_len = (chunks.len() + 1)
2618        .checked_mul(12)
2619        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2620    let mut out = Vec::new();
2621    out.extend_from_slice(b"MIDX");
2622    out.push(version);
2623    out.push(hash_function_id(format) as u8);
2624    out.push(chunks.len() as u8);
2625    out.push(0);
2626    out.extend_from_slice(&pack_count.to_be_bytes());
2627    let mut chunk_offset = (12usize)
2628        .checked_add(lookup_len)
2629        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
2630        as u64;
2631    for (id, data) in chunks {
2632        out.extend_from_slice(id);
2633        out.extend_from_slice(&chunk_offset.to_be_bytes());
2634        chunk_offset = chunk_offset
2635            .checked_add(data.len() as u64)
2636            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
2637    }
2638    out.extend_from_slice(&[0, 0, 0, 0]);
2639    out.extend_from_slice(&chunk_offset.to_be_bytes());
2640    for (_id, data) in chunks {
2641        out.extend_from_slice(data);
2642    }
2643    let checksum = sley_core::digest_bytes(format, &out)?;
2644    out.extend_from_slice(checksum.as_bytes());
2645    Ok(out)
2646}
2647
/// Header fields decoded from the front of a single pack entry.
///
/// Produced by `parse_entry_header`; the readers in this file compare `size`
/// against the length of the entry's inflated payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Entry kind: one of the four base object kinds or an ofs-/ref-delta.
    kind: PackObjectKind,
    /// Declared length in bytes of the entry's inflated payload. For a delta
    /// entry the payload is the delta stream, not the reconstructed object.
    size: u64,
}
2653
/// A cache of objects already decoded from one specific pack, keyed by the
/// in-pack byte offset at which each object's entry begins.
///
/// Delta resolution within a pack walks a chain of base objects by offset; the
/// same base is the parent of many deltas, so without a cache the entire chain
/// is re-inflated and re-applied on every read. Implementors let
/// [`read_object_at_with_cache`] reuse a warm base instead.
///
/// Correctness contract: a given `offset` within a given pack's bytes always
/// decodes to exactly one object, so caching by offset can never serve the wrong
/// object **provided the same cache is only ever used with one pack's bytes**.
/// Callers must therefore scope a cache to a single pack (e.g. key it by pack
/// path). The default [`read_object_at`] uses a no-op cache and is unaffected.
///
/// Both methods take `&self`, so an implementation that actually stores
/// entries needs interior mutability.
pub trait PackDeltaCache {
    /// Return the decoded object whose entry begins at `offset`, if cached.
    ///
    /// A hit is returned to the caller as the fully resolved object without
    /// touching the pack bytes.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Record that the entry beginning at `offset` decodes to `object`.
    ///
    /// The reader calls this once per entry it decodes, after any delta has
    /// been applied.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
2673
/// A [`PackDeltaCache`] that stores nothing; used by [`read_object_at`] to keep
/// the original, allocation-free behavior for callers that do not opt in.
///
/// [`read_object_at_arc`] passes it as well, so every lookup misses and every
/// entry on a delta chain is decoded afresh.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always a miss.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards the object.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
2684
// Reused zlib inflate state. Resetting and reusing one `Decompress` avoids
// allocating a fresh (~10 KiB) `InflateState` for every object and delta decoded —
// an allocation that dominated bulk reads. Borrowed only for the duration of a
// single inflate; the recursive pack reader fully inflates each entry's data before
// recursing to its base, so the borrow never nests.
//
// The `true` passed to `Decompress::new` is flate2's `zlib_header` flag (the
// stream is expected to carry a zlib header); `inflate_into` and
// `inflate_prefix` pass the same flag to `reset` before each use.
thread_local! {
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
2693
/// The largest ratio by which a single DEFLATE/zlib member can expand its input.
/// The theoretical worst case for raw DEFLATE is ~1032:1 (a maximally efficient
/// run of back-references). We pre-reserve no more than this multiple of the
/// available compressed input, so an attacker who declares a huge `size_hint`
/// (e.g. `u64::MAX`) cannot make us reserve — and thus commit — gigabytes of
/// memory before the inflate has produced a single byte. The stream's *actual*
/// output is still verified against the declared size by the caller; this only
/// bounds the speculative allocation. git never pre-allocates an attacker's
/// declared size beyond a streaming buffer either (see index-pack.c's
/// `unpack_entry_data`).
///
/// Used by `bounded_inflate_reserve` as the multiplier applied (saturating) to
/// the compressed input length.
const MAX_INFLATE_EXPANSION: usize = 1032;
2705
/// An absolute ceiling on the speculative pre-reservation, independent of the
/// input length, so even a large legitimate-looking compressed input can't be
/// turned into a multi-gigabyte up-front allocation. Inflate still grows the
/// output buffer organically past this when a real stream genuinely produces
/// that much — this only caps the *speculative* reserve.
///
/// The value is 64 MiB; `bounded_inflate_reserve` uses it as the upper bound of
/// its clamp.
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;
2712
2713/// Bound a caller-supplied (possibly attacker-controlled) decompressed-size hint
2714/// to something safe to reserve up front: no larger than what `compressed_len`
2715/// input bytes could plausibly inflate to, and never above a fixed ceiling. The
2716/// returned value is only used to size the initial allocation; the inflate loop
2717/// grows the buffer as the real stream produces output, so legitimate large
2718/// objects still decode correctly — they just don't get the whole allocation at
2719/// once.
2720fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
2721    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
2722    // 64 (floor) <= MAX_INFLATE_RESERVE (ceiling) always, so `clamp` cannot panic.
2723    size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
2724}
2725
/// Inflate the entire zlib stream at the front of `compressed`, appending the
/// decoded bytes to `out`, reusing the thread-local inflate state. `size_hint`
/// is the caller's expectation for the decompressed length, but it is treated as
/// untrusted: the up-front reservation is bounded by [`bounded_inflate_reserve`]
/// so a crafted hint can never drive an out-of-memory pre-allocation. Returns the
/// number of *compressed* bytes consumed (so callers stepping through a pack can
/// advance to the next entry). Byte-for-byte equivalent to
/// `ZlibDecoder::read_to_end` + `total_in`.
///
/// Bytes in `compressed` after the end of the stream are left untouched and are
/// not counted in the return value.
///
/// # Errors
///
/// `GitError::InvalidObject` when the decompressor reports an error, or when a
/// call makes no progress (nothing consumed, nothing produced) before the
/// stream has ended, which is reported as a truncated stream.
fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
    INFLATE.with(|cell| {
        let mut decompress = cell.borrow_mut();
        // Start a fresh stream on the reused state.
        decompress.reset(true);
        out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
        let mut input = compressed;
        let mut consumed_total = 0usize;
        loop {
            // Always leave output room so a zero-progress result means the input
            // (not the buffer) is exhausted.
            if out.len() == out.capacity() {
                out.reserve(out.len().max(64));
            }
            // Snapshot the counters so this call's progress can be measured as a
            // difference.
            let before_in = decompress.total_in();
            let before_out = decompress.total_out();
            let status = decompress
                .decompress_vec(input, out, flate2::FlushDecompress::None)
                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
            let consumed = (decompress.total_in() - before_in) as usize;
            let produced = decompress.total_out() - before_out;
            // Drop the bytes the decompressor has already taken.
            input = &input[consumed..];
            consumed_total += consumed;
            match status {
                flate2::Status::StreamEnd => return Ok(consumed_total),
                // No input taken and no output written, yet the stream is not
                // finished: the compressed data ended early.
                _ if consumed == 0 && produced == 0 => {
                    return Err(GitError::InvalidObject("truncated zlib stream".into()));
                }
                _ => {}
            }
        }
    })
}
2766
/// Inflate at least `max_out` bytes (or until the stream ends) from `compressed`
/// into `out`, reusing the thread-local state. Used to read a delta's leading
/// base-size / result-size varints without inflating the whole instruction stream.
///
/// The loop stops as soon as `out` holds `max_out` bytes, the stream ends, or a
/// call makes no progress. A stalled (e.g. truncated) input is therefore *not*
/// an error here: the function returns `Ok(())` with whatever was decoded, and
/// the caller must cope with a short `out`. `out` may end up longer than
/// `max_out`, because each call may fill all spare capacity.
///
/// # Errors
///
/// `GitError::InvalidObject` when the decompressor reports an error.
fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
    INFLATE.with(|cell| {
        let mut decompress = cell.borrow_mut();
        // Start a fresh stream on the reused state.
        decompress.reset(true);
        out.reserve(max_out.max(16));
        let mut input = compressed;
        while out.len() < max_out {
            // Keep spare capacity available so a zero-progress call cannot be
            // caused by a full buffer.
            if out.len() == out.capacity() {
                out.reserve(out.len().max(16));
            }
            // Snapshot the counters so this call's progress can be measured as a
            // difference.
            let before_in = decompress.total_in();
            let before_out = decompress.total_out();
            let status = decompress
                .decompress_vec(input, out, flate2::FlushDecompress::None)
                .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
            let consumed = (decompress.total_in() - before_in) as usize;
            let produced = decompress.total_out() - before_out;
            input = &input[consumed..];
            // Finished, or stalled with nothing more to read: stop quietly.
            if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
                break;
            }
        }
        Ok(())
    })
}
2795
2796/// Decode the single object stored at byte `offset` within `pack_bytes`, reading
2797/// only that object and its delta-base chain instead of parsing the whole pack.
2798///
2799/// Ofs-delta bases are followed by offset (recursively, within this pack);
2800/// ref-delta bases are obtained from `resolve_ref_base`, which the caller backs
2801/// with the surrounding object store (so a base in another pack or loose still
2802/// resolves). The pack trailer checksum is the final `format.raw_len()` bytes.
2803pub fn read_object_at_arc<F>(
2804    pack_bytes: &[u8],
2805    offset: u64,
2806    format: ObjectFormat,
2807    resolve_ref_base: F,
2808) -> Result<Arc<EncodedObject>>
2809where
2810    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2811{
2812    read_object_at_with_cache_arc(
2813        pack_bytes,
2814        offset,
2815        format,
2816        resolve_ref_base,
2817        &NoopDeltaCache,
2818    )
2819}
2820
2821/// Like [`read_object_at_arc`], but reuses already-decoded objects from `cache`
2822/// (keyed by in-pack offset) and records every object it decodes.
2823///
2824/// This turns repeated reads from the same pack — where many deltas share a base
2825/// chain — from re-inflating each chain per read into resolving each base once.
2826/// `cache` must be scoped to the pack `pack_bytes` belongs to (see
2827/// [`PackDeltaCache`]). The decoded object is returned behind an [`Arc`] so
2828/// callers can reuse cache handles without cloning full object bodies.
2829pub fn read_object_at_with_cache_arc<F, C>(
2830    pack_bytes: &[u8],
2831    offset: u64,
2832    format: ObjectFormat,
2833    mut resolve_ref_base: F,
2834    cache: &C,
2835) -> Result<Arc<EncodedObject>>
2836where
2837    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2838    C: PackDeltaCache + ?Sized,
2839{
2840    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
2841}
2842
2843fn read_object_at_inner<F, C>(
2844    pack_bytes: &[u8],
2845    offset: u64,
2846    format: ObjectFormat,
2847    resolve_ref_base: &mut F,
2848    cache: &C,
2849) -> Result<Arc<EncodedObject>>
2850where
2851    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
2852    C: PackDeltaCache + ?Sized,
2853{
2854    // A warm cache entry for this exact offset is already the fully resolved
2855    // object, so the whole base chain below can be skipped.
2856    if let Some(object) = cache.get(offset) {
2857        return Ok(object);
2858    }
2859    let trailer_offset = pack_bytes
2860        .len()
2861        .checked_sub(format.raw_len())
2862        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
2863    let mut cursor = usize::try_from(offset)
2864        .ok()
2865        .filter(|&value| value < trailer_offset)
2866        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
2867    let header = parse_entry_header(pack_bytes, &mut cursor)?;
2868    let base = match header.kind {
2869        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
2870            pack_bytes,
2871            &mut cursor,
2872            offset,
2873        )?)),
2874        PackObjectKind::RefDelta => {
2875            let hash_len = format.raw_len();
2876            if cursor + hash_len > trailer_offset {
2877                return Err(GitError::InvalidFormat(
2878                    "truncated ref-delta base object id".into(),
2879                ));
2880            }
2881            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
2882            cursor += hash_len;
2883            Some(DeltaBase::Ref(oid))
2884        }
2885        _ => None,
2886    };
2887    let mut body = Vec::new();
2888    inflate_into(
2889        &pack_bytes[cursor..trailer_offset],
2890        &mut body,
2891        header.size.min(usize::MAX as u64) as usize,
2892    )?;
2893    if body.len() as u64 != header.size {
2894        return Err(GitError::InvalidObject(format!(
2895            "pack object declared {} bytes, decoded {}",
2896            header.size,
2897            body.len()
2898        )));
2899    }
2900    let object = match base {
2901        None => {
2902            let object_type = match header.kind {
2903                PackObjectKind::Commit => ObjectType::Commit,
2904                PackObjectKind::Tree => ObjectType::Tree,
2905                PackObjectKind::Blob => ObjectType::Blob,
2906                PackObjectKind::Tag => ObjectType::Tag,
2907                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
2908                    return Err(GitError::InvalidFormat(
2909                        "delta pack entry decoded without a base".into(),
2910                    ));
2911                }
2912            };
2913            Arc::new(EncodedObject::new(object_type, body))
2914        }
2915        Some(DeltaBase::Offset(base_offset)) => {
2916            let base =
2917                read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
2918            let resolved = apply_pack_delta(&base.body, &body)?;
2919            Arc::new(EncodedObject::new(base.object_type, resolved))
2920        }
2921        Some(DeltaBase::Ref(base_oid)) => {
2922            let base = resolve_ref_base(&base_oid)?
2923                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
2924            let resolved = apply_pack_delta(&base.body, &body)?;
2925            Arc::new(EncodedObject::new(base.object_type, resolved))
2926        }
2927    };
2928    // Record the fully resolved object so any later read that walks through this
2929    // offset (as a delta base or directly) reuses it. Bases are inserted as the
2930    // recursion unwinds, so a chain is decoded at most once across reads.
2931    cache.insert(offset, Arc::clone(&object));
2932    Ok(object)
2933}
2934
2935/// The object type and final (inflated) size of the entry at `offset`, *without*
2936/// materializing the object body — git's `cat-file --batch-check` fast path.
2937///
2938/// A base object's size is already in its pack entry header, and a delta's result
2939/// size is the second varint at the front of its (small) delta stream, so neither
2940/// inflates the full content. The reported type is the type at the end of the
2941/// delta chain (deltas inherit their base's type). `resolve_ref_base_type` supplies
2942/// the type of a ref-delta base that lives outside this pack (resolved through the
2943/// wider object store); ofs-delta bases are followed within `pack_bytes` directly.
2944pub fn read_object_header_at<F>(
2945    pack_bytes: &[u8],
2946    offset: u64,
2947    format: ObjectFormat,
2948    mut resolve_ref_base_type: F,
2949) -> Result<(ObjectType, u64)>
2950where
2951    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
2952{
2953    read_object_header_at_inner(
2954        pack_bytes,
2955        offset,
2956        format,
2957        &mut resolve_ref_base_type,
2958        &mut NoopHeaderTypeCache,
2959    )
2960}
2961
/// Memo of `pack offset -> resolved header (end-of-chain type, result size)` for
/// the `cat-file --batch-check` header fast path.
///
/// Without it, resolving the *type* of an ofs-delta walks the whole delta chain
/// to its base on every header read, re-inflating each link's leading varints
/// from scratch — so reading every object in a deeply-deltified pack costs
/// O(objects x chain-depth) and goes super-linear (sley#26). Two reuses fall out
/// of memoizing `offset -> (type, size)`:
///
/// * a chain's end-of-chain type is resolved at most once, so later objects on
///   the same chain skip the walk; and
/// * a repeated lookup of the same object (common in batch input) returns from
///   the memo without re-inflating its delta header at all.
///
/// The size stored is the object's final (inflated) result size — read from its
/// own pack/delta header, never by materializing the body.
///
/// Entries are keyed by in-pack offset only, so one memo must not be shared
/// between different packs.
pub trait HeaderTypeCache {
    /// The previously resolved header at `pack_offset`, if any.
    ///
    /// The tuple is `(object type at the end of the delta chain, result size)`.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Record the resolved header at `pack_offset` for reuse by later reads.
    ///
    /// The header reader calls this for every entry it resolves, including the
    /// bases it walks through.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
2984
/// A [`HeaderTypeCache`] that remembers nothing. [`read_object_header_at`]
/// passes it so that the uncached entry point re-resolves every header.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always a miss.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
2993
2994/// Like [`read_object_header_at`] but threads a caller-owned [`HeaderTypeCache`]
2995/// through the read so (a) the ofs-delta chain's end-of-chain type is resolved at
2996/// most once per chain and (b) a repeated lookup of the same offset returns from
2997/// the memo without re-inflating (sley#26). The cache is keyed by in-pack offset,
2998/// so it must be scoped to a single pack's bytes by the caller.
2999pub fn read_object_header_at_with_cache<F, C>(
3000    pack_bytes: &[u8],
3001    offset: u64,
3002    format: ObjectFormat,
3003    mut resolve_ref_base_type: F,
3004    type_cache: &mut C,
3005) -> Result<(ObjectType, u64)>
3006where
3007    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3008    C: HeaderTypeCache + ?Sized,
3009{
3010    if let Some(header) = type_cache.get(offset) {
3011        return Ok(header);
3012    }
3013    read_object_header_at_inner(
3014        pack_bytes,
3015        offset,
3016        format,
3017        &mut resolve_ref_base_type,
3018        type_cache,
3019    )
3020}
3021
3022fn read_object_header_at_inner<F, C>(
3023    pack_bytes: &[u8],
3024    offset: u64,
3025    format: ObjectFormat,
3026    resolve_ref_base_type: &mut F,
3027    type_cache: &mut C,
3028) -> Result<(ObjectType, u64)>
3029where
3030    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3031    C: HeaderTypeCache + ?Sized,
3032{
3033    let trailer_offset = pack_bytes
3034        .len()
3035        .checked_sub(format.raw_len())
3036        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3037    let mut cursor = usize::try_from(offset)
3038        .ok()
3039        .filter(|&value| value < trailer_offset)
3040        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3041    let header = parse_entry_header(pack_bytes, &mut cursor)?;
3042    let resolved = match header.kind {
3043        PackObjectKind::Commit => (ObjectType::Commit, header.size),
3044        PackObjectKind::Tree => (ObjectType::Tree, header.size),
3045        PackObjectKind::Blob => (ObjectType::Blob, header.size),
3046        PackObjectKind::Tag => (ObjectType::Tag, header.size),
3047        PackObjectKind::OfsDelta => {
3048            let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3049            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3050            // The end-of-chain type only depends on the base, so reuse it across
3051            // reads instead of re-walking the chain per object (sley#26).
3052            let base_type = match type_cache.get(base_offset) {
3053                Some((base_type, _)) => base_type,
3054                None => {
3055                    let (base_type, _) = read_object_header_at_inner(
3056                        pack_bytes,
3057                        base_offset,
3058                        format,
3059                        resolve_ref_base_type,
3060                        type_cache,
3061                    )?;
3062                    base_type
3063                }
3064            };
3065            (base_type, size)
3066        }
3067        PackObjectKind::RefDelta => {
3068            let hash_len = format.raw_len();
3069            if cursor + hash_len > trailer_offset {
3070                return Err(GitError::InvalidFormat(
3071                    "truncated ref-delta base object id".into(),
3072                ));
3073            }
3074            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3075            cursor += hash_len;
3076            let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3077            let base_type = resolve_ref_base_type(&oid)?
3078                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3079            (base_type, size)
3080        }
3081    };
3082    // Memoize the fully resolved header so a repeated lookup of this offset (or a
3083    // chain that bases on it) returns without re-inflating (sley#26).
3084    type_cache.put(offset, resolved);
3085    Ok(resolved)
3086}
3087
3088/// Number of inflated delta-stream bytes to read when only the leading base-size
3089/// and result-size varints are needed. Each varint is at most 10 bytes, so a short
3090/// prefix always covers both without inflating the delta instructions.
3091const DELTA_HEADER_PREFIX_LEN: usize = 32;
3092
3093/// Result size of a delta whose zlib-compressed stream starts at `compressed`,
3094/// inflating only the short prefix that holds its two leading varints.
3095fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
3096    let mut prefix = Vec::new();
3097    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
3098    decoded_delta_result_size(&prefix)
3099}
3100
3101fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3102    let first = next_byte(bytes, offset)?;
3103    let mut size = u64::from(first & 0x0f);
3104    let kind = match (first >> 4) & 0x07 {
3105        1 => PackObjectKind::Commit,
3106        2 => PackObjectKind::Tree,
3107        3 => PackObjectKind::Blob,
3108        4 => PackObjectKind::Tag,
3109        6 => PackObjectKind::OfsDelta,
3110        7 => PackObjectKind::RefDelta,
3111        other => {
3112            return Err(GitError::InvalidFormat(format!(
3113                "invalid pack object type {other}"
3114            )));
3115        }
3116    };
3117    let mut shift = 4;
3118    let mut byte = first;
3119    while byte & 0x80 != 0 {
3120        byte = next_byte(bytes, offset)?;
3121        let part = u64::from(byte & 0x7f);
3122        size = size
3123            .checked_add(
3124                part.checked_shl(shift)
3125                    .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3126            )
3127            .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3128        shift += 7;
3129    }
3130    Ok(EntryHeader { kind, size })
3131}
3132
3133fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3134    let mut byte = next_byte(bytes, offset)?;
3135    let mut relative = u64::from(byte & 0x7f);
3136    while byte & 0x80 != 0 {
3137        byte = next_byte(bytes, offset)?;
3138        relative = relative
3139            .checked_add(1)
3140            .and_then(|value| value.checked_shl(7))
3141            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3142            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3143    }
3144    entry_offset
3145        .checked_sub(relative)
3146        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3147}
3148
3149fn resolve_pack_entries<F>(
3150    parsed: Vec<ParsedPackEntry>,
3151    format: ObjectFormat,
3152    external_base: &mut F,
3153) -> Result<Vec<PackObject>>
3154where
3155    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3156{
3157    let mut offset_to_index = HashMap::with_capacity(parsed.len());
3158    for (idx, entry) in parsed.iter().enumerate() {
3159        offset_to_index.insert(parsed_entry_offset(entry), idx);
3160    }
3161
3162    let mut resolved = vec![None; parsed.len()];
3163    let mut oid_to_index = HashMap::new();
3164    let mut unresolved = 0usize;
3165    for (idx, entry) in parsed.iter().enumerate() {
3166        match entry {
3167            ParsedPackEntry::Resolved(object) => {
3168                oid_to_index.insert(object.entry.oid, idx);
3169                resolved[idx] = Some(object.clone());
3170            }
3171            ParsedPackEntry::Delta { .. } => unresolved += 1,
3172        }
3173    }
3174
3175    while unresolved != 0 {
3176        let mut progress = false;
3177        for idx in 0..parsed.len() {
3178            if resolved[idx].is_some() {
3179                continue;
3180            }
3181            let ParsedPackEntry::Delta {
3182                base,
3183                compressed_size,
3184                delta_size,
3185                offset,
3186                delta,
3187            } = &parsed[idx]
3188            else {
3189                continue;
3190            };
3191            let Some(base_object) = delta_base_object(
3192                base,
3193                &offset_to_index,
3194                &oid_to_index,
3195                &resolved,
3196                external_base,
3197            )?
3198            else {
3199                continue;
3200            };
3201            let body = apply_pack_delta(base_object.body(), delta)?;
3202            let object = EncodedObject::new(base_object.object_type(), body);
3203            let oid = object.object_id(format)?;
3204            let pack_object = PackObject {
3205                entry: PackEntry {
3206                    oid,
3207                    compressed_size: *compressed_size,
3208                    uncompressed_size: object.body.len() as u64,
3209                    offset: *offset,
3210                },
3211                object,
3212            };
3213            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
3214                return Err(GitError::InvalidObject(
3215                    "resolved delta size does not match delta header".into(),
3216                ));
3217            }
3218            if *delta_size != delta.len() as u64 {
3219                return Err(GitError::InvalidObject(format!(
3220                    "pack delta declared {delta_size} bytes, decoded {}",
3221                    delta.len()
3222                )));
3223            }
3224            oid_to_index.insert(oid, idx);
3225            resolved[idx] = Some(pack_object);
3226            unresolved -= 1;
3227            progress = true;
3228        }
3229        if !progress {
3230            return Err(GitError::Unsupported("unresolved delta base".into()));
3231        }
3232    }
3233
3234    resolved
3235        .into_iter()
3236        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
3237        .collect()
3238}
3239
3240fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3241    match entry {
3242        ParsedPackEntry::Resolved(object) => object.entry.offset,
3243        ParsedPackEntry::Delta { offset, .. } => *offset,
3244    }
3245}
3246
3247enum DeltaBaseObject<'a> {
3248    Borrowed(&'a EncodedObject),
3249    Owned(EncodedObject),
3250}
3251
3252impl DeltaBaseObject<'_> {
3253    fn object_type(&self) -> ObjectType {
3254        match self {
3255            Self::Borrowed(object) => object.object_type,
3256            Self::Owned(object) => object.object_type,
3257        }
3258    }
3259
3260    fn body(&self) -> &[u8] {
3261        match self {
3262            Self::Borrowed(object) => &object.body,
3263            Self::Owned(object) => &object.body,
3264        }
3265    }
3266}
3267
3268fn delta_base_object<'a, F>(
3269    base: &DeltaBase,
3270    offset_to_index: &HashMap<u64, usize>,
3271    oid_to_index: &HashMap<ObjectId, usize>,
3272    resolved: &'a [Option<PackObject>],
3273    external_base: &mut F,
3274) -> Result<Option<DeltaBaseObject<'a>>>
3275where
3276    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3277{
3278    match base {
3279        DeltaBase::Offset(offset) => {
3280            let Some(index) = offset_to_index.get(offset).copied() else {
3281                return Err(GitError::InvalidFormat(format!(
3282                    "ofs-delta base offset {offset} not found"
3283                )));
3284            };
3285            Ok(resolved[index]
3286                .as_ref()
3287                .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3288        }
3289        DeltaBase::Ref(oid) => {
3290            if let Some(index) = oid_to_index.get(oid).copied() {
3291                return Ok(resolved[index]
3292                    .as_ref()
3293                    .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3294            }
3295            external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3296        }
3297    }
3298}
3299
3300fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3301    let mut cursor = 0usize;
3302    let base_size = read_delta_varint(delta, &mut cursor)?;
3303    if base_size != base.len() as u64 {
3304        return Err(GitError::InvalidObject(format!(
3305            "delta base size mismatch: expected {base_size}, got {}",
3306            base.len()
3307        )));
3308    }
3309    let result_size = read_delta_varint(delta, &mut cursor)?;
3310    // `result_size` is an attacker-controlled delta varint from a network pack
3311    // (install_raw_pack -> sley-fetch). On 64-bit a naive `result_size as usize`
3312    // (or `.min(usize::MAX)`, a no-op there) lets a tiny delta declare
3313    // `u64::MAX`/1 TiB and drive `with_capacity` to abort the process before the
3314    // size-mismatch check below can fire. Route the up-front reservation through
3315    // the sley#2 bound so the speculative allocation is capped; `result.extend`
3316    // still grows the buffer organically and the post-decode length check
3317    // (`result.len() != result_size`) rejects the lie cleanly.
3318    let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3319    let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3320    while cursor < delta.len() {
3321        let command = delta[cursor];
3322        cursor += 1;
3323        if command & 0x80 != 0 {
3324            let copy_offset =
3325                read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3326            let mut copy_size =
3327                read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3328            if copy_size == 0 {
3329                copy_size = 0x10000;
3330            }
3331            let start = usize::try_from(copy_offset)
3332                .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3333            let len = usize::try_from(copy_size)
3334                .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3335            let end = start
3336                .checked_add(len)
3337                .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3338            let Some(slice) = base.get(start..end) else {
3339                return Err(GitError::InvalidObject(
3340                    "delta copy range exceeds base object".into(),
3341                ));
3342            };
3343            result.extend_from_slice(slice);
3344        } else if command != 0 {
3345            let len = usize::from(command);
3346            let end = cursor
3347                .checked_add(len)
3348                .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3349            let Some(slice) = delta.get(cursor..end) else {
3350                return Err(GitError::InvalidObject(
3351                    "delta insert range exceeds delta data".into(),
3352                ));
3353            };
3354            result.extend_from_slice(slice);
3355            cursor = end;
3356        } else {
3357            return Err(GitError::InvalidObject(
3358                "delta contains reserved zero command".into(),
3359            ));
3360        }
3361    }
3362    if result.len() as u64 != result_size {
3363        return Err(GitError::InvalidObject(format!(
3364            "delta result size mismatch: expected {result_size}, got {}",
3365            result.len()
3366        )));
3367    }
3368    Ok(result)
3369}
3370
3371fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3372    let mut cursor = 0usize;
3373    let _ = read_delta_varint(delta, &mut cursor)?;
3374    read_delta_varint(delta, &mut cursor)
3375}
3376
3377/// Size, in bytes, of the fixed blocks used to index a base object for delta
3378/// compression. Matches git's `diff-delta.c` block size.
3379const DELTA_BLOCK_SIZE: usize = 16;
3380
3381/// Distance between indexed base anchors. Delta generation still scans target
3382/// objects byte-by-byte once there is evidence of shared content; anchoring the
3383/// base at block boundaries keeps the index compact and avoids per-object
3384/// hash-table allocation storms on unrelated blobs.
3385const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;
3386
/// Base-2 logarithm of the number of hash buckets used by [`DeltaIndex`].
/// Bucketing avoids sorting each base object's anchors while keeping
/// exact-hash candidate scans short.
3389const DELTA_BUCKET_BITS: usize = 12;
3390const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
3391const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3392
3393/// An index over a base object's content used to generate deltas against it.
3394///
3395/// The index hashes block-sized anchors of the base, groups them into fixed
3396/// buckets, and verifies exact byte matches before copying. This avoids both
3397/// per-bucket allocation storms and the per-object sort needed by a single
3398/// sorted vector.
3399struct DeltaIndex<'a> {
3400    base: &'a [u8],
3401    blocks: Vec<DeltaBlock>,
3402    buckets: Vec<usize>,
3403}
3404
3405#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3406struct DeltaBlock {
3407    hash: u32,
3408    offset: usize,
3409}
3410
3411impl<'a> DeltaIndex<'a> {
3412    fn new(base: &'a [u8]) -> Self {
3413        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3414        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3415        for_each_delta_anchor(base.len(), |offset| {
3416            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3417            buckets[delta_bucket(hash) + 1] += 1;
3418            anchors.push(DeltaBlock { hash, offset });
3419        });
3420        for idx in 1..buckets.len() {
3421            buckets[idx] += buckets[idx - 1];
3422        }
3423
3424        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3425        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3426        for anchor in anchors {
3427            let bucket = delta_bucket(anchor.hash);
3428            let next = &mut next_offsets[bucket];
3429            blocks[*next] = anchor;
3430            *next += 1;
3431        }
3432
3433        Self {
3434            base,
3435            blocks,
3436            buckets,
3437        }
3438    }
3439
3440    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3441        let bucket = delta_bucket(hash);
3442        let start = self.buckets[bucket];
3443        let end = self.buckets[bucket + 1];
3444        self.blocks[start..end]
3445            .iter()
3446            .filter(move |block| block.hash == hash)
3447    }
3448
3449    fn has_hash(&self, hash: u32) -> bool {
3450        self.candidate_blocks(hash).next().is_some()
3451    }
3452
3453    fn has_shared_anchor(&self, target: &[u8]) -> bool {
3454        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3455            return false;
3456        }
3457        let last = target.len() - DELTA_BLOCK_SIZE;
3458        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3459            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3460            if self.has_hash(hash) {
3461                return true;
3462            }
3463        }
3464        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3465            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3466            if self.has_hash(hash) {
3467                return true;
3468            }
3469        }
3470        false
3471    }
3472
3473    /// Generate a delta that reconstructs `target` from this index's base.
3474    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3475        if !self.has_shared_anchor(target) {
3476            return None;
3477        }
3478        let base = self.base;
3479        let mut delta = Vec::new();
3480        write_delta_varint(&mut delta, base.len() as u64);
3481        write_delta_varint(&mut delta, target.len() as u64);
3482
3483        let mut pending_insert_start = 0usize;
3484        let mut pos = 0usize;
3485        while pos < target.len() {
3486            let mut best_len = 0usize;
3487            let mut best_offset = 0usize;
3488            if pos + DELTA_BLOCK_SIZE <= target.len() {
3489                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3490                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3491                    // Confirm the block actually matches (hash collisions are
3492                    // possible) before measuring how far it extends.
3493                    let candidate = candidate.offset;
3494                    let max_len = (base.len() - candidate).min(target.len() - pos);
3495                    let mut len = 0usize;
3496                    while len < max_len && base[candidate + len] == target[pos + len] {
3497                        len += 1;
3498                    }
3499                    if len > best_len {
3500                        best_len = len;
3501                        best_offset = candidate;
3502                    }
3503                }
3504            }
3505
3506            if best_len >= DELTA_BLOCK_SIZE {
3507                if pending_insert_start < pos {
3508                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3509                }
3510                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3511                pos += best_len;
3512                pending_insert_start = pos;
3513            } else {
3514                pos += 1;
3515            }
3516        }
3517        if pending_insert_start < target.len() {
3518            write_delta_insert(&mut delta, &target[pending_insert_start..]);
3519        }
3520        Some(delta)
3521    }
3522}
3523
3524fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3525    if len < DELTA_BLOCK_SIZE {
3526        return;
3527    }
3528    len -= DELTA_BLOCK_SIZE;
3529    for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3530        visit(offset);
3531    }
3532    if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3533        visit(len);
3534    }
3535}
3536
3537fn delta_anchor_count(len: usize) -> usize {
3538    if len < DELTA_BLOCK_SIZE {
3539        return 0;
3540    }
3541    let last = len - DELTA_BLOCK_SIZE;
3542    (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3543}
3544
3545fn delta_bucket(hash: u32) -> usize {
3546    (hash as usize) & DELTA_BUCKET_MASK
3547}
3548
/// Maximum number of same-hash candidate blocks examined per target position
/// when generating a delta. Caps the work done extending candidate matches for
/// inputs with many repeated blocks.
3551const DELTA_MAX_CHAIN: usize = 64;
3552
/// Hashes a block of base/target bytes into the key used for bucket lookup.
///
/// The hash starts at zero and, for each byte, multiplies by `0x0100_0193`
/// (wrapping) and xors the byte in. Collisions are harmless: matches are
/// verified byte-for-byte before use, so a collision only costs an extra
/// comparison.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |acc, &byte| {
        acc.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
3565
/// The chosen storage form for a single object during pack generation.
///
/// Both delta variants own the already-generated delta bytes, so the payload
/// to compress for a planned entry is either the object's own body (`None`) or
/// the stored `delta`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored undeltified (a base for others, or no good delta was found).
    None,
    /// Delta against another object in this pack, identified by its original
    /// index. The pre-computed `delta` bytes reconstruct the object from that
    /// base's body.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Delta against an external (thin-pack) base, referenced by object id.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
3578
/// Planning result for one object: how it will be stored in the pack.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// Storage form chosen for the object (undeltified or a delta and its base).
    base: PlannedBase,
}
3583
3584fn compress_planned_payloads(
3585    objects: &[&EncodedObject],
3586    plan: &[PlannedEntry],
3587    order: &[usize],
3588) -> Result<Vec<Vec<u8>>> {
3589    if order.is_empty() {
3590        return Ok(Vec::new());
3591    }
3592
3593    let worker_count = std::thread::available_parallelism()
3594        .map(|threads| threads.get())
3595        .unwrap_or(1)
3596        .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3597        .min(order.len());
3598    if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3599        let mut payloads = Vec::with_capacity(order.len());
3600        for &idx in order {
3601            payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
3602        }
3603        return Ok(payloads);
3604    }
3605
3606    let chunk_len = order.len().div_ceil(worker_count);
3607    let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
3608    std::thread::scope(|scope| {
3609        let mut handles = Vec::new();
3610        for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
3611            let chunk_start = chunk_idx * chunk_len;
3612            handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
3613                let mut chunk_payloads = Vec::with_capacity(chunk.len());
3614                for (offset, &idx) in chunk.iter().enumerate() {
3615                    chunk_payloads.push((
3616                        chunk_start + offset,
3617                        compressed_payload(planned_payload(objects, plan, idx))?,
3618                    ));
3619                }
3620                Ok(chunk_payloads)
3621            }));
3622        }
3623
3624        let mut first_error = None;
3625        for handle in handles {
3626            match handle.join() {
3627                Ok(Ok(chunk_payloads)) => {
3628                    if first_error.is_none() {
3629                        for (pos, payload) in chunk_payloads {
3630                            payloads[pos] = payload;
3631                        }
3632                    }
3633                }
3634                Ok(Err(err)) => {
3635                    first_error.get_or_insert(err);
3636                }
3637                Err(_) => {
3638                    first_error.get_or_insert_with(|| {
3639                        GitError::InvalidObject("pack compression worker panicked".into())
3640                    });
3641                }
3642            }
3643        }
3644
3645        match first_error {
3646            Some(err) => Err(err),
3647            None => Ok(()),
3648        }
3649    })?;
3650    Ok(payloads)
3651}
3652
3653fn planned_payload<'a>(
3654    objects: &'a [&'a EncodedObject],
3655    plan: &'a [PlannedEntry],
3656    idx: usize,
3657) -> &'a [u8] {
3658    match &plan[idx].base {
3659        PlannedBase::None => &objects[idx].body,
3660        PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
3661    }
3662}
3663
3664fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
3665    let mut out = Vec::new();
3666    write_compressed_payload(&mut out, body)?;
3667    Ok(out)
3668}
3669
3670/// Maximum number of external thin-pack bases compared against any single
3671/// object. Bounds the work of the thin path when a large base set is supplied.
3672const DELTA_MAX_EXTERNAL_BASES: usize = 64;
3673
/// One slot of the sliding delta window: a previously processed object that
/// later objects may use as a delta base.
struct DeltaWindowEntry<'a> {
    /// Original index of the object in the `objects` slice being planned.
    idx: usize,
    /// Delta index built over that object's body.
    index: DeltaIndex<'a>,
}
3678
3679/// Rank object types for delta grouping. Objects of the same type are far more
3680/// likely to delta well, so the sort groups by this rank first.
3681fn delta_type_rank(object_type: ObjectType) -> u8 {
3682    match object_type {
3683        ObjectType::Commit => 0,
3684        ObjectType::Tree => 1,
3685        ObjectType::Blob => 2,
3686        ObjectType::Tag => 3,
3687    }
3688}
3689
3690/// Decide how each object is stored (undeltified or deltified) and the order in
3691/// which objects are emitted into the pack.
3692///
3693/// # Ordering
3694///
3695/// Candidates are sorted by `(type, size descending, object id)`:
3696/// * **type** — only same-type objects are deltified against one another, so
3697///   grouping by type keeps the sliding window full of viable bases. Type rank
3698///   follows [`delta_type_rank`] (commit, tree, blob, tag).
3699/// * **size descending** — larger objects come first so smaller, later objects
3700///   delta against larger bases (git's heuristic). Raw [`EncodedObject`]s carry
3701///   no path/name, so the usual path-hash key is unavailable; size is the next
3702///   best locality signal.
3703/// * **object id** — a deterministic tiebreaker for reproducible packs.
3704///
3705/// # Selection
3706///
3707/// Each object is compared against the previous up to `window` same-type
3708/// candidates (and, for thin packs, up to [`DELTA_MAX_EXTERNAL_BASES`] external
3709/// bases of the same type). The smallest delta whose encoded length is strictly
3710/// less than the object's own body is kept; otherwise the object is stored
3711/// undeltified. Delta chain depth is bounded by `options.depth` (a base may
3712/// only be used if doing so keeps the resulting chain within the bound); a depth
3713/// of `0` disables deltification entirely.
3714///
3715/// Returns the per-object plan (indexed by original object index) together with
3716/// the emit order. Every in-pack delta references a candidate that is earlier in
3717/// the emit order, so emitting in that order writes each base before any object
3718/// that depends on it.
3719fn plan_pack_deltas(
3720    objects: &[&EncodedObject],
3721    object_ids: &[ObjectId],
3722    options: &PackWriteOptions,
3723) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
3724    let count = objects.len();
3725    let mut plan: Vec<PlannedEntry> = (0..count)
3726        .map(|_| PlannedEntry {
3727            base: PlannedBase::None,
3728        })
3729        .collect();
3730
3731    // Processing order. Deltas only point backwards within this order, which is
3732    // therefore also a valid emit order. Reordering by type/size improves delta
3733    // locality but is skipped when disabled or when deltification is off.
3734    let mut order: Vec<usize> = (0..count).collect();
3735    if options.reorder && options.depth > 0 {
3736        order.sort_by(|&left, &right| {
3737            delta_type_rank(objects[left].object_type)
3738                .cmp(&delta_type_rank(objects[right].object_type))
3739                .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
3740                .then_with(|| {
3741                    object_ids[left]
3742                        .as_bytes()
3743                        .cmp(object_ids[right].as_bytes())
3744                })
3745        });
3746    }
3747
3748    if options.depth == 0 {
3749        return Ok((plan, order));
3750    }
3751
3752    // Pre-build delta indexes for external thin-pack bases, grouped by type so
3753    // an object only compares against compatible bases.
3754    let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
3755        Vec::with_capacity(options.thin_bases.len());
3756    for (oid, object) in &options.thin_bases {
3757        external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
3758    }
3759
3760    // Chain depth ending at each object (0 = undeltified). Used to keep delta
3761    // chains within `options.depth`.
3762    let mut depth = vec![0usize; count];
3763    // Sliding window of recently processed original indices, most recent last.
3764    let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
3765        std::collections::VecDeque::new();
3766
3767    for &idx in &order {
3768        let target = &objects[idx].body;
3769        let target_type = objects[idx].object_type;
3770
3771        let mut best_delta: Option<Vec<u8>> = None;
3772        let mut best_base = PlannedBase::None;
3773
3774        // Try in-pack candidates from the window (same type only).
3775        for base_entry in window.iter().rev() {
3776            let base_idx = base_entry.idx;
3777            if objects[base_idx].object_type != target_type {
3778                continue;
3779            }
3780            // Using this base would make the new chain depth + 1; skip if that
3781            // would exceed the configured maximum.
3782            if depth[base_idx] + 1 > options.depth {
3783                continue;
3784            }
3785            let Some(delta) = base_entry.index.delta(target) else {
3786                continue;
3787            };
3788            if !delta_is_acceptable(&delta, target.len()) {
3789                continue;
3790            }
3791            if best_delta
3792                .as_ref()
3793                .is_none_or(|current| delta.len() < current.len())
3794            {
3795                best_delta = Some(delta);
3796                best_base = PlannedBase::InPack {
3797                    base_idx,
3798                    delta: Vec::new(),
3799                };
3800            }
3801        }
3802
3803        // Try external thin-pack bases (ref-delta; external base is depth 0, so
3804        // the resulting chain depth is 1, always within a non-zero bound).
3805        for (base_oid, base_type, base_index) in
3806            external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
3807        {
3808            if *base_type != target_type {
3809                continue;
3810            }
3811            let Some(delta) = base_index.delta(target) else {
3812                continue;
3813            };
3814            if !delta_is_acceptable(&delta, target.len()) {
3815                continue;
3816            }
3817            if best_delta
3818                .as_ref()
3819                .is_none_or(|current| delta.len() < current.len())
3820            {
3821                best_delta = Some(delta);
3822                best_base = PlannedBase::External {
3823                    base_oid: *base_oid,
3824                    delta: Vec::new(),
3825                };
3826            }
3827        }
3828
3829        if let Some(delta) = best_delta {
3830            match best_base {
3831                PlannedBase::InPack { base_idx, .. } => {
3832                    depth[idx] = depth[base_idx] + 1;
3833                    plan[idx].base = PlannedBase::InPack { base_idx, delta };
3834                }
3835                PlannedBase::External { base_oid, .. } => {
3836                    depth[idx] = 1;
3837                    plan[idx].base = PlannedBase::External { base_oid, delta };
3838                }
3839                PlannedBase::None => {}
3840            }
3841        }
3842
3843        // Add this object to the window for subsequent candidates.
3844        window.push_back(DeltaWindowEntry {
3845            idx,
3846            index: DeltaIndex::new(&objects[idx].body),
3847        });
3848        while window.len() > options.window {
3849            window.pop_front();
3850        }
3851    }
3852
3853    Ok((plan, order))
3854}
3855
/// Decides whether `delta` should be stored in place of the plain
/// (undeltified) encoding of an object whose body is `target_len` bytes long.
///
/// A delta is kept only when it is non-empty and strictly shorter than the
/// body it reconstructs; a delta of equal or greater size saves nothing, and
/// the undeltified form is self-contained.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    // "At least one byte, and fewer bytes than the target" as a single range.
    (1..target_len).contains(&delta.len())
}
3863
/// Appends `value` to `out` as a little-endian base-128 varint: seven payload
/// bits per byte, least-significant group first, with the high bit set on
/// every byte except the last. Zero is written as a single `0x00` byte.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    // Emit continuation bytes while more than seven bits remain.
    while value >= 0x80 {
        out.push(((value as u8) & 0x7f) | 0x80);
        value >>= 7;
    }
    // The final group always fits in seven bits and carries no flag.
    out.push(value as u8);
}
3877
/// Appends delta "copy" commands that copy `size` bytes starting at `offset`.
///
/// The request is split into spans of at most `0x10000` bytes. Each command is
/// an opcode byte with bit 7 set, followed by only the non-zero bytes of the
/// span's offset (low four bytes, flags `0x01..=0x08`) and size (low three
/// bytes, flags `0x10..=0x40`), least-significant byte first. A full
/// `0x10000`-byte span is written with a size field of zero.
///
/// Only the low 32 bits of `offset` are encoded; any higher bits are not
/// written. A `size` of zero emits nothing.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_SPAN: u64 = 0x10000;
    while size > 0 {
        let span = size.min(MAX_SPAN);
        let size_field = if span < MAX_SPAN { span } else { 0 };

        let offset_bytes = offset.to_le_bytes();
        let size_bytes = size_field.to_le_bytes();
        // Pair every operand byte with the opcode flag announcing its presence.
        let operands = offset_bytes[..4]
            .iter()
            .zip([0x01u8, 0x02, 0x04, 0x08])
            .chain(size_bytes[..3].iter().zip([0x10u8, 0x20, 0x40]));

        // Reserve the opcode slot first, then patch flags in as operands land.
        let opcode_at = out.len();
        out.push(0x80);
        for (&byte, flag) in operands {
            if byte != 0 {
                out[opcode_at] |= flag;
                out.push(byte);
            }
        }

        offset += span;
        size -= span;
    }
}
3907
/// Appends delta "insert" commands carrying `bytes` verbatim.
///
/// Each command is a length byte in `1..=0x7f` followed by that many literal
/// bytes, so longer input is split into several commands. Empty input emits
/// nothing.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for literal in bytes.chunks(0x7f) {
        out.push(literal.len() as u8);
        out.extend_from_slice(literal);
    }
}
3916
3917fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
3918    let mut value = 0u64;
3919    let mut shift = 0u32;
3920    loop {
3921        let Some(byte) = delta.get(*cursor).copied() else {
3922            return Err(GitError::InvalidObject("truncated delta size".into()));
3923        };
3924        *cursor += 1;
3925        value = value
3926            .checked_add(
3927                u64::from(byte & 0x7f)
3928                    .checked_shl(shift)
3929                    .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
3930            )
3931            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3932        if byte & 0x80 == 0 {
3933            return Ok(value);
3934        }
3935        shift = shift
3936            .checked_add(7)
3937            .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3938    }
3939}
3940
3941fn read_delta_copy_value(
3942    delta: &[u8],
3943    cursor: &mut usize,
3944    command: u8,
3945    masks: &[u8],
3946) -> Result<u64> {
3947    let mut value = 0u64;
3948    for (shift, mask) in masks.iter().enumerate() {
3949        if command & mask != 0 {
3950            let Some(byte) = delta.get(*cursor).copied() else {
3951                return Err(GitError::InvalidObject(
3952                    "truncated delta copy command".into(),
3953                ));
3954            };
3955            *cursor += 1;
3956            value |= u64::from(byte) << (shift * 8);
3957        }
3958    }
3959    Ok(value)
3960}
3961
// One compressor per thread, reset and reused by `write_compressed_payload`
// instead of constructing a new `Compress` for every payload. The `RefCell`
// provides the mutable access the compressor needs from a thread-local. It is
// built with the default compression level; the `true` argument is flate2's
// `zlib_header` flag, so output carries a zlib header rather than being raw
// deflate.
thread_local! {
    static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
}
3965
3966fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
3967    DEFLATE.with(|cell| {
3968        let mut compressor = cell.borrow_mut();
3969        compressor.reset();
3970        out.reserve(zlib_compress_bound(body.len()));
3971        let status = compressor
3972            .compress_vec(body, out, FlushCompress::Finish)
3973            .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
3974        if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
3975            return Err(GitError::InvalidObject(
3976                "zlib compression did not finish pack entry".into(),
3977            ));
3978        }
3979        Ok(())
3980    })
3981}
3982
/// Upper bound on the compressed size of `len` input bytes, used to pre-size
/// the output buffer before a one-shot compression.
///
/// Computes `len + (len >> 12) + (len >> 14) + (len >> 25) + 13`, the same
/// formula as zlib's `compressBound`, with saturating arithmetic so very large
/// lengths clamp to `usize::MAX` instead of wrapping.
fn zlib_compress_bound(len: usize) -> usize {
    const FIXED_OVERHEAD: usize = 13;
    [12u32, 14, 25]
        .iter()
        .fold(len, |bound, shift| bound.saturating_add(len >> shift))
        .saturating_add(FIXED_OVERHEAD)
}
3989
3990fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
3991    let type_code = match object_type {
3992        ObjectType::Commit => 1,
3993        ObjectType::Tree => 2,
3994        ObjectType::Blob => 3,
3995        ObjectType::Tag => 4,
3996    };
3997    write_pack_entry_header_kind(out, type_code, size);
3998}
3999
/// Appends a pack entry header for the raw `type_code` and `size`.
///
/// The first byte holds the type code in bits 4..=6 and the low four bits of
/// `size`; the remaining size bits follow in seven-bit groups, least
/// significant first. Bit 7 of every byte except the last is set to signal
/// that another byte follows.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    // Continuation flag for a byte, given the size bits still to be written.
    let more = |pending: u64| -> u8 { if pending != 0 { 0x80 } else { 0 } };

    let low_nibble = (size as u8) & 0x0f;
    let mut pending = size >> 4;
    out.push((type_code << 4) | low_nibble | more(pending));

    while pending != 0 {
        let group = (pending as u8) & 0x7f;
        pending >>= 7;
        out.push(group | more(pending));
    }
}
4016
4017fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4018    if relative == 0 {
4019        return Err(GitError::InvalidFormat(
4020            "ofs-delta relative offset cannot be zero".into(),
4021        ));
4022    }
4023    let mut value = relative;
4024    let mut bytes = vec![(value & 0x7f) as u8];
4025    value >>= 7;
4026    while value != 0 {
4027        value -= 1;
4028        bytes.push(((value & 0x7f) as u8) | 0x80);
4029        value >>= 7;
4030    }
4031    bytes.reverse();
4032    out.extend_from_slice(&bytes);
4033    Ok(())
4034}
4035
4036fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4037    let Some(byte) = bytes.get(*offset).copied() else {
4038        return Err(GitError::InvalidFormat(
4039            "truncated pack entry header".into(),
4040        ));
4041    };
4042    *offset += 1;
4043    Ok(byte)
4044}
4045
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// Panics if `bytes` holds fewer than two bytes; callers are expected to pass
/// an already bounds-checked slice.
fn u16_be(bytes: &[u8]) -> u16 {
    let mut raw = [0u8; 2];
    raw.copy_from_slice(&bytes[..2]);
    u16::from_be_bytes(raw)
}
4049
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// Panics if `bytes` holds fewer than four bytes; callers are expected to
/// pass an already bounds-checked slice.
fn u32_be(bytes: &[u8]) -> u32 {
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&bytes[..4]);
    u32::from_be_bytes(raw)
}
4053
/// Decodes the first eight bytes of `bytes` as a big-endian `u64`.
///
/// Panics if `bytes` holds fewer than eight bytes; callers are expected to
/// pass an already bounds-checked slice.
fn u64_be(bytes: &[u8]) -> u64 {
    let mut raw = [0u8; 8];
    raw.copy_from_slice(&bytes[..8]);
    u64::from_be_bytes(raw)
}
4059
4060fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4061    let mut fanout = [0u32; 256];
4062    let mut previous = 0u32;
4063    for slot in &mut fanout {
4064        *slot = u32_be(&bytes[*offset..*offset + 4]);
4065        if *slot < previous {
4066            return Err(GitError::InvalidFormat(
4067                "pack index fanout is not monotonic".into(),
4068            ));
4069        }
4070        previous = *slot;
4071        *offset += 4;
4072    }
4073    Ok(fanout)
4074}
4075
4076fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4077    let expected_min = if oid_bytes[0] == 0 {
4078        0
4079    } else {
4080        fanout[usize::from(oid_bytes[0] - 1)]
4081    };
4082    if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4083        return Err(GitError::InvalidFormat(
4084            "pack index object id is outside its fanout bucket".into(),
4085        ));
4086    }
4087    Ok(())
4088}
4089
4090fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4091    if raw_offset & 0x8000_0000 == 0 {
4092        return Ok(u64::from(raw_offset));
4093    }
4094    let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4095    let large_start = large_idx
4096        .checked_mul(8)
4097        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4098    let large_end = large_start
4099        .checked_add(8)
4100        .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4101    if large_end > large_offset_table.len() {
4102        return Err(GitError::InvalidFormat(
4103            "pack index large offset points past table".into(),
4104        ));
4105    }
4106    Ok(u64_be(&large_offset_table[large_start..large_end]))
4107}
4108
4109fn checked_range(
4110    start: usize,
4111    count: usize,
4112    width: usize,
4113    total: usize,
4114) -> Result<std::ops::Range<usize>> {
4115    let len = count
4116        .checked_mul(width)
4117        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4118    let end = start
4119        .checked_add(len)
4120        .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4121    if end > total {
4122        return Err(GitError::InvalidFormat("truncated pack index table".into()));
4123    }
4124    Ok(start..end)
4125}
4126
4127fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4128    let mut seen = vec![false; positions.len()];
4129    for position in positions {
4130        let idx = *position as usize;
4131        if idx >= positions.len() {
4132            return Err(GitError::InvalidFormat(
4133                "reverse index position points past object table".into(),
4134            ));
4135        }
4136        if seen[idx] {
4137            return Err(GitError::InvalidFormat(
4138                "reverse index position is duplicated".into(),
4139            ));
4140        }
4141        seen[idx] = true;
4142    }
4143    Ok(())
4144}
4145
4146fn parse_midx_pack_names(
4147    bytes: &[u8],
4148    chunks: &[MultiPackIndexChunk],
4149    pack_count: usize,
4150    version: u8,
4151) -> Result<Vec<String>> {
4152    let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4153        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4154    let mut names = Vec::with_capacity(pack_count);
4155    let mut offset = 0usize;
4156    while names.len() < pack_count {
4157        let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4158            return Err(GitError::InvalidFormat(
4159                "multi-pack-index PNAM entry is unterminated".into(),
4160            ));
4161        };
4162        let name_bytes = &data[offset..offset + relative_end];
4163        if name_bytes.is_empty() {
4164            return Err(GitError::InvalidFormat(
4165                "multi-pack-index PNAM entry is empty".into(),
4166            ));
4167        }
4168        let name = std::str::from_utf8(name_bytes)
4169            .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4170        if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4171            return Err(GitError::InvalidFormat(
4172                "multi-pack-index PNAM entry contains a path separator".into(),
4173            ));
4174        }
4175        names.push(name.to_string());
4176        offset += relative_end + 1;
4177    }
4178    let padding = &data[offset..];
4179    if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4180        return Err(GitError::InvalidFormat(
4181            "multi-pack-index PNAM padding is invalid".into(),
4182        ));
4183    }
4184    if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4185        return Err(GitError::InvalidFormat(
4186            "multi-pack-index v1 PNAM entries are not sorted".into(),
4187        ));
4188    }
4189    Ok(names)
4190}
4191
4192fn parse_midx_oid_fanout(
4193    bytes: &[u8],
4194    chunks: &[MultiPackIndexChunk],
4195) -> Result<([u32; 256], usize)> {
4196    let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4197        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4198    if data.len() != 256 * 4 {
4199        return Err(GitError::InvalidFormat(
4200            "multi-pack-index OIDF chunk has invalid length".into(),
4201        ));
4202    }
4203    let mut fanout = [0u32; 256];
4204    let mut previous = 0u32;
4205    for (idx, slot) in fanout.iter_mut().enumerate() {
4206        let start = idx * 4;
4207        *slot = u32_be(&data[start..start + 4]);
4208        if *slot < previous {
4209            return Err(GitError::InvalidFormat(
4210                "multi-pack-index OIDF fanout is not monotonic".into(),
4211            ));
4212        }
4213        previous = *slot;
4214    }
4215    Ok((fanout, fanout[255] as usize))
4216}
4217
4218fn parse_midx_object_ids(
4219    bytes: &[u8],
4220    chunks: &[MultiPackIndexChunk],
4221    format: ObjectFormat,
4222    object_count: usize,
4223    fanout: &[u32; 256],
4224) -> Result<Vec<ObjectId>> {
4225    let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4226        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4227    let expected_len = object_count
4228        .checked_mul(format.raw_len())
4229        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4230    if data.len() != expected_len {
4231        return Err(GitError::InvalidFormat(
4232            "multi-pack-index OIDL chunk has invalid length".into(),
4233        ));
4234    }
4235
4236    let mut ids = Vec::with_capacity(object_count);
4237    let mut counts = [0u32; 256];
4238    let mut previous_oid: Option<ObjectId> = None;
4239    for idx in 0..object_count {
4240        let start = idx * format.raw_len();
4241        let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4242        if let Some(previous) = &previous_oid
4243            && previous.as_bytes() >= oid.as_bytes()
4244        {
4245            return Err(GitError::InvalidFormat(
4246                "multi-pack-index OIDL object ids are not strictly sorted".into(),
4247            ));
4248        }
4249        counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4250            .checked_add(1)
4251            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4252        previous_oid = Some(oid);
4253        ids.push(oid);
4254    }
4255
4256    let mut running = 0u32;
4257    for (idx, count) in counts.iter().enumerate() {
4258        running = running
4259            .checked_add(*count)
4260            .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4261        if fanout[idx] != running {
4262            return Err(GitError::InvalidFormat(
4263                "multi-pack-index OIDF fanout does not match OIDL".into(),
4264            ));
4265        }
4266    }
4267    Ok(ids)
4268}
4269
4270fn parse_midx_object_offsets(
4271    bytes: &[u8],
4272    chunks: &[MultiPackIndexChunk],
4273    object_ids: Vec<ObjectId>,
4274    pack_count: u32,
4275) -> Result<Vec<MultiPackIndexEntry>> {
4276    let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4277        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4278    let expected_len = object_ids
4279        .len()
4280        .checked_mul(8)
4281        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4282    if data.len() != expected_len {
4283        return Err(GitError::InvalidFormat(
4284            "multi-pack-index OOFF chunk has invalid length".into(),
4285        ));
4286    }
4287    let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4288    if let Some(large_offsets) = large_offsets
4289        && large_offsets.len() % 8 != 0
4290    {
4291        return Err(GitError::InvalidFormat(
4292            "multi-pack-index LOFF chunk has invalid length".into(),
4293        ));
4294    }
4295
4296    let mut entries = Vec::with_capacity(object_ids.len());
4297    for (idx, oid) in object_ids.into_iter().enumerate() {
4298        let start = idx * 8;
4299        let pack_int_id = u32_be(&data[start..start + 4]);
4300        if pack_int_id >= pack_count {
4301            return Err(GitError::InvalidFormat(
4302                "multi-pack-index object points past pack table".into(),
4303            ));
4304        }
4305        let raw_offset = u32_be(&data[start + 4..start + 8]);
4306        let offset = if raw_offset & 0x8000_0000 == 0 {
4307            u64::from(raw_offset)
4308        } else {
4309            let Some(large_offsets) = large_offsets else {
4310                return Err(GitError::InvalidFormat(
4311                    "multi-pack-index large offset missing LOFF chunk".into(),
4312                ));
4313            };
4314            let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4315            let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4316                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4317            })?;
4318            let large_end = large_start.checked_add(8).ok_or_else(|| {
4319                GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4320            })?;
4321            if large_end > large_offsets.len() {
4322                return Err(GitError::InvalidFormat(
4323                    "multi-pack-index large offset points past LOFF chunk".into(),
4324                ));
4325            }
4326            u64_be(&large_offsets[large_start..large_end])
4327        };
4328        entries.push(MultiPackIndexEntry {
4329            oid,
4330            pack_int_id,
4331            offset,
4332        });
4333    }
4334    Ok(entries)
4335}
4336
4337fn parse_midx_reverse_index(
4338    bytes: &[u8],
4339    chunks: &[MultiPackIndexChunk],
4340    object_count: usize,
4341) -> Result<Option<Vec<u32>>> {
4342    let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4343        return Ok(None);
4344    };
4345    let expected_len = object_count
4346        .checked_mul(4)
4347        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4348    if data.len() != expected_len {
4349        return Err(GitError::InvalidFormat(
4350            "multi-pack-index RIDX chunk has invalid length".into(),
4351        ));
4352    }
4353    let mut positions = Vec::with_capacity(object_count);
4354    for idx in 0..object_count {
4355        let start = idx * 4;
4356        positions.push(u32_be(&data[start..start + 4]));
4357    }
4358    validate_position_permutation(&positions)?;
4359    Ok(Some(positions))
4360}
4361
4362fn parse_midx_bitmapped_packs(
4363    bytes: &[u8],
4364    chunks: &[MultiPackIndexChunk],
4365    pack_count: usize,
4366    object_count: usize,
4367) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4368    let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4369        return Ok(None);
4370    };
4371    let expected_len = pack_count
4372        .checked_mul(8)
4373        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4374    if data.len() != expected_len {
4375        return Err(GitError::InvalidFormat(
4376            "multi-pack-index BTMP chunk has invalid length".into(),
4377        ));
4378    }
4379    let mut entries = Vec::with_capacity(pack_count);
4380    for idx in 0..pack_count {
4381        let start = idx * 8;
4382        let bitmap_pos = u32_be(&data[start..start + 4]);
4383        let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4384        let bitmap_end = u64::from(bitmap_pos)
4385            .checked_add(u64::from(bitmap_nr))
4386            .ok_or_else(|| {
4387                GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4388            })?;
4389        if bitmap_end > object_count as u64 {
4390            return Err(GitError::InvalidFormat(
4391                "multi-pack-index BTMP range points past object table".into(),
4392            ));
4393        }
4394        entries.push(MultiPackBitmapPack {
4395            bitmap_pos,
4396            bitmap_nr,
4397        });
4398    }
4399    Ok(Some(entries))
4400}
4401
4402fn midx_chunk_data<'a>(
4403    bytes: &'a [u8],
4404    chunks: &[MultiPackIndexChunk],
4405    id: [u8; 4],
4406    required: bool,
4407) -> Result<Option<&'a [u8]>> {
4408    let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4409        if required {
4410            return Err(GitError::InvalidFormat(format!(
4411                "multi-pack-index missing {} chunk",
4412                std::str::from_utf8(&id).unwrap_or("required")
4413            )));
4414        }
4415        return Ok(None);
4416    };
4417    let start = usize::try_from(chunk.offset)
4418        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4419    let len = usize::try_from(chunk.len)
4420        .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4421    let end = start
4422        .checked_add(len)
4423        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4424    let Some(data) = bytes.get(start..end) else {
4425        return Err(GitError::InvalidFormat(
4426            "multi-pack-index chunk extends past file".into(),
4427        ));
4428    };
4429    Ok(Some(data))
4430}
4431
/// Returns the numeric identifier used for `format`'s hash function:
/// `1` for SHA-1 and `2` for SHA-256.
///
/// The match is deliberately exhaustive so that adding an `ObjectFormat`
/// variant forces this mapping to be revisited.
fn hash_function_id(format: ObjectFormat) -> u32 {
    match format {
        ObjectFormat::Sha1 => 1,
        ObjectFormat::Sha256 => 2,
    }
}
4438
/// Maximum number of clean (run) words that a single EWAH running-length word
/// can describe. The field is 32 bits wide (bits 1..=32 of the RLW).
///
/// Also serves as the mask applied after shifting the RLW right by one to
/// extract the run-length field.
const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;

/// Maximum number of literal (dirty) words that can trail a single EWAH
/// running-length word. The field is 31 bits wide (bits 33..=63 of the RLW).
///
/// Also serves as the mask applied to a literal count before it is shifted
/// into place in the RLW.
const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;

/// All-ones 64-bit word, used to recognise a "clean" run of set bits and as
/// the fill value when such a run is expanded.
const EWAH_ALL_ONES: u64 = u64::MAX;
4449
4450impl EwahBitmap {
4451    /// Constructs an [`EwahBitmap`] in git's canonical EWAH compressed form
4452    /// from a slice of raw uncompressed 64-bit words.
4453    ///
4454    /// Within each word bit `i` corresponds to position `word_index * 64 + i`,
4455    /// matching git's on-disk convention. `bit_size` records the number of
4456    /// logical bits the bitmap spans; it must not exceed `words.len() * 64`.
4457    ///
4458    /// This mirrors libgit's `ewah_add`/`ewah_add_empty_words` incremental
4459    /// encoder: consecutive all-zero or all-one words collapse into a run, and
4460    /// any other word is stored verbatim as a literal. Only the first
4461    /// `bit_size.div_ceil(64)` words back the declared bits; any extra trailing
4462    /// words supplied by the caller are ignored, just as git encodes a bitmap
4463    /// sized to its highest set bit.
4464    pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4465        let required_words = bit_size.div_ceil(64) as usize;
4466        if required_words > words.len() {
4467            return Err(GitError::InvalidFormat(format!(
4468                "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4469                words.len()
4470            )));
4471        }
4472        // Only the words that actually back the declared bits matter; libgit
4473        // never emits clean trailing zero words for the unused tail.
4474        let significant = &words[..required_words];
4475        let mut builder = EwahBuilder::new(bit_size);
4476        for &word in significant {
4477            if word == 0 {
4478                builder.add_empty_words(false, 1);
4479            } else if word == EWAH_ALL_ONES {
4480                builder.add_empty_words(true, 1);
4481            } else {
4482                builder.add_literal(word);
4483            }
4484        }
4485        builder.finish()
4486    }
4487
4488    /// Constructs an [`EwahBitmap`] from a set of bit positions.
4489    ///
4490    /// `bit_size` is the number of logical bits (typically the pack object
4491    /// count). Every position in `positions` must be strictly less than
4492    /// `bit_size`. Positions may be given in any order and may repeat.
4493    pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4494        let word_count = bit_size.div_ceil(64) as usize;
4495        let mut words = vec![0u64; word_count];
4496        for &position in positions {
4497            if position >= bit_size {
4498                return Err(GitError::InvalidFormat(format!(
4499                    "EWAH bit position {position} out of range for bit_size {bit_size}"
4500                )));
4501            }
4502            let word_index = (position / 64) as usize;
4503            let bit_index = position % 64;
4504            words[word_index] |= 1u64 << bit_index;
4505        }
4506        Self::from_words(bit_size, &words)
4507    }
4508
4509    /// An empty EWAH bitmap (no bits, no words). This is what git writes for an
4510    /// all-zero type bitmap (e.g. when a pack has no tags).
4511    pub fn empty() -> Self {
4512        Self {
4513            bit_size: 0,
4514            words: Vec::new(),
4515            rlw_position: 0,
4516        }
4517    }
4518
4519    /// Decodes the compressed EWAH back into raw 64-bit words, LSB-first within
4520    /// each word. The returned vector has `bit_size.div_ceil(64)` entries.
4521    ///
4522    /// This is the inverse of [`EwahBitmap::from_words`] for the bits the
4523    /// bitmap actually covers and is primarily used to validate roundtrips.
4524    pub fn to_words(&self) -> Result<Vec<u64>> {
4525        let mut out = Vec::new();
4526        let mut word_idx = 0usize;
4527        while word_idx < self.words.len() {
4528            let rlw = self.words[word_idx];
4529            let run_bit = rlw & 1;
4530            let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4531            let literal_words = (rlw >> 33) as usize;
4532            word_idx += 1;
4533            let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4534            for _ in 0..run_words {
4535                out.push(fill);
4536            }
4537            let literal_end = word_idx
4538                .checked_add(literal_words)
4539                .filter(|end| *end <= self.words.len())
4540                .ok_or_else(|| {
4541                    GitError::InvalidFormat("EWAH literal words extend past word table".into())
4542                })?;
4543            out.extend_from_slice(&self.words[word_idx..literal_end]);
4544            word_idx = literal_end;
4545        }
4546        let required_words = (self.bit_size as usize).div_ceil(64);
4547        if out.len() < required_words {
4548            out.resize(required_words, 0);
4549        }
4550        out.truncate(required_words);
4551        Ok(out)
4552    }
4553
4554    /// Returns the sorted set bit positions covered by this bitmap.
4555    pub fn to_positions(&self) -> Result<Vec<u32>> {
4556        let words = self.to_words()?;
4557        let mut positions = Vec::new();
4558        for (word_index, word) in words.iter().enumerate() {
4559            let mut remaining = *word;
4560            while remaining != 0 {
4561                let bit = remaining.trailing_zeros();
4562                let position = (word_index as u64) * 64 + u64::from(bit);
4563                if position < u64::from(self.bit_size) {
4564                    // position always fits in u32 because bit_size is u32.
4565                    positions.push(position as u32);
4566                }
4567                remaining &= remaining - 1;
4568            }
4569        }
4570        Ok(positions)
4571    }
4572
4573    /// Serialises the bitmap to git's on-disk EWAH byte layout: `bit_size`
4574    /// (u32 BE), word count (u32 BE), each compressed word (u64 BE), then the
4575    /// running-length-word position (u32 BE).
4576    pub fn to_bytes(&self) -> Vec<u8> {
4577        let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4578        self.append_bytes(&mut out);
4579        out
4580    }
4581
4582    fn append_bytes(&self, out: &mut Vec<u8>) {
4583        out.extend_from_slice(&self.bit_size.to_be_bytes());
4584        out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4585        for word in &self.words {
4586            out.extend_from_slice(&word.to_be_bytes());
4587        }
4588        out.extend_from_slice(&self.rlw_position.to_be_bytes());
4589    }
4590}
4591
/// Incremental EWAH compressed-buffer builder mirroring libgit's `ewah_add`.
///
/// The buffer is a sequence of blocks. Each block begins with a running-length
/// word (RLW) and is followed by zero or more literal words:
///   * bit 0      => value of the clean run words (0 or 1)
///   * bits 1..=32 => number of clean run words (32-bit field)
///   * bits 33..=63 => number of trailing literal words (31-bit field)
struct EwahBuilder {
    // Number of logical bits the finished bitmap is declared to span.
    bit_size: u32,
    // Compressed buffer under construction: RLWs interleaved with literals.
    words: Vec<u64>,
    // Index into `words` of the RLW of the block currently being filled.
    rlw_position: usize,
}
4604
4605impl EwahBuilder {
4606    fn new(bit_size: u32) -> Self {
4607        // Every EWAH buffer begins with an RLW, even an empty one.
4608        Self {
4609            bit_size,
4610            words: vec![0u64],
4611            rlw_position: 0,
4612        }
4613    }
4614
    /// Returns the current running-length word, i.e. the word stored at
    /// `rlw_position`.
    fn rlw(&self) -> u64 {
        self.words[self.rlw_position]
    }
4618
    /// Overwrites the current running-length word (the word at
    /// `rlw_position`) with `value`.
    fn set_rlw(&mut self, value: u64) {
        self.words[self.rlw_position] = value;
    }
4622
4623    fn rlw_running_len(&self) -> u64 {
4624        (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
4625    }
4626
4627    fn rlw_running_bit(&self) -> bool {
4628        self.rlw() & 1 == 1
4629    }
4630
    /// Number of literal words recorded in the current RLW. The count lives in
    /// the bits above bit 32, so shifting right by 33 leaves just that field.
    fn rlw_literal_len(&self) -> u64 {
        self.rlw() >> 33
    }
4634
4635    fn set_running_bit(&mut self, bit: bool) {
4636        let mut value = self.rlw();
4637        value &= !1;
4638        value |= u64::from(bit);
4639        self.set_rlw(value);
4640    }
4641
4642    fn set_running_len(&mut self, len: u64) {
4643        let mut value = self.rlw();
4644        value &= !(EWAH_MAX_RUNNING_LEN << 1);
4645        value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
4646        self.set_rlw(value);
4647    }
4648
4649    fn set_literal_len(&mut self, len: u64) {
4650        let mut value = self.rlw();
4651        value &= (1u64 << 33) - 1;
4652        value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
4653        self.set_rlw(value);
4654    }
4655
4656    /// Begins a fresh RLW block at the end of the buffer.
4657    fn push_rlw(&mut self) {
4658        self.rlw_position = self.words.len();
4659        self.words.push(0);
4660    }
4661
4662    /// Appends `number` clean words whose bits are all `value`, mirroring
4663    /// libgit's `ewah_add_empty_words`.
4664    ///
4665    /// A run can only be merged into the current RLW when that RLW has not yet
4666    /// emitted any literal words and its run either is empty or already carries
4667    /// the same fill value. Otherwise a fresh RLW block must be started, because
4668    /// every block stores its run strictly before its literals.
4669    fn add_empty_words(&mut self, value: bool, mut number: u64) {
4670        while number > 0 {
4671            // The current RLW can absorb more run words only when it has no
4672            // literals yet, its run is either empty or already the right fill
4673            // value, and the 32-bit run-length field is not already saturated.
4674            let can_extend = self.rlw_literal_len() == 0
4675                && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
4676                && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
4677            if !can_extend {
4678                self.push_rlw();
4679            }
4680            if self.rlw_running_len() == 0 {
4681                self.set_running_bit(value);
4682            }
4683            let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
4684            let take = available.min(number);
4685            self.set_running_len(self.rlw_running_len() + take);
4686            number -= take;
4687        }
4688    }
4689
4690    /// Appends a single literal (dirty) word verbatim, mirroring libgit's
4691    /// `ewah_add_dirty_words` for a count of one.
4692    fn add_literal(&mut self, word: u64) {
4693        if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
4694            self.push_rlw();
4695        }
4696        let literal_len = self.rlw_literal_len();
4697        self.set_literal_len(literal_len + 1);
4698        self.words.push(word);
4699    }
4700
4701    fn finish(self) -> Result<EwahBitmap> {
4702        let rlw_position = u32::try_from(self.rlw_position)
4703            .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
4704        if self.words.len() > u32::MAX as usize {
4705            return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
4706        }
4707        Ok(EwahBitmap {
4708            bit_size: self.bit_size,
4709            words: self.words,
4710            rlw_position,
4711        })
4712    }
4713}
4714
/// Builder that assembles a reachability bitmap (`.bitmap`) for a pack.
///
/// The writer is constructed from the object layout of a pack (one
/// [`ObjectType`] per object, in pack order) and the pack's trailing checksum.
/// Callers then register one selected commit per [`add_commit`] call, supplying
/// the set of pack positions reachable from that commit. [`build`]/[`write`]
/// produce a [`PackBitmapIndex`] / serialised `.bitmap` bytes matching git's
/// on-disk format (signature `BITM`, version 1).
///
/// [`add_commit`]: PackBitmapWriter::add_commit
/// [`build`]: PackBitmapWriter::build
/// [`write`]: PackBitmapWriter::write
#[derive(Debug, Clone)]
pub struct PackBitmapWriter {
    /// Object-id format of the bitmap; the pack checksum must use the same
    /// format.
    format: ObjectFormat,
    /// Trailing checksum of the pack this bitmap describes.
    pack_checksum: ObjectId,
    /// Number of objects in the pack; passed as the size of every bitmap the
    /// writer builds and used to range-check positions.
    object_count: u32,
    /// Pack-order positions of the commit objects, in ascending order.
    commit_positions: Vec<u32>,
    /// Pack-order positions of the tree objects, in ascending order.
    tree_positions: Vec<u32>,
    /// Pack-order positions of the blob objects, in ascending order.
    blob_positions: Vec<u32>,
    /// Pack-order positions of the tag objects, in ascending order.
    tag_positions: Vec<u32>,
    /// Optional name-hash cache holding one `u32` per object, in pack order.
    name_hash_cache: Option<Vec<u32>>,
    /// Commits registered so far, in registration order.
    selected: Vec<SelectedCommit>,
}
4739
/// One commit registered with [`PackBitmapWriter`], kept until the bitmap
/// index is built.
#[derive(Debug, Clone)]
struct SelectedCommit {
    /// Oid-sorted `.idx` position (what the on-disk entry records). The
    /// commit's pack-order position lives in `reachable` with the rest of the
    /// bits.
    commit_index_position: u32,
    /// Selection flags copied into the bitmap entry.
    flags: u8,
    /// Pack-order positions of the objects reachable from the commit,
    /// including the commit itself. May contain duplicates.
    reachable: Vec<u32>,
}
4749
4750impl PackBitmapWriter {
4751    /// `OBJ_NONE` selection flag: this commit's bitmap is stored in full (no XOR
4752    /// compression against a previously selected commit). This is the only flag
4753    /// value this writer emits.
4754    pub const FLAG_NONE: u8 = 0;
4755
4756    /// Creates a writer for a pack whose objects (in pack order) have the given
4757    /// [`ObjectType`]s and whose trailing checksum is `pack_checksum`.
4758    ///
4759    /// Returns an error if the pack contains more than `u32::MAX` objects, if
4760    /// `pack_checksum`'s format does not match `format`, or if any object type
4761    /// is not one of the four reachable git object kinds.
4762    pub fn new(
4763        format: ObjectFormat,
4764        pack_checksum: ObjectId,
4765        object_types: &[ObjectType],
4766    ) -> Result<Self> {
4767        if object_types.len() > u32::MAX as usize {
4768            return Err(GitError::InvalidFormat(
4769                "too many objects for a pack bitmap".into(),
4770            ));
4771        }
4772        if pack_checksum.format() != format {
4773            return Err(GitError::InvalidObjectId(
4774                "pack checksum format does not match bitmap format".into(),
4775            ));
4776        }
4777        let object_count = object_types.len() as u32;
4778        let mut commit_positions = Vec::new();
4779        let mut tree_positions = Vec::new();
4780        let mut blob_positions = Vec::new();
4781        let mut tag_positions = Vec::new();
4782        for (index, object_type) in object_types.iter().enumerate() {
4783            let position = index as u32;
4784            match object_type {
4785                ObjectType::Commit => commit_positions.push(position),
4786                ObjectType::Tree => tree_positions.push(position),
4787                ObjectType::Blob => blob_positions.push(position),
4788                ObjectType::Tag => tag_positions.push(position),
4789            }
4790        }
4791        Ok(Self {
4792            format,
4793            pack_checksum,
4794            object_count,
4795            commit_positions,
4796            tree_positions,
4797            blob_positions,
4798            tag_positions,
4799            name_hash_cache: None,
4800            selected: Vec::new(),
4801        })
4802    }
4803
4804    /// Attaches a name-hash cache (one `u32` per object, in pack order). When
4805    /// set, the written bitmap advertises [`PackBitmapIndex::OPTION_HASH_CACHE`]
4806    /// and appends the cache after the bitmap entries, exactly as git does.
4807    ///
4808    /// Returns an error if the cache length does not equal the object count.
4809    pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
4810        if cache.len() != self.object_count as usize {
4811            return Err(GitError::InvalidFormat(format!(
4812                "name hash cache has {} entries but pack has {} objects",
4813                cache.len(),
4814                self.object_count
4815            )));
4816        }
4817        self.name_hash_cache = Some(cache);
4818        Ok(self)
4819    }
4820
4821    /// Registers a selected commit and the pack positions reachable from it.
4822    ///
4823    /// `commit_position` is the *pack-order* position of the commit itself (the
4824    /// bit-number space); it must reference a commit object and is implicitly
4825    /// part of the reachable set. `commit_index_position` is the commit's
4826    /// position in the *oid-sorted* pack index — this is what the on-disk entry
4827    /// records (upstream `oid_pos`); bits and entry positions live in different
4828    /// spaces. `reachable` lists the pack-order positions of every object
4829    /// reachable from the commit (it may include or omit `commit_position`;
4830    /// duplicates are fine). All positions must be in range. The commit's full
4831    /// (non-XORed) bitmap is stored.
4832    pub fn add_commit(
4833        &mut self,
4834        commit_position: u32,
4835        commit_index_position: u32,
4836        reachable: &[u32],
4837    ) -> Result<()> {
4838        if commit_position >= self.object_count {
4839            return Err(GitError::InvalidFormat(format!(
4840                "commit position {commit_position} out of range for {} objects",
4841                self.object_count
4842            )));
4843        }
4844        if commit_index_position >= self.object_count {
4845            return Err(GitError::InvalidFormat(format!(
4846                "commit index position {commit_index_position} out of range for {} objects",
4847                self.object_count
4848            )));
4849        }
4850        if !self.commit_positions.contains(&commit_position) {
4851            return Err(GitError::InvalidFormat(format!(
4852                "bitmap commit position {commit_position} is not a commit object"
4853            )));
4854        }
4855        for &position in reachable {
4856            if position >= self.object_count {
4857                return Err(GitError::InvalidFormat(format!(
4858                    "reachable position {position} out of range for {} objects",
4859                    self.object_count
4860                )));
4861            }
4862        }
4863        let mut reachable = reachable.to_vec();
4864        reachable.push(commit_position);
4865        self.selected.push(SelectedCommit {
4866            commit_index_position,
4867            flags: Self::FLAG_NONE,
4868            reachable,
4869        });
4870        Ok(())
4871    }
4872
4873    /// Builds the in-memory [`PackBitmapIndex`] without serialising it.
4874    ///
4875    /// The resulting index always advertises
4876    /// [`PackBitmapIndex::OPTION_FULL_DAG`] (the four type bitmaps fully cover
4877    /// the pack) and, when a name-hash cache was attached,
4878    /// [`PackBitmapIndex::OPTION_HASH_CACHE`].
4879    pub fn build(&self) -> Result<PackBitmapIndex> {
4880        let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
4881        let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
4882        let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
4883        let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
4884
4885        let mut entries = Vec::with_capacity(self.selected.len());
4886        for selected in &self.selected {
4887            let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
4888            entries.push(PackBitmapEntry {
4889                object_position: selected.commit_index_position,
4890                xor_offset: 0,
4891                flags: selected.flags,
4892                bitmap,
4893            });
4894        }
4895
4896        let mut options = PackBitmapIndex::OPTION_FULL_DAG;
4897        if self.name_hash_cache.is_some() {
4898            options |= PackBitmapIndex::OPTION_HASH_CACHE;
4899        }
4900
4901        // The index checksum is only known once the body is serialised; the
4902        // dedicated `write` path fills it in. `build` reports a placeholder of
4903        // the correct format so the struct is self-consistent for callers that
4904        // only need the decoded bitmaps.
4905        let placeholder_checksum = ObjectId::null(self.format);
4906        Ok(PackBitmapIndex {
4907            version: 1,
4908            format: self.format,
4909            options,
4910            pack_checksum: self.pack_checksum.clone(),
4911            index_checksum: placeholder_checksum,
4912            type_bitmaps: PackBitmapTypeBitmaps {
4913                commits,
4914                trees,
4915                blobs,
4916                tags,
4917            },
4918            entries,
4919            name_hash_cache: self.name_hash_cache.clone(),
4920        })
4921    }
4922
4923    /// Builds and serialises the `.bitmap` file, returning the on-disk bytes
4924    /// (including the trailing index checksum).
4925    pub fn write(&self) -> Result<Vec<u8>> {
4926        self.build()?.write()
4927    }
4928}
4929
4930impl PackBitmapIndex {
4931    /// Serialises this index into git's on-disk `.bitmap` byte layout.
4932    ///
4933    /// This is the exact inverse of [`PackBitmapIndex::parse`]: signature
4934    /// `BITM`, version (u16 BE), options (u16 BE), entry count (u32 BE), the
4935    /// pack checksum, the four type bitmaps (commits, trees, blobs, tags), each
4936    /// commit entry (object position, XOR offset, flags, EWAH bitmap), the
4937    /// optional name-hash cache, and finally the trailing index checksum over
4938    /// everything written so far.
4939    ///
4940    /// The `index_checksum` field of `self` is ignored and recomputed from the
4941    /// serialised body. Returns an error for unsupported versions, mismatched
4942    /// object-id formats, an oversized entry table, or an inconsistent name-hash
4943    /// cache.
4944    pub fn write(&self) -> Result<Vec<u8>> {
4945        if self.version != 1 {
4946            return Err(GitError::Unsupported(format!(
4947                "bitmap index version {}",
4948                self.version
4949            )));
4950        }
4951        let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
4952        if self.options & !known_options != 0 {
4953            return Err(GitError::Unsupported(format!(
4954                "bitmap index options {:#06x}",
4955                self.options & !known_options
4956            )));
4957        }
4958        if self.pack_checksum.format() != self.format {
4959            return Err(GitError::InvalidObjectId(
4960                "bitmap pack checksum format does not match index format".into(),
4961            ));
4962        }
4963        if self.entries.len() > u32::MAX as usize {
4964            return Err(GitError::InvalidFormat(
4965                "too many bitmap index entries".into(),
4966            ));
4967        }
4968        let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
4969        match (&self.name_hash_cache, want_cache) {
4970            (Some(_), false) => {
4971                return Err(GitError::InvalidFormat(
4972                    "name hash cache present without OPTION_HASH_CACHE".into(),
4973                ));
4974            }
4975            (None, true) => {
4976                return Err(GitError::InvalidFormat(
4977                    "OPTION_HASH_CACHE set without a name hash cache".into(),
4978                ));
4979            }
4980            _ => {}
4981        }
4982
4983        let mut out = Vec::new();
4984        out.extend_from_slice(b"BITM");
4985        out.extend_from_slice(&self.version.to_be_bytes());
4986        out.extend_from_slice(&self.options.to_be_bytes());
4987        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
4988        out.extend_from_slice(self.pack_checksum.as_bytes());
4989
4990        self.type_bitmaps.commits.append_bytes(&mut out);
4991        self.type_bitmaps.trees.append_bytes(&mut out);
4992        self.type_bitmaps.blobs.append_bytes(&mut out);
4993        self.type_bitmaps.tags.append_bytes(&mut out);
4994
4995        for (idx, entry) in self.entries.iter().enumerate() {
4996            if entry.xor_offset as usize > idx {
4997                return Err(GitError::InvalidFormat(
4998                    "bitmap index entry has invalid XOR offset".into(),
4999                ));
5000            }
5001            out.extend_from_slice(&entry.object_position.to_be_bytes());
5002            out.push(entry.xor_offset);
5003            out.push(entry.flags);
5004            entry.bitmap.append_bytes(&mut out);
5005        }
5006
5007        if let Some(cache) = &self.name_hash_cache {
5008            for value in cache {
5009                out.extend_from_slice(&value.to_be_bytes());
5010            }
5011        }
5012
5013        let checksum = sley_core::digest_bytes(self.format, &out)?;
5014        out.extend_from_slice(checksum.as_bytes());
5015        Ok(out)
5016    }
5017}
5018
5019/// Convenience wrapper that builds a `.bitmap` file in one call.
5020///
5021/// `object_types` lists the [`ObjectType`] of every pack object in pack order,
5022/// `pack_checksum` is the pack's trailing checksum, and `commits` carries, per
5023/// selected commit, `(pack_position, index_position, reachable_pack_positions)`
5024/// (see [`PackBitmapWriter::add_commit`] for the two position spaces). An
5025/// optional `name_hash_cache` (one entry per object) may be supplied to emit
5026/// the hash-cache extension.
5027pub fn write_bitmap(
5028    format: ObjectFormat,
5029    pack_checksum: ObjectId,
5030    object_types: &[ObjectType],
5031    commits: &[(u32, u32, Vec<u32>)],
5032    name_hash_cache: Option<Vec<u32>>,
5033) -> Result<Vec<u8>> {
5034    let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5035    if let Some(cache) = name_hash_cache {
5036        writer = writer.with_name_hash_cache(cache)?;
5037    }
5038    for (commit_position, commit_index_position, reachable) in commits {
5039        writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5040    }
5041    writer.write()
5042}
5043
5044#[cfg(test)]
5045mod tests {
5046    use super::*;
5047    use flate2::Compression;
5048    use flate2::read::ZlibDecoder;
5049    use flate2::write::ZlibEncoder;
5050    use std::fs;
5051    use std::io::Read;
5052    use std::io::Write;
5053    use std::path::{Path, PathBuf};
5054    use std::process::Command;
5055    use std::time::{SystemTime, UNIX_EPOCH};
5056
5057    fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5058        PackWriteOptions::new()
5059            .with_prefer_ofs_delta(prefer_ofs_delta)
5060            .with_reorder(false)
5061    }
5062
5063    #[test]
5064    fn parses_single_blob_pack() {
5065        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5066        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5067        assert_eq!(parsed.version, 2);
5068        assert_eq!(parsed.entries.len(), 1);
5069        let object = &parsed.entries[0].object;
5070        assert_eq!(object.object_type, ObjectType::Blob);
5071        assert_eq!(object.body, b"hello\n");
5072        assert_eq!(
5073            parsed.entries[0].entry.oid.to_hex(),
5074            "ce013625030ba8dba906f756967f9e9ca394464a"
5075        );
5076    }
5077
5078    #[test]
5079    fn parses_single_blob_pack_sha256() {
5080        let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5081        let parsed =
5082            PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5083        assert_eq!(parsed.version, 2);
5084        assert_eq!(parsed.entries.len(), 1);
5085        let object = &parsed.entries[0].object;
5086        assert_eq!(object.object_type, ObjectType::Blob);
5087        assert_eq!(object.body, b"hello\n");
5088        assert_eq!(
5089            parsed.entries[0].entry.oid,
5090            object
5091                .object_id(ObjectFormat::Sha256)
5092                .expect("test operation should succeed")
5093        );
5094    }
5095
5096    #[test]
5097    fn parses_bundle_pack_payload_with_bundle_format() {
5098        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5099        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5100            .expect("test operation should succeed");
5101        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5102            .into_bytes()
5103            .into_iter()
5104            .chain(pack)
5105            .collect::<Vec<_>>();
5106        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5107            .expect("test operation should succeed");
5108
5109        let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5110        assert_eq!(parsed.entries.len(), 1);
5111        assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5112        assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5113    }
5114
5115    /// Build a pack whose single blob entry header LIES about its decompressed
5116    /// size: it declares `declared_size` while the actual zlib payload only
5117    /// inflates to `real_body`. A short `real_body` plus a `declared_size` of
5118    /// `u64::MAX` is the decompression-bomb shape — the header claims terabytes
5119    /// from a handful of compressed bytes.
5120    fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5121        let mut pack = Vec::new();
5122        pack.extend_from_slice(b"PACK");
5123        pack.extend_from_slice(&2u32.to_be_bytes());
5124        pack.extend_from_slice(&1u32.to_be_bytes());
5125        // Object type 3 == blob; size varint encodes the *attacker-declared* size.
5126        write_pack_entry_header_kind(&mut pack, 3, declared_size);
5127        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5128        encoder
5129            .write_all(real_body)
5130            .expect("test operation should succeed");
5131        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5132        let checksum =
5133            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5134        pack.extend_from_slice(checksum.as_bytes());
5135        pack
5136    }
5137
5138    /// Regression: a crafted pack object header declaring a gigantic decompressed
5139    /// size with a tiny compressed payload must NOT drive an up-front
5140    /// reservation/allocation of that declared size (OOM/abort). sley#2: the
5141    /// header `size` is attacker-controlled over the network (install_raw_pack →
5142    /// sley-fetch), so it must be validated/bounded before any `Vec::reserve`.
5143    ///
5144    /// On the unfixed code, `inflate_into` did `out.reserve(header.size as usize)`
5145    /// with `header.size == u64::MAX`, which panics with "capacity overflow" (or
5146    /// aborts on alloc failure) *before* the size-mismatch check could fire. We
5147    /// run parse on a worker thread so that panic surfaces as a `join()` error
5148    /// rather than killing the test process; the fix turns this into a clean
5149    /// `Err` returned normally.
5150    #[test]
5151    fn rejects_decompression_bomb_header_without_oom() {
5152        for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5153            let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5154            let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5155            let result = handle.join();
5156            // The parse thread must not have panicked/aborted on a huge reserve.
5157            assert!(
5158                result.is_ok(),
5159                "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5160            );
5161            // And parsing must reject the lie (decoded len != declared size).
5162            let parse_result = result.expect("parse thread should not panic on a bomb header");
5163            assert!(
5164                parse_result.is_err(),
5165                "bomb header (declared={declared}) should be rejected as invalid"
5166            );
5167        }
5168    }
5169
5170    /// Build a 2-object pack: a real base blob followed by a delta (ref or ofs)
5171    /// whose *result-size* varint lies, declaring `declared_result_size`, while
5172    /// carrying a tiny real instruction stream. The delta's base-size varint is
5173    /// set correctly (so the base-size check at the top of `apply_pack_delta`
5174    /// passes and we reach the result reservation). Used to drive the sley#35
5175    /// delta-result-size bomb.
5176    fn lying_result_size_delta_pack(
5177        format: ObjectFormat,
5178        declared_result_size: u64,
5179        delta_kind: DeltaKind,
5180    ) -> Vec<u8> {
5181        let base = b"hello";
5182        let result = b"hello world"; // real produced length = 11
5183
5184        // Hand-build a delta with a truthful base-size and a LYING result-size.
5185        let mut delta = Vec::new();
5186        write_delta_varint(&mut delta, base.len() as u64);
5187        write_delta_varint(&mut delta, declared_result_size);
5188        // Real instructions: copy `base` then insert " world".
5189        let suffix = &result[base.len()..];
5190        delta.push(0x90); // copy, 1 size byte present (bit 0x10)
5191        delta.push(base.len() as u8);
5192        delta.push(suffix.len() as u8);
5193        delta.extend_from_slice(suffix);
5194
5195        let mut pack = Vec::new();
5196        pack.extend_from_slice(b"PACK");
5197        pack.extend_from_slice(&2u32.to_be_bytes());
5198        pack.extend_from_slice(&2u32.to_be_bytes());
5199
5200        let base_offset = pack.len();
5201        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
5202        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5203        encoder
5204            .write_all(base)
5205            .expect("test operation should succeed");
5206        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5207
5208        let delta_offset = pack.len();
5209        write_pack_entry_header_kind(
5210            &mut pack,
5211            match delta_kind {
5212                DeltaKind::Offset => 6,
5213                DeltaKind::Ref => 7,
5214            },
5215            delta.len() as u64,
5216        );
5217        match delta_kind {
5218            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
5219            DeltaKind::Ref => {
5220                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
5221                    .expect("test operation should succeed");
5222                pack.extend_from_slice(base_oid.as_bytes());
5223            }
5224        }
5225        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5226        encoder
5227            .write_all(&delta)
5228            .expect("test operation should succeed");
5229        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5230
5231        let checksum =
5232            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5233        pack.extend_from_slice(checksum.as_bytes());
5234        pack
5235    }
5236
5237    /// Regression (sley#35): the 2nd instance of the sley#2 decompression-bomb
5238    /// class. `apply_pack_delta` read an attacker-controlled `result_size` varint
5239    /// from a network delta and fed it straight to `Vec::with_capacity`. A tiny
5240    /// delta declaring `result_size == u64::MAX` (or ~1 TiB) aborts the process
5241    /// ("capacity overflow"/alloc failure, SIGABRT) BEFORE the post-decode
5242    /// size-mismatch check can reject the lie. Both ref-delta and ofs-delta paths
5243    /// reach the same reservation, so both must be safe. We resolve the pack on a
5244    /// worker thread so an abort/panic surfaces as a `join()` error rather than
5245    /// killing the whole test binary; the fix turns the bomb into a clean `Err`.
5246    #[test]
5247    fn rejects_delta_result_size_bomb_without_oom() {
5248        let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
5249        for &declared in bombs {
5250            for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5251                let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
5252                let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5253                let join_result = handle.join();
5254                assert!(
5255                    join_result.is_ok(),
5256                    "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
5257                     instead of erroring cleanly"
5258                );
5259                let parse_result =
5260                    join_result.expect("parse thread should not panic on a delta bomb");
5261                assert!(
5262                    parse_result.is_err(),
5263                    "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
5264                     as invalid (result.len() != declared)"
5265                );
5266            }
5267        }
5268    }
5269
5270    /// A legitimate (truthful) delta whose result-size varint matches the real
5271    /// produced length must still resolve correctly — the bound only caps the
5272    /// speculative reservation, it must not break real delta application.
5273    #[test]
5274    fn applies_legitimate_delta_after_result_size_bound() {
5275        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5276            let base = b"hello";
5277            let result = b"hello world";
5278            let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
5279            let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
5280            assert_eq!(parsed.entries.len(), 2);
5281            assert_eq!(parsed.entries[0].object.body, base);
5282            assert_eq!(parsed.entries[1].object.body, result);
5283        }
5284    }
5285
5286    #[test]
5287    fn bounded_inflate_reserve_caps_attacker_declared_size() {
5288        // A tiny compressed input can't justify a multi-gigabyte reservation.
5289        assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
5290        // The absolute ceiling caps even a large input-justified hint.
5291        assert_eq!(
5292            bounded_inflate_reserve(usize::MAX, usize::MAX),
5293            MAX_INFLATE_RESERVE
5294        );
5295        // A modest legitimate hint is preserved unchanged (no regression for real
5296        // objects): 1000 bytes of output from 500 bytes of input is well within
5297        // both bounds.
5298        assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
5299        // Floor of 64 for tiny hints.
5300        assert_eq!(bounded_inflate_reserve(0, 0), 64);
5301    }
5302
5303    #[test]
5304    fn rejects_bundle_pack_payload_with_wrong_object_format() {
5305        let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5306        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
5307            .expect("test operation should succeed");
5308        let bundle_bytes =
5309            format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
5310                .into_bytes()
5311                .into_iter()
5312                .chain(pack)
5313                .collect::<Vec<_>>();
5314        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5315            .expect("test operation should succeed");
5316
5317        assert!(PackFile::parse_bundle(&bundle).is_err());
5318    }
5319
5320    fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5321        let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5322        let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5323        let owned_view =
5324            PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5325                .expect("test operation should succeed");
5326
5327        assert_eq!(view.version, owned.version);
5328        assert_eq!(view.count, owned.entries.len());
5329        assert_eq!(view.count(), owned.entries.len());
5330        assert_eq!(view.fanout(), &owned.fanout);
5331        assert_eq!(view.pack_checksum, owned.pack_checksum);
5332        assert_eq!(view.index_checksum, owned.index_checksum);
5333        assert_eq!(owned_view.version, owned.version);
5334        assert_eq!(owned_view.count(), owned.entries.len());
5335        assert_eq!(owned_view.fanout(), &owned.fanout);
5336        assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5337        assert_eq!(owned_view.index_checksum, owned.index_checksum);
5338        for entry in &owned.entries {
5339            let owned_found = owned
5340                .find(&entry.oid)
5341                .expect("test operation should succeed");
5342            let expected = Some(PackIndexLookup {
5343                crc32: owned_found.crc32,
5344                offset: owned_found.offset,
5345            });
5346            assert_eq!(view.find(&entry.oid), expected);
5347            assert_eq!(owned_view.find(&entry.oid), expected);
5348        }
5349    }
5350
5351    #[test]
5352    fn writes_pack_and_index_that_round_trip() {
5353        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
5354        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5355            .expect("test operation should succeed");
5356        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5357        let index =
5358            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5359        let oid = object
5360            .object_id(ObjectFormat::Sha1)
5361            .expect("test operation should succeed");
5362        assert_eq!(pack.entries[0].object, object);
5363        assert_eq!(index.pack_checksum, pack.checksum);
5364        assert_eq!(
5365            index
5366                .find(&oid)
5367                .expect("test operation should succeed")
5368                .offset,
5369            12
5370        );
5371    }
5372
5373    #[test]
5374    fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
5375        let objects = (0..8)
5376            .map(|idx| {
5377                EncodedObject::new(
5378                    ObjectType::Blob,
5379                    format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
5380                )
5381            })
5382            .collect::<Vec<_>>();
5383        let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5384            .expect("test operation should succeed");
5385
5386        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
5387
5388        let view =
5389            PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
5390        let missing = sley_core::object_id_for_bytes(
5391            ObjectFormat::Sha1,
5392            "blob",
5393            b"not present in borrowed index\n",
5394        )
5395        .expect("test operation should succeed");
5396        assert_eq!(view.find(&missing), None);
5397    }
5398
5399    #[test]
5400    fn writes_sha256_pack_and_index_that_round_trip() {
5401        let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
5402        let written =
5403            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5404                .expect("test operation should succeed");
5405        let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
5406            .expect("test operation should succeed");
5407        let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
5408            .expect("test operation should succeed");
5409        let oid = object
5410            .object_id(ObjectFormat::Sha256)
5411            .expect("test operation should succeed");
5412        assert_eq!(pack.entries[0].object, object);
5413        assert_eq!(index.pack_checksum, pack.checksum);
5414        assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
5415        assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
5416        assert_eq!(
5417            index
5418                .find(&oid)
5419                .expect("test operation should succeed")
5420                .offset,
5421            12
5422        );
5423    }
5424
5425    #[test]
5426    fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
5427        let objects = (0..4)
5428            .map(|idx| {
5429                EncodedObject::new(
5430                    ObjectType::Blob,
5431                    format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
5432                )
5433            })
5434            .collect::<Vec<_>>();
5435        let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
5436            .expect("test operation should succeed");
5437
5438        assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
5439    }
5440
5441    #[test]
5442    fn indexes_existing_sha256_pack_bytes() {
5443        let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
5444        let written =
5445            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5446                .expect("test operation should succeed");
5447
5448        let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
5449            .expect("test operation should succeed");
5450        let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
5451            .expect("test operation should succeed");
5452
5453        assert_eq!(indexed.pack_checksum, written.checksum);
5454        assert_eq!(indexed.entries, written.entries);
5455        assert_eq!(index.pack_checksum, written.checksum);
5456        assert_eq!(index.entries, written.entries);
5457    }
5458
5459    #[test]
5460    fn indexes_existing_delta_pack_bytes() {
5461        let (base, changed) = similar_blob_objects();
5462        let options = delta_pack_options(true);
5463        let written = PackFile::write_packed_with_options(
5464            &[base, changed.clone()],
5465            ObjectFormat::Sha1,
5466            &options,
5467        )
5468        .expect("test operation should succeed");
5469
5470        let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
5471            .expect("test operation should succeed");
5472        let index =
5473            PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
5474        let changed_oid = changed
5475            .object_id(ObjectFormat::Sha1)
5476            .expect("test operation should succeed");
5477
5478        assert_eq!(indexed.pack_checksum, written.checksum);
5479        assert_eq!(indexed.entries, written.entries);
5480        assert_eq!(
5481            index
5482                .find(&changed_oid)
5483                .expect("test operation should succeed")
5484                .offset,
5485            written.entries[1].offset
5486        );
5487        assert_eq!(
5488            index
5489                .find(&changed_oid)
5490                .expect("test operation should succeed")
5491                .crc32,
5492            written.entries[1].crc32
5493        );
5494    }
5495
5496    #[test]
5497    fn writes_ref_delta_pack_and_index_that_round_trip() {
5498        let (base, changed) = similar_blob_objects();
5499        let options = delta_pack_options(false);
5500        let written = PackFile::write_packed_with_options(
5501            &[base.clone(), changed.clone()],
5502            ObjectFormat::Sha1,
5503            &options,
5504        )
5505        .expect("test operation should succeed");
5506        let mut second_offset = written.entries[1].offset as usize;
5507        let header = parse_entry_header(&written.pack, &mut second_offset)
5508            .expect("test operation should succeed");
5509        assert_eq!(header.kind, PackObjectKind::RefDelta);
5510
5511        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5512        let index =
5513            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5514        let oid = changed
5515            .object_id(ObjectFormat::Sha1)
5516            .expect("test operation should succeed");
5517        assert_eq!(pack.entries[0].object, base);
5518        assert_eq!(pack.entries[1].object, changed);
5519        assert_eq!(index.pack_checksum, pack.checksum);
5520        assert_eq!(
5521            index
5522                .find(&oid)
5523                .expect("test operation should succeed")
5524                .offset,
5525            written.entries[1].offset
5526        );
5527    }
5528
5529    #[test]
5530    fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
5531        let (base, changed) = similar_blob_objects();
5532        let options = delta_pack_options(true);
5533        let written = PackFile::write_packed_with_options(
5534            &[base, changed.clone()],
5535            ObjectFormat::Sha1,
5536            &options,
5537        )
5538        .expect("test operation should succeed");
5539        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5540        let mut second = written.entries[1].offset as usize;
5541        assert_eq!(
5542            parse_entry_header(&written.pack, &mut second)
5543                .expect("test operation should succeed")
5544                .kind,
5545            PackObjectKind::OfsDelta
5546        );
5547        // Ground truth from a full parse; single-object decode must match at every offset.
5548        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5549        for po in &parsed.entries {
5550            let got =
5551                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5552                    Ok(None)
5553                })
5554                .expect("test operation should succeed");
5555            assert_eq!(*got, po.object, "offset {}", po.entry.offset);
5556        }
5557    }
5558
5559    /// A [`HeaderTypeCache`] over a plain map, for asserting the cached header
5560    /// read is byte-identical to the uncached one cold and warm (sley#26).
5561    #[derive(Default)]
5562    struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5563
5564    impl HeaderTypeCache for MapHeaderTypeCache {
5565        fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5566            self.0.get(&pack_offset).copied()
5567        }
5568        fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5569            self.0.insert(pack_offset, header);
5570        }
5571    }
5572
5573    #[test]
5574    fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
5575        let (base, changed) = similar_blob_objects();
5576        let options = delta_pack_options(true);
5577        let written =
5578            PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
5579                .expect("test operation should succeed");
5580        // Ensure the pack genuinely contains an ofs-delta (else the test is vacuous).
5581        let mut second = written.entries[1].offset as usize;
5582        assert_eq!(
5583            parse_entry_header(&written.pack, &mut second)
5584                .expect("test operation should succeed")
5585                .kind,
5586            PackObjectKind::OfsDelta
5587        );
5588
5589        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5590        let mut cache = MapHeaderTypeCache::default();
5591        for po in &parsed.entries {
5592            let uncached =
5593                read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5594                    Ok(None)
5595                })
5596                .expect("test operation should succeed");
5597            // Type inherited from the chain base; size is the inflated body length.
5598            assert_eq!(
5599                uncached,
5600                (po.object.object_type, po.object.body.len() as u64),
5601                "uncached header at offset {}",
5602                po.entry.offset
5603            );
5604            // Cold cache: must agree with the uncached read and populate the memo.
5605            let cold = read_object_header_at_with_cache(
5606                &written.pack,
5607                po.entry.offset,
5608                ObjectFormat::Sha1,
5609                |_| Ok(None),
5610                &mut cache,
5611            )
5612            .expect("test operation should succeed");
5613            assert_eq!(cold, uncached, "cold cache at offset {}", po.entry.offset);
5614        }
5615        // Warm cache: every offset now resolves from the memo and is still correct,
5616        // proving the fast path does not change behavior (sley#26).
5617        for po in &parsed.entries {
5618            let warm = read_object_header_at_with_cache(
5619                &written.pack,
5620                po.entry.offset,
5621                ObjectFormat::Sha1,
5622                |_| panic!("warm cache must not re-walk the chain"),
5623                &mut cache,
5624            )
5625            .expect("test operation should succeed");
5626            assert_eq!(
5627                warm,
5628                (po.object.object_type, po.object.body.len() as u64),
5629                "warm cache at offset {}",
5630                po.entry.offset
5631            );
5632        }
5633    }
5634
5635    #[test]
5636    fn read_object_at_matches_full_parse_for_ref_delta_pack() {
5637        let (base, changed) = similar_blob_objects();
5638        let options = delta_pack_options(false);
5639        let written = PackFile::write_packed_with_options(
5640            &[base, changed.clone()],
5641            ObjectFormat::Sha1,
5642            &options,
5643        )
5644        .expect("test operation should succeed");
5645        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5646        let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
5647            .entries
5648            .iter()
5649            .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
5650            .collect();
5651        for po in &parsed.entries {
5652            let got =
5653                read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
5654                    Ok(by_oid.get(oid).cloned())
5655                })
5656                .expect("test operation should succeed");
5657            assert_eq!(*got, po.object);
5658        }
5659    }
5660
5661    /// A test-only [`PackDeltaCache`] that records every decode and counts hits,
5662    /// used to prove the cached decode path is byte-identical to the uncached
5663    /// one and that bases are reused across reads.
5664    #[derive(Default)]
5665    struct CountingDeltaCache {
5666        map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
5667        hits: std::cell::Cell<usize>,
5668        inserts: std::cell::Cell<usize>,
5669    }
5670
5671    impl PackDeltaCache for CountingDeltaCache {
5672        fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
5673            let hit = self.map.borrow().get(&offset).cloned();
5674            if hit.is_some() {
5675                self.hits.set(self.hits.get() + 1);
5676            }
5677            hit
5678        }
5679        fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
5680            self.inserts.set(self.inserts.get() + 1);
5681            self.map.borrow_mut().insert(offset, object);
5682        }
5683    }
5684
5685    #[test]
5686    fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
5687        // A multi-object pack with a real ofs-delta chain so the cache has bases
5688        // to reuse. Build several similar blobs to encourage deltification.
5689        let mut objects = Vec::new();
5690        for idx in 0..8u32 {
5691            let mut body = vec![b'x'; 4096];
5692            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
5693            objects.push(EncodedObject::new(ObjectType::Blob, body));
5694        }
5695        let options = delta_pack_options(true);
5696        let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
5697            .expect("test operation should succeed");
5698        let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5699
5700        let cache = CountingDeltaCache::default();
5701        // Read every object twice through the cache; each result must equal the
5702        // ground-truth from the full parse, byte for byte, both times.
5703        for _ in 0..2 {
5704            for po in &parsed.entries {
5705                let got = read_object_at_with_cache_arc(
5706                    &written.pack,
5707                    po.entry.offset,
5708                    ObjectFormat::Sha1,
5709                    |_| Ok(None),
5710                    &cache,
5711                )
5712                .expect("test operation should succeed");
5713                assert_eq!(*got, po.object, "offset {}", po.entry.offset);
5714            }
5715        }
5716        // The second pass reads everything straight from the cache, so there must
5717        // be at least one hit (proving reuse, not just correctness).
5718        assert!(cache.hits.get() > 0, "cache never served a warm object");
5719    }
5720
5721    #[test]
5722    fn writes_ofs_delta_pack_and_index_that_round_trip() {
5723        let (base, changed) = similar_blob_objects();
5724        let options = delta_pack_options(true);
5725        let written = PackFile::write_packed_with_options(
5726            &[base.clone(), changed.clone()],
5727            ObjectFormat::Sha1,
5728            &options,
5729        )
5730        .expect("test operation should succeed");
5731        let mut second_offset = written.entries[1].offset as usize;
5732        let header = parse_entry_header(&written.pack, &mut second_offset)
5733            .expect("test operation should succeed");
5734        assert_eq!(header.kind, PackObjectKind::OfsDelta);
5735
5736        let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5737        let index =
5738            PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5739        let oid = changed
5740            .object_id(ObjectFormat::Sha1)
5741            .expect("test operation should succeed");
5742        assert_eq!(pack.entries[0].object, base);
5743        assert_eq!(pack.entries[1].object, changed);
5744        assert_eq!(index.pack_checksum, pack.checksum);
5745        assert_eq!(
5746            index
5747                .find(&oid)
5748                .expect("test operation should succeed")
5749                .offset,
5750            written.entries[1].offset
5751        );
5752    }
5753
5754    #[test]
5755    fn resolves_ofs_delta_pack_entry() {
5756        let base = b"hello";
5757        let result = b"hello world";
5758        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
5759        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5760        assert_eq!(parsed.entries.len(), 2);
5761        assert_eq!(parsed.entries[0].object.body, base);
5762        assert_eq!(parsed.entries[1].object.body, result);
5763        assert_eq!(
5764            parsed.entries[1].entry.oid,
5765            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
5766                .expect("test operation should succeed")
5767        );
5768    }
5769
5770    #[test]
5771    fn resolves_ref_delta_pack_entry() {
5772        let base = b"hello";
5773        let result = b"hello world";
5774        let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
5775        let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5776        assert_eq!(parsed.entries.len(), 2);
5777        assert_eq!(parsed.entries[0].object.body, base);
5778        assert_eq!(parsed.entries[1].object.body, result);
5779        assert_eq!(
5780            parsed.entries[1].entry.oid,
5781            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
5782                .expect("test operation should succeed")
5783        );
5784    }
5785
5786    #[test]
5787    fn resolves_thin_ref_delta_pack_entry_with_external_base() {
5788        let base = b"hello";
5789        let result = b"hello world";
5790        let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
5791        assert!(PackFile::parse_sha1(&pack).is_err());
5792
5793        let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
5794            .expect("test operation should succeed");
5795        let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
5796            if oid == &base_oid {
5797                Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
5798            } else {
5799                Ok(None)
5800            }
5801        })
5802        .expect("test operation should succeed");
5803        assert_eq!(parsed.entries.len(), 1);
5804        assert_eq!(parsed.entries[0].object.body, result);
5805        assert_eq!(
5806            parsed.entries[0].entry.oid,
5807            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
5808                .expect("test operation should succeed")
5809        );
5810    }
5811
5812    #[test]
5813    fn rejects_bad_pack_checksum() {
5814        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5815        let last = pack.len() - 1;
5816        pack[last] ^= 1;
5817        assert!(PackFile::parse_sha1(&pack).is_err());
5818    }
5819
5820    #[test]
5821    fn raw_pack_index_rejects_bad_pack_checksum() {
5822        let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5823        let last = pack.len() - 1;
5824        pack[last] ^= 1;
5825        assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
5826    }
5827
5828    #[test]
5829    fn pack_index_writer_rejects_duplicate_object_ids() {
5830        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
5831            .expect("test operation should succeed");
5832        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5833            .expect("test operation should succeed");
5834        let entries = vec![
5835            PackIndexEntry {
5836                oid,
5837                crc32: 1,
5838                offset: 12,
5839            },
5840            PackIndexEntry {
5841                oid,
5842                crc32: 2,
5843                offset: 24,
5844            },
5845        ];
5846        assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
5847    }
5848
5849    #[test]
5850    fn parses_single_entry_pack_index() {
5851        let oid = ObjectId::from_hex(
5852            ObjectFormat::Sha1,
5853            "ce013625030ba8dba906f756967f9e9ca394464a",
5854        )
5855        .expect("test operation should succeed");
5856        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5857            .expect("test operation should succeed");
5858        let index = single_entry_index(
5859            ObjectFormat::Sha1,
5860            oid,
5861            0x1234_5678,
5862            12,
5863            pack_checksum.clone(),
5864        );
5865        let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
5866        assert_eq!(parsed.version, 2);
5867        assert_eq!(parsed.pack_checksum, pack_checksum);
5868        assert_eq!(parsed.entries.len(), 1);
5869        assert_eq!(
5870            parsed
5871                .find(&oid)
5872                .expect("test operation should succeed")
5873                .offset,
5874            12
5875        );
5876        assert_eq!(
5877            parsed
5878                .find(&oid)
5879                .expect("test operation should succeed")
5880                .crc32,
5881            0x1234_5678
5882        );
5883        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
5884    }
5885
5886    #[test]
5887    fn parses_single_entry_pack_index_v1() {
5888        let oid = ObjectId::from_hex(
5889            ObjectFormat::Sha1,
5890            "ce013625030ba8dba906f756967f9e9ca394464a",
5891        )
5892        .expect("test operation should succeed");
5893        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5894            .expect("test operation should succeed");
5895        let index =
5896            single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
5897        let parsed =
5898            PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
5899        assert_eq!(parsed.version, 1);
5900        assert_eq!(parsed.pack_checksum, pack_checksum);
5901        assert_eq!(parsed.entries.len(), 1);
5902        assert_eq!(
5903            parsed
5904                .find(&oid)
5905                .expect("test operation should succeed")
5906                .offset,
5907            0x1234_5678
5908        );
5909        assert_eq!(
5910            parsed
5911                .find(&oid)
5912                .expect("test operation should succeed")
5913                .crc32,
5914            0
5915        );
5916        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
5917    }
5918
5919    #[test]
5920    fn rejects_bad_pack_index_v1_checksum() {
5921        let oid = ObjectId::from_hex(
5922            ObjectFormat::Sha1,
5923            "ce013625030ba8dba906f756967f9e9ca394464a",
5924        )
5925        .expect("test operation should succeed");
5926        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5927            .expect("test operation should succeed");
5928        let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
5929        let last = index.len() - 1;
5930        index[last] ^= 1;
5931        assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
5932    }
5933
5934    #[test]
5935    fn pack_index_view_reads_v2_large_offsets() {
5936        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
5937            .expect("test operation should succeed");
5938        let second =
5939            sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
5940                .expect("test operation should succeed");
5941        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5942            .expect("test operation should succeed");
5943        let entries = vec![
5944            PackIndexEntry {
5945                oid: first,
5946                crc32: 0x1111_2222,
5947                offset: 0x8000_0000,
5948            },
5949            PackIndexEntry {
5950                oid: second,
5951                crc32: 0x3333_4444,
5952                offset: 0x1_0000_0042,
5953            },
5954        ];
5955        let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
5956            .expect("test operation should succeed");
5957
5958        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
5959        let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
5960            .expect("test operation should succeed");
5961        for entry in entries {
5962            assert_eq!(
5963                view.find(&entry.oid),
5964                Some(PackIndexLookup {
5965                    crc32: entry.crc32,
5966                    offset: entry.offset,
5967                })
5968            );
5969        }
5970    }
5971
5972    #[test]
5973    fn pack_index_view_default_parse_checks_index_checksum() {
5974        let oid = ObjectId::from_hex(
5975            ObjectFormat::Sha1,
5976            "ce013625030ba8dba906f756967f9e9ca394464a",
5977        )
5978        .expect("test operation should succeed");
5979        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
5980            .expect("test operation should succeed");
5981        let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
5982        let last = index.len() - 1;
5983        index[last] ^= 1;
5984
5985        assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
5986        let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
5987            .expect("test operation should succeed");
5988        let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
5989            Arc::from(index.clone().into_boxed_slice()),
5990            ObjectFormat::Sha1,
5991        )
5992        .expect("test operation should succeed");
5993        assert_eq!(
5994            view.find(&oid),
5995            Some(PackIndexLookup {
5996                crc32: 0x1234_5678,
5997                offset: 12,
5998            })
5999        );
6000        assert_eq!(
6001            trusted_view.find(&oid),
6002            Some(PackIndexLookup {
6003                crc32: 0x1234_5678,
6004                offset: 12,
6005            })
6006        );
6007    }
6008
6009    #[test]
6010    fn parses_pack_reverse_index() {
6011        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6012            .expect("test operation should succeed");
6013        let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
6014            .expect("test operation should succeed");
6015        let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
6016            .expect("test operation should succeed");
6017        assert_eq!(parsed.version, 1);
6018        assert_eq!(parsed.format, ObjectFormat::Sha1);
6019        assert_eq!(parsed.positions, vec![2, 0, 1]);
6020        assert_eq!(parsed.pack_checksum, pack_checksum);
6021        assert_eq!(
6022            PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
6023                .expect("test operation should succeed"),
6024            reverse_index
6025        );
6026    }
6027
6028    #[test]
6029    fn rejects_bad_pack_reverse_index_checksum() {
6030        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6031            .expect("test operation should succeed");
6032        let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
6033            .expect("test operation should succeed");
6034        let last = reverse_index.len() - 1;
6035        reverse_index[last] ^= 1;
6036        assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
6037    }
6038
6039    #[test]
6040    fn rejects_bad_pack_reverse_index_positions() {
6041        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6042            .expect("test operation should succeed");
6043        let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
6044        assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
6045        let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
6046        assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
6047        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6048            .expect("test operation should succeed");
6049        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
6050        assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
6051    }
6052
6053    #[test]
6054    fn parses_pack_mtimes() {
6055        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6056            .expect("test operation should succeed");
6057        let mtimes = PackMtimes::write(
6058            ObjectFormat::Sha1,
6059            &[1, 1_700_000_000, u32::MAX],
6060            &pack_checksum,
6061        )
6062        .expect("test operation should succeed");
6063        let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
6064            .expect("test operation should succeed");
6065        assert_eq!(parsed.version, 1);
6066        assert_eq!(parsed.format, ObjectFormat::Sha1);
6067        assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
6068        assert_eq!(parsed.pack_checksum, pack_checksum);
6069        assert_eq!(
6070            PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
6071                .expect("test operation should succeed"),
6072            mtimes
6073        );
6074    }
6075
6076    #[test]
6077    fn rejects_bad_pack_mtimes_checksum() {
6078        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6079            .expect("test operation should succeed");
6080        let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
6081            .expect("test operation should succeed");
6082        let last = mtimes.len() - 1;
6083        mtimes[last] ^= 1;
6084        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6085    }
6086
6087    #[test]
6088    fn rejects_bad_pack_mtimes_shape() {
6089        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6090            .expect("test operation should succeed");
6091        let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
6092        assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
6093
6094        let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
6095        wrong_hash[11] = 2;
6096        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6097        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6098            .expect("test operation should succeed");
6099        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6100        assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
6101    }
6102
6103    #[test]
6104    fn parses_multi_pack_index_header_and_chunk_lookup() {
6105        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6106            .expect("test operation should succeed");
6107        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6108            .expect("test operation should succeed");
6109        let chunks = midx_chunks_with_pack_names(
6110            ObjectFormat::Sha1,
6111            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6112            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6113        );
6114        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6115        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6116            .expect("test operation should succeed");
6117        assert_eq!(parsed.version, 2);
6118        assert_eq!(parsed.format, ObjectFormat::Sha1);
6119        assert_eq!(parsed.pack_count, 2);
6120        assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
6121        assert_eq!(parsed.object_count, 2);
6122        assert_eq!(parsed.objects.len(), 2);
6123        assert_eq!(
6124            parsed
6125                .find(&first)
6126                .expect("test operation should succeed")
6127                .pack_int_id,
6128            0
6129        );
6130        assert_eq!(
6131            parsed
6132                .find(&first)
6133                .expect("test operation should succeed")
6134                .offset,
6135            12
6136        );
6137        assert_eq!(
6138            parsed
6139                .find(&second)
6140                .expect("test operation should succeed")
6141                .pack_int_id,
6142            1
6143        );
6144        assert_eq!(
6145            parsed
6146                .find(&second)
6147                .expect("test operation should succeed")
6148                .offset,
6149            0x1_0000_0000
6150        );
6151        assert_eq!(parsed.reverse_index, None);
6152        assert_eq!(parsed.bitmapped_packs, None);
6153        assert_eq!(parsed.chunks.len(), 5);
6154        assert_eq!(parsed.chunks[0].id, *b"PNAM");
6155        assert_eq!(parsed.chunks[0].offset, 84);
6156        assert_eq!(parsed.chunks[0].len, 24);
6157        assert_eq!(parsed.chunks[1].id, *b"OIDF");
6158        assert_eq!(parsed.chunks[1].offset, 108);
6159        assert_eq!(parsed.chunks[1].len, 1024);
6160    }
6161
6162    #[test]
6163    fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
6164        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6165            .expect("test operation should succeed");
6166        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6167            .expect("test operation should succeed");
6168        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6169            .expect("test operation should succeed");
6170        let chunks = midx_chunks_with_pack_names(
6171            ObjectFormat::Sha1,
6172            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6173            &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
6174        );
6175        let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
6176        let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
6177            .expect("test operation should succeed");
6178
6179        assert!(lookup.contains(&first));
6180        assert!(lookup.contains(&second));
6181        assert!(!lookup.contains(&missing));
6182
6183        let first_entry = lookup
6184            .find(&first)
6185            .expect("test operation should succeed")
6186            .expect("object should be present");
6187        assert_eq!(lookup.pack_name(first_entry.pack_int_id), Some("pack-a.idx"));
6188        assert_eq!(first_entry.offset, 12);
6189
6190        let second_entry = lookup
6191            .find(&second)
6192            .expect("test operation should succeed")
6193            .expect("object should be present");
6194        assert_eq!(lookup.pack_name(second_entry.pack_int_id), Some("pack-b.idx"));
6195        assert_eq!(second_entry.offset, 0x1_0000_0000);
6196        assert!(
6197            lookup
6198                .find(&missing)
6199                .expect("test operation should succeed")
6200                .is_none()
6201        );
6202    }
6203
6204    #[test]
6205    fn rejects_bad_multi_pack_index_checksum() {
6206        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6207        let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6208        let last = midx.len() - 1;
6209        midx[last] ^= 1;
6210        assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
6211    }
6212
6213    #[test]
6214    fn rejects_bad_multi_pack_index_shape() {
6215        let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
6216        let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6217        wrong_hash[5] = 2;
6218        let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
6219        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
6220            .expect("test operation should succeed");
6221        wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
6222        assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
6223
6224        let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
6225        missing_terminator[12] = b'B';
6226        let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
6227        let checksum =
6228            sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
6229                .expect("test operation should succeed");
6230        missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
6231        assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
6232
6233        let mut bad_offset = multi_pack_index(
6234            ObjectFormat::Sha1,
6235            2,
6236            0,
6237            &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
6238        );
6239        bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
6240        let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
6241        let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
6242            .expect("test operation should succeed");
6243        bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
6244        assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
6245    }
6246
6247    #[test]
6248    fn rejects_bad_multi_pack_index_pack_names() {
6249        let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
6250        assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
6251
6252        let too_few = multi_pack_index(
6253            ObjectFormat::Sha1,
6254            2,
6255            2,
6256            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
6257        );
6258        assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
6259
6260        let bad_padding = multi_pack_index(
6261            ObjectFormat::Sha1,
6262            2,
6263            1,
6264            &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
6265        );
6266        assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
6267
6268        let unsorted_v1 = multi_pack_index(
6269            ObjectFormat::Sha1,
6270            1,
6271            2,
6272            &midx_chunks_with_pack_names(
6273                ObjectFormat::Sha1,
6274                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6275                &[],
6276            ),
6277        );
6278        assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
6279
6280        let unsorted_v2 = multi_pack_index(
6281            ObjectFormat::Sha1,
6282            2,
6283            2,
6284            &midx_chunks_with_pack_names(
6285                ObjectFormat::Sha1,
6286                b"pack-b.idx\0pack-a.idx\0".to_vec(),
6287                &[],
6288            ),
6289        );
6290        let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
6291            .expect("test operation should succeed");
6292        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6293    }
6294
6295    #[test]
6296    fn rejects_bad_multi_pack_index_object_tables() {
6297        let oid_a = ObjectId::from_hex(
6298            ObjectFormat::Sha1,
6299            "1111111111111111111111111111111111111111",
6300        )
6301        .expect("test operation should succeed");
6302        let oid_b = ObjectId::from_hex(
6303            ObjectFormat::Sha1,
6304            "2222222222222222222222222222222222222222",
6305        )
6306        .expect("test operation should succeed");
6307
6308        let missing_oidf = multi_pack_index(
6309            ObjectFormat::Sha1,
6310            2,
6311            1,
6312            &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
6313        );
6314        assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());
6315
6316        let bad_fanout = vec![
6317            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
6318            (*b"OIDF", vec![0; 256 * 4]),
6319            (*b"OIDL", oid_a.as_bytes().to_vec()),
6320            (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
6321        ];
6322        let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
6323        assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());
6324
6325        let mut unsorted = Vec::new();
6326        unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
6327        unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
6328        let mut oid_lookup = Vec::new();
6329        oid_lookup.extend_from_slice(oid_b.as_bytes());
6330        oid_lookup.extend_from_slice(oid_a.as_bytes());
6331        unsorted.push((*b"OIDL", oid_lookup));
6332        unsorted.push((
6333            *b"OOFF",
6334            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
6335        ));
6336        let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
6337        assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());
6338
6339        let bad_pack = multi_pack_index(
6340            ObjectFormat::Sha1,
6341            2,
6342            1,
6343            &midx_chunks_with_pack_names(
6344                ObjectFormat::Sha1,
6345                b"pack-a.idx\0\0".to_vec(),
6346                &[(oid_a.clone(), 1, 12)],
6347            ),
6348        );
6349        assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());
6350
6351        let mut large_offsets = Vec::new();
6352        let missing_loff = vec![
6353            (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
6354            (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
6355            (*b"OIDL", oid_a.as_bytes().to_vec()),
6356            (
6357                *b"OOFF",
6358                midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
6359            ),
6360        ];
6361        let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
6362        assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());
6363
6364        let mut bad_loff =
6365            midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
6366        bad_loff.push((*b"LOFF", vec![0]));
6367        let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
6368        assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
6369    }
6370
6371    #[test]
6372    fn parses_multi_pack_index_bitmap_chunks() {
6373        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6374            .expect("test operation should succeed");
6375        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6376            .expect("test operation should succeed");
6377        let mut chunks = midx_chunks_with_pack_names(
6378            ObjectFormat::Sha1,
6379            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6380            &[(first, 0, 12), (second, 1, 24)],
6381        );
6382        chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
6383        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
6384        let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
6385
6386        let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
6387            .expect("test operation should succeed");
6388        assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
6389        assert_eq!(
6390            parsed.bitmapped_packs,
6391            Some(vec![
6392                MultiPackBitmapPack {
6393                    bitmap_pos: 0,
6394                    bitmap_nr: 1,
6395                },
6396                MultiPackBitmapPack {
6397                    bitmap_pos: 1,
6398                    bitmap_nr: 1,
6399                },
6400            ])
6401        );
6402    }
6403
6404    #[test]
6405    fn writes_multi_pack_index_that_round_trips() {
6406        let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
6407            .expect("test operation should succeed");
6408        let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
6409            .expect("test operation should succeed");
6410        let bytes = MultiPackIndex::write(
6411            ObjectFormat::Sha1,
6412            2,
6413            &["pack-b.idx".into(), "pack-a.idx".into()],
6414            &[
6415                MultiPackIndexEntry {
6416                    oid: second.clone(),
6417                    pack_int_id: 0,
6418                    offset: 0x1_0000_0000,
6419                },
6420                MultiPackIndexEntry {
6421                    oid: first.clone(),
6422                    pack_int_id: 1,
6423                    offset: 12,
6424                },
6425            ],
6426        )
6427        .expect("test operation should succeed");
6428
6429        let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
6430            .expect("test operation should succeed");
6431        assert_eq!(parsed.version, 2);
6432        assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
6433        assert_eq!(parsed.object_count, 2);
6434        assert_eq!(
6435            parsed
6436                .find(&first)
6437                .expect("test operation should succeed")
6438                .pack_int_id,
6439            1
6440        );
6441        assert_eq!(
6442            parsed
6443                .find(&first)
6444                .expect("test operation should succeed")
6445                .offset,
6446            12
6447        );
6448        assert_eq!(
6449            parsed
6450                .find(&second)
6451                .expect("test operation should succeed")
6452                .pack_int_id,
6453            0
6454        );
6455        assert_eq!(
6456            parsed
6457                .find(&second)
6458                .expect("test operation should succeed")
6459                .offset,
6460            0x1_0000_0000
6461        );
6462        assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
6463    }
6464
6465    #[test]
6466    fn write_multi_pack_index_rejects_invalid_inputs() {
6467        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
6468            .expect("test operation should succeed");
6469        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
6470        assert!(
6471            MultiPackIndex::write(
6472                ObjectFormat::Sha1,
6473                1,
6474                &["pack-b.idx".into(), "pack-a.idx".into()],
6475                &[],
6476            )
6477            .is_err()
6478        );
6479        assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
6480        assert!(
6481            MultiPackIndex::write(
6482                ObjectFormat::Sha1,
6483                2,
6484                &["pack-a.idx".into()],
6485                &[MultiPackIndexEntry {
6486                    oid,
6487                    pack_int_id: 1,
6488                    offset: 12,
6489                }],
6490            )
6491            .is_err()
6492        );
6493        assert!(
6494            MultiPackIndex::write(
6495                ObjectFormat::Sha1,
6496                2,
6497                &["pack-a.idx".into()],
6498                &[
6499                    MultiPackIndexEntry {
6500                        oid,
6501                        pack_int_id: 0,
6502                        offset: 12,
6503                    },
6504                    MultiPackIndexEntry {
6505                        oid,
6506                        pack_int_id: 0,
6507                        offset: 24,
6508                    },
6509                ],
6510            )
6511            .is_err()
6512        );
6513    }
6514
6515    #[test]
6516    fn rejects_bad_multi_pack_index_bitmap_chunks() {
6517        let oid_a = ObjectId::from_hex(
6518            ObjectFormat::Sha1,
6519            "1111111111111111111111111111111111111111",
6520        )
6521        .expect("test operation should succeed");
6522        let oid_b = ObjectId::from_hex(
6523            ObjectFormat::Sha1,
6524            "2222222222222222222222222222222222222222",
6525        )
6526        .expect("test operation should succeed");
6527
6528        let mut duplicate_ridx = midx_chunks_with_pack_names(
6529            ObjectFormat::Sha1,
6530            b"pack-a.idx\0\0".to_vec(),
6531            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
6532        );
6533        duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
6534        let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
6535        assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
6536
6537        let mut short_btmp = midx_chunks_with_pack_names(
6538            ObjectFormat::Sha1,
6539            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
6540            &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
6541        );
6542        short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
6543        let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
6544        assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
6545
6546        let mut out_of_range_btmp = midx_chunks_with_pack_names(
6547            ObjectFormat::Sha1,
6548            b"pack-a.idx\0\0".to_vec(),
6549            &[(oid_a, 0, 12), (oid_b, 0, 24)],
6550        );
6551        out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
6552        let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
6553        assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
6554    }
6555
6556    #[test]
6557    fn parses_pack_bitmap_index_with_hash_cache() {
6558        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6559            .expect("test operation should succeed");
6560        let bitmap = pack_bitmap_index(
6561            ObjectFormat::Sha1,
6562            3,
6563            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
6564            &pack_checksum,
6565            &[(2, 0, 1, &[0b101])],
6566            Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
6567        );
6568
6569        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
6570            .expect("test operation should succeed");
6571        assert_eq!(parsed.version, 1);
6572        assert_eq!(parsed.format, ObjectFormat::Sha1);
6573        assert_eq!(
6574            parsed.options,
6575            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
6576        );
6577        assert_eq!(parsed.pack_checksum, pack_checksum);
6578        assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
6579        assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
6580        assert_eq!(parsed.entries.len(), 1);
6581        let entry = parsed
6582            .entry_for_index_position(2)
6583            .expect("test operation should succeed");
6584        assert_eq!(entry.xor_offset, 0);
6585        assert_eq!(entry.flags, 1);
6586        assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
6587        assert_eq!(
6588            parsed.name_hash_cache,
6589            Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
6590        );
6591    }
6592
6593    #[test]
6594    fn parses_pack_bitmap_index_sha256() {
6595        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
6596            .expect("test operation should succeed");
6597        let bitmap = pack_bitmap_index(
6598            ObjectFormat::Sha256,
6599            2,
6600            PackBitmapIndex::OPTION_FULL_DAG,
6601            &pack_checksum,
6602            &[(0, 0, 0, &[0b11])],
6603            None,
6604        );
6605
6606        let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
6607            .expect("test operation should succeed");
6608        assert_eq!(parsed.version, 1);
6609        assert_eq!(parsed.format, ObjectFormat::Sha256);
6610        assert_eq!(parsed.pack_checksum, pack_checksum);
6611        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
6612        assert_eq!(parsed.entries[0].object_position, 0);
6613        assert_eq!(parsed.name_hash_cache, None);
6614    }
6615
6616    #[test]
6617    fn parses_upstream_git_written_pack_bitmap_index() {
6618        let root = unique_temp_dir("git-pack-bitmap-upstream");
6619        fs::create_dir_all(&root).expect("test operation should succeed");
6620        {
6621            run_git_success(&root, &["init", "-q", "-b", "main"]);
6622            run_git_success(
6623                &root,
6624                &[
6625                    "-c",
6626                    "user.name=Example User",
6627                    "-c",
6628                    "user.email=example@example.invalid",
6629                    "commit",
6630                    "--allow-empty",
6631                    "-q",
6632                    "-m",
6633                    "one",
6634                ],
6635            );
6636            run_git_success(
6637                &root,
6638                &[
6639                    "-c",
6640                    "user.name=Example User",
6641                    "-c",
6642                    "user.email=example@example.invalid",
6643                    "commit",
6644                    "--allow-empty",
6645                    "-q",
6646                    "-m",
6647                    "two",
6648                ],
6649            );
6650            run_git_success(&root, &["repack", "-adb"]);
6651            let pack_dir = root.join(".git").join("objects").join("pack");
6652            let idx_path = single_path_with_extension(&pack_dir, "idx");
6653            let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
6654            let index = PackIndex::parse(
6655                &fs::read(idx_path).expect("test operation should succeed"),
6656                ObjectFormat::Sha1,
6657            )
6658            .expect("test operation should succeed");
6659            let bitmap = PackBitmapIndex::parse(
6660                &fs::read(bitmap_path).expect("test operation should succeed"),
6661                ObjectFormat::Sha1,
6662                index.entries.len(),
6663            )
6664            .expect("test operation should succeed");
6665            assert_eq!(bitmap.pack_checksum, index.pack_checksum);
6666            assert!(!bitmap.entries.is_empty());
6667        };
6668        let _ = fs::remove_dir_all(&root);
6669    }
6670
6671    #[test]
6672    fn rejects_bad_pack_bitmap_index_header_and_checksum() {
6673        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6674            .expect("test operation should succeed");
6675        let bitmap = pack_bitmap_index(
6676            ObjectFormat::Sha1,
6677            1,
6678            PackBitmapIndex::OPTION_FULL_DAG,
6679            &pack_checksum,
6680            &[(0, 0, 0, &[1])],
6681            None,
6682        );
6683
6684        let mut bad_signature = bitmap.clone();
6685        bad_signature[0] = b'X';
6686        assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
6687
6688        let mut bad_version = bitmap.clone();
6689        bad_version[5] = 2;
6690        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
6691        assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
6692
6693        let mut bad_option = bitmap.clone();
6694        bad_option[7] = 0x20;
6695        refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
6696        assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
6697
6698        let mut bad_checksum = bitmap;
6699        let last = bad_checksum.len() - 1;
6700        bad_checksum[last] ^= 1;
6701        assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
6702    }
6703
6704    #[test]
6705    fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
6706        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
6707            .expect("test operation should succeed");
6708        let bitmap = pack_bitmap_index(
6709            ObjectFormat::Sha1,
6710            2,
6711            PackBitmapIndex::OPTION_FULL_DAG,
6712            &pack_checksum,
6713            &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
6714            None,
6715        );
6716
6717        let mut truncated = bitmap.clone();
6718        truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
6719        refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
6720        assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
6721
6722        let mut out_of_range_position = pack_bitmap_index(
6723            ObjectFormat::Sha1,
6724            2,
6725            PackBitmapIndex::OPTION_FULL_DAG,
6726            &pack_checksum,
6727            &[(2, 0, 0, &[0b01])],
6728            None,
6729        );
6730        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
6731        refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
6732        assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
6733
6734        let invalid_xor = pack_bitmap_index(
6735            ObjectFormat::Sha1,
6736            2,
6737            PackBitmapIndex::OPTION_FULL_DAG,
6738            &pack_checksum,
6739            &[(0, 1, 0, &[0b01])],
6740            None,
6741        );
6742        assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
6743    }
6744
6745    #[test]
6746    fn parses_single_entry_pack_index_sha256() {
6747        let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
6748            .expect("test operation should succeed");
6749        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
6750            .expect("test operation should succeed");
6751        let index = single_entry_index(
6752            ObjectFormat::Sha256,
6753            oid,
6754            0x1234_5678,
6755            12,
6756            pack_checksum.clone(),
6757        );
6758        let parsed =
6759            PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
6760        assert_eq!(parsed.version, 2);
6761        assert_eq!(parsed.pack_checksum, pack_checksum);
6762        assert_eq!(parsed.entries.len(), 1);
6763        assert_eq!(
6764            parsed
6765                .find(&oid)
6766                .expect("test operation should succeed")
6767                .offset,
6768            12
6769        );
6770        assert_eq!(
6771            parsed
6772                .find(&oid)
6773                .expect("test operation should succeed")
6774                .crc32,
6775            0x1234_5678
6776        );
6777        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
6778        assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha256);
6779    }
6780
6781    #[test]
6782    fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
6783        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
6784    }
6785
6786    #[test]
6787    fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
6788        write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
6789    }
6790
6791    #[test]
6792    fn write_packed_rejects_duplicate_objects() {
6793        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
6794        assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
6795    }
6796
6797    #[test]
6798    fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
6799        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
6800        let sha1 = object
6801            .object_id(ObjectFormat::Sha1)
6802            .expect("test operation should succeed");
6803        let sha256 = object
6804            .object_id(ObjectFormat::Sha256)
6805            .expect("test operation should succeed");
6806        let duplicate = [
6807            PackInput {
6808                oid: &sha1,
6809                object: &object,
6810            },
6811            PackInput {
6812                oid: &sha1,
6813                object: &object,
6814            },
6815        ];
6816        assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
6817
6818        let wrong_format = [PackInput {
6819            oid: &sha256,
6820            object: &object,
6821        }];
6822        assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
6823    }
6824
6825    fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
6826        let objects = similar_blob_family(8);
6827        let packed =
6828            PackFile::write_packed(&objects, format).expect("test operation should succeed");
6829        let undeltified =
6830            PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
6831
6832        // The whole point of delta selection: the packed output is smaller than
6833        // storing every object undeltified.
6834        assert!(
6835            packed.pack.len() < undeltified.pack.len(),
6836            "expected delta pack ({}) smaller than undeltified pack ({})",
6837            packed.pack.len(),
6838            undeltified.pack.len()
6839        );
6840
6841        // At least one object must actually be stored as a delta.
6842        let kinds = pack_entry_kinds(&packed.pack, format);
6843        let delta_count = kinds
6844            .iter()
6845            .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
6846            .count();
6847        assert!(
6848            delta_count >= 1,
6849            "expected at least one delta entry, found kinds {kinds:?}"
6850        );
6851
6852        // Round-trip: every original object reconstructs byte-for-byte.
6853        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
6854        assert_eq!(parsed.entries.len(), objects.len());
6855        for object in &objects {
6856            let oid = object
6857                .object_id(format)
6858                .expect("test operation should succeed");
6859            let found = parsed
6860                .entries
6861                .iter()
6862                .find(|entry| entry.entry.oid == oid)
6863                .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
6864            assert_eq!(&found.object, object, "object {oid} did not round-trip");
6865        }
6866
6867        // The index must agree with the pack and locate every object.
6868        let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
6869        assert_eq!(index.pack_checksum, packed.checksum);
6870        for object in &objects {
6871            let oid = object
6872                .object_id(format)
6873                .expect("test operation should succeed");
6874            assert!(index.find(&oid).is_some(), "index missing {oid}");
6875        }
6876    }
6877
6878    #[test]
6879    fn write_packed_emits_ofs_delta_by_default() {
6880        let objects = similar_blob_family(6);
6881        let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
6882            .expect("test operation should succeed");
6883        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
6884        assert!(
6885            kinds.contains(&PackObjectKind::OfsDelta),
6886            "expected an ofs-delta entry by default, found {kinds:?}"
6887        );
6888        assert!(
6889            !kinds.contains(&PackObjectKind::RefDelta),
6890            "default self-contained pack must not use ref-delta, found {kinds:?}"
6891        );
6892        // Round-trips.
6893        assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
6894    }
6895
6896    #[test]
6897    fn write_packed_can_emit_ref_delta() {
6898        let objects = similar_blob_family(6);
6899        let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
6900        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
6901            .expect("test operation should succeed");
6902        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
6903        assert!(
6904            kinds.contains(&PackObjectKind::RefDelta),
6905            "expected a ref-delta entry, found {kinds:?}"
6906        );
6907        assert!(
6908            !kinds.contains(&PackObjectKind::OfsDelta),
6909            "ref-delta mode must not emit ofs-delta, found {kinds:?}"
6910        );
6911
6912        // Ref-delta packs are still self-contained here, so they round-trip
6913        // without any external base lookup.
6914        let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
6915            .expect("test operation should succeed");
6916        assert_eq!(parsed.entries.len(), objects.len());
6917    }
6918
6919    #[test]
6920    fn write_packed_bounds_delta_chain_depth() {
6921        // A long chain of progressively-modified blobs. With a large window
6922        // every object could otherwise delta against its immediate predecessor,
6923        // forming a chain as long as the input.
6924        let objects = incremental_blob_chain(20);
6925        let format = ObjectFormat::Sha1;
6926
6927        for max_depth in [1usize, 2, 5] {
6928            let options = PackWriteOptions::new()
6929                .with_window(20)
6930                .with_depth(max_depth);
6931            let packed = PackFile::write_packed_with_options(&objects, format, &options)
6932                .expect("test operation should succeed");
6933
6934            let depths = pack_entry_depths(&packed.pack, format);
6935            let observed = depths.iter().copied().max().unwrap_or(0);
6936            assert!(
6937                observed <= max_depth,
6938                "max chain depth {observed} exceeded bound {max_depth}"
6939            );
6940
6941            // Still correct: round-trips byte-for-byte.
6942            let parsed =
6943                PackFile::parse(&packed.pack, format).expect("test operation should succeed");
6944            for object in &objects {
6945                let oid = object
6946                    .object_id(format)
6947                    .expect("test operation should succeed");
6948                let found = parsed
6949                    .entries
6950                    .iter()
6951                    .find(|entry| entry.entry.oid == oid)
6952                    .expect("test operation should succeed");
6953                assert_eq!(&found.object, object);
6954            }
6955        }
6956    }
6957
6958    #[test]
6959    fn write_packed_depth_zero_stores_everything_undeltified() {
6960        let objects = similar_blob_family(5);
6961        let options = PackWriteOptions::new().with_depth(0);
6962        let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
6963            .expect("test operation should succeed");
6964        let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
6965        assert!(
6966            kinds
6967                .iter()
6968                .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
6969            "depth 0 must disable deltas, found {kinds:?}"
6970        );
6971    }
6972
6973    #[test]
6974    fn write_thin_uses_external_base_and_round_trips_sha1() {
6975        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
6976    }
6977
6978    #[test]
6979    fn write_thin_uses_external_base_and_round_trips_sha256() {
6980        write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
6981    }
6982
6983    fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
6984        // The base object stays OUT of the pack; only `target` is written, as a
6985        // ref-delta against the external base's object id.
6986        let base = blob_with_marker("EXTERNAL-BASE");
6987        let target = blob_with_marker("EXTERNAL-TARGET");
6988        let base_oid = base
6989            .object_id(format)
6990            .expect("test operation should succeed");
6991
6992        let mut external = HashMap::new();
6993        external.insert(base_oid, base.clone());
6994        let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
6995            .expect("test operation should succeed");
6996
6997        // Exactly one entry, encoded as a ref-delta to the external base.
6998        let kinds = pack_entry_kinds(&packed.pack, format);
6999        assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7000
7001        // The external base reference must be the base oid.
7002        let mut offset = 12usize;
7003        let header =
7004            parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7005        assert_eq!(header.kind, PackObjectKind::RefDelta);
7006        let referenced =
7007            ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7008                .expect("test operation should succeed");
7009        assert_eq!(referenced, base_oid);
7010
7011        // A plain (non-thin) parse fails: the base is not present.
7012        assert!(PackFile::parse(&packed.pack, format).is_err());
7013
7014        // A thin parse that supplies the external base reconstructs the target.
7015        let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7016            if oid == &base_oid {
7017                Ok(Some(base.clone()))
7018            } else {
7019                Ok(None)
7020            }
7021        })
7022        .expect("test operation should succeed");
7023        assert_eq!(parsed.entries.len(), 1);
7024        assert_eq!(parsed.entries[0].object, target);
7025    }
7026
7027    #[test]
7028    fn write_packed_preserves_distinct_objects_with_no_similarity() {
7029        // Unrelated objects: nothing should delta, but the pack must still be
7030        // valid and complete.
7031        let objects = vec![
7032            EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
7033            EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
7034            EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
7035        ];
7036        let format = ObjectFormat::Sha1;
7037        let packed =
7038            PackFile::write_packed(&objects, format).expect("test operation should succeed");
7039        let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
7040        assert_eq!(parsed.entries.len(), objects.len());
7041        for object in &objects {
7042            let oid = object
7043                .object_id(format)
7044                .expect("test operation should succeed");
7045            assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
7046        }
7047    }
7048
7049    /// Build a family of blobs that all share a large common region but differ
7050    /// in a marker placed in the *middle*, so a good delta finds copy regions on
7051    /// both sides of the change.
7052    fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7053        let mut common_head = Vec::new();
7054        for _ in 0..200 {
7055            common_head.extend_from_slice(b"shared header line for delta testing\n");
7056        }
7057        let mut common_tail = Vec::new();
7058        for _ in 0..200 {
7059            common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7060        }
7061        (0..count)
7062            .map(|idx| {
7063                let mut body = common_head.clone();
7064                body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7065                body.extend_from_slice(&common_tail);
7066                EncodedObject::new(ObjectType::Blob, body)
7067            })
7068            .collect()
7069    }
7070
7071    /// Build a chain where each blob is the previous one plus an appended line,
7072    /// so each is highly similar to its predecessor.
7073    fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7074        let mut body = Vec::new();
7075        for _ in 0..100 {
7076            body.extend_from_slice(b"baseline content shared across the whole chain\n");
7077        }
7078        let mut objects = Vec::with_capacity(count);
7079        for idx in 0..count {
7080            body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7081            objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7082        }
7083        objects
7084    }
7085
7086    fn blob_with_marker(marker: &str) -> EncodedObject {
7087        let mut body = Vec::new();
7088        for _ in 0..150 {
7089            body.extend_from_slice(b"common body shared between base and target\n");
7090        }
7091        body.extend_from_slice(marker.as_bytes());
7092        body.push(b'\n');
7093        for _ in 0..150 {
7094            body.extend_from_slice(b"more common body shared between objects\n");
7095        }
7096        EncodedObject::new(ObjectType::Blob, body)
7097    }
7098
7099    /// Classify every entry in a pack (in pack order) by its on-disk kind.
7100    fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7101        pack_entry_descriptors(pack, format)
7102            .into_iter()
7103            .map(|descriptor| descriptor.kind)
7104            .collect()
7105    }
7106
7107    /// Compute each entry's delta chain depth (0 = undeltified base), in pack
7108    /// order. Entries always appear after their in-pack bases, so a single
7109    /// forward pass suffices.
7110    fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7111        let descriptors = pack_entry_descriptors(pack, format);
7112        let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7113        let mut depths = Vec::with_capacity(descriptors.len());
7114        for descriptor in &descriptors {
7115            let depth = match &descriptor.base {
7116                EntryBase::None => 0,
7117                EntryBase::Offset(base_offset) => {
7118                    depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7119                }
7120                // Ref-delta to an in-pack base: look it up by offset via oid is
7121                // unnecessary for these tests (which only use ofs-delta for the
7122                // chains), so treat as depth 1 if unknown.
7123                EntryBase::Ref => 1,
7124            };
7125            depth_by_offset.insert(descriptor.offset, depth);
7126            depths.push(depth);
7127        }
7128        depths
7129    }
7130
    /// One pack entry as recorded by `pack_entry_descriptors`: where it starts,
    /// how it is stored, and how it refers to a delta base (if it has one).
    struct EntryDescriptor {
        /// Byte offset of the entry's header, measured from the start of the pack.
        offset: u64,
        /// On-disk kind taken from the parsed entry header.
        kind: PackObjectKind,
        /// Delta base reference recorded for this entry.
        base: EntryBase,
    }
7136
    /// How a scanned pack entry refers to its delta base.
    enum EntryBase {
        /// The entry is neither an ofs-delta nor a ref-delta.
        None,
        /// Ofs-delta: the base offset returned by `parse_ofs_delta_base_offset`.
        Offset(u64),
        /// Ref-delta: the base is named by object id; the scanner skips the id
        /// bytes without recording them.
        Ref,
    }
7142
7143    fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7144        let trailer_offset = pack.len() - format.raw_len();
7145        let count = u32_be(&pack[8..12]) as usize;
7146        let mut offset = 12usize;
7147        let mut descriptors = Vec::with_capacity(count);
7148        for _ in 0..count {
7149            let entry_offset = offset as u64;
7150            let header =
7151                parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7152            let base = match header.kind {
7153                PackObjectKind::OfsDelta => {
7154                    let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7155                        .expect("test operation should succeed");
7156                    EntryBase::Offset(base_offset)
7157                }
7158                PackObjectKind::RefDelta => {
7159                    offset += format.raw_len();
7160                    EntryBase::Ref
7161                }
7162                _ => EntryBase::None,
7163            };
7164            let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7165            let mut body = Vec::new();
7166            decoder
7167                .read_to_end(&mut body)
7168                .expect("test operation should succeed");
7169            offset += decoder.total_in() as usize;
7170            descriptors.push(EntryDescriptor {
7171                offset: entry_offset,
7172                kind: header.kind,
7173                base,
7174            });
7175        }
7176        descriptors
7177    }
7178
7179    fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7180        let mut base = Vec::new();
7181        for _ in 0..300 {
7182            base.extend_from_slice(b"common payload\n");
7183        }
7184        base.extend_from_slice(b"base\n");
7185        let mut changed = Vec::new();
7186        for _ in 0..300 {
7187            changed.extend_from_slice(b"common payload\n");
7188        }
7189        changed.extend_from_slice(b"changed\n");
7190        (
7191            EncodedObject::new(ObjectType::Blob, base),
7192            EncodedObject::new(ObjectType::Blob, changed),
7193        )
7194    }
7195
7196    fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7197        let mut pack = Vec::new();
7198        pack.extend_from_slice(b"PACK");
7199        pack.extend_from_slice(&2u32.to_be_bytes());
7200        pack.extend_from_slice(&1u32.to_be_bytes());
7201        write_entry_header(&mut pack, object_type, body.len() as u64);
7202        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7203        encoder
7204            .write_all(body)
7205            .expect("test operation should succeed");
7206        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7207        let checksum =
7208            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7209        pack.extend_from_slice(checksum.as_bytes());
7210        pack
7211    }
7212
    /// Selects how `two_object_delta_pack` encodes the delta entry's reference
    /// to its base.
    #[derive(Clone, Copy, Debug)]
    enum DeltaKind {
        /// Entry type code 6, followed by the relative offset back to the base.
        Offset,
        /// Entry type code 7, followed by the base's object id.
        Ref,
    }
7218
7219    fn two_object_delta_pack(
7220        format: ObjectFormat,
7221        base: &[u8],
7222        result: &[u8],
7223        delta_kind: DeltaKind,
7224    ) -> Vec<u8> {
7225        let mut pack = Vec::new();
7226        pack.extend_from_slice(b"PACK");
7227        pack.extend_from_slice(&2u32.to_be_bytes());
7228        pack.extend_from_slice(&2u32.to_be_bytes());
7229
7230        let base_offset = pack.len();
7231        write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7232        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7233        encoder
7234            .write_all(base)
7235            .expect("test operation should succeed");
7236        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7237
7238        let delta = append_suffix_delta(base, result);
7239        let delta_offset = pack.len();
7240        write_pack_entry_header_kind(
7241            &mut pack,
7242            match delta_kind {
7243                DeltaKind::Offset => 6,
7244                DeltaKind::Ref => 7,
7245            },
7246            delta.len() as u64,
7247        );
7248        match delta_kind {
7249            DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7250            DeltaKind::Ref => {
7251                let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7252                    .expect("test operation should succeed");
7253                pack.extend_from_slice(base_oid.as_bytes());
7254            }
7255        }
7256        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7257        encoder
7258            .write_all(&delta)
7259            .expect("test operation should succeed");
7260        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7261
7262        let checksum =
7263            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7264        pack.extend_from_slice(checksum.as_bytes());
7265        pack
7266    }
7267
7268    fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7269        let mut pack = Vec::new();
7270        pack.extend_from_slice(b"PACK");
7271        pack.extend_from_slice(&2u32.to_be_bytes());
7272        pack.extend_from_slice(&1u32.to_be_bytes());
7273
7274        let delta = append_suffix_delta(base, result);
7275        write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7276        let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7277            .expect("test operation should succeed");
7278        pack.extend_from_slice(base_oid.as_bytes());
7279        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7280        encoder
7281            .write_all(&delta)
7282            .expect("test operation should succeed");
7283        pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7284
7285        let checksum =
7286            sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7287        pack.extend_from_slice(checksum.as_bytes());
7288        pack
7289    }
7290
7291    fn unique_temp_dir(name: &str) -> PathBuf {
7292        let nanos = SystemTime::now()
7293            .duration_since(UNIX_EPOCH)
7294            .expect("test operation should succeed")
7295            .as_nanos();
7296        std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
7297    }
7298
7299    fn run_git_success(cwd: &Path, args: &[&str]) {
7300        let output = Command::new("git")
7301            .current_dir(cwd)
7302            .args(args)
7303            .output()
7304            .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
7305        assert!(
7306            output.status.success(),
7307            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
7308            output.status.code(),
7309            String::from_utf8_lossy(&output.stdout),
7310            String::from_utf8_lossy(&output.stderr)
7311        );
7312    }
7313
7314    fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
7315        let mut paths = fs::read_dir(dir)
7316            .expect("test operation should succeed")
7317            .map(|entry| entry.expect("test operation should succeed").path())
7318            .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
7319            .collect::<Vec<_>>();
7320        assert_eq!(paths.len(), 1, "expected one .{extension} file");
7321        paths.remove(0)
7322    }
7323
7324    fn pack_bitmap_index(
7325        format: ObjectFormat,
7326        object_count: u32,
7327        options: u16,
7328        pack_checksum: &ObjectId,
7329        entries: &[(u32, u8, u8, &[u64])],
7330        name_hash_cache: Option<&[u32]>,
7331    ) -> Vec<u8> {
7332        let mut out = Vec::new();
7333        out.extend_from_slice(b"BITM");
7334        out.extend_from_slice(&1u16.to_be_bytes());
7335        out.extend_from_slice(&options.to_be_bytes());
7336        out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7337        out.extend_from_slice(pack_checksum.as_bytes());
7338        write_test_ewah(&mut out, object_count, &[0b001]);
7339        write_test_ewah(&mut out, object_count, &[0b010]);
7340        write_test_ewah(&mut out, object_count, &[0b100]);
7341        write_test_ewah(&mut out, object_count, &[0]);
7342        for (position, xor_offset, flags, words) in entries {
7343            out.extend_from_slice(&position.to_be_bytes());
7344            out.push(*xor_offset);
7345            out.push(*flags);
7346            write_test_ewah(&mut out, object_count, words);
7347        }
7348        if let Some(cache) = name_hash_cache {
7349            for value in cache {
7350                out.extend_from_slice(&value.to_be_bytes());
7351            }
7352        }
7353        let checksum =
7354            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7355        out.extend_from_slice(checksum.as_bytes());
7356        out
7357    }
7358
7359    fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
7360        out.extend_from_slice(&bit_size.to_be_bytes());
7361        let words = ewah_literal_words(literals);
7362        out.extend_from_slice(&(words.len() as u32).to_be_bytes());
7363        for word in words {
7364            out.extend_from_slice(&word.to_be_bytes());
7365        }
7366        out.extend_from_slice(&0u32.to_be_bytes());
7367    }
7368
7369    fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
7370        let rlw = (literals.len() as u64) << 33;
7371        let mut words = vec![rlw];
7372        words.extend_from_slice(literals);
7373        words
7374    }
7375
7376    fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7377        let checksum_offset = bytes.len() - format.raw_len();
7378        let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7379            .expect("test operation should succeed");
7380        bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7381    }
7382
7383    fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
7384        assert!(result.starts_with(base));
7385        let suffix = &result[base.len()..];
7386        assert!(base.len() < 0x10000);
7387        assert!(suffix.len() < 0x80);
7388        let mut delta = Vec::new();
7389        write_delta_varint(&mut delta, base.len() as u64);
7390        write_delta_varint(&mut delta, result.len() as u64);
7391        delta.push(0x90);
7392        delta.push(base.len() as u8);
7393        delta.push(suffix.len() as u8);
7394        delta.extend_from_slice(suffix);
7395        delta
7396    }
7397
7398    fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
7399        loop {
7400            let mut byte = (value as u8) & 0x7f;
7401            value >>= 7;
7402            if value != 0 {
7403                byte |= 0x80;
7404            }
7405            out.push(byte);
7406            if value == 0 {
7407                break;
7408            }
7409        }
7410    }
7411
7412    fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
7413        let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
7414        size >>= 4;
7415        if size != 0 {
7416            byte |= 0x80;
7417        }
7418        out.push(byte);
7419        while size != 0 {
7420            let mut byte = (size as u8) & 0x7f;
7421            size >>= 7;
7422            if size != 0 {
7423                byte |= 0x80;
7424            }
7425            out.push(byte);
7426        }
7427    }
7428
7429    fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
7430        assert!(relative < 0x80);
7431        out.push(relative as u8);
7432    }
7433
7434    fn single_entry_index(
7435        format: ObjectFormat,
7436        oid: ObjectId,
7437        crc32: u32,
7438        offset: u32,
7439        pack_checksum: ObjectId,
7440    ) -> Vec<u8> {
7441        let mut index = Vec::new();
7442        index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7443        index.extend_from_slice(&2u32.to_be_bytes());
7444        for idx in 0..256 {
7445            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7446                1u32
7447            } else {
7448                0u32
7449            };
7450            index.extend_from_slice(&count.to_be_bytes());
7451        }
7452        index.extend_from_slice(oid.as_bytes());
7453        index.extend_from_slice(&crc32.to_be_bytes());
7454        index.extend_from_slice(&offset.to_be_bytes());
7455        index.extend_from_slice(pack_checksum.as_bytes());
7456        let checksum =
7457            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7458        index.extend_from_slice(checksum.as_bytes());
7459        index
7460    }
7461
7462    fn single_entry_index_v1(
7463        format: ObjectFormat,
7464        oid: ObjectId,
7465        offset: u32,
7466        pack_checksum: ObjectId,
7467    ) -> Vec<u8> {
7468        let mut index = Vec::new();
7469        for idx in 0..256 {
7470            let count = if idx >= usize::from(oid.as_bytes()[0]) {
7471                1u32
7472            } else {
7473                0u32
7474            };
7475            index.extend_from_slice(&count.to_be_bytes());
7476        }
7477        index.extend_from_slice(&offset.to_be_bytes());
7478        index.extend_from_slice(oid.as_bytes());
7479        index.extend_from_slice(pack_checksum.as_bytes());
7480        let checksum =
7481            sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7482        index.extend_from_slice(checksum.as_bytes());
7483        index
7484    }
7485
7486    fn pack_reverse_index(
7487        format: ObjectFormat,
7488        positions: &[u32],
7489        pack_checksum: ObjectId,
7490    ) -> Vec<u8> {
7491        let mut reverse_index = Vec::new();
7492        reverse_index.extend_from_slice(b"RIDX");
7493        reverse_index.extend_from_slice(&1u32.to_be_bytes());
7494        reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7495        for position in positions {
7496            reverse_index.extend_from_slice(&position.to_be_bytes());
7497        }
7498        reverse_index.extend_from_slice(pack_checksum.as_bytes());
7499        let checksum =
7500            sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7501        reverse_index.extend_from_slice(checksum.as_bytes());
7502        reverse_index
7503    }
7504
7505    fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7506        let mut out = Vec::new();
7507        out.extend_from_slice(b"MTME");
7508        out.extend_from_slice(&1u32.to_be_bytes());
7509        out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7510        for mtime in mtimes {
7511            out.extend_from_slice(&mtime.to_be_bytes());
7512        }
7513        out.extend_from_slice(pack_checksum.as_bytes());
7514        let checksum =
7515            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7516        out.extend_from_slice(checksum.as_bytes());
7517        out
7518    }
7519
7520    fn midx_chunks_with_pack_names(
7521        _format: ObjectFormat,
7522        pack_names: Vec<u8>,
7523        entries: &[(ObjectId, u32, u64)],
7524    ) -> Vec<([u8; 4], Vec<u8>)> {
7525        let mut entries = entries.to_vec();
7526        entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7527        let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7528        let mut large_offsets = Vec::new();
7529        let mut chunks = vec![
7530            (*b"PNAM", pack_names),
7531            (*b"OIDF", midx_oid_fanout(&object_ids)),
7532            (*b"OIDL", midx_oid_lookup(&object_ids)),
7533            (
7534                *b"OOFF",
7535                midx_ooff_entries(
7536                    &entries
7537                        .iter()
7538                        .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7539                        .collect::<Vec<_>>(),
7540                    &mut large_offsets,
7541                ),
7542            ),
7543        ];
7544        if !large_offsets.is_empty() {
7545            chunks.push((*b"LOFF", large_offsets));
7546        }
7547        chunks
7548    }
7549
7550    fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7551        let mut counts = [0u32; 256];
7552        for oid in object_ids {
7553            counts[oid.as_bytes()[0] as usize] += 1;
7554        }
7555        let mut running = 0u32;
7556        let mut out = Vec::new();
7557        for count in counts {
7558            running += count;
7559            out.extend_from_slice(&running.to_be_bytes());
7560        }
7561        out
7562    }
7563
7564    fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7565        let mut out = Vec::new();
7566        for oid in object_ids {
7567            out.extend_from_slice(oid.as_bytes());
7568        }
7569        out
7570    }
7571
7572    fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
7573        let mut out = Vec::new();
7574        for (pack_int_id, offset) in entries {
7575            out.extend_from_slice(&pack_int_id.to_be_bytes());
7576            if *offset < 0x8000_0000 {
7577                out.extend_from_slice(&(*offset as u32).to_be_bytes());
7578            } else {
7579                let large_idx = (large_offsets.len() / 8) as u32;
7580                out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
7581                large_offsets.extend_from_slice(&offset.to_be_bytes());
7582            }
7583        }
7584        out
7585    }
7586
7587    fn midx_u32_table(values: &[u32]) -> Vec<u8> {
7588        let mut out = Vec::new();
7589        for value in values {
7590            out.extend_from_slice(&value.to_be_bytes());
7591        }
7592        out
7593    }
7594
7595    fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
7596        let mut out = Vec::new();
7597        for (bitmap_pos, bitmap_nr) in entries {
7598            out.extend_from_slice(&bitmap_pos.to_be_bytes());
7599            out.extend_from_slice(&bitmap_nr.to_be_bytes());
7600        }
7601        out
7602    }
7603
7604    fn multi_pack_index(
7605        format: ObjectFormat,
7606        version: u8,
7607        pack_count: u32,
7608        chunks: &[([u8; 4], Vec<u8>)],
7609    ) -> Vec<u8> {
7610        let lookup_len = (chunks.len() + 1) * 12;
7611        let mut out = Vec::new();
7612        out.extend_from_slice(b"MIDX");
7613        out.push(version);
7614        out.push(hash_function_id(format) as u8);
7615        out.push(chunks.len() as u8);
7616        out.push(0);
7617        out.extend_from_slice(&pack_count.to_be_bytes());
7618        let mut chunk_offset = (12 + lookup_len) as u64;
7619        for (id, data) in chunks {
7620            out.extend_from_slice(id);
7621            out.extend_from_slice(&chunk_offset.to_be_bytes());
7622            chunk_offset += data.len() as u64;
7623        }
7624        out.extend_from_slice(&[0, 0, 0, 0]);
7625        out.extend_from_slice(&chunk_offset.to_be_bytes());
7626        for (_id, data) in chunks {
7627            out.extend_from_slice(data);
7628        }
7629        let checksum =
7630            sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7631        out.extend_from_slice(checksum.as_bytes());
7632        out
7633    }
7634
7635    // ---- EWAH encoder / bitmap writer tests ------------------------------
7636
7637    fn pack_checksum_sha1() -> ObjectId {
7638        sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
7639    }
7640
7641    fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
7642        // Wrap the EWAH body with the surrounding offset bookkeeping the parser
7643        // expects: a checksum offset that lies just past the serialised bitmap.
7644        let mut offset = 0usize;
7645        let checksum_offset = bytes.len();
7646        parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
7647            .expect("test operation should succeed")
7648    }
7649
7650    #[test]
7651    fn ewah_encodes_single_literal_word_matching_helper() {
7652        // A bitmap whose only word is a literal must serialise as one RLW with
7653        // literal_len == 1 followed by the literal, identical to the test
7654        // helper used by the existing parser tests.
7655        let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
7656        assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
7657        assert_eq!(ewah.rlw_position, 0);
7658        assert_eq!(ewah.bit_size, 64);
7659    }
7660
7661    #[test]
7662    fn ewah_byte_layout_is_big_endian() {
7663        let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
7664            .expect("test operation should succeed");
7665        let bytes = ewah.to_bytes();
7666        let mut expected = Vec::new();
7667        expected.extend_from_slice(&64u32.to_be_bytes()); // bit_size
7668        expected.extend_from_slice(&2u32.to_be_bytes()); // word count: rlw + literal
7669        expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); // rlw: literal_len = 1
7670        expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
7671        expected.extend_from_slice(&0u32.to_be_bytes()); // rlw_position
7672        assert_eq!(bytes, expected);
7673    }
7674
7675    #[test]
7676    fn ewah_empty_bitmap_serialises_like_git() {
7677        let ewah = EwahBitmap::empty();
7678        let bytes = ewah.to_bytes();
7679        // bit_size = 0, word_count = 0, rlw_position = 0.
7680        assert_eq!(bytes, vec![0u8; 12]);
7681        // It must still parse and decode to nothing.
7682        let parsed = parse_ewah_bytes(&bytes);
7683        assert_eq!(parsed, ewah);
7684        assert!(
7685            parsed
7686                .to_positions()
7687                .expect("test operation should succeed")
7688                .is_empty()
7689        );
7690    }
7691
7692    #[test]
7693    fn ewah_compresses_clean_zero_run() {
7694        // Three all-zero words followed by a literal: the encoder should emit a
7695        // single RLW carrying a run of 3 clean-zero words plus one literal.
7696        let ewah =
7697            EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
7698        assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
7699        let rlw = ewah.words[0];
7700        assert_eq!(rlw & 1, 0, "run bit should be zero");
7701        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
7702        assert_eq!(rlw >> 33, 1, "literal length should be 1");
7703        assert_eq!(ewah.words[1], 0b1);
7704    }
7705
7706    #[test]
7707    fn ewah_compresses_clean_ones_run() {
7708        let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
7709            .expect("test operation should succeed");
7710        // Pure run of ones, no literals: one RLW only.
7711        assert_eq!(ewah.words.len(), 1);
7712        let rlw = ewah.words[0];
7713        assert_eq!(rlw & 1, 1, "run bit should be one");
7714        assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
7715        assert_eq!(rlw >> 33, 0, "no literals");
7716    }
7717
7718    #[test]
7719    fn ewah_run_then_literal_then_run_roundtrips() {
7720        let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
7721        let bit_size = (words.len() * 64) as u32;
7722        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
7723        assert_eq!(
7724            ewah.to_words().expect("test operation should succeed"),
7725            words
7726        );
7727    }
7728
7729    #[test]
7730    fn ewah_drops_trailing_clean_zero_words() {
7731        // Trailing all-zero words beyond a literal carry no information and git
7732        // does not serialise them, but to_words() restores them up to bit_size.
7733        let words = vec![0b1, 0, 0, 0];
7734        let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
7735        // bit_size of 1 means a single backing word.
7736        assert_eq!(ewah.bit_size, 1);
7737        assert_eq!(
7738            ewah.to_words().expect("test operation should succeed"),
7739            vec![0b1]
7740        );
7741    }
7742
7743    #[test]
7744    fn ewah_from_positions_roundtrips_via_positions() {
7745        let positions = [0u32, 1, 63, 64, 65, 200, 511];
7746        let ewah =
7747            EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
7748        let mut decoded = ewah.to_positions().expect("test operation should succeed");
7749        decoded.sort_unstable();
7750        assert_eq!(decoded, positions);
7751    }
7752
7753    #[test]
7754    fn ewah_from_positions_dedupes_and_orders() {
7755        let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
7756            .expect("test operation should succeed");
7757        assert_eq!(
7758            ewah.to_positions().expect("test operation should succeed"),
7759            vec![5, 100]
7760        );
7761    }
7762
7763    #[test]
7764    fn ewah_huge_zero_run_spans_multiple_rlws() {
7765        // A run longer than the 32-bit running-length field forces the encoder
7766        // to emit more than one RLW. Use one literal bit far out, with a bit
7767        // size large enough to exceed u32::MAX clean words is impractical, so
7768        // assert the field arithmetic via a direct builder run instead.
7769        let mut builder = EwahBuilder::new(0);
7770        builder.add_empty_words(false, 0xffff_ffff);
7771        builder.add_empty_words(false, 5);
7772        let ewah = builder.finish().expect("test operation should succeed");
7773        assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
7774        assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
7775        assert_eq!(ewah.words[1] & 1, 0);
7776        assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
7777        assert_eq!(ewah.rlw_position, 1);
7778    }
7779
7780    #[test]
7781    fn ewah_from_words_rejects_oversized_bit_size() {
7782        // bit_size demands two words but only one is supplied.
7783        assert!(EwahBitmap::from_words(65, &[0]).is_err());
7784    }
7785
7786    #[test]
7787    fn ewah_from_positions_rejects_out_of_range() {
7788        assert!(EwahBitmap::from_positions(64, &[64]).is_err());
7789    }
7790
7791    #[test]
7792    fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
7793        // Exercise the full encode -> serialise -> parse loop for a non-trivial
7794        // pattern and assert structural equality against the parser's model.
7795        let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
7796        let bit_size = (words.len() * 64) as u32;
7797        let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
7798        let bytes = ewah.to_bytes();
7799        let parsed = parse_ewah_bytes(&bytes);
7800        assert_eq!(parsed, ewah);
7801        assert_eq!(
7802            parsed.to_words().expect("test operation should succeed"),
7803            words
7804        );
7805    }
7806
7807    #[test]
7808    fn pack_bitmap_index_write_parse_roundtrip_sha1() {
7809        // commit, tree, blob in pack order; one selected commit reaching all.
7810        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
7811        let bytes = write_bitmap(
7812            ObjectFormat::Sha1,
7813            pack_checksum_sha1(),
7814            &object_types,
7815            &[(0u32, 0u32, vec![1u32, 2u32])],
7816            None,
7817        )
7818        .expect("test operation should succeed");
7819        assert_eq!(&bytes[..4], b"BITM");
7820
7821        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
7822            .expect("test operation should succeed");
7823        assert_eq!(parsed.version, 1);
7824        assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
7825        assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
7826        assert_eq!(
7827            parsed
7828                .type_bitmaps
7829                .commits
7830                .to_positions()
7831                .expect("test operation should succeed"),
7832            vec![0]
7833        );
7834        assert_eq!(
7835            parsed
7836                .type_bitmaps
7837                .trees
7838                .to_positions()
7839                .expect("test operation should succeed"),
7840            vec![1]
7841        );
7842        assert_eq!(
7843            parsed
7844                .type_bitmaps
7845                .blobs
7846                .to_positions()
7847                .expect("test operation should succeed"),
7848            vec![2]
7849        );
7850        assert!(
7851            parsed
7852                .type_bitmaps
7853                .tags
7854                .to_positions()
7855                .expect("test operation should succeed")
7856                .is_empty()
7857        );
7858        assert_eq!(parsed.entries.len(), 1);
7859        let entry = parsed
7860            .entry_for_index_position(0)
7861            .expect("test operation should succeed");
7862        assert_eq!(entry.xor_offset, 0);
7863        assert_eq!(entry.flags, 0);
7864        assert_eq!(
7865            entry
7866                .bitmap
7867                .to_positions()
7868                .expect("test operation should succeed"),
7869            vec![0, 1, 2]
7870        );
7871        assert_eq!(parsed.name_hash_cache, None);
7872    }
7873
7874    #[test]
7875    fn pack_bitmap_index_write_parse_roundtrip_sha256() {
7876        let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
7877            .expect("test operation should succeed");
7878        let object_types = [ObjectType::Commit, ObjectType::Tree];
7879        let bytes = write_bitmap(
7880            ObjectFormat::Sha256,
7881            pack_checksum.clone(),
7882            &object_types,
7883            &[(0u32, 0u32, vec![1u32])],
7884            None,
7885        )
7886        .expect("test operation should succeed");
7887        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
7888            .expect("test operation should succeed");
7889        assert_eq!(parsed.format, ObjectFormat::Sha256);
7890        assert_eq!(parsed.pack_checksum, pack_checksum);
7891        assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
7892        assert_eq!(
7893            parsed.entries[0]
7894                .bitmap
7895                .to_positions()
7896                .expect("test operation should succeed"),
7897            vec![0, 1]
7898        );
7899    }
7900
7901    #[test]
7902    fn pack_bitmap_index_write_includes_name_hash_cache() {
7903        let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
7904        let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
7905        let bytes = write_bitmap(
7906            ObjectFormat::Sha1,
7907            pack_checksum_sha1(),
7908            &object_types,
7909            &[(0u32, 0u32, vec![1u32, 2u32])],
7910            Some(cache.clone()),
7911        )
7912        .expect("test operation should succeed");
7913        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
7914            .expect("test operation should succeed");
7915        assert_eq!(
7916            parsed.options,
7917            PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
7918        );
7919        assert_eq!(parsed.name_hash_cache, Some(cache));
7920    }
7921
7922    #[test]
7923    fn pack_bitmap_writer_supports_multiple_commits() {
7924        let object_types = [
7925            ObjectType::Commit,
7926            ObjectType::Commit,
7927            ObjectType::Tree,
7928            ObjectType::Blob,
7929        ];
7930        let mut writer =
7931            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
7932                .expect("test operation should succeed");
7933        writer
7934            .add_commit(0, 0, &[2, 3])
7935            .expect("test operation should succeed");
7936        writer
7937            .add_commit(1, 1, &[2])
7938            .expect("test operation should succeed");
7939        let bytes = writer.write().expect("test operation should succeed");
7940        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
7941            .expect("test operation should succeed");
7942        assert_eq!(parsed.entries.len(), 2);
7943        assert_eq!(
7944            parsed
7945                .type_bitmaps
7946                .commits
7947                .to_positions()
7948                .expect("test operation should succeed"),
7949            vec![0, 1]
7950        );
7951        let first = parsed
7952            .entry_for_index_position(0)
7953            .expect("test operation should succeed");
7954        assert_eq!(
7955            first
7956                .bitmap
7957                .to_positions()
7958                .expect("test operation should succeed"),
7959            vec![0, 2, 3]
7960        );
7961        let second = parsed
7962            .entry_for_index_position(1)
7963            .expect("test operation should succeed");
7964        assert_eq!(
7965            second
7966                .bitmap
7967                .to_positions()
7968                .expect("test operation should succeed"),
7969            vec![1, 2]
7970        );
7971    }
7972
7973    #[test]
7974    fn pack_bitmap_index_recomputes_checksum_on_write() {
7975        // The provided index_checksum field is ignored; write recomputes it so
7976        // a bogus placeholder still produces a valid, parseable file.
7977        let object_types = [ObjectType::Commit, ObjectType::Blob];
7978        let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
7979            .expect("test operation should succeed");
7980        let mut index = writer.build().expect("test operation should succeed");
7981        // build() sets an all-zero placeholder checksum.
7982        assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
7983        index.entries.clear(); // mutate the model after build
7984        index.entries.push(PackBitmapEntry {
7985            object_position: 0,
7986            xor_offset: 0,
7987            flags: 0,
7988            bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
7989        });
7990        let bytes = index.write().expect("test operation should succeed");
7991        // Parsing validates the trailing checksum, so a wrong checksum fails.
7992        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
7993            .expect("test operation should succeed");
7994        assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
7995    }
7996
7997    #[test]
7998    fn pack_bitmap_writer_rejects_non_commit_selection() {
7999        let object_types = [ObjectType::Commit, ObjectType::Blob];
8000        let mut writer =
8001            PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
8002                .expect("test operation should succeed");
8003        // Position 1 is a blob, not a commit.
8004        assert!(writer.add_commit(1, 1, &[]).is_err());
8005        // Position 5 is out of range entirely.
8006        assert!(writer.add_commit(5, 5, &[]).is_err());
8007        // Index position out of range.
8008        assert!(writer.add_commit(0, 5, &[]).is_err());
8009        // Reachable position out of range.
8010        assert!(writer.add_commit(0, 0, &[9]).is_err());
8011    }
8012
8013    #[test]
8014    fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
8015        let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8016            .expect("test operation should succeed");
8017        assert!(
8018            PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
8019                .is_err()
8020        );
8021    }
8022
8023    #[test]
8024    fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
8025        let writer = PackBitmapWriter::new(
8026            ObjectFormat::Sha1,
8027            pack_checksum_sha1(),
8028            &[ObjectType::Commit],
8029        )
8030        .expect("test operation should succeed");
8031        assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
8032    }
8033
8034    #[test]
8035    fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
8036        let mut index = PackBitmapWriter::new(
8037            ObjectFormat::Sha1,
8038            pack_checksum_sha1(),
8039            &[ObjectType::Commit],
8040        )
8041        .expect("test operation should succeed")
8042        .build()
8043        .expect("test operation should succeed");
8044        // Flag set but no cache present.
8045        index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
8046        assert!(index.write().is_err());
8047        // Cache present but flag missing.
8048        index.options = PackBitmapIndex::OPTION_FULL_DAG;
8049        index.name_hash_cache = Some(vec![0]);
8050        assert!(index.write().is_err());
8051    }
8052
8053    #[test]
8054    fn write_bitmap_roundtrips_through_upstream_git_parser() {
8055        // Build a real pack with git, then overwrite reachability with our own
8056        // writer using the real pack checksum and object types, and confirm our
8057        // bytes parse under the same parser that reads upstream bitmaps.
8058        let root = unique_temp_dir("git-pack-bitmap-writer");
8059        fs::create_dir_all(&root).expect("test operation should succeed");
8060        {
8061            run_git_success(&root, &["init", "-q", "-b", "main"]);
8062            run_git_success(
8063                &root,
8064                &[
8065                    "-c",
8066                    "user.name=Example User",
8067                    "-c",
8068                    "user.email=example@example.invalid",
8069                    "commit",
8070                    "--allow-empty",
8071                    "-q",
8072                    "-m",
8073                    "one",
8074                ],
8075            );
8076            run_git_success(&root, &["repack", "-adb"]);
8077            let pack_dir = root.join(".git").join("objects").join("pack");
8078            let idx_path = single_path_with_extension(&pack_dir, "idx");
8079            let index = PackIndex::parse(
8080                &fs::read(idx_path).expect("test operation should succeed"),
8081                ObjectFormat::Sha1,
8082            )
8083            .expect("test operation should succeed");
8084            // Read object types from the pack so the type bitmaps are accurate.
8085            let pack_path = single_path_with_extension(&pack_dir, "pack");
8086            let pack =
8087                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
8088                    .expect("test operation should succeed");
8089            // Map each index entry (sorted by oid) to its pack offset, then to a
8090            // pack-order position so positions line up with the index ordering.
8091            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
8092            offsets.sort_unstable();
8093            let position_of = |offset: u64| -> u32 {
8094                offsets
8095                    .iter()
8096                    .position(|value| *value == offset)
8097                    .expect("test operation should succeed") as u32
8098            };
8099            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
8100            for entry in &index.entries {
8101                let position = position_of(entry.offset) as usize;
8102                // Find the parsed object at this pack offset to read its type.
8103                if let Some(parsed) = pack
8104                    .entries
8105                    .iter()
8106                    .find(|po| po.entry.offset == entry.offset)
8107                {
8108                    object_types[position] = parsed.object.object_type;
8109                }
8110            }
8111            // Select the first commit position we find and reach everything.
8112            let commit_position = object_types
8113                .iter()
8114                .position(|ty| *ty == ObjectType::Commit)
8115                .expect("test operation should succeed") as u32;
8116            // The entry records the commit's position in the oid-sorted index.
8117            let commit_index_position = index
8118                .entries
8119                .iter()
8120                .position(|entry| position_of(entry.offset) == commit_position)
8121                .expect("test operation should succeed")
8122                as u32;
8123            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
8124            let bytes = write_bitmap(
8125                ObjectFormat::Sha1,
8126                index.pack_checksum.clone(),
8127                &object_types,
8128                &[(commit_position, commit_index_position, reachable)],
8129                None,
8130            )
8131            .expect("test operation should succeed");
8132            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
8133                .expect("test operation should succeed");
8134            assert_eq!(parsed.pack_checksum, index.pack_checksum);
8135            assert_eq!(parsed.entries.len(), 1);
8136            assert_eq!(
8137                parsed.entries[0]
8138                    .bitmap
8139                    .to_positions()
8140                    .expect("test operation should succeed")
8141                    .len(),
8142                index.entries.len()
8143            );
8144        };
8145        let _ = fs::remove_dir_all(&root);
8146    }
8147}