Skip to main content

sley_odb/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
12use sley_pack::{
13    MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
14    PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
15};
16use std::collections::{HashMap, HashSet};
17use std::io::{Read, Write};
18use std::path::{Path, PathBuf};
19use std::sync::atomic::{AtomicU64, Ordering};
20use std::sync::{Arc, Mutex, OnceLock};
21use std::{env, fs};
22
/// Process-wide atomic counter, starting at zero.
///
/// NOTE(review): no use is visible in this part of the file; presumably it
/// makes temporary file names unique — confirm against the writers below.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
24
/// Read access to an object store addressed by [`ObjectId`].
///
/// Besides the mandatory lookup, the trait carries two defaulted hooks that
/// let history walkers honor shallow/graft boundaries.
pub trait ObjectReader {
    /// Look up the object named by `oid`.
    ///
    /// Callers in this file treat a `GitError::NotFound` error as "the object
    /// is absent" and propagate every other error unchanged.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Graft-points seam (shallow clones today, replace refs/grafts later):
    /// `true` when history is cut at `oid`, so every walk must treat the
    /// commit as parentless even though its raw body still names parents.
    ///
    /// [`FileObjectDatabase`] answers from `$GIT_DIR/shallow`; readers that
    /// are not backed by a repository (in-memory stores, pack overlays)
    /// keep the default "no grafts".
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }

    /// Whether this reader has any shallow/graft boundaries at all. Walkers can
    /// use this to choose dense graph-only traversal when no boundary can cut
    /// parent edges.
    ///
    /// The default implementation reports `false`.
    fn has_shallow_grafts(&self) -> bool {
        false
    }
}
46
47fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
48    (*oid == ObjectId::empty_tree(format))
49        .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
50}
51
52fn with_missing_object_context(
53    err: GitError,
54    oid: ObjectId,
55    context: MissingObjectContext,
56) -> GitError {
57    let kind = err
58        .not_found_kind()
59        .and_then(sley_core::NotFoundKind::missing_object_kind);
60    match kind {
61        Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
62        None => err,
63    }
64}
65
66/// Parents of a parsed commit with the graft seam applied: empty when the
67/// reader cuts history at `oid` (shallow boundary), the raw parsed parents
68/// otherwise.
69pub fn grafted_parents<R: ObjectReader + ?Sized>(
70    reader: &R,
71    oid: &ObjectId,
72    parents: Vec<ObjectId>,
73) -> Vec<ObjectId> {
74    if reader.is_shallow_graft(oid) {
75        Vec::new()
76    } else {
77        parents
78    }
79}
80
/// Write access to an object store.
pub trait ObjectWriter {
    /// Write `object`, returning its id. Takes `&self`: every implementation's
    /// write state (in-memory map, loose-object cache) is behind interior
    /// mutability, so a single handle can interleave reads and writes without a
    /// `&mut` borrow. This lets the merge engine read and write through one `db`
    /// instead of opening a second read-only handle that re-warms the caches.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
89
/// Outcome of importing a bundle, as returned by [`unbundle_objects`] and
/// [`install_bundle_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids of the objects taken from the bundle's pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the bundle's reference list.
    pub references: Vec<BundleReference>,
}
95
/// Outcome of writing a parsed pack's objects one by one through an
/// [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids returned by the writer, in pack entry order.
    pub written_objects: Vec<ObjectId>,
}
100
/// Description of a pack installed into a file-backed object database.
///
/// NOTE(review): the code that fills this struct is outside this view; the
/// field notes below are inferred from the names and should be confirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Presumably the `pack-<checksum>` name of the installed pack — confirm.
    pub pack_name: String,
    /// Presumably the location of the installed `.pack` file — confirm.
    pub pack_path: PathBuf,
    /// Presumably the location of the matching `.idx` file — confirm.
    pub index_path: PathBuf,
    /// Presumably the `.promisor` sidecar path when one was written — confirm.
    pub promisor_path: Option<PathBuf>,
    /// Ids of the objects contained in the installed pack.
    pub object_ids: Vec<ObjectId>,
}
109
/// Outcome of [`RawPackInstaller::install_raw_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Ids of the objects the installed pack provided.
    pub object_ids: Vec<ObjectId>,
}
114
/// Options passed along when installing a generated pack.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// Defaults to `false`.
    ///
    /// NOTE(review): presumably marks the installed pack as a promisor pack;
    /// the consumer is outside this view — confirm.
    pub promisor: bool,
}
119
/// A destination that can ingest a complete pack stream given as raw bytes.
pub trait RawPackInstaller {
    /// Install the pack held in `pack_bytes` and report the ids of the
    /// objects it provided.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
123
/// Result of resolving an object-id prefix.
///
/// NOTE(review): the resolver is outside this view; variant meanings below
/// are inferred from their names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// Presumably: no object matches the prefix.
    Missing,
    /// Presumably: exactly one object matches; carries its id.
    Unique(ObjectId),
    /// Presumably: several objects match; carries the candidate ids.
    Ambiguous(Vec<ObjectId>),
}
130
/// Storage details for a single object.
///
/// NOTE(review): the producer is outside this view; field meanings are
/// inferred from their names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Presumably the number of bytes the object occupies on disk.
    pub disk_size: u64,
    /// Presumably the id of the delta base the stored form depends on.
    pub deltabase: ObjectId,
}
136
137impl RawPackInstaller for FileObjectDatabase {
138    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
139        let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
140        Ok(RawPackInstallResult {
141            object_ids: result.object_ids,
142        })
143    }
144}
145
146impl RawPackInstaller for ObjectDatabase {
147    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
148        let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
149        Ok(RawPackInstallResult {
150            object_ids: result.written_objects,
151        })
152    }
153}
154
155pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
156    let mut missing = Vec::new();
157    for prerequisite in &bundle.prerequisites {
158        match reader.read_object(&prerequisite.oid) {
159            Ok(object) => {
160                let actual = object.object_id(bundle.format)?;
161                if actual != prerequisite.oid {
162                    return Err(GitError::InvalidObject(format!(
163                        "bundle prerequisite {} hashes to {actual}",
164                        prerequisite.oid
165                    )));
166                }
167            }
168            Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
169            Err(err) => return Err(err),
170        }
171    }
172    if missing.is_empty() {
173        return Ok(());
174    }
175    Err(GitError::object_not_found_in(
176        missing[0],
177        MissingObjectContext::PackInstall,
178    ))
179}
180
181pub fn unbundle_objects<R, W>(
182    bundle: &Bundle,
183    prerequisite_reader: &R,
184    writer: &mut W,
185) -> Result<BundleUnbundleResult>
186where
187    R: ObjectReader,
188    W: ObjectWriter,
189{
190    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
191    let pack = PackFile::parse_bundle(bundle)?;
192    let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
193    Ok(BundleUnbundleResult {
194        written_objects,
195        references: bundle.references.clone(),
196    })
197}
198
199pub fn install_bundle_pack<R>(
200    bundle: &Bundle,
201    prerequisite_reader: &R,
202    destination: &impl RawPackInstaller,
203) -> Result<BundleUnbundleResult>
204where
205    R: ObjectReader,
206{
207    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
208    let install = destination.install_raw_pack(&bundle.pack)?;
209    Ok(BundleUnbundleResult {
210        written_objects: install.object_ids,
211        references: bundle.references.clone(),
212    })
213}
214
215pub fn unpack_packfile_objects<W>(
216    pack_bytes: &[u8],
217    format: ObjectFormat,
218    writer: &W,
219) -> Result<PackUnpackResult>
220where
221    W: ObjectWriter,
222{
223    let pack = PackFile::parse(pack_bytes, format)?;
224    write_pack_objects(pack, writer, "pack")
225}
226
227fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
228where
229    W: ObjectWriter,
230{
231    let mut written_objects = Vec::with_capacity(pack.entries.len());
232    for entry in pack.entries {
233        let expected = entry.entry.oid;
234        let actual = writer.write_object(entry.object)?;
235        if actual != expected {
236            return Err(GitError::InvalidObject(format!(
237                "{source} object id mismatch: expected {expected}, wrote {actual}"
238            )));
239        }
240        written_objects.push(actual);
241    }
242    Ok(PackUnpackResult { written_objects })
243}
244
245pub fn collect_reachable_object_ids<R, I>(
246    reader: &R,
247    format: ObjectFormat,
248    starts: I,
249) -> Result<HashSet<ObjectId>>
250where
251    R: ObjectReader,
252    I: IntoIterator<Item = ObjectId>,
253{
254    walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
255}
256
257/// [`collect_reachable_object_ids`] with a cut set: commits in `cut` are
258/// collected, but the walk does not continue to their parents — the view a
259/// shallow repository has of its own refs (`$GIT_DIR/shallow` of the *other*
260/// side, threaded explicitly because `reader` belongs to this side).
261pub fn collect_reachable_object_ids_with_cut<R, I>(
262    reader: &R,
263    format: ObjectFormat,
264    starts: I,
265    cut: &HashSet<ObjectId>,
266) -> Result<HashSet<ObjectId>>
267where
268    R: ObjectReader,
269    I: IntoIterator<Item = ObjectId>,
270{
271    walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
272}
273
274/// [`collect_reachable_object_ids`] with a stop set: objects in `excluded` are
275/// not visited and not expanded, so the walk never sees anything reachable only
276/// through them (used to truncate history at a shallow boundary).
277pub fn collect_reachable_object_ids_excluding<R, I>(
278    reader: &R,
279    format: ObjectFormat,
280    starts: I,
281    excluded: &HashSet<ObjectId>,
282) -> Result<HashSet<ObjectId>>
283where
284    R: ObjectReader,
285    I: IntoIterator<Item = ObjectId>,
286{
287    walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
288}
289
290pub fn collect_reachable_objects<R, I>(
291    reader: &R,
292    format: ObjectFormat,
293    starts: I,
294    excluded: &HashSet<ObjectId>,
295) -> Result<Vec<Arc<EncodedObject>>>
296where
297    R: ObjectReader,
298    I: IntoIterator<Item = ObjectId>,
299{
300    let mut objects = Vec::new();
301    walk_reachable_objects(reader, format, starts, excluded, |_, object| {
302        objects.push(Arc::clone(object));
303    })?;
304    Ok(objects)
305}
306
/// An object gathered for packing, kept together with its already-known id.
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id under which the object was found.
    oid: ObjectId,
    /// Shared handle to the object's encoded form.
    object: Arc<EncodedObject>,
}
312
313fn collect_reachable_pack_objects<R, I>(
314    reader: &R,
315    format: ObjectFormat,
316    starts: I,
317    excluded: &HashSet<ObjectId>,
318) -> Result<Vec<ReachablePackObject>>
319where
320    R: ObjectReader,
321    I: IntoIterator<Item = ObjectId>,
322{
323    let mut objects = Vec::new();
324    walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
325        objects.push(ReachablePackObject {
326            oid: *oid,
327            object: Arc::clone(object),
328        });
329    })?;
330    Ok(objects)
331}
332
333fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
334    objects
335        .iter()
336        .map(|entry| PackInput {
337            oid: &entry.oid,
338            object: &entry.object,
339        })
340        .collect()
341}
342
343pub fn install_reachable_pack<I>(
344    source: &impl ObjectReader,
345    destination: &impl RawPackInstaller,
346    format: ObjectFormat,
347    starts: I,
348) -> Result<Option<RawPackInstallResult>>
349where
350    I: IntoIterator<Item = ObjectId>,
351{
352    install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
353}
354
355pub fn install_reachable_pack_excluding<I>(
356    source: &impl ObjectReader,
357    destination: &impl RawPackInstaller,
358    format: ObjectFormat,
359    starts: I,
360    excluded: &HashSet<ObjectId>,
361) -> Result<Option<RawPackInstallResult>>
362where
363    I: IntoIterator<Item = ObjectId>,
364{
365    let pack = match build_reachable_pack(source, format, starts, excluded)? {
366        Some(pack) => pack,
367        None => return Ok(None),
368    };
369    destination.install_raw_pack(&pack.pack).map(Some)
370}
371
372pub fn build_reachable_pack<R, I>(
373    reader: &R,
374    format: ObjectFormat,
375    starts: I,
376    excluded: &HashSet<ObjectId>,
377) -> Result<Option<PackWrite>>
378where
379    R: ObjectReader,
380    I: IntoIterator<Item = ObjectId>,
381{
382    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
383    if objects.is_empty() {
384        return Ok(None);
385    }
386    // Delta-compress reachable packs (used by install/push/fetch) via git-pack's
387    // sliding-window selection. Self-contained, ofs-delta by default; round-trips
388    // through the existing parser. PackWrite shape is unchanged, so callers are
389    // unaffected.
390    let inputs = pack_inputs(&objects);
391    PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
392}
393
394pub fn build_and_install_reachable_pack<R, I>(
395    source: &R,
396    destination: &FileObjectDatabase,
397    format: ObjectFormat,
398    starts: I,
399    excluded: &HashSet<ObjectId>,
400    options: RawPackInstallOptions,
401) -> Result<Option<PackInstallResult>>
402where
403    R: ObjectReader,
404    I: IntoIterator<Item = ObjectId>,
405{
406    build_and_install_reachable_pack_filtered(
407        source,
408        destination,
409        format,
410        starts,
411        excluded,
412        options,
413        None,
414        None,
415    )
416}
417
/// A partial-clone object filter applied while building a transfer pack.
///
/// Mirrors the subset of upstream's `list-objects-filter` the in-process local
/// server supports: directly-wanted tips are always packed; the filter only
/// prunes objects reached *through* the traversal (upstream's
/// `filter_blobs_none` runs on traversed blobs, never on wanted tips).
///
/// Callers pass it as `Option<PackObjectFilter>`; `None` means no filtering.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// `blob:none`: omit every blob reached through tree traversal.
    BlobNone,
}
429
430/// [`build_and_install_reachable_pack`] with an optional partial-clone
431/// `filter`. With `Some(BlobNone)`, blobs are dropped from the pack unless
432/// they are directly wanted (named in `starts`).
433#[allow(clippy::too_many_arguments)]
434pub fn build_and_install_reachable_pack_filtered<R, I>(
435    source: &R,
436    destination: &FileObjectDatabase,
437    format: ObjectFormat,
438    starts: I,
439    excluded: &HashSet<ObjectId>,
440    options: RawPackInstallOptions,
441    filter: Option<PackObjectFilter>,
442    unpack_limit: Option<usize>,
443) -> Result<Option<PackInstallResult>>
444where
445    R: ObjectReader,
446    I: IntoIterator<Item = ObjectId>,
447{
448    let starts: Vec<ObjectId> = starts.into_iter().collect();
449    let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
450    let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
451    match filter {
452        Some(PackObjectFilter::BlobNone) => {
453            objects.retain(|entry| {
454                entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
455            });
456        }
457        None => {}
458    }
459    if objects.is_empty() {
460        return Ok(None);
461    }
462    // Mirror fetch-pack's unpack-limit: small transfers are exploded into
463    // loose objects instead of landing as a pack (upstream `get_pack` picks
464    // unpack-objects when the header count is below fetch/transfer.unpackLimit).
465    if let Some(limit) = unpack_limit
466        && objects.len() < limit
467    {
468        for entry in &objects {
469            destination.loose().write_object((*entry.object).clone())?;
470        }
471        return Ok(None);
472    }
473    let inputs = pack_inputs(&objects);
474    let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
475    destination
476        .install_generated_pack_unchecked(&pack, options)
477        .map(Some)
478}
479
480/// Assemble a pack stream that reuses an existing pack's object data verbatim
481/// (upstream pack-objects' "pack reuse" fast path, full-pack case) and appends
482/// `appended` as freshly encoded undeltified entries.
483///
484/// The reused pack's entry bytes are copied as-is between our own header and
485/// trailer: a full-pack copy preserves every relative distance, so internal
486/// `OFS_DELTA` bases stay valid. The header object count covers both the
487/// reused and appended entries, and the trailing pack checksum is recomputed
488/// over the assembled stream.
489pub fn assemble_pack_with_verbatim_reuse(
490    format: ObjectFormat,
491    reused_pack_bytes: &[u8],
492    appended: &[PackInput<'_>],
493) -> Result<(Vec<u8>, u32)> {
494    assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
495}
496
497/// Like [`assemble_pack_with_verbatim_reuse`], but concatenates multiple whole
498/// packs before appending fresh entries.
499pub fn assemble_pack_with_verbatim_reuses(
500    format: ObjectFormat,
501    reused_packs: &[&[u8]],
502    appended: &[PackInput<'_>],
503) -> Result<(Vec<u8>, u32)> {
504    let hash_len = format.raw_len();
505    let mut reused_count = 0u32;
506    let mut capacity = 12 + hash_len + 64 * appended.len();
507    for reused_pack_bytes in reused_packs {
508        if reused_pack_bytes.len() < 12 + hash_len {
509            return Err(GitError::InvalidFormat("reused pack too short".into()));
510        }
511        if &reused_pack_bytes[..4] != b"PACK" {
512            return Err(GitError::InvalidFormat(
513                "reused pack has no signature".into(),
514            ));
515        }
516        let version = u32::from_be_bytes([
517            reused_pack_bytes[4],
518            reused_pack_bytes[5],
519            reused_pack_bytes[6],
520            reused_pack_bytes[7],
521        ]);
522        if version != 2 {
523            return Err(GitError::Unsupported(format!(
524                "reused pack version {version}"
525            )));
526        }
527        let count = u32::from_be_bytes([
528            reused_pack_bytes[8],
529            reused_pack_bytes[9],
530            reused_pack_bytes[10],
531            reused_pack_bytes[11],
532        ]);
533        reused_count = reused_count
534            .checked_add(count)
535            .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
536        capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
537    }
538    let total = reused_count
539        .checked_add(appended.len() as u32)
540        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
541
542    let mut out = Vec::with_capacity(capacity);
543    out.extend_from_slice(b"PACK");
544    out.extend_from_slice(&2u32.to_be_bytes());
545    out.extend_from_slice(&total.to_be_bytes());
546    for reused_pack_bytes in reused_packs {
547        out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
548    }
549    for input in appended {
550        write_undeltified_pack_entry(&mut out, input.object)?;
551    }
552    let checksum = sley_core::digest_bytes(format, &out)?;
553    out.extend_from_slice(checksum.as_bytes());
554    Ok((out, reused_count))
555}
556
557/// Assemble a pack stream by copying already-encoded pack entries verbatim and
558/// appending freshly encoded undeltified entries.
559pub fn assemble_pack_with_verbatim_entries(
560    format: ObjectFormat,
561    reused_entries: &[&[u8]],
562    appended: &[PackInput<'_>],
563) -> Result<(Vec<u8>, u32)> {
564    let reused_count = u32::try_from(reused_entries.len())
565        .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
566    let total = reused_count
567        .checked_add(appended.len() as u32)
568        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
569
570    let mut capacity = 12 + format.raw_len() + 64 * appended.len();
571    for entry in reused_entries {
572        capacity = capacity.saturating_add(entry.len());
573    }
574    let mut out = Vec::with_capacity(capacity);
575    out.extend_from_slice(b"PACK");
576    out.extend_from_slice(&2u32.to_be_bytes());
577    out.extend_from_slice(&total.to_be_bytes());
578    for entry in reused_entries {
579        out.extend_from_slice(entry);
580    }
581    for input in appended {
582        write_undeltified_pack_entry(&mut out, input.object)?;
583    }
584    let checksum = sley_core::digest_bytes(format, &out)?;
585    out.extend_from_slice(checksum.as_bytes());
586    Ok((out, reused_count))
587}
588
589/// Append one undeltified pack entry (type/size varint header + zlib body).
590fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
591    let type_bits: u8 = match object.object_type {
592        ObjectType::Commit => 1,
593        ObjectType::Tree => 2,
594        ObjectType::Blob => 3,
595        ObjectType::Tag => 4,
596    };
597    let mut size = object.body.len() as u64;
598    let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
599    size >>= 4;
600    while size > 0 {
601        out.push(byte | 0x80);
602        byte = (size & 0x7f) as u8;
603        size >>= 7;
604    }
605    out.push(byte);
606    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
607    encoder.write_all(&object.body)?;
608    out.extend_from_slice(&encoder.finish()?);
609    Ok(())
610}
611
/// Outcome of consolidating every object in a repository into a single pack.
///
/// This is the engine for `git gc` / `git repack`: [`repack_all_objects`]
/// produces the bytes for one new delta-compressed pack plus its index, and
/// reports which on-disk artifacts the caller could now remove. No deletions
/// are performed by the engine itself; the CLI decides reachability policy and
/// performs any pruning (see [`install_repack_result`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the freshly written `.pack` file.
    pub pack: Vec<u8>,
    /// Bytes of the matching `.idx` file for [`RepackResult::pack`].
    pub idx: Vec<u8>,
    /// Number of distinct objects contained in the new pack.
    pub object_count: usize,
    /// Absolute paths of pre-existing `*.pack` files now superseded by the new
    /// pack (every object they hold is present in [`RepackResult::pack`]).
    pub obsolete_packs: Vec<PathBuf>,
    /// Loose object ids that are now also present in the new pack and therefore
    /// redundant on disk.
    pub packed_loose: Vec<ObjectId>,
    /// Checksum the pack writer reported for [`RepackResult::pack`]; the
    /// `pack-<hex>.pack` file name is derived from it.
    pack_checksum: ObjectId,
    /// Index entries the pack writer reported for the new pack.
    index_entries: Vec<PackIndexEntry>,
}
636
637/// Gather every object in `git_dir` (loose objects and every existing pack) and
638/// write them into a single new delta-compressed pack.
639///
640/// Returns the new pack/index bytes, the count of packed objects, the list of
641/// pre-existing pack files that the new pack supersedes, and the loose object
642/// ids that are now packed. Nothing is deleted: the caller (CLI) decides
643/// reachability policy and performs any pruning, optionally via
644/// [`install_repack_result`].
645///
646/// Returns `Ok(None)` when the repository contains no objects at all.
647/// `git repack -a`'s gathering rule: pack the reachability closure of `roots`
648/// (ref tips, `HEAD`, reflog entries, indexed objects) instead of everything
649/// on disk. Borrowed objects (alternates) reachable from the roots are packed
650/// into the new local pack like upstream `pack-objects --all` without
651/// `--local`; previously-packed objects that are no longer reachable are NOT
652/// carried forward (that is how `repack -a -d` drops them). Missing objects
653/// are tolerated (stale reflog entries may reference pruned history).
654///
655/// Returns `Ok(None)` when no roots resolve to any object.
656pub fn repack_reachable_objects(
657    git_dir: &Path,
658    format: ObjectFormat,
659    roots: &[ObjectId],
660) -> Result<Option<RepackResult>> {
661    let objects_dir = repository_objects_dir(git_dir);
662    let database = FileObjectDatabase::new(objects_dir.clone(), format);
663
664    let mut seen: HashSet<ObjectId> = HashSet::new();
665    let mut objects: Vec<ReachablePackObject> = Vec::new();
666    let mut pending: Vec<ObjectId> = roots.to_vec();
667    while let Some(oid) = pending.pop() {
668        if !seen.insert(oid) {
669            continue;
670        }
671        let object = match database.read_object(&oid) {
672            Ok(object) => object,
673            Err(GitError::NotFound(_)) => continue,
674            Err(err) => return Err(err),
675        };
676        match object.object_type {
677            ObjectType::Commit => {
678                let commit = Commit::parse_ref(format, &object.body)?;
679                pending.extend(grafted_parents(&database, &oid, commit.parents));
680                pending.push(commit.tree);
681            }
682            ObjectType::Tree => {
683                for entry in TreeEntries::new(format, &object.body) {
684                    let entry = entry?;
685                    if !entry.is_gitlink() {
686                        pending.push(entry.oid);
687                    }
688                }
689            }
690            ObjectType::Tag => {
691                let tag = Tag::parse_ref(format, &object.body)?;
692                pending.push(tag.object);
693            }
694            ObjectType::Blob => {}
695        }
696        objects.push(ReachablePackObject { oid, object });
697    }
698    if objects.is_empty() {
699        return Ok(None);
700    }
701
702    let inputs = pack_inputs(&objects);
703    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
704    let object_count = written.entries.len();
705
706    // Every pre-existing local pack is superseded under `-a` (their reachable
707    // objects are in the new pack; their unreachable ones are being dropped).
708    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
709    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
710        .into_iter()
711        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
712        .collect();
713
714    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
715    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
716        .into_iter()
717        .filter(|oid| packed_oid_set.contains(oid))
718        .collect();
719    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
720
721    let pack_checksum = written.checksum;
722    let index_entries = written.entries.clone();
723    Ok(Some(RepackResult {
724        pack: written.pack,
725        idx: written.index,
726        object_count,
727        obsolete_packs,
728        packed_loose,
729        pack_checksum,
730        index_entries,
731    }))
732}
733
734pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
735    let objects_dir = repository_objects_dir(git_dir);
736    let database = FileObjectDatabase::new(objects_dir.clone(), format);
737
738    // Enumerate every object id reachable on disk: loose objects, every pack
739    // index, and any multi-pack-index. `object_ids_in_objects_dir` already
740    // unions all of these and de-duplicates them.
741    let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
742    if all_oids.is_empty() {
743        return Ok(None);
744    }
745
746    // Read each object's canonical encoding so the new pack stores byte-for-byte
747    // identical payloads. Loose objects take precedence over packed copies in
748    // `FileObjectDatabase::read_object`, but both decode to the same bytes.
749    let mut objects = Vec::with_capacity(all_oids.len());
750    for oid in &all_oids {
751        objects.push(ReachablePackObject {
752            oid: *oid,
753            object: database.read_object(oid)?,
754        });
755    }
756
757    let inputs = pack_inputs(&objects);
758    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
759    let object_count = written.entries.len();
760
761    // The new pack contains every object on disk, so every pre-existing pack is
762    // fully superseded. We still record the exact pack paths (not the index
763    // paths) so the caller can delete the right files. The pack we are about to
764    // write is excluded by name in case its checksum collides with an existing
765    // pack (identical contents).
766    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
767    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
768        .into_iter()
769        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
770        .collect();
771
772    // Loose object ids that the new pack now also holds (which is all of them,
773    // since they were gathered into it).
774    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
775    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
776        .into_iter()
777        .filter(|oid| packed_oid_set.contains(oid))
778        .collect();
779    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
780
781    Ok(Some(RepackResult {
782        pack: written.pack,
783        idx: written.index,
784        object_count,
785        obsolete_packs,
786        packed_loose,
787        pack_checksum: written.checksum,
788        index_entries: written.entries,
789    }))
790}
791
792/// Gather only loose objects in `git_dir` and write them into a new pack.
793///
794/// This is the engine for plain `git repack -d` (without `-a`): existing packs
795/// remain in place, and pruning removes only the loose copies that the new pack
796/// now serves.
797pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
798    let objects_dir = repository_objects_dir(git_dir);
799    let database = FileObjectDatabase::new(objects_dir.clone(), format);
800    let loose_oids = loose_object_ids(&objects_dir, format)?;
801    if loose_oids.is_empty() {
802        return Ok(None);
803    }
804
805    let mut objects = Vec::with_capacity(loose_oids.len());
806    for oid in &loose_oids {
807        objects.push(ReachablePackObject {
808            oid: *oid,
809            object: database.read_object(oid)?,
810        });
811    }
812
813    let inputs = pack_inputs(&objects);
814    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
815    let object_count = written.entries.len();
816    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
817    let mut packed_loose: Vec<ObjectId> = loose_oids
818        .into_iter()
819        .filter(|oid| packed_oid_set.contains(oid))
820        .collect();
821    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
822
823    let pack_checksum = written.checksum;
824    let index_entries = written.entries.clone();
825    Ok(Some(RepackResult {
826        pack: written.pack,
827        idx: written.index,
828        object_count,
829        obsolete_packs: Vec::new(),
830        packed_loose,
831        pack_checksum,
832        index_entries,
833    }))
834}
835
/// A local, non-kept, non-cruft pack considered for a geometric rollup,
/// paired with the object count that orders it in the progression.
///
/// NOTE(review): instances are built in `collect_geometry_packs`, which is
/// only partly visible here — confirm field provenance there.
#[derive(Debug, Clone)]
struct GeometryPack {
    /// Absolute path to the `.pack` file.
    pack_path: PathBuf,
    /// Object ids the pack holds (from its `.idx`).
    oids: Vec<ObjectId>,
    /// `num_objects` weight used to order the progression.
    weight: u64,
    /// True when this pack is a promisor pack (`.promisor` sidecar).
    is_promisor: bool,
}
849
850/// The outcome of a geometric rollup: the new pack (if one was written) plus
851/// the rolled-up packs whose objects it now serves.
852#[derive(Debug, Clone)]
853pub struct GeometricRepackResult {
854    /// `Some` when a new pack was written; `None` when nothing needed packing.
855    pub result: Option<RepackResult>,
856    /// Pack `.pack` paths below the split that may now be removed under `-d`.
857    pub rolled_up_packs: Vec<PathBuf>,
858}
859
860/// Collect the local non-cruft, non-kept packs eligible for geometric rollup,
861/// keyed by promisor-ness, ordered ascending by object count.
862fn collect_geometry_packs(
863    objects_dir: &Path,
864    format: ObjectFormat,
865    kept_pack_stems: &HashSet<String>,
866) -> Result<Vec<GeometryPack>> {
867    let pack_dir = objects_dir.join("pack");
868    let mut packs = Vec::new();
869    for pack_path in existing_pack_files(&pack_dir)? {
870        // Cruft packs (`.mtimes` sidecar) and kept packs are excluded from the
871        // progression, matching `pack_geometry_init` in repack-geometry.c.
872        if pack_path.with_extension("mtimes").exists() {
873            continue;
874        }
875        if pack_path.with_extension("keep").exists() {
876            continue;
877        }
878        let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
879            continue;
880        };
881        if kept_pack_stems.contains(stem) {
882            continue;
883        }
884        let index_path = pack_path.with_extension("idx");
885        if !index_path.exists() {
886            continue;
887        }
888        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
889        let oids: Vec<ObjectId> = index.entries.iter().map(|entry| entry.oid).collect();
890        let weight = oids.len() as u64;
891        packs.push(GeometryPack {
892            is_promisor: pack_path.with_extension("promisor").exists(),
893            pack_path,
894            oids,
895            weight,
896        });
897    }
898    // Ascending by weight; pack_path breaks ties deterministically.
899    packs.sort_by(|a, b| a.weight.cmp(&b.weight).then(a.pack_path.cmp(&b.pack_path)));
900    Ok(packs)
901}
902
903/// Port of `compute_pack_geometry_split` (repack-geometry.c): given packs in
904/// ascending weight order, return the split index — packs `[0..split)` roll up
905/// into one new pack, packs `[split..)` are left alone.
906fn compute_geometry_split(packs: &[GeometryPack], split_factor: u64) -> usize {
907    let pack_nr = packs.len();
908    if pack_nr == 0 {
909        return 0;
910    }
911    // Count packs (descending size) that already form a geometric progression.
912    let mut i = pack_nr - 1;
913    while i > 0 {
914        let ours = packs[i].weight;
915        let prev = packs[i - 1].weight;
916        if ours < split_factor.saturating_mul(prev) {
917            break;
918        }
919        i -= 1;
920    }
921    let mut split = i;
922    if split != 0 {
923        // The top of the last-compared pair can't be in the progression.
924        split += 1;
925    }
926
927    // Roll up everything below `split`; pulling those into a new pack may break
928    // the progression in the heavy half, so absorb heavy-half packs until it
929    // holds again.
930    let mut total_size: u64 = packs[..split].iter().map(|p| p.weight).sum();
931    let mut split = split;
932    for pack in &packs[split..] {
933        if pack.weight < split_factor.saturating_mul(total_size) {
934            split += 1;
935            total_size = total_size.saturating_add(pack.weight);
936        } else {
937            break;
938        }
939    }
940    split
941}
942
943/// `git repack --geometric=<factor>`: roll up the smallest packs (plus loose
944/// unpacked objects) so the surviving packs form a geometric progression by
945/// object count. Objects in the rolled-up packs and loose objects are gathered
946/// into one new pack; packs at/above the split are left in place. The new pack
947/// excludes objects already served by a left-alone pack.
948///
949/// Returns the new pack plus the rolled-up pack paths the caller may delete
950/// under `-d`. Returns an all-`None`/empty result when nothing needs packing
951/// ("Nothing new to pack").
952pub fn repack_geometric(
953    git_dir: &Path,
954    format: ObjectFormat,
955    split_factor: u64,
956    kept_pack_stems: &HashSet<String>,
957) -> Result<GeometricRepackResult> {
958    let objects_dir = repository_objects_dir(git_dir);
959    let database = FileObjectDatabase::new(objects_dir.clone(), format);
960
961    // Promisor packs follow their own progression; the non-promisor packs are
962    // the common case the test-suite exercises. Build the rollup from the
963    // non-promisor packs plus loose objects.
964    let all_packs = collect_geometry_packs(&objects_dir, format, kept_pack_stems)?;
965    let packs: Vec<GeometryPack> = all_packs
966        .into_iter()
967        .filter(|pack| !pack.is_promisor)
968        .collect();
969
970    let split = compute_geometry_split(&packs, split_factor);
971
972    let loose_oids = loose_object_ids(&objects_dir, format)?;
973
974    // The objects that end up in the new pack: every object in a rolled-up pack,
975    // plus every loose object — but NOT objects already served by a pack left in
976    // place (those above the split). This mirrors the `^pack` exclusion markers
977    // that repack.c feeds to `pack-objects --stdin-packs`.
978    let mut excluded_oids: HashSet<ObjectId> = HashSet::new();
979    for pack in &packs[split..] {
980        excluded_oids.extend(pack.oids.iter().copied());
981    }
982
983    let mut included: Vec<ObjectId> = Vec::new();
984    let mut seen: HashSet<ObjectId> = HashSet::new();
985    for pack in &packs[..split] {
986        for oid in &pack.oids {
987            if excluded_oids.contains(oid) {
988                continue;
989            }
990            if seen.insert(*oid) {
991                included.push(*oid);
992            }
993        }
994    }
995    for oid in &loose_oids {
996        if excluded_oids.contains(oid) {
997            continue;
998        }
999        if seen.insert(*oid) {
1000            included.push(*oid);
1001        }
1002    }
1003
1004    // "Nothing new to pack": no packs roll up and no loose objects need packing.
1005    if included.is_empty() {
1006        return Ok(GeometricRepackResult {
1007            result: None,
1008            rolled_up_packs: Vec::new(),
1009        });
1010    }
1011
1012    included.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1013    let mut objects = Vec::with_capacity(included.len());
1014    for oid in &included {
1015        objects.push(ReachablePackObject {
1016            oid: *oid,
1017            object: database.read_object(oid)?,
1018        });
1019    }
1020
1021    let inputs = pack_inputs(&objects);
1022    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1023    let object_count = written.entries.len();
1024
1025    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1026    let mut packed_loose: Vec<ObjectId> = loose_oids
1027        .into_iter()
1028        .filter(|oid| packed_oid_set.contains(oid))
1029        .collect();
1030    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1031
1032    let rolled_up_packs: Vec<PathBuf> = packs[..split]
1033        .iter()
1034        .map(|pack| pack.pack_path.clone())
1035        .collect();
1036
1037    let pack_checksum = written.checksum;
1038    let index_entries = written.entries.clone();
1039    Ok(GeometricRepackResult {
1040        result: Some(RepackResult {
1041            pack: written.pack,
1042            idx: written.index,
1043            object_count,
1044            obsolete_packs: rolled_up_packs.clone(),
1045            packed_loose,
1046            pack_checksum,
1047            index_entries,
1048        }),
1049        rolled_up_packs,
1050    })
1051}
1052
1053/// Write the consolidated pack from a [`RepackResult`] into
1054/// `objects/pack/` and, when `prune` is set, remove the now-redundant
1055/// pre-existing packs and packed loose objects.
1056///
1057/// Pruning is opt-in and deliberately conservative: an object or pack is only
1058/// removed after verifying it is actually present in the freshly written pack
1059/// on disk. Concretely:
1060///
1061/// * a loose object is removed only if its id appears in the new pack;
1062/// * a pre-existing pack is removed only if it is not the pack we just wrote
1063///   *and* every object listed in its `.idx` is present in the new pack (its
1064///   `.idx` and known sidecars are removed alongside it);
1065/// * a stale `multi-pack-index` is removed only if every pack it references is
1066///   being removed, so no reader is ever left pointing at a deleted pack.
1067pub fn install_repack_result(
1068    git_dir: &Path,
1069    format: ObjectFormat,
1070    result: &RepackResult,
1071    prune: bool,
1072) -> Result<()> {
1073    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
1074}
1075
1076/// [`install_repack_result`] that additionally writes a `pack-<checksum>.bitmap`
1077/// reachability bitmap alongside the new pack when `bitmap_tips` is `Some`.
1078/// `bitmap_tips` carries the repository's ref tips (peeled to commits): they
1079/// receive selection preference, mirroring upstream's `NEEDS_BITMAP` flagging of
1080/// ref tips in `git repack -b` / `pack-objects --write-bitmap-index`.
1081pub fn install_repack_result_with_bitmap(
1082    git_dir: &Path,
1083    format: ObjectFormat,
1084    result: &RepackResult,
1085    prune: bool,
1086    bitmap_tips: Option<&HashSet<ObjectId>>,
1087) -> Result<()> {
1088    let objects_dir = repository_objects_dir(git_dir);
1089    let pack_dir = objects_dir.join("pack");
1090    fs::create_dir_all(&pack_dir)?;
1091
1092    // Validate the public bytes against the private provenance that
1093    // `repack_all_objects` captured from `PackFile::write_packed`. This avoids
1094    // inflating and resolving the freshly-written pack a second time while still
1095    // catching caller mutations before anything is written or pruned.
1096    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1097    let parsed_index = PackIndex::parse(&result.idx, format)?;
1098    if parsed_index.pack_checksum != result.pack_checksum {
1099        return Err(GitError::InvalidFormat(
1100            "repack index checksum does not match the new pack".into(),
1101        ));
1102    }
1103    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1104        return Err(GitError::InvalidFormat(
1105            "repack index does not match the new pack contents".into(),
1106        ));
1107    }
1108    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1109    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1110    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1111    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1112    // git writes a `.rev` alongside every repacked pack (`pack.writeReverseIndex`
1113    // defaults to true). Write it before the `.idx` so the index never becomes
1114    // visible ahead of its companions, mirroring upstream's finalize order.
1115    let reverse_index = sley_pack::PackReverseIndex::write(
1116        format,
1117        &sley_pack::pack_order_index_positions(&parsed_index.entries),
1118        &result.pack_checksum,
1119    )?;
1120    write_pack_component(&new_pack_path, &result.pack)?;
1121    write_pack_component(&new_rev_path, &reverse_index)?;
1122    write_pack_component(&new_index_path, &result.idx)?;
1123
1124    if let Some(tips) = bitmap_tips {
1125        // Build before pruning: the closure walk reads objects through the
1126        // pre-existing packs/loose store (the new pack holds the same bytes).
1127        let database = FileObjectDatabase::new(objects_dir.clone(), format);
1128        if let Some(bitmap) = build_pack_bitmap(
1129            &database,
1130            format,
1131            &result.index_entries,
1132            &result.pack_checksum,
1133            tips,
1134        )? {
1135            // Unlike the pack/idx/rev (content-addressed by the pack
1136            // checksum), the bitmap depends on selection inputs (e.g.
1137            // pack.preferBitmapTips), so an existing file must be replaced —
1138            // write_pack_component's exists-skip would keep a stale selection.
1139            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1140            remove_file_if_exists(&bitmap_path)?;
1141            write_pack_component(&bitmap_path, &bitmap)?;
1142        }
1143    }
1144
1145    if !prune {
1146        return Ok(());
1147    }
1148
1149    // Prune based on the objects the new pack's *index* can resolve (what reads use
1150    // once the old packs are gone), not just what the pack contains — so a stale
1151    // pack is never removed for an object the new index cannot serve.
1152    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1153
1154    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
1155    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1156    Ok(())
1157}
1158
1159/// Install a [`repack_geometric`] result: write the new pack, then under `prune`
1160/// remove EXACTLY the rolled-up packs (those below the geometric split) plus the
1161/// loose objects now packed. Unlike [`install_repack_result`], packs left in
1162/// place above the split are never removed even though some of their objects may
1163/// also live in the new pack.
1164pub fn install_geometric_repack_result(
1165    git_dir: &Path,
1166    format: ObjectFormat,
1167    geometric: &GeometricRepackResult,
1168    prune: bool,
1169    bitmap_tips: Option<&HashSet<ObjectId>>,
1170) -> Result<()> {
1171    let Some(result) = geometric.result.as_ref() else {
1172        return Ok(());
1173    };
1174    let objects_dir = repository_objects_dir(git_dir);
1175    let pack_dir = objects_dir.join("pack");
1176    fs::create_dir_all(&pack_dir)?;
1177
1178    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1179    let parsed_index = PackIndex::parse(&result.idx, format)?;
1180    if parsed_index.pack_checksum != result.pack_checksum {
1181        return Err(GitError::InvalidFormat(
1182            "repack index checksum does not match the new pack".into(),
1183        ));
1184    }
1185    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1186        return Err(GitError::InvalidFormat(
1187            "repack index does not match the new pack contents".into(),
1188        ));
1189    }
1190    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1191    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1192    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1193    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1194    let reverse_index = sley_pack::PackReverseIndex::write(
1195        format,
1196        &sley_pack::pack_order_index_positions(&parsed_index.entries),
1197        &result.pack_checksum,
1198    )?;
1199    write_pack_component(&new_pack_path, &result.pack)?;
1200    write_pack_component(&new_rev_path, &reverse_index)?;
1201    write_pack_component(&new_index_path, &result.idx)?;
1202
1203    if let Some(tips) = bitmap_tips {
1204        let database = FileObjectDatabase::new(objects_dir.clone(), format);
1205        if let Some(bitmap) = build_pack_bitmap(
1206            &database,
1207            format,
1208            &result.index_entries,
1209            &result.pack_checksum,
1210            tips,
1211        )? {
1212            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1213            remove_file_if_exists(&bitmap_path)?;
1214            write_pack_component(&bitmap_path, &bitmap)?;
1215        }
1216    }
1217
1218    if !prune {
1219        return Ok(());
1220    }
1221
1222    // Remove exactly the rolled-up packs (below the split). Never touch packs
1223    // left in place above the split.
1224    for pack_path in &geometric.rolled_up_packs {
1225        if *pack_path == new_pack_path {
1226            continue;
1227        }
1228        if pack_path.with_extension("keep").exists() {
1229            continue;
1230        }
1231        remove_file_if_exists(pack_path)?;
1232        remove_file_if_exists(&pack_path.with_extension("idx"))?;
1233        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
1234            remove_file_if_exists(&pack_path.with_extension(ext))?;
1235        }
1236    }
1237
1238    // Drop loose copies now served by the new pack.
1239    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1240    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1241
1242    // A multi-pack-index that references any removed pack is now stale.
1243    let removed_stems: HashSet<String> = geometric
1244        .rolled_up_packs
1245        .iter()
1246        .filter_map(|p| p.file_stem().map(|s| s.to_string_lossy().into_owned()))
1247        .collect();
1248    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1249    Ok(())
1250}
1251
1252fn validate_pack_checksum(
1253    pack: &[u8],
1254    format: ObjectFormat,
1255    expected: &ObjectId,
1256    context: &str,
1257) -> Result<()> {
1258    if expected.format() != format {
1259        return Err(GitError::InvalidObjectId(format!(
1260            "{context} checksum format does not match object format"
1261        )));
1262    }
1263    let hash_len = format.raw_len();
1264    if pack.len() < 12 + hash_len {
1265        return Err(GitError::InvalidFormat(format!(
1266            "{context} pack file too short"
1267        )));
1268    }
1269    if &pack[..4] != b"PACK" {
1270        return Err(GitError::InvalidFormat(format!(
1271            "{context} pack file missing PACK signature"
1272        )));
1273    }
1274    let trailer_offset = pack.len() - hash_len;
1275    let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
1276    let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
1277    if &actual != expected || trailer != *expected {
1278        return Err(GitError::InvalidFormat(format!(
1279            "{context} pack checksum does not match generated pack"
1280        )));
1281    }
1282    Ok(())
1283}
1284
/// The modification time of `path` in whole seconds since the UNIX epoch.
///
/// Returns `0` when the time is unavailable: the metadata cannot be read, the
/// platform reports no modification time, or the time lies before the epoch.
/// A time too large for `u32` saturates to `u32::MAX` instead of wrapping, so
/// a far-future timestamp can never be mistaken for an ancient one.
fn path_mtime_secs(path: &Path) -> u32 {
    let Ok(modified) = fs::metadata(path).and_then(|metadata| metadata.modified()) else {
        return 0;
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => u32::try_from(since_epoch.as_secs()).unwrap_or(u32::MAX),
        Err(_) => 0,
    }
}
1294
1295/// The bytes of one cruft `.mtimes` pack plus its sidecars and checksum, ready
1296/// to install under `objects/pack/`.
1297#[derive(Debug, Clone)]
1298pub struct CruftPack {
1299    pub pack: Vec<u8>,
1300    pub idx: Vec<u8>,
1301    pub rev: Vec<u8>,
1302    pub mtimes: Vec<u8>,
1303    pub checksum: ObjectId,
1304    /// Object ids the cruft pack holds (its surviving unreachable set).
1305    pub oids: Vec<ObjectId>,
1306}
1307
1308/// Outcome of `git repack --cruft`: the reachable pack (if any) plus the cruft
1309/// `.mtimes` pack of surviving unreachable objects.
1310#[derive(Debug, Clone)]
1311pub struct CruftRepackResult {
1312    /// The all-into-one reachable pack, or `None` when nothing is reachable.
1313    pub reachable: Option<RepackResult>,
1314    /// The cruft pack of unreachable objects, or `None` when there are none.
1315    pub cruft: Option<CruftPack>,
1316    /// Pre-existing non-cruft, non-kept pack `.pack` paths superseded by the
1317    /// reachable pack (removed under `-d`).
1318    pub obsolete_packs: Vec<PathBuf>,
1319    /// Pre-existing cruft `.pack` paths whose objects are now in the new cruft
1320    /// pack (removed under `-d`).
1321    pub obsolete_cruft_packs: Vec<PathBuf>,
1322}
1323
1324/// Gather every object id on disk together with the best (max) mtime of any
1325/// copy: a packed object contributes its pack's mtime (or its own recorded
1326/// mtime inside a cruft pack), a loose object contributes its file mtime.
1327pub fn object_mtimes_on_disk_pub(
1328    objects_dir: &Path,
1329    format: ObjectFormat,
1330) -> Result<HashMap<ObjectId, u32>> {
1331    object_mtimes_on_disk(objects_dir, format)
1332}
1333
1334fn object_mtimes_on_disk(
1335    objects_dir: &Path,
1336    format: ObjectFormat,
1337) -> Result<HashMap<ObjectId, u32>> {
1338    let mut mtimes: HashMap<ObjectId, u32> = HashMap::new();
1339    let mut record = |oid: ObjectId, mtime: u32| {
1340        mtimes
1341            .entry(oid)
1342            .and_modify(|existing| {
1343                if mtime > *existing {
1344                    *existing = mtime;
1345                }
1346            })
1347            .or_insert(mtime);
1348    };
1349
1350    let pack_dir = objects_dir.join("pack");
1351    if let Ok(entries) = fs::read_dir(&pack_dir) {
1352        let mut idx_paths: Vec<PathBuf> = Vec::new();
1353        for entry in entries {
1354            let path = entry?.path();
1355            if path.extension().and_then(|ext| ext.to_str()) == Some("idx") {
1356                idx_paths.push(path);
1357            }
1358        }
1359        idx_paths.sort();
1360        for idx_path in idx_paths {
1361            let index = PackIndex::parse(&fs::read(&idx_path)?, format)?;
1362            let pack_path = idx_path.with_extension("pack");
1363            let mtimes_path = idx_path.with_extension("mtimes");
1364            let pack_object_mtimes: Option<Vec<u32>> =
1365                fs::read(&mtimes_path).ok().and_then(|bytes| {
1366                    sley_pack::PackMtimes::parse(&bytes, format, index.entries.len())
1367                        .ok()
1368                        .map(|parsed| parsed.mtimes)
1369                });
1370            let pack_mtime = path_mtime_secs(&pack_path);
1371            for (pos, entry) in index.entries.iter().enumerate() {
1372                let mtime = pack_object_mtimes
1373                    .as_ref()
1374                    .and_then(|table| table.get(pos).copied())
1375                    .unwrap_or(pack_mtime);
1376                record(entry.oid, mtime);
1377            }
1378        }
1379    }
1380
1381    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1382    for oid in loose_object_ids(objects_dir, format)? {
1383        let path = store.object_path(&oid)?;
1384        record(oid, path_mtime_secs(&path));
1385    }
1386    Ok(mtimes)
1387}
1388
1389/// Public wrapper over [`build_cruft_pack`] for the `--expire-to` limbo pack.
1390pub fn build_cruft_pack_pub(
1391    database: &FileObjectDatabase,
1392    format: ObjectFormat,
1393    survivors: &HashMap<ObjectId, u32>,
1394) -> Result<Option<CruftPack>> {
1395    build_cruft_pack(database, format, survivors)
1396}
1397
1398/// Build the cruft `.mtimes` pack from the surviving unreachable objects and
1399/// their timestamps.
1400fn build_cruft_pack(
1401    database: &FileObjectDatabase,
1402    format: ObjectFormat,
1403    survivors: &HashMap<ObjectId, u32>,
1404) -> Result<Option<CruftPack>> {
1405    if survivors.is_empty() {
1406        return Ok(None);
1407    }
1408    let mut ordered: Vec<(ObjectId, u32)> = survivors.iter().map(|(o, m)| (*o, *m)).collect();
1409    ordered.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
1410
1411    let mut oids: Vec<ObjectId> = Vec::with_capacity(ordered.len());
1412    let mut objects: Vec<Arc<EncodedObject>> = Vec::with_capacity(ordered.len());
1413    let mut mtime_by_oid: HashMap<ObjectId, u32> = HashMap::with_capacity(ordered.len());
1414    for (oid, mtime) in ordered {
1415        match database.read_object(&oid) {
1416            Ok(object) => {
1417                oids.push(oid);
1418                objects.push(object);
1419                mtime_by_oid.insert(oid, mtime);
1420            }
1421            Err(GitError::NotFound(_)) => {}
1422            Err(err) => return Err(err),
1423        }
1424    }
1425    if oids.is_empty() {
1426        return Ok(None);
1427    }
1428
1429    let inputs: Vec<PackInput<'_>> = oids
1430        .iter()
1431        .zip(&objects)
1432        .map(|(oid, object)| PackInput {
1433            oid,
1434            object: object.as_ref(),
1435        })
1436        .collect();
1437    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1438
1439    // `.mtimes` table is in lexicographic (index/fanout) order.
1440    let mut sorted_entries: Vec<&sley_pack::PackIndexEntry> = written.entries.iter().collect();
1441    sorted_entries.sort_by(|a, b| a.oid.as_bytes().cmp(b.oid.as_bytes()));
1442    let mtimes_table: Vec<u32> = sorted_entries
1443        .iter()
1444        .map(|entry| mtime_by_oid.get(&entry.oid).copied().unwrap_or(0))
1445        .collect();
1446    let positions = sley_pack::pack_order_index_positions(&written.entries);
1447    let rev = sley_pack::PackReverseIndex::write(format, &positions, &written.checksum)?;
1448    let mtimes = sley_pack::PackMtimes::write(format, &mtimes_table, &written.checksum)?;
1449
1450    let mut cruft_oids: Vec<ObjectId> = sorted_entries.iter().map(|e| e.oid).collect();
1451    cruft_oids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1452    Ok(Some(CruftPack {
1453        pack: written.pack,
1454        idx: written.index,
1455        rev,
1456        mtimes,
1457        checksum: written.checksum,
1458        oids: cruft_oids,
1459    }))
1460}
1461
1462/// `git repack --cruft [--cruft-expiration=<t>] [-d]`: pack the reachable
1463/// closure of `roots` into one new pack, then collect every unreachable object
1464/// into a `.mtimes`-stamped cruft pack (honouring `cruft_expiration`). The
1465/// caller installs the result and, under `-d`, removes the superseded non-cruft
1466/// and old cruft packs.
1467///
1468/// Mirrors builtin/repack.c's PACK_CRUFT path + repack-cruft.c `write_cruft_pack`
1469/// without the per-pack stdin protocol: unreachable objects are everything on
1470/// disk minus the reachable set.
1471pub fn repack_cruft(
1472    git_dir: &Path,
1473    format: ObjectFormat,
1474    roots: &[ObjectId],
1475    cruft_expiration: Option<u32>,
1476) -> Result<CruftRepackResult> {
1477    let objects_dir = repository_objects_dir(git_dir);
1478    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1479
1480    // Reachable closure → the new "reachable" pack.
1481    let reachable_ids = collect_reachable_object_ids(&database, format, roots.iter().copied())?;
1482    let reachable_result = if reachable_ids.is_empty() {
1483        None
1484    } else {
1485        let mut ids: Vec<ObjectId> = reachable_ids.iter().copied().collect();
1486        ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1487        let mut objects = Vec::with_capacity(ids.len());
1488        for oid in &ids {
1489            match database.read_object(oid) {
1490                Ok(object) => objects.push(ReachablePackObject { oid: *oid, object }),
1491                Err(GitError::NotFound(_)) => {}
1492                Err(err) => return Err(err),
1493            }
1494        }
1495        if objects.is_empty() {
1496            None
1497        } else {
1498            let inputs = pack_inputs(&objects);
1499            let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1500            let packed_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1501            let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1502                .into_iter()
1503                .filter(|oid| packed_set.contains(oid))
1504                .collect();
1505            packed_loose.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1506            Some(RepackResult {
1507                pack: written.pack,
1508                idx: written.index,
1509                object_count: written.entries.len(),
1510                obsolete_packs: Vec::new(),
1511                packed_loose,
1512                pack_checksum: written.checksum,
1513                index_entries: written.entries,
1514            })
1515        }
1516    };
1517
1518    // Unreachable objects = everything on disk minus the reachable set, stamped
1519    // with their best mtime.
1520    let mut survivors: HashMap<ObjectId, u32> = object_mtimes_on_disk(&objects_dir, format)?
1521        .into_iter()
1522        .filter(|(oid, _)| !reachable_ids.contains(oid))
1523        .collect();
1524
1525    // Expiration: rescue older objects reachable from a recent one, drop the rest.
1526    if let Some(expiration) = cruft_expiration {
1527        rescue_and_expire_cruft_objects(&database, format, &mut survivors, expiration)?;
1528    }
1529
1530    let cruft = build_cruft_pack(&database, format, &survivors)?;
1531
1532    // The packs the reachable+cruft packs supersede: every pre-existing
1533    // non-kept pack. Cruft packs are tracked separately.
1534    let pack_dir = objects_dir.join("pack");
1535    let mut obsolete_packs = Vec::new();
1536    let mut obsolete_cruft_packs = Vec::new();
1537    for pack_path in existing_pack_files(&pack_dir)? {
1538        if pack_path.with_extension("keep").exists() {
1539            continue;
1540        }
1541        if pack_path.with_extension("mtimes").exists() {
1542            obsolete_cruft_packs.push(pack_path);
1543        } else {
1544            obsolete_packs.push(pack_path);
1545        }
1546    }
1547
1548    Ok(CruftRepackResult {
1549        reachable: reachable_result,
1550        cruft,
1551        obsolete_packs,
1552        obsolete_cruft_packs,
1553    })
1554}
1555
1556/// Apply `--cruft-expiration` over the survivor map in place: starting from the
1557/// recent candidates (mtime strictly newer than `expiration`), walk reachability
1558/// and rescue every dependency at the cutoff mtime; drop older candidates that
1559/// no recent object reaches. Mirrors the pack-objects cruft expiry traversal.
1560fn rescue_and_expire_cruft_objects(
1561    database: &FileObjectDatabase,
1562    format: ObjectFormat,
1563    survivors: &mut HashMap<ObjectId, u32>,
1564    expiration: u32,
1565) -> Result<()> {
1566    let recent: Vec<ObjectId> = survivors
1567        .iter()
1568        .filter(|(_, mtime)| **mtime > expiration)
1569        .map(|(oid, _)| *oid)
1570        .collect();
1571
1572    let mut keep: HashSet<ObjectId> = HashSet::new();
1573    let mut pending: Vec<ObjectId> = recent.clone();
1574    while let Some(oid) = pending.pop() {
1575        if !keep.insert(oid) {
1576            continue;
1577        }
1578        let Ok(object) = database.read_object(&oid) else {
1579            continue;
1580        };
1581        match object.object_type {
1582            ObjectType::Commit => {
1583                if let Ok(commit) = Commit::parse_ref(format, &object.body) {
1584                    pending.extend(commit.parents.iter().copied());
1585                    pending.push(commit.tree);
1586                }
1587            }
1588            ObjectType::Tree => {
1589                for entry in TreeEntries::new(format, &object.body).flatten() {
1590                    if !entry.is_gitlink() {
1591                        pending.push(entry.oid);
1592                    }
1593                }
1594            }
1595            ObjectType::Tag => {
1596                if let Ok(tag) = Tag::parse_ref(format, &object.body) {
1597                    pending.push(tag.object);
1598                }
1599            }
1600            ObjectType::Blob => {}
1601        }
1602    }
1603
1604    // Drop any survivor that is neither recent nor rescued; rescued-but-older
1605    // objects keep their recorded mtime (already >= 0), recent ones unchanged.
1606    survivors.retain(|oid, mtime| *mtime > expiration || keep.contains(oid));
1607    Ok(())
1608}
1609
1610/// Install a [`repack_cruft`] result: write the reachable pack and the cruft
1611/// `.mtimes` pack, then under `prune` remove the superseded non-cruft packs, old
1612/// cruft packs, and the loose objects now served.
1613pub fn install_cruft_repack_result(
1614    git_dir: &Path,
1615    format: ObjectFormat,
1616    result: &CruftRepackResult,
1617    prune: bool,
1618) -> Result<()> {
1619    let objects_dir = repository_objects_dir(git_dir);
1620    let pack_dir = objects_dir.join("pack");
1621    fs::create_dir_all(&pack_dir)?;
1622
1623    // Names of packs we are about to remove (so we never delete the new ones).
1624    let new_reachable_name = result
1625        .reachable
1626        .as_ref()
1627        .map(|r| format!("pack-{}.pack", r.pack_checksum.to_hex()));
1628    let new_cruft_name = result
1629        .cruft
1630        .as_ref()
1631        .map(|c| format!("pack-{}.pack", c.checksum.to_hex()));
1632
1633    // Write the reachable pack (idx + rev + pack), content-addressed.
1634    if let Some(reachable) = result.reachable.as_ref() {
1635        let parsed_index = PackIndex::parse(&reachable.idx, format)?;
1636        let pack_name = format!("pack-{}", reachable.pack_checksum.to_hex());
1637        let reverse_index = sley_pack::PackReverseIndex::write(
1638            format,
1639            &sley_pack::pack_order_index_positions(&parsed_index.entries),
1640            &reachable.pack_checksum,
1641        )?;
1642        write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &reachable.pack)?;
1643        write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &reverse_index)?;
1644        write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &reachable.idx)?;
1645    }
1646
1647    // Write the cruft pack (pack + rev + mtimes + idx).
1648    if let Some(cruft) = result.cruft.as_ref() {
1649        let pack_name = format!("pack-{}", cruft.checksum.to_hex());
1650        write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &cruft.pack)?;
1651        write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &cruft.rev)?;
1652        write_pack_component(&pack_dir.join(format!("{pack_name}.mtimes")), &cruft.mtimes)?;
1653        write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &cruft.idx)?;
1654    }
1655
1656    if !prune {
1657        return Ok(());
1658    }
1659
1660    // Objects now served by the new packs.
1661    let mut present: HashSet<ObjectId> = HashSet::new();
1662    if let Some(reachable) = result.reachable.as_ref() {
1663        present.extend(reachable.index_entries.iter().map(|e| e.oid));
1664    }
1665    if let Some(cruft) = result.cruft.as_ref() {
1666        present.extend(cruft.oids.iter().copied());
1667    }
1668
1669    // Remove superseded non-cruft + old cruft packs (skip the new ones).
1670    let mut removed_stems: HashSet<String> = HashSet::new();
1671    for pack_path in result
1672        .obsolete_packs
1673        .iter()
1674        .chain(result.obsolete_cruft_packs.iter())
1675    {
1676        let file_name = pack_path.file_name().and_then(|n| n.to_str());
1677        if file_name == new_reachable_name.as_deref() || file_name == new_cruft_name.as_deref() {
1678            continue;
1679        }
1680        if pack_path.with_extension("keep").exists() {
1681            continue;
1682        }
1683        if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) {
1684            removed_stems.insert(stem.to_string());
1685        }
1686        remove_file_if_exists(pack_path)?;
1687        remove_file_if_exists(&pack_path.with_extension("idx"))?;
1688        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
1689            remove_file_if_exists(&pack_path.with_extension(ext))?;
1690        }
1691    }
1692
1693    // Drop loose objects now in a new pack.
1694    let loose_now_packed: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1695        .into_iter()
1696        .filter(|oid| present.contains(oid))
1697        .collect();
1698    prune_loose_objects(&objects_dir, format, loose_now_packed.iter(), &present)?;
1699
1700    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1701    Ok(())
1702}
1703
1704fn pack_index_entries_match_writer(
1705    parsed: &[PackIndexEntry],
1706    writer_entries: &[PackIndexEntry],
1707) -> bool {
1708    if parsed.len() != writer_entries.len() {
1709        return false;
1710    }
1711    let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
1712    writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1713    parsed.iter().zip(writer_entries).all(|(left, right)| {
1714        left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
1715    })
1716}
1717
1718/// List loose objects under `git_dir` that are *not* reachable from `roots`,
1719/// optionally deleting them.
1720///
1721/// Reachability is computed with [`collect_reachable_object_ids`] over the
1722/// repository's object database, so trees, parents, and tag targets are all
1723/// followed. When `delete` is `false` the returned ids are merely reported;
1724/// when `true` each unreachable loose object file is removed (packed copies are
1725/// never touched). Deletion is therefore opt-in.
1726pub fn prune_unreachable_loose<I>(
1727    git_dir: &Path,
1728    format: ObjectFormat,
1729    roots: I,
1730    delete: bool,
1731) -> Result<Vec<ObjectId>>
1732where
1733    I: IntoIterator<Item = ObjectId>,
1734{
1735    let objects_dir = repository_objects_dir(git_dir);
1736    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1737    let reachable = collect_reachable_object_ids(&database, format, roots)?;
1738
1739    let store = LooseObjectStore::new(objects_dir.clone(), format);
1740    let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1741        .into_iter()
1742        .filter(|oid| !reachable.contains(oid))
1743        .collect();
1744    pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1745
1746    if delete {
1747        for oid in &pruned {
1748            let path = store.object_path(oid)?;
1749            match fs::remove_file(&path) {
1750                Ok(()) => {}
1751                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1752                Err(err) => return Err(GitError::Io(err.to_string())),
1753            }
1754        }
1755    }
1756    Ok(pruned)
1757}
1758
1759/// Loose object ids under `objects_dir`, sorted by hex, with packed objects
1760/// excluded.
1761fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1762    let oids = loose_object_id_set(objects_dir, format)?;
1763    let mut oids = oids.into_iter().collect::<Vec<_>>();
1764    oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1765    Ok(oids)
1766}
1767
1768fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1769    let mut oids = HashSet::new();
1770    collect_loose_object_ids(objects_dir, format, &mut oids)?;
1771    Ok(oids)
1772}
1773
1774/// Absolute paths of every `*.pack` file directly inside `pack_dir`, sorted for
1775/// deterministic output.
1776fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1777    if !pack_dir.exists() {
1778        return Ok(Vec::new());
1779    }
1780    let mut packs = Vec::new();
1781    for entry in fs::read_dir(pack_dir)? {
1782        let path = entry?.path();
1783        if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1784            packs.push(path);
1785        }
1786    }
1787    packs.sort();
1788    Ok(packs)
1789}
1790
1791/// Remove pre-existing packs whose every object is contained in `present`,
1792/// skipping `keep` (the pack just written), `.keep` packs, and `.promisor` packs.
1793/// A stale multi-pack-index that references any removed pack is removed too.
1794fn prune_packs_contained_in(
1795    objects_dir: &Path,
1796    format: ObjectFormat,
1797    present: &HashSet<ObjectId>,
1798    keep: &Path,
1799) -> Result<()> {
1800    let pack_dir = objects_dir.join("pack");
1801    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
1802    let mut removed_stems: HashSet<String> = HashSet::new();
1803
1804    for pack_path in existing_pack_files(&pack_dir)? {
1805        if pack_path == keep {
1806            continue;
1807        }
1808        let Some(stem) = pack_path.file_stem() else {
1809            continue;
1810        };
1811        if Some(stem) == keep_stem.as_deref() {
1812            continue;
1813        }
1814        if pack_path.with_extension("keep").exists()
1815            || pack_path.with_extension("promisor").exists()
1816        {
1817            continue;
1818        }
1819        let index_path = pack_path.with_extension("idx");
1820        if !index_path.exists() {
1821            // Without an index we cannot prove containment; leave it alone.
1822            continue;
1823        }
1824        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1825        if !index
1826            .entries
1827            .iter()
1828            .all(|entry| present.contains(&entry.oid))
1829        {
1830            continue;
1831        }
1832        // Every object in this pack is safely in the new pack and it has no Git
1833        // policy sidecar that says to keep it: remove the pack, its index, and
1834        // cache sidecars derived from them.
1835        remove_file_if_exists(&pack_path)?;
1836        remove_file_if_exists(&index_path)?;
1837        for ext in ["rev", "mtimes", "bitmap"] {
1838            remove_file_if_exists(&pack_path.with_extension(ext))?;
1839        }
1840        removed_stems.insert(stem.to_string_lossy().into_owned());
1841    }
1842
1843    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1844    Ok(())
1845}
1846
1847/// Remove a `multi-pack-index` if it names *any* pack that was removed.
1848///
1849/// A MIDX that still references a deleted pack makes reads fail (the lookup
1850/// resolves to a pack that is gone) before any fallback. Removing the whole MIDX
1851/// when even one of its packs is pruned forces readers back to the individual pack
1852/// indexes, which are correct; `multi-pack-index write` can rebuild it later.
1853fn prune_stale_multi_pack_index(
1854    pack_dir: &Path,
1855    format: ObjectFormat,
1856    removed_stems: &HashSet<String>,
1857) -> Result<()> {
1858    if removed_stems.is_empty() {
1859        return Ok(());
1860    }
1861    let midx_path = pack_dir.join("multi-pack-index");
1862    if !midx_path.exists() {
1863        return Ok(());
1864    }
1865    let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1866    let references_removed_pack = midx.pack_names.iter().any(|name| {
1867        let stem = name.strip_suffix(".idx").unwrap_or(name);
1868        removed_stems.contains(stem)
1869    });
1870    if references_removed_pack {
1871        remove_file_if_exists(&midx_path)?;
1872    }
1873    Ok(())
1874}
1875
1876/// Remove each loose object in `candidates` whose id is in `present`, leaving
1877/// any object not actually packed untouched.
1878fn prune_loose_objects<'a, I>(
1879    objects_dir: &Path,
1880    format: ObjectFormat,
1881    candidates: I,
1882    present: &HashSet<ObjectId>,
1883) -> Result<()>
1884where
1885    I: IntoIterator<Item = &'a ObjectId>,
1886{
1887    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1888    for oid in candidates {
1889        if !present.contains(oid) {
1890            continue;
1891        }
1892        remove_file_if_exists(&store.object_path(oid)?)?;
1893    }
1894    Ok(())
1895}
1896
/// How a delta entry in a pack names its base object.
enum PackDeltaBase {
    /// ofs-delta: the base's offset in the same pack, already resolved from
    /// the encoded backwards distance to an absolute pack offset.
    Offset(u64),
    /// ref-delta: the base is named by object id.
    Ref(ObjectId),
}
1901
/// Result of one pass over a pack index for a single target entry
/// (see `scan_pack_index_offsets`).
struct PackIndexOffsetInfo {
    /// Offset at which the target entry ends: the smallest indexed offset
    /// greater than the target's, or the trailer offset when none follows.
    end_offset: u64,
    /// Object id of the index entry sitting at the requested ofs-delta base
    /// offset; `None` when no base offset was requested.
    delta_base_oid: Option<ObjectId>,
}
1906
1907fn scan_pack_index_offsets(
1908    index: &PackIndex,
1909    target_offset: u64,
1910    trailer_offset: u64,
1911    delta_base_offset: Option<u64>,
1912) -> Result<PackIndexOffsetInfo> {
1913    let mut target_count = 0usize;
1914    let mut next_offset = None;
1915    let mut delta_base_oid = None;
1916
1917    for entry in &index.entries {
1918        if entry.offset == target_offset {
1919            target_count += 1;
1920        } else if entry.offset > target_offset {
1921            match next_offset {
1922                Some(current) if current <= entry.offset => {}
1923                _ => next_offset = Some(entry.offset),
1924            }
1925        }
1926        if Some(entry.offset) == delta_base_offset {
1927            delta_base_oid = Some(entry.oid);
1928        }
1929    }
1930
1931    if target_count == 0 {
1932        return Err(GitError::InvalidFormat(format!(
1933            "pack index offset {target_offset} not found"
1934        )));
1935    }
1936    if let Some(offset) = delta_base_offset
1937        && delta_base_oid.is_none()
1938    {
1939        return Err(GitError::InvalidFormat(format!(
1940            "ofs-delta base offset {offset} not found"
1941        )));
1942    }
1943
1944    Ok(PackIndexOffsetInfo {
1945        // Preserve the old sorted-vector behavior for malformed indexes with
1946        // duplicate offsets: the next sorted entry has the same offset.
1947        end_offset: if target_count > 1 {
1948            target_offset
1949        } else {
1950            next_offset.unwrap_or(trailer_offset)
1951        },
1952        delta_base_oid,
1953    })
1954}
1955
1956fn pack_entry_delta_base(
1957    format: ObjectFormat,
1958    pack: &[u8],
1959    entry_offset: u64,
1960) -> Result<Option<PackDeltaBase>> {
1961    let mut cursor = usize::try_from(entry_offset)
1962        .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1963    let first = pack_next_byte(pack, &mut cursor)?;
1964    let kind = (first >> 4) & 0x07;
1965    let mut byte = first;
1966    while byte & 0x80 != 0 {
1967        byte = pack_next_byte(pack, &mut cursor)?;
1968    }
1969    match kind {
1970        6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1971            pack,
1972            &mut cursor,
1973            entry_offset,
1974        )?))),
1975        7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1976            format,
1977            pack,
1978            &mut cursor,
1979        )?))),
1980        _ => Ok(None),
1981    }
1982}
1983
1984fn parse_ref_delta_base_oid(
1985    format: ObjectFormat,
1986    pack: &[u8],
1987    cursor: &mut usize,
1988) -> Result<ObjectId> {
1989    let raw_len = format.raw_len();
1990    if *cursor + raw_len > pack.len() {
1991        return Err(GitError::InvalidFormat(
1992            "truncated ref-delta base object id".into(),
1993        ));
1994    }
1995    let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1996    *cursor += raw_len;
1997    Ok(oid)
1998}
1999
2000fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
2001    let mut byte = pack_next_byte(pack, cursor)?;
2002    let mut relative = u64::from(byte & 0x7f);
2003    while byte & 0x80 != 0 {
2004        byte = pack_next_byte(pack, cursor)?;
2005        relative = relative
2006            .checked_add(1)
2007            .and_then(|value| value.checked_shl(7))
2008            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2009            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2010    }
2011    entry_offset
2012        .checked_sub(relative)
2013        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2014}
2015
2016fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
2017    let Some(byte) = pack.get(*cursor).copied() else {
2018        return Err(GitError::InvalidFormat("truncated pack entry".into()));
2019    };
2020    *cursor += 1;
2021    Ok(byte)
2022}
2023
2024fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
2025    Ok(ObjectId::null(format))
2026}
2027
2028/// Remove `path` if it exists, treating a missing file as success.
2029fn remove_file_if_exists(path: &Path) -> Result<()> {
2030    match fs::remove_file(path) {
2031        Ok(()) => Ok(()),
2032        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
2033        Err(err) => Err(GitError::Io(err.to_string())),
2034    }
2035}
2036
2037fn walk_reachable_objects<R, I, F>(
2038    reader: &R,
2039    format: ObjectFormat,
2040    starts: I,
2041    excluded: &HashSet<ObjectId>,
2042    visit: F,
2043) -> Result<HashSet<ObjectId>>
2044where
2045    R: ObjectReader,
2046    I: IntoIterator<Item = ObjectId>,
2047    F: FnMut(&ObjectId, &Arc<EncodedObject>),
2048{
2049    walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
2050}
2051
2052/// [`walk_reachable_objects`] with an additional `cut` set: commits in `cut`
2053/// are visited (their trees and blobs too) but their parents are not followed,
2054/// mirroring a shallow client's view of its own history during negotiation.
2055fn walk_reachable_objects_with_cut<R, I, F>(
2056    reader: &R,
2057    format: ObjectFormat,
2058    starts: I,
2059    excluded: &HashSet<ObjectId>,
2060    cut: &HashSet<ObjectId>,
2061    mut visit: F,
2062) -> Result<HashSet<ObjectId>>
2063where
2064    R: ObjectReader,
2065    I: IntoIterator<Item = ObjectId>,
2066    F: FnMut(&ObjectId, &Arc<EncodedObject>),
2067{
2068    let mut seen = HashSet::new();
2069    let mut pending = Vec::new();
2070    for start in starts {
2071        pending.push(start);
2072        while let Some(oid) = pending.pop() {
2073            if excluded.contains(&oid) {
2074                continue;
2075            }
2076            if !seen.insert(oid) {
2077                continue;
2078            }
2079            let object = reader.read_object(&oid).map_err(|err| {
2080                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
2081            })?;
2082            match object.object_type {
2083                ObjectType::Commit => {
2084                    let (tree, parents) = {
2085                        let commit = Commit::parse_ref(format, &object.body)?;
2086                        (commit.tree, commit.parents)
2087                    };
2088                    visit(&oid, &object);
2089                    if !cut.contains(&oid) {
2090                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
2091                            pending.push(parent);
2092                        }
2093                    }
2094                    pending.push(tree);
2095                }
2096                ObjectType::Tree => {
2097                    let mut child_oids = Vec::new();
2098                    for entry in TreeEntries::new(format, &object.body) {
2099                        let entry = entry?;
2100                        if entry.is_gitlink() {
2101                            continue;
2102                        }
2103                        child_oids.push(entry.oid);
2104                    }
2105                    visit(&oid, &object);
2106                    pending.extend(child_oids.into_iter().rev());
2107                }
2108                ObjectType::Tag => {
2109                    let target = {
2110                        let tag = Tag::parse_ref(format, &object.body)?;
2111                        tag.object
2112                    };
2113                    visit(&oid, &object);
2114                    pending.push(target);
2115                }
2116                ObjectType::Blob => visit(&oid, &object),
2117            }
2118        }
2119    }
2120    Ok(seen)
2121}
2122
2123// ===== reachability bitmaps (.bitmap write + consult) =====
2124
/// Test bit `position` of a `u64`-word bitset using git's bitmap convention:
/// bit `i` lives in word `i / 64` at bit `i % 64` (LSB-first within a word).
///
/// Positions beyond the end of `words` read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    words
        .get((position / 64) as usize)
        .is_some_and(|word| word & mask != 0)
}
2131
/// Set bit `position` (word `position / 64`, bit `position % 64`).
///
/// Positions beyond the end of `words` are ignored; the bitset never grows.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
2138
/// OR `other` into `acc` word by word.
///
/// Only the words both slices share are combined; any tail of the longer
/// slice is left as is.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other)
        .for_each(|(target, source)| *target |= *source);
}
2144
/// Positions of all set bits, in ascending order (the inverse of repeated
/// [`bitset_set`]).
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    let mut positions = Vec::new();
    for (word_index, &word) in words.iter().enumerate() {
        // Peel set bits off from the least significant end.
        let mut rest = word;
        while rest != 0 {
            let bit = rest.trailing_zeros();
            positions.push(word_index as u32 * 64 + bit);
            rest ^= 1u64 << bit;
        }
    }
    positions
}
2158
/// Timestamp (epoch seconds) of a commit identity line of the form
/// `Name <email> <timestamp> <tz>`.
///
/// The second-to-last space-separated field is parsed as an `i64`. Anything
/// unparseable — missing field, non-UTF-8, non-numeric — yields 0, matching
/// git's tolerance for bogus dates during bitmap commit selection.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Splitting from the right into at most three pieces gives
    // [tz, timestamp, everything-before]; index 1 is the timestamp.
    let Some(raw) = identity.rsplitn(3, |&byte| byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
2171
/// Upstream `next_commit_index` (pack-bitmap-write.c): how far ahead of
/// position `idx` in the date-descending commit list the next bitmap commit
/// should be looked for.
///
/// * up to position 100 every commit is taken (step 0);
/// * up to position 20000 the step grows with the distance past 100 but is
///   capped at 100;
/// * beyond that the step is the distance past 20000, kept within 100..=5000.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
2190
2191/// Builds a serialised `.bitmap` for the pack described by `index_entries` /
2192/// `pack_checksum`, mirroring upstream pack-bitmap-write.c:
2193///
2194/// * commit selection walks the pack's commits in committer-date-descending
2195///   order through [`bitmap_next_commit_index`]'s spacing schedule, preferring
2196///   `preferred_tips` (ref tips — upstream's `NEEDS_BITMAP`) and merge commits
2197///   inside each window;
2198/// * each selected commit stores its full reachability closure (commits, trees,
2199///   blobs) as pack-order bit positions (no XOR compression — `xor_offset` 0 is
2200///   valid on disk and what readers see after resolution anyway).
2201///
2202/// Returns `Ok(None)` — mirroring upstream's warn-and-skip — when the pack
2203/// lacks full closure (a reachable object is missing from it).
2204pub fn build_pack_bitmap(
2205    db: &FileObjectDatabase,
2206    format: ObjectFormat,
2207    index_entries: &[PackIndexEntry],
2208    pack_checksum: &ObjectId,
2209    preferred_tips: &HashSet<ObjectId>,
2210) -> Result<Option<Vec<u8>>> {
2211    // `index_entries` carries no ordering guarantee (writer provenance is in
2212    // pack-write order); bit numbering follows pack (offset) order.
2213    let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
2214    by_offset.sort_by_key(|&slot| index_entries[slot].offset);
2215    let bit_order: Vec<ObjectId> = by_offset
2216        .into_iter()
2217        .map(|slot| index_entries[slot].oid)
2218        .collect();
2219    build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
2220}
2221
2222/// [`build_pack_bitmap`]'s multi-pack sibling: builds the serialised
2223/// `multi-pack-index-<checksum>.bitmap` for `midx_entries`, with bits in
2224/// pseudo-pack order (preferred pack first, then pack id, then offset — the
2225/// same order [`MultiPackIndex::write_with_reverse_index`] records in `RIDX`)
2226/// and the midx checksum in the BITM checksum field.
2227pub fn build_midx_bitmap(
2228    db: &FileObjectDatabase,
2229    format: ObjectFormat,
2230    midx_entries: &[sley_pack::MultiPackIndexEntry],
2231    midx_checksum: &ObjectId,
2232    preferred_pack: u32,
2233    preferred_tips: &HashSet<ObjectId>,
2234) -> Result<Option<Vec<u8>>> {
2235    let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
2236    pseudo.sort_by_key(|&slot| {
2237        let entry = &midx_entries[slot];
2238        (
2239            entry.pack_int_id != preferred_pack,
2240            entry.pack_int_id,
2241            entry.offset,
2242        )
2243    });
2244    let bit_order: Vec<ObjectId> = pseudo
2245        .into_iter()
2246        .map(|slot| midx_entries[slot].oid)
2247        .collect();
2248    build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
2249}
2250
2251/// Upstream `bitmap_builder_init`'s `num_maximal` counter (pack-bitmap-write.c):
2252/// walk the first-parent ancestry of the selected commits, children before
2253/// parents, propagating per-commit "which selected commits reach me" masks.
2254/// A commit counts as maximal when it is selected, or when distinct selected
2255/// lineages converge on it (its mask gains bits its last contributing child
2256/// did not carry). Only the count is needed (for the trace2 data event), so no
2257/// reverse-edge bookkeeping is kept.
2258fn bitmap_num_maximal_commits(
2259    db: &FileObjectDatabase,
2260    format: ObjectFormat,
2261    selected: &[ObjectId],
2262) -> Result<usize> {
2263    // First-parent subgraph reachable from the selected commits.
2264    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
2265    let mut stack: Vec<ObjectId> = selected.to_vec();
2266    while let Some(oid) = stack.pop() {
2267        if first_parent.contains_key(&oid) {
2268            continue;
2269        }
2270        let object = db.read_object(&oid)?;
2271        let commit = Commit::parse_ref(format, &object.body)?;
2272        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
2273        first_parent.insert(oid, parent);
2274        if let Some(parent) = parent {
2275            stack.push(parent);
2276        }
2277    }
2278    // Children-before-parents order (Kahn over the single first-parent edge).
2279    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
2280    for parent in first_parent.values().flatten() {
2281        *pending_children.entry(*parent).or_default() += 1;
2282    }
2283    let word_count = selected.len().div_ceil(64);
2284    struct MaximalEnt {
2285        mask: Vec<u64>,
2286        maximal: bool,
2287    }
2288    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
2289    for (bit, oid) in selected.iter().enumerate() {
2290        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
2291            mask: vec![0u64; word_count],
2292            maximal: true,
2293        });
2294        ent.mask[bit / 64] |= 1u64 << (bit % 64);
2295        ent.maximal = true;
2296    }
2297    let mut queue: Vec<ObjectId> = first_parent
2298        .keys()
2299        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
2300        .copied()
2301        .collect();
2302    let mut num_maximal = 0usize;
2303    while let Some(oid) = queue.pop() {
2304        if let Some(ent) = ents.remove(&oid) {
2305            if ent.maximal {
2306                num_maximal += 1;
2307            }
2308            if let Some(Some(parent)) = first_parent.get(&oid) {
2309                match ents.entry(*parent) {
2310                    std::collections::hash_map::Entry::Vacant(vacant) => {
2311                        // Fresh parent mask: c_not_p, !p_not_c -> not maximal.
2312                        vacant.insert(MaximalEnt {
2313                            mask: ent.mask.clone(),
2314                            maximal: false,
2315                        });
2316                    }
2317                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
2318                        let parent_ent = occupied.get_mut();
2319                        let c_not_p = ent
2320                            .mask
2321                            .iter()
2322                            .zip(&parent_ent.mask)
2323                            .any(|(child, parent)| child & !parent != 0);
2324                        if c_not_p {
2325                            let p_not_c = parent_ent
2326                                .mask
2327                                .iter()
2328                                .zip(&ent.mask)
2329                                .any(|(parent, child)| parent & !child != 0);
2330                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
2331                                *parent |= child;
2332                            }
2333                            parent_ent.maximal = p_not_c;
2334                        }
2335                    }
2336                }
2337            }
2338        }
2339        if let Some(Some(parent)) = first_parent.get(&oid)
2340            && let Some(remaining) = pending_children.get_mut(parent)
2341        {
2342            *remaining -= 1;
2343            if *remaining == 0 {
2344                queue.push(*parent);
2345            }
2346        }
2347    }
2348    Ok(num_maximal)
2349}
2350
2351/// Shared write half: `bit_order` lists every covered object's oid in bit
2352/// order (pack order for a single pack, pseudo-pack order for a midx);
2353/// `checksum` fills the BITM checksum field (pack checksum / midx checksum).
2354fn build_reachability_bitmap(
2355    db: &FileObjectDatabase,
2356    format: ObjectFormat,
2357    checksum: &ObjectId,
2358    bit_order: &[ObjectId],
2359    preferred_tips: &HashSet<ObjectId>,
2360) -> Result<Option<Vec<u8>>> {
2361    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
2362        return Ok(None);
2363    }
2364    let object_count = bit_order.len();
2365
2366    // The on-disk entry position space is the oid-sorted lookup order (.idx /
2367    // midx OIDL); derive each bit-order slot's rank there.
2368    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
2369    oid_sorted.sort_by(|&left, &right| {
2370        bit_order[left as usize]
2371            .as_bytes()
2372            .cmp(bit_order[right as usize].as_bytes())
2373    });
2374    let mut index_position = vec![0u32; object_count];
2375    for (position, &slot) in oid_sorted.iter().enumerate() {
2376        index_position[slot as usize] = position as u32;
2377    }
2378    let mut oid_to_pack = HashMap::with_capacity(object_count);
2379    for (pack_pos, oid) in bit_order.iter().enumerate() {
2380        oid_to_pack.insert(*oid, pack_pos as u32);
2381    }
2382
2383    // Object types in bit order; commits also collect (date, parent count).
2384    let mut object_types = Vec::with_capacity(object_count);
2385    struct IndexedCommit {
2386        oid: ObjectId,
2387        pack_pos: u32,
2388        index_pos: u32,
2389        date: i64,
2390        parent_count: usize,
2391    }
2392    let mut indexed_commits = Vec::new();
2393    for (pack_pos, oid) in bit_order.iter().enumerate() {
2394        // Type via the header fast path: blobs (the bulk of most packs) never
2395        // need their bodies inflated here.
2396        let object_type = match db.read_object_header(oid)? {
2397            Some((object_type, _)) => object_type,
2398            None => db.read_object(oid)?.object_type,
2399        };
2400        object_types.push(object_type);
2401        if object_type == ObjectType::Commit {
2402            let object = db.read_object(oid)?;
2403            let commit = Commit::parse_ref(format, &object.body)?;
2404            indexed_commits.push(IndexedCommit {
2405                oid: *oid,
2406                pack_pos: pack_pos as u32,
2407                index_pos: index_position[pack_pos],
2408                date: commit_identity_timestamp(commit.committer),
2409                parent_count: grafted_parents(db, oid, commit.parents).len(),
2410            });
2411        }
2412    }
2413
2414    // Selection: date-descending, then the spacing schedule.
2415    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
2416    let mut selected: Vec<&IndexedCommit> = Vec::new();
2417    let commit_count = indexed_commits.len() as u32;
2418    if commit_count < 100 {
2419        selected.extend(indexed_commits.iter());
2420    } else {
2421        let mut i = 0u32;
2422        loop {
2423            let next = bitmap_next_commit_index(i);
2424            if i + next >= commit_count {
2425                break;
2426            }
2427            let mut chosen = &indexed_commits[(i + next) as usize];
2428            if next > 0 {
2429                for j in 0..=next {
2430                    let candidate = &indexed_commits[(i + j) as usize];
2431                    if preferred_tips.contains(&candidate.oid) {
2432                        chosen = candidate;
2433                        break;
2434                    }
2435                    if candidate.parent_count >= 2 {
2436                        chosen = candidate;
2437                    }
2438                }
2439            }
2440            selected.push(chosen);
2441            i += next + 1;
2442        }
2443    }
2444
2445    // Trace2 selection counters (upstream bitmap_builder_init): emitted before
2446    // the closure walk, like upstream emits them before building the ewah
2447    // bitmaps. Computing num_maximal_commits needs its own first-parent walk,
2448    // so it only runs when the trace2 event target is active.
2449    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
2450        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
2451        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
2452        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
2453        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
2454    }
2455
2456    // Reachability closures, oldest-first so newer walks stop at memoised
2457    // older selected commits.
2458    let word_count = object_count.div_ceil(64);
2459    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
2460    for commit in selected.iter().rev() {
2461        let mut acc = vec![0u64; word_count];
2462        let mut pending = vec![commit.oid];
2463        while let Some(oid) = pending.pop() {
2464            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
2465                // Mirrors upstream's "Packfile doesn't have full closure".
2466                eprintln!(
2467                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
2468                );
2469                return Ok(None);
2470            };
2471            if bitset_get(&acc, pack_pos) {
2472                continue;
2473            }
2474            if let Some(stored) = memo.get(&oid) {
2475                bitset_or(&mut acc, stored);
2476                continue;
2477            }
2478            bitset_set(&mut acc, pack_pos);
2479            let object = db.read_object(&oid)?;
2480            let tree = {
2481                let parsed = Commit::parse_ref(format, &object.body)?;
2482                pending.extend(grafted_parents(db, &oid, parsed.parents));
2483                parsed.tree
2484            };
2485            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
2486                return Ok(None);
2487            }
2488        }
2489        memo.insert(commit.oid, Arc::new(acc));
2490    }
2491
2492    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
2493    for commit in &selected {
2494        let words = match memo.get(&commit.oid) {
2495            Some(words) => words,
2496            None => continue,
2497        };
2498        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
2499    }
2500    writer.write().map(Some)
2501}
2502
2503/// Marks `tree` and everything below it (sub-trees, blobs) in `acc`, skipping
2504/// already-set bits (their closure is already covered). Returns `false` when an
2505/// object is missing from the pack (no full closure), after warning.
2506fn bitmap_mark_tree(
2507    db: &impl ObjectReader,
2508    format: ObjectFormat,
2509    tree: &ObjectId,
2510    oid_to_pack: &HashMap<ObjectId, u32>,
2511    acc: &mut [u64],
2512) -> Result<bool> {
2513    let Some(&pack_pos) = oid_to_pack.get(tree) else {
2514        eprintln!(
2515            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
2516        );
2517        return Ok(false);
2518    };
2519    if bitset_get(acc, pack_pos) {
2520        return Ok(true);
2521    }
2522    bitset_set(acc, pack_pos);
2523    let object = db.read_object(tree)?;
2524    for entry in TreeEntries::new(format, &object.body) {
2525        let entry = entry?;
2526        if entry.is_gitlink() {
2527            continue;
2528        }
2529        if entry.is_tree() {
2530            if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
2531                return Ok(false);
2532            }
2533        } else {
2534            let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
2535                eprintln!(
2536                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
2537                    entry.oid
2538                );
2539                return Ok(false);
2540            };
2541            bitset_set(acc, blob_pos);
2542        }
2543    }
2544    Ok(true)
2545}
2546
2547/// A pack's `.bitmap` loaded for consultation: oid <-> pack-position mappings,
2548/// resolved (XOR-expanded) per-commit reachability bitsets, and the four object
2549/// type bitmaps. Bit numbering follows pack order throughout.
2550pub struct LoadedPackBitmap {
2551    object_count: u32,
2552    oid_to_pack: HashMap<ObjectId, u32>,
2553    pack_to_oid: Vec<ObjectId>,
2554    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
2555    commits: Vec<u64>,
2556    trees: Vec<u64>,
2557    blobs: Vec<u64>,
2558    tags: Vec<u64>,
2559}
2560
2561impl LoadedPackBitmap {
2562    pub fn object_count(&self) -> u32 {
2563        self.object_count
2564    }
2565
2566    /// Pack-order position of `oid`, when the object is in the bitmapped pack.
2567    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
2568        self.oid_to_pack.get(oid).copied()
2569    }
2570
2571    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
2572        self.pack_to_oid.get(position as usize)
2573    }
2574
2575    /// The resolved reachability bitset stored for `oid`, when it was one of
2576    /// the writer's selected commits.
2577    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
2578        self.commit_words.get(oid)
2579    }
2580
2581    /// Oids of every commit with a stored bitmap entry (unordered).
2582    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
2583        self.commit_words.keys()
2584    }
2585
2586    /// The type bitmap for `object_type` (bit per pack position).
2587    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
2588        match object_type {
2589            ObjectType::Commit => &self.commits,
2590            ObjectType::Tree => &self.trees,
2591            ObjectType::Blob => &self.blobs,
2592            ObjectType::Tag => &self.tags,
2593        }
2594    }
2595
2596    fn word_count(&self) -> usize {
2597        (self.object_count as usize).div_ceil(64)
2598    }
2599}
2600
2601/// Loads the single-pack `.bitmap` of `objects_dir/pack`, if a valid one
2602/// exists. Scans `pack-*.bitmap` files (sorted, first valid wins, like
2603/// upstream's "first bitmap" behaviour), requires the sibling `.idx`, and
2604/// verifies the recorded pack checksum. Any unreadable/corrupt bitmap yields
2605/// `Ok(None)` — consumers fall back to a regular object walk, mirroring
2606/// upstream's warn-and-ignore on bitmap load failure.
2607pub fn load_pack_bitmap(
2608    objects_dir: &Path,
2609    format: ObjectFormat,
2610) -> Result<Option<LoadedPackBitmap>> {
2611    let pack_dir = objects_dir.join("pack");
2612    if !pack_dir.exists() {
2613        return Ok(None);
2614    }
2615    // A multi-pack bitmap wins over single-pack bitmaps, like upstream's
2616    // open_bitmap trying the midx first.
2617    if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
2618        return Ok(Some(bitmap));
2619    }
2620    let mut bitmap_paths = Vec::new();
2621    for entry in fs::read_dir(&pack_dir)? {
2622        let path = entry?.path();
2623        if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
2624            && path
2625                .file_name()
2626                .and_then(|name| name.to_str())
2627                .is_some_and(|name| name.starts_with("pack-"))
2628        {
2629            bitmap_paths.push(path);
2630        }
2631    }
2632    bitmap_paths.sort();
2633    for bitmap_path in bitmap_paths {
2634        match load_pack_bitmap_file(&bitmap_path, format) {
2635            Ok(Some(bitmap)) => return Ok(Some(bitmap)),
2636            Ok(None) | Err(_) => continue,
2637        }
2638    }
2639    Ok(None)
2640}
2641
2642/// Loads `multi-pack-index-<checksum>.bitmap` when the pack directory has a
2643/// multi-pack-index with a `RIDX` chunk (the bit-order permutation) and a
2644/// matching bitmap file. Returns `Ok(None)` — never an error — on any missing
2645/// or unusable piece, so callers fall through to single-pack bitmaps.
2646fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
2647    let midx_path = pack_dir.join("multi-pack-index");
2648    if !midx_path.exists() {
2649        return Ok(None);
2650    }
2651    let Ok(midx_bytes) = fs::read(&midx_path) else {
2652        return Ok(None);
2653    };
2654    let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
2655        return Ok(None);
2656    };
2657    let bitmap_path = pack_dir.join(format!(
2658        "multi-pack-index-{}.bitmap",
2659        midx.checksum.to_hex()
2660    ));
2661    if !bitmap_path.exists() {
2662        return Ok(None);
2663    }
2664    let object_count = midx.objects.len();
2665    // Upstream `load_midx_revindex`: prefer the midx's own RIDX chunk unless
2666    // GIT_TEST_MIDX_READ_RIDX=0 disables it, else fall back to the separate
2667    // `multi-pack-index-<checksum>.rev` file; a trace2 data event records
2668    // which source supplied the permutation.
2669    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
2670        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
2671        .unwrap_or(true);
2672    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
2673        (Some(chunk), true) => {
2674            sley_core::trace2::data("load_midx_revindex", "source", "midx");
2675            chunk.clone()
2676        }
2677        _ => {
2678            let rev_path =
2679                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
2680            let Ok(rev_bytes) = fs::read(&rev_path) else {
2681                // Without the RIDX permutation the bit numbering is unknown.
2682                return Ok(None);
2683            };
2684            let Ok(parsed_rev) =
2685                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
2686            else {
2687                return Ok(None);
2688            };
2689            sley_core::trace2::data("load_midx_revindex", "source", "rev");
2690            parsed_rev.positions
2691        }
2692    };
2693    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
2694        return Ok(None);
2695    };
2696    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
2697        Ok(parsed) => parsed,
2698        Err(_) => return Ok(None),
2699    };
2700    if parsed.pack_checksum != midx.checksum {
2701        return Ok(None);
2702    }
2703
2704    // midx.objects is in lookup (oid-sorted) order; RIDX maps bit positions
2705    // to lookup positions.
2706    let mut pack_to_oid = Vec::with_capacity(object_count);
2707    for &midx_pos in &reverse_index {
2708        let Some(entry) = midx.objects.get(midx_pos as usize) else {
2709            return Ok(None);
2710        };
2711        pack_to_oid.push(entry.oid);
2712    }
2713    let mut oid_to_pack = HashMap::with_capacity(object_count);
2714    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2715        oid_to_pack.insert(*oid, pack_pos as u32);
2716    }
2717    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2718        midx.objects.get(position).map(|entry| entry.oid)
2719    }) {
2720        Ok(loaded) => Ok(Some(loaded)),
2721        Err(_) => Ok(None),
2722    }
2723}
2724
2725fn load_pack_bitmap_file(
2726    bitmap_path: &Path,
2727    format: ObjectFormat,
2728) -> Result<Option<LoadedPackBitmap>> {
2729    let index_path = bitmap_path.with_extension("idx");
2730    if !index_path.exists() {
2731        return Ok(None);
2732    }
2733    let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
2734    let object_count = index.entries.len();
2735    let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
2736    if parsed.pack_checksum != index.pack_checksum {
2737        return Ok(None);
2738    }
2739
2740    let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2741    pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2742    let mut pack_to_oid = Vec::with_capacity(object_count);
2743    for index_pos in &pack_order {
2744        pack_to_oid.push(index.entries[*index_pos as usize].oid);
2745    }
2746    let mut oid_to_pack = HashMap::with_capacity(object_count);
2747    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2748        oid_to_pack.insert(*oid, pack_pos as u32);
2749    }
2750
2751    assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2752        index.entries.get(position).map(|entry| entry.oid)
2753    })
2754    .map(Some)
2755}
2756
2757/// Shared tail of the bitmap loaders: expands the type bitmaps, resolves the
2758/// per-commit entries (XOR offsets reference earlier entries in file order),
2759/// and maps each entry's lookup-order position back to a commit oid via
2760/// `lookup_oid`.
2761fn assemble_loaded_bitmap(
2762    parsed: PackBitmapIndex,
2763    object_count: usize,
2764    pack_to_oid: Vec<ObjectId>,
2765    oid_to_pack: HashMap<ObjectId, u32>,
2766    lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2767) -> Result<LoadedPackBitmap> {
2768    let word_count = object_count.div_ceil(64);
2769    let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2770        let mut words = bitmap.to_words()?;
2771        words.resize(word_count, 0);
2772        Ok(words)
2773    };
2774
2775    let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2776    let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2777    for (entry_index, entry) in parsed.entries.iter().enumerate() {
2778        let mut words = expand(&entry.bitmap)?;
2779        if entry.xor_offset > 0 {
2780            let base_index = entry_index - entry.xor_offset as usize;
2781            let base = &resolved[base_index];
2782            for (dst, src) in words.iter_mut().zip(base.iter()) {
2783                *dst ^= *src;
2784            }
2785        }
2786        let words = Arc::new(words);
2787        resolved.push(Arc::clone(&words));
2788        let commit_oid = lookup_oid(entry.object_position as usize)
2789            .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2790        commit_words.insert(commit_oid, words);
2791    }
2792
2793    Ok(LoadedPackBitmap {
2794        object_count: object_count as u32,
2795        oid_to_pack,
2796        pack_to_oid,
2797        commit_words,
2798        commits: expand(&parsed.type_bitmaps.commits)?,
2799        trees: expand(&parsed.type_bitmaps.trees)?,
2800        blobs: expand(&parsed.type_bitmaps.blobs)?,
2801        tags: expand(&parsed.type_bitmaps.tags)?,
2802    })
2803}
2804
2805/// Result of a bitmap-assisted reachability walk: pack-position bits for
2806/// in-pack objects plus the "extended" objects encountered outside the
2807/// bitmapped pack (in first-seen order, like upstream's extended index).
2808pub struct BitmapWalkResult {
2809    pub words: Vec<u64>,
2810    pub extended: Vec<(ObjectId, ObjectType)>,
2811}
2812
2813impl BitmapWalkResult {
2814    /// Removes everything reachable in `haves` from this result.
2815    pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2816        for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2817            *dst &= !*src;
2818        }
2819        let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2820        self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2821    }
2822}
2823
2824/// Computes the set of objects reachable from `roots` using stored bitmaps
2825/// where available and a fill-in object walk where not — the consult half of
2826/// the bitmap engine (upstream `find_objects` + `fill_in_bitmap`).
2827///
2828/// Roots may be any object type; tag chains are peeled with every tag object
2829/// itself included, like the pending-object handling in
2830/// `prepare_bitmap_walk`. When `include_objects` is false only commits are
2831/// walked (tree contents of fill-in commits are not marked) — callers that
2832/// only count/enumerate commits mask with the commit type bitmap, so the
2833/// extra non-commit bits OR-ed in from stored (closed) bitmaps are harmless.
2834pub fn bitmap_reachable(
2835    bitmap: &LoadedPackBitmap,
2836    db: &impl ObjectReader,
2837    format: ObjectFormat,
2838    roots: &[ObjectId],
2839    include_objects: bool,
2840) -> Result<BitmapWalkResult> {
2841    let mut walk = BitmapFillWalk {
2842        bitmap,
2843        words: vec![0u64; bitmap.word_count()],
2844        extended: Vec::new(),
2845        extended_seen: HashSet::new(),
2846    };
2847    let mut commit_stack: Vec<ObjectId> = Vec::new();
2848
2849    for root in roots {
2850        let mut oid = *root;
2851        // Peel tag chains, marking each tag object on the way.
2852        loop {
2853            let object = db.read_object(&oid)?;
2854            match object.object_type {
2855                ObjectType::Tag => {
2856                    walk.mark(&oid, ObjectType::Tag);
2857                    let tag = Tag::parse_ref(format, &object.body)?;
2858                    oid = tag.object;
2859                }
2860                ObjectType::Commit => {
2861                    commit_stack.push(oid);
2862                    break;
2863                }
2864                ObjectType::Tree => {
2865                    walk.mark_tree_closure(db, format, &oid)?;
2866                    break;
2867                }
2868                ObjectType::Blob => {
2869                    walk.mark(&oid, ObjectType::Blob);
2870                    break;
2871                }
2872            }
2873        }
2874    }
2875
2876    while let Some(oid) = commit_stack.pop() {
2877        if let Some(position) = bitmap.pack_position(&oid) {
2878            if bitset_get(&walk.words, position) {
2879                continue;
2880            }
2881            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
2882                bitset_or(&mut walk.words, stored);
2883                continue;
2884            }
2885            bitset_set(&mut walk.words, position);
2886        } else {
2887            if walk.extended_seen.contains(&oid) {
2888                continue;
2889            }
2890            walk.extended_seen.insert(oid);
2891            walk.extended.push((oid, ObjectType::Commit));
2892        }
2893        let object = db.read_object(&oid)?;
2894        let commit = Commit::parse_ref(format, &object.body)?;
2895        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
2896        if include_objects {
2897            walk.mark_tree_closure(db, format, &commit.tree)?;
2898        }
2899    }
2900
2901    Ok(BitmapWalkResult {
2902        words: walk.words,
2903        extended: walk.extended,
2904    })
2905}
2906
2907struct BitmapFillWalk<'a> {
2908    bitmap: &'a LoadedPackBitmap,
2909    words: Vec<u64>,
2910    extended: Vec<(ObjectId, ObjectType)>,
2911    extended_seen: HashSet<ObjectId>,
2912}
2913
2914impl BitmapFillWalk<'_> {
2915    /// Marks one object; returns false when it was already marked.
2916    fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2917        if let Some(position) = self.bitmap.pack_position(oid) {
2918            if bitset_get(&self.words, position) {
2919                return false;
2920            }
2921            bitset_set(&mut self.words, position);
2922            true
2923        } else {
2924            if !self.extended_seen.insert(*oid) {
2925                return false;
2926            }
2927            self.extended.push((*oid, object_type));
2928            true
2929        }
2930    }
2931
2932    /// Marks `tree` and everything below it, skipping subtrees already marked
2933    /// (a set in-pack bit means its closure is covered: either it came from a
2934    /// stored — closed — bitmap, or this walk already expanded it).
2935    fn mark_tree_closure(
2936        &mut self,
2937        db: &impl ObjectReader,
2938        format: ObjectFormat,
2939        tree: &ObjectId,
2940    ) -> Result<()> {
2941        if !self.mark(tree, ObjectType::Tree) {
2942            return Ok(());
2943        }
2944        let object = db.read_object(tree)?;
2945        for entry in TreeEntries::new(format, &object.body) {
2946            let entry = entry?;
2947            if entry.is_gitlink() {
2948                continue;
2949            }
2950            if entry.is_tree() {
2951                self.mark_tree_closure(db, format, &entry.oid)?;
2952            } else {
2953                self.mark(&entry.oid, ObjectType::Blob);
2954            }
2955        }
2956        Ok(())
2957    }
2958}
2959
2960#[derive(Debug)]
2961pub struct ObjectDatabase {
2962    format: ObjectFormat,
2963    // Behind a `Mutex` so `write_object` can take `&self` (matching the
2964    // `ObjectWriter` trait) and a single handle can interleave reads and writes
2965    // without a `&mut` borrow — the same shared-by-`&` shape the file-backed
2966    // database uses for its caches. Removes the need for callers to wrap this in
2967    // a `RefCell`/`&mut` just to write (see sley-fetch's former `RefCell` dance).
2968    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
2969    promisor: bool,
2970}
2971
2972impl ObjectDatabase {
2973    pub fn new(format: ObjectFormat) -> Self {
2974        Self {
2975            format,
2976            objects: Mutex::new(HashMap::new()),
2977            promisor: false,
2978        }
2979    }
2980
2981    pub fn with_promisor(mut self, promisor: bool) -> Self {
2982        self.promisor = promisor;
2983        self
2984    }
2985
2986    pub fn contains(&self, oid: &ObjectId) -> bool {
2987        self.objects
2988            .lock()
2989            .map(|objects| objects.contains_key(oid))
2990            .unwrap_or(false)
2991    }
2992
2993    pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2994        let object = self.read_object(oid)?;
2995        let actual = object.object_id(self.format)?;
2996        if &actual == oid {
2997            Ok(())
2998        } else {
2999            Err(GitError::InvalidObject(format!(
3000                "object id mismatch: expected {oid}, got {actual}"
3001            )))
3002        }
3003    }
3004}
3005
3006impl ObjectReader for ObjectDatabase {
3007    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3008        self.objects
3009            .lock()
3010            .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
3011            .get(oid)
3012            .map(Arc::clone)
3013            .or_else(|| implied_empty_tree_object(self.format, oid))
3014            .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
3015    }
3016}
3017
3018impl ObjectWriter for ObjectDatabase {
3019    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3020        let oid = object.object_id(self.format)?;
3021        self.objects
3022            .lock()
3023            .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
3024            .entry(oid)
3025            .or_insert_with(|| Arc::new(object));
3026        Ok(oid)
3027    }
3028}
3029
/// A single alternate, identified by its filesystem path.
// NOTE(review): presumably the path of an alternate object directory —
// confirm against the code that constructs it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    pub path: PathBuf,
}
3034
/// Settings describing how a partial clone treats objects it does not have.
// NOTE(review): field meanings below are inferred from their names; confirm
// against the code that reads this policy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Presumably the name of the promisor remote, when one is configured.
    pub promisor_remote: Option<String>,
    /// Presumably whether a missing promised object is tolerated.
    pub allow_missing_promised_objects: bool,
}
3040
/// Raw pack-file bytes keyed by pack path and shared across cloned handles.
/// Each pack is loaded once so individual objects can be decoded at their
/// offsets (see [`sley_pack::read_object_at`]) without re-reading the whole
/// file on every read.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;

/// The bytes backing a pack file: memory-mapped (only with the `mmap`
/// feature) or held on the heap. Either way the value derefs to `&[u8]`, so
/// the decode path does not care which one it got.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file contents.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read into memory.
    Heap(Vec<u8>),
}

impl std::ops::Deref for PackData {
    type Target = [u8];

    /// Borrows the underlying bytes regardless of how they are stored.
    fn deref(&self) -> &[u8] {
        match self {
            PackData::Heap(bytes) => bytes.as_slice(),
            #[cfg(feature = "mmap")]
            PackData::Mapped(mapped) => mapped,
        }
    }
}
3066
3067/// Load a pack file's bytes: memory-mapped when the `mmap` feature is on (falling
3068/// back to a heap read if the map fails), otherwise read into the heap.
3069#[cfg(feature = "mmap")]
3070fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3071    match sley_mmap::MappedFile::open_pack(pack_path) {
3072        Ok(mapped) => Ok(PackData::Mapped(mapped)),
3073        Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
3074    }
3075}
3076
3077#[cfg(not(feature = "mmap"))]
3078fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3079    Ok(PackData::Heap(fs::read(pack_path)?))
3080}
3081
3082#[cfg(feature = "mmap")]
3083fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3084    match sley_mmap::MappedFile::open_pack(index_path) {
3085        Ok(mapped) => Ok(Arc::new(mapped)),
3086        Err(_) => Ok(Arc::new(fs::read(index_path)?)),
3087    }
3088}
3089
3090#[cfg(not(feature = "mmap"))]
3091fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3092    Ok(Arc::new(fs::read(index_path)?))
3093}
3094
3095#[cfg(feature = "mmap")]
3096fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3097    match sley_mmap::MappedFile::open_multi_pack_index(midx_path) {
3098        Ok(mapped) => Ok(Arc::new(mapped)),
3099        Err(_) => Ok(Arc::new(fs::read(midx_path)?)),
3100    }
3101}
3102
3103#[cfg(not(feature = "mmap"))]
3104fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3105    Ok(Arc::new(fs::read(midx_path)?))
3106}
3107
/// Memory-capped LRU of recently decoded objects, shared across cloned handles,
/// so hot delta bases and repeated reads during a walk aren't re-decoded. The
/// cache is bounded by an approximate byte budget (not a fixed object count) so
/// it neither thrashes on bulk reads of small objects nor blows up on a few
/// large ones.
///
/// The `Arc<Mutex<…>>` wrapper is what lets cloned handles share one cache.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;

/// Per-pack caches of objects decoded from a pack, keyed by pack path and then by
/// the in-pack byte offset of each object's entry. Shared across cloned handles.
/// This is the delta-base cache: resolving a delta chain by offset reuses already
/// decoded bases instead of re-inflating the whole chain on every read.
///
/// The outer mutex guards the path-keyed map; each pack's cache has its own
/// inner mutex.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
3120
/// One pack's offset-keyed header memo (see [`PackHeaderTypeCaches`]): maps the
/// in-pack offset of an entry to the memoized `(ObjectType, u64)` pair for it.
// NOTE(review): the `u64` is presumably the object size reported by the header
// fast path — confirm against the code that fills this map.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Per-pack memo of `in-pack offset -> end-of-chain object type` for the
/// `cat-file --batch-check` header fast path. Resolving a packed delta's *type*
/// walks the delta chain to its base; without this memo every header read
/// re-walks (and re-inflates) the whole chain, so reading every object in a
/// deeply-deltified pack is super-linear (sley#26). The type only depends on the
/// chain base, so memoizing `offset -> type` lets each chain be walked at most
/// once across a batch. Keyed by pack path so an offset key is never applied to
/// the wrong pack's bytes; shared across cloned handles.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
3133
/// Default approximate byte budget for the decoded-object LRU (96 MiB). Sized to
/// comfortably hold the working set of a history walk (commits/trees/blobs and
/// their delta bases) without growing without bound on large repositories.
/// Overridable via the `SLEY_OBJECT_CACHE_BYTES` environment variable; there is
/// currently no git-config hook threaded into the object database, so this
/// constant is the default.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;

/// Default approximate byte budget for each per-pack delta-base cache (96 MiB).
/// Holds the decoded bases of the delta chains being walked so neighboring reads
/// stay warm. Overridable via `SLEY_DELTA_BASE_CACHE_BYTES`.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
3145
3146/// Approximate heap cost of caching one [`EncodedObject`]: its body plus a fixed
3147/// allowance for the key, enum/`Vec` headers, and per-entry map overhead. Used
3148/// only to drive eviction, so an estimate is fine.
3149fn cached_object_cost(object: &EncodedObject) -> usize {
3150    object.body.len().saturating_add(64)
3151}
3152
/// Resolve an approximate byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as a `usize`. `default` is returned when the
/// variable is unset, is not valid Unicode, or does not parse. A parsed value of
/// `0` is returned as-is (callers treat it as "cache disabled").
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
3161
3162/// Approximate byte budget for the decoded-object LRU (see
3163/// [`DEFAULT_OBJECT_CACHE_BYTES`], `SLEY_OBJECT_CACHE_BYTES`).
3164///
3165/// Resolved once per process: the environment does not change under us, and a new
3166/// `FileObjectDatabase` is built often enough (e.g. once per revision resolved)
3167/// that re-reading the variable each time showed up as per-object overhead.
3168fn object_cache_budget() -> usize {
3169    static BUDGET: OnceLock<usize> = OnceLock::new();
3170    *BUDGET.get_or_init(|| {
3171        cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
3172    })
3173}
3174
3175/// Approximate byte budget for each per-pack delta-base cache (see
3176/// [`DEFAULT_DELTA_BASE_CACHE_BYTES`], `SLEY_DELTA_BASE_CACHE_BYTES`). Resolved
3177/// once per process for the same reason as [`object_cache_budget`].
3178fn delta_base_cache_budget() -> usize {
3179    static BUDGET: OnceLock<usize> = OnceLock::new();
3180    *BUDGET.get_or_init(|| {
3181        cache_budget_from_env(
3182            "SLEY_DELTA_BASE_CACHE_BYTES",
3183            DEFAULT_DELTA_BASE_CACHE_BYTES,
3184        )
3185    })
3186}
3187
/// Whether every object should be re-hashed on read and compared to the id that
/// was requested.
///
/// Off by default, matching git: reads trust the pack index → offset mapping and
/// the loose object's on-disk name, and object ids are verified where git
/// verifies them — when a pack is received (the index build re-hashes every
/// object) and on demand via [`FileObjectDatabase`]'s `validate`/fsck. Hashing on
/// *every* read dominated bulk-read cost, so the paranoid check is opt-in:
/// set `SLEY_VERIFY_READS` to anything other than an empty string or `0`
/// (surrounding whitespace is ignored). An unset or non-Unicode value means off.
///
/// The variable is consulted once per process; later changes to the environment
/// are not observed.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS").ok().is_some_and(|raw| {
            let setting = raw.trim();
            !(setting.is_empty() || setting == "0")
        })
    })
}
3205
/// A memory-capped LRU map from a key `K` to a decoded [`EncodedObject`].
///
/// Eviction is by approximate byte budget (gix-style), not object count, so the
/// cache adapts to object size. On access an entry is moved to most-recently-used;
/// on insert, least-recently-used entries are dropped until the budget holds. A
/// budget of `0` makes the cache inert. Generic over the key so it backs both the
/// oid-keyed decoded-object cache and the offset-keyed delta-base cache.
///
/// The recency order is a doubly linked list threaded through the map entries:
/// each entry names its neighbors by key rather than by pointer, so every link
/// update is a map lookup.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    // Maximum total cost (per `cached_object_cost`) the cache may hold; `0`
    // disables insertion entirely.
    budget: usize,
    // Running sum of `cached_object_cost` over all resident entries.
    used: usize,
    // Key -> cached object plus its links in the recency list.
    map: HashMap<K, LruEntry<K>>,
    // Least-recently-used key; eviction starts here.
    head: Option<K>,
    // Most-recently-used key; touched and newly inserted entries go here.
    tail: Option<K>,
}

/// One resident cache entry: the shared object and the keys of its neighbors in
/// the recency list (`None` at either end of the list).
#[derive(Debug)]
struct LruEntry<K> {
    object: Arc<EncodedObject>,
    // Neighbor toward the least-recently-used end.
    prev: Option<K>,
    // Neighbor toward the most-recently-used end.
    next: Option<K>,
}
3228
3229impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
3230    fn new(budget: usize) -> Self {
3231        Self {
3232            budget,
3233            used: 0,
3234            map: HashMap::new(),
3235            head: None,
3236            tail: None,
3237        }
3238    }
3239
3240    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
3241        let object = Arc::clone(&self.map.get(key)?.object);
3242        self.touch(key);
3243        Some(object)
3244    }
3245
3246    /// Move `key` to the most-recently-used end in O(1).
3247    fn touch(&mut self, key: &K) {
3248        if self.tail.as_ref() == Some(key) {
3249            return;
3250        }
3251        if self.map.contains_key(key) {
3252            self.detach(key);
3253            self.attach_back(key.clone());
3254        }
3255    }
3256
3257    /// Drop `key` from both the map and the recency queue, releasing its budget.
3258    fn remove(&mut self, key: &K) {
3259        if let Some(entry) = self.map.get(key) {
3260            self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
3261        }
3262        self.detach(key);
3263        self.map.remove(key);
3264    }
3265
3266    fn detach(&mut self, key: &K) {
3267        let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
3268            let prev = entry.prev.take();
3269            let next = entry.next.take();
3270            (prev, next)
3271        }) else {
3272            return;
3273        };
3274
3275        match &prev {
3276            Some(prev_key) => {
3277                if let Some(prev_entry) = self.map.get_mut(prev_key) {
3278                    prev_entry.next = next.clone();
3279                }
3280            }
3281            None => self.head = next.clone(),
3282        }
3283        match &next {
3284            Some(next_key) => {
3285                if let Some(next_entry) = self.map.get_mut(next_key) {
3286                    next_entry.prev = prev.clone();
3287                }
3288            }
3289            None => self.tail = prev.clone(),
3290        }
3291    }
3292
3293    fn attach_back(&mut self, key: K) {
3294        let previous_tail = self.tail.replace(key.clone());
3295        match previous_tail {
3296            Some(tail_key) => {
3297                if let Some(tail_entry) = self.map.get_mut(&tail_key) {
3298                    tail_entry.next = Some(key.clone());
3299                }
3300                if let Some(entry) = self.map.get_mut(&key) {
3301                    entry.prev = Some(tail_key);
3302                    entry.next = None;
3303                }
3304            }
3305            None => {
3306                self.head = Some(key.clone());
3307                if let Some(entry) = self.map.get_mut(&key) {
3308                    entry.prev = None;
3309                    entry.next = None;
3310                }
3311            }
3312        }
3313    }
3314
3315    fn clear(&mut self) {
3316        self.map.clear();
3317        self.head = None;
3318        self.tail = None;
3319        self.used = 0;
3320    }
3321
3322    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
3323        if self.budget == 0 {
3324            return;
3325        }
3326        let cost = cached_object_cost(&object);
3327        // A single object larger than the whole budget is not worth caching; it
3328        // would immediately evict everything including itself. Drop any stale
3329        // smaller entry stored under the same key so accounting stays exact.
3330        if cost > self.budget {
3331            self.remove(&key);
3332            return;
3333        }
3334        if let Some(entry) = self.map.get_mut(&key) {
3335            let previous = std::mem::replace(&mut entry.object, object);
3336            // Replacing an existing entry: adjust accounting and refresh recency.
3337            self.used = self
3338                .used
3339                .saturating_sub(cached_object_cost(&previous))
3340                .saturating_add(cost);
3341            self.touch(&key);
3342        } else {
3343            self.used = self.used.saturating_add(cost);
3344            self.map.insert(
3345                key.clone(),
3346                LruEntry {
3347                    object,
3348                    prev: None,
3349                    next: None,
3350                },
3351            );
3352            self.attach_back(key);
3353        }
3354        while self.used > self.budget {
3355            let Some(evicted) = self.head.clone() else {
3356                break;
3357            };
3358            self.remove(&evicted);
3359        }
3360    }
3361}
3362
/// Decoded-object cache keyed by object id (loose + packed reads share it).
/// An [`LruCache`] instantiated with [`ObjectId`] keys.
type LruObjectCache = LruCache<ObjectId>;
/// Delta-base cache keyed by in-pack byte offset, scoped to one pack.
/// An [`LruCache`] instantiated with `u64` keys.
type LruOffsetCache = LruCache<u64>;
3367
3368/// Bridges the offset-keyed [`LruOffsetCache`] to [`sley_pack::PackDeltaCache`]
3369/// so the pack decoder can reuse decoded delta bases. Holds the shared cache
3370/// behind its mutex; a poisoned lock simply behaves as a cache miss/no-op, so a
3371/// decode still completes correctly (just without reuse).
3372struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
3373
3374impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
3375    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
3376        self.0.lock().ok()?.get(&offset)
3377    }
3378
3379    fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
3380        if let Ok(mut cache) = self.0.lock() {
3381            cache.put(offset, object);
3382        }
3383    }
3384}
3385
3386/// Bridges a per-pack `offset -> ObjectType` memo into the header fast path so
3387/// the ofs-delta chain walk is performed at most once per chain across a batch
3388/// of `read_object_header` calls (sley#26).
3389struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
3390
3391impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
3392    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
3393        self.0.lock().ok()?.get(&pack_offset).copied()
3394    }
3395
3396    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
3397        if let Ok(mut cache) = self.0.lock() {
3398            cache.insert(pack_offset, header);
3399        }
3400    }
3401}
3402
/// Parsed pack indexes keyed by `.idx` path, shared across cloned handles. This
/// remains for MIDX and path-only fallback lookups; normal pack-directory scans
/// use [`PackRegistrySnapshot`] so the lookup hot path can walk already-parsed
/// pack records directly. Values are `Arc`s, so a hit hands out a shared handle
/// rather than a copy of the parsed index.
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;

/// Parsed multi-pack-index files keyed by path, shared across cloned handles.
/// Caches the MIDX parse so object lookups in repositories with a MIDX avoid
/// reparsing the same fanout/object tables for every read.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;

/// Raw multi-pack-index OID lookup tables keyed by path, shared across cloned
/// handles. These avoid hashing and materializing every MIDX object when a
/// command only needs point lookups (e.g. the presence checks done by
/// [`ObjectPresenceChecker`]).
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
3418
/// One registered `.idx`/`.pack` pair from a pack directory. The parsed index,
/// the pack bytes, and the per-pack decode/header caches hang directly off this
/// record so repeated object lookups do not bounce through path-keyed maps.
///
/// Both the `index` and `data` slots start out empty (see `new`) and are filled
/// by the first successful `index` / `bytes` call.
/// NOTE(review): whether the registry builder calls `index` eagerly when the
/// snapshot is built is not visible here — confirm at the construction site.
#[derive(Debug)]
struct RegisteredPack {
    // Path of the `.idx` file.
    idx: PathBuf,
    // Path of the matching `.pack` file.
    pack: PathBuf,
    // Lazily parsed index view; `None` until first loaded.
    index: Mutex<Option<Arc<PackIndexViewData>>>,
    // Lazily loaded pack bytes; `None` until first loaded.
    data: Mutex<Option<Arc<PackData>>>,
    // Decoded delta bases for this pack, keyed by in-pack offset.
    delta_cache: Arc<Mutex<LruOffsetCache>>,
    // Header memo for this pack, keyed by in-pack offset.
    header_type_cache: PackHeaderTypeCache,
}
3432
3433impl RegisteredPack {
3434    fn new(idx: PathBuf, pack: PathBuf) -> Self {
3435        Self {
3436            idx,
3437            pack,
3438            index: Mutex::new(None),
3439            data: Mutex::new(None),
3440            delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
3441            header_type_cache: Arc::new(Mutex::new(HashMap::new())),
3442        }
3443    }
3444
3445    fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
3446        if let Ok(cache) = self.index.lock()
3447            && let Some(index) = cache.as_ref()
3448        {
3449            return Ok(Arc::clone(index));
3450        }
3451        let index_bytes = load_pack_index_data(&self.idx)?;
3452        let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
3453            index_bytes,
3454            format,
3455        )?);
3456        if let Ok(mut cache) = self.index.lock() {
3457            *cache = Some(Arc::clone(&index));
3458        }
3459        Ok(index)
3460    }
3461
3462    fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
3463        if let Ok(cache) = self.data.lock()
3464            && let Some(bytes) = cache.as_ref()
3465        {
3466            return Ok(Arc::clone(bytes));
3467        }
3468        if let Ok(cache) = pack_bytes.lock()
3469            && let Some(bytes) = cache.get(&self.pack)
3470        {
3471            let bytes = Arc::clone(bytes);
3472            if let Ok(mut local_cache) = self.data.lock() {
3473                *local_cache = Some(Arc::clone(&bytes));
3474            }
3475            return Ok(bytes);
3476        }
3477        let bytes = Arc::new(load_pack_data(&self.pack)?);
3478        if let Ok(mut local_cache) = self.data.lock() {
3479            *local_cache = Some(Arc::clone(&bytes));
3480        }
3481        if let Ok(mut cache) = pack_bytes.lock() {
3482            cache.insert(self.pack.clone(), Arc::clone(&bytes));
3483        }
3484        Ok(bytes)
3485    }
3486}
3487
/// Cheap summary of a pack directory's state, compared by equality to decide
/// whether a cached [`PackRegistrySnapshot`] still describes the directory.
///
/// NOTE(review): the fields are presumably the directory's modification time and
/// the number of `.idx` / `.pack` files seen during the scan — confirm against
/// the code that builds the fingerprint, which is outside this section.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    // `None` presumably means the timestamp could not be obtained — TODO confirm.
    modified: Option<std::time::SystemTime>,
    idx_count: usize,
    pack_count: usize,
}
3494
3495/// Snapshot of a pack directory's lookup state, shared across cloned handles.
3496/// New packs are still found: a lookup that misses every cached pack re-scans the
3497/// directory once before concluding the object is absent (see
3498/// [`FileObjectDatabase::find_pack_containing`]).
3499#[derive(Debug)]
3500struct PackRegistrySnapshot {
3501    fingerprint: PackDirFingerprint,
3502    packs: Vec<Arc<RegisteredPack>>,
3503    recent_pack: Mutex<Option<usize>>,
3504}
3505
3506impl PackRegistrySnapshot {
3507    fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
3508        Self {
3509            fingerprint,
3510            packs,
3511            recent_pack: Mutex::new(None),
3512        }
3513    }
3514
3515    fn cached_hint(&self) -> Option<usize> {
3516        self.recent_pack
3517            .lock()
3518            .ok()
3519            .and_then(|hint| *hint)
3520            .filter(|pack_index| *pack_index < self.packs.len())
3521    }
3522
3523    fn remember_hint(&self, pack_index: usize) {
3524        if let Ok(mut hint) = self.recent_pack.lock() {
3525            *hint = Some(pack_index);
3526        }
3527    }
3528}
3529
/// Cached pack-registry snapshot for this object directory, shared across cloned
/// handles. A `FileObjectDatabase` owns exactly one object directory, so this is
/// an `Option` instead of another path-keyed map. `None` means no snapshot is
/// currently cached (the initial state, and the state after
/// `refresh_read_cache`).
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
3534
3535#[derive(Debug, Clone)]
3536struct PackLookup {
3537    pack: PathBuf,
3538    registered: Option<Arc<RegisteredPack>>,
3539    offset: u64,
3540}
3541
3542impl PackLookup {
3543    fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
3544        Self {
3545            pack: pack.pack.clone(),
3546            registered: Some(pack),
3547            offset,
3548        }
3549    }
3550
3551    fn from_path(pack: PathBuf, offset: u64) -> Self {
3552        Self {
3553            pack,
3554            registered: None,
3555            offset,
3556        }
3557    }
3558
3559    fn pack_path(&self) -> &Path {
3560        &self.pack
3561    }
3562
3563    fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
3564        match &self.registered {
3565            Some(pack) => pack.bytes(&database.pack_bytes),
3566            None => database.cached_pack_bytes(&self.pack),
3567        }
3568    }
3569
3570    fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
3571        match &self.registered {
3572            Some(pack) => database.cached_pack_index(&pack.idx),
3573            None => database.cached_pack_index(&self.pack.with_extension("idx")),
3574        }
3575    }
3576
3577    fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
3578        match &self.registered {
3579            Some(pack) => Some(Arc::clone(&pack.delta_cache)),
3580            None => database.pack_delta_cache(&self.pack),
3581        }
3582    }
3583
3584    fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
3585        match &self.registered {
3586            Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
3587            None => database.pack_header_type_cache(&self.pack),
3588        }
3589    }
3590}
3591
/// Object database backed by one on-disk object directory: a loose-object store
/// plus the packs under `objects_dir/pack`, with optional alternate object
/// directories.
///
/// The handle is `Clone`. The cache fields built with `Arc` are shared between a
/// handle and its clones, so a clone sees (and contributes to) the same parsed
/// indexes, pack bytes, and decoded objects.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    // Loose-object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    // Root object directory this database reads from.
    objects_dir: PathBuf,
    // Alternate object directories consulted in addition to `objects_dir`;
    // empty for handles built by `without_alternates`.
    alternates: Vec<PathBuf>,
    // Object-id format (hash algorithm) of this store.
    format: ObjectFormat,
    // Path-keyed cache of loaded pack bytes.
    pack_bytes: PackBytesCache,
    // Path-keyed cache of fully parsed `.idx` files.
    pack_indexes: PackIndexCache,
    // Path-keyed cache of parsed multi-pack-index files.
    multi_pack_indexes: MultiPackIndexCache,
    // Path-keyed cache of raw MIDX point-lookup tables.
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    // Cached snapshot of the pack directory, if one has been built.
    pack_registry: PackRegistryCache,
    // Oid-keyed LRU of decoded objects.
    decoded: DecodedObjectCache,
    // Delta-base caches for packs reached by path (no registry record);
    // presumably keyed by pack path — TODO confirm.
    pack_deltas: PackDeltaCaches,
    // Header memos for packs reached by path, keyed by pack path.
    pack_header_types: PackHeaderTypeCaches,
    /// Graft points (`$GIT_DIR/shallow`), loaded lazily on the first
    /// [`ObjectReader::is_shallow_graft`] query. `$GIT_DIR` is taken to be
    /// the parent of `objects_dir`, matching the standard layout.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
3611
/// Stateful helper for answering many "is this object present?" questions
/// against one [`FileObjectDatabase`]. It keeps the MIDX lookup table, the pack
/// registry snapshot, and the per-pack index views it has already loaded, so
/// repeated `contains` calls avoid re-preparing them.
#[derive(Debug)]
pub struct ObjectPresenceChecker {
    // Database handle the checker queries (a clone of the caller's handle).
    db: FileObjectDatabase,
    // `objects_dir/pack`, computed once at construction.
    pack_dir: PathBuf,
    // MIDX point-lookup table, once `prepare_packs` has run; `None` when
    // `cached_multi_pack_index_oid_lookup` returned none.
    midx: Option<Arc<MultiPackIndexOidLookup>>,
    // Registry snapshot currently in use, once `prepare_registry` has run.
    registry: Option<Arc<PackRegistrySnapshot>>,
    // Index views loaded so far, parallel to `registry.packs` (`None` = not yet
    // loaded). Reset whenever the snapshot changes.
    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
    // Position in `registry.packs` of the pack that most recently matched; tried
    // first on the next lookup.
    recent_pack: Option<usize>,
    // Whether the MIDX table has been prepared at least once.
    prepared_packs: bool,
    // Whether the registry snapshot has been prepared at least once.
    prepared_registry: bool,
}
3623
3624impl ObjectPresenceChecker {
3625    fn new(db: FileObjectDatabase) -> Self {
3626        let pack_dir = db.objects_dir.join("pack");
3627        Self {
3628            db,
3629            pack_dir,
3630            midx: None,
3631            registry: None,
3632            registry_indexes: Vec::new(),
3633            recent_pack: None,
3634            prepared_packs: false,
3635            prepared_registry: false,
3636        }
3637    }
3638
3639    pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
3640        if oid.format() != self.db.format {
3641            return Err(GitError::InvalidObjectId(format!(
3642                "object {oid} uses {}, store uses {}",
3643                oid.format().name(),
3644                self.db.format.name()
3645            )));
3646        }
3647        if self.db.loose.exists(oid)? {
3648            return Ok(true);
3649        }
3650        if self.find_packed(oid, false)? {
3651            return Ok(true);
3652        }
3653        if self.find_packed(oid, true)? {
3654            return Ok(true);
3655        }
3656        for alternate in &self.db.alternates {
3657            if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
3658                return Ok(true);
3659            }
3660        }
3661        // Preserve the regular contains() reprepare-on-miss behavior for loose
3662        // objects that appeared after the fanout cache was populated.
3663        self.db.loose.invalidate_cache();
3664        self.db.loose.exists(oid)
3665    }
3666
3667    fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
3668        self.prepare_packs(force_rescan)?;
3669        if let Some(midx) = &self.midx
3670            && midx.contains(oid)
3671        {
3672            return Ok(true);
3673        }
3674        self.prepare_registry(force_rescan)?;
3675        self.find_in_registry(oid)
3676    }
3677
3678    fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
3679        if self.prepared_packs && !force_rescan {
3680            return Ok(());
3681        }
3682        let midx_path = self.pack_dir.join("multi-pack-index");
3683        self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
3684        self.prepared_packs = true;
3685        Ok(())
3686    }
3687
3688    fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
3689        if self.prepared_registry && !force_rescan {
3690            return Ok(());
3691        }
3692        let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
3693        let registry_changed = match self.registry.as_ref() {
3694            Some(cached) => !Arc::ptr_eq(cached, &registry),
3695            None => true,
3696        };
3697        if registry_changed {
3698            self.registry_indexes = vec![None; registry.packs.len()];
3699            self.recent_pack = None;
3700            self.registry = Some(registry);
3701        }
3702        self.prepared_registry = true;
3703        Ok(())
3704    }
3705
3706    fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
3707        let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
3708            return Ok(false);
3709        };
3710        if let Some(pack_index) = self
3711            .recent_pack
3712            .filter(|pack_index| *pack_index < registry.packs.len())
3713        {
3714            let index = self.registry_index(&registry, pack_index)?;
3715            if index.find(oid).is_some() {
3716                return Ok(true);
3717            }
3718        }
3719        for pack_index in 0..registry.packs.len() {
3720            if Some(pack_index) == self.recent_pack {
3721                continue;
3722            }
3723            let index = self.registry_index(&registry, pack_index)?;
3724            if index.find(oid).is_some() {
3725                self.recent_pack = Some(pack_index);
3726                return Ok(true);
3727            }
3728        }
3729        Ok(false)
3730    }
3731
3732    fn registry_index(
3733        &mut self,
3734        registry: &PackRegistrySnapshot,
3735        pack_index: usize,
3736    ) -> Result<Arc<PackIndexViewData>> {
3737        if self.registry_indexes.len() != registry.packs.len() {
3738            self.registry_indexes = vec![None; registry.packs.len()];
3739            self.recent_pack = None;
3740        }
3741        if let Some(index) = self
3742            .registry_indexes
3743            .get(pack_index)
3744            .and_then(|index| index.as_ref())
3745        {
3746            return Ok(Arc::clone(index));
3747        }
3748        let index = registry.packs[pack_index].index(self.db.format)?;
3749        if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
3750            *slot = Some(Arc::clone(&index));
3751        }
3752        Ok(index)
3753    }
3754}
3755
3756/// Parse `$GIT_DIR/shallow`: one hex object id per line. A missing file is an
3757/// empty set (the repository is not shallow); unparsable lines are ignored so
3758/// a torn write never poisons walks.
3759fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
3760    let Ok(contents) = std::fs::read_to_string(shallow_file) else {
3761        return HashSet::new();
3762    };
3763    contents
3764        .lines()
3765        .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
3766        .collect()
3767}
3768
3769pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
3770    env::var_os("GIT_OBJECT_DIRECTORY")
3771        .map(PathBuf::from)
3772        .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
3773}
3774
/// Locate the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is used verbatim.
/// 2. If `git_dir/commondir` can be read, its trimmed contents name the common
///    directory; a relative value is resolved against `git_dir`. The result is
///    canonicalized when possible and returned un-canonicalized otherwise.
/// 3. Otherwise `git_dir` itself is the common directory.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }
    let git_dir = git_dir.as_ref();
    match fs::read_to_string(git_dir.join("commondir")) {
        Ok(pointer) => {
            let target = Path::new(pointer.trim());
            let resolved = if target.is_absolute() {
                target.to_path_buf()
            } else {
                git_dir.join(target)
            };
            fs::canonicalize(&resolved).unwrap_or(resolved)
        }
        Err(_) => git_dir.to_path_buf(),
    }
}
3792
3793pub fn repository_object_ids(
3794    git_dir: impl AsRef<Path>,
3795    format: ObjectFormat,
3796) -> Result<Vec<ObjectId>> {
3797    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
3798}
3799
3800pub fn object_ids_in_objects_dir(
3801    objects_dir: impl AsRef<Path>,
3802    format: ObjectFormat,
3803) -> Result<Vec<ObjectId>> {
3804    let objects_dir = objects_dir.as_ref();
3805    let mut oids = HashSet::new();
3806    collect_loose_object_ids(objects_dir, format, &mut oids)?;
3807    collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
3808    let mut oids = oids.into_iter().collect::<Vec<_>>();
3809    oids.sort_by_key(ObjectId::to_hex);
3810    Ok(oids)
3811}
3812
3813fn collect_loose_object_ids(
3814    objects_dir: &Path,
3815    format: ObjectFormat,
3816    oids: &mut HashSet<ObjectId>,
3817) -> Result<()> {
3818    if !objects_dir.exists() {
3819        return Ok(());
3820    }
3821    let hex_len = format.hex_len();
3822    for entry in fs::read_dir(objects_dir)? {
3823        let entry = entry?;
3824        if !entry.file_type()?.is_dir() {
3825            continue;
3826        }
3827        let name = entry.file_name();
3828        let Some(fanout) = name.to_str() else {
3829            continue;
3830        };
3831        if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3832            continue;
3833        }
3834        for object_entry in fs::read_dir(entry.path())? {
3835            let object_entry = object_entry?;
3836            if !object_entry.file_type()?.is_file() {
3837                continue;
3838            }
3839            let name = object_entry.file_name();
3840            let Some(suffix) = name.to_str() else {
3841                continue;
3842            };
3843            if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3844                continue;
3845            }
3846            oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
3847        }
3848    }
3849    Ok(())
3850}
3851
3852fn collect_loose_fanout_object_ids(
3853    objects_dir: &Path,
3854    format: ObjectFormat,
3855    fanout: u8,
3856    oids: &mut HashSet<ObjectId>,
3857) -> Result<()> {
3858    let fanout_hex = format!("{fanout:02x}");
3859    let fanout_dir = objects_dir.join(&fanout_hex);
3860    let entries = match fs::read_dir(&fanout_dir) {
3861        Ok(entries) => entries,
3862        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
3863        Err(err) => return Err(GitError::Io(err.to_string())),
3864    };
3865    let hex_len = format.hex_len();
3866    for object_entry in entries {
3867        let object_entry = object_entry?;
3868        let name = object_entry.file_name();
3869        let Some(suffix) = name.to_str() else {
3870            continue;
3871        };
3872        if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3873            continue;
3874        }
3875        oids.insert(ObjectId::from_hex(
3876            format,
3877            &format!("{fanout_hex}{suffix}"),
3878        )?);
3879    }
3880    Ok(())
3881}
3882
/// Memo of which loose objects are known to exist, filled one fanout directory
/// at a time.
///
/// NOTE(review): the code that populates and queries this cache is outside this
/// section; the field descriptions below are inferred from the types and from
/// `collect_loose_fanout_object_ids` — confirm against `LooseObjectStore`.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    // Fanout bytes (first byte of the id) whose directory has presumably already
    // been listed into `objects`.
    loaded_fanouts: HashSet<u8>,
    // Object ids seen in the listed fanout directories.
    objects: HashSet<ObjectId>,
}
3888
3889/// Every object id resolvable through a pack (any `.idx` or the
3890/// multi-pack-index) under `objects_dir/pack`. Used by `--unpacked`
3891/// filtering: an object is "unpacked" when absent from this set, regardless
3892/// of a loose copy also existing.
3893pub fn packed_object_ids(
3894    objects_dir: impl AsRef<Path>,
3895    format: ObjectFormat,
3896) -> Result<HashSet<ObjectId>> {
3897    let mut oids = HashSet::new();
3898    collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
3899    Ok(oids)
3900}
3901
3902fn collect_packed_object_ids(
3903    pack_dir: &Path,
3904    format: ObjectFormat,
3905    oids: &mut HashSet<ObjectId>,
3906) -> Result<()> {
3907    if !pack_dir.exists() {
3908        return Ok(());
3909    }
3910    let midx_path = pack_dir.join("multi-pack-index");
3911    if midx_path.exists() {
3912        let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
3913        oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
3914    }
3915    for entry in fs::read_dir(pack_dir)? {
3916        let path = entry?.path();
3917        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3918            continue;
3919        }
3920        let index = PackIndex::parse(&fs::read(path)?, format)?;
3921        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
3922    }
3923    Ok(())
3924}
3925
3926impl FileObjectDatabase {
    /// The object-id format (hash algorithm) this database was opened with.
    ///
    /// Returned by value; this is the `format` passed to the constructor.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
3931
    /// The repository object directory this database reads from.
    ///
    /// This is the path given at construction, borrowed from the handle; packs
    /// are looked up under its `pack` subdirectory.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
3936
3937    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3938        let objects_dir = objects_dir.into();
3939        Self {
3940            loose: LooseObjectStore::new(objects_dir.clone(), format),
3941            alternates: alternate_object_dirs(&objects_dir),
3942            objects_dir,
3943            format,
3944            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3945            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3946            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3947            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3948            pack_registry: Arc::new(Mutex::new(None)),
3949            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3950            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3951            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3952            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3953        }
3954    }
3955
3956    fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3957        let objects_dir = objects_dir.into();
3958        Self {
3959            loose: LooseObjectStore::new(objects_dir.clone(), format),
3960            alternates: Vec::new(),
3961            objects_dir,
3962            format,
3963            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3964            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3965            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3966            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3967            pack_registry: Arc::new(Mutex::new(None)),
3968            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3969            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3970            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3971            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3972        }
3973    }
3974
    /// Open the object database for the repository at `git_dir`.
    ///
    /// The object directory is resolved with [`repository_objects_dir`], so
    /// `GIT_OBJECT_DIRECTORY`, `GIT_COMMON_DIR`, and a `commondir` file are all
    /// honored; the rest is [`FileObjectDatabase::new`].
    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
        Self::new(repository_objects_dir(git_dir), format)
    }
3978
3979    /// Drop cached pack registries, indexes, and decoded objects so the next read
3980    /// sees packs/objects installed after this handle was created (e.g. after
3981    /// `fetch` or `install_pack`). Long-lived [`Repository`] sessions call this
3982    /// via the owning repository's `refresh_objects` hook.
3983    pub fn refresh_read_cache(&self) {
3984        if let Ok(mut cache) = self.pack_registry.lock() {
3985            *cache = None;
3986        }
3987        if let Ok(mut cache) = self.pack_indexes.lock() {
3988            cache.clear();
3989        }
3990        if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3991            cache.clear();
3992        }
3993        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3994            cache.clear();
3995        }
3996        if let Ok(mut cache) = self.pack_bytes.lock() {
3997            cache.clear();
3998        }
3999        if let Ok(mut cache) = self.pack_deltas.lock() {
4000            cache.clear();
4001        }
4002        if let Ok(mut cache) = self.pack_header_types.lock() {
4003            cache.clear();
4004        }
4005        if let Ok(mut cache) = self.decoded.lock() {
4006            cache.clear();
4007        }
4008        self.loose.invalidate_cache();
4009    }
4010
4011    pub fn loose(&self) -> &LooseObjectStore {
4012        &self.loose
4013    }
4014
4015    pub fn presence_checker(&self) -> ObjectPresenceChecker {
4016        ObjectPresenceChecker::new(self.clone())
4017    }
4018
4019    pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
4020        self.install_pack_with_options(pack, RawPackInstallOptions::default())
4021    }
4022
4023    pub fn install_pack_with_options(
4024        &self,
4025        pack: &PackWrite,
4026        options: RawPackInstallOptions,
4027    ) -> Result<PackInstallResult> {
4028        if pack.checksum.format() != self.format {
4029            return Err(GitError::InvalidObjectId(format!(
4030                "pack checksum uses {}, store uses {}",
4031                pack.checksum.format().name(),
4032                self.format.name()
4033            )));
4034        }
4035        for entry in &pack.entries {
4036            if entry.oid.format() != self.format {
4037                return Err(GitError::InvalidObjectId(format!(
4038                    "pack entry {} uses {}, store uses {}",
4039                    entry.oid,
4040                    entry.oid.format().name(),
4041                    self.format.name()
4042                )));
4043            }
4044        }
4045        let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
4046        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4047        if canonical_index.pack_checksum != pack.checksum
4048            || parsed_index.pack_checksum != pack.checksum
4049        {
4050            return Err(GitError::InvalidFormat(
4051                "pack and index checksums do not match pack write".into(),
4052            ));
4053        }
4054        if pack.index != canonical_index.index {
4055            return Err(GitError::InvalidFormat(
4056                "pack index does not match pack contents".into(),
4057            ));
4058        }
4059
4060        let pack_dir = self.objects_dir.join("pack");
4061        fs::create_dir_all(&pack_dir)?;
4062        let pack_name = format!("pack-{}", pack.checksum.to_hex());
4063        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4064        let index_path = pack_dir.join(format!("{pack_name}.idx"));
4065        if !pack_path.exists() || !index_path.exists() {
4066            write_pack_component(&pack_path, &pack.pack)?;
4067            write_pack_component(&index_path, &pack.index)?;
4068        }
4069        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4070        Ok(PackInstallResult {
4071            pack_name,
4072            pack_path,
4073            index_path,
4074            promisor_path,
4075            object_ids: canonical_index
4076                .entries
4077                .iter()
4078                .map(|entry| entry.oid)
4079                .collect(),
4080        })
4081    }
4082
4083    /// Install a pack that was produced in this process by [`PackFile::write_packed`].
4084    ///
4085    /// Unlike [`Self::install_raw_pack_with_options`], this does not re-inflate
4086    /// every pack entry to rebuild the index. It validates the generated pack
4087    /// trailer and generated index against the writer's object ids, CRCs, and
4088    /// offsets, then writes those bytes directly. Use the raw installer for
4089    /// arbitrary pack bytes received from an untrusted transport.
4090    pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
4091        self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
4092    }
4093
4094    pub fn install_written_pack_with_options(
4095        &self,
4096        pack: &PackWrite,
4097        options: RawPackInstallOptions,
4098    ) -> Result<PackInstallResult> {
4099        validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
4100        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4101        if parsed_index.pack_checksum != pack.checksum {
4102            return Err(GitError::InvalidFormat(
4103                "pack write index checksum does not match pack".into(),
4104            ));
4105        }
4106        if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
4107            return Err(GitError::InvalidFormat(
4108                "pack write index does not match generated entries".into(),
4109            ));
4110        }
4111        self.install_generated_pack_unchecked(pack, options)
4112    }
4113
4114    fn install_generated_pack_unchecked(
4115        &self,
4116        pack: &PackWrite,
4117        options: RawPackInstallOptions,
4118    ) -> Result<PackInstallResult> {
4119        let pack_dir = self.objects_dir.join("pack");
4120        fs::create_dir_all(&pack_dir)?;
4121        let pack_name = format!("pack-{}", pack.checksum.to_hex());
4122        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4123        let index_path = pack_dir.join(format!("{pack_name}.idx"));
4124        if !pack_path.exists() || !index_path.exists() {
4125            write_pack_component(&pack_path, &pack.pack)?;
4126            write_pack_component(&index_path, &pack.index)?;
4127        }
4128        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4129        Ok(PackInstallResult {
4130            pack_name,
4131            pack_path,
4132            index_path,
4133            promisor_path,
4134            object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
4135        })
4136    }
4137
4138    pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
4139        self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
4140    }
4141
4142    pub fn install_raw_pack_with_options(
4143        &self,
4144        pack_bytes: &[u8],
4145        options: RawPackInstallOptions,
4146    ) -> Result<PackInstallResult> {
4147        let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
4148        let pack_dir = self.objects_dir.join("pack");
4149        fs::create_dir_all(&pack_dir)?;
4150        let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
4151        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4152        let index_path = pack_dir.join(format!("{pack_name}.idx"));
4153        if !pack_path.exists() || !index_path.exists() {
4154            write_pack_component(&pack_path, pack_bytes)?;
4155            write_pack_component(&index_path, &built.index)?;
4156        }
4157        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4158        Ok(PackInstallResult {
4159            pack_name,
4160            pack_path,
4161            index_path,
4162            promisor_path,
4163            object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
4164        })
4165    }
4166
4167    pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
4168        if self.loose.exists(oid)? {
4169            return Ok(true);
4170        }
4171        if self.find_pack_containing(oid)?.is_some() {
4172            return Ok(true);
4173        }
4174        for alternate in &self.alternates {
4175            if Self::without_alternates(alternate, self.format).contains(oid)? {
4176                return Ok(true);
4177            }
4178        }
4179        // Reprepare-on-miss: a cached negative loose verdict may predate a
4180        // sibling write. Drop it and exact-probe once before reporting absence.
4181        self.loose.invalidate_cache();
4182        self.loose.exists(oid)
4183    }
4184
4185    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4186        let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
4187            .into_iter()
4188            .collect::<HashSet<_>>();
4189        for alternate in &self.alternates {
4190            oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
4191        }
4192        let mut oids = oids.into_iter().collect::<Vec<_>>();
4193        oids.sort_by_key(ObjectId::to_hex);
4194        Ok(oids)
4195    }
4196
4197    pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
4198        if let Some(disk_size) = self.loose.disk_size(oid)? {
4199            return Ok(Some(ObjectStorageInfo {
4200                disk_size,
4201                deltabase: zero_oid(self.format)?,
4202            }));
4203        }
4204        if let Some(info) = self.packed_object_storage_info(oid)? {
4205            return Ok(Some(info));
4206        }
4207        for alternate in &self.alternates {
4208            if let Some(info) =
4209                Self::without_alternates(alternate, self.format).object_storage_info(oid)?
4210            {
4211                return Ok(Some(info));
4212            }
4213        }
4214        // Reprepare-on-miss: drop any stale negative loose cache and exact-probe
4215        // once before reporting absence (see `read_object`).
4216        self.loose.invalidate_cache();
4217        if let Some(disk_size) = self.loose.disk_size(oid)? {
4218            return Ok(Some(ObjectStorageInfo {
4219                disk_size,
4220                deltabase: zero_oid(self.format)?,
4221            }));
4222        }
4223        Ok(None)
4224    }
4225
4226    pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
4227        validate_object_id_prefix(self.format, prefix)?;
4228        let mut matches = Vec::new();
4229        for oid in self.object_ids()? {
4230            if object_id_matches_prefix(&oid, prefix) {
4231                matches.push(oid);
4232            }
4233        }
4234        Ok(match matches.len() {
4235            0 => ObjectPrefixResolution::Missing,
4236            1 => ObjectPrefixResolution::Unique(matches.remove(0)),
4237            _ => ObjectPrefixResolution::Ambiguous(matches),
4238        })
4239    }
4240
4241    /// The object type and content size of `oid` without decoding its full body —
4242    /// git's `cat-file --batch-check` fast path. Tries the decoded-object cache,
4243    /// then loose storage (inflating only the framing header), then packs (reading
4244    /// the entry header and, for deltas, only the delta's leading varints), then
4245    /// alternates. Returns `Ok(None)` if the object is not present.
4246    ///
4247    /// Unlike [`ObjectReader::read_object`], this never materializes the body, so it
4248    /// stays cheap on huge blobs and deep delta chains. It does not populate the
4249    /// decoded-object cache (nothing is decoded).
4250    pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
4251        if implied_empty_tree_object(self.format, oid).is_some() {
4252            return Ok(Some((ObjectType::Tree, 0)));
4253        }
4254        if let Ok(mut cache) = self.decoded.lock()
4255            && let Some(object) = cache.get(oid)
4256        {
4257            return Ok(Some((object.object_type, object.body.len() as u64)));
4258        }
4259        if let Some(header) = self.loose.read_header(oid)? {
4260            return Ok(Some(header));
4261        }
4262        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
4263            let bytes = pack_lookup.pack_bytes(self)?;
4264            // Per-pack offset->type memo so the ofs-delta chain walk that resolves
4265            // a packed object's type runs at most once per chain across the batch,
4266            // instead of re-walking (and re-inflating each link's leading varints)
4267            // on every header read — the sley#26 super-linear cat-file --batch-check.
4268            let type_cache = pack_lookup.header_type_cache(self);
4269            let resolve_ref_base = |base: &ObjectId| {
4270                self.read_object_header(base)
4271                    .map(|header| header.map(|(t, _)| t))
4272            };
4273            let header = match &type_cache {
4274                Some(cache) => {
4275                    let mut adapter = PackHeaderTypeCacheAdapter(cache);
4276                    sley_pack::read_object_header_at_with_cache(
4277                        &bytes,
4278                        pack_lookup.offset,
4279                        self.format,
4280                        resolve_ref_base,
4281                        &mut adapter,
4282                    )?
4283                }
4284                None => sley_pack::read_object_header_at(
4285                    &bytes,
4286                    pack_lookup.offset,
4287                    self.format,
4288                    resolve_ref_base,
4289                )?,
4290            };
4291            return Ok(Some(header));
4292        }
4293        for alternate in &self.alternates {
4294            if let Some(header) =
4295                Self::without_alternates(alternate, self.format).read_object_header(oid)?
4296            {
4297                return Ok(Some(header));
4298            }
4299        }
4300        // Reprepare-on-miss: discard any stale negative loose cache and retry an
4301        // exact path probe once before reporting absence (see `read_object`).
4302        self.loose.invalidate_cache();
4303        if let Some(header) = self.loose.read_header(oid)? {
4304            return Ok(Some(header));
4305        }
4306        Ok(None)
4307    }
4308
4309    fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
4310        // Memory-capped decoded-object cache first (delta-base reuse for ref-delta
4311        // bases that resolve back through the store + repeated whole-object reads).
4312        if let Ok(mut cache) = self.decoded.lock()
4313            && let Some(object) = cache.get(oid)
4314        {
4315            return Ok(Some(object));
4316        }
4317        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
4318            return Ok(None);
4319        };
4320        self.read_packed_object_at_lookup(oid, &pack_lookup)
4321            .map(Some)
4322    }
4323
4324    fn read_packed_object_at_lookup(
4325        &self,
4326        oid: &ObjectId,
4327        pack_lookup: &PackLookup,
4328    ) -> Result<Arc<EncodedObject>> {
4329        if let Ok(mut cache) = self.decoded.lock()
4330            && let Some(object) = cache.get(oid)
4331        {
4332            return Ok(object);
4333        }
4334        let bytes = pack_lookup.pack_bytes(self)?;
4335        // Per-pack delta-base cache (keyed by in-pack offset). Resolving an
4336        // ofs-delta chain reuses already-decoded bases instead of re-inflating the
4337        // whole chain on every read. Scoped to this pack's path so an offset key is
4338        // never applied to the wrong pack's bytes.
4339        let delta_cache = pack_lookup.delta_cache(self);
4340        let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
4341        // Decode only this object at its offset (plus its delta-base chain). A
4342        // ref-delta base resolves through the full store (loose / other packs) and
4343        // reuses the decoded-object cache. No cache lock is held across the decode,
4344        // so the recursive resolver re-entry (which may re-enter read_object) is
4345        // safe.
4346        let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
4347        let object = match &delta_adapter {
4348            Some(adapter) => sley_pack::read_object_at_with_cache_arc(
4349                &bytes,
4350                pack_lookup.offset,
4351                self.format,
4352                resolve_ref_base,
4353                adapter,
4354            )?,
4355            None => sley_pack::read_object_at_arc(
4356                &bytes,
4357                pack_lookup.offset,
4358                self.format,
4359                resolve_ref_base,
4360            )?,
4361        };
4362        // Trust the index → offset mapping rather than re-hashing every decoded
4363        // object on read (see `verify_reads_enabled`); this re-hash dominated
4364        // bulk-read cost. Opt back in with `SLEY_VERIFY_READS` for a paranoid check.
4365        if verify_reads_enabled() {
4366            let actual = object.object_id(self.format)?;
4367            if actual != *oid {
4368                return Err(GitError::InvalidObject(format!(
4369                    "pack object id mismatch: index says {oid}, decoded {actual}"
4370                )));
4371            }
4372        }
4373        if let Ok(mut cache) = self.decoded.lock() {
4374            cache.put(*oid, Arc::clone(&object));
4375        }
4376        Ok(object)
4377    }
4378
4379    /// The per-pack delta-base cache for `pack_path`, creating it on first use.
4380    /// Returns `None` only if the shared map's lock is poisoned, in which case the
4381    /// caller falls back to an uncached decode (correctness preserved).
4382    fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
4383        let mut caches = self.pack_deltas.lock().ok()?;
4384        let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
4385            Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
4386        });
4387        Some(Arc::clone(cache))
4388    }
4389
4390    /// The per-pack header-type memo for `pack_path`, creating it on first use.
4391    /// Returns `None` only if the shared map's lock is poisoned, in which case the
4392    /// caller falls back to an unmemoized header walk (correctness preserved).
4393    fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
4394        let mut caches = self.pack_header_types.lock().ok()?;
4395        let cache = caches
4396            .entry(pack_path.to_path_buf())
4397            .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
4398        Some(Arc::clone(cache))
4399    }
4400
4401    /// Backing bytes of the pack at `pack_path`, loaded at most once per database
4402    /// handle (cached, shared across clones). Memory-mapped under the `mmap` feature,
4403    /// otherwise read into the heap. On a poisoned lock it falls back to loading
4404    /// without caching, preserving correctness.
4405    fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
4406        if let Ok(cache) = self.pack_bytes.lock()
4407            && let Some(bytes) = cache.get(pack_path)
4408        {
4409            return Ok(Arc::clone(bytes));
4410        }
4411        let bytes = Arc::new(load_pack_data(pack_path)?);
4412        if let Ok(mut cache) = self.pack_bytes.lock() {
4413            cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
4414        }
4415        Ok(bytes)
4416    }
4417
4418    /// Parsed index for the `.idx` at `index_path`, parsed at most once per
4419    /// database handle. On a poisoned lock it falls back to parsing without
4420    /// caching, preserving correctness.
4421    fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
4422        if let Ok(cache) = self.pack_indexes.lock()
4423            && let Some(index) = cache.get(index_path)
4424        {
4425            return Ok(Arc::clone(index));
4426        }
4427        let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
4428        if let Ok(mut cache) = self.pack_indexes.lock() {
4429            cache.insert(index_path.to_path_buf(), Arc::clone(&index));
4430        }
4431        Ok(index)
4432    }
4433
4434    fn cached_multi_pack_index_oid_lookup(
4435        &self,
4436        midx_path: &Path,
4437    ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
4438        if !midx_path.exists() {
4439            return Ok(None);
4440        }
4441        if let Ok(cache) = self.multi_pack_oid_lookups.lock()
4442            && let Some(midx) = cache.get(midx_path)
4443        {
4444            return Ok(Some(Arc::clone(midx)));
4445        }
4446        let bytes = load_multi_pack_index_lookup_data(midx_path)?;
4447        let midx = Arc::new(MultiPackIndexOidLookup::parse(bytes, self.format)?);
4448        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
4449            cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
4450        }
4451        Ok(Some(midx))
4452    }
4453
4454    /// Registry snapshot for this database's pack directory. With `force_rescan`,
4455    /// the directory is re-read; when the fingerprint and pack set match the
4456    /// cached snapshot, the same `Arc` is returned so miss handling can tell that
4457    /// no new packs appeared.
4458    fn cached_pack_registry(
4459        &self,
4460        pack_dir: &Path,
4461        force_rescan: bool,
4462    ) -> Result<Arc<PackRegistrySnapshot>> {
4463        if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
4464            return Ok(registry);
4465        }
4466        let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
4467        if let Ok(mut cache) = self.pack_registry.lock() {
4468            match cache.as_ref() {
4469                Some(existing)
4470                    if existing.fingerprint == scanned.fingerprint
4471                        && same_registered_pack_set(&existing.packs, &scanned.packs) =>
4472                {
4473                    return Ok(Arc::clone(existing));
4474                }
4475                _ => {
4476                    *cache = Some(Arc::clone(&scanned));
4477                }
4478            }
4479        }
4480        Ok(scanned)
4481    }
4482
4483    fn find_in_pack_registry(
4484        &self,
4485        registry: Arc<PackRegistrySnapshot>,
4486        oid: &ObjectId,
4487    ) -> Result<Option<PackLookup>> {
4488        let hinted_pack_index = registry.cached_hint();
4489        if let Some(pack_index) = hinted_pack_index {
4490            let pack = &registry.packs[pack_index];
4491            let index = pack.index(self.format)?;
4492            if let Some(entry) = index.find(oid) {
4493                return Ok(Some(PackLookup::from_registered(
4494                    Arc::clone(pack),
4495                    entry.offset,
4496                )));
4497            }
4498        }
4499        for (pack_index, pack) in registry.packs.iter().enumerate() {
4500            if Some(pack_index) == hinted_pack_index {
4501                continue;
4502            }
4503            let index = pack.index(self.format)?;
4504            if let Some(entry) = index.find(oid) {
4505                registry.remember_hint(pack_index);
4506                return Ok(Some(PackLookup::from_registered(
4507                    Arc::clone(pack),
4508                    entry.offset,
4509                )));
4510            }
4511        }
4512        Ok(None)
4513    }
4514
4515    /// Read `oid` from any pack *other than* the one named by `exclude`, used as
4516    /// a corruption fallback: a redundant packed copy survives one pack's
4517    /// damage. Scans the on-disk `.idx` files directly (bypassing the registry
4518    /// cache, whose first hit is the excluded pack) and decodes from the first
4519    /// other pack that both indexes the object and parses cleanly.
4520    fn read_packed_object_from_other_packs(
4521        &self,
4522        oid: &ObjectId,
4523        exclude: &PackLookup,
4524    ) -> Result<Option<Arc<EncodedObject>>> {
4525        let pack_dir = self.objects_dir.join("pack");
4526        let Ok(entries) = fs::read_dir(&pack_dir) else {
4527            return Ok(None);
4528        };
4529        let excluded_pack = exclude.pack_path().to_path_buf();
4530        for entry in entries {
4531            let idx_path = entry?.path();
4532            if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
4533                continue;
4534            }
4535            let pack_path = idx_path.with_extension("pack");
4536            if pack_path == excluded_pack {
4537                continue;
4538            }
4539            let Ok(idx_bytes) = fs::read(&idx_path) else {
4540                continue;
4541            };
4542            let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
4543                continue;
4544            };
4545            let Some(entry) = index.find(oid) else {
4546                continue;
4547            };
4548            let candidate = PackLookup::from_path(pack_path, entry.offset);
4549            if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
4550                return Ok(Some(object));
4551            }
4552        }
4553        Ok(None)
4554    }
4555
4556    fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
4557        if oid.format() != self.format {
4558            return Err(GitError::InvalidObjectId(format!(
4559                "object {oid} uses {}, store uses {}",
4560                oid.format().name(),
4561                self.format.name()
4562            )));
4563        }
4564        let pack_dir = self.objects_dir.join("pack");
4565        // Hot path: a previously cached pack registry or multi-pack-index already
4566        // names every pack, and locating `oid` in them is pure in-memory index
4567        // work. Try that first so a warm handle does not parse indexes or hash
4568        // pack paths on every lookup.
4569        if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
4570            && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
4571        {
4572            return Ok(Some(pack_paths));
4573        }
4574        if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
4575            && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
4576        {
4577            return Ok(Some(pack_paths));
4578        }
4579
4580        if !pack_dir.exists() {
4581            return Ok(None);
4582        }
4583        if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
4584            return Ok(Some(pack_paths));
4585        }
4586        // Search the cached registry first. On a complete miss, re-scan the
4587        // directory once (picking up any pack added since the registry was
4588        // cached) and search again, so newly written packs are still found.
4589        let registry = self.cached_pack_registry(&pack_dir, false)?;
4590        if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(&registry), oid)? {
4591            return Ok(Some(pack_paths));
4592        }
4593        let refreshed = self.cached_pack_registry(&pack_dir, true)?;
4594        if Arc::ptr_eq(&registry, &refreshed) {
4595            // The re-scan produced the same registry, so nothing new appeared.
4596            return Ok(None);
4597        }
4598        self.find_in_pack_registry(refreshed, oid)
4599    }
4600
4601    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
4602        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
4603            return Ok(None);
4604        };
4605        let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
4606        let trailer_offset = pack_len
4607            .checked_sub(self.format.raw_len() as u64)
4608            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
4609        let index = pack_lookup.pack_index(self)?;
4610        let pack = pack_lookup.pack_bytes(self)?;
4611        let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
4612        let delta_base_offset = match &delta_base {
4613            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
4614            Some(PackDeltaBase::Ref(_)) | None => None,
4615        };
4616        let offset_info = scan_pack_index_offsets(
4617            &index,
4618            pack_lookup.offset,
4619            trailer_offset,
4620            delta_base_offset,
4621        )?;
4622        let disk_size = offset_info
4623            .end_offset
4624            .checked_sub(pack_lookup.offset)
4625            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
4626        let deltabase = match delta_base {
4627            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
4628                // scan_pack_index_offsets returns Err when delta_base_offset is
4629                // Some but no matching entry is found, so this is unreachable for
4630                // valid packs; propagate as an error rather than panic to keep a
4631                // malformed pack from taking down the process if that invariant
4632                // ever drifts.
4633                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
4634            })?,
4635            Some(PackDeltaBase::Ref(oid)) => oid,
4636            None => zero_oid(self.format)?,
4637        };
4638        Ok(Some(ObjectStorageInfo {
4639            disk_size,
4640            deltabase,
4641        }))
4642    }
4643
4644    fn find_midx_pack_containing(
4645        &self,
4646        pack_dir: &Path,
4647        oid: &ObjectId,
4648    ) -> Result<Option<PackLookup>> {
4649        let midx_path = pack_dir.join("multi-pack-index");
4650        let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
4651            return Ok(None);
4652        };
4653        self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
4654    }
4655
4656    fn midx_oid_lookup_pack_paths(
4657        &self,
4658        pack_dir: &Path,
4659        midx: &MultiPackIndexOidLookup,
4660        oid: &ObjectId,
4661    ) -> Result<Option<PackLookup>> {
4662        let Some(entry) = midx.find(oid)? else {
4663            return Ok(None);
4664        };
4665        let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
4666            return Err(GitError::InvalidFormat(
4667                "multi-pack-index object points past pack table".into(),
4668            ));
4669        };
4670        let pack_file_name = pack_name
4671            .strip_suffix(".idx")
4672            .map(|stem| format!("{stem}.pack"))
4673            .unwrap_or_else(|| pack_name.to_string());
4674        let pack = pack_dir.join(pack_file_name);
4675        Ok(Some(PackLookup::from_path(pack, entry.offset)))
4676    }
4677
4678    fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
4679        let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
4680        let cache = self.multi_pack_oid_lookups.lock().ok()?;
4681        cache.get(&midx_path).map(Arc::clone)
4682    }
4683
4684    /// The pack registry for `pack_dir` *only if already scanned and cached* —
4685    /// never touches the filesystem. Used by the lookup hot path to skip
4686    /// per-object pack-dir metadata checks once a handle is warm. A cold cache
4687    /// returns `None`, so the caller falls back to the scanning path. A complete
4688    /// miss still forces one rescan, preserving the new-pack discovery semantics.
4689    fn cached_loaded_pack_registry(
4690        &self,
4691        _pack_dir: &Path,
4692    ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
4693        let cache = match self.pack_registry.lock() {
4694            Ok(cache) => cache,
4695            Err(_) => return Ok(None),
4696        };
4697        Ok(cache.as_ref().map(Arc::clone))
4698    }
4699}
4700
4701fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
4702    if prefix.len() < 4 || prefix.len() > format.hex_len() {
4703        return Err(GitError::InvalidObjectId(format!(
4704            "expected 4 to {} hex digits for {}, got {}",
4705            format.hex_len(),
4706            format.name(),
4707            prefix.len()
4708        )));
4709    }
4710    if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4711        return Err(GitError::InvalidObjectId(format!(
4712            "non-hex object id prefix {prefix}"
4713        )));
4714    }
4715    Ok(())
4716}
4717
4718fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
4719    oid.to_hex()
4720        .as_bytes()
4721        .iter()
4722        .zip(prefix.as_bytes())
4723        .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
4724}
4725
4726fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
4727    match fs::metadata(pack_dir) {
4728        Ok(metadata) => Ok(metadata.modified().ok()),
4729        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
4730        Err(err) => Err(GitError::Io(err.to_string())),
4731    }
4732}
4733
4734/// Scan `pack_dir` for `.idx` files that have a matching `.pack` sibling and
4735/// parse each index into a registered pack. An `.idx` without its `.pack` is
4736/// skipped (an orphan index cannot serve objects), matching the prior per-read
4737/// behavior.
4738fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
4739    let modified = pack_dir_modified(pack_dir)?;
4740    let entries = match fs::read_dir(pack_dir) {
4741        Ok(entries) => entries,
4742        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4743            return Ok(PackRegistrySnapshot::new(
4744                PackDirFingerprint {
4745                    modified,
4746                    idx_count: 0,
4747                    pack_count: 0,
4748                },
4749                Vec::new(),
4750            ));
4751        }
4752        Err(err) => return Err(GitError::Io(err.to_string())),
4753    };
4754
4755    let mut idx_paths = Vec::new();
4756    let mut idx_count = 0;
4757    let mut pack_count = 0;
4758    for entry in entries {
4759        let entry = entry?;
4760        let path = entry.path();
4761        match path.extension().and_then(|ext| ext.to_str()) {
4762            Some("idx") => {
4763                idx_count += 1;
4764                idx_paths.push(path);
4765            }
4766            Some("pack") => {
4767                pack_count += 1;
4768            }
4769            _ => {}
4770        }
4771    }
4772
4773    let mut packs = Vec::new();
4774    for idx in idx_paths {
4775        let pack = idx.with_extension("pack");
4776        let Ok(metadata) = fs::metadata(&pack) else {
4777            continue;
4778        };
4779        let modified = pack_sort_modified(&metadata);
4780        packs.push((
4781            modified,
4782            metadata.len(),
4783            Arc::new(RegisteredPack::new(idx, pack)),
4784        ));
4785    }
4786    // Git keeps a most-recently-used pack order; seed ours with newer/larger
4787    // packs before falling back to the path. In repositories with many packs,
4788    // this avoids parsing a long run of unrelated `.idx` files before the first
4789    // lookup establishes the recent-pack hint.
4790    packs.sort_by(|left, right| {
4791        right
4792            .0
4793            .cmp(&left.0)
4794            .then_with(|| right.1.cmp(&left.1))
4795            .then_with(|| left.2.idx.cmp(&right.2.idx))
4796    });
4797    let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
4798    Ok(PackRegistrySnapshot::new(
4799        PackDirFingerprint {
4800            modified,
4801            idx_count,
4802            pack_count,
4803        },
4804        packs,
4805    ))
4806}
4807
/// Sort key for a pack's modification time: whole seconds and sub-second
/// nanoseconds since the Unix epoch.
///
/// Falls back to `(0, 0)` when the platform cannot report a modification time
/// or the time lies before the epoch, so such packs sort as the oldest.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(modified) = metadata.modified() else {
        return (0, 0);
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
4820
4821/// Whether two pack registries reference the same pack/index paths (order is
4822/// already normalized by [`scan_pack_registry`]).
4823fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
4824    left.len() == right.len()
4825        && left
4826            .iter()
4827            .zip(right.iter())
4828            .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
4829}
4830
/// Collect the alternate object directories that apply to `objects_dir`.
///
/// Two sources are consulted, in this order:
///
/// 1. The `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable. It is split
///    with [`env::split_paths`], so the platform's path-list separator is
///    honored (`:` on Unix, `;` on Windows, where `:` is part of drive-letter
///    paths) and entries that are not valid UTF-8 are kept byte-for-byte
///    instead of being lossily re-encoded. Empty entries are skipped.
/// 2. `<objects_dir>/info/alternates`, one path per line. A trailing `\r` is
///    stripped, blank lines and lines starting with `#` are skipped, and lines
///    that are not valid UTF-8 are ignored. Relative paths are resolved
///    against `objects_dir`.
///
/// A missing or unreadable alternates file simply contributes nothing. The
/// returned directories are not checked for existence.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            // Tolerate CRLF line endings.
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
4861
4862impl ObjectReader for FileObjectDatabase {
4863    fn has_shallow_grafts(&self) -> bool {
4864        !self
4865            .shallow_grafts
4866            .get_or_init(|| {
4867                let shallow_file = self
4868                    .objects_dir
4869                    .parent()
4870                    .map(|git_dir| git_dir.join("shallow"));
4871                match shallow_file {
4872                    Some(path) => read_shallow_grafts(&path, self.format),
4873                    None => HashSet::new(),
4874                }
4875            })
4876            .is_empty()
4877    }
4878
4879    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
4880        self.shallow_grafts
4881            .get_or_init(|| {
4882                let shallow_file = self
4883                    .objects_dir
4884                    .parent()
4885                    .map(|git_dir| git_dir.join("shallow"));
4886                match shallow_file {
4887                    Some(path) => read_shallow_grafts(&path, self.format),
4888                    None => HashSet::new(),
4889                }
4890            })
4891            .contains(oid)
4892    }
4893
    /// Read `oid`, consulting every store this database knows about.
    ///
    /// Probe order, as implemented below:
    ///
    /// 1. the implied empty-tree object, answered without touching storage;
    /// 2. the pack located by `find_pack_containing`; a `NotFound` from that
    ///    read falls through to the later steps, while any other error starts
    ///    a fallback chain (loose store, other packs, alternates) and is
    ///    returned only if none of those yields the object;
    /// 3. the loose store (a non-`NotFound` error is remembered, not returned
    ///    yet);
    /// 4. `read_packed_object`;
    /// 5. each alternate, through a database built with `without_alternates`;
    /// 6. one more loose read after invalidating the loose presence cache.
    ///
    /// # Errors
    ///
    /// * The packed-read error from step 2 when every fallback source fails;
    ///   an error raised by `read_packed_object_from_other_packs` itself is
    ///   propagated instead.
    /// * Any non-`NotFound` error from an alternate in step 5 or from the
    ///   retry in step 6.
    /// * The remembered loose error from step 3, if there was one.
    /// * Otherwise the "object not found" error built with
    ///   `MissingObjectContext::Read`.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
        if let Some(object) = implied_empty_tree_object(self.format, oid) {
            return Ok(object);
        }
        // A corrupt loose copy must not shadow a good packed copy: git's
        // `oid_object_info_extended` consults every source, so a repacked object
        // whose loose file was later corrupted still reads fine from the pack. If
        // a packed copy exists, prefer it WITHOUT touching the corrupt loose file
        // (which would otherwise emit a spurious `inflate:` diagnostic on each
        // probe). Only when no pack copy exists do we read (and, if corrupt,
        // surface the error from) the loose file.
        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
            match self.read_packed_object_at_lookup(oid, &pack_lookup) {
                Ok(object) => return Ok(object),
                Err(GitError::NotFound(_)) => {}
                // A corrupt packed copy must not be fatal when another good copy
                // exists: git's `oid_object_info_extended` keeps consulting the
                // remaining sources (loose, other packs, alternates) when a pack
                // read fails. Fall through to the loose/other-pack probes and
                // only surface the packed error if every source comes up empty.
                Err(packed_err) => {
                    if let Ok(object) = self.loose.read_object(oid) {
                        return Ok(object);
                    }
                    // Try any *other* pack that also holds the object (a
                    // redundant copy survives one pack's corruption).
                    if let Some(object) =
                        self.read_packed_object_from_other_packs(oid, &pack_lookup)?
                    {
                        return Ok(object);
                    }
                    for alternate in &self.alternates {
                        if let Ok(object) =
                            Self::without_alternates(alternate, self.format).read_object(oid)
                        {
                            return Ok(object);
                        }
                    }
                    return Err(packed_err);
                }
            }
        }
        // Keep a non-`NotFound` loose failure aside: it is reported only if no
        // other store can supply the object.
        let loose_err = match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => None,
            Err(err) => Some(err),
        };
        if let Some(object) = self.read_packed_object(oid)? {
            return Ok(object);
        }
        for alternate in &self.alternates {
            match Self::without_alternates(alternate, self.format).read_object(oid) {
                Ok(object) => return Ok(object),
                Err(GitError::NotFound(_)) => {}
                Err(err) => return Err(err),
            }
        }
        // Hard miss against every store. If an earlier enumeration built a loose
        // cache, an object written loose afterward by a sibling handle could have
        // been skipped above. Mirror git's `oid_object_info_extended`
        // reprepare-on-miss: drop stale cache state and retry an exact loose path
        // probe once before declaring the object missing.
        self.loose.invalidate_cache();
        match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => {}
            Err(err) => return Err(err),
        }
        // No good copy in any store. If the local loose copy was corrupt (not
        // merely absent), surface that error — it is more specific than a plain
        // "not found".
        if let Some(err) = loose_err {
            return Err(err);
        }
        Err(GitError::object_not_found_in(
            *oid,
            MissingObjectContext::Read,
        ))
    }
4973}
4974
4975impl ObjectWriter for FileObjectDatabase {
4976    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4977        // Mirror git's freshen semantics (`write_object_file`:
4978        // `freshen_packed_object || freshen_loose_object`): an object already
4979        // present anywhere in the database — loose, packed, or through an
4980        // alternate — is not written again, so e.g. `git add` after
4981        // `git repack -ad` does not resurrect a loose copy of a packed object.
4982        let oid = object.object_id(self.format)?;
4983        if self.contains(&oid)? {
4984            return Ok(oid);
4985        }
4986        self.loose.write_object(object)
4987    }
4988}
4989
4990fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
4991    if path.exists() {
4992        return Ok(());
4993    }
4994    let parent = path
4995        .parent()
4996        .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
4997    fs::create_dir_all(parent)?;
4998    let temp_path = unique_temp_path(parent);
4999    let write_result = (|| -> Result<()> {
5000        {
5001            let mut file = fs::OpenOptions::new()
5002                .write(true)
5003                .create_new(true)
5004                .open(&temp_path)?;
5005            file.write_all(bytes)?;
5006            file.sync_all()?;
5007        }
5008        match fs::rename(&temp_path, path) {
5009            Ok(()) => Ok(()),
5010            Err(_) if path.exists() => {
5011                let _ = fs::remove_file(&temp_path);
5012                Ok(())
5013            }
5014            Err(err) => Err(GitError::Io(err.to_string())),
5015        }
5016    })();
5017    if write_result.is_err() {
5018        let _ = fs::remove_file(&temp_path);
5019    }
5020    write_result
5021}
5022
5023fn write_promisor_pack_sidecar(
5024    pack_dir: &Path,
5025    pack_name: &str,
5026    promisor: bool,
5027) -> Result<Option<PathBuf>> {
5028    if !promisor {
5029        return Ok(None);
5030    }
5031    let path = pack_dir.join(format!("{pack_name}.promisor"));
5032    write_pack_component(&path, b"")?;
5033    Ok(Some(path))
5034}
5035
5036/// Maximum number of bytes git will inflate when reading a loose object's
5037/// `"<type> <size>\0"` header (git's `MAX_HEADER_LEN` in object-file.c). The NUL
5038/// terminator must land within this window, so a header of 32 or more non-NUL
5039/// bytes is rejected as too long.
5040const MAX_LOOSE_HEADER_LEN: usize = 32;
5041
5042/// git's exact `error:`-level diagnostic for a loose object whose header overflows
5043/// `MAX_LOOSE_HEADER_LEN` (object-file.c: `error(_("header for %s too long, exceeds
5044/// %d bytes"), ...)`). Shared by the header-only and full-read paths so both surface
5045/// byte-identical text.
5046fn loose_header_too_long(oid: &ObjectId) -> GitError {
5047    GitError::InvalidObject(format!(
5048        "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
5049    ))
5050}
5051
5052/// git's `error:`-level diagnostic when the loose framing header cannot be inflated at
5053/// all (object-file.c `loose_object_info`, the `ULHR_BAD` arm: `error(_("unable to
5054/// unpack %s header"), ...)`).
5055fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
5056    GitError::InvalidObject(format!("unable to unpack {oid} header"))
5057}
5058
/// Classify an inflate failure from the two-byte zlib stream header, yielding
/// the text git-zlib.c prints via `error("inflate: %s (%s)", ...)`.
///
/// The checks follow zlib's own `inflate()` HEAD-state validation, in order:
///
/// 1. the FCHECK checksum over CMF and FLG (big-endian value divisible by 31),
/// 2. the compression method (low nibble of CMF must be 8),
/// 3. the window size (high nibble of CMF must not exceed 7),
/// 4. the FDICT preset-dictionary bit (zlib reports `Z_NEED_DICT` with a NULL
///    `msg`, which git renders as "(no message)").
///
/// Returns `None` when `input` holds fewer than two bytes or the header passes
/// every check: flate2 does not surface zlib's per-case `msg` strings for
/// failures past the stream header, so no diagnostic is fabricated for them.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let header_check = u16::from_be_bytes([cmf, flg]);
    let diagnostic = if header_check % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if cmf & 0x0f != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if cmf >> 4 > 7 {
        "inflate: data stream error (invalid window size)"
    } else if flg & 0x20 != 0 {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(diagnostic)
}
5082
5083/// Print the `error: inflate: ...` line git's zlib wrapper emits the moment
5084/// `inflate()` fails, when the failure is classifiable from the stream header.
5085fn emit_inflate_diagnostic(input: &[u8]) {
5086    if let Some(diagnostic) = inflate_header_diagnostic(input) {
5087        eprintln!("error: {diagnostic}");
5088    }
5089}
5090
/// Integrity verdict for a single loose object file, as classified by
/// [`LooseObjectStore::verify_object`].
///
/// A missing loose file is not a verdict: `verify_object` reports that case as
/// `Ok(None)` instead of using one of these variants.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// Inflated, parsed, and re-hashed to its path-derived oid.
    Ok,
    /// Readable and well-formed, but its content hashes to a different oid
    /// (a loose file stored under the wrong path). `actual` is the id the
    /// content really hashes to.
    HashMismatch { actual: ObjectId },
    /// Unreadable: corrupt zlib stream, truncated content, trailing garbage
    /// after the zlib stream, or unparseable header.
    /// The `error:`-level diagnostics were already printed to stderr.
    Corrupt,
}
5104
/// Access to the loose-object part of an object database: one zlib-compressed
/// file per object, stored under `objects_dir` in a two-hex-digit fanout
/// directory. Cloning the store shares the presence cache (it sits behind an
/// `Arc`).
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Directory that holds the `XX/` fanout subdirectories of loose objects.
    objects_dir: PathBuf,
    /// Hash format of this store; object ids of any other format are rejected.
    format: ObjectFormat,
    /// Lazily-populated set of loose object ids present on disk, mirroring git's
    /// `loose_objects_cache` (object-file.c). A lookup scans the queried
    /// `objects/XX/` fanout once; afterward misses in that fanout are in-memory
    /// checks instead of failed exact-path opens. Shared across
    /// `FileObjectDatabase` clones via `Arc` so a write through one handle is
    /// visible to reads through another; cleared by `refresh_read_cache` so
    /// objects installed out-of-band (fetch, repack) become visible. Writes
    /// extend the set in place rather than invalidating it.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
5119
5120impl LooseObjectStore {
5121    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
5122        Self {
5123            objects_dir: objects_dir.into(),
5124            format,
5125            loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
5126        }
5127    }
5128
5129    /// Whether `oid` is present according to the loose-object cache, populating
5130    /// the cache on first use. Returns `None` when the lock cannot be trusted or
5131    /// the scan fails; callers should fall back to an exact filesystem probe in
5132    /// that case so a cache-building problem cannot change read semantics.
5133    fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
5134        let mut guard = self.loose_cache.lock().ok()?;
5135        let fanout = oid.as_bytes()[0];
5136        if !guard.loaded_fanouts.contains(&fanout) {
5137            collect_loose_fanout_object_ids(
5138                &self.objects_dir,
5139                self.format,
5140                fanout,
5141                &mut guard.objects,
5142            )
5143            .ok()?;
5144            guard.loaded_fanouts.insert(fanout);
5145        }
5146        Some(guard.objects.contains(oid))
5147    }
5148
5149    /// Populate the loose-object cache and return the sorted ids. This mirrors
5150    /// git's `odb_loose_cache` lazy fill and is reserved for operations that
5151    /// really need loose-object enumeration.
5152    fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
5153        if let Ok(mut guard) = self.loose_cache.lock() {
5154            guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
5155            guard.loaded_fanouts = (0..=u8::MAX).collect();
5156            let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
5157            ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
5158            return Ok(ids);
5159        }
5160        loose_object_ids(&self.objects_dir, self.format)
5161    }
5162
5163    /// Record `oid` as present in loose storage so subsequent reads find it
5164    /// without a rescan. A no-op when the cache has not been populated yet (the
5165    /// eventual lazy scan will pick the object up) or the lock is poisoned.
5166    fn note_loose_write(&self, oid: ObjectId) {
5167        if let Ok(mut guard) = self.loose_cache.lock() {
5168            guard.objects.insert(oid);
5169        }
5170    }
5171
5172    /// Drop the in-memory loose set so the next access rescans the fanout. Called
5173    /// by `FileObjectDatabase::refresh_read_cache` after out-of-band installs.
5174    pub(crate) fn invalidate_cache(&self) {
5175        if let Ok(mut guard) = self.loose_cache.lock() {
5176            *guard = LoosePresenceCache::default();
5177        }
5178    }
5179
5180    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
5181        Self::new(repository_objects_dir(git_dir), format)
5182    }
5183
5184    fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
5185        if oid.format() != self.format {
5186            return Err(GitError::InvalidObjectId(format!(
5187                "object {oid} uses {}, store uses {}",
5188                oid.format().name(),
5189                self.format.name()
5190            )));
5191        }
5192        Ok(())
5193    }
5194
5195    pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
5196        self.validate_oid_format(oid)?;
5197        let hex = oid.to_hex();
5198        Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
5199    }
5200
5201    pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
5202        self.validate_oid_format(oid)?;
5203        if self.cached_loose_presence(oid) == Some(false) {
5204            return Ok(false);
5205        }
5206        let path = self.object_path(oid)?;
5207        Ok(path.exists())
5208    }
5209
5210    pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
5211        self.validate_oid_format(oid)?;
5212        if self.cached_loose_presence(oid) == Some(false) {
5213            return Ok(None);
5214        }
5215        let path = self.object_path(oid)?;
5216        match fs::metadata(path) {
5217            Ok(metadata) => Ok(Some(metadata.len())),
5218            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
5219            Err(err) => Err(GitError::Io(err.to_string())),
5220        }
5221    }
5222
5223    /// The object type and content size of `oid` from loose storage, inflating only
5224    /// the framing header (`"<type> <size>\0"`) and not the body. Output-limited
5225    /// reads keep miniz from inflating past the header even for large objects.
5226    /// Returns `Ok(None)` when the loose object is absent.
5227    pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
5228        self.validate_oid_format(oid)?;
5229        if self.cached_loose_presence(oid) == Some(false) {
5230            return Ok(None);
5231        }
5232        let path = self.object_path(oid)?;
5233        let compressed = match fs::read(&path) {
5234            Ok(compressed) => compressed,
5235            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
5236            Err(err) => return Err(GitError::Io(err.to_string())),
5237        };
5238        match inflate_loose_header(&compressed)? {
5239            LooseHeader::Ok(header) => {
5240                let header = std::str::from_utf8(&header)
5241                    .map_err(|err| GitError::InvalidObject(err.to_string()))?;
5242                let (kind, size) = header
5243                    .split_once(' ')
5244                    .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
5245                let object_type = kind.parse::<ObjectType>()?;
5246                let size = size
5247                    .parse::<u64>()
5248                    .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
5249                Ok(Some((object_type, size)))
5250            }
5251            LooseHeader::Bad => {
5252                // git's ULHR_BAD: the zlib wrapper's `error: inflate: ...` line, then
5253                // "unable to unpack <oid> header".
5254                emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
5255                Err(loose_unpack_header_failed(oid))
5256            }
5257            LooseHeader::TooLong => {
5258                // git inflates only the first `MAX_LOOSE_HEADER_LEN` bytes
5259                // (object-file.c `unpack_loose_header`) and reports ULHR_TOO_LONG when
5260                // no NUL terminator lands within them — whether the stream simply ends
5261                // early or overflows the window. Both collapse to the same diagnostic.
5262                Err(loose_header_too_long(oid))
5263            }
5264        }
5265    }
5266
    /// Loose object ids in this store, sorted by hex.
    ///
    /// Delegates to `loose_object_ids_cached`, so every call rescans the loose
    /// store and refreshes the presence cache as a side effect; errors from
    /// that enumeration are propagated.
    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
        self.loose_object_ids_cached()
    }
5271
    /// fsck's loose-object integrity probe, mirroring C git's `read_loose_object`
    /// (object-file.c) as called from `fsck_loose` (builtin/fsck.c): inflate and
    /// parse the file at `oid`'s loose path, then re-hash its content against the
    /// path-derived oid. `display_path` appears verbatim in the `error:`-level
    /// diagnostics — the path-form messages of `read_loose_object` ("unable to
    /// unpack header of <path>"), unlike the oid-form messages of the normal read
    /// path. Returns `Ok(None)` when no loose file exists for `oid`.
    ///
    /// The file is read directly; the presence cache is not consulted.
    ///
    /// # Returns
    ///
    /// * `Ok(Some(LooseObjectIntegrity::Corrupt))` — inflation failed, the
    ///   header has no NUL inside the header window, bytes trail the zlib
    ///   stream, the body is shorter than the declared size, or the framing
    ///   does not parse. The matching diagnostics have been written to stderr.
    /// * `Ok(Some(LooseObjectIntegrity::HashMismatch { actual }))` — the
    ///   content parses but hashes to `actual` rather than `oid`.
    /// * `Ok(Some(LooseObjectIntegrity::Ok))` — the content hashes to `oid`.
    ///
    /// # Errors
    ///
    /// Errors from `object_path` (an `oid` of the wrong hash format),
    /// `GitError::Io` for read failures other than "not found", and any error
    /// from hashing the parsed object.
    pub fn verify_object(
        &self,
        oid: &ObjectId,
        display_path: &str,
    ) -> Result<Option<LooseObjectIntegrity>> {
        let path = self.object_path(oid)?;
        let compressed = match fs::read(&path) {
            Ok(compressed) => compressed,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
            Err(err) => return Err(GitError::Io(err.to_string())),
        };
        let mut decoder = ZlibDecoder::new(compressed.as_slice());
        let mut framed = Vec::new();
        if decoder.read_to_end(&mut framed).is_err() {
            emit_inflate_diagnostic(&compressed);
            // git inflates the header first (`unpack_loose_header`), then the body
            // (`unpack_loose_rest`). If the header inflated (its NUL is visible in
            // the partial output) but the body broke, that is a *content*
            // corruption: git's `unpack_loose_rest` prints `corrupt loose object
            // '<oid>'` (status != Z_STREAM_END), then `read_loose_object` adds
            // `unable to unpack contents of <path>`. If inflation died before the
            // header materialized, only the header message fires.
            if framed_loose_header_terminated(&framed) {
                eprintln!("error: corrupt loose object '{oid}'");
                eprintln!("error: unable to unpack contents of {display_path}");
            } else {
                eprintln!("error: unable to unpack header of {display_path}");
            }
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        if !framed_loose_header_terminated(&framed) {
            // ULHR_TOO_LONG collapses into the same path-form message here: C's
            // `read_loose_object` treats every non-OK `unpack_loose_header` alike.
            eprintln!("error: unable to unpack header of {display_path}");
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        // git's `unpack_loose_rest`/`check_stream_oid` reject trailing bytes after
        // the zlib stream: a fully-inflated object whose compressed input was not
        // entirely consumed is `garbage at end of loose object '<oid>'`, then
        // `object corrupt or missing: <path>` from `fsck_loose`. (read_to_end
        // stops at Z_STREAM_END and silently ignores the trailing bytes, so we
        // compare consumed input against the file size ourselves.)
        if (decoder.total_in() as usize) < compressed.len() {
            // git's `unpack_loose_rest` prints `garbage at end of loose object`
            // then returns NULL, so `read_loose_object` also prints `unable to
            // unpack contents of <path>`.
            eprintln!("error: garbage at end of loose object '{oid}'");
            eprintln!("error: unable to unpack contents of {display_path}");
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        // A truncated object can inflate to a clean stream end yet yield fewer
        // body bytes than the header's declared size. git's `unpack_loose_rest`
        // inflates exactly `size` bytes and, finding the stream ends short,
        // prints `corrupt loose object '<oid>'`; `read_loose_object` then adds
        // `unable to unpack contents of <path>`. Detect the short body here so it
        // is not misreported as a header-parse failure.
        if let Some(declared) = loose_header_declared_size(&framed) {
            // Body length is everything after the header's NUL terminator.
            let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
            let body_len = framed.len() - (nul + 1).min(framed.len());
            if body_len < declared {
                eprintln!("error: corrupt loose object '{oid}'");
                eprintln!("error: unable to unpack contents of {display_path}");
                return Ok(Some(LooseObjectIntegrity::Corrupt));
            }
        }
        let Ok(object) = parse_framed_object(&framed) else {
            // Distinguish git's two header-parse failures: a structurally valid
            // `"<word> <size>\0"` header whose *type word* is not a known object
            // type yields `unable to parse type from header '<header>'`, while a
            // genuinely malformed header yields `unable to parse header`.
            if let Some(header) = loose_header_with_unknown_type(&framed) {
                eprintln!("error: unable to parse type from header '{header}' of {display_path}");
            } else {
                eprintln!("error: unable to parse header of {display_path}");
            }
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        };
        // Re-hash the parsed object and compare against the path-derived id.
        let actual = object.object_id(self.format)?;
        if &actual != oid {
            return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
        }
        Ok(Some(LooseObjectIntegrity::Ok))
    }
5362}
5363
5364/// Whether the inflated framing bytes contain the header's NUL terminator within
5365/// git's `MAX_HEADER_LEN` window (object-file.c `unpack_loose_header`'s success
5366/// condition).
5367fn framed_loose_header_terminated(framed: &[u8]) -> bool {
5368    framed
5369        .iter()
5370        .take(MAX_LOOSE_HEADER_LEN)
5371        .any(|byte| *byte == 0)
5372}
5373
5374/// If the framing has a structurally valid `"<word> <size>\0"` header whose body
5375/// length matches `<size>` but whose `<word>` is not a known object type, return
5376/// the header string (the bytes before the NUL). Mirrors git's
5377/// `parse_loose_header` reporting `unable to parse type from header '<header>'`.
5378fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
5379    let nul = framed.iter().position(|&b| b == 0)?;
5380    let header = std::str::from_utf8(&framed[..nul]).ok()?;
5381    let (kind, size) = header.split_once(' ')?;
5382    let size: usize = size.parse().ok()?;
5383    // Body length must match the declared size (otherwise it is a different
5384    // corruption, handled by the generic path).
5385    if framed.len() - (nul + 1) != size {
5386        return None;
5387    }
5388    // A known type word would have parsed successfully upstream; only return
5389    // when the word is genuinely unknown.
5390    if kind.parse::<ObjectType>().is_ok() {
5391        return None;
5392    }
5393    Some(header.to_string())
5394}
5395
/// Extracts the size declared by a loose object's `"<type> <size>\0"` header.
///
/// Returns `None` unless the framing contains a NUL, the bytes before it are
/// UTF-8, and they split at the first space into a word followed by a value
/// that parses as `usize`. Callers compare the result against the inflated
/// body length to detect a truncated object.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let terminator = framed.iter().position(|&byte| byte == 0)?;
    std::str::from_utf8(&framed[..terminator])
        .ok()
        .and_then(|text| text.split_once(' '))
        .and_then(|(_type_word, digits)| digits.parse().ok())
}
5405
/// Outcome of inflating a loose object's header, mirroring git's
/// `unpack_loose_header` result codes (object-file.c `enum
/// unpack_loose_header_result`).
enum LooseHeader {
    /// ULHR_OK: a NUL-terminated header was found within the window. Carries the
    /// header bytes up to (not including) the NUL.
    Ok(Vec<u8>),
    /// ULHR_BAD: the zlib stream would not inflate (status != Z_OK/Z_STREAM_END).
    Bad,
    /// ULHR_TOO_LONG: the inflated output filled the header window with no NUL.
    TooLong,
}
5420
5421/// Inflate a loose object's *header* exactly as git's `unpack_loose_header` does
5422/// (object-file.c): a single bounded inflate into a `MAX_LOOSE_HEADER_LEN`-byte
5423/// output buffer, then look for the header-terminating NUL in what came out.
5424///
5425/// The byte budget is load-bearing for corruption parity: git inflates only up to
5426/// `MAX_HEADER_LEN` (32) bytes of *output* before stopping, so a `cat-file -s`/`-t`
5427/// header read detects a zlib data error only when it lands within those first 32
5428/// inflated bytes (the header plus the start of the body for a small object) — and
5429/// silently returns the header for corruption buried deeper in the body, which the
5430/// full-object read path catches instead. A byte-by-byte loop that stopped at the
5431/// NUL would never inflate into the corrupt region and miss the bit-error case
5432/// (t1060 "getting type of a corrupt blob fails"); feeding too much output budget
5433/// would over-detect relative to git. So this matches git's exact window.
5434fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
5435    let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
5436    let mut decompress = Decompress::new(true);
5437    // git feeds the whole mapped file as `avail_in` and inflates once into a
5438    // 32-byte `avail_out`; zlib stops at the output limit (Z_OK with avail_out==0)
5439    // or at the stream's end, propagating Z_DATA_ERROR for a corrupt stream.
5440    let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
5441    let produced = decompress.total_out() as usize;
5442    match status {
5443        Ok(_) => {
5444            let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
5445            match window.iter().position(|&byte| byte == 0) {
5446                Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
5447                // No NUL within the window: either the stream ended early or the
5448                // header overflows `MAX_LOOSE_HEADER_LEN`. git collapses both into
5449                // ULHR_TOO_LONG (object-file.c `unpack_loose_header`).
5450                None => Ok(LooseHeader::TooLong),
5451            }
5452        }
5453        // Any zlib error before a NUL materializes is git's ULHR_BAD.
5454        Err(_) => Ok(LooseHeader::Bad),
5455    }
5456}
5457
5458impl ObjectReader for LooseObjectStore {
5459    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
5460        self.validate_oid_format(oid)?;
5461        // Skip the `open()` (and its ENOENT) when an already-built loose cache
5462        // knows the id is absent. Without a cache, use an exact path probe; a
5463        // full fanout scan is far more expensive for one-shot packed-object reads.
5464        if self.cached_loose_presence(oid) == Some(false) {
5465            return Err(GitError::object_not_found_in(
5466                *oid,
5467                MissingObjectContext::Read,
5468            ));
5469        }
5470        let path = self.object_path(oid)?;
5471        let compressed = match fs::read(&path) {
5472            Ok(compressed) => compressed,
5473            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
5474                return Err(GitError::object_not_found_in(
5475                    *oid,
5476                    MissingObjectContext::Read,
5477                ));
5478            }
5479            Err(err) => return Err(GitError::Io(err.to_string())),
5480        };
5481        let mut decoder = ZlibDecoder::new(compressed.as_slice());
5482        let mut framed = Vec::new();
5483        if decoder.read_to_end(&mut framed).is_err() {
5484            emit_inflate_diagnostic(&compressed);
5485            // A stream that dies before the framing header materializes is git's
5486            // ULHR_BAD ("unable to unpack <oid> header"); with the header intact,
5487            // the body is what broke (`unpack_loose_rest`'s "corrupt loose
5488            // object").
5489            if !framed_loose_header_terminated(&framed) {
5490                return Err(loose_unpack_header_failed(oid));
5491            }
5492            return Err(GitError::InvalidObject(format!(
5493                "corrupt loose object '{oid}'"
5494            )));
5495        }
5496        // git only inflates the first `MAX_LOOSE_HEADER_LEN` bytes looking for the
5497        // header's NUL terminator before parsing the type; an over-long header is
5498        // rejected here (with git's diagnostic) rather than failing later as an
5499        // "unknown object type". Mirror that so `cat-file -p` matches upstream.
5500        if framed
5501            .iter()
5502            .take(MAX_LOOSE_HEADER_LEN)
5503            .all(|byte| *byte != 0)
5504        {
5505            return Err(loose_header_too_long(oid));
5506        }
5507        let object = parse_framed_object(&framed)?;
5508        // Trust the loose object's on-disk name rather than re-hashing its full body
5509        // on every read (see `verify_reads_enabled`); use `validate`/fsck or
5510        // `SLEY_VERIFY_READS` for an explicit integrity check.
5511        if verify_reads_enabled() {
5512            let actual = object.object_id(self.format)?;
5513            if &actual != oid {
5514                return Err(GitError::InvalidObject(format!(
5515                    "loose object {} hashes to {actual}",
5516                    path.display()
5517                )));
5518            }
5519        }
5520        Ok(Arc::new(object))
5521    }
5522}
5523
5524impl ObjectWriter for LooseObjectStore {
5525    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
5526        let oid = object.object_id(self.format)?;
5527        let path = self.object_path(&oid)?;
5528        if path.exists() {
5529            self.note_loose_write(oid);
5530            return Ok(oid);
5531        }
5532        let parent = path
5533            .parent()
5534            .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
5535        fs::create_dir_all(parent)?;
5536        let temp_path = unique_temp_path(parent);
5537        let write_result = (|| -> Result<()> {
5538            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5539            encoder.write_all(&object.framed_bytes())?;
5540            let compressed = encoder.finish()?;
5541            {
5542                let mut file = fs::OpenOptions::new()
5543                    .write(true)
5544                    .create_new(true)
5545                    .open(&temp_path)?;
5546                file.write_all(&compressed)?;
5547                // No fsync: git's default `core.fsync=none` fsyncs nothing on the
5548                // loose-object write path (object-file.c writes the temp file and
5549                // renames it without syncing unless `core.fsync` names
5550                // `loose-object`/`objects`/`all`, which it does not by default).
5551                // A per-object sync_all() here made `git add` of N files cost N
5552                // fsyncs — the dominant term in sley#27's 10x `add -u` slowdown —
5553                // for durability git itself does not provide by default. The
5554                // create_new temp + atomic rename below still guarantees the
5555                // object never appears half-written under its final name.
5556            }
5557            match fs::rename(&temp_path, &path) {
5558                Ok(()) => Ok(()),
5559                Err(_) if path.exists() => {
5560                    let _ = fs::remove_file(&temp_path);
5561                    Ok(())
5562                }
5563                Err(err) => Err(GitError::Io(err.to_string())),
5564            }
5565        })();
5566        if write_result.is_err() {
5567            let _ = fs::remove_file(&temp_path);
5568        }
5569        write_result?;
5570        self.note_loose_write(oid);
5571        Ok(oid)
5572    }
5573}
5574
5575fn unique_temp_path(parent: &Path) -> PathBuf {
5576    let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
5577    parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
5578}
5579
5580#[cfg(test)]
5581mod tests {
5582    use super::*;
5583    use sley_core::BString;
5584    use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
5585    use sley_pack::{PackFile, PackWriteOptions};
5586
5587    fn blob_of(byte: u8, len: usize) -> EncodedObject {
5588        EncodedObject::new(ObjectType::Blob, vec![byte; len])
5589    }
5590
5591    fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
5592        Arc::new(blob_of(byte, len))
5593    }
5594
5595    fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
5596        reader
5597            .read_object(oid)
5598            .expect("test operation should succeed")
5599            .as_ref()
5600            .clone()
5601    }
5602
5603    #[test]
5604    fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
5605        // Budget holds two ~1 KiB objects but not three.
5606        let one = cached_object_cost(&blob_of(0, 1000));
5607        let mut cache = LruCache::<u32>::new(one * 2 + 8);
5608        cache.put(1, cached_blob_of(b'a', 1000));
5609        cache.put(2, cached_blob_of(b'b', 1000));
5610        // Touch key 1 so key 2 becomes least-recently-used.
5611        assert!(cache.get(&1).is_some());
5612        cache.put(3, cached_blob_of(b'c', 1000));
5613        // Key 2 (LRU) is evicted; 1 and 3 remain.
5614        assert!(cache.get(&1).is_some());
5615        assert!(cache.get(&2).is_none());
5616        assert!(cache.get(&3).is_some());
5617    }
5618
5619    #[test]
5620    fn lru_cache_zero_budget_is_inert() {
5621        let mut cache = LruCache::<u32>::new(0);
5622        cache.put(1, cached_blob_of(b'a', 16));
5623        assert!(cache.get(&1).is_none());
5624    }
5625
5626    #[test]
5627    fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
5628        let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
5629        cache.put(1, cached_blob_of(b'a', 50));
5630        assert!(cache.get(&1).is_some());
5631        // An object that cannot fit is not cached, and it evicts the prior entry
5632        // stored under the same key (so we never serve a stale value for it).
5633        cache.put(1, cached_blob_of(b'b', 10_000));
5634        assert!(cache.get(&1).is_none());
5635        // A subsequent fitting insert under another key still works and accounting
5636        // is not corrupted by the oversized insert.
5637        cache.put(2, cached_blob_of(b'c', 50));
5638        assert!(cache.get(&2).is_some());
5639    }
5640
5641    #[test]
5642    fn lru_cache_replacing_entry_updates_byte_accounting() {
5643        // Budget holds two 500-byte objects (plus headroom) but not a 500 + a
5644        // ~1900-byte object.
5645        let small = cached_object_cost(&blob_of(0, 500));
5646        let mut cache = LruCache::<u32>::new(small * 2 + 200);
5647        cache.put(1, cached_blob_of(b'a', 500));
5648        cache.put(2, cached_blob_of(b'b', 500));
5649        assert!(cache.get(&1).is_some());
5650        assert!(cache.get(&2).is_some());
5651        // Replace key 2 (now MRU after the gets above re-ordered 1 then 2) with a
5652        // bigger value that still fits the budget alone but makes the running total
5653        // exceed it; the LRU (key 1) is evicted while the replaced key 2 stays.
5654        // This exercises the replace-path accounting.
5655        cache.put(2, cached_blob_of(b'b', 1000));
5656        assert!(cache.get(&2).is_some());
5657        assert!(cache.get(&1).is_none());
5658    }
5659
5660    #[test]
5661    fn write_and_validate_blob() {
5662        let db = ObjectDatabase::new(ObjectFormat::Sha1);
5663        let oid = db
5664            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
5665            .expect("test operation should succeed");
5666        assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
5667        db.validate(&oid).expect("test operation should succeed");
5668    }
5669
5670    #[test]
5671    fn loose_store_writes_and_reads_object() {
5672        let root = std::env::temp_dir().join(format!(
5673            "sley-loose-store-{}-{}",
5674            std::process::id(),
5675            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
5676        ));
5677        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
5678        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
5679        let oid = store
5680            .write_object(object.clone())
5681            .expect("test operation should succeed");
5682        assert_eq!(read_object_for_assert(&store, &oid), object);
5683        assert!(
5684            store
5685                .object_path(&oid)
5686                .expect("test operation should succeed")
5687                .exists()
5688        );
5689        fs::remove_dir_all(root).expect("test operation should succeed");
5690    }
5691
5692    #[test]
5693    fn read_header_detects_corruption_within_gits_header_window() {
5694        // git's `unpack_loose_header` inflates only the first MAX_HEADER_LEN (32)
5695        // bytes of output; a zlib data error inside that window makes `cat-file
5696        // -s`/`-t` fail (ULHR_BAD → "unable to unpack header"). A byte-by-byte
5697        // header read that stopped at the NUL would never inflate into the corrupt
5698        // region and would silently return a bogus size — the t1060 "getting type
5699        // of a corrupt blob fails" bug. Corrupt a byte inside the inflate stream of
5700        // a tiny object so the damage lands within the first 32 inflated bytes.
5701        let root = temp_root("sley-loose-header-corrupt");
5702        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
5703        let object = EncodedObject::new(ObjectType::Blob, b"content\n".to_vec());
5704        let oid = store
5705            .write_object(object)
5706            .expect("test operation should succeed");
5707        let path = store
5708            .object_path(&oid)
5709            .expect("test operation should succeed");
5710        let mut bytes = fs::read(&path).expect("test operation should succeed");
5711        // Offset 10 is inside the deflate stream (past the 2-byte zlib header) and,
5712        // for an 8-byte blob, decodes into the first 32 output bytes. Zero it to
5713        // break inflation, mirroring t1060's `corrupt_byte HEAD:content.t 10`.
5714        bytes[10] = 0;
5715        fs::write(&path, &bytes).expect("test operation should succeed");
5716        store.invalidate_cache();
5717        let err = store
5718            .read_header(&oid)
5719            .expect_err("corrupt loose header must fail like git's ULHR_BAD");
5720        let msg = err.to_string();
5721        assert!(
5722            msg.contains("unable to unpack") && msg.contains(&oid.to_hex()),
5723            "expected git's ULHR_BAD message, got: {msg}"
5724        );
5725        fs::remove_dir_all(root).expect("test operation should succeed");
5726    }
5727
5728    #[test]
5729    fn read_header_ignores_corruption_past_gits_header_window() {
5730        // Mirror git: corruption deeper than the 32-byte header window is NOT
5731        // detected by a header-only read (`cat-file -s` still returns the size);
5732        // the full-object read path catches it instead. Over-detecting here would
5733        // diverge from upstream on large objects with a clean header.
5734        let root = temp_root("sley-loose-header-deep-corrupt");
5735        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
5736        // Incompressible body so the deflate stream is long and a deep byte is well
5737        // past the 32 inflated header-window bytes.
5738        let body: Vec<u8> = (0..4096u32).map(|i| (i.wrapping_mul(2654435761)) as u8).collect();
5739        let object = EncodedObject::new(ObjectType::Blob, body.clone());
5740        let oid = store
5741            .write_object(object)
5742            .expect("test operation should succeed");
5743        let path = store
5744            .object_path(&oid)
5745            .expect("test operation should succeed");
5746        let mut bytes = fs::read(&path).expect("test operation should succeed");
5747        let deep = bytes.len() / 2;
5748        bytes[deep] ^= 0xff;
5749        fs::write(&path, &bytes).expect("test operation should succeed");
5750        store.invalidate_cache();
5751        let header = store
5752            .read_header(&oid)
5753            .expect("header-only read must still succeed for deep body corruption");
5754        assert_eq!(header, Some((ObjectType::Blob, body.len() as u64)));
5755        fs::remove_dir_all(root).expect("test operation should succeed");
5756    }
5757
5758    #[test]
5759    fn file_database_reads_object_from_pack_index() {
5760        let root = temp_root("sley-file-odb-pack");
5761        let git_dir = root.join(".git");
5762        let pack_dir = git_dir.join("objects").join("pack");
5763        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5764        let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
5765        let oid = object
5766            .object_id(ObjectFormat::Sha1)
5767            .expect("test operation should succeed");
5768        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5769            .expect("test operation should succeed");
5770        let pack_name = written.checksum.to_hex();
5771        fs::write(
5772            pack_dir.join(format!("pack-{pack_name}.pack")),
5773            written.pack,
5774        )
5775        .expect("test operation should succeed");
5776        fs::write(
5777            pack_dir.join(format!("pack-{pack_name}.idx")),
5778            written.index,
5779        )
5780        .expect("test operation should succeed");
5781
5782        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5783        assert!(db.contains(&oid).expect("test operation should succeed"));
5784        assert_eq!(read_object_for_assert(&db, &oid), object);
5785        fs::remove_dir_all(root).expect("test operation should succeed");
5786    }
5787
5788    #[test]
5789    fn file_database_loose_cache_observes_same_process_write_after_miss() {
5790        let root = temp_root("sley-file-odb-loose-cache-write");
5791        let git_dir = root.join(".git");
5792        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5793        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5794
5795        let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
5796        let oid = object
5797            .object_id(ObjectFormat::Sha1)
5798            .expect("test operation should succeed");
5799
5800        assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
5801        db.loose()
5802            .write_object(object.clone())
5803            .expect("test operation should succeed");
5804
5805        assert_eq!(read_object_for_assert(&db, &oid), object);
5806        fs::remove_dir_all(root).expect("test operation should succeed");
5807    }
5808
5809    #[test]
5810    fn object_presence_checker_observes_same_process_loose_write_after_miss() {
5811        let root = temp_root("sley-presence-checker-loose-cache-write");
5812        let git_dir = root.join(".git");
5813        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5814        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5815        let mut checker = db.presence_checker();
5816
5817        let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
5818        let oid = object
5819            .object_id(ObjectFormat::Sha1)
5820            .expect("test operation should succeed");
5821
5822        assert!(
5823            !checker
5824                .contains(&oid)
5825                .expect("test operation should succeed")
5826        );
5827        db.loose()
5828            .write_object(object)
5829            .expect("test operation should succeed");
5830
5831        assert!(
5832            checker
5833                .contains(&oid)
5834                .expect("test operation should succeed")
5835        );
5836        fs::remove_dir_all(root).expect("test operation should succeed");
5837    }
5838
5839    #[test]
5840    fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
5841        let root = temp_root("sley-read-object-header");
5842        let git_dir = root.join(".git");
5843        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5844        let format = ObjectFormat::Sha1;
5845        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5846
5847        // Loose object: the header read inflates only the framing, not the body.
5848        let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
5849        let loose_oid = db
5850            .write_object(loose.clone())
5851            .expect("test operation should succeed");
5852
5853        // Packed objects, including an ofs-delta whose *result* size lives in the
5854        // delta stream (not the pack entry header) and whose type is inherited from
5855        // its base at the end of the chain.
5856        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
5857        let mut child_body = vec![b'a'; 4096];
5858        child_body.extend_from_slice(b" plus a deltified tail\n");
5859        let child = EncodedObject::new(ObjectType::Blob, child_body);
5860        let commitish =
5861            EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
5862        let base_oid = base
5863            .object_id(format)
5864            .expect("test operation should succeed");
5865        let child_oid = child
5866            .object_id(format)
5867            .expect("test operation should succeed");
5868        let commit_oid = commitish
5869            .object_id(format)
5870            .expect("test operation should succeed");
5871        let options = PackWriteOptions::new()
5872            .with_prefer_ofs_delta(true)
5873            .with_reorder(false);
5874        let pack = PackFile::write_packed_with_options(
5875            &[base.clone(), child.clone(), commitish.clone()],
5876            format,
5877            &options,
5878        )
5879        .expect("test operation should succeed");
5880        db.install_pack(&pack)
5881            .expect("test operation should succeed");
5882
5883        // The header read agrees with a full decode for every object and storage
5884        // class, without ever materializing the body.
5885        for (oid, want_type, want_len) in [
5886            (&loose_oid, ObjectType::Blob, loose.body.len()),
5887            (&base_oid, ObjectType::Blob, base.body.len()),
5888            (&child_oid, ObjectType::Blob, child.body.len()),
5889            (&commit_oid, ObjectType::Commit, commitish.body.len()),
5890        ] {
5891            assert_eq!(
5892                db.read_object_header(oid)
5893                    .expect("test operation should succeed"),
5894                Some((want_type, want_len as u64)),
5895                "header for {oid}"
5896            );
5897            let full = db.read_object(oid).expect("test operation should succeed");
5898            assert_eq!(
5899                db.read_object_header(oid)
5900                    .expect("test operation should succeed"),
5901                Some((full.object_type, full.body.len() as u64))
5902            );
5903        }
5904
5905        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
5906            .expect("test operation should succeed");
5907        assert_eq!(
5908            db.read_object_header(&missing)
5909                .expect("test operation should succeed"),
5910            None
5911        );
5912        fs::remove_dir_all(root).expect("test operation should succeed");
5913    }
5914
5915    #[test]
5916    fn object_storage_info_reports_loose_packed_and_delta_metadata() {
5917        let root = temp_root("sley-object-storage-info");
5918        let git_dir = root.join(".git");
5919        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5920        let format = ObjectFormat::Sha1;
5921        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5922
5923        let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
5924        let loose_oid = db
5925            .write_object(loose)
5926            .expect("test operation should succeed");
5927        let loose_size = fs::metadata(
5928            db.loose()
5929                .object_path(&loose_oid)
5930                .expect("test operation should succeed"),
5931        )
5932        .expect("test operation should succeed")
5933        .len();
5934        let loose_info = db
5935            .object_storage_info(&loose_oid)
5936            .expect("test operation should succeed")
5937            .expect("test operation should succeed");
5938        assert_eq!(loose_info.disk_size, loose_size);
5939        assert_eq!(
5940            loose_info.deltabase,
5941            zero_oid(format).expect("test operation should succeed")
5942        );
5943
5944        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
5945        let mut child_body = vec![b'a'; 4096];
5946        child_body.extend_from_slice(b" changed tail\n");
5947        let child = EncodedObject::new(ObjectType::Blob, child_body);
5948        let base_oid = base
5949            .object_id(format)
5950            .expect("test operation should succeed");
5951        let child_oid = child
5952            .object_id(format)
5953            .expect("test operation should succeed");
5954        let options = PackWriteOptions::new()
5955            .with_prefer_ofs_delta(true)
5956            .with_reorder(false);
5957        let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
5958            .expect("test operation should succeed");
5959        db.install_pack(&pack)
5960            .expect("test operation should succeed");
5961
5962        let base_info = db
5963            .object_storage_info(&base_oid)
5964            .expect("test operation should succeed")
5965            .expect("test operation should succeed");
5966        assert!(base_info.disk_size > 0);
5967        assert_eq!(
5968            base_info.deltabase,
5969            zero_oid(format).expect("test operation should succeed")
5970        );
5971
5972        let child_info = db
5973            .object_storage_info(&child_oid)
5974            .expect("test operation should succeed")
5975            .expect("test operation should succeed");
5976        assert!(child_info.disk_size > 0);
5977        assert_eq!(child_info.deltabase, base_oid);
5978
5979        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
5980            .expect("test operation should succeed");
5981        assert_eq!(
5982            db.object_storage_info(&missing)
5983                .expect("test operation should succeed"),
5984            None
5985        );
5986        fs::remove_dir_all(root).expect("test operation should succeed");
5987    }
5988
5989    #[test]
5990    fn file_database_resolves_unique_loose_object_prefix() {
5991        let root = temp_root("sley-file-odb-prefix-loose");
5992        let git_dir = root.join(".git");
5993        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5994        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5995        let object = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
5996        let oid = db
5997            .write_object(object)
5998            .expect("test operation should succeed");
5999        let prefix = &oid.to_hex()[..8];
6000
6001        assert_eq!(
6002            db.resolve_prefix(prefix)
6003                .expect("test operation should succeed"),
6004            ObjectPrefixResolution::Unique(oid)
6005        );
6006        assert!(
6007            db.object_ids()
6008                .expect("test operation should succeed")
6009                .contains(&oid)
6010        );
6011        fs::remove_dir_all(root).expect("test operation should succeed");
6012    }
6013
6014    #[test]
6015    fn file_database_resolves_unique_packed_object_prefix() {
6016        let root = temp_root("sley-file-odb-prefix-packed");
6017        let git_dir = root.join(".git");
6018        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6019        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6020        let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
6021        let oid = object
6022            .object_id(ObjectFormat::Sha1)
6023            .expect("test operation should succeed");
6024        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6025            .expect("test operation should succeed");
6026        db.install_pack(&pack)
6027            .expect("test operation should succeed");
6028        let prefix = &oid.to_hex()[..8];
6029
6030        assert_eq!(
6031            db.resolve_prefix(prefix)
6032                .expect("test operation should succeed"),
6033            ObjectPrefixResolution::Unique(oid)
6034        );
6035        fs::remove_dir_all(root).expect("test operation should succeed");
6036    }
6037
6038    #[test]
6039    fn file_database_reports_ambiguous_object_prefix() {
6040        let root = temp_root("sley-file-odb-prefix-ambiguous");
6041        let git_dir = root.join(".git");
6042        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6043        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6044        let mut seen = HashMap::new();
6045        let (prefix, first, second) = (0..10_000)
6046            .find_map(|idx| {
6047                let object =
6048                    EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
6049                let oid = db
6050                    .write_object(object)
6051                    .expect("test operation should succeed");
6052                let prefix = oid.to_hex()[..4].to_string();
6053                seen.insert(prefix.clone(), oid)
6054                    .map(|first| (prefix, first, oid))
6055            })
6056            .expect("test should find a 4-hex collision");
6057
6058        let ObjectPrefixResolution::Ambiguous(mut matches) = db
6059            .resolve_prefix(&prefix)
6060            .expect("test operation should succeed")
6061        else {
6062            panic!("expected ambiguous prefix {prefix}");
6063        };
6064        matches.sort_by_key(ObjectId::to_hex);
6065        let mut expected = vec![first, second];
6066        expected.sort_by_key(ObjectId::to_hex);
6067        assert_eq!(matches, expected);
6068        fs::remove_dir_all(root).expect("test operation should succeed");
6069    }
6070
6071    #[test]
6072    fn file_database_rejects_too_short_object_prefix() {
6073        let root = temp_root("sley-file-odb-prefix-short");
6074        let git_dir = root.join(".git");
6075        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6076        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6077
6078        assert!(matches!(
6079            db.resolve_prefix("abc"),
6080            Err(GitError::InvalidObjectId(_))
6081        ));
6082        fs::remove_dir_all(root).expect("test operation should succeed");
6083    }
6084
6085    #[test]
6086    fn file_database_reads_sha256_object_from_pack_index() {
6087        let root = temp_root("sley-file-odb-pack-sha256");
6088        let git_dir = root.join(".git");
6089        let pack_dir = git_dir.join("objects").join("pack");
6090        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6091        let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
6092        let oid = object
6093            .object_id(ObjectFormat::Sha256)
6094            .expect("test operation should succeed");
6095        let written =
6096            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6097                .expect("test operation should succeed");
6098        let pack_name = written.checksum.to_hex();
6099        fs::write(
6100            pack_dir.join(format!("pack-{pack_name}.pack")),
6101            written.pack,
6102        )
6103        .expect("test operation should succeed");
6104        fs::write(
6105            pack_dir.join(format!("pack-{pack_name}.idx")),
6106            written.index,
6107        )
6108        .expect("test operation should succeed");
6109
6110        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
6111        assert!(db.contains(&oid).expect("test operation should succeed"));
6112        assert_eq!(read_object_for_assert(&db, &oid), object);
6113        fs::remove_dir_all(root).expect("test operation should succeed");
6114    }
6115
6116    #[test]
6117    fn file_database_installs_sha256_pack_without_loose_objects() {
6118        let root = temp_root("sley-file-odb-install-pack");
6119        let git_dir = root.join(".git");
6120        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6121        let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
6122        let oid = object
6123            .object_id(ObjectFormat::Sha256)
6124            .expect("test operation should succeed");
6125        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6126            .expect("test operation should succeed");
6127        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
6128
6129        let result = db
6130            .install_pack(&pack)
6131            .expect("test operation should succeed");
6132
6133        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
6134        assert_eq!(result.object_ids, vec![oid]);
6135        assert!(result.pack_path.exists());
6136        assert!(result.index_path.exists());
6137        assert_eq!(result.promisor_path, None);
6138        assert!(
6139            !db.loose()
6140                .object_path(&oid)
6141                .expect("test operation should succeed")
6142                .exists()
6143        );
6144        assert!(db.contains(&oid).expect("test operation should succeed"));
6145        assert_eq!(read_object_for_assert(&db, &oid), object);
6146        fs::remove_dir_all(root).expect("test operation should succeed");
6147    }
6148
6149    #[test]
6150    fn file_database_installs_raw_sha256_pack_without_loose_objects() {
6151        let root = temp_root("sley-file-odb-install-raw-pack");
6152        let git_dir = root.join(".git");
6153        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6154        let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
6155        let oid = object
6156            .object_id(ObjectFormat::Sha256)
6157            .expect("test operation should succeed");
6158        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6159            .expect("test operation should succeed");
6160        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
6161
6162        let result = db
6163            .install_raw_pack(&pack.pack)
6164            .expect("test operation should succeed");
6165
6166        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
6167        assert_eq!(result.object_ids, vec![oid]);
6168        assert!(result.pack_path.exists());
6169        assert!(result.index_path.exists());
6170        assert_eq!(result.promisor_path, None);
6171        assert!(
6172            !db.loose()
6173                .object_path(&oid)
6174                .expect("test operation should succeed")
6175                .exists()
6176        );
6177        assert!(db.contains(&oid).expect("test operation should succeed"));
6178        assert_eq!(read_object_for_assert(&db, &oid), object);
6179        fs::remove_dir_all(root).expect("test operation should succeed");
6180    }
6181
6182    #[test]
6183    fn file_database_rejects_noncanonical_pack_index() {
6184        let root = temp_root("sley-file-odb-install-bad-index");
6185        let git_dir = root.join(".git");
6186        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6187        let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
6188        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
6189            .expect("test operation should succeed");
6190        let mut entries = pack.entries.clone();
6191        entries[0].crc32 ^= 1;
6192        let mut bad_pack = pack.clone();
6193        bad_pack.index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack.checksum)
6194            .expect("test operation should succeed");
6195        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6196
6197        assert!(db.install_pack(&bad_pack).is_err());
6198
6199        fs::remove_dir_all(root).expect("test operation should succeed");
6200    }
6201
6202    #[test]
6203    fn file_database_installs_raw_promisor_pack_with_sidecar() {
6204        let root = temp_root("sley-file-odb-install-raw-promisor-pack");
6205        let git_dir = root.join(".git");
6206        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6207        let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
6208        let oid = object
6209            .object_id(ObjectFormat::Sha1)
6210            .expect("test operation should succeed");
6211        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
6212            .expect("test operation should succeed");
6213        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6214
6215        let result = db
6216            .install_raw_pack_with_options(&pack.pack, RawPackInstallOptions { promisor: true })
6217            .expect("test operation should succeed");
6218
6219        let promisor_path = result.promisor_path.expect("promisor sidecar");
6220        assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
6221        assert_eq!(
6222            promisor_path.extension().and_then(|ext| ext.to_str()),
6223            Some("promisor")
6224        );
6225        assert!(promisor_path.exists());
6226        assert_eq!(
6227            fs::read(&promisor_path).expect("test operation should succeed"),
6228            b""
6229        );
6230        assert!(result.pack_path.exists());
6231        assert!(result.index_path.exists());
6232        assert!(
6233            !db.loose()
6234                .object_path(&oid)
6235                .expect("test operation should succeed")
6236                .exists()
6237        );
6238        assert_eq!(read_object_for_assert(&db, &oid), object);
6239        fs::remove_dir_all(root).expect("test operation should succeed");
6240    }
6241
6242    #[test]
6243    fn repository_objects_dir_uses_linked_worktree_common_dir() {
6244        let root = temp_root("sley-odb-common-dir");
6245        let common = root.join(".git");
6246        let admin = common.join("worktrees").join("linked");
6247        fs::create_dir_all(&admin).expect("test operation should succeed");
6248        fs::write(admin.join("commondir"), "../..\n").expect("test operation should succeed");
6249
6250        let common = fs::canonicalize(common).expect("test operation should succeed");
6251        assert_eq!(repository_common_dir(&admin), common);
6252        assert_eq!(repository_objects_dir(&admin), common.join("objects"));
6253
6254        fs::remove_dir_all(root).expect("test operation should succeed");
6255    }
6256
6257    #[test]
6258    fn reachable_object_helpers_walk_graph_and_install_pack() {
6259        let root = temp_root("sley-reachable-pack");
6260        let source_git_dir = root.join("source.git");
6261        let destination_git_dir = root.join("destination.git");
6262        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
6263        fs::create_dir_all(destination_git_dir.join("objects"))
6264            .expect("test operation should succeed");
6265        let format = ObjectFormat::Sha1;
6266        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
6267        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
6268
6269        let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
6270        let blob_oid = source
6271            .write_object(blob.clone())
6272            .expect("test operation should succeed");
6273        let tree = EncodedObject::new(
6274            ObjectType::Tree,
6275            Tree {
6276                entries: vec![TreeEntry {
6277                    mode: 0o100644,
6278                    name: BString::from(b"payload.txt"),
6279                    oid: blob_oid,
6280                }],
6281            }
6282            .write(),
6283        );
6284        let tree_oid = source
6285            .write_object(tree.clone())
6286            .expect("test operation should succeed");
6287        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6288        let commit = EncodedObject::new(
6289            ObjectType::Commit,
6290            Commit {
6291                tree: tree_oid,
6292                parents: Vec::new(),
6293                author: identity.clone(),
6294                committer: identity,
6295                encoding: None,
6296                message: b"initial\n".to_vec(),
6297            }
6298            .write(),
6299        );
6300        let commit_oid = source
6301            .write_object(commit.clone())
6302            .expect("test operation should succeed");
6303
6304        let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
6305            .expect("test operation should succeed");
6306        assert!(reachable.contains(&commit_oid));
6307        assert!(reachable.contains(&tree_oid));
6308        assert!(reachable.contains(&blob_oid));
6309
6310        let install =
6311            install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
6312                .expect("test operation should succeed")
6313                .expect("reachable pack should be written");
6314        assert_eq!(install.object_ids.len(), 3);
6315        for (oid, object) in [
6316            (&commit_oid, &commit),
6317            (&tree_oid, &tree),
6318            (&blob_oid, &blob),
6319        ] {
6320            assert!(
6321                !destination
6322                    .loose()
6323                    .object_path(oid)
6324                    .expect("test operation should succeed")
6325                    .exists()
6326            );
6327            assert!(
6328                destination
6329                    .contains(oid)
6330                    .expect("test operation should succeed")
6331            );
6332            assert_eq!(read_object_for_assert(&destination, oid), *object);
6333        }
6334        fs::remove_dir_all(root).expect("test operation should succeed");
6335    }
6336
6337    #[test]
6338    fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
6339        let root = temp_root("sley-reachable-exclusions");
6340        let git_dir = root.join("repo.git");
6341        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6342        let format = ObjectFormat::Sha1;
6343        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
6344
6345        let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
6346        let blob_oid = db
6347            .write_object(blob)
6348            .expect("test operation should succeed");
6349        let tree = EncodedObject::new(
6350            ObjectType::Tree,
6351            Tree {
6352                entries: vec![TreeEntry {
6353                    mode: 0o100644,
6354                    name: BString::from(b"payload.txt"),
6355                    oid: blob_oid,
6356                }],
6357            }
6358            .write(),
6359        );
6360        let tree_oid = db
6361            .write_object(tree)
6362            .expect("test operation should succeed");
6363        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6364        let commit = EncodedObject::new(
6365            ObjectType::Commit,
6366            Commit {
6367                tree: tree_oid,
6368                parents: Vec::new(),
6369                author: identity.clone(),
6370                committer: identity,
6371                encoding: None,
6372                message: b"initial\n".to_vec(),
6373            }
6374            .write(),
6375        );
6376        let commit_oid = db
6377            .write_object(commit)
6378            .expect("test operation should succeed");
6379        let excluded = HashSet::from([tree_oid]);
6380
6381        let objects = collect_reachable_objects(&db, format, [commit_oid, commit_oid], &excluded)
6382            .expect("test operation should succeed");
6383
6384        assert_eq!(objects.len(), 1);
6385        assert_eq!(
6386            objects[0]
6387                .object_id(format)
6388                .expect("test operation should succeed"),
6389            commit_oid
6390        );
6391        fs::remove_dir_all(root).expect("test operation should succeed");
6392    }
6393
6394    #[test]
6395    fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
6396        let root = temp_root("sley-build-reachable-pack");
6397        let git_dir = root.join("repo.git");
6398        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6399        let format = ObjectFormat::Sha1;
6400        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
6401
6402        let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
6403        let oid = db
6404            .write_object(object.clone())
6405            .expect("test operation should succeed");
6406        let pack = build_reachable_pack(&db, format, std::iter::once(oid), &HashSet::new())
6407            .expect("test operation should succeed")
6408            .expect("reachable pack should be built");
6409        assert!(pack.pack.starts_with(b"PACK"));
6410        assert_eq!(pack.entries.len(), 1);
6411        assert_eq!(pack.entries[0].oid, oid);
6412
6413        let excluded = HashSet::from([oid]);
6414        assert!(
6415            build_reachable_pack(
6416                &db,
6417                format,
6418                pack.entries.into_iter().map(|entry| entry.oid),
6419                &excluded
6420            )
6421            .expect("test operation should succeed")
6422            .is_none()
6423        );
6424        fs::remove_dir_all(root).expect("test operation should succeed");
6425    }
6426
6427    #[test]
6428    fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
6429        let root = temp_root("sley-reachable-tags");
6430        let git_dir = root.join("repo.git");
6431        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6432        let format = ObjectFormat::Sha1;
6433        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
6434
6435        let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
6436        let blob_oid = db
6437            .write_object(blob)
6438            .expect("test operation should succeed");
6439        let tag = EncodedObject::new(
6440            ObjectType::Tag,
6441            Tag {
6442                object: blob_oid,
6443                object_type: ObjectType::Blob,
6444                name: b"v1".to_vec(),
6445                tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
6446                message: b"tag message\n".to_vec(),
6447                raw_body: None,
6448            }
6449            .write(),
6450        );
6451        let tag_oid = db.write_object(tag).expect("test operation should succeed");
6452
6453        let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
6454            .expect("test operation should succeed");
6455        assert!(reachable.contains(&tag_oid));
6456        assert!(reachable.contains(&blob_oid));
6457
6458        let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
6459            .expect("test operation should succeed");
6460        let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
6461            .expect_err("missing traversal root should error");
6462        let kind = err.not_found_kind().expect("typed not found");
6463        assert_eq!(kind.object_id(), Some(missing));
6464        assert_eq!(
6465            kind.missing_object_context(),
6466            Some(MissingObjectContext::Traversal)
6467        );
6468        fs::remove_dir_all(root).expect("test operation should succeed");
6469    }
6470
6471    #[test]
6472    fn install_reachable_pack_empty_starts_create_no_pack() {
6473        let root = temp_root("sley-reachable-empty");
6474        let source_git_dir = root.join("source.git");
6475        let destination_git_dir = root.join("destination.git");
6476        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
6477        fs::create_dir_all(destination_git_dir.join("objects"))
6478            .expect("test operation should succeed");
6479        let format = ObjectFormat::Sha1;
6480        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
6481        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
6482
6483        let result = install_reachable_pack(&source, &destination, format, Vec::<ObjectId>::new())
6484            .expect("test operation should succeed");
6485
6486        assert!(result.is_none());
6487        assert!(!destination_git_dir.join("objects").join("pack").exists());
6488        fs::remove_dir_all(root).expect("test operation should succeed");
6489    }
6490
6491    #[test]
6492    fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
6493        let root = temp_root("sley-reachable-install-excluding");
6494        let source_git_dir = root.join("source.git");
6495        let destination_git_dir = root.join("destination.git");
6496        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
6497        fs::create_dir_all(destination_git_dir.join("objects"))
6498            .expect("test operation should succeed");
6499        let format = ObjectFormat::Sha1;
6500        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
6501        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
6502        let object = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
6503        let oid = source
6504            .write_object(object)
6505            .expect("test operation should succeed");
6506        let excluded = HashSet::from([oid]);
6507
6508        let result = install_reachable_pack_excluding(
6509            &source,
6510            &destination,
6511            format,
6512            std::iter::once(oid),
6513            &excluded,
6514        )
6515        .expect("test operation should succeed");
6516
6517        assert!(result.is_none());
6518        assert!(!destination_git_dir.join("objects").join("pack").exists());
6519        fs::remove_dir_all(root).expect("test operation should succeed");
6520    }
6521
6522    #[test]
6523    fn install_reachable_pack_supports_sha256() {
6524        let root = temp_root("sley-reachable-pack-sha256");
6525        let source_git_dir = root.join("source.git");
6526        let destination_git_dir = root.join("destination.git");
6527        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
6528        fs::create_dir_all(destination_git_dir.join("objects"))
6529            .expect("test operation should succeed");
6530        let format = ObjectFormat::Sha256;
6531        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
6532        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
6533        let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
6534        let oid = source
6535            .write_object(object.clone())
6536            .expect("test operation should succeed");
6537
6538        let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
6539            .expect("test operation should succeed")
6540            .expect("sha256 reachable pack should be built");
6541        assert!(pack.pack.starts_with(b"PACK"));
6542        assert_eq!(pack.entries[0].oid, oid);
6543
6544        let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
6545            .expect("test operation should succeed")
6546            .expect("sha256 reachable pack should be written");
6547
6548        assert_eq!(result.object_ids, vec![oid]);
6549        assert!(
6550            !destination
6551                .loose()
6552                .object_path(&oid)
6553                .expect("test operation should succeed")
6554                .exists()
6555        );
6556        assert_eq!(read_object_for_assert(&destination, &oid), object);
6557        fs::remove_dir_all(root).expect("test operation should succeed");
6558    }
6559
6560    #[test]
6561    fn install_helpers_accept_custom_raw_pack_installer() {
6562        #[derive(Default)]
6563        struct RecordingInstaller {
6564            packs: std::cell::RefCell<Vec<Vec<u8>>>,
6565            installed: std::cell::RefCell<Vec<ObjectId>>,
6566        }
6567
6568        impl RawPackInstaller for RecordingInstaller {
6569            fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
6570                self.packs.borrow_mut().push(pack_bytes.to_vec());
6571                let object_ids = self.installed.borrow().clone();
6572                Ok(RawPackInstallResult { object_ids })
6573            }
6574        }
6575
6576        let format = ObjectFormat::Sha1;
6577        let source = ObjectDatabase::new(format);
6578        let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
6579        let oid = source
6580            .write_object(object)
6581            .expect("test operation should succeed");
6582        let installer = RecordingInstaller::default();
6583        installer.installed.borrow_mut().push(oid);
6584
6585        let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
6586            .expect("test operation should succeed")
6587            .expect("custom installer should receive pack");
6588
6589        assert_eq!(result.object_ids, installer.installed.into_inner());
6590        let packs = installer.packs.into_inner();
6591        assert_eq!(packs.len(), 1);
6592        assert!(packs[0].starts_with(b"PACK"));
6593    }
6594
6595    #[test]
6596    fn file_database_reads_object_from_multi_pack_index() {
6597        let root = temp_root("sley-file-odb-midx");
6598        let git_dir = root.join(".git");
6599        let pack_dir = git_dir.join("objects").join("pack");
6600        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6601        let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
6602        let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
6603        let first_oid = first
6604            .object_id(ObjectFormat::Sha1)
6605            .expect("test operation should succeed");
6606        let second_oid = second
6607            .object_id(ObjectFormat::Sha1)
6608            .expect("test operation should succeed");
6609        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6610            .expect("test operation should succeed");
6611        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6612            .expect("test operation should succeed");
6613        let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
6614        let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
6615        fs::write(
6616            pack_dir.join(first_pack_name.replace(".idx", ".pack")),
6617            first_pack.pack,
6618        )
6619        .expect("test operation should succeed");
6620        fs::write(
6621            pack_dir.join(second_pack_name.replace(".idx", ".pack")),
6622            second_pack.pack,
6623        )
6624        .expect("test operation should succeed");
6625        let midx = MultiPackIndex::write(
6626            ObjectFormat::Sha1,
6627            2,
6628            &[first_pack_name, second_pack_name],
6629            &[
6630                sley_pack::MultiPackIndexEntry {
6631                    oid: first_oid,
6632                    pack_int_id: 0,
6633                    offset: first_pack.entries[0].offset,
6634                },
6635                sley_pack::MultiPackIndexEntry {
6636                    oid: second_oid,
6637                    pack_int_id: 1,
6638                    offset: second_pack.entries[0].offset,
6639                },
6640            ],
6641        )
6642        .expect("test operation should succeed");
6643        fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");
6644
6645        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6646        assert!(
6647            db.contains(&second_oid)
6648                .expect("test operation should succeed")
6649        );
6650        assert_eq!(
6651            db.resolve_prefix(&second_oid.to_hex()[..8])
6652                .expect("test operation should succeed"),
6653            ObjectPrefixResolution::Unique(second_oid)
6654        );
6655        assert_eq!(read_object_for_assert(&db, &second_oid), second);
6656        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6657        fs::remove_dir_all(root).expect("test operation should succeed");
6658    }
6659
6660    #[test]
6661    fn file_database_finds_pack_added_after_registry_was_cached() {
6662        // Regression guard for the cached pack-directory registry: a pack written
6663        // after the registry was first cached (via a prior read) must still be
6664        // discovered by the same handle, because a miss triggers a re-scan.
6665        let root = temp_root("sley-file-odb-pack-added-late");
6666        let git_dir = root.join(".git");
6667        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6668        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6669
6670        // First pack + object; reading it populates the registry cache.
6671        let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
6672        let first_oid = first
6673            .object_id(ObjectFormat::Sha1)
6674            .expect("test operation should succeed");
6675        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6676            .expect("test operation should succeed");
6677        db.install_pack(&first_pack)
6678            .expect("test operation should succeed");
6679        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6680
6681        // A second object that the cached registry does not yet know about.
6682        let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
6683        let second_oid = second
6684            .object_id(ObjectFormat::Sha1)
6685            .expect("test operation should succeed");
6686        // It is genuinely absent right now.
6687        assert!(matches!(
6688            db.read_object(&second_oid),
6689            Err(GitError::NotFound(_))
6690        ));
6691
6692        // Install its pack through the same handle; the next read must find it via
6693        // a re-scan, not be masked by the stale registry.
6694        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6695            .expect("test operation should succeed");
6696        db.install_pack(&second_pack)
6697            .expect("test operation should succeed");
6698        assert!(
6699            db.contains(&second_oid)
6700                .expect("test operation should succeed")
6701        );
6702        assert_eq!(read_object_for_assert(&db, &second_oid), second);
6703        // The original object still resolves too.
6704        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6705
6706        fs::remove_dir_all(root).expect("test operation should succeed");
6707    }
6708
6709    #[test]
6710    fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
6711        let root = temp_root("sley-presence-checker-pack-added-late");
6712        let git_dir = root.join(".git");
6713        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6714        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6715
6716        let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
6717        let first_oid = first
6718            .object_id(ObjectFormat::Sha1)
6719            .expect("test operation should succeed");
6720        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6721            .expect("test operation should succeed");
6722        db.install_pack(&first_pack)
6723            .expect("test operation should succeed");
6724
6725        let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
6726        let second_oid = second
6727            .object_id(ObjectFormat::Sha1)
6728            .expect("test operation should succeed");
6729        let mut checker = db.presence_checker();
6730        assert!(
6731            checker
6732                .contains(&first_oid)
6733                .expect("test operation should succeed")
6734        );
6735        assert!(
6736            !checker
6737                .contains(&second_oid)
6738                .expect("test operation should succeed")
6739        );
6740
6741        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6742            .expect("test operation should succeed");
6743        db.install_pack(&second_pack)
6744            .expect("test operation should succeed");
6745
6746        assert!(
6747            checker
6748                .contains(&second_oid)
6749                .expect("test operation should succeed")
6750        );
6751        fs::remove_dir_all(root).expect("test operation should succeed");
6752    }
6753
6754    #[test]
6755    fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
6756        let root = temp_root("sley-file-odb-pack-registry-refresh");
6757        let git_dir = root.join(".git");
6758        let pack_dir = git_dir.join("objects").join("pack");
6759        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6760        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6761
6762        let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
6763        let first_oid = first
6764            .object_id(ObjectFormat::Sha1)
6765            .expect("test operation should succeed");
6766        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6767            .expect("test operation should succeed");
6768        db.install_pack(&first_pack)
6769            .expect("test operation should succeed");
6770
6771        let first_registry = db
6772            .cached_pack_registry(&pack_dir, false)
6773            .expect("test operation should succeed");
6774        assert_eq!(first_registry.fingerprint.idx_count, 1);
6775        assert_eq!(first_registry.fingerprint.pack_count, 1);
6776        assert_eq!(first_registry.packs.len(), 1);
6777        assert!(
6778            first_registry.packs[0]
6779                .index
6780                .lock()
6781                .expect("test operation should succeed")
6782                .is_none()
6783        );
6784        assert!(
6785            first_registry.packs[0]
6786                .data
6787                .lock()
6788                .expect("test operation should succeed")
6789                .is_none()
6790        );
6791
6792        // Existence checks use the parsed index directly and do not load pack
6793        // bytes; a full read fills the registry-owned pack data handle.
6794        assert!(
6795            db.contains(&first_oid)
6796                .expect("test operation should succeed")
6797        );
6798        assert!(
6799            first_registry.packs[0]
6800                .index
6801                .lock()
6802                .expect("test operation should succeed")
6803                .is_some()
6804        );
6805        assert!(
6806            first_registry.packs[0]
6807                .data
6808                .lock()
6809                .expect("test operation should succeed")
6810                .is_none()
6811        );
6812        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6813        assert!(
6814            first_registry.packs[0]
6815                .data
6816                .lock()
6817                .expect("test operation should succeed")
6818                .is_some()
6819        );
6820
6821        let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
6822        let second_oid = second
6823            .object_id(ObjectFormat::Sha1)
6824            .expect("test operation should succeed");
6825        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6826            .expect("test operation should succeed");
6827        db.install_pack(&second_pack)
6828            .expect("test operation should succeed");
6829
6830        let refreshed = db
6831            .cached_pack_registry(&pack_dir, true)
6832            .expect("test operation should succeed");
6833        assert!(!Arc::ptr_eq(&first_registry, &refreshed));
6834        assert_eq!(refreshed.fingerprint.idx_count, 2);
6835        assert_eq!(refreshed.fingerprint.pack_count, 2);
6836        assert_eq!(refreshed.packs.len(), 2);
6837        assert_eq!(read_object_for_assert(&db, &second_oid), second);
6838
6839        fs::remove_dir_all(root).expect("test operation should succeed");
6840    }
6841
6842    #[test]
6843    fn file_database_pack_search_hint_rebuilds_after_pack_added() {
6844        // Regression guard for the recent-pack search hint: it is tied to the
6845        // cached pack registry, so a miss followed by a changed registry must not
6846        // hide newly-added packs.
6847        let root = temp_root("sley-file-odb-pack-lookup-added-late");
6848        let git_dir = root.join(".git");
6849        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6850        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6851
6852        let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
6853        let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
6854        let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
6855        let first_oid = first
6856            .object_id(ObjectFormat::Sha1)
6857            .expect("test operation should succeed");
6858        let second_oid = second
6859            .object_id(ObjectFormat::Sha1)
6860            .expect("test operation should succeed");
6861        let third_oid = third
6862            .object_id(ObjectFormat::Sha1)
6863            .expect("test operation should succeed");
6864
6865        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6866            .expect("test operation should succeed");
6867        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6868            .expect("test operation should succeed");
6869        db.install_pack(&first_pack)
6870            .expect("test operation should succeed");
6871        db.install_pack(&second_pack)
6872            .expect("test operation should succeed");
6873
6874        // With two packs, these reads establish a cached registry and pack hint.
6875        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6876        assert_eq!(read_object_for_assert(&db, &second_oid), second);
6877        assert!(matches!(
6878            db.read_object(&third_oid),
6879            Err(GitError::NotFound(_))
6880        ));
6881
6882        let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
6883            .expect("test operation should succeed");
6884        db.install_pack(&third_pack)
6885            .expect("test operation should succeed");
6886
6887        assert_eq!(read_object_for_assert(&db, &third_oid), third);
6888        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6889
6890        fs::remove_dir_all(root).expect("test operation should succeed");
6891    }
6892
6893    #[test]
6894    fn file_database_prefers_loose_object_over_packed_object() {
6895        let root = temp_root("sley-file-odb-prefer-loose");
6896        let git_dir = root.join(".git");
6897        let pack_dir = git_dir.join("objects").join("pack");
6898        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6899        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
6900        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6901            .expect("test operation should succeed");
6902        let pack_name = written.checksum.to_hex();
6903        fs::write(
6904            pack_dir.join(format!("pack-{pack_name}.pack")),
6905            written.pack,
6906        )
6907        .expect("test operation should succeed");
6908        fs::write(
6909            pack_dir.join(format!("pack-{pack_name}.idx")),
6910            written.index,
6911        )
6912        .expect("test operation should succeed");
6913
6914        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6915        let oid = db
6916            .write_object(object.clone())
6917            .expect("test operation should succeed");
6918        assert_eq!(read_object_for_assert(&db, &oid), object);
6919        fs::remove_dir_all(root).expect("test operation should succeed");
6920    }
6921
6922    #[test]
6923    fn bundle_prerequisite_verification_reads_existing_objects() {
6924        let db = ObjectDatabase::new(ObjectFormat::Sha1);
6925        let oid = db
6926            .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
6927            .expect("test operation should succeed");
6928        let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
6929        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6930            .expect("test operation should succeed");
6931
6932        verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
6933    }
6934
6935    #[test]
6936    fn bundle_prerequisite_verification_reports_missing_objects() {
6937        let db = ObjectDatabase::new(ObjectFormat::Sha1);
6938        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6939            .expect("test operation should succeed");
6940        let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
6941        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6942            .expect("test operation should succeed");
6943
6944        assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
6945    }
6946
6947    #[test]
6948    fn unbundle_objects_writes_pack_entries_and_returns_refs() {
6949        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6950        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6951        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6952        let oid = object
6953            .object_id(ObjectFormat::Sha1)
6954            .expect("test operation should succeed");
6955        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6956            .expect("test operation should succeed");
6957        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6958            .into_bytes()
6959            .into_iter()
6960            .chain(pack.pack)
6961            .collect::<Vec<_>>();
6962        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6963            .expect("test operation should succeed");
6964
6965        let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
6966            .expect("test operation should succeed");
6967        assert_eq!(result.written_objects, vec![oid]);
6968        assert_eq!(result.references, bundle.references);
6969        assert_eq!(read_object_for_assert(&writer, &oid), object);
6970    }
6971
6972    #[test]
6973    fn install_bundle_pack_writes_pack_and_returns_refs() {
6974        let root = temp_root("sley-install-bundle-pack");
6975        let git_dir = root.join(".git");
6976        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6977        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6978        let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6979        let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
6980        let oid = object
6981            .object_id(ObjectFormat::Sha1)
6982            .expect("test operation should succeed");
6983        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6984            .expect("test operation should succeed");
6985        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6986            .into_bytes()
6987            .into_iter()
6988            .chain(pack.pack)
6989            .collect::<Vec<_>>();
6990        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6991            .expect("test operation should succeed");
6992
6993        let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
6994            .expect("test operation should succeed");
6995
6996        assert_eq!(result.written_objects, vec![oid]);
6997        assert_eq!(result.references, bundle.references);
6998        assert!(
6999            database
7000                .contains(&oid)
7001                .expect("test operation should succeed")
7002        );
7003        assert_eq!(read_object_for_assert(&database, &oid), object);
7004        assert!(
7005            !database
7006                .loose()
7007                .object_path(&oid)
7008                .expect("test operation should succeed")
7009                .exists()
7010        );
7011        fs::remove_dir_all(root).expect("test operation should succeed");
7012    }
7013
7014    #[test]
7015    fn unpack_packfile_objects_writes_sha256_pack_entries() {
7016        let writer = ObjectDatabase::new(ObjectFormat::Sha256);
7017        let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
7018        let oid = object
7019            .object_id(ObjectFormat::Sha256)
7020            .expect("test operation should succeed");
7021        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7022            .expect("test operation should succeed");
7023
7024        let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
7025            .expect("test operation should succeed");
7026
7027        assert_eq!(result.written_objects, vec![oid]);
7028        assert_eq!(read_object_for_assert(&writer, &oid), object);
7029    }
7030
7031    #[test]
7032    fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
7033        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7034        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
7035        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
7036            .expect("test operation should succeed");
7037        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
7038        let oid = object
7039            .object_id(ObjectFormat::Sha1)
7040            .expect("test operation should succeed");
7041        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7042            .expect("test operation should succeed");
7043        let bundle_bytes =
7044            format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
7045                .into_bytes()
7046                .into_iter()
7047                .chain(pack.pack)
7048                .collect::<Vec<_>>();
7049        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7050            .expect("test operation should succeed");
7051
7052        assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
7053        assert!(!writer.contains(&oid));
7054    }
7055
7056    /// Build a commit -> tree -> blob graph in `db`, returning the three object
7057    /// ids and their canonical encodings as `(oid, object)` pairs.
7058    fn write_commit_graph(
7059        db: &mut FileObjectDatabase,
7060        payload: &[u8],
7061    ) -> Vec<(ObjectId, EncodedObject)> {
7062        let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
7063        let blob_oid = db
7064            .write_object(blob.clone())
7065            .expect("test operation should succeed");
7066        let tree = EncodedObject::new(
7067            ObjectType::Tree,
7068            Tree {
7069                entries: vec![TreeEntry {
7070                    mode: 0o100644,
7071                    name: BString::from(b"payload.txt"),
7072                    oid: blob_oid,
7073                }],
7074            }
7075            .write(),
7076        );
7077        let tree_oid = db
7078            .write_object(tree.clone())
7079            .expect("test operation should succeed");
7080        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7081        let commit = EncodedObject::new(
7082            ObjectType::Commit,
7083            Commit {
7084                tree: tree_oid,
7085                parents: Vec::new(),
7086                author: identity.clone(),
7087                committer: identity,
7088                encoding: None,
7089                message: b"initial\n".to_vec(),
7090            }
7091            .write(),
7092        );
7093        let commit_oid = db
7094            .write_object(commit.clone())
7095            .expect("test operation should succeed");
7096        vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
7097    }
7098
7099    fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
7100        let root = temp_root("sley-repack-all");
7101        let git_dir = root.join(".git");
7102        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7103        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7104
7105        // A pre-existing pack holds one blob; the rest of the graph is loose.
7106        let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
7107        let packed_oid = packed_blob
7108            .object_id(format)
7109            .expect("test operation should succeed");
7110        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
7111            .expect("test operation should succeed");
7112        let existing = db
7113            .install_pack(&existing_pack)
7114            .expect("test operation should succeed");
7115
7116        let graph = write_commit_graph(&mut db, b"repack payload\n");
7117
7118        let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
7119        expected.insert(packed_oid, packed_blob.clone());
7120
7121        let result = repack_all_objects(&git_dir, format)
7122            .expect("test operation should succeed")
7123            .expect("repository has objects");
7124
7125        // The new pack round-trips and contains every original object byte-for-byte.
7126        assert_eq!(result.object_count, expected.len());
7127        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
7128        assert_eq!(parsed.entries.len(), expected.len());
7129        for entry in &parsed.entries {
7130            let want = expected
7131                .get(&entry.entry.oid)
7132                .expect("packed object was in the repository");
7133            assert_eq!(&entry.object, want);
7134            assert_eq!(
7135                entry
7136                    .object
7137                    .object_id(format)
7138                    .expect("test operation should succeed"),
7139                entry.entry.oid
7140            );
7141        }
7142        // The generated index parses and agrees with the pack checksum.
7143        let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
7144        assert_eq!(idx.pack_checksum, parsed.checksum);
7145        assert_eq!(idx.entries.len(), expected.len());
7146
7147        // The pre-existing pack is reported obsolete (by its .pack path).
7148        assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
7149        // Every loose object id is reported as now packed.
7150        let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
7151        want_loose.sort_by_key(ObjectId::to_hex);
7152        assert_eq!(result.packed_loose, want_loose);
7153        assert!(!result.packed_loose.contains(&packed_oid));
7154
7155        fs::remove_dir_all(root).expect("test operation should succeed");
7156    }
7157
7158    #[test]
7159    fn repack_all_objects_consolidates_loose_and_pack_sha1() {
7160        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
7161    }
7162
7163    #[test]
7164    fn repack_all_objects_consolidates_loose_and_pack_sha256() {
7165        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
7166    }
7167
7168    #[test]
7169    fn repack_all_objects_returns_none_for_empty_repository() {
7170        let root = temp_root("sley-repack-empty");
7171        let git_dir = root.join(".git");
7172        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7173
7174        assert!(
7175            repack_all_objects(&git_dir, ObjectFormat::Sha1)
7176                .expect("test operation should succeed")
7177                .is_none()
7178        );
7179
7180        fs::remove_dir_all(root).expect("test operation should succeed");
7181    }
7182
7183    #[test]
7184    fn install_repack_result_writes_pack_without_pruning_by_default() {
7185        let root = temp_root("sley-repack-install-nodelete");
7186        let git_dir = root.join(".git");
7187        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7188        let format = ObjectFormat::Sha1;
7189        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7190        let graph = write_commit_graph(&mut db, b"install no prune\n");
7191
7192        let result = repack_all_objects(&git_dir, format)
7193            .expect("test operation should succeed")
7194            .expect("test operation should succeed");
7195        install_repack_result(&git_dir, format, &result, false)
7196            .expect("test operation should succeed");
7197
7198        // New pack is on disk and readable.
7199        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
7200        let pack_dir = git_dir.join("objects").join("pack");
7201        let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
7202        let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
7203        assert!(pack_path.exists());
7204        assert!(idx_path.exists());
7205        // Loose objects survive because prune was not requested.
7206        for (oid, object) in &graph {
7207            assert!(
7208                db.loose()
7209                    .object_path(oid)
7210                    .expect("test operation should succeed")
7211                    .exists()
7212            );
7213            assert_eq!(read_object_for_assert(&db, oid), *object);
7214        }
7215
7216        fs::remove_dir_all(root).expect("test operation should succeed");
7217    }
7218
7219    #[test]
7220    fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
7221        let root = temp_root("sley-repack-install-prune");
7222        let git_dir = root.join(".git");
7223        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7224        let format = ObjectFormat::Sha1;
7225        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7226
7227        let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
7228        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
7229            .expect("test operation should succeed");
7230        let existing = db
7231            .install_pack(&existing_pack)
7232            .expect("test operation should succeed");
7233        let graph = write_commit_graph(&mut db, b"prune payload\n");
7234
7235        let result = repack_all_objects(&git_dir, format)
7236            .expect("test operation should succeed")
7237            .expect("test operation should succeed");
7238        let new_pack_checksum = PackFile::parse(&result.pack, format)
7239            .expect("test operation should succeed")
7240            .checksum;
7241        install_repack_result(&git_dir, format, &result, true)
7242            .expect("test operation should succeed");
7243
7244        // Obsolete pack and its index are gone.
7245        assert!(!existing.pack_path.exists());
7246        assert!(!existing.index_path.exists());
7247        // Packed loose objects are gone from disk.
7248        for (oid, _) in &graph {
7249            assert!(
7250                !db.loose()
7251                    .object_path(oid)
7252                    .expect("test operation should succeed")
7253                    .exists()
7254            );
7255        }
7256        // The new consolidated pack remains and still serves every object.
7257        let pack_dir = git_dir.join("objects").join("pack");
7258        assert!(
7259            pack_dir
7260                .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
7261                .exists()
7262        );
7263        let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
7264        for (oid, object) in &graph {
7265            assert!(
7266                reopened
7267                    .contains(oid)
7268                    .expect("test operation should succeed")
7269            );
7270            assert_eq!(read_object_for_assert(&reopened, oid), *object);
7271        }
7272        let packed_oid = packed_blob
7273            .object_id(format)
7274            .expect("test operation should succeed");
7275        assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
7276
7277        fs::remove_dir_all(root).expect("test operation should succeed");
7278    }
7279
7280    #[test]
7281    fn install_repack_result_preserves_keep_and_promisor_packs() {
7282        let root = temp_root("sley-repack-install-keep-promisor");
7283        let git_dir = root.join(".git");
7284        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7285        let format = ObjectFormat::Sha1;
7286        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7287
7288        let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
7289        let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
7290            .expect("test operation should succeed");
7291        let keep_install = db
7292            .install_pack(&keep_pack)
7293            .expect("test operation should succeed");
7294        let keep_sidecar = keep_install.pack_path.with_extension("keep");
7295        fs::write(&keep_sidecar, b"").expect("test operation should succeed");
7296
7297        let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
7298        let promisor_pack =
7299            PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
7300                .expect("test operation should succeed");
7301        let promisor_install = db
7302            .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
7303            .expect("test operation should succeed");
7304        let promisor_sidecar = promisor_install
7305            .promisor_path
7306            .clone()
7307            .expect("promisor sidecar");
7308
7309        let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
7310        let result = repack_all_objects(&git_dir, format)
7311            .expect("test operation should succeed")
7312            .expect("test operation should succeed");
7313        assert!(result.obsolete_packs.contains(&keep_install.pack_path));
7314        assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
7315
7316        install_repack_result(&git_dir, format, &result, true)
7317            .expect("test operation should succeed");
7318
7319        for path in [
7320            &keep_install.pack_path,
7321            &keep_install.index_path,
7322            &keep_sidecar,
7323            &promisor_install.pack_path,
7324            &promisor_install.index_path,
7325            &promisor_sidecar,
7326        ] {
7327            assert!(path.exists(), "{} should be preserved", path.display());
7328        }
7329        for (oid, _) in &graph {
7330            assert!(
7331                !db.loose()
7332                    .object_path(oid)
7333                    .expect("test operation should succeed")
7334                    .exists()
7335            );
7336        }
7337
7338        fs::remove_dir_all(root).expect("test operation should succeed");
7339    }
7340
7341    #[test]
7342    fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
7343        // Safety: a loose object whose id is not in the new pack must survive
7344        // pruning even if the caller lists it in `packed_loose`.
7345        let root = temp_root("sley-repack-install-safety");
7346        let git_dir = root.join(".git");
7347        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7348        let format = ObjectFormat::Sha1;
7349        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7350        let graph = write_commit_graph(&mut db, b"safety packed\n");
7351
7352        let mut result = repack_all_objects(&git_dir, format)
7353            .expect("test operation should succeed")
7354            .expect("test operation should succeed");
7355
7356        // A loose object that is NOT in the new pack, but mislabeled as packed.
7357        let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
7358        let stray_oid = db
7359            .write_object(stray.clone())
7360            .expect("test operation should succeed");
7361        assert!(!result.packed_loose.contains(&stray_oid));
7362        result.packed_loose.push(stray_oid);
7363
7364        install_repack_result(&git_dir, format, &result, true)
7365            .expect("test operation should succeed");
7366
7367        // The stray loose object is untouched because it is not in the new pack.
7368        assert!(
7369            db.loose()
7370                .object_path(&stray_oid)
7371                .expect("test operation should succeed")
7372                .exists()
7373        );
7374        assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
7375        // Genuinely packed loose objects were still removed.
7376        for (oid, _) in &graph {
7377            assert!(
7378                !db.loose()
7379                    .object_path(oid)
7380                    .expect("test operation should succeed")
7381                    .exists()
7382            );
7383        }
7384
7385        fs::remove_dir_all(root).expect("test operation should succeed");
7386    }
7387
7388    #[test]
7389    fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
7390        let root = temp_root("sley-prune-unreachable");
7391        let git_dir = root.join(".git");
7392        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7393        let format = ObjectFormat::Sha1;
7394        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7395        let graph = write_commit_graph(&mut db, b"reachable payload\n");
7396        let commit_oid = graph[0].0.clone();
7397
7398        // A dangling loose blob not referenced by the commit graph.
7399        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
7400        let dangling_oid = db
7401            .write_object(dangling)
7402            .expect("test operation should succeed");
7403
7404        // Report-only pass leaves everything on disk.
7405        let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
7406            .expect("test operation should succeed");
7407        assert_eq!(reported, vec![dangling_oid]);
7408        assert!(
7409            db.loose()
7410                .object_path(&dangling_oid)
7411                .expect("test operation should succeed")
7412                .exists()
7413        );
7414
7415        // Deleting pass removes only the unreachable object.
7416        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
7417            .expect("test operation should succeed");
7418        assert_eq!(deleted, vec![dangling_oid]);
7419        assert!(
7420            !db.loose()
7421                .object_path(&dangling_oid)
7422                .expect("test operation should succeed")
7423                .exists()
7424        );
7425        for (oid, object) in &graph {
7426            assert!(
7427                db.loose()
7428                    .object_path(oid)
7429                    .expect("test operation should succeed")
7430                    .exists()
7431            );
7432            assert_eq!(read_object_for_assert(&db, oid), *object);
7433        }
7434
7435        fs::remove_dir_all(root).expect("test operation should succeed");
7436    }
7437
7438    #[test]
7439    fn prune_unreachable_loose_ignores_gitlink_targets() {
7440        let root = temp_root("sley-prune-gitlink");
7441        let git_dir = root.join(".git");
7442        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7443        let format = ObjectFormat::Sha1;
7444        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7445
7446        let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
7447            .expect("test operation should succeed");
7448        let tree = EncodedObject::new(
7449            ObjectType::Tree,
7450            Tree {
7451                entries: vec![TreeEntry {
7452                    mode: 0o160000,
7453                    name: BString::from(b"submodule"),
7454                    oid: submodule_oid,
7455                }],
7456            }
7457            .write(),
7458        );
7459        let tree_oid = db
7460            .write_object(tree)
7461            .expect("test operation should succeed");
7462        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7463        let commit = EncodedObject::new(
7464            ObjectType::Commit,
7465            Commit {
7466                tree: tree_oid,
7467                parents: Vec::new(),
7468                author: identity.clone(),
7469                committer: identity,
7470                encoding: None,
7471                message: b"gitlink\n".to_vec(),
7472            }
7473            .write(),
7474        );
7475        let commit_oid = db
7476            .write_object(commit)
7477            .expect("test operation should succeed");
7478        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
7479        let dangling_oid = db
7480            .write_object(dangling)
7481            .expect("test operation should succeed");
7482
7483        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
7484            .expect("test operation should succeed");
7485
7486        assert_eq!(deleted, vec![dangling_oid]);
7487        assert!(
7488            !db.loose()
7489                .object_path(&dangling_oid)
7490                .expect("test operation should succeed")
7491                .exists()
7492        );
7493
7494        fs::remove_dir_all(root).expect("test operation should succeed");
7495    }
7496
7497    fn temp_root(prefix: &str) -> PathBuf {
7498        std::env::temp_dir().join(format!(
7499            "{prefix}-{}-{}",
7500            std::process::id(),
7501            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
7502        ))
7503    }
7504}