Skip to main content

sley_odb/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
9use sley_formats::{Bundle, BundleReference};
10use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
11use sley_pack::{
12    MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
13    PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
14};
15use std::collections::{HashMap, HashSet};
16use std::io::{Read, Seek, SeekFrom, Write};
17use std::path::{Path, PathBuf};
18use std::sync::atomic::{AtomicU64, Ordering};
19use std::sync::{Arc, Mutex, OnceLock};
20use std::{env, fs};
21
// Process-wide atomic counter, initialised to zero.
// NOTE(review): no use of this counter is visible in this part of the file;
// judging by the name it presumably keeps temporary file names unique —
// confirm at the use site.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
23
/// Read-side interface of an object store: looks objects up by id and exposes
/// the shallow/graft seam that history walks consult.
pub trait ObjectReader {
    /// Returns the encoded object stored under `oid`.
    ///
    /// NOTE(review): callers in this file treat `GitError::NotFound` as
    /// "object absent" — confirm every implementation reports a missing
    /// object that way.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Graft-points seam (shallow clones today, replace refs/grafts later):
    /// `true` when history is cut at `oid`, so every walk must treat the
    /// commit as parentless even though its raw body still names parents.
    ///
    /// [`FileObjectDatabase`] answers from `$GIT_DIR/shallow`; readers that
    /// are not backed by a repository (in-memory stores, pack overlays)
    /// keep the default "no grafts".
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }

    /// Whether this reader has any shallow/graft boundaries at all. Walkers can
    /// use this to choose dense graph-only traversal when no boundary can cut
    /// parent edges.
    ///
    /// The default implementation reports `false`.
    fn has_shallow_grafts(&self) -> bool {
        false
    }
}
45
46fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
47    (*oid == ObjectId::empty_tree(format))
48        .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
49}
50
51fn with_missing_object_context(
52    err: GitError,
53    oid: ObjectId,
54    context: MissingObjectContext,
55) -> GitError {
56    let kind = err
57        .not_found_kind()
58        .and_then(sley_core::NotFoundKind::missing_object_kind);
59    match kind {
60        Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
61        None => err,
62    }
63}
64
65/// Parents of a parsed commit with the graft seam applied: empty when the
66/// reader cuts history at `oid` (shallow boundary), the raw parsed parents
67/// otherwise.
68pub fn grafted_parents<R: ObjectReader + ?Sized>(
69    reader: &R,
70    oid: &ObjectId,
71    parents: Vec<ObjectId>,
72) -> Vec<ObjectId> {
73    if reader.is_shallow_graft(oid) {
74        Vec::new()
75    } else {
76        parents
77    }
78}
79
/// Write-side interface of an object store.
pub trait ObjectWriter {
    /// Write `object`, returning its id. Takes `&self`: every implementation's
    /// write state (in-memory map, loose-object cache) is behind interior
    /// mutability, so a single handle can interleave reads and writes without a
    /// `&mut` borrow. This lets the merge engine read and write through one `db`
    /// instead of opening a second read-only handle that re-warms the caches.
    ///
    /// Pack unpacking in this file compares the returned id with the id the
    /// pack recorded for the entry and fails on a mismatch.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
88
/// Outcome of importing a bundle (see [`unbundle_objects`] and
/// [`install_bundle_pack`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids reported by the destination for the objects carried in the bundle's
    /// pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the bundle's reference list.
    pub references: Vec<BundleReference>,
}
94
/// Outcome of writing every entry of a parsed pack through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Id returned by the writer for each pack entry, in pack-entry order.
    pub written_objects: Vec<ObjectId>,
}
99
/// Description of a pack installed into a file-backed object database.
///
/// NOTE(review): the code that fills this struct is not visible in this part
/// of the file; the field notes below are inferred from names and types and
/// should be confirmed against the installer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Presumably the pack's base name (e.g. `pack-<checksum>`) — confirm.
    pub pack_name: String,
    /// Presumably the location of the installed `.pack` file — confirm.
    pub pack_path: PathBuf,
    /// Presumably the location of the matching `.idx` file — confirm.
    pub index_path: PathBuf,
    /// Presumably the `.promisor` marker path, when one was written — confirm.
    pub promisor_path: Option<PathBuf>,
    /// Presumably the ids of the objects contained in the pack — confirm.
    pub object_ids: Vec<ObjectId>,
}
108
/// Backend-independent result of [`RawPackInstaller::install_raw_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Object ids the backend reported for the installed pack.
    pub object_ids: Vec<ObjectId>,
}
113
/// Options accepted when installing a generated pack. `Default` yields
/// `promisor: false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// NOTE(review): presumably requests that the pack be marked as a promisor
    /// pack (compare [`PackInstallResult::promisor_path`]) — confirm against
    /// the installer, which is not visible in this part of the file.
    pub promisor: bool,
}
118
/// A destination that can ingest a complete pack stream in one call.
///
/// Implemented in this file for [`FileObjectDatabase`] and [`ObjectDatabase`].
pub trait RawPackInstaller {
    /// Installs the pack stream `pack_bytes` and reports the ids of the objects
    /// it contained.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
122
/// Result of resolving an object-id prefix.
///
/// NOTE(review): the resolver is not visible in this part of the file; the
/// variant notes are inferred from names and payload types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// Presumably: no object matches the prefix.
    Missing,
    /// Presumably: exactly one object matches; carries its id.
    Unique(ObjectId),
    /// Presumably: several objects match; carries the candidate ids.
    Ambiguous(Vec<ObjectId>),
}
129
/// Storage-level facts about a single object.
///
/// NOTE(review): the producer is not visible in this part of the file; the
/// field notes are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Presumably the number of bytes the object occupies on disk.
    pub disk_size: u64,
    /// Presumably the id of the delta base. The field is not optional, so
    /// non-delta objects must use some sentinel — TODO confirm which.
    pub deltabase: ObjectId,
}
135
136impl RawPackInstaller for FileObjectDatabase {
137    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
138        let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
139        Ok(RawPackInstallResult {
140            object_ids: result.object_ids,
141        })
142    }
143}
144
145impl RawPackInstaller for ObjectDatabase {
146    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
147        let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
148        Ok(RawPackInstallResult {
149            object_ids: result.written_objects,
150        })
151    }
152}
153
154pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
155    let mut missing = Vec::new();
156    for prerequisite in &bundle.prerequisites {
157        match reader.read_object(&prerequisite.oid) {
158            Ok(object) => {
159                let actual = object.object_id(bundle.format)?;
160                if actual != prerequisite.oid {
161                    return Err(GitError::InvalidObject(format!(
162                        "bundle prerequisite {} hashes to {actual}",
163                        prerequisite.oid
164                    )));
165                }
166            }
167            Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
168            Err(err) => return Err(err),
169        }
170    }
171    if missing.is_empty() {
172        return Ok(());
173    }
174    Err(GitError::object_not_found_in(
175        missing[0],
176        MissingObjectContext::PackInstall,
177    ))
178}
179
180pub fn unbundle_objects<R, W>(
181    bundle: &Bundle,
182    prerequisite_reader: &R,
183    writer: &mut W,
184) -> Result<BundleUnbundleResult>
185where
186    R: ObjectReader,
187    W: ObjectWriter,
188{
189    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
190    let pack = PackFile::parse_bundle(bundle)?;
191    let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
192    Ok(BundleUnbundleResult {
193        written_objects,
194        references: bundle.references.clone(),
195    })
196}
197
198pub fn install_bundle_pack<R>(
199    bundle: &Bundle,
200    prerequisite_reader: &R,
201    destination: &impl RawPackInstaller,
202) -> Result<BundleUnbundleResult>
203where
204    R: ObjectReader,
205{
206    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
207    let install = destination.install_raw_pack(&bundle.pack)?;
208    Ok(BundleUnbundleResult {
209        written_objects: install.object_ids,
210        references: bundle.references.clone(),
211    })
212}
213
214pub fn unpack_packfile_objects<W>(
215    pack_bytes: &[u8],
216    format: ObjectFormat,
217    writer: &W,
218) -> Result<PackUnpackResult>
219where
220    W: ObjectWriter,
221{
222    let pack = PackFile::parse(pack_bytes, format)?;
223    write_pack_objects(pack, writer, "pack")
224}
225
226fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
227where
228    W: ObjectWriter,
229{
230    let mut written_objects = Vec::with_capacity(pack.entries.len());
231    for entry in pack.entries {
232        let expected = entry.entry.oid;
233        let actual = writer.write_object(entry.object)?;
234        if actual != expected {
235            return Err(GitError::InvalidObject(format!(
236                "{source} object id mismatch: expected {expected}, wrote {actual}"
237            )));
238        }
239        written_objects.push(actual);
240    }
241    Ok(PackUnpackResult { written_objects })
242}
243
244pub fn collect_reachable_object_ids<R, I>(
245    reader: &R,
246    format: ObjectFormat,
247    starts: I,
248) -> Result<HashSet<ObjectId>>
249where
250    R: ObjectReader,
251    I: IntoIterator<Item = ObjectId>,
252{
253    walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
254}
255
256/// [`collect_reachable_object_ids`] with a cut set: commits in `cut` are
257/// collected, but the walk does not continue to their parents — the view a
258/// shallow repository has of its own refs (`$GIT_DIR/shallow` of the *other*
259/// side, threaded explicitly because `reader` belongs to this side).
260pub fn collect_reachable_object_ids_with_cut<R, I>(
261    reader: &R,
262    format: ObjectFormat,
263    starts: I,
264    cut: &HashSet<ObjectId>,
265) -> Result<HashSet<ObjectId>>
266where
267    R: ObjectReader,
268    I: IntoIterator<Item = ObjectId>,
269{
270    walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
271}
272
273/// [`collect_reachable_object_ids`] with a stop set: objects in `excluded` are
274/// not visited and not expanded, so the walk never sees anything reachable only
275/// through them (used to truncate history at a shallow boundary).
276pub fn collect_reachable_object_ids_excluding<R, I>(
277    reader: &R,
278    format: ObjectFormat,
279    starts: I,
280    excluded: &HashSet<ObjectId>,
281) -> Result<HashSet<ObjectId>>
282where
283    R: ObjectReader,
284    I: IntoIterator<Item = ObjectId>,
285{
286    walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
287}
288
289pub fn collect_reachable_objects<R, I>(
290    reader: &R,
291    format: ObjectFormat,
292    starts: I,
293    excluded: &HashSet<ObjectId>,
294) -> Result<Vec<Arc<EncodedObject>>>
295where
296    R: ObjectReader,
297    I: IntoIterator<Item = ObjectId>,
298{
299    let mut objects = Vec::new();
300    walk_reachable_objects(reader, format, starts, excluded, |_, object| {
301        objects.push(Arc::clone(object));
302    })?;
303    Ok(objects)
304}
305
/// An object paired with its already-known id, so pack writing does not have
/// to hash it again (see [`pack_inputs`]).
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id the object was looked up or visited under.
    oid: ObjectId,
    /// Shared handle to the object's encoded form.
    object: Arc<EncodedObject>,
}
311
312fn collect_reachable_pack_objects<R, I>(
313    reader: &R,
314    format: ObjectFormat,
315    starts: I,
316    excluded: &HashSet<ObjectId>,
317) -> Result<Vec<ReachablePackObject>>
318where
319    R: ObjectReader,
320    I: IntoIterator<Item = ObjectId>,
321{
322    let mut objects = Vec::new();
323    walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
324        objects.push(ReachablePackObject {
325            oid: *oid,
326            object: Arc::clone(object),
327        });
328    })?;
329    Ok(objects)
330}
331
332fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
333    objects
334        .iter()
335        .map(|entry| PackInput {
336            oid: &entry.oid,
337            object: &entry.object,
338        })
339        .collect()
340}
341
342pub fn install_reachable_pack<I>(
343    source: &impl ObjectReader,
344    destination: &impl RawPackInstaller,
345    format: ObjectFormat,
346    starts: I,
347) -> Result<Option<RawPackInstallResult>>
348where
349    I: IntoIterator<Item = ObjectId>,
350{
351    install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
352}
353
354pub fn install_reachable_pack_excluding<I>(
355    source: &impl ObjectReader,
356    destination: &impl RawPackInstaller,
357    format: ObjectFormat,
358    starts: I,
359    excluded: &HashSet<ObjectId>,
360) -> Result<Option<RawPackInstallResult>>
361where
362    I: IntoIterator<Item = ObjectId>,
363{
364    let pack = match build_reachable_pack(source, format, starts, excluded)? {
365        Some(pack) => pack,
366        None => return Ok(None),
367    };
368    destination.install_raw_pack(&pack.pack).map(Some)
369}
370
371pub fn build_reachable_pack<R, I>(
372    reader: &R,
373    format: ObjectFormat,
374    starts: I,
375    excluded: &HashSet<ObjectId>,
376) -> Result<Option<PackWrite>>
377where
378    R: ObjectReader,
379    I: IntoIterator<Item = ObjectId>,
380{
381    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
382    if objects.is_empty() {
383        return Ok(None);
384    }
385    // Delta-compress reachable packs (used by install/push/fetch) via git-pack's
386    // sliding-window selection. Self-contained, ofs-delta by default; round-trips
387    // through the existing parser. PackWrite shape is unchanged, so callers are
388    // unaffected.
389    let inputs = pack_inputs(&objects);
390    PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
391}
392
393pub fn build_and_install_reachable_pack<R, I>(
394    source: &R,
395    destination: &FileObjectDatabase,
396    format: ObjectFormat,
397    starts: I,
398    excluded: &HashSet<ObjectId>,
399    options: RawPackInstallOptions,
400) -> Result<Option<PackInstallResult>>
401where
402    R: ObjectReader,
403    I: IntoIterator<Item = ObjectId>,
404{
405    build_and_install_reachable_pack_filtered(
406        source,
407        destination,
408        format,
409        starts,
410        excluded,
411        options,
412        None,
413        None,
414    )
415}
416
/// A partial-clone object filter applied while building a transfer pack.
///
/// Mirrors the subset of upstream's `list-objects-filter` the in-process local
/// server supports: directly-wanted tips are always packed; the filter only
/// prunes objects reached *through* the traversal (upstream's
/// `filter_blobs_none` runs on traversed blobs, never on wanted tips).
///
/// Consumed by [`build_and_install_reachable_pack_filtered`], which applies it
/// to the collected object list after the reachability walk has finished.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// `blob:none`: omit every blob reached through tree traversal.
    BlobNone,
}
428
429/// [`build_and_install_reachable_pack`] with an optional partial-clone
430/// `filter`. With `Some(BlobNone)`, blobs are dropped from the pack unless
431/// they are directly wanted (named in `starts`).
432#[allow(clippy::too_many_arguments)]
433pub fn build_and_install_reachable_pack_filtered<R, I>(
434    source: &R,
435    destination: &FileObjectDatabase,
436    format: ObjectFormat,
437    starts: I,
438    excluded: &HashSet<ObjectId>,
439    options: RawPackInstallOptions,
440    filter: Option<PackObjectFilter>,
441    unpack_limit: Option<usize>,
442) -> Result<Option<PackInstallResult>>
443where
444    R: ObjectReader,
445    I: IntoIterator<Item = ObjectId>,
446{
447    let starts: Vec<ObjectId> = starts.into_iter().collect();
448    let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
449    let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
450    match filter {
451        Some(PackObjectFilter::BlobNone) => {
452            objects.retain(|entry| {
453                entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
454            });
455        }
456        None => {}
457    }
458    if objects.is_empty() {
459        return Ok(None);
460    }
461    // Mirror fetch-pack's unpack-limit: small transfers are exploded into
462    // loose objects instead of landing as a pack (upstream `get_pack` picks
463    // unpack-objects when the header count is below fetch/transfer.unpackLimit).
464    if let Some(limit) = unpack_limit
465        && objects.len() < limit
466    {
467        for entry in &objects {
468            destination.loose().write_object((*entry.object).clone())?;
469        }
470        return Ok(None);
471    }
472    let inputs = pack_inputs(&objects);
473    let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
474    destination
475        .install_generated_pack_unchecked(&pack, options)
476        .map(Some)
477}
478
479/// Assemble a pack stream that reuses an existing pack's object data verbatim
480/// (upstream pack-objects' "pack reuse" fast path, full-pack case) and appends
481/// `appended` as freshly encoded undeltified entries.
482///
483/// The reused pack's entry bytes are copied as-is between our own header and
484/// trailer: a full-pack copy preserves every relative distance, so internal
485/// `OFS_DELTA` bases stay valid. The header object count covers both the
486/// reused and appended entries, and the trailing pack checksum is recomputed
487/// over the assembled stream.
488pub fn assemble_pack_with_verbatim_reuse(
489    format: ObjectFormat,
490    reused_pack_bytes: &[u8],
491    appended: &[PackInput<'_>],
492) -> Result<(Vec<u8>, u32)> {
493    assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
494}
495
496/// Like [`assemble_pack_with_verbatim_reuse`], but concatenates multiple whole
497/// packs before appending fresh entries.
498pub fn assemble_pack_with_verbatim_reuses(
499    format: ObjectFormat,
500    reused_packs: &[&[u8]],
501    appended: &[PackInput<'_>],
502) -> Result<(Vec<u8>, u32)> {
503    let hash_len = format.raw_len();
504    let mut reused_count = 0u32;
505    let mut capacity = 12 + hash_len + 64 * appended.len();
506    for reused_pack_bytes in reused_packs {
507        if reused_pack_bytes.len() < 12 + hash_len {
508            return Err(GitError::InvalidFormat("reused pack too short".into()));
509        }
510        if &reused_pack_bytes[..4] != b"PACK" {
511            return Err(GitError::InvalidFormat(
512                "reused pack has no signature".into(),
513            ));
514        }
515        let version = u32::from_be_bytes([
516            reused_pack_bytes[4],
517            reused_pack_bytes[5],
518            reused_pack_bytes[6],
519            reused_pack_bytes[7],
520        ]);
521        if version != 2 {
522            return Err(GitError::Unsupported(format!(
523                "reused pack version {version}"
524            )));
525        }
526        let count = u32::from_be_bytes([
527            reused_pack_bytes[8],
528            reused_pack_bytes[9],
529            reused_pack_bytes[10],
530            reused_pack_bytes[11],
531        ]);
532        reused_count = reused_count
533            .checked_add(count)
534            .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
535        capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
536    }
537    let total = reused_count
538        .checked_add(appended.len() as u32)
539        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
540
541    let mut out = Vec::with_capacity(capacity);
542    out.extend_from_slice(b"PACK");
543    out.extend_from_slice(&2u32.to_be_bytes());
544    out.extend_from_slice(&total.to_be_bytes());
545    for reused_pack_bytes in reused_packs {
546        out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
547    }
548    for input in appended {
549        write_undeltified_pack_entry(&mut out, input.object)?;
550    }
551    let checksum = sley_core::digest_bytes(format, &out)?;
552    out.extend_from_slice(checksum.as_bytes());
553    Ok((out, reused_count))
554}
555
556/// Assemble a pack stream by copying already-encoded pack entries verbatim and
557/// appending freshly encoded undeltified entries.
558pub fn assemble_pack_with_verbatim_entries(
559    format: ObjectFormat,
560    reused_entries: &[&[u8]],
561    appended: &[PackInput<'_>],
562) -> Result<(Vec<u8>, u32)> {
563    let reused_count = u32::try_from(reused_entries.len())
564        .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
565    let total = reused_count
566        .checked_add(appended.len() as u32)
567        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
568
569    let mut capacity = 12 + format.raw_len() + 64 * appended.len();
570    for entry in reused_entries {
571        capacity = capacity.saturating_add(entry.len());
572    }
573    let mut out = Vec::with_capacity(capacity);
574    out.extend_from_slice(b"PACK");
575    out.extend_from_slice(&2u32.to_be_bytes());
576    out.extend_from_slice(&total.to_be_bytes());
577    for entry in reused_entries {
578        out.extend_from_slice(entry);
579    }
580    for input in appended {
581        write_undeltified_pack_entry(&mut out, input.object)?;
582    }
583    let checksum = sley_core::digest_bytes(format, &out)?;
584    out.extend_from_slice(checksum.as_bytes());
585    Ok((out, reused_count))
586}
587
588/// Append one undeltified pack entry (type/size varint header + zlib body).
589fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
590    let type_bits: u8 = match object.object_type {
591        ObjectType::Commit => 1,
592        ObjectType::Tree => 2,
593        ObjectType::Blob => 3,
594        ObjectType::Tag => 4,
595    };
596    let mut size = object.body.len() as u64;
597    let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
598    size >>= 4;
599    while size > 0 {
600        out.push(byte | 0x80);
601        byte = (size & 0x7f) as u8;
602        size >>= 7;
603    }
604    out.push(byte);
605    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
606    encoder.write_all(&object.body)?;
607    out.extend_from_slice(&encoder.finish()?);
608    Ok(())
609}
610
/// Outcome of consolidating a repository's objects into a single new pack.
///
/// This is the engine for `git gc` / `git repack`: [`repack_all_objects`],
/// [`repack_reachable_objects`] and [`repack_loose_objects`] each produce the
/// bytes for one new delta-compressed pack plus its index, and report which
/// on-disk artifacts the caller could now remove. No deletions are performed
/// by the engine itself; the CLI decides reachability policy and performs any
/// pruning (see [`install_repack_result`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the freshly written `.pack` file.
    pub pack: Vec<u8>,
    /// Bytes of the matching `.idx` file for [`RepackResult::pack`].
    pub idx: Vec<u8>,
    /// Number of distinct objects contained in the new pack.
    pub object_count: usize,
    /// Paths of pre-existing `*.pack` files the caller may remove.
    ///
    /// [`repack_all_objects`] and [`repack_reachable_objects`] list every
    /// existing pack except one named after the new pack's checksum. Only the
    /// former guarantees that every object those packs hold is also in
    /// [`RepackResult::pack`]; the latter deliberately drops unreachable
    /// objects. [`repack_loose_objects`] leaves this empty.
    pub obsolete_packs: Vec<PathBuf>,
    /// Loose object ids that are now also present in the new pack and therefore
    /// redundant on disk.
    pub packed_loose: Vec<ObjectId>,
    // Checksum the pack writer reported for `pack`. Kept private so installing
    // can validate the public `pack` bytes against it before writing anything.
    pack_checksum: ObjectId,
    // Index entries the pack writer reported for `pack`.
    index_entries: Vec<PackIndexEntry>,
}
635
636/// Gather every object in `git_dir` (loose objects and every existing pack) and
637/// write them into a single new delta-compressed pack.
638///
639/// Returns the new pack/index bytes, the count of packed objects, the list of
640/// pre-existing pack files that the new pack supersedes, and the loose object
641/// ids that are now packed. Nothing is deleted: the caller (CLI) decides
642/// reachability policy and performs any pruning, optionally via
643/// [`install_repack_result`].
644///
645/// Returns `Ok(None)` when the repository contains no objects at all.
646/// `git repack -a`'s gathering rule: pack the reachability closure of `roots`
647/// (ref tips, `HEAD`, reflog entries, indexed objects) instead of everything
648/// on disk. Borrowed objects (alternates) reachable from the roots are packed
649/// into the new local pack like upstream `pack-objects --all` without
650/// `--local`; previously-packed objects that are no longer reachable are NOT
651/// carried forward (that is how `repack -a -d` drops them). Missing objects
652/// are tolerated (stale reflog entries may reference pruned history).
653///
654/// Returns `Ok(None)` when no roots resolve to any object.
655pub fn repack_reachable_objects(
656    git_dir: &Path,
657    format: ObjectFormat,
658    roots: &[ObjectId],
659) -> Result<Option<RepackResult>> {
660    let objects_dir = repository_objects_dir(git_dir);
661    let database = FileObjectDatabase::new(objects_dir.clone(), format);
662
663    let mut seen: HashSet<ObjectId> = HashSet::new();
664    let mut objects: Vec<ReachablePackObject> = Vec::new();
665    let mut pending: Vec<ObjectId> = roots.to_vec();
666    while let Some(oid) = pending.pop() {
667        if !seen.insert(oid) {
668            continue;
669        }
670        let object = match database.read_object(&oid) {
671            Ok(object) => object,
672            Err(GitError::NotFound(_)) => continue,
673            Err(err) => return Err(err),
674        };
675        match object.object_type {
676            ObjectType::Commit => {
677                let commit = Commit::parse_ref(format, &object.body)?;
678                pending.extend(grafted_parents(&database, &oid, commit.parents));
679                pending.push(commit.tree);
680            }
681            ObjectType::Tree => {
682                for entry in TreeEntries::new(format, &object.body) {
683                    let entry = entry?;
684                    if !entry.is_gitlink() {
685                        pending.push(entry.oid);
686                    }
687                }
688            }
689            ObjectType::Tag => {
690                let tag = Tag::parse_ref(format, &object.body)?;
691                pending.push(tag.object);
692            }
693            ObjectType::Blob => {}
694        }
695        objects.push(ReachablePackObject { oid, object });
696    }
697    if objects.is_empty() {
698        return Ok(None);
699    }
700
701    let inputs = pack_inputs(&objects);
702    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
703    let object_count = written.entries.len();
704
705    // Every pre-existing local pack is superseded under `-a` (their reachable
706    // objects are in the new pack; their unreachable ones are being dropped).
707    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
708    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
709        .into_iter()
710        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
711        .collect();
712
713    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
714    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
715        .into_iter()
716        .filter(|oid| packed_oid_set.contains(oid))
717        .collect();
718    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
719
720    let pack_checksum = written.checksum;
721    let index_entries = written.entries.clone();
722    Ok(Some(RepackResult {
723        pack: written.pack,
724        idx: written.index,
725        object_count,
726        obsolete_packs,
727        packed_loose,
728        pack_checksum,
729        index_entries,
730    }))
731}
732
733pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
734    let objects_dir = repository_objects_dir(git_dir);
735    let database = FileObjectDatabase::new(objects_dir.clone(), format);
736
737    // Enumerate every object id reachable on disk: loose objects, every pack
738    // index, and any multi-pack-index. `object_ids_in_objects_dir` already
739    // unions all of these and de-duplicates them.
740    let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
741    if all_oids.is_empty() {
742        return Ok(None);
743    }
744
745    // Read each object's canonical encoding so the new pack stores byte-for-byte
746    // identical payloads. Loose objects take precedence over packed copies in
747    // `FileObjectDatabase::read_object`, but both decode to the same bytes.
748    let mut objects = Vec::with_capacity(all_oids.len());
749    for oid in &all_oids {
750        objects.push(ReachablePackObject {
751            oid: *oid,
752            object: database.read_object(oid)?,
753        });
754    }
755
756    let inputs = pack_inputs(&objects);
757    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
758    let object_count = written.entries.len();
759
760    // The new pack contains every object on disk, so every pre-existing pack is
761    // fully superseded. We still record the exact pack paths (not the index
762    // paths) so the caller can delete the right files. The pack we are about to
763    // write is excluded by name in case its checksum collides with an existing
764    // pack (identical contents).
765    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
766    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
767        .into_iter()
768        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
769        .collect();
770
771    // Loose object ids that the new pack now also holds (which is all of them,
772    // since they were gathered into it).
773    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
774    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
775        .into_iter()
776        .filter(|oid| packed_oid_set.contains(oid))
777        .collect();
778    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
779
780    Ok(Some(RepackResult {
781        pack: written.pack,
782        idx: written.index,
783        object_count,
784        obsolete_packs,
785        packed_loose,
786        pack_checksum: written.checksum,
787        index_entries: written.entries,
788    }))
789}
790
791/// Gather only loose objects in `git_dir` and write them into a new pack.
792///
793/// This is the engine for plain `git repack -d` (without `-a`): existing packs
794/// remain in place, and pruning removes only the loose copies that the new pack
795/// now serves.
796pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
797    let objects_dir = repository_objects_dir(git_dir);
798    let database = FileObjectDatabase::new(objects_dir.clone(), format);
799    let loose_oids = loose_object_ids(&objects_dir, format)?;
800    if loose_oids.is_empty() {
801        return Ok(None);
802    }
803
804    let mut objects = Vec::with_capacity(loose_oids.len());
805    for oid in &loose_oids {
806        objects.push(ReachablePackObject {
807            oid: *oid,
808            object: database.read_object(oid)?,
809        });
810    }
811
812    let inputs = pack_inputs(&objects);
813    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
814    let object_count = written.entries.len();
815    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
816    let mut packed_loose: Vec<ObjectId> = loose_oids
817        .into_iter()
818        .filter(|oid| packed_oid_set.contains(oid))
819        .collect();
820    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
821
822    let pack_checksum = written.checksum;
823    let index_entries = written.entries.clone();
824    Ok(Some(RepackResult {
825        pack: written.pack,
826        idx: written.index,
827        object_count,
828        obsolete_packs: Vec::new(),
829        packed_loose,
830        pack_checksum,
831        index_entries,
832    }))
833}
834
/// Write the consolidated pack from a [`RepackResult`] into
/// `objects/pack/` and, when `prune` is set, remove the now-redundant
/// pre-existing packs and packed loose objects.
///
/// This is [`install_repack_result_with_bitmap`] called without bitmap tips,
/// so no `.bitmap` file is produced.
///
/// Pruning is opt-in and deliberately conservative: an object or pack is only
/// removed after verifying it is listed in the index of the freshly written
/// pack. Concretely:
///
/// * a loose object is removed only if its id appears in the new pack's index;
/// * a pre-existing pack is removed only if it is not the pack we just wrote
///   *and* every object listed in its `.idx` is present in the new pack (its
///   `.idx` and known sidecars are removed alongside it);
/// * a stale `multi-pack-index` is removed as soon as it references *any* pack
///   that was removed, so no reader is ever left pointing at a deleted pack.
pub fn install_repack_result(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
) -> Result<()> {
    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
}
857
/// [`install_repack_result`] that additionally writes a `pack-<checksum>.bitmap`
/// reachability bitmap alongside the new pack when `bitmap_tips` is `Some`.
/// `bitmap_tips` carries the repository's ref tips (peeled to commits): they
/// receive selection preference, mirroring upstream's `NEEDS_BITMAP` flagging of
/// ref tips in `git repack -b` / `pack-objects --write-bitmap-index`.
///
/// Steps, in order:
///
/// 1. validate `result` (pack checksum, index checksum, index entries) before
///    touching the pack directory contents;
/// 2. write `.pack`, then `.rev`, then `.idx`;
/// 3. optionally build and (re)write the `.bitmap`;
/// 4. when `prune` is set, remove packs and loose objects that the new index
///    fully covers.
///
/// # Errors
///
/// Returns [`GitError::InvalidFormat`] when the index bytes do not agree with
/// the pack checksum or with the writer's recorded entries, and propagates any
/// validation, I/O, or bitmap-building failure from the helpers it calls.
pub fn install_repack_result_with_bitmap(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // Validate the public bytes against the private provenance (checksum and
    // index entries) recorded in `result` when the pack was written. This
    // avoids inflating and resolving the freshly-written pack a second time
    // while still catching caller mutations before anything is written or
    // pruned.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }
    // All companion files share the `pack-<checksum hex>` stem.
    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    // git writes a `.rev` alongside every repacked pack (`pack.writeReverseIndex`
    // defaults to true). Write it before the `.idx` so the index never becomes
    // visible ahead of its companions, mirroring upstream's finalize order.
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        // Build before pruning: the closure walk reads objects through the
        // pre-existing packs/loose store (the new pack holds the same bytes).
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        // `Ok(None)` from the builder means "no bitmap for this pack"; that is
        // not an error and simply skips the write.
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            // Unlike the pack/idx/rev (content-addressed by the pack
            // checksum), the bitmap depends on selection inputs (e.g.
            // pack.preferBitmapTips), so an existing file must be replaced —
            // write_pack_component's exists-skip would keep a stale selection.
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    // Prune based on the objects the new pack's *index* can resolve (what reads use
    // once the old packs are gone), not just what the pack contains — so a stale
    // pack is never removed for an object the new index cannot serve.
    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();

    // Packs first, then the loose copies the repack recorded as packed.
    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
    Ok(())
}
940
941fn validate_pack_checksum(
942    pack: &[u8],
943    format: ObjectFormat,
944    expected: &ObjectId,
945    context: &str,
946) -> Result<()> {
947    if expected.format() != format {
948        return Err(GitError::InvalidObjectId(format!(
949            "{context} checksum format does not match object format"
950        )));
951    }
952    let hash_len = format.raw_len();
953    if pack.len() < 12 + hash_len {
954        return Err(GitError::InvalidFormat(format!(
955            "{context} pack file too short"
956        )));
957    }
958    if &pack[..4] != b"PACK" {
959        return Err(GitError::InvalidFormat(format!(
960            "{context} pack file missing PACK signature"
961        )));
962    }
963    let trailer_offset = pack.len() - hash_len;
964    let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
965    let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
966    if &actual != expected || trailer != *expected {
967        return Err(GitError::InvalidFormat(format!(
968            "{context} pack checksum does not match generated pack"
969        )));
970    }
971    Ok(())
972}
973
974fn pack_index_entries_match_writer(
975    parsed: &[PackIndexEntry],
976    writer_entries: &[PackIndexEntry],
977) -> bool {
978    if parsed.len() != writer_entries.len() {
979        return false;
980    }
981    let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
982    writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
983    parsed.iter().zip(writer_entries).all(|(left, right)| {
984        left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
985    })
986}
987
988/// List loose objects under `git_dir` that are *not* reachable from `roots`,
989/// optionally deleting them.
990///
991/// Reachability is computed with [`collect_reachable_object_ids`] over the
992/// repository's object database, so trees, parents, and tag targets are all
993/// followed. When `delete` is `false` the returned ids are merely reported;
994/// when `true` each unreachable loose object file is removed (packed copies are
995/// never touched). Deletion is therefore opt-in.
996pub fn prune_unreachable_loose<I>(
997    git_dir: &Path,
998    format: ObjectFormat,
999    roots: I,
1000    delete: bool,
1001) -> Result<Vec<ObjectId>>
1002where
1003    I: IntoIterator<Item = ObjectId>,
1004{
1005    let objects_dir = repository_objects_dir(git_dir);
1006    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1007    let reachable = collect_reachable_object_ids(&database, format, roots)?;
1008
1009    let store = LooseObjectStore::new(objects_dir.clone(), format);
1010    let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1011        .into_iter()
1012        .filter(|oid| !reachable.contains(oid))
1013        .collect();
1014    pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1015
1016    if delete {
1017        for oid in &pruned {
1018            let path = store.object_path(oid)?;
1019            match fs::remove_file(&path) {
1020                Ok(()) => {}
1021                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1022                Err(err) => return Err(GitError::Io(err.to_string())),
1023            }
1024        }
1025    }
1026    Ok(pruned)
1027}
1028
1029/// Loose object ids under `objects_dir`, sorted by hex, with packed objects
1030/// excluded.
1031fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1032    let oids = loose_object_id_set(objects_dir, format)?;
1033    let mut oids = oids.into_iter().collect::<Vec<_>>();
1034    oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1035    Ok(oids)
1036}
1037
1038fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1039    let mut oids = HashSet::new();
1040    collect_loose_object_ids(objects_dir, format, &mut oids)?;
1041    Ok(oids)
1042}
1043
1044/// Absolute paths of every `*.pack` file directly inside `pack_dir`, sorted for
1045/// deterministic output.
1046fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1047    if !pack_dir.exists() {
1048        return Ok(Vec::new());
1049    }
1050    let mut packs = Vec::new();
1051    for entry in fs::read_dir(pack_dir)? {
1052        let path = entry?.path();
1053        if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1054            packs.push(path);
1055        }
1056    }
1057    packs.sort();
1058    Ok(packs)
1059}
1060
1061/// Remove pre-existing packs whose every object is contained in `present`,
1062/// skipping `keep` (the pack just written), `.keep` packs, and `.promisor` packs.
1063/// A stale multi-pack-index that references any removed pack is removed too.
1064fn prune_packs_contained_in(
1065    objects_dir: &Path,
1066    format: ObjectFormat,
1067    present: &HashSet<ObjectId>,
1068    keep: &Path,
1069) -> Result<()> {
1070    let pack_dir = objects_dir.join("pack");
1071    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
1072    let mut removed_stems: HashSet<String> = HashSet::new();
1073
1074    for pack_path in existing_pack_files(&pack_dir)? {
1075        if pack_path == keep {
1076            continue;
1077        }
1078        let Some(stem) = pack_path.file_stem() else {
1079            continue;
1080        };
1081        if Some(stem) == keep_stem.as_deref() {
1082            continue;
1083        }
1084        if pack_path.with_extension("keep").exists()
1085            || pack_path.with_extension("promisor").exists()
1086        {
1087            continue;
1088        }
1089        let index_path = pack_path.with_extension("idx");
1090        if !index_path.exists() {
1091            // Without an index we cannot prove containment; leave it alone.
1092            continue;
1093        }
1094        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1095        if !index
1096            .entries
1097            .iter()
1098            .all(|entry| present.contains(&entry.oid))
1099        {
1100            continue;
1101        }
1102        // Every object in this pack is safely in the new pack and it has no Git
1103        // policy sidecar that says to keep it: remove the pack, its index, and
1104        // cache sidecars derived from them.
1105        remove_file_if_exists(&pack_path)?;
1106        remove_file_if_exists(&index_path)?;
1107        for ext in ["rev", "mtimes", "bitmap"] {
1108            remove_file_if_exists(&pack_path.with_extension(ext))?;
1109        }
1110        removed_stems.insert(stem.to_string_lossy().into_owned());
1111    }
1112
1113    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1114    Ok(())
1115}
1116
1117/// Remove a `multi-pack-index` if it names *any* pack that was removed.
1118///
1119/// A MIDX that still references a deleted pack makes reads fail (the lookup
1120/// resolves to a pack that is gone) before any fallback. Removing the whole MIDX
1121/// when even one of its packs is pruned forces readers back to the individual pack
1122/// indexes, which are correct; `multi-pack-index write` can rebuild it later.
1123fn prune_stale_multi_pack_index(
1124    pack_dir: &Path,
1125    format: ObjectFormat,
1126    removed_stems: &HashSet<String>,
1127) -> Result<()> {
1128    if removed_stems.is_empty() {
1129        return Ok(());
1130    }
1131    let midx_path = pack_dir.join("multi-pack-index");
1132    if !midx_path.exists() {
1133        return Ok(());
1134    }
1135    let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1136    let references_removed_pack = midx.pack_names.iter().any(|name| {
1137        let stem = name.strip_suffix(".idx").unwrap_or(name);
1138        removed_stems.contains(stem)
1139    });
1140    if references_removed_pack {
1141        remove_file_if_exists(&midx_path)?;
1142    }
1143    Ok(())
1144}
1145
1146/// Remove each loose object in `candidates` whose id is in `present`, leaving
1147/// any object not actually packed untouched.
1148fn prune_loose_objects<'a, I>(
1149    objects_dir: &Path,
1150    format: ObjectFormat,
1151    candidates: I,
1152    present: &HashSet<ObjectId>,
1153) -> Result<()>
1154where
1155    I: IntoIterator<Item = &'a ObjectId>,
1156{
1157    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1158    for oid in candidates {
1159        if !present.contains(oid) {
1160            continue;
1161        }
1162        remove_file_if_exists(&store.object_path(oid)?)?;
1163    }
1164    Ok(())
1165}
1166
/// How a delta pack entry names its base, as decoded from the entry header by
/// `pack_entry_delta_base`.
enum PackDeltaBase {
    /// Entry type 6 (ofs-delta): absolute pack offset of the base, i.e. the
    /// entry's own offset minus the encoded backward distance.
    Offset(u64),
    /// Entry type 7 (ref-delta): object id of the base.
    Ref(ObjectId),
}
1171
/// What `scan_pack_index_offsets` learned about one pack entry.
struct PackIndexOffsetInfo {
    /// Offset where the entry's bytes end: the smallest indexed offset above
    /// the entry, else the caller-supplied trailer offset. Equals the entry's
    /// own offset when the index lists that offset more than once.
    end_offset: u64,
    /// Id of the index entry located at the requested delta-base offset;
    /// `None` when no base offset was requested.
    delta_base_oid: Option<ObjectId>,
}
1176
1177fn scan_pack_index_offsets(
1178    index: &PackIndex,
1179    target_offset: u64,
1180    trailer_offset: u64,
1181    delta_base_offset: Option<u64>,
1182) -> Result<PackIndexOffsetInfo> {
1183    let mut target_count = 0usize;
1184    let mut next_offset = None;
1185    let mut delta_base_oid = None;
1186
1187    for entry in &index.entries {
1188        if entry.offset == target_offset {
1189            target_count += 1;
1190        } else if entry.offset > target_offset {
1191            match next_offset {
1192                Some(current) if current <= entry.offset => {}
1193                _ => next_offset = Some(entry.offset),
1194            }
1195        }
1196        if Some(entry.offset) == delta_base_offset {
1197            delta_base_oid = Some(entry.oid);
1198        }
1199    }
1200
1201    if target_count == 0 {
1202        return Err(GitError::InvalidFormat(format!(
1203            "pack index offset {target_offset} not found"
1204        )));
1205    }
1206    if let Some(offset) = delta_base_offset
1207        && delta_base_oid.is_none()
1208    {
1209        return Err(GitError::InvalidFormat(format!(
1210            "ofs-delta base offset {offset} not found"
1211        )));
1212    }
1213
1214    Ok(PackIndexOffsetInfo {
1215        // Preserve the old sorted-vector behavior for malformed indexes with
1216        // duplicate offsets: the next sorted entry has the same offset.
1217        end_offset: if target_count > 1 {
1218            target_offset
1219        } else {
1220            next_offset.unwrap_or(trailer_offset)
1221        },
1222        delta_base_oid,
1223    })
1224}
1225
1226fn pack_entry_delta_base(
1227    format: ObjectFormat,
1228    pack: &[u8],
1229    entry_offset: u64,
1230) -> Result<Option<PackDeltaBase>> {
1231    let mut cursor = usize::try_from(entry_offset)
1232        .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1233    let first = pack_next_byte(pack, &mut cursor)?;
1234    let kind = (first >> 4) & 0x07;
1235    let mut byte = first;
1236    while byte & 0x80 != 0 {
1237        byte = pack_next_byte(pack, &mut cursor)?;
1238    }
1239    match kind {
1240        6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1241            pack,
1242            &mut cursor,
1243            entry_offset,
1244        )?))),
1245        7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1246            format,
1247            pack,
1248            &mut cursor,
1249        )?))),
1250        _ => Ok(None),
1251    }
1252}
1253
1254fn parse_ref_delta_base_oid(
1255    format: ObjectFormat,
1256    pack: &[u8],
1257    cursor: &mut usize,
1258) -> Result<ObjectId> {
1259    let raw_len = format.raw_len();
1260    if *cursor + raw_len > pack.len() {
1261        return Err(GitError::InvalidFormat(
1262            "truncated ref-delta base object id".into(),
1263        ));
1264    }
1265    let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1266    *cursor += raw_len;
1267    Ok(oid)
1268}
1269
1270fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
1271    let mut byte = pack_next_byte(pack, cursor)?;
1272    let mut relative = u64::from(byte & 0x7f);
1273    while byte & 0x80 != 0 {
1274        byte = pack_next_byte(pack, cursor)?;
1275        relative = relative
1276            .checked_add(1)
1277            .and_then(|value| value.checked_shl(7))
1278            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
1279            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
1280    }
1281    entry_offset
1282        .checked_sub(relative)
1283        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
1284}
1285
1286fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
1287    let Some(byte) = pack.get(*cursor).copied() else {
1288        return Err(GitError::InvalidFormat("truncated pack entry".into()));
1289    };
1290    *cursor += 1;
1291    Ok(byte)
1292}
1293
1294fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
1295    Ok(ObjectId::null(format))
1296}
1297
1298/// Remove `path` if it exists, treating a missing file as success.
1299fn remove_file_if_exists(path: &Path) -> Result<()> {
1300    match fs::remove_file(path) {
1301        Ok(()) => Ok(()),
1302        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1303        Err(err) => Err(GitError::Io(err.to_string())),
1304    }
1305}
1306
1307fn walk_reachable_objects<R, I, F>(
1308    reader: &R,
1309    format: ObjectFormat,
1310    starts: I,
1311    excluded: &HashSet<ObjectId>,
1312    visit: F,
1313) -> Result<HashSet<ObjectId>>
1314where
1315    R: ObjectReader,
1316    I: IntoIterator<Item = ObjectId>,
1317    F: FnMut(&ObjectId, &Arc<EncodedObject>),
1318{
1319    walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
1320}
1321
/// [`walk_reachable_objects`] with an additional `cut` set: commits in `cut`
/// are visited (their trees and blobs too) but their parents are not followed,
/// mirroring a shallow client's view of its own history during negotiation.
///
/// The walk is depth-first over an explicit stack, draining everything
/// reachable from one start before moving to the next. `visit` runs exactly
/// once per object that is read, after that object's body has been parsed
/// successfully and before its children are queued.
///
/// Ids in `excluded` are skipped without being read, visited, or recorded.
/// Gitlink tree entries are never followed.
///
/// Returns the set of ids that were read and visited.
///
/// # Errors
///
/// A failed object read is passed through `with_missing_object_context` with
/// [`MissingObjectContext::Traversal`] before being returned; commit, tree, and
/// tag parse errors are propagated as-is.
fn walk_reachable_objects_with_cut<R, I, F>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
    cut: &HashSet<ObjectId>,
    mut visit: F,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
    F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
    let mut seen = HashSet::new();
    let mut pending = Vec::new();
    for start in starts {
        pending.push(start);
        // `pending` is empty again before the next start is pushed.
        while let Some(oid) = pending.pop() {
            if excluded.contains(&oid) {
                continue;
            }
            // `insert` returns false for an id already walked.
            if !seen.insert(oid) {
                continue;
            }
            let object = reader.read_object(&oid).map_err(|err| {
                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
            })?;
            match object.object_type {
                ObjectType::Commit => {
                    // Scope the parsed view so only the extracted fields
                    // outlive it.
                    let (tree, parents) = {
                        let commit = Commit::parse_ref(format, &object.body)?;
                        (commit.tree, commit.parents)
                    };
                    visit(&oid, &object);
                    if !cut.contains(&oid) {
                        // Parents go through `grafted_parents` and are pushed
                        // in reverse so the first one is popped first.
                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
                            pending.push(parent);
                        }
                    }
                    // Pushed last, so the commit's tree is walked before any
                    // parent.
                    pending.push(tree);
                }
                ObjectType::Tree => {
                    // Collect children first: a malformed entry aborts the
                    // walk before `visit` is called for this tree.
                    let mut child_oids = Vec::new();
                    for entry in TreeEntries::new(format, &object.body) {
                        let entry = entry?;
                        if entry.is_gitlink() {
                            continue;
                        }
                        child_oids.push(entry.oid);
                    }
                    visit(&oid, &object);
                    // Reversed so entries are popped in tree order.
                    pending.extend(child_oids.into_iter().rev());
                }
                ObjectType::Tag => {
                    let target = {
                        let tag = Tag::parse_ref(format, &object.body)?;
                        tag.object
                    };
                    visit(&oid, &object);
                    pending.push(target);
                }
                // Blobs have no children.
                ObjectType::Blob => visit(&oid, &object),
            }
        }
    }
    Ok(seen)
}
1392
1393// ===== reachability bitmaps (.bitmap write + consult) =====
1394
/// Tests bit `position` of a `u64`-word bitset laid out in git's bitmap
/// convention: bit `i` lives in word `i / 64` at bit `i % 64` (LSB-first
/// within a word). Positions beyond the end of `words` read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    words
        .get((position / 64) as usize)
        .is_some_and(|word| word & mask != 0)
}
1401
/// Sets bit `position` (word `position / 64`, bit `position % 64`) in
/// `words`. A position beyond the end of `words` is silently ignored; the
/// slice is never grown.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
1408
/// ORs `other` into `acc` word by word. Only the words both slices have in
/// common are touched: extra words in `acc` keep their value and extra words
/// in `other` are ignored.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    let shared = acc.len().min(other.len());
    for index in 0..shared {
        acc[index] |= other[index];
    }
}
1414
/// Ascending list of the set-bit positions in a bitset (the inverse of
/// repeated [`bitset_set`]): word `w`, bit `b` is reported as `w * 64 + b`.
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    words
        .iter()
        .enumerate()
        .flat_map(|(word_index, &word)| {
            // Yield the word, then the word with its lowest set bit cleared,
            // and so on until nothing is left; each step's lowest set bit is
            // the next position.
            std::iter::successors((word != 0).then_some(word), |rest| {
                let cleared = rest & (rest - 1);
                (cleared != 0).then_some(cleared)
            })
            .map(move |rest| word_index as u32 * 64 + rest.trailing_zeros())
        })
        .collect()
}
1428
/// Committer timestamp (epoch seconds) of a commit identity line
/// (`Name <email> <timestamp> <tz>`): the second space-separated field
/// counted from the end. Yields 0 when that field is missing, is not UTF-8,
/// or does not parse as an `i64`, matching git's tolerance for bogus dates
/// during bitmap commit selection.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Splitting from the right gives: timezone, timestamp, everything else.
    let Some(raw) = identity.rsplitn(3, |byte| *byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
1441
/// Upstream `next_commit_index` (pack-bitmap-write.c): the spacing schedule for
/// bitmap commit selection over the date-descending commit list.
///
/// * `idx` up to 100: step 0;
/// * `idx` up to 20000: distance past 100, capped at 100;
/// * beyond that: distance past 20000, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        ..=MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
1460
1461/// Builds a serialised `.bitmap` for the pack described by `index_entries` /
1462/// `pack_checksum`, mirroring upstream pack-bitmap-write.c:
1463///
1464/// * commit selection walks the pack's commits in committer-date-descending
1465///   order through [`bitmap_next_commit_index`]'s spacing schedule, preferring
1466///   `preferred_tips` (ref tips — upstream's `NEEDS_BITMAP`) and merge commits
1467///   inside each window;
1468/// * each selected commit stores its full reachability closure (commits, trees,
1469///   blobs) as pack-order bit positions (no XOR compression — `xor_offset` 0 is
1470///   valid on disk and what readers see after resolution anyway).
1471///
1472/// Returns `Ok(None)` — mirroring upstream's warn-and-skip — when the pack
1473/// lacks full closure (a reachable object is missing from it).
1474pub fn build_pack_bitmap(
1475    db: &FileObjectDatabase,
1476    format: ObjectFormat,
1477    index_entries: &[PackIndexEntry],
1478    pack_checksum: &ObjectId,
1479    preferred_tips: &HashSet<ObjectId>,
1480) -> Result<Option<Vec<u8>>> {
1481    // `index_entries` carries no ordering guarantee (writer provenance is in
1482    // pack-write order); bit numbering follows pack (offset) order.
1483    let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
1484    by_offset.sort_by_key(|&slot| index_entries[slot].offset);
1485    let bit_order: Vec<ObjectId> = by_offset
1486        .into_iter()
1487        .map(|slot| index_entries[slot].oid)
1488        .collect();
1489    build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
1490}
1491
1492/// [`build_pack_bitmap`]'s multi-pack sibling: builds the serialised
1493/// `multi-pack-index-<checksum>.bitmap` for `midx_entries`, with bits in
1494/// pseudo-pack order (preferred pack first, then pack id, then offset — the
1495/// same order [`MultiPackIndex::write_with_reverse_index`] records in `RIDX`)
1496/// and the midx checksum in the BITM checksum field.
1497pub fn build_midx_bitmap(
1498    db: &FileObjectDatabase,
1499    format: ObjectFormat,
1500    midx_entries: &[sley_pack::MultiPackIndexEntry],
1501    midx_checksum: &ObjectId,
1502    preferred_pack: u32,
1503    preferred_tips: &HashSet<ObjectId>,
1504) -> Result<Option<Vec<u8>>> {
1505    let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
1506    pseudo.sort_by_key(|&slot| {
1507        let entry = &midx_entries[slot];
1508        (
1509            entry.pack_int_id != preferred_pack,
1510            entry.pack_int_id,
1511            entry.offset,
1512        )
1513    });
1514    let bit_order: Vec<ObjectId> = pseudo
1515        .into_iter()
1516        .map(|slot| midx_entries[slot].oid)
1517        .collect();
1518    build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
1519}
1520
1521/// Upstream `bitmap_builder_init`'s `num_maximal` counter (pack-bitmap-write.c):
1522/// walk the first-parent ancestry of the selected commits, children before
1523/// parents, propagating per-commit "which selected commits reach me" masks.
1524/// A commit counts as maximal when it is selected, or when distinct selected
1525/// lineages converge on it (its mask gains bits its last contributing child
1526/// did not carry). Only the count is needed (for the trace2 data event), so no
1527/// reverse-edge bookkeeping is kept.
1528fn bitmap_num_maximal_commits(
1529    db: &FileObjectDatabase,
1530    format: ObjectFormat,
1531    selected: &[ObjectId],
1532) -> Result<usize> {
1533    // First-parent subgraph reachable from the selected commits.
1534    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
1535    let mut stack: Vec<ObjectId> = selected.to_vec();
1536    while let Some(oid) = stack.pop() {
1537        if first_parent.contains_key(&oid) {
1538            continue;
1539        }
1540        let object = db.read_object(&oid)?;
1541        let commit = Commit::parse_ref(format, &object.body)?;
1542        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
1543        first_parent.insert(oid, parent);
1544        if let Some(parent) = parent {
1545            stack.push(parent);
1546        }
1547    }
1548    // Children-before-parents order (Kahn over the single first-parent edge).
1549    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
1550    for parent in first_parent.values().flatten() {
1551        *pending_children.entry(*parent).or_default() += 1;
1552    }
1553    let word_count = selected.len().div_ceil(64);
1554    struct MaximalEnt {
1555        mask: Vec<u64>,
1556        maximal: bool,
1557    }
1558    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
1559    for (bit, oid) in selected.iter().enumerate() {
1560        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
1561            mask: vec![0u64; word_count],
1562            maximal: true,
1563        });
1564        ent.mask[bit / 64] |= 1u64 << (bit % 64);
1565        ent.maximal = true;
1566    }
1567    let mut queue: Vec<ObjectId> = first_parent
1568        .keys()
1569        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
1570        .copied()
1571        .collect();
1572    let mut num_maximal = 0usize;
1573    while let Some(oid) = queue.pop() {
1574        if let Some(ent) = ents.remove(&oid) {
1575            if ent.maximal {
1576                num_maximal += 1;
1577            }
1578            if let Some(Some(parent)) = first_parent.get(&oid) {
1579                match ents.entry(*parent) {
1580                    std::collections::hash_map::Entry::Vacant(vacant) => {
1581                        // Fresh parent mask: c_not_p, !p_not_c -> not maximal.
1582                        vacant.insert(MaximalEnt {
1583                            mask: ent.mask.clone(),
1584                            maximal: false,
1585                        });
1586                    }
1587                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
1588                        let parent_ent = occupied.get_mut();
1589                        let c_not_p = ent
1590                            .mask
1591                            .iter()
1592                            .zip(&parent_ent.mask)
1593                            .any(|(child, parent)| child & !parent != 0);
1594                        if c_not_p {
1595                            let p_not_c = parent_ent
1596                                .mask
1597                                .iter()
1598                                .zip(&ent.mask)
1599                                .any(|(parent, child)| parent & !child != 0);
1600                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
1601                                *parent |= child;
1602                            }
1603                            parent_ent.maximal = p_not_c;
1604                        }
1605                    }
1606                }
1607            }
1608        }
1609        if let Some(Some(parent)) = first_parent.get(&oid)
1610            && let Some(remaining) = pending_children.get_mut(parent)
1611        {
1612            *remaining -= 1;
1613            if *remaining == 0 {
1614                queue.push(*parent);
1615            }
1616        }
1617    }
1618    Ok(num_maximal)
1619}
1620
1621/// Shared write half: `bit_order` lists every covered object's oid in bit
1622/// order (pack order for a single pack, pseudo-pack order for a midx);
1623/// `checksum` fills the BITM checksum field (pack checksum / midx checksum).
1624fn build_reachability_bitmap(
1625    db: &FileObjectDatabase,
1626    format: ObjectFormat,
1627    checksum: &ObjectId,
1628    bit_order: &[ObjectId],
1629    preferred_tips: &HashSet<ObjectId>,
1630) -> Result<Option<Vec<u8>>> {
1631    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
1632        return Ok(None);
1633    }
1634    let object_count = bit_order.len();
1635
1636    // The on-disk entry position space is the oid-sorted lookup order (.idx /
1637    // midx OIDL); derive each bit-order slot's rank there.
1638    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
1639    oid_sorted.sort_by(|&left, &right| {
1640        bit_order[left as usize]
1641            .as_bytes()
1642            .cmp(bit_order[right as usize].as_bytes())
1643    });
1644    let mut index_position = vec![0u32; object_count];
1645    for (position, &slot) in oid_sorted.iter().enumerate() {
1646        index_position[slot as usize] = position as u32;
1647    }
1648    let mut oid_to_pack = HashMap::with_capacity(object_count);
1649    for (pack_pos, oid) in bit_order.iter().enumerate() {
1650        oid_to_pack.insert(*oid, pack_pos as u32);
1651    }
1652
1653    // Object types in bit order; commits also collect (date, parent count).
1654    let mut object_types = Vec::with_capacity(object_count);
1655    struct IndexedCommit {
1656        oid: ObjectId,
1657        pack_pos: u32,
1658        index_pos: u32,
1659        date: i64,
1660        parent_count: usize,
1661    }
1662    let mut indexed_commits = Vec::new();
1663    for (pack_pos, oid) in bit_order.iter().enumerate() {
1664        // Type via the header fast path: blobs (the bulk of most packs) never
1665        // need their bodies inflated here.
1666        let object_type = match db.read_object_header(oid)? {
1667            Some((object_type, _)) => object_type,
1668            None => db.read_object(oid)?.object_type,
1669        };
1670        object_types.push(object_type);
1671        if object_type == ObjectType::Commit {
1672            let object = db.read_object(oid)?;
1673            let commit = Commit::parse_ref(format, &object.body)?;
1674            indexed_commits.push(IndexedCommit {
1675                oid: *oid,
1676                pack_pos: pack_pos as u32,
1677                index_pos: index_position[pack_pos],
1678                date: commit_identity_timestamp(commit.committer),
1679                parent_count: grafted_parents(db, oid, commit.parents).len(),
1680            });
1681        }
1682    }
1683
1684    // Selection: date-descending, then the spacing schedule.
1685    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
1686    let mut selected: Vec<&IndexedCommit> = Vec::new();
1687    let commit_count = indexed_commits.len() as u32;
1688    if commit_count < 100 {
1689        selected.extend(indexed_commits.iter());
1690    } else {
1691        let mut i = 0u32;
1692        loop {
1693            let next = bitmap_next_commit_index(i);
1694            if i + next >= commit_count {
1695                break;
1696            }
1697            let mut chosen = &indexed_commits[(i + next) as usize];
1698            if next > 0 {
1699                for j in 0..=next {
1700                    let candidate = &indexed_commits[(i + j) as usize];
1701                    if preferred_tips.contains(&candidate.oid) {
1702                        chosen = candidate;
1703                        break;
1704                    }
1705                    if candidate.parent_count >= 2 {
1706                        chosen = candidate;
1707                    }
1708                }
1709            }
1710            selected.push(chosen);
1711            i += next + 1;
1712        }
1713    }
1714
1715    // Trace2 selection counters (upstream bitmap_builder_init): emitted before
1716    // the closure walk, like upstream emits them before building the ewah
1717    // bitmaps. Computing num_maximal_commits needs its own first-parent walk,
1718    // so it only runs when the trace2 event target is active.
1719    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
1720        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
1721        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
1722        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
1723        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
1724    }
1725
1726    // Reachability closures, oldest-first so newer walks stop at memoised
1727    // older selected commits.
1728    let word_count = object_count.div_ceil(64);
1729    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
1730    for commit in selected.iter().rev() {
1731        let mut acc = vec![0u64; word_count];
1732        let mut pending = vec![commit.oid];
1733        while let Some(oid) = pending.pop() {
1734            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
1735                // Mirrors upstream's "Packfile doesn't have full closure".
1736                eprintln!(
1737                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
1738                );
1739                return Ok(None);
1740            };
1741            if bitset_get(&acc, pack_pos) {
1742                continue;
1743            }
1744            if let Some(stored) = memo.get(&oid) {
1745                bitset_or(&mut acc, stored);
1746                continue;
1747            }
1748            bitset_set(&mut acc, pack_pos);
1749            let object = db.read_object(&oid)?;
1750            let tree = {
1751                let parsed = Commit::parse_ref(format, &object.body)?;
1752                pending.extend(grafted_parents(db, &oid, parsed.parents));
1753                parsed.tree
1754            };
1755            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
1756                return Ok(None);
1757            }
1758        }
1759        memo.insert(commit.oid, Arc::new(acc));
1760    }
1761
1762    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
1763    for commit in &selected {
1764        let words = match memo.get(&commit.oid) {
1765            Some(words) => words,
1766            None => continue,
1767        };
1768        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
1769    }
1770    writer.write().map(Some)
1771}
1772
1773/// Marks `tree` and everything below it (sub-trees, blobs) in `acc`, skipping
1774/// already-set bits (their closure is already covered). Returns `false` when an
1775/// object is missing from the pack (no full closure), after warning.
1776fn bitmap_mark_tree(
1777    db: &impl ObjectReader,
1778    format: ObjectFormat,
1779    tree: &ObjectId,
1780    oid_to_pack: &HashMap<ObjectId, u32>,
1781    acc: &mut [u64],
1782) -> Result<bool> {
1783    let Some(&pack_pos) = oid_to_pack.get(tree) else {
1784        eprintln!(
1785            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
1786        );
1787        return Ok(false);
1788    };
1789    if bitset_get(acc, pack_pos) {
1790        return Ok(true);
1791    }
1792    bitset_set(acc, pack_pos);
1793    let object = db.read_object(tree)?;
1794    for entry in TreeEntries::new(format, &object.body) {
1795        let entry = entry?;
1796        if entry.is_gitlink() {
1797            continue;
1798        }
1799        if entry.is_tree() {
1800            if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
1801                return Ok(false);
1802            }
1803        } else {
1804            let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
1805                eprintln!(
1806                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
1807                    entry.oid
1808                );
1809                return Ok(false);
1810            };
1811            bitset_set(acc, blob_pos);
1812        }
1813    }
1814    Ok(true)
1815}
1816
1817/// A pack's `.bitmap` loaded for consultation: oid <-> pack-position mappings,
1818/// resolved (XOR-expanded) per-commit reachability bitsets, and the four object
1819/// type bitmaps. Bit numbering follows pack order throughout.
1820pub struct LoadedPackBitmap {
1821    object_count: u32,
1822    oid_to_pack: HashMap<ObjectId, u32>,
1823    pack_to_oid: Vec<ObjectId>,
1824    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
1825    commits: Vec<u64>,
1826    trees: Vec<u64>,
1827    blobs: Vec<u64>,
1828    tags: Vec<u64>,
1829}
1830
1831impl LoadedPackBitmap {
1832    pub fn object_count(&self) -> u32 {
1833        self.object_count
1834    }
1835
1836    /// Pack-order position of `oid`, when the object is in the bitmapped pack.
1837    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
1838        self.oid_to_pack.get(oid).copied()
1839    }
1840
1841    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
1842        self.pack_to_oid.get(position as usize)
1843    }
1844
1845    /// The resolved reachability bitset stored for `oid`, when it was one of
1846    /// the writer's selected commits.
1847    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
1848        self.commit_words.get(oid)
1849    }
1850
1851    /// Oids of every commit with a stored bitmap entry (unordered).
1852    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
1853        self.commit_words.keys()
1854    }
1855
1856    /// The type bitmap for `object_type` (bit per pack position).
1857    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
1858        match object_type {
1859            ObjectType::Commit => &self.commits,
1860            ObjectType::Tree => &self.trees,
1861            ObjectType::Blob => &self.blobs,
1862            ObjectType::Tag => &self.tags,
1863        }
1864    }
1865
1866    fn word_count(&self) -> usize {
1867        (self.object_count as usize).div_ceil(64)
1868    }
1869}
1870
1871/// Loads the single-pack `.bitmap` of `objects_dir/pack`, if a valid one
1872/// exists. Scans `pack-*.bitmap` files (sorted, first valid wins, like
1873/// upstream's "first bitmap" behaviour), requires the sibling `.idx`, and
1874/// verifies the recorded pack checksum. Any unreadable/corrupt bitmap yields
1875/// `Ok(None)` — consumers fall back to a regular object walk, mirroring
1876/// upstream's warn-and-ignore on bitmap load failure.
1877pub fn load_pack_bitmap(
1878    objects_dir: &Path,
1879    format: ObjectFormat,
1880) -> Result<Option<LoadedPackBitmap>> {
1881    let pack_dir = objects_dir.join("pack");
1882    if !pack_dir.exists() {
1883        return Ok(None);
1884    }
1885    // A multi-pack bitmap wins over single-pack bitmaps, like upstream's
1886    // open_bitmap trying the midx first.
1887    if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
1888        return Ok(Some(bitmap));
1889    }
1890    let mut bitmap_paths = Vec::new();
1891    for entry in fs::read_dir(&pack_dir)? {
1892        let path = entry?.path();
1893        if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
1894            && path
1895                .file_name()
1896                .and_then(|name| name.to_str())
1897                .is_some_and(|name| name.starts_with("pack-"))
1898        {
1899            bitmap_paths.push(path);
1900        }
1901    }
1902    bitmap_paths.sort();
1903    for bitmap_path in bitmap_paths {
1904        match load_pack_bitmap_file(&bitmap_path, format) {
1905            Ok(Some(bitmap)) => return Ok(Some(bitmap)),
1906            Ok(None) | Err(_) => continue,
1907        }
1908    }
1909    Ok(None)
1910}
1911
1912/// Loads `multi-pack-index-<checksum>.bitmap` when the pack directory has a
1913/// multi-pack-index with a `RIDX` chunk (the bit-order permutation) and a
1914/// matching bitmap file. Returns `Ok(None)` — never an error — on any missing
1915/// or unusable piece, so callers fall through to single-pack bitmaps.
1916fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
1917    let midx_path = pack_dir.join("multi-pack-index");
1918    if !midx_path.exists() {
1919        return Ok(None);
1920    }
1921    let Ok(midx_bytes) = fs::read(&midx_path) else {
1922        return Ok(None);
1923    };
1924    let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
1925        return Ok(None);
1926    };
1927    let bitmap_path = pack_dir.join(format!(
1928        "multi-pack-index-{}.bitmap",
1929        midx.checksum.to_hex()
1930    ));
1931    if !bitmap_path.exists() {
1932        return Ok(None);
1933    }
1934    let object_count = midx.objects.len();
1935    // Upstream `load_midx_revindex`: prefer the midx's own RIDX chunk unless
1936    // GIT_TEST_MIDX_READ_RIDX=0 disables it, else fall back to the separate
1937    // `multi-pack-index-<checksum>.rev` file; a trace2 data event records
1938    // which source supplied the permutation.
1939    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
1940        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
1941        .unwrap_or(true);
1942    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
1943        (Some(chunk), true) => {
1944            sley_core::trace2::data("load_midx_revindex", "source", "midx");
1945            chunk.clone()
1946        }
1947        _ => {
1948            let rev_path =
1949                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
1950            let Ok(rev_bytes) = fs::read(&rev_path) else {
1951                // Without the RIDX permutation the bit numbering is unknown.
1952                return Ok(None);
1953            };
1954            let Ok(parsed_rev) =
1955                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
1956            else {
1957                return Ok(None);
1958            };
1959            sley_core::trace2::data("load_midx_revindex", "source", "rev");
1960            parsed_rev.positions
1961        }
1962    };
1963    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
1964        return Ok(None);
1965    };
1966    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
1967        Ok(parsed) => parsed,
1968        Err(_) => return Ok(None),
1969    };
1970    if parsed.pack_checksum != midx.checksum {
1971        return Ok(None);
1972    }
1973
1974    // midx.objects is in lookup (oid-sorted) order; RIDX maps bit positions
1975    // to lookup positions.
1976    let mut pack_to_oid = Vec::with_capacity(object_count);
1977    for &midx_pos in &reverse_index {
1978        let Some(entry) = midx.objects.get(midx_pos as usize) else {
1979            return Ok(None);
1980        };
1981        pack_to_oid.push(entry.oid);
1982    }
1983    let mut oid_to_pack = HashMap::with_capacity(object_count);
1984    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
1985        oid_to_pack.insert(*oid, pack_pos as u32);
1986    }
1987    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
1988        midx.objects.get(position).map(|entry| entry.oid)
1989    }) {
1990        Ok(loaded) => Ok(Some(loaded)),
1991        Err(_) => Ok(None),
1992    }
1993}
1994
1995fn load_pack_bitmap_file(
1996    bitmap_path: &Path,
1997    format: ObjectFormat,
1998) -> Result<Option<LoadedPackBitmap>> {
1999    let index_path = bitmap_path.with_extension("idx");
2000    if !index_path.exists() {
2001        return Ok(None);
2002    }
2003    let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
2004    let object_count = index.entries.len();
2005    let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
2006    if parsed.pack_checksum != index.pack_checksum {
2007        return Ok(None);
2008    }
2009
2010    let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2011    pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2012    let mut pack_to_oid = Vec::with_capacity(object_count);
2013    for index_pos in &pack_order {
2014        pack_to_oid.push(index.entries[*index_pos as usize].oid);
2015    }
2016    let mut oid_to_pack = HashMap::with_capacity(object_count);
2017    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2018        oid_to_pack.insert(*oid, pack_pos as u32);
2019    }
2020
2021    assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2022        index.entries.get(position).map(|entry| entry.oid)
2023    })
2024    .map(Some)
2025}
2026
2027/// Shared tail of the bitmap loaders: expands the type bitmaps, resolves the
2028/// per-commit entries (XOR offsets reference earlier entries in file order),
2029/// and maps each entry's lookup-order position back to a commit oid via
2030/// `lookup_oid`.
2031fn assemble_loaded_bitmap(
2032    parsed: PackBitmapIndex,
2033    object_count: usize,
2034    pack_to_oid: Vec<ObjectId>,
2035    oid_to_pack: HashMap<ObjectId, u32>,
2036    lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2037) -> Result<LoadedPackBitmap> {
2038    let word_count = object_count.div_ceil(64);
2039    let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2040        let mut words = bitmap.to_words()?;
2041        words.resize(word_count, 0);
2042        Ok(words)
2043    };
2044
2045    let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2046    let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2047    for (entry_index, entry) in parsed.entries.iter().enumerate() {
2048        let mut words = expand(&entry.bitmap)?;
2049        if entry.xor_offset > 0 {
2050            let base_index = entry_index - entry.xor_offset as usize;
2051            let base = &resolved[base_index];
2052            for (dst, src) in words.iter_mut().zip(base.iter()) {
2053                *dst ^= *src;
2054            }
2055        }
2056        let words = Arc::new(words);
2057        resolved.push(Arc::clone(&words));
2058        let commit_oid = lookup_oid(entry.object_position as usize)
2059            .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2060        commit_words.insert(commit_oid, words);
2061    }
2062
2063    Ok(LoadedPackBitmap {
2064        object_count: object_count as u32,
2065        oid_to_pack,
2066        pack_to_oid,
2067        commit_words,
2068        commits: expand(&parsed.type_bitmaps.commits)?,
2069        trees: expand(&parsed.type_bitmaps.trees)?,
2070        blobs: expand(&parsed.type_bitmaps.blobs)?,
2071        tags: expand(&parsed.type_bitmaps.tags)?,
2072    })
2073}
2074
2075/// Result of a bitmap-assisted reachability walk: pack-position bits for
2076/// in-pack objects plus the "extended" objects encountered outside the
2077/// bitmapped pack (in first-seen order, like upstream's extended index).
2078pub struct BitmapWalkResult {
2079    pub words: Vec<u64>,
2080    pub extended: Vec<(ObjectId, ObjectType)>,
2081}
2082
2083impl BitmapWalkResult {
2084    /// Removes everything reachable in `haves` from this result.
2085    pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2086        for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2087            *dst &= !*src;
2088        }
2089        let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2090        self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2091    }
2092}
2093
2094/// Computes the set of objects reachable from `roots` using stored bitmaps
2095/// where available and a fill-in object walk where not — the consult half of
2096/// the bitmap engine (upstream `find_objects` + `fill_in_bitmap`).
2097///
2098/// Roots may be any object type; tag chains are peeled with every tag object
2099/// itself included, like the pending-object handling in
2100/// `prepare_bitmap_walk`. When `include_objects` is false only commits are
2101/// walked (tree contents of fill-in commits are not marked) — callers that
2102/// only count/enumerate commits mask with the commit type bitmap, so the
2103/// extra non-commit bits OR-ed in from stored (closed) bitmaps are harmless.
2104pub fn bitmap_reachable(
2105    bitmap: &LoadedPackBitmap,
2106    db: &impl ObjectReader,
2107    format: ObjectFormat,
2108    roots: &[ObjectId],
2109    include_objects: bool,
2110) -> Result<BitmapWalkResult> {
2111    let mut walk = BitmapFillWalk {
2112        bitmap,
2113        words: vec![0u64; bitmap.word_count()],
2114        extended: Vec::new(),
2115        extended_seen: HashSet::new(),
2116    };
2117    let mut commit_stack: Vec<ObjectId> = Vec::new();
2118
2119    for root in roots {
2120        let mut oid = *root;
2121        // Peel tag chains, marking each tag object on the way.
2122        loop {
2123            let object = db.read_object(&oid)?;
2124            match object.object_type {
2125                ObjectType::Tag => {
2126                    walk.mark(&oid, ObjectType::Tag);
2127                    let tag = Tag::parse_ref(format, &object.body)?;
2128                    oid = tag.object;
2129                }
2130                ObjectType::Commit => {
2131                    commit_stack.push(oid);
2132                    break;
2133                }
2134                ObjectType::Tree => {
2135                    walk.mark_tree_closure(db, format, &oid)?;
2136                    break;
2137                }
2138                ObjectType::Blob => {
2139                    walk.mark(&oid, ObjectType::Blob);
2140                    break;
2141                }
2142            }
2143        }
2144    }
2145
2146    while let Some(oid) = commit_stack.pop() {
2147        if let Some(position) = bitmap.pack_position(&oid) {
2148            if bitset_get(&walk.words, position) {
2149                continue;
2150            }
2151            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
2152                bitset_or(&mut walk.words, stored);
2153                continue;
2154            }
2155            bitset_set(&mut walk.words, position);
2156        } else {
2157            if walk.extended_seen.contains(&oid) {
2158                continue;
2159            }
2160            walk.extended_seen.insert(oid);
2161            walk.extended.push((oid, ObjectType::Commit));
2162        }
2163        let object = db.read_object(&oid)?;
2164        let commit = Commit::parse_ref(format, &object.body)?;
2165        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
2166        if include_objects {
2167            walk.mark_tree_closure(db, format, &commit.tree)?;
2168        }
2169    }
2170
2171    Ok(BitmapWalkResult {
2172        words: walk.words,
2173        extended: walk.extended,
2174    })
2175}
2176
2177struct BitmapFillWalk<'a> {
2178    bitmap: &'a LoadedPackBitmap,
2179    words: Vec<u64>,
2180    extended: Vec<(ObjectId, ObjectType)>,
2181    extended_seen: HashSet<ObjectId>,
2182}
2183
2184impl BitmapFillWalk<'_> {
2185    /// Marks one object; returns false when it was already marked.
2186    fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2187        if let Some(position) = self.bitmap.pack_position(oid) {
2188            if bitset_get(&self.words, position) {
2189                return false;
2190            }
2191            bitset_set(&mut self.words, position);
2192            true
2193        } else {
2194            if !self.extended_seen.insert(*oid) {
2195                return false;
2196            }
2197            self.extended.push((*oid, object_type));
2198            true
2199        }
2200    }
2201
2202    /// Marks `tree` and everything below it, skipping subtrees already marked
2203    /// (a set in-pack bit means its closure is covered: either it came from a
2204    /// stored — closed — bitmap, or this walk already expanded it).
2205    fn mark_tree_closure(
2206        &mut self,
2207        db: &impl ObjectReader,
2208        format: ObjectFormat,
2209        tree: &ObjectId,
2210    ) -> Result<()> {
2211        if !self.mark(tree, ObjectType::Tree) {
2212            return Ok(());
2213        }
2214        let object = db.read_object(tree)?;
2215        for entry in TreeEntries::new(format, &object.body) {
2216            let entry = entry?;
2217            if entry.is_gitlink() {
2218                continue;
2219            }
2220            if entry.is_tree() {
2221                self.mark_tree_closure(db, format, &entry.oid)?;
2222            } else {
2223                self.mark(&entry.oid, ObjectType::Blob);
2224            }
2225        }
2226        Ok(())
2227    }
2228}
2229
2230#[derive(Debug)]
2231pub struct ObjectDatabase {
2232    format: ObjectFormat,
2233    // Behind a `Mutex` so `write_object` can take `&self` (matching the
2234    // `ObjectWriter` trait) and a single handle can interleave reads and writes
2235    // without a `&mut` borrow — the same shared-by-`&` shape the file-backed
2236    // database uses for its caches. Removes the need for callers to wrap this in
2237    // a `RefCell`/`&mut` just to write (see sley-fetch's former `RefCell` dance).
2238    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
2239    promisor: bool,
2240}
2241
2242impl ObjectDatabase {
2243    pub fn new(format: ObjectFormat) -> Self {
2244        Self {
2245            format,
2246            objects: Mutex::new(HashMap::new()),
2247            promisor: false,
2248        }
2249    }
2250
2251    pub fn with_promisor(mut self, promisor: bool) -> Self {
2252        self.promisor = promisor;
2253        self
2254    }
2255
2256    pub fn contains(&self, oid: &ObjectId) -> bool {
2257        self.objects
2258            .lock()
2259            .map(|objects| objects.contains_key(oid))
2260            .unwrap_or(false)
2261    }
2262
2263    pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2264        let object = self.read_object(oid)?;
2265        let actual = object.object_id(self.format)?;
2266        if &actual == oid {
2267            Ok(())
2268        } else {
2269            Err(GitError::InvalidObject(format!(
2270                "object id mismatch: expected {oid}, got {actual}"
2271            )))
2272        }
2273    }
2274}
2275
2276impl ObjectReader for ObjectDatabase {
2277    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
2278        self.objects
2279            .lock()
2280            .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
2281            .get(oid)
2282            .map(Arc::clone)
2283            .or_else(|| implied_empty_tree_object(self.format, oid))
2284            .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
2285    }
2286}
2287
2288impl ObjectWriter for ObjectDatabase {
2289    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
2290        let oid = object.object_id(self.format)?;
2291        self.objects
2292            .lock()
2293            .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
2294            .entry(oid)
2295            .or_insert_with(|| Arc::new(object));
2296        Ok(oid)
2297    }
2298}
2299
/// An alternate, identified by its filesystem path.
// NOTE(review): presumably an alternate object directory — confirm against the code that builds these.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alternate {
    pub path: PathBuf,
}
2304
/// Partial-clone settings.
// NOTE(review): field meanings are inferred from their names only; nothing in view reads them.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether missing promised objects are allowed.
    pub allow_missing_promised_objects: bool,
}
2310
/// Raw pack-file bytes keyed by pack path, shared across cloned handles. Each
/// pack is loaded once so that individual objects can be decoded at their
/// offsets (see [`sley_pack::read_object_at`]) without re-reading the whole
/// file on every read.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
2315
/// Where a pack file's bytes live: a memory map (only with the `mmap` feature)
/// or a heap buffer. Either way the value derefs to `&[u8]`, so the decode
/// path does not care which one it got.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file contents.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read into memory.
    Heap(Vec<u8>),
}

impl std::ops::Deref for PackData {
    type Target = [u8];

    /// Borrows the pack bytes, whichever way they are backed.
    fn deref(&self) -> &Self::Target {
        match self {
            #[cfg(feature = "mmap")]
            PackData::Mapped(mapped) => mapped,
            PackData::Heap(bytes) => bytes.as_slice(),
        }
    }
}
2336
2337/// Load a pack file's bytes: memory-mapped when the `mmap` feature is on (falling
2338/// back to a heap read if the map fails), otherwise read into the heap.
2339#[cfg(feature = "mmap")]
2340fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2341    match sley_mmap::MappedFile::open_pack(pack_path) {
2342        Ok(mapped) => Ok(PackData::Mapped(mapped)),
2343        Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
2344    }
2345}
2346
2347#[cfg(not(feature = "mmap"))]
2348fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2349    Ok(PackData::Heap(fs::read(pack_path)?))
2350}
2351
2352#[cfg(feature = "mmap")]
2353fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
2354    match sley_mmap::MappedFile::open_pack(index_path) {
2355        Ok(mapped) => Ok(Arc::new(mapped)),
2356        Err(_) => Ok(Arc::new(fs::read(index_path)?)),
2357    }
2358}
2359
2360#[cfg(not(feature = "mmap"))]
2361fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
2362    Ok(Arc::new(fs::read(index_path)?))
2363}
2364
/// Memory-capped LRU of recently decoded objects, shared across cloned
/// handles, so that hot delta bases and repeated reads during a walk are not
/// re-decoded. The cache is bounded by an approximate byte budget (not a fixed
/// object count), so it neither thrashes on bulk reads of small objects nor
/// blows up on a few large ones.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;
2371
/// Per-pack caches of objects decoded from a pack, keyed by pack path and then
/// by the in-pack byte offset of each object's entry. Shared across cloned
/// handles. This is the delta-base cache: resolving a delta chain by offset
/// reuses already decoded bases instead of re-inflating the whole chain on
/// every read.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
2377
/// One pack's offset-keyed header memo (see [`PackHeaderTypeCaches`]): maps an
/// in-pack entry offset to the memoized `(ObjectType, u64)` pair for it.
// NOTE(review): the `u64` is presumably the object's size — confirm against the header fast path.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Per-pack memo of `in-pack offset -> end-of-chain object type` for the
/// `cat-file --batch-check` header fast path. Resolving a packed delta's *type*
/// walks the delta chain to its base; without this memo every header read
/// re-walks (and re-inflates) the whole chain, so reading every object in a
/// deeply-deltified pack is super-linear (sley#26). The type only depends on the
/// chain base, so memoizing `offset -> type` lets each chain be walked at most
/// once across a batch. Keyed by pack path so an offset key is never applied to
/// the wrong pack's bytes; shared across cloned handles.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
2390
/// Default approximate byte budget for the decoded-object LRU: 96 MiB. Sized to
/// comfortably hold the working set of a history walk (commits/trees/blobs and
/// their delta bases) without growing without bound on large repositories.
/// Overridable via the `SLEY_OBJECT_CACHE_BYTES` environment variable; there is
/// currently no git-config hook threaded into the object database, so this
/// constant is the default.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;
2397
2398/// Default approximate byte budget for each per-pack delta-base cache. Holds the
2399/// decoded bases of the delta chains being walked so neighboring reads stay warm.
2400/// Overridable via `SLEY_DELTA_BASE_CACHE_BYTES`.
2401const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
2402
2403/// Approximate heap cost of caching one [`EncodedObject`]: its body plus a fixed
2404/// allowance for the key, enum/`Vec` headers, and per-entry map overhead. Used
2405/// only to drive eviction, so an estimate is fine.
2406fn cached_object_cost(object: &EncodedObject) -> usize {
2407    object.body.len().saturating_add(64)
2408}
2409
/// Resolve an approximate byte budget from the environment variable `var`.
///
/// Surrounding whitespace in the value is ignored. `default` is returned when
/// the variable is unset, is not valid Unicode, or does not parse as a `usize`.
/// A parsed `0` is returned as-is; the caches treat a zero budget as "disabled".
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
2418
2419/// Approximate byte budget for the decoded-object LRU (see
2420/// [`DEFAULT_OBJECT_CACHE_BYTES`], `SLEY_OBJECT_CACHE_BYTES`).
2421///
2422/// Resolved once per process: the environment does not change under us, and a new
2423/// `FileObjectDatabase` is built often enough (e.g. once per revision resolved)
2424/// that re-reading the variable each time showed up as per-object overhead.
2425fn object_cache_budget() -> usize {
2426    static BUDGET: OnceLock<usize> = OnceLock::new();
2427    *BUDGET.get_or_init(|| {
2428        cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
2429    })
2430}
2431
2432/// Approximate byte budget for each per-pack delta-base cache (see
2433/// [`DEFAULT_DELTA_BASE_CACHE_BYTES`], `SLEY_DELTA_BASE_CACHE_BYTES`). Resolved
2434/// once per process for the same reason as [`object_cache_budget`].
2435fn delta_base_cache_budget() -> usize {
2436    static BUDGET: OnceLock<usize> = OnceLock::new();
2437    *BUDGET.get_or_init(|| {
2438        cache_budget_from_env(
2439            "SLEY_DELTA_BASE_CACHE_BYTES",
2440            DEFAULT_DELTA_BASE_CACHE_BYTES,
2441        )
2442    })
2443}
2444
/// Whether every object should be re-hashed on read and compared with the id
/// that was requested.
///
/// Disabled by default, matching git: reads trust the pack index → offset
/// mapping and the loose object's on-disk name, and ids are verified when a pack
/// is received and on demand via validate/fsck. Re-hashing every read dominated
/// bulk-read cost, so the check is opt-in through `SLEY_VERIFY_READS`: any value
/// other than unset, empty, or `0` (after trimming whitespace) enables it.
///
/// The variable is consulted once; later calls return the memoized answer even
/// if the environment changes afterwards.
fn verify_reads_enabled() -> bool {
    static ENABLED: OnceLock<bool> = OnceLock::new();
    *ENABLED.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|raw| {
                let setting = raw.trim();
                !(setting.is_empty() || setting == "0")
            })
            .unwrap_or(false)
    })
}
2462
2463/// A memory-capped LRU map from a key `K` to a decoded [`EncodedObject`].
2464///
2465/// Eviction is by approximate byte budget (gix-style), not object count, so the
2466/// cache adapts to object size. On access an entry is moved to most-recently-used;
2467/// on insert, least-recently-used entries are dropped until the budget holds. A
2468/// budget of `0` makes the cache inert. Generic over the key so it backs both the
2469/// oid-keyed decoded-object cache and the offset-keyed delta-base cache.
2470#[derive(Debug)]
2471struct LruCache<K: std::hash::Hash + Eq + Clone> {
2472    budget: usize,
2473    used: usize,
2474    map: HashMap<K, LruEntry<K>>,
2475    head: Option<K>,
2476    tail: Option<K>,
2477}
2478
2479#[derive(Debug)]
2480struct LruEntry<K> {
2481    object: Arc<EncodedObject>,
2482    prev: Option<K>,
2483    next: Option<K>,
2484}
2485
2486impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
2487    fn new(budget: usize) -> Self {
2488        Self {
2489            budget,
2490            used: 0,
2491            map: HashMap::new(),
2492            head: None,
2493            tail: None,
2494        }
2495    }
2496
2497    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
2498        let object = Arc::clone(&self.map.get(key)?.object);
2499        self.touch(key);
2500        Some(object)
2501    }
2502
2503    /// Move `key` to the most-recently-used end in O(1).
2504    fn touch(&mut self, key: &K) {
2505        if self.tail.as_ref() == Some(key) {
2506            return;
2507        }
2508        if self.map.contains_key(key) {
2509            self.detach(key);
2510            self.attach_back(key.clone());
2511        }
2512    }
2513
2514    /// Drop `key` from both the map and the recency queue, releasing its budget.
2515    fn remove(&mut self, key: &K) {
2516        if let Some(entry) = self.map.get(key) {
2517            self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
2518        }
2519        self.detach(key);
2520        self.map.remove(key);
2521    }
2522
2523    fn detach(&mut self, key: &K) {
2524        let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
2525            let prev = entry.prev.take();
2526            let next = entry.next.take();
2527            (prev, next)
2528        }) else {
2529            return;
2530        };
2531
2532        match &prev {
2533            Some(prev_key) => {
2534                if let Some(prev_entry) = self.map.get_mut(prev_key) {
2535                    prev_entry.next = next.clone();
2536                }
2537            }
2538            None => self.head = next.clone(),
2539        }
2540        match &next {
2541            Some(next_key) => {
2542                if let Some(next_entry) = self.map.get_mut(next_key) {
2543                    next_entry.prev = prev.clone();
2544                }
2545            }
2546            None => self.tail = prev.clone(),
2547        }
2548    }
2549
2550    fn attach_back(&mut self, key: K) {
2551        let previous_tail = self.tail.replace(key.clone());
2552        match previous_tail {
2553            Some(tail_key) => {
2554                if let Some(tail_entry) = self.map.get_mut(&tail_key) {
2555                    tail_entry.next = Some(key.clone());
2556                }
2557                if let Some(entry) = self.map.get_mut(&key) {
2558                    entry.prev = Some(tail_key);
2559                    entry.next = None;
2560                }
2561            }
2562            None => {
2563                self.head = Some(key.clone());
2564                if let Some(entry) = self.map.get_mut(&key) {
2565                    entry.prev = None;
2566                    entry.next = None;
2567                }
2568            }
2569        }
2570    }
2571
2572    fn clear(&mut self) {
2573        self.map.clear();
2574        self.head = None;
2575        self.tail = None;
2576        self.used = 0;
2577    }
2578
2579    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
2580        if self.budget == 0 {
2581            return;
2582        }
2583        let cost = cached_object_cost(&object);
2584        // A single object larger than the whole budget is not worth caching; it
2585        // would immediately evict everything including itself. Drop any stale
2586        // smaller entry stored under the same key so accounting stays exact.
2587        if cost > self.budget {
2588            self.remove(&key);
2589            return;
2590        }
2591        if let Some(entry) = self.map.get_mut(&key) {
2592            let previous = std::mem::replace(&mut entry.object, object);
2593            // Replacing an existing entry: adjust accounting and refresh recency.
2594            self.used = self
2595                .used
2596                .saturating_sub(cached_object_cost(&previous))
2597                .saturating_add(cost);
2598            self.touch(&key);
2599        } else {
2600            self.used = self.used.saturating_add(cost);
2601            self.map.insert(
2602                key.clone(),
2603                LruEntry {
2604                    object,
2605                    prev: None,
2606                    next: None,
2607                },
2608            );
2609            self.attach_back(key);
2610        }
2611        while self.used > self.budget {
2612            let Some(evicted) = self.head.clone() else {
2613                break;
2614            };
2615            self.remove(&evicted);
2616        }
2617    }
2618}
2619
2620/// Decoded-object cache keyed by object id (loose + packed reads share it).
2621type LruObjectCache = LruCache<ObjectId>;
2622/// Delta-base cache keyed by in-pack byte offset, scoped to one pack.
2623type LruOffsetCache = LruCache<u64>;
2624
2625/// Bridges the offset-keyed [`LruOffsetCache`] to [`sley_pack::PackDeltaCache`]
2626/// so the pack decoder can reuse decoded delta bases. Holds the shared cache
2627/// behind its mutex; a poisoned lock simply behaves as a cache miss/no-op, so a
2628/// decode still completes correctly (just without reuse).
2629struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
2630
2631impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
2632    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
2633        self.0.lock().ok()?.get(&offset)
2634    }
2635
2636    fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
2637        if let Ok(mut cache) = self.0.lock() {
2638            cache.put(offset, object);
2639        }
2640    }
2641}
2642
2643/// Bridges a per-pack `offset -> ObjectType` memo into the header fast path so
2644/// the ofs-delta chain walk is performed at most once per chain across a batch
2645/// of `read_object_header` calls (sley#26).
2646struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
2647
2648impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
2649    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
2650        self.0.lock().ok()?.get(&pack_offset).copied()
2651    }
2652
2653    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
2654        if let Ok(mut cache) = self.0.lock() {
2655            cache.insert(pack_offset, header);
2656        }
2657    }
2658}
2659
2660/// Parsed pack indexes keyed by `.idx` path, shared across cloned handles. This
2661/// remains for MIDX and path-only fallback lookups; normal pack-directory scans
2662/// use [`PackRegistrySnapshot`] so the lookup hot path can walk already-parsed
2663/// pack records directly.
2664type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;
2665
2666/// Parsed multi-pack-index files keyed by path, shared across cloned handles.
2667/// Caches the MIDX parse so object lookups in repositories with a MIDX avoid
2668/// reparsing the same fanout/object tables for every read.
2669type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
2670
2671/// Raw multi-pack-index OID lookup tables keyed by path, shared across cloned
2672/// handles. These avoid hashing and materializing every MIDX object when a
2673/// command only needs point lookups.
2674type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
2675
2676/// One registered `.idx`/`.pack` pair from a pack directory. The index is parsed
2677/// when the registry snapshot is built; pack bytes and per-pack decode/header
2678/// caches hang directly off this record so repeated object lookups do not bounce
2679/// through path-keyed maps.
2680#[derive(Debug)]
2681struct RegisteredPack {
2682    idx: PathBuf,
2683    pack: PathBuf,
2684    index: Mutex<Option<Arc<PackIndexViewData>>>,
2685    data: Mutex<Option<Arc<PackData>>>,
2686    delta_cache: Arc<Mutex<LruOffsetCache>>,
2687    header_type_cache: PackHeaderTypeCache,
2688}
2689
2690impl RegisteredPack {
2691    fn new(idx: PathBuf, pack: PathBuf) -> Self {
2692        Self {
2693            idx,
2694            pack,
2695            index: Mutex::new(None),
2696            data: Mutex::new(None),
2697            delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
2698            header_type_cache: Arc::new(Mutex::new(HashMap::new())),
2699        }
2700    }
2701
2702    fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
2703        if let Ok(cache) = self.index.lock()
2704            && let Some(index) = cache.as_ref()
2705        {
2706            return Ok(Arc::clone(index));
2707        }
2708        let index_bytes = load_pack_index_data(&self.idx)?;
2709        let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
2710            index_bytes,
2711            format,
2712        )?);
2713        if let Ok(mut cache) = self.index.lock() {
2714            *cache = Some(Arc::clone(&index));
2715        }
2716        Ok(index)
2717    }
2718
2719    fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
2720        if let Ok(cache) = self.data.lock()
2721            && let Some(bytes) = cache.as_ref()
2722        {
2723            return Ok(Arc::clone(bytes));
2724        }
2725        if let Ok(cache) = pack_bytes.lock()
2726            && let Some(bytes) = cache.get(&self.pack)
2727        {
2728            let bytes = Arc::clone(bytes);
2729            if let Ok(mut local_cache) = self.data.lock() {
2730                *local_cache = Some(Arc::clone(&bytes));
2731            }
2732            return Ok(bytes);
2733        }
2734        let bytes = Arc::new(load_pack_data(&self.pack)?);
2735        if let Ok(mut local_cache) = self.data.lock() {
2736            *local_cache = Some(Arc::clone(&bytes));
2737        }
2738        if let Ok(mut cache) = pack_bytes.lock() {
2739            cache.insert(self.pack.clone(), Arc::clone(&bytes));
2740        }
2741        Ok(bytes)
2742    }
2743}
2744
/// Cheap summary of a pack directory's state, stored on each
/// [`PackRegistrySnapshot`]. It derives `PartialEq`/`Eq`, so two fingerprints
/// can be compared for equality.
///
/// NOTE(review): presumably used to decide whether a cached snapshot is stale —
/// confirm at the site that builds and compares fingerprints.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    // Presumably the pack directory's modification time, `None` when it is
    // unavailable — TODO confirm where this is populated.
    modified: Option<std::time::SystemTime>,
    // Presumably the number of `.idx` files seen in the directory — TODO confirm.
    idx_count: usize,
    // Presumably the number of `.pack` files seen in the directory — TODO confirm.
    pack_count: usize,
}
2751
2752/// Snapshot of a pack directory's lookup state, shared across cloned handles.
2753/// New packs are still found: a lookup that misses every cached pack re-scans the
2754/// directory once before concluding the object is absent (see
2755/// [`FileObjectDatabase::find_pack_containing`]).
2756#[derive(Debug)]
2757struct PackRegistrySnapshot {
2758    fingerprint: PackDirFingerprint,
2759    packs: Vec<Arc<RegisteredPack>>,
2760    recent_pack: Mutex<Option<usize>>,
2761}
2762
2763impl PackRegistrySnapshot {
2764    fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
2765        Self {
2766            fingerprint,
2767            packs,
2768            recent_pack: Mutex::new(None),
2769        }
2770    }
2771
2772    fn cached_hint(&self) -> Option<usize> {
2773        self.recent_pack
2774            .lock()
2775            .ok()
2776            .and_then(|hint| *hint)
2777            .filter(|pack_index| *pack_index < self.packs.len())
2778    }
2779
2780    fn remember_hint(&self, pack_index: usize) {
2781        if let Ok(mut hint) = self.recent_pack.lock() {
2782            *hint = Some(pack_index);
2783        }
2784    }
2785}
2786
2787/// Cached pack-registry snapshot for this object directory, shared across cloned
2788/// handles. A `FileObjectDatabase` owns exactly one object directory, so this is
2789/// an `Option` instead of another path-keyed map.
2790type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
2791
2792#[derive(Debug, Clone)]
2793struct PackLookup {
2794    pack: PathBuf,
2795    registered: Option<Arc<RegisteredPack>>,
2796    offset: u64,
2797}
2798
2799impl PackLookup {
2800    fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
2801        Self {
2802            pack: pack.pack.clone(),
2803            registered: Some(pack),
2804            offset,
2805        }
2806    }
2807
2808    fn from_path(pack: PathBuf, offset: u64) -> Self {
2809        Self {
2810            pack,
2811            registered: None,
2812            offset,
2813        }
2814    }
2815
2816    fn pack_path(&self) -> &Path {
2817        &self.pack
2818    }
2819
2820    fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
2821        match &self.registered {
2822            Some(pack) => pack.bytes(&database.pack_bytes),
2823            None => database.cached_pack_bytes(&self.pack),
2824        }
2825    }
2826
2827    fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
2828        match &self.registered {
2829            Some(pack) => database.cached_pack_index(&pack.idx),
2830            None => database.cached_pack_index(&self.pack.with_extension("idx")),
2831        }
2832    }
2833
2834    fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
2835        match &self.registered {
2836            Some(pack) => Some(Arc::clone(&pack.delta_cache)),
2837            None => database.pack_delta_cache(&self.pack),
2838        }
2839    }
2840
2841    fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
2842        match &self.registered {
2843            Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
2844            None => database.pack_header_type_cache(&self.pack),
2845        }
2846    }
2847}
2848
/// File-backed object database rooted at one object directory.
///
/// Cloning is cheap with respect to the caches: every cache field is an `Arc`,
/// so clones share the same cached state.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    // Loose-object store built over `objects_dir` with the same `format`.
    loose: LooseObjectStore,
    // The object directory this database reads from.
    objects_dir: PathBuf,
    // Alternate object directories; empty for `without_alternates` handles.
    alternates: Vec<PathBuf>,
    // Object-id format (hash algorithm) the database was opened with.
    format: ObjectFormat,
    // Pack bytes shared across handles, looked up by `.pack` path.
    pack_bytes: PackBytesCache,
    // Parsed pack indexes keyed by `.idx` path.
    pack_indexes: PackIndexCache,
    // Parsed multi-pack-index files keyed by path.
    multi_pack_indexes: MultiPackIndexCache,
    // Raw multi-pack-index OID lookup tables keyed by path.
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    // Cached pack-registry snapshot for this object directory, if any.
    pack_registry: PackRegistryCache,
    // Byte-budgeted LRU of decoded objects keyed by object id.
    decoded: DecodedObjectCache,
    // Per-pack delta-base caches keyed by pack path.
    pack_deltas: PackDeltaCaches,
    // Per-pack header-type memos keyed by pack path.
    pack_header_types: PackHeaderTypeCaches,
    /// Graft points (`$GIT_DIR/shallow`), loaded lazily on the first
    /// [`ObjectReader::is_shallow_graft`] query. `$GIT_DIR` is taken to be
    /// the parent of `objects_dir`, matching the standard layout.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
2868
2869#[derive(Debug)]
2870pub struct ObjectPresenceChecker {
2871    db: FileObjectDatabase,
2872    pack_dir: PathBuf,
2873    midx: Option<Arc<MultiPackIndexOidLookup>>,
2874    registry: Option<Arc<PackRegistrySnapshot>>,
2875    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
2876    recent_pack: Option<usize>,
2877    prepared_packs: bool,
2878    prepared_registry: bool,
2879}
2880
2881impl ObjectPresenceChecker {
2882    fn new(db: FileObjectDatabase) -> Self {
2883        let pack_dir = db.objects_dir.join("pack");
2884        Self {
2885            db,
2886            pack_dir,
2887            midx: None,
2888            registry: None,
2889            registry_indexes: Vec::new(),
2890            recent_pack: None,
2891            prepared_packs: false,
2892            prepared_registry: false,
2893        }
2894    }
2895
2896    pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
2897        if oid.format() != self.db.format {
2898            return Err(GitError::InvalidObjectId(format!(
2899                "object {oid} uses {}, store uses {}",
2900                oid.format().name(),
2901                self.db.format.name()
2902            )));
2903        }
2904        if self.db.loose.exists(oid)? {
2905            return Ok(true);
2906        }
2907        if self.find_packed(oid, false)? {
2908            return Ok(true);
2909        }
2910        if self.find_packed(oid, true)? {
2911            return Ok(true);
2912        }
2913        for alternate in &self.db.alternates {
2914            if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
2915                return Ok(true);
2916            }
2917        }
2918        // Preserve the regular contains() reprepare-on-miss behavior for loose
2919        // objects that appeared after the fanout cache was populated.
2920        self.db.loose.invalidate_cache();
2921        self.db.loose.exists(oid)
2922    }
2923
2924    fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
2925        self.prepare_packs(force_rescan)?;
2926        if let Some(midx) = &self.midx
2927            && midx.contains(oid)
2928        {
2929            return Ok(true);
2930        }
2931        self.prepare_registry(force_rescan)?;
2932        self.find_in_registry(oid)
2933    }
2934
2935    fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
2936        if self.prepared_packs && !force_rescan {
2937            return Ok(());
2938        }
2939        let midx_path = self.pack_dir.join("multi-pack-index");
2940        self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
2941        self.prepared_packs = true;
2942        Ok(())
2943    }
2944
2945    fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
2946        if self.prepared_registry && !force_rescan {
2947            return Ok(());
2948        }
2949        let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
2950        let registry_changed = match self.registry.as_ref() {
2951            Some(cached) => !Arc::ptr_eq(cached, &registry),
2952            None => true,
2953        };
2954        if registry_changed {
2955            self.registry_indexes = vec![None; registry.packs.len()];
2956            self.recent_pack = None;
2957            self.registry = Some(registry);
2958        }
2959        self.prepared_registry = true;
2960        Ok(())
2961    }
2962
2963    fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
2964        let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
2965            return Ok(false);
2966        };
2967        if let Some(pack_index) = self
2968            .recent_pack
2969            .filter(|pack_index| *pack_index < registry.packs.len())
2970        {
2971            let index = self.registry_index(&registry, pack_index)?;
2972            if index.find(oid).is_some() {
2973                return Ok(true);
2974            }
2975        }
2976        for pack_index in 0..registry.packs.len() {
2977            if Some(pack_index) == self.recent_pack {
2978                continue;
2979            }
2980            let index = self.registry_index(&registry, pack_index)?;
2981            if index.find(oid).is_some() {
2982                self.recent_pack = Some(pack_index);
2983                return Ok(true);
2984            }
2985        }
2986        Ok(false)
2987    }
2988
2989    fn registry_index(
2990        &mut self,
2991        registry: &PackRegistrySnapshot,
2992        pack_index: usize,
2993    ) -> Result<Arc<PackIndexViewData>> {
2994        if self.registry_indexes.len() != registry.packs.len() {
2995            self.registry_indexes = vec![None; registry.packs.len()];
2996            self.recent_pack = None;
2997        }
2998        if let Some(index) = self
2999            .registry_indexes
3000            .get(pack_index)
3001            .and_then(|index| index.as_ref())
3002        {
3003            return Ok(Arc::clone(index));
3004        }
3005        let index = registry.packs[pack_index].index(self.db.format)?;
3006        if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
3007            *slot = Some(Arc::clone(&index));
3008        }
3009        Ok(index)
3010    }
3011}
3012
3013/// Parse `$GIT_DIR/shallow`: one hex object id per line. A missing file is an
3014/// empty set (the repository is not shallow); unparsable lines are ignored so
3015/// a torn write never poisons walks.
3016fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
3017    let Ok(contents) = std::fs::read_to_string(shallow_file) else {
3018        return HashSet::new();
3019    };
3020    contents
3021        .lines()
3022        .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
3023        .collect()
3024}
3025
3026pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
3027    env::var_os("GIT_OBJECT_DIRECTORY")
3028        .map(PathBuf::from)
3029        .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
3030}
3031
/// Locate the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is used verbatim.
/// 2. If `<git_dir>/commondir` is readable, its trimmed contents name the
///    common directory; a relative value is resolved against `git_dir`. The
///    result is canonicalized when possible, and returned as-is otherwise.
/// 3. Otherwise `git_dir` itself is the common directory.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }

    let git_dir = git_dir.as_ref();
    match fs::read_to_string(git_dir.join("commondir")) {
        Ok(contents) => {
            let recorded = Path::new(contents.trim());
            let resolved = if recorded.is_absolute() {
                recorded.to_path_buf()
            } else {
                git_dir.join(recorded)
            };
            fs::canonicalize(&resolved).unwrap_or(resolved)
        }
        Err(_) => git_dir.to_path_buf(),
    }
}
3049
3050pub fn repository_object_ids(
3051    git_dir: impl AsRef<Path>,
3052    format: ObjectFormat,
3053) -> Result<Vec<ObjectId>> {
3054    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
3055}
3056
3057pub fn object_ids_in_objects_dir(
3058    objects_dir: impl AsRef<Path>,
3059    format: ObjectFormat,
3060) -> Result<Vec<ObjectId>> {
3061    let objects_dir = objects_dir.as_ref();
3062    let mut oids = HashSet::new();
3063    collect_loose_object_ids(objects_dir, format, &mut oids)?;
3064    collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
3065    let mut oids = oids.into_iter().collect::<Vec<_>>();
3066    oids.sort_by_key(ObjectId::to_hex);
3067    Ok(oids)
3068}
3069
3070fn collect_loose_object_ids(
3071    objects_dir: &Path,
3072    format: ObjectFormat,
3073    oids: &mut HashSet<ObjectId>,
3074) -> Result<()> {
3075    if !objects_dir.exists() {
3076        return Ok(());
3077    }
3078    let hex_len = format.hex_len();
3079    for entry in fs::read_dir(objects_dir)? {
3080        let entry = entry?;
3081        if !entry.file_type()?.is_dir() {
3082            continue;
3083        }
3084        let name = entry.file_name();
3085        let Some(fanout) = name.to_str() else {
3086            continue;
3087        };
3088        if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3089            continue;
3090        }
3091        for object_entry in fs::read_dir(entry.path())? {
3092            let object_entry = object_entry?;
3093            if !object_entry.file_type()?.is_file() {
3094                continue;
3095            }
3096            let name = object_entry.file_name();
3097            let Some(suffix) = name.to_str() else {
3098                continue;
3099            };
3100            if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3101                continue;
3102            }
3103            oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
3104        }
3105    }
3106    Ok(())
3107}
3108
3109fn collect_loose_fanout_object_ids(
3110    objects_dir: &Path,
3111    format: ObjectFormat,
3112    fanout: u8,
3113    oids: &mut HashSet<ObjectId>,
3114) -> Result<()> {
3115    let fanout_hex = format!("{fanout:02x}");
3116    let fanout_dir = objects_dir.join(&fanout_hex);
3117    let entries = match fs::read_dir(&fanout_dir) {
3118        Ok(entries) => entries,
3119        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
3120        Err(err) => return Err(GitError::Io(err.to_string())),
3121    };
3122    let hex_len = format.hex_len();
3123    for object_entry in entries {
3124        let object_entry = object_entry?;
3125        let name = object_entry.file_name();
3126        let Some(suffix) = name.to_str() else {
3127            continue;
3128        };
3129        if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3130            continue;
3131        }
3132        oids.insert(ObjectId::from_hex(
3133            format,
3134            &format!("{fanout_hex}{suffix}"),
3135        )?);
3136    }
3137    Ok(())
3138}
3139
/// Presence cache for loose objects: a set of fanout bytes plus a set of object
/// ids. The derived `Default` is the empty cache.
///
/// NOTE(review): presumably filled one fanout directory at a time by
/// `collect_loose_fanout_object_ids` — confirm against `LooseObjectStore`.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    // Presumably the fanout bytes whose directories have already been scanned
    // into `objects` — TODO confirm.
    loaded_fanouts: HashSet<u8>,
    // Presumably the ids found in the scanned fanout directories — TODO confirm.
    objects: HashSet<ObjectId>,
}
3145
3146/// Every object id resolvable through a pack (any `.idx` or the
3147/// multi-pack-index) under `objects_dir/pack`. Used by `--unpacked`
3148/// filtering: an object is "unpacked" when absent from this set, regardless
3149/// of a loose copy also existing.
3150pub fn packed_object_ids(
3151    objects_dir: impl AsRef<Path>,
3152    format: ObjectFormat,
3153) -> Result<HashSet<ObjectId>> {
3154    let mut oids = HashSet::new();
3155    collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
3156    Ok(oids)
3157}
3158
3159fn collect_packed_object_ids(
3160    pack_dir: &Path,
3161    format: ObjectFormat,
3162    oids: &mut HashSet<ObjectId>,
3163) -> Result<()> {
3164    if !pack_dir.exists() {
3165        return Ok(());
3166    }
3167    let midx_path = pack_dir.join("multi-pack-index");
3168    if midx_path.exists() {
3169        let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
3170        oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
3171    }
3172    for entry in fs::read_dir(pack_dir)? {
3173        let path = entry?.path();
3174        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3175            continue;
3176        }
3177        let index = PackIndex::parse(&fs::read(path)?, format)?;
3178        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
3179    }
3180    Ok(())
3181}
3182
3183impl FileObjectDatabase {
    /// The object-id format (hash algorithm) this database was opened with.
    ///
    /// Returned by value. [`ObjectPresenceChecker::contains`] rejects ids whose
    /// format differs from this one.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
3188
    /// The repository object directory this database reads from.
    ///
    /// This is the path handed to the constructor; the pack directory probed
    /// by [`ObjectPresenceChecker`] is its `pack` subdirectory.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
3193
3194    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3195        let objects_dir = objects_dir.into();
3196        Self {
3197            loose: LooseObjectStore::new(objects_dir.clone(), format),
3198            alternates: alternate_object_dirs(&objects_dir),
3199            objects_dir,
3200            format,
3201            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3202            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3203            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3204            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3205            pack_registry: Arc::new(Mutex::new(None)),
3206            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3207            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3208            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3209            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3210        }
3211    }
3212
    /// Open the object database rooted at `objects_dir` with an empty
    /// alternates list and freshly allocated, empty caches.
    ///
    /// [`ObjectPresenceChecker::contains`] uses this to probe each alternate
    /// directory, so an alternate's own alternates are not followed.
    fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
        let objects_dir = objects_dir.into();
        Self {
            loose: LooseObjectStore::new(objects_dir.clone(), format),
            alternates: Vec::new(),
            objects_dir,
            format,
            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
            pack_registry: Arc::new(Mutex::new(None)),
            // Sized by the process-wide budget (`SLEY_OBJECT_CACHE_BYTES`).
            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
            // Left unset here; filled on the first shallow-graft query.
            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
        }
    }
3231
3232    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
3233        Self::new(repository_objects_dir(git_dir), format)
3234    }
3235
3236    /// Drop cached pack registries, indexes, and decoded objects so the next read
3237    /// sees packs/objects installed after this handle was created (e.g. after
3238    /// `fetch` or `install_pack`). Long-lived [`Repository`] sessions call this
3239    /// via the owning repository's `refresh_objects` hook.
3240    pub fn refresh_read_cache(&self) {
3241        if let Ok(mut cache) = self.pack_registry.lock() {
3242            *cache = None;
3243        }
3244        if let Ok(mut cache) = self.pack_indexes.lock() {
3245            cache.clear();
3246        }
3247        if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3248            cache.clear();
3249        }
3250        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3251            cache.clear();
3252        }
3253        if let Ok(mut cache) = self.pack_bytes.lock() {
3254            cache.clear();
3255        }
3256        if let Ok(mut cache) = self.pack_deltas.lock() {
3257            cache.clear();
3258        }
3259        if let Ok(mut cache) = self.pack_header_types.lock() {
3260            cache.clear();
3261        }
3262        if let Ok(mut cache) = self.decoded.lock() {
3263            cache.clear();
3264        }
3265        self.loose.invalidate_cache();
3266    }
3267
3268    pub fn loose(&self) -> &LooseObjectStore {
3269        &self.loose
3270    }
3271
3272    pub fn presence_checker(&self) -> ObjectPresenceChecker {
3273        ObjectPresenceChecker::new(self.clone())
3274    }
3275
3276    pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3277        self.install_pack_with_options(pack, RawPackInstallOptions::default())
3278    }
3279
3280    pub fn install_pack_with_options(
3281        &self,
3282        pack: &PackWrite,
3283        options: RawPackInstallOptions,
3284    ) -> Result<PackInstallResult> {
3285        if pack.checksum.format() != self.format {
3286            return Err(GitError::InvalidObjectId(format!(
3287                "pack checksum uses {}, store uses {}",
3288                pack.checksum.format().name(),
3289                self.format.name()
3290            )));
3291        }
3292        for entry in &pack.entries {
3293            if entry.oid.format() != self.format {
3294                return Err(GitError::InvalidObjectId(format!(
3295                    "pack entry {} uses {}, store uses {}",
3296                    entry.oid,
3297                    entry.oid.format().name(),
3298                    self.format.name()
3299                )));
3300            }
3301        }
3302        let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
3303        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3304        if canonical_index.pack_checksum != pack.checksum
3305            || parsed_index.pack_checksum != pack.checksum
3306        {
3307            return Err(GitError::InvalidFormat(
3308                "pack and index checksums do not match pack write".into(),
3309            ));
3310        }
3311        if pack.index != canonical_index.index {
3312            return Err(GitError::InvalidFormat(
3313                "pack index does not match pack contents".into(),
3314            ));
3315        }
3316
3317        let pack_dir = self.objects_dir.join("pack");
3318        fs::create_dir_all(&pack_dir)?;
3319        let pack_name = format!("pack-{}", pack.checksum.to_hex());
3320        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3321        let index_path = pack_dir.join(format!("{pack_name}.idx"));
3322        if !pack_path.exists() || !index_path.exists() {
3323            write_pack_component(&pack_path, &pack.pack)?;
3324            write_pack_component(&index_path, &pack.index)?;
3325        }
3326        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3327        Ok(PackInstallResult {
3328            pack_name,
3329            pack_path,
3330            index_path,
3331            promisor_path,
3332            object_ids: canonical_index
3333                .entries
3334                .iter()
3335                .map(|entry| entry.oid)
3336                .collect(),
3337        })
3338    }
3339
3340    /// Install a pack that was produced in this process by [`PackFile::write_packed`].
3341    ///
3342    /// Unlike [`Self::install_raw_pack_with_options`], this does not re-inflate
3343    /// every pack entry to rebuild the index. It validates the generated pack
3344    /// trailer and generated index against the writer's object ids, CRCs, and
3345    /// offsets, then writes those bytes directly. Use the raw installer for
3346    /// arbitrary pack bytes received from an untrusted transport.
3347    pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3348        self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
3349    }
3350
3351    pub fn install_written_pack_with_options(
3352        &self,
3353        pack: &PackWrite,
3354        options: RawPackInstallOptions,
3355    ) -> Result<PackInstallResult> {
3356        validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
3357        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3358        if parsed_index.pack_checksum != pack.checksum {
3359            return Err(GitError::InvalidFormat(
3360                "pack write index checksum does not match pack".into(),
3361            ));
3362        }
3363        if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
3364            return Err(GitError::InvalidFormat(
3365                "pack write index does not match generated entries".into(),
3366            ));
3367        }
3368        self.install_generated_pack_unchecked(pack, options)
3369    }
3370
3371    fn install_generated_pack_unchecked(
3372        &self,
3373        pack: &PackWrite,
3374        options: RawPackInstallOptions,
3375    ) -> Result<PackInstallResult> {
3376        let pack_dir = self.objects_dir.join("pack");
3377        fs::create_dir_all(&pack_dir)?;
3378        let pack_name = format!("pack-{}", pack.checksum.to_hex());
3379        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3380        let index_path = pack_dir.join(format!("{pack_name}.idx"));
3381        if !pack_path.exists() || !index_path.exists() {
3382            write_pack_component(&pack_path, &pack.pack)?;
3383            write_pack_component(&index_path, &pack.index)?;
3384        }
3385        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3386        Ok(PackInstallResult {
3387            pack_name,
3388            pack_path,
3389            index_path,
3390            promisor_path,
3391            object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
3392        })
3393    }
3394
3395    pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
3396        self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
3397    }
3398
3399    pub fn install_raw_pack_with_options(
3400        &self,
3401        pack_bytes: &[u8],
3402        options: RawPackInstallOptions,
3403    ) -> Result<PackInstallResult> {
3404        let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
3405        let pack_dir = self.objects_dir.join("pack");
3406        fs::create_dir_all(&pack_dir)?;
3407        let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
3408        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3409        let index_path = pack_dir.join(format!("{pack_name}.idx"));
3410        if !pack_path.exists() || !index_path.exists() {
3411            write_pack_component(&pack_path, pack_bytes)?;
3412            write_pack_component(&index_path, &built.index)?;
3413        }
3414        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3415        Ok(PackInstallResult {
3416            pack_name,
3417            pack_path,
3418            index_path,
3419            promisor_path,
3420            object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
3421        })
3422    }
3423
3424    pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
3425        if self.loose.exists(oid)? {
3426            return Ok(true);
3427        }
3428        if self.find_pack_containing(oid)?.is_some() {
3429            return Ok(true);
3430        }
3431        for alternate in &self.alternates {
3432            if Self::without_alternates(alternate, self.format).contains(oid)? {
3433                return Ok(true);
3434            }
3435        }
3436        // Reprepare-on-miss: a cached negative loose verdict may predate a
3437        // sibling write. Drop it and exact-probe once before reporting absence.
3438        self.loose.invalidate_cache();
3439        self.loose.exists(oid)
3440    }
3441
3442    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3443        let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
3444            .into_iter()
3445            .collect::<HashSet<_>>();
3446        for alternate in &self.alternates {
3447            oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
3448        }
3449        let mut oids = oids.into_iter().collect::<Vec<_>>();
3450        oids.sort_by_key(ObjectId::to_hex);
3451        Ok(oids)
3452    }
3453
3454    pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3455        if let Some(disk_size) = self.loose.disk_size(oid)? {
3456            return Ok(Some(ObjectStorageInfo {
3457                disk_size,
3458                deltabase: zero_oid(self.format)?,
3459            }));
3460        }
3461        if let Some(info) = self.packed_object_storage_info(oid)? {
3462            return Ok(Some(info));
3463        }
3464        for alternate in &self.alternates {
3465            if let Some(info) =
3466                Self::without_alternates(alternate, self.format).object_storage_info(oid)?
3467            {
3468                return Ok(Some(info));
3469            }
3470        }
3471        // Reprepare-on-miss: drop any stale negative loose cache and exact-probe
3472        // once before reporting absence (see `read_object`).
3473        self.loose.invalidate_cache();
3474        if let Some(disk_size) = self.loose.disk_size(oid)? {
3475            return Ok(Some(ObjectStorageInfo {
3476                disk_size,
3477                deltabase: zero_oid(self.format)?,
3478            }));
3479        }
3480        Ok(None)
3481    }
3482
3483    pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
3484        validate_object_id_prefix(self.format, prefix)?;
3485        let mut matches = Vec::new();
3486        for oid in self.object_ids()? {
3487            if object_id_matches_prefix(&oid, prefix) {
3488                matches.push(oid);
3489            }
3490        }
3491        Ok(match matches.len() {
3492            0 => ObjectPrefixResolution::Missing,
3493            1 => ObjectPrefixResolution::Unique(matches.remove(0)),
3494            _ => ObjectPrefixResolution::Ambiguous(matches),
3495        })
3496    }
3497
3498    /// The object type and content size of `oid` without decoding its full body —
3499    /// git's `cat-file --batch-check` fast path. Tries the decoded-object cache,
3500    /// then loose storage (inflating only the framing header), then packs (reading
3501    /// the entry header and, for deltas, only the delta's leading varints), then
3502    /// alternates. Returns `Ok(None)` if the object is not present.
3503    ///
3504    /// Unlike [`ObjectReader::read_object`], this never materializes the body, so it
3505    /// stays cheap on huge blobs and deep delta chains. It does not populate the
3506    /// decoded-object cache (nothing is decoded).
3507    pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
3508        if implied_empty_tree_object(self.format, oid).is_some() {
3509            return Ok(Some((ObjectType::Tree, 0)));
3510        }
3511        if let Ok(mut cache) = self.decoded.lock()
3512            && let Some(object) = cache.get(oid)
3513        {
3514            return Ok(Some((object.object_type, object.body.len() as u64)));
3515        }
3516        if let Some(header) = self.loose.read_header(oid)? {
3517            return Ok(Some(header));
3518        }
3519        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
3520            let bytes = pack_lookup.pack_bytes(self)?;
3521            // Per-pack offset->type memo so the ofs-delta chain walk that resolves
3522            // a packed object's type runs at most once per chain across the batch,
3523            // instead of re-walking (and re-inflating each link's leading varints)
3524            // on every header read — the sley#26 super-linear cat-file --batch-check.
3525            let type_cache = pack_lookup.header_type_cache(self);
3526            let resolve_ref_base = |base: &ObjectId| {
3527                self.read_object_header(base)
3528                    .map(|header| header.map(|(t, _)| t))
3529            };
3530            let header = match &type_cache {
3531                Some(cache) => {
3532                    let mut adapter = PackHeaderTypeCacheAdapter(cache);
3533                    sley_pack::read_object_header_at_with_cache(
3534                        &bytes,
3535                        pack_lookup.offset,
3536                        self.format,
3537                        resolve_ref_base,
3538                        &mut adapter,
3539                    )?
3540                }
3541                None => sley_pack::read_object_header_at(
3542                    &bytes,
3543                    pack_lookup.offset,
3544                    self.format,
3545                    resolve_ref_base,
3546                )?,
3547            };
3548            return Ok(Some(header));
3549        }
3550        for alternate in &self.alternates {
3551            if let Some(header) =
3552                Self::without_alternates(alternate, self.format).read_object_header(oid)?
3553            {
3554                return Ok(Some(header));
3555            }
3556        }
3557        // Reprepare-on-miss: discard any stale negative loose cache and retry an
3558        // exact path probe once before reporting absence (see `read_object`).
3559        self.loose.invalidate_cache();
3560        if let Some(header) = self.loose.read_header(oid)? {
3561            return Ok(Some(header));
3562        }
3563        Ok(None)
3564    }
3565
3566    fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
3567        // Memory-capped decoded-object cache first (delta-base reuse for ref-delta
3568        // bases that resolve back through the store + repeated whole-object reads).
3569        if let Ok(mut cache) = self.decoded.lock()
3570            && let Some(object) = cache.get(oid)
3571        {
3572            return Ok(Some(object));
3573        }
3574        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3575            return Ok(None);
3576        };
3577        self.read_packed_object_at_lookup(oid, &pack_lookup).map(Some)
3578    }
3579
3580    fn read_packed_object_at_lookup(
3581        &self,
3582        oid: &ObjectId,
3583        pack_lookup: &PackLookup,
3584    ) -> Result<Arc<EncodedObject>> {
3585        if let Ok(mut cache) = self.decoded.lock()
3586            && let Some(object) = cache.get(oid)
3587        {
3588            return Ok(object);
3589        }
3590        let bytes = pack_lookup.pack_bytes(self)?;
3591        // Per-pack delta-base cache (keyed by in-pack offset). Resolving an
3592        // ofs-delta chain reuses already-decoded bases instead of re-inflating the
3593        // whole chain on every read. Scoped to this pack's path so an offset key is
3594        // never applied to the wrong pack's bytes.
3595        let delta_cache = pack_lookup.delta_cache(self);
3596        let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
3597        // Decode only this object at its offset (plus its delta-base chain). A
3598        // ref-delta base resolves through the full store (loose / other packs) and
3599        // reuses the decoded-object cache. No cache lock is held across the decode,
3600        // so the recursive resolver re-entry (which may re-enter read_object) is
3601        // safe.
3602        let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
3603        let object = match &delta_adapter {
3604            Some(adapter) => sley_pack::read_object_at_with_cache_arc(
3605                &bytes,
3606                pack_lookup.offset,
3607                self.format,
3608                resolve_ref_base,
3609                adapter,
3610            )?,
3611            None => sley_pack::read_object_at_arc(
3612                &bytes,
3613                pack_lookup.offset,
3614                self.format,
3615                resolve_ref_base,
3616            )?,
3617        };
3618        // Trust the index → offset mapping rather than re-hashing every decoded
3619        // object on read (see `verify_reads_enabled`); this re-hash dominated
3620        // bulk-read cost. Opt back in with `SLEY_VERIFY_READS` for a paranoid check.
3621        if verify_reads_enabled() {
3622            let actual = object.object_id(self.format)?;
3623            if actual != *oid {
3624                return Err(GitError::InvalidObject(format!(
3625                    "pack object id mismatch: index says {oid}, decoded {actual}"
3626                )));
3627            }
3628        }
3629        if let Ok(mut cache) = self.decoded.lock() {
3630            cache.put(*oid, Arc::clone(&object));
3631        }
3632        Ok(object)
3633    }
3634
3635    /// The per-pack delta-base cache for `pack_path`, creating it on first use.
3636    /// Returns `None` only if the shared map's lock is poisoned, in which case the
3637    /// caller falls back to an uncached decode (correctness preserved).
3638    fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
3639        let mut caches = self.pack_deltas.lock().ok()?;
3640        let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
3641            Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
3642        });
3643        Some(Arc::clone(cache))
3644    }
3645
3646    /// The per-pack header-type memo for `pack_path`, creating it on first use.
3647    /// Returns `None` only if the shared map's lock is poisoned, in which case the
3648    /// caller falls back to an unmemoized header walk (correctness preserved).
3649    fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
3650        let mut caches = self.pack_header_types.lock().ok()?;
3651        let cache = caches
3652            .entry(pack_path.to_path_buf())
3653            .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
3654        Some(Arc::clone(cache))
3655    }
3656
3657    /// Backing bytes of the pack at `pack_path`, loaded at most once per database
3658    /// handle (cached, shared across clones). Memory-mapped under the `mmap` feature,
3659    /// otherwise read into the heap. On a poisoned lock it falls back to loading
3660    /// without caching, preserving correctness.
3661    fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
3662        if let Ok(cache) = self.pack_bytes.lock()
3663            && let Some(bytes) = cache.get(pack_path)
3664        {
3665            return Ok(Arc::clone(bytes));
3666        }
3667        let bytes = Arc::new(load_pack_data(pack_path)?);
3668        if let Ok(mut cache) = self.pack_bytes.lock() {
3669            cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
3670        }
3671        Ok(bytes)
3672    }
3673
3674    /// Parsed index for the `.idx` at `index_path`, parsed at most once per
3675    /// database handle. On a poisoned lock it falls back to parsing without
3676    /// caching, preserving correctness.
3677    fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
3678        if let Ok(cache) = self.pack_indexes.lock()
3679            && let Some(index) = cache.get(index_path)
3680        {
3681            return Ok(Arc::clone(index));
3682        }
3683        let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
3684        if let Ok(mut cache) = self.pack_indexes.lock() {
3685            cache.insert(index_path.to_path_buf(), Arc::clone(&index));
3686        }
3687        Ok(index)
3688    }
3689
3690    fn cached_multi_pack_index_oid_lookup(
3691        &self,
3692        midx_path: &Path,
3693    ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
3694        if !midx_path.exists() {
3695            return Ok(None);
3696        }
3697        if let Ok(cache) = self.multi_pack_oid_lookups.lock()
3698            && let Some(midx) = cache.get(midx_path)
3699        {
3700            return Ok(Some(Arc::clone(midx)));
3701        }
3702        let bytes = Arc::new(fs::read(midx_path)?);
3703        let midx = Arc::new(MultiPackIndexOidLookup::parse(bytes, self.format)?);
3704        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3705            cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
3706        }
3707        Ok(Some(midx))
3708    }
3709
3710    /// Registry snapshot for this database's pack directory. With `force_rescan`,
3711    /// the directory is re-read; when the fingerprint and pack set match the
3712    /// cached snapshot, the same `Arc` is returned so miss handling can tell that
3713    /// no new packs appeared.
3714    fn cached_pack_registry(
3715        &self,
3716        pack_dir: &Path,
3717        force_rescan: bool,
3718    ) -> Result<Arc<PackRegistrySnapshot>> {
3719        if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
3720            return Ok(registry);
3721        }
3722        let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
3723        if let Ok(mut cache) = self.pack_registry.lock() {
3724            match cache.as_ref() {
3725                Some(existing)
3726                    if existing.fingerprint == scanned.fingerprint
3727                        && same_registered_pack_set(&existing.packs, &scanned.packs) =>
3728                {
3729                    return Ok(Arc::clone(existing));
3730                }
3731                _ => {
3732                    *cache = Some(Arc::clone(&scanned));
3733                }
3734            }
3735        }
3736        Ok(scanned)
3737    }
3738
3739    fn find_in_pack_registry(
3740        &self,
3741        registry: Arc<PackRegistrySnapshot>,
3742        oid: &ObjectId,
3743    ) -> Result<Option<PackLookup>> {
3744        let hinted_pack_index = registry.cached_hint();
3745        if let Some(pack_index) = hinted_pack_index {
3746            let pack = &registry.packs[pack_index];
3747            let index = pack.index(self.format)?;
3748            if let Some(entry) = index.find(oid) {
3749                return Ok(Some(PackLookup::from_registered(
3750                    Arc::clone(pack),
3751                    entry.offset,
3752                )));
3753            }
3754        }
3755        for (pack_index, pack) in registry.packs.iter().enumerate() {
3756            if Some(pack_index) == hinted_pack_index {
3757                continue;
3758            }
3759            let index = pack.index(self.format)?;
3760            if let Some(entry) = index.find(oid) {
3761                registry.remember_hint(pack_index);
3762                return Ok(Some(PackLookup::from_registered(
3763                    Arc::clone(pack),
3764                    entry.offset,
3765                )));
3766            }
3767        }
3768        Ok(None)
3769    }
3770
3771    fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
3772        if oid.format() != self.format {
3773            return Err(GitError::InvalidObjectId(format!(
3774                "object {oid} uses {}, store uses {}",
3775                oid.format().name(),
3776                self.format.name()
3777            )));
3778        }
3779        let pack_dir = self.objects_dir.join("pack");
3780        // Hot path: a previously cached pack registry or multi-pack-index already
3781        // names every pack, and locating `oid` in them is pure in-memory index
3782        // work. Try that first so a warm handle does not parse indexes or hash
3783        // pack paths on every lookup.
3784        if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
3785            && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
3786        {
3787            return Ok(Some(pack_paths));
3788        }
3789        if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
3790            && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
3791        {
3792            return Ok(Some(pack_paths));
3793        }
3794
3795        if !pack_dir.exists() {
3796            return Ok(None);
3797        }
3798        if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
3799            return Ok(Some(pack_paths));
3800        }
3801        // Search the cached registry first. On a complete miss, re-scan the
3802        // directory once (picking up any pack added since the registry was
3803        // cached) and search again, so newly written packs are still found.
3804        let registry = self.cached_pack_registry(&pack_dir, false)?;
3805        if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(&registry), oid)? {
3806            return Ok(Some(pack_paths));
3807        }
3808        let refreshed = self.cached_pack_registry(&pack_dir, true)?;
3809        if Arc::ptr_eq(&registry, &refreshed) {
3810            // The re-scan produced the same registry, so nothing new appeared.
3811            return Ok(None);
3812        }
3813        self.find_in_pack_registry(refreshed, oid)
3814    }
3815
3816    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3817        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3818            return Ok(None);
3819        };
3820        let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
3821        let trailer_offset = pack_len
3822            .checked_sub(self.format.raw_len() as u64)
3823            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
3824        let index = pack_lookup.pack_index(self)?;
3825        let pack = pack_lookup.pack_bytes(self)?;
3826        let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
3827        let delta_base_offset = match &delta_base {
3828            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
3829            Some(PackDeltaBase::Ref(_)) | None => None,
3830        };
3831        let offset_info = scan_pack_index_offsets(
3832            &index,
3833            pack_lookup.offset,
3834            trailer_offset,
3835            delta_base_offset,
3836        )?;
3837        let disk_size = offset_info
3838            .end_offset
3839            .checked_sub(pack_lookup.offset)
3840            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
3841        let deltabase = match delta_base {
3842            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
3843                // scan_pack_index_offsets returns Err when delta_base_offset is
3844                // Some but no matching entry is found, so this is unreachable for
3845                // valid packs; propagate as an error rather than panic to keep a
3846                // malformed pack from taking down the process if that invariant
3847                // ever drifts.
3848                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
3849            })?,
3850            Some(PackDeltaBase::Ref(oid)) => oid,
3851            None => zero_oid(self.format)?,
3852        };
3853        Ok(Some(ObjectStorageInfo {
3854            disk_size,
3855            deltabase,
3856        }))
3857    }
3858
3859    fn find_midx_pack_containing(
3860        &self,
3861        pack_dir: &Path,
3862        oid: &ObjectId,
3863    ) -> Result<Option<PackLookup>> {
3864        let midx_path = pack_dir.join("multi-pack-index");
3865        let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
3866            return Ok(None);
3867        };
3868        self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
3869    }
3870
3871    fn midx_oid_lookup_pack_paths(
3872        &self,
3873        pack_dir: &Path,
3874        midx: &MultiPackIndexOidLookup,
3875        oid: &ObjectId,
3876    ) -> Result<Option<PackLookup>> {
3877        let Some(entry) = midx.find(oid)? else {
3878            return Ok(None);
3879        };
3880        let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
3881            return Err(GitError::InvalidFormat(
3882                "multi-pack-index object points past pack table".into(),
3883            ));
3884        };
3885        let pack_file_name = pack_name
3886            .strip_suffix(".idx")
3887            .map(|stem| format!("{stem}.pack"))
3888            .unwrap_or_else(|| pack_name.to_string());
3889        let pack = pack_dir.join(pack_file_name);
3890        Ok(Some(PackLookup::from_path(pack, entry.offset)))
3891    }
3892
3893    fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
3894        let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
3895        let cache = self.multi_pack_oid_lookups.lock().ok()?;
3896        cache.get(&midx_path).map(Arc::clone)
3897    }
3898
3899    /// The pack registry for `pack_dir` *only if already scanned and cached* —
3900    /// never touches the filesystem. Used by the lookup hot path to skip
3901    /// per-object pack-dir metadata checks once a handle is warm. A cold cache
3902    /// returns `None`, so the caller falls back to the scanning path. A complete
3903    /// miss still forces one rescan, preserving the new-pack discovery semantics.
3904    fn cached_loaded_pack_registry(
3905        &self,
3906        _pack_dir: &Path,
3907    ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
3908        let cache = match self.pack_registry.lock() {
3909            Ok(cache) => cache,
3910            Err(_) => return Ok(None),
3911        };
3912        Ok(cache.as_ref().map(Arc::clone))
3913    }
3914}
3915
3916fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
3917    if prefix.len() < 4 || prefix.len() > format.hex_len() {
3918        return Err(GitError::InvalidObjectId(format!(
3919            "expected 4 to {} hex digits for {}, got {}",
3920            format.hex_len(),
3921            format.name(),
3922            prefix.len()
3923        )));
3924    }
3925    if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3926        return Err(GitError::InvalidObjectId(format!(
3927            "non-hex object id prefix {prefix}"
3928        )));
3929    }
3930    Ok(())
3931}
3932
3933fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
3934    oid.to_hex()
3935        .as_bytes()
3936        .iter()
3937        .zip(prefix.as_bytes())
3938        .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
3939}
3940
3941fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
3942    match fs::metadata(pack_dir) {
3943        Ok(metadata) => Ok(metadata.modified().ok()),
3944        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
3945        Err(err) => Err(GitError::Io(err.to_string())),
3946    }
3947}
3948
3949/// Scan `pack_dir` for `.idx` files that have a matching `.pack` sibling and
3950/// parse each index into a registered pack. An `.idx` without its `.pack` is
3951/// skipped (an orphan index cannot serve objects), matching the prior per-read
3952/// behavior.
3953fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
3954    let modified = pack_dir_modified(pack_dir)?;
3955    let entries = match fs::read_dir(pack_dir) {
3956        Ok(entries) => entries,
3957        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3958            return Ok(PackRegistrySnapshot::new(
3959                PackDirFingerprint {
3960                    modified,
3961                    idx_count: 0,
3962                    pack_count: 0,
3963                },
3964                Vec::new(),
3965            ));
3966        }
3967        Err(err) => return Err(GitError::Io(err.to_string())),
3968    };
3969
3970    let mut idx_paths = Vec::new();
3971    let mut idx_count = 0;
3972    let mut pack_count = 0;
3973    for entry in entries {
3974        let entry = entry?;
3975        let path = entry.path();
3976        match path.extension().and_then(|ext| ext.to_str()) {
3977            Some("idx") => {
3978                idx_count += 1;
3979                idx_paths.push(path);
3980            }
3981            Some("pack") => {
3982                pack_count += 1;
3983            }
3984            _ => {}
3985        }
3986    }
3987
3988    let mut packs = Vec::new();
3989    for idx in idx_paths {
3990        let pack = idx.with_extension("pack");
3991        let Ok(metadata) = fs::metadata(&pack) else {
3992            continue;
3993        };
3994        let modified = pack_sort_modified(&metadata);
3995        packs.push((modified, metadata.len(), Arc::new(RegisteredPack::new(idx, pack))));
3996    }
3997    // Git keeps a most-recently-used pack order; seed ours with newer/larger
3998    // packs before falling back to the path. In repositories with many packs,
3999    // this avoids parsing a long run of unrelated `.idx` files before the first
4000    // lookup establishes the recent-pack hint.
4001    packs.sort_by(|left, right| {
4002        right
4003            .0
4004            .cmp(&left.0)
4005            .then_with(|| right.1.cmp(&left.1))
4006            .then_with(|| left.2.idx.cmp(&right.2.idx))
4007    });
4008    let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
4009    Ok(PackRegistrySnapshot::new(
4010        PackDirFingerprint {
4011            modified,
4012            idx_count,
4013            pack_count,
4014        },
4015        packs,
4016    ))
4017}
4018
/// Sort key derived from a file's modification time: whole seconds and the
/// sub-second nanoseconds since the Unix epoch.
///
/// Falls back to `(0, 0)` when the modification time is unavailable or lies
/// before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(mtime) = metadata.modified() else {
        return (0, 0);
    };
    match mtime.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
4031
4032/// Whether two pack registries reference the same pack/index paths (order is
4033/// already normalized by [`scan_pack_registry`]).
4034fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
4035    left.len() == right.len()
4036        && left
4037            .iter()
4038            .zip(right.iter())
4039            .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
4040}
4041
/// Collect the alternate object directories that apply to `objects_dir`.
///
/// Sources, in output order:
/// 1. `GIT_ALTERNATE_OBJECT_DIRECTORIES`, split with the platform's path-list
///    separator (`:` on Unix, `;` on Windows). Entries are kept as raw OS
///    strings, so non-UTF-8 paths survive intact; empty entries are dropped.
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines and lines
///    starting with `#` are ignored, a trailing `\r` is stripped, and relative
///    paths are resolved against `objects_dir`. Lines that are not valid UTF-8
///    are skipped.
///
/// A missing or unreadable alternates file is treated as "no alternates"; this
/// function never fails.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        // `split_paths` works on the raw OS string and uses the platform
        // separator, unlike a lossy UTF-8 conversion followed by a `:` split.
        alternates.extend(env::split_paths(&value).filter(|dir| !dir.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
4072
4073impl ObjectReader for FileObjectDatabase {
4074    fn has_shallow_grafts(&self) -> bool {
4075        !self
4076            .shallow_grafts
4077            .get_or_init(|| {
4078                let shallow_file = self
4079                    .objects_dir
4080                    .parent()
4081                    .map(|git_dir| git_dir.join("shallow"));
4082                match shallow_file {
4083                    Some(path) => read_shallow_grafts(&path, self.format),
4084                    None => HashSet::new(),
4085                }
4086            })
4087            .is_empty()
4088    }
4089
4090    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
4091        self.shallow_grafts
4092            .get_or_init(|| {
4093                let shallow_file = self
4094                    .objects_dir
4095                    .parent()
4096                    .map(|git_dir| git_dir.join("shallow"));
4097                match shallow_file {
4098                    Some(path) => read_shallow_grafts(&path, self.format),
4099                    None => HashSet::new(),
4100                }
4101            })
4102            .contains(oid)
4103    }
4104
4105    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4106        if let Some(object) = implied_empty_tree_object(self.format, oid) {
4107            return Ok(object);
4108        }
4109        // A corrupt loose copy must not shadow a good packed copy: git's
4110        // `oid_object_info_extended` consults every source, so a repacked object
4111        // whose loose file was later corrupted still reads fine from the pack. If
4112        // a packed copy exists, prefer it WITHOUT touching the corrupt loose file
4113        // (which would otherwise emit a spurious `inflate:` diagnostic on each
4114        // probe). Only when no pack copy exists do we read (and, if corrupt,
4115        // surface the error from) the loose file.
4116        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
4117            return self.read_packed_object_at_lookup(oid, &pack_lookup);
4118        }
4119        let loose_err = match self.loose.read_object(oid) {
4120            Ok(object) => return Ok(object),
4121            Err(GitError::NotFound(_)) => None,
4122            Err(err) => Some(err),
4123        };
4124        if let Some(object) = self.read_packed_object(oid)? {
4125            return Ok(object);
4126        }
4127        for alternate in &self.alternates {
4128            match Self::without_alternates(alternate, self.format).read_object(oid) {
4129                Ok(object) => return Ok(object),
4130                Err(GitError::NotFound(_)) => {}
4131                Err(err) => return Err(err),
4132            }
4133        }
4134        // Hard miss against every store. If an earlier enumeration built a loose
4135        // cache, an object written loose afterward by a sibling handle could have
4136        // been skipped above. Mirror git's `oid_object_info_extended`
4137        // reprepare-on-miss: drop stale cache state and retry an exact loose path
4138        // probe once before declaring the object missing.
4139        self.loose.invalidate_cache();
4140        match self.loose.read_object(oid) {
4141            Ok(object) => return Ok(object),
4142            Err(GitError::NotFound(_)) => {}
4143            Err(err) => return Err(err),
4144        }
4145        // No good copy in any store. If the local loose copy was corrupt (not
4146        // merely absent), surface that error — it is more specific than a plain
4147        // "not found".
4148        if let Some(err) = loose_err {
4149            return Err(err);
4150        }
4151        Err(GitError::object_not_found_in(
4152            *oid,
4153            MissingObjectContext::Read,
4154        ))
4155    }
4156}
4157
4158impl ObjectWriter for FileObjectDatabase {
4159    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4160        // Mirror git's freshen semantics (`write_object_file`:
4161        // `freshen_packed_object || freshen_loose_object`): an object already
4162        // present anywhere in the database — loose, packed, or through an
4163        // alternate — is not written again, so e.g. `git add` after
4164        // `git repack -ad` does not resurrect a loose copy of a packed object.
4165        let oid = object.object_id(self.format)?;
4166        if self.contains(&oid)? {
4167            return Ok(oid);
4168        }
4169        self.loose.write_object(object)
4170    }
4171}
4172
4173fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
4174    if path.exists() {
4175        return Ok(());
4176    }
4177    let parent = path
4178        .parent()
4179        .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
4180    fs::create_dir_all(parent)?;
4181    let temp_path = unique_temp_path(parent);
4182    let write_result = (|| -> Result<()> {
4183        {
4184            let mut file = fs::OpenOptions::new()
4185                .write(true)
4186                .create_new(true)
4187                .open(&temp_path)?;
4188            file.write_all(bytes)?;
4189            file.sync_all()?;
4190        }
4191        match fs::rename(&temp_path, path) {
4192            Ok(()) => Ok(()),
4193            Err(_) if path.exists() => {
4194                let _ = fs::remove_file(&temp_path);
4195                Ok(())
4196            }
4197            Err(err) => Err(GitError::Io(err.to_string())),
4198        }
4199    })();
4200    if write_result.is_err() {
4201        let _ = fs::remove_file(&temp_path);
4202    }
4203    write_result
4204}
4205
4206fn write_promisor_pack_sidecar(
4207    pack_dir: &Path,
4208    pack_name: &str,
4209    promisor: bool,
4210) -> Result<Option<PathBuf>> {
4211    if !promisor {
4212        return Ok(None);
4213    }
4214    let path = pack_dir.join(format!("{pack_name}.promisor"));
4215    write_pack_component(&path, b"")?;
4216    Ok(Some(path))
4217}
4218
/// Size of the window, in inflated bytes, in which a loose object's
/// `"<type> <size>\0"` header must be fully contained (git's `MAX_HEADER_LEN`
/// in object-file.c). The terminating NUL has to fall inside the window, so the
/// longest acceptable header text is 31 bytes; 32 or more non-NUL bytes are
/// rejected as too long.
const MAX_LOOSE_HEADER_LEN: usize = 32;
4224
4225/// git's exact `error:`-level diagnostic for a loose object whose header overflows
4226/// `MAX_LOOSE_HEADER_LEN` (object-file.c: `error(_("header for %s too long, exceeds
4227/// %d bytes"), ...)`). Shared by the header-only and full-read paths so both surface
4228/// byte-identical text.
4229fn loose_header_too_long(oid: &ObjectId) -> GitError {
4230    GitError::InvalidObject(format!(
4231        "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
4232    ))
4233}
4234
4235/// git's `error:`-level diagnostic when the loose framing header cannot be inflated at
4236/// all (object-file.c `loose_object_info`, the `ULHR_BAD` arm: `error(_("unable to
4237/// unpack %s header"), ...)`).
4238fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
4239    GitError::InvalidObject(format!("unable to unpack {oid} header"))
4240}
4241
/// Classify an inflate failure from the two-byte zlib stream header, returning
/// the text git's zlib wrapper would print (`inflate: <status> (<msg>)`).
///
/// The checks run in the same order as zlib's own header validation:
/// 1. the FCHECK checksum over CMF and FLG (the 16-bit big-endian value must be
///    a multiple of 31),
/// 2. the compression method (low nibble of CMF must be 8, deflate),
/// 3. the window size (high nibble of CMF must not exceed 7),
/// 4. the FDICT bit (a preset dictionary is requested; zlib reports this with
///    no message, which git renders as "(no message)").
///
/// Returns `None` when fewer than two bytes are available or the header is
/// valid: failures further into the stream cannot be classified here, so no
/// diagnostic is invented for them.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (Some(&cmf), Some(&flg)) = (input.first(), input.get(1)) else {
        return None;
    };
    let header_word = u16::from_be_bytes([cmf, flg]);
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let wants_dictionary = flg & 0x20 != 0;

    let diagnostic = if header_word % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if method != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if window_bits > 7 {
        "inflate: data stream error (invalid window size)"
    } else if wants_dictionary {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(diagnostic)
}
4265
4266/// Print the `error: inflate: ...` line git's zlib wrapper emits the moment
4267/// `inflate()` fails, when the failure is classifiable from the stream header.
4268fn emit_inflate_diagnostic(input: &[u8]) {
4269    if let Some(diagnostic) = inflate_header_diagnostic(input) {
4270        eprintln!("error: {diagnostic}");
4271    }
4272}
4273
4274/// Integrity verdict for a single loose object file, as classified by
4275/// [`LooseObjectStore::verify_object`].
4276#[derive(Debug, Clone, PartialEq, Eq)]
4277pub enum LooseObjectIntegrity {
4278    /// Inflated, parsed, and re-hashed to its path-derived oid.
4279    Ok,
4280    /// Readable and well-formed, but its content hashes to a different oid
4281    /// (a loose file stored under the wrong path).
4282    HashMismatch { actual: ObjectId },
4283    /// Unreadable: corrupt zlib stream, truncated content, or unparseable header.
4284    /// The `error:`-level diagnostics were already printed to stderr.
4285    Corrupt,
4286}
4287
4288#[derive(Debug, Clone)]
4289pub struct LooseObjectStore {
4290    objects_dir: PathBuf,
4291    format: ObjectFormat,
4292    /// Lazily-populated set of loose object ids present on disk, mirroring git's
4293    /// `loose_objects_cache` (object-file.c). A lookup scans the queried
4294    /// `objects/XX/` fanout once; afterward misses in that fanout are in-memory
4295    /// checks instead of failed exact-path opens. Shared across
4296    /// `FileObjectDatabase` clones via `Arc` so a write through one handle is
4297    /// visible to reads through another; cleared by `refresh_read_cache` so
4298    /// objects installed out-of-band (fetch, repack) become visible. Writes
4299    /// extend the set in place rather than invalidating it.
4300    loose_cache: Arc<Mutex<LoosePresenceCache>>,
4301}
4302
4303impl LooseObjectStore {
4304    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4305        Self {
4306            objects_dir: objects_dir.into(),
4307            format,
4308            loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
4309        }
4310    }
4311
4312    /// Whether `oid` is present according to the loose-object cache, populating
4313    /// the cache on first use. Returns `None` when the lock cannot be trusted or
4314    /// the scan fails; callers should fall back to an exact filesystem probe in
4315    /// that case so a cache-building problem cannot change read semantics.
4316    fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
4317        let mut guard = self.loose_cache.lock().ok()?;
4318        let fanout = oid.as_bytes()[0];
4319        if !guard.loaded_fanouts.contains(&fanout) {
4320            collect_loose_fanout_object_ids(
4321                &self.objects_dir,
4322                self.format,
4323                fanout,
4324                &mut guard.objects,
4325            )
4326            .ok()?;
4327            guard.loaded_fanouts.insert(fanout);
4328        }
4329        Some(guard.objects.contains(oid))
4330    }
4331
4332    /// Populate the loose-object cache and return the sorted ids. This mirrors
4333    /// git's `odb_loose_cache` lazy fill and is reserved for operations that
4334    /// really need loose-object enumeration.
4335    fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
4336        if let Ok(mut guard) = self.loose_cache.lock() {
4337            guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
4338            guard.loaded_fanouts = (0..=u8::MAX).collect();
4339            let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
4340            ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
4341            return Ok(ids);
4342        }
4343        loose_object_ids(&self.objects_dir, self.format)
4344    }
4345
4346    /// Record `oid` as present in loose storage so subsequent reads find it
4347    /// without a rescan. A no-op when the cache has not been populated yet (the
4348    /// eventual lazy scan will pick the object up) or the lock is poisoned.
4349    fn note_loose_write(&self, oid: ObjectId) {
4350        if let Ok(mut guard) = self.loose_cache.lock() {
4351            guard.objects.insert(oid);
4352        }
4353    }
4354
4355    /// Drop the in-memory loose set so the next access rescans the fanout. Called
4356    /// by `FileObjectDatabase::refresh_read_cache` after out-of-band installs.
4357    pub(crate) fn invalidate_cache(&self) {
4358        if let Ok(mut guard) = self.loose_cache.lock() {
4359            *guard = LoosePresenceCache::default();
4360        }
4361    }
4362
4363    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
4364        Self::new(repository_objects_dir(git_dir), format)
4365    }
4366
4367    fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
4368        if oid.format() != self.format {
4369            return Err(GitError::InvalidObjectId(format!(
4370                "object {oid} uses {}, store uses {}",
4371                oid.format().name(),
4372                self.format.name()
4373            )));
4374        }
4375        Ok(())
4376    }
4377
4378    pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
4379        self.validate_oid_format(oid)?;
4380        let hex = oid.to_hex();
4381        Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
4382    }
4383
4384    pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
4385        self.validate_oid_format(oid)?;
4386        if self.cached_loose_presence(oid) == Some(false) {
4387            return Ok(false);
4388        }
4389        let path = self.object_path(oid)?;
4390        Ok(path.exists())
4391    }
4392
4393    pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
4394        self.validate_oid_format(oid)?;
4395        if self.cached_loose_presence(oid) == Some(false) {
4396            return Ok(None);
4397        }
4398        let path = self.object_path(oid)?;
4399        match fs::metadata(path) {
4400            Ok(metadata) => Ok(Some(metadata.len())),
4401            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
4402            Err(err) => Err(GitError::Io(err.to_string())),
4403        }
4404    }
4405
4406    /// The object type and content size of `oid` from loose storage, inflating only
4407    /// the framing header (`"<type> <size>\0"`) and not the body. Output-limited
4408    /// reads keep miniz from inflating past the header even for large objects.
4409    /// Returns `Ok(None)` when the loose object is absent.
4410    pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
4411        self.validate_oid_format(oid)?;
4412        if self.cached_loose_presence(oid) == Some(false) {
4413            return Ok(None);
4414        }
4415        let path = self.object_path(oid)?;
4416        let mut file = match fs::File::open(&path) {
4417            Ok(file) => file,
4418            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4419            Err(err) => return Err(GitError::Io(err.to_string())),
4420        };
4421        // Capture the zlib stream's 2-byte header before inflating: when the stream
4422        // is corrupt, those bytes identify zlib's diagnostic (incorrect header
4423        // check, needs dictionary, ...) exactly as zlib's `inflate()` would report
4424        // it through git's wrapper.
4425        let mut stream_prefix = [0u8; 2];
4426        let prefix_len = read_full_prefix(&mut file, &mut stream_prefix)?;
4427        file.seek(SeekFrom::Start(0))
4428            .map_err(|err| GitError::Io(err.to_string()))?;
4429        let mut decoder = ZlibDecoder::new(file);
4430        let mut header = Vec::new();
4431        let mut byte = [0u8; 1];
4432        loop {
4433            // git inflates only the first `MAX_LOOSE_HEADER_LEN` bytes
4434            // (object-file.c `unpack_loose_header`) and reports ULHR_TOO_LONG when no
4435            // NUL terminator lands within them — whether the stream simply ends early
4436            // or overflows the window. Both collapse to the same `error:`-level
4437            // diagnostic, so a header that ends before its NUL is "too long" too.
4438            // A stream that won't inflate at all is git's ULHR_BAD instead: the
4439            // zlib wrapper's `error: inflate: ...` line, then "unable to unpack
4440            // <oid> header".
4441            let read = match decoder.read(&mut byte) {
4442                Ok(read) => read,
4443                Err(_) => {
4444                    emit_inflate_diagnostic(&stream_prefix[..prefix_len]);
4445                    return Err(loose_unpack_header_failed(oid));
4446                }
4447            };
4448            if read == 0 {
4449                return Err(loose_header_too_long(oid));
4450            }
4451            if byte[0] == 0 {
4452                break;
4453            }
4454            header.push(byte[0]);
4455            // A 31-byte header (NUL at the 32nd byte) is the longest that fits; 32
4456            // non-NUL bytes overflow the window.
4457            if header.len() >= MAX_LOOSE_HEADER_LEN {
4458                return Err(loose_header_too_long(oid));
4459            }
4460        }
4461        let header =
4462            std::str::from_utf8(&header).map_err(|err| GitError::InvalidObject(err.to_string()))?;
4463        let (kind, size) = header
4464            .split_once(' ')
4465            .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
4466        let object_type = kind.parse::<ObjectType>()?;
4467        let size = size
4468            .parse::<u64>()
4469            .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
4470        Ok(Some((object_type, size)))
4471    }
4472
4473    /// Loose object ids in this store, sorted by hex.
4474    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4475        self.loose_object_ids_cached()
4476    }
4477
4478    /// fsck's loose-object integrity probe, mirroring C git's `read_loose_object`
4479    /// (object-file.c) as called from `fsck_loose` (builtin/fsck.c): inflate and
4480    /// parse the file at `oid`'s loose path, then re-hash its content against the
4481    /// path-derived oid. `display_path` appears verbatim in the `error:`-level
4482    /// diagnostics — the path-form messages of `read_loose_object` ("unable to
4483    /// unpack header of <path>"), unlike the oid-form messages of the normal read
4484    /// path. Returns `Ok(None)` when no loose file exists for `oid`.
4485    pub fn verify_object(
4486        &self,
4487        oid: &ObjectId,
4488        display_path: &str,
4489    ) -> Result<Option<LooseObjectIntegrity>> {
4490        let path = self.object_path(oid)?;
4491        let compressed = match fs::read(&path) {
4492            Ok(compressed) => compressed,
4493            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4494            Err(err) => return Err(GitError::Io(err.to_string())),
4495        };
4496        let mut decoder = ZlibDecoder::new(compressed.as_slice());
4497        let mut framed = Vec::new();
4498        if decoder.read_to_end(&mut framed).is_err() {
4499            emit_inflate_diagnostic(&compressed);
4500            // git inflates the header first (`unpack_loose_header`), then the body
4501            // (`unpack_loose_rest`). If the header inflated (its NUL is visible in
4502            // the partial output) but the body broke, that is a *content*
4503            // corruption: git's `unpack_loose_rest` prints `corrupt loose object
4504            // '<oid>'` (status != Z_STREAM_END), then `read_loose_object` adds
4505            // `unable to unpack contents of <path>`. If inflation died before the
4506            // header materialized, only the header message fires.
4507            if framed_loose_header_terminated(&framed) {
4508                eprintln!("error: corrupt loose object '{oid}'");
4509                eprintln!("error: unable to unpack contents of {display_path}");
4510            } else {
4511                eprintln!("error: unable to unpack header of {display_path}");
4512            }
4513            return Ok(Some(LooseObjectIntegrity::Corrupt));
4514        }
4515        if !framed_loose_header_terminated(&framed) {
4516            // ULHR_TOO_LONG collapses into the same path-form message here: C's
4517            // `read_loose_object` treats every non-OK `unpack_loose_header` alike.
4518            eprintln!("error: unable to unpack header of {display_path}");
4519            return Ok(Some(LooseObjectIntegrity::Corrupt));
4520        }
4521        // git's `unpack_loose_rest`/`check_stream_oid` reject trailing bytes after
4522        // the zlib stream: a fully-inflated object whose compressed input was not
4523        // entirely consumed is `garbage at end of loose object '<oid>'`, then
4524        // `object corrupt or missing: <path>` from `fsck_loose`. (read_to_end
4525        // stops at Z_STREAM_END and silently ignores the trailing bytes, so we
4526        // compare consumed input against the file size ourselves.)
4527        if (decoder.total_in() as usize) < compressed.len() {
4528            // git's `unpack_loose_rest` prints `garbage at end of loose object`
4529            // then returns NULL, so `read_loose_object` also prints `unable to
4530            // unpack contents of <path>`.
4531            eprintln!("error: garbage at end of loose object '{oid}'");
4532            eprintln!("error: unable to unpack contents of {display_path}");
4533            return Ok(Some(LooseObjectIntegrity::Corrupt));
4534        }
4535        // A truncated object can inflate to a clean stream end yet yield fewer
4536        // body bytes than the header's declared size. git's `unpack_loose_rest`
4537        // inflates exactly `size` bytes and, finding the stream ends short,
4538        // prints `corrupt loose object '<oid>'`; `read_loose_object` then adds
4539        // `unable to unpack contents of <path>`. Detect the short body here so it
4540        // is not misreported as a header-parse failure.
4541        if let Some(declared) = loose_header_declared_size(&framed) {
4542            let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
4543            let body_len = framed.len() - (nul + 1).min(framed.len());
4544            if body_len < declared {
4545                eprintln!("error: corrupt loose object '{oid}'");
4546                eprintln!("error: unable to unpack contents of {display_path}");
4547                return Ok(Some(LooseObjectIntegrity::Corrupt));
4548            }
4549        }
4550        let Ok(object) = parse_framed_object(&framed) else {
4551            // Distinguish git's two header-parse failures: a structurally valid
4552            // `"<word> <size>\0"` header whose *type word* is not a known object
4553            // type yields `unable to parse type from header '<header>'`, while a
4554            // genuinely malformed header yields `unable to parse header`.
4555            if let Some(header) = loose_header_with_unknown_type(&framed) {
4556                eprintln!(
4557                    "error: unable to parse type from header '{header}' of {display_path}"
4558                );
4559            } else {
4560                eprintln!("error: unable to parse header of {display_path}");
4561            }
4562            return Ok(Some(LooseObjectIntegrity::Corrupt));
4563        };
4564        let actual = object.object_id(self.format)?;
4565        if &actual != oid {
4566            return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
4567        }
4568        Ok(Some(LooseObjectIntegrity::Ok))
4569    }
4570}
4571
4572/// Whether the inflated framing bytes contain the header's NUL terminator within
4573/// git's `MAX_HEADER_LEN` window (object-file.c `unpack_loose_header`'s success
4574/// condition).
4575fn framed_loose_header_terminated(framed: &[u8]) -> bool {
4576    framed
4577        .iter()
4578        .take(MAX_LOOSE_HEADER_LEN)
4579        .any(|byte| *byte == 0)
4580}
4581
4582/// If the framing has a structurally valid `"<word> <size>\0"` header whose body
4583/// length matches `<size>` but whose `<word>` is not a known object type, return
4584/// the header string (the bytes before the NUL). Mirrors git's
4585/// `parse_loose_header` reporting `unable to parse type from header '<header>'`.
4586fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
4587    let nul = framed.iter().position(|&b| b == 0)?;
4588    let header = std::str::from_utf8(&framed[..nul]).ok()?;
4589    let (kind, size) = header.split_once(' ')?;
4590    let size: usize = size.parse().ok()?;
4591    // Body length must match the declared size (otherwise it is a different
4592    // corruption, handled by the generic path).
4593    if framed.len() - (nul + 1) != size {
4594        return None;
4595    }
4596    // A known type word would have parsed successfully upstream; only return
4597    // when the word is genuinely unknown.
4598    if kind.parse::<ObjectType>().is_ok() {
4599        return None;
4600    }
4601    Some(header.to_string())
4602}
4603
/// The content size declared by a loose object's `"<type> <size>\0"` header.
///
/// Returns `Some(size)` only when the framing contains a NUL, the bytes before
/// it are UTF-8 of the form `<word> <size>`, and `<size>` is a canonical
/// decimal number: ASCII digits only, with no leading zero unless it is
/// exactly `0`. git accepts nothing else as a size, so spellings such as `+5`
/// or `05` (which `str::parse` alone would accept) yield `None`, as does a
/// value that overflows `usize`.
///
/// Used to detect a body that inflated short of its declared length.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let nul = framed.iter().position(|&byte| byte == 0)?;
    let header = std::str::from_utf8(&framed[..nul]).ok()?;
    let (_kind, size) = header.split_once(' ')?;
    let canonical_size = !size.is_empty()
        && size.bytes().all(|digit| digit.is_ascii_digit())
        && (size.len() == 1 || !size.starts_with('0'));
    if !canonical_size {
        return None;
    }
    size.parse::<usize>().ok()
}
4613
4614/// Read up to `prefix.len()` bytes from the start of `file`, returning how many
4615/// were available (short only when the file itself is shorter).
4616fn read_full_prefix(file: &mut fs::File, prefix: &mut [u8]) -> Result<usize> {
4617    let mut len = 0;
4618    while len < prefix.len() {
4619        let read = file
4620            .read(&mut prefix[len..])
4621            .map_err(|err| GitError::Io(err.to_string()))?;
4622        if read == 0 {
4623            break;
4624        }
4625        len += read;
4626    }
4627    Ok(len)
4628}
4629
4630impl ObjectReader for LooseObjectStore {
4631    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4632        self.validate_oid_format(oid)?;
4633        // Skip the `open()` (and its ENOENT) when an already-built loose cache
4634        // knows the id is absent. Without a cache, use an exact path probe; a
4635        // full fanout scan is far more expensive for one-shot packed-object reads.
4636        if self.cached_loose_presence(oid) == Some(false) {
4637            return Err(GitError::object_not_found_in(
4638                *oid,
4639                MissingObjectContext::Read,
4640            ));
4641        }
4642        let path = self.object_path(oid)?;
4643        let compressed = match fs::read(&path) {
4644            Ok(compressed) => compressed,
4645            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4646                return Err(GitError::object_not_found_in(
4647                    *oid,
4648                    MissingObjectContext::Read,
4649                ));
4650            }
4651            Err(err) => return Err(GitError::Io(err.to_string())),
4652        };
4653        let mut decoder = ZlibDecoder::new(compressed.as_slice());
4654        let mut framed = Vec::new();
4655        if decoder.read_to_end(&mut framed).is_err() {
4656            emit_inflate_diagnostic(&compressed);
4657            // A stream that dies before the framing header materializes is git's
4658            // ULHR_BAD ("unable to unpack <oid> header"); with the header intact,
4659            // the body is what broke (`unpack_loose_rest`'s "corrupt loose
4660            // object").
4661            if !framed_loose_header_terminated(&framed) {
4662                return Err(loose_unpack_header_failed(oid));
4663            }
4664            return Err(GitError::InvalidObject(format!(
4665                "corrupt loose object '{oid}'"
4666            )));
4667        }
4668        // git only inflates the first `MAX_LOOSE_HEADER_LEN` bytes looking for the
4669        // header's NUL terminator before parsing the type; an over-long header is
4670        // rejected here (with git's diagnostic) rather than failing later as an
4671        // "unknown object type". Mirror that so `cat-file -p` matches upstream.
4672        if framed
4673            .iter()
4674            .take(MAX_LOOSE_HEADER_LEN)
4675            .all(|byte| *byte != 0)
4676        {
4677            return Err(loose_header_too_long(oid));
4678        }
4679        let object = parse_framed_object(&framed)?;
4680        // Trust the loose object's on-disk name rather than re-hashing its full body
4681        // on every read (see `verify_reads_enabled`); use `validate`/fsck or
4682        // `SLEY_VERIFY_READS` for an explicit integrity check.
4683        if verify_reads_enabled() {
4684            let actual = object.object_id(self.format)?;
4685            if &actual != oid {
4686                return Err(GitError::InvalidObject(format!(
4687                    "loose object {} hashes to {actual}",
4688                    path.display()
4689                )));
4690            }
4691        }
4692        Ok(Arc::new(object))
4693    }
4694}
4695
4696impl ObjectWriter for LooseObjectStore {
4697    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4698        let oid = object.object_id(self.format)?;
4699        let path = self.object_path(&oid)?;
4700        if path.exists() {
4701            self.note_loose_write(oid);
4702            return Ok(oid);
4703        }
4704        let parent = path
4705            .parent()
4706            .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
4707        fs::create_dir_all(parent)?;
4708        let temp_path = unique_temp_path(parent);
4709        let write_result = (|| -> Result<()> {
4710            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4711            encoder.write_all(&object.framed_bytes())?;
4712            let compressed = encoder.finish()?;
4713            {
4714                let mut file = fs::OpenOptions::new()
4715                    .write(true)
4716                    .create_new(true)
4717                    .open(&temp_path)?;
4718                file.write_all(&compressed)?;
4719                // No fsync: git's default `core.fsync=none` fsyncs nothing on the
4720                // loose-object write path (object-file.c writes the temp file and
4721                // renames it without syncing unless `core.fsync` names
4722                // `loose-object`/`objects`/`all`, which it does not by default).
4723                // A per-object sync_all() here made `git add` of N files cost N
4724                // fsyncs — the dominant term in sley#27's 10x `add -u` slowdown —
4725                // for durability git itself does not provide by default. The
4726                // create_new temp + atomic rename below still guarantees the
4727                // object never appears half-written under its final name.
4728            }
4729            match fs::rename(&temp_path, &path) {
4730                Ok(()) => Ok(()),
4731                Err(_) if path.exists() => {
4732                    let _ = fs::remove_file(&temp_path);
4733                    Ok(())
4734                }
4735                Err(err) => Err(GitError::Io(err.to_string())),
4736            }
4737        })();
4738        if write_result.is_err() {
4739            let _ = fs::remove_file(&temp_path);
4740        }
4741        write_result?;
4742        self.note_loose_write(oid);
4743        Ok(oid)
4744    }
4745}
4746
4747fn unique_temp_path(parent: &Path) -> PathBuf {
4748    let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
4749    parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
4750}
4751
4752#[cfg(test)]
4753mod tests {
4754    use super::*;
4755    use sley_core::BString;
4756    use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
4757    use sley_pack::{PackFile, PackWriteOptions};
4758
4759    fn blob_of(byte: u8, len: usize) -> EncodedObject {
4760        EncodedObject::new(ObjectType::Blob, vec![byte; len])
4761    }
4762
4763    fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
4764        Arc::new(blob_of(byte, len))
4765    }
4766
4767    fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
4768        reader
4769            .read_object(oid)
4770            .expect("test operation should succeed")
4771            .as_ref()
4772            .clone()
4773    }
4774
4775    #[test]
4776    fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
4777        // Budget holds two ~1 KiB objects but not three.
4778        let one = cached_object_cost(&blob_of(0, 1000));
4779        let mut cache = LruCache::<u32>::new(one * 2 + 8);
4780        cache.put(1, cached_blob_of(b'a', 1000));
4781        cache.put(2, cached_blob_of(b'b', 1000));
4782        // Touch key 1 so key 2 becomes least-recently-used.
4783        assert!(cache.get(&1).is_some());
4784        cache.put(3, cached_blob_of(b'c', 1000));
4785        // Key 2 (LRU) is evicted; 1 and 3 remain.
4786        assert!(cache.get(&1).is_some());
4787        assert!(cache.get(&2).is_none());
4788        assert!(cache.get(&3).is_some());
4789    }
4790
4791    #[test]
4792    fn lru_cache_zero_budget_is_inert() {
4793        let mut cache = LruCache::<u32>::new(0);
4794        cache.put(1, cached_blob_of(b'a', 16));
4795        assert!(cache.get(&1).is_none());
4796    }
4797
4798    #[test]
4799    fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
4800        let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
4801        cache.put(1, cached_blob_of(b'a', 50));
4802        assert!(cache.get(&1).is_some());
4803        // An object that cannot fit is not cached, and it evicts the prior entry
4804        // stored under the same key (so we never serve a stale value for it).
4805        cache.put(1, cached_blob_of(b'b', 10_000));
4806        assert!(cache.get(&1).is_none());
4807        // A subsequent fitting insert under another key still works and accounting
4808        // is not corrupted by the oversized insert.
4809        cache.put(2, cached_blob_of(b'c', 50));
4810        assert!(cache.get(&2).is_some());
4811    }
4812
4813    #[test]
4814    fn lru_cache_replacing_entry_updates_byte_accounting() {
4815        // Budget holds two 500-byte objects (plus headroom) but not a 500 + a
4816        // ~1900-byte object.
4817        let small = cached_object_cost(&blob_of(0, 500));
4818        let mut cache = LruCache::<u32>::new(small * 2 + 200);
4819        cache.put(1, cached_blob_of(b'a', 500));
4820        cache.put(2, cached_blob_of(b'b', 500));
4821        assert!(cache.get(&1).is_some());
4822        assert!(cache.get(&2).is_some());
4823        // Replace key 2 (now MRU after the gets above re-ordered 1 then 2) with a
4824        // bigger value that still fits the budget alone but makes the running total
4825        // exceed it; the LRU (key 1) is evicted while the replaced key 2 stays.
4826        // This exercises the replace-path accounting.
4827        cache.put(2, cached_blob_of(b'b', 1000));
4828        assert!(cache.get(&2).is_some());
4829        assert!(cache.get(&1).is_none());
4830    }
4831
4832    #[test]
4833    fn write_and_validate_blob() {
4834        let db = ObjectDatabase::new(ObjectFormat::Sha1);
4835        let oid = db
4836            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
4837            .expect("test operation should succeed");
4838        assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
4839        db.validate(&oid).expect("test operation should succeed");
4840    }
4841
4842    #[test]
4843    fn loose_store_writes_and_reads_object() {
4844        let root = std::env::temp_dir().join(format!(
4845            "sley-loose-store-{}-{}",
4846            std::process::id(),
4847            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
4848        ));
4849        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
4850        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
4851        let oid = store
4852            .write_object(object.clone())
4853            .expect("test operation should succeed");
4854        assert_eq!(read_object_for_assert(&store, &oid), object);
4855        assert!(
4856            store
4857                .object_path(&oid)
4858                .expect("test operation should succeed")
4859                .exists()
4860        );
4861        fs::remove_dir_all(root).expect("test operation should succeed");
4862    }
4863
4864    #[test]
4865    fn file_database_reads_object_from_pack_index() {
4866        let root = temp_root("sley-file-odb-pack");
4867        let git_dir = root.join(".git");
4868        let pack_dir = git_dir.join("objects").join("pack");
4869        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
4870        let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
4871        let oid = object
4872            .object_id(ObjectFormat::Sha1)
4873            .expect("test operation should succeed");
4874        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
4875            .expect("test operation should succeed");
4876        let pack_name = written.checksum.to_hex();
4877        fs::write(
4878            pack_dir.join(format!("pack-{pack_name}.pack")),
4879            written.pack,
4880        )
4881        .expect("test operation should succeed");
4882        fs::write(
4883            pack_dir.join(format!("pack-{pack_name}.idx")),
4884            written.index,
4885        )
4886        .expect("test operation should succeed");
4887
4888        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4889        assert!(db.contains(&oid).expect("test operation should succeed"));
4890        assert_eq!(read_object_for_assert(&db, &oid), object);
4891        fs::remove_dir_all(root).expect("test operation should succeed");
4892    }
4893
4894    #[test]
4895    fn file_database_loose_cache_observes_same_process_write_after_miss() {
4896        let root = temp_root("sley-file-odb-loose-cache-write");
4897        let git_dir = root.join(".git");
4898        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4899        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4900
4901        let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
4902        let oid = object
4903            .object_id(ObjectFormat::Sha1)
4904            .expect("test operation should succeed");
4905
4906        assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
4907        db.loose()
4908            .write_object(object.clone())
4909            .expect("test operation should succeed");
4910
4911        assert_eq!(read_object_for_assert(&db, &oid), object);
4912        fs::remove_dir_all(root).expect("test operation should succeed");
4913    }
4914
4915    #[test]
4916    fn object_presence_checker_observes_same_process_loose_write_after_miss() {
4917        let root = temp_root("sley-presence-checker-loose-cache-write");
4918        let git_dir = root.join(".git");
4919        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4920        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4921        let mut checker = db.presence_checker();
4922
4923        let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
4924        let oid = object
4925            .object_id(ObjectFormat::Sha1)
4926            .expect("test operation should succeed");
4927
4928        assert!(
4929            !checker
4930                .contains(&oid)
4931                .expect("test operation should succeed")
4932        );
4933        db.loose()
4934            .write_object(object)
4935            .expect("test operation should succeed");
4936
4937        assert!(
4938            checker
4939                .contains(&oid)
4940                .expect("test operation should succeed")
4941        );
4942        fs::remove_dir_all(root).expect("test operation should succeed");
4943    }
4944
4945    #[test]
4946    fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
4947        let root = temp_root("sley-read-object-header");
4948        let git_dir = root.join(".git");
4949        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4950        let format = ObjectFormat::Sha1;
4951        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4952
4953        // Loose object: the header read inflates only the framing, not the body.
4954        let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
4955        let loose_oid = db
4956            .write_object(loose.clone())
4957            .expect("test operation should succeed");
4958
4959        // Packed objects, including an ofs-delta whose *result* size lives in the
4960        // delta stream (not the pack entry header) and whose type is inherited from
4961        // its base at the end of the chain.
4962        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
4963        let mut child_body = vec![b'a'; 4096];
4964        child_body.extend_from_slice(b" plus a deltified tail\n");
4965        let child = EncodedObject::new(ObjectType::Blob, child_body);
4966        let commitish =
4967            EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
4968        let base_oid = base
4969            .object_id(format)
4970            .expect("test operation should succeed");
4971        let child_oid = child
4972            .object_id(format)
4973            .expect("test operation should succeed");
4974        let commit_oid = commitish
4975            .object_id(format)
4976            .expect("test operation should succeed");
4977        let options = PackWriteOptions::new()
4978            .with_prefer_ofs_delta(true)
4979            .with_reorder(false);
4980        let pack = PackFile::write_packed_with_options(
4981            &[base.clone(), child.clone(), commitish.clone()],
4982            format,
4983            &options,
4984        )
4985        .expect("test operation should succeed");
4986        db.install_pack(&pack)
4987            .expect("test operation should succeed");
4988
4989        // The header read agrees with a full decode for every object and storage
4990        // class, without ever materializing the body.
4991        for (oid, want_type, want_len) in [
4992            (&loose_oid, ObjectType::Blob, loose.body.len()),
4993            (&base_oid, ObjectType::Blob, base.body.len()),
4994            (&child_oid, ObjectType::Blob, child.body.len()),
4995            (&commit_oid, ObjectType::Commit, commitish.body.len()),
4996        ] {
4997            assert_eq!(
4998                db.read_object_header(oid)
4999                    .expect("test operation should succeed"),
5000                Some((want_type, want_len as u64)),
5001                "header for {oid}"
5002            );
5003            let full = db.read_object(oid).expect("test operation should succeed");
5004            assert_eq!(
5005                db.read_object_header(oid)
5006                    .expect("test operation should succeed"),
5007                Some((full.object_type, full.body.len() as u64))
5008            );
5009        }
5010
5011        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
5012            .expect("test operation should succeed");
5013        assert_eq!(
5014            db.read_object_header(&missing)
5015                .expect("test operation should succeed"),
5016            None
5017        );
5018        fs::remove_dir_all(root).expect("test operation should succeed");
5019    }
5020
5021    #[test]
5022    fn object_storage_info_reports_loose_packed_and_delta_metadata() {
5023        let root = temp_root("sley-object-storage-info");
5024        let git_dir = root.join(".git");
5025        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5026        let format = ObjectFormat::Sha1;
5027        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5028
5029        let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
5030        let loose_oid = db
5031            .write_object(loose)
5032            .expect("test operation should succeed");
5033        let loose_size = fs::metadata(
5034            db.loose()
5035                .object_path(&loose_oid)
5036                .expect("test operation should succeed"),
5037        )
5038        .expect("test operation should succeed")
5039        .len();
5040        let loose_info = db
5041            .object_storage_info(&loose_oid)
5042            .expect("test operation should succeed")
5043            .expect("test operation should succeed");
5044        assert_eq!(loose_info.disk_size, loose_size);
5045        assert_eq!(
5046            loose_info.deltabase,
5047            zero_oid(format).expect("test operation should succeed")
5048        );
5049
5050        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
5051        let mut child_body = vec![b'a'; 4096];
5052        child_body.extend_from_slice(b" changed tail\n");
5053        let child = EncodedObject::new(ObjectType::Blob, child_body);
5054        let base_oid = base
5055            .object_id(format)
5056            .expect("test operation should succeed");
5057        let child_oid = child
5058            .object_id(format)
5059            .expect("test operation should succeed");
5060        let options = PackWriteOptions::new()
5061            .with_prefer_ofs_delta(true)
5062            .with_reorder(false);
5063        let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
5064            .expect("test operation should succeed");
5065        db.install_pack(&pack)
5066            .expect("test operation should succeed");
5067
5068        let base_info = db
5069            .object_storage_info(&base_oid)
5070            .expect("test operation should succeed")
5071            .expect("test operation should succeed");
5072        assert!(base_info.disk_size > 0);
5073        assert_eq!(
5074            base_info.deltabase,
5075            zero_oid(format).expect("test operation should succeed")
5076        );
5077
5078        let child_info = db
5079            .object_storage_info(&child_oid)
5080            .expect("test operation should succeed")
5081            .expect("test operation should succeed");
5082        assert!(child_info.disk_size > 0);
5083        assert_eq!(child_info.deltabase, base_oid);
5084
5085        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
5086            .expect("test operation should succeed");
5087        assert_eq!(
5088            db.object_storage_info(&missing)
5089                .expect("test operation should succeed"),
5090            None
5091        );
5092        fs::remove_dir_all(root).expect("test operation should succeed");
5093    }
5094
5095    #[test]
5096    fn file_database_resolves_unique_loose_object_prefix() {
5097        let root = temp_root("sley-file-odb-prefix-loose");
5098        let git_dir = root.join(".git");
5099        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5100        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5101        let object = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
5102        let oid = db
5103            .write_object(object)
5104            .expect("test operation should succeed");
5105        let prefix = &oid.to_hex()[..8];
5106
5107        assert_eq!(
5108            db.resolve_prefix(prefix)
5109                .expect("test operation should succeed"),
5110            ObjectPrefixResolution::Unique(oid)
5111        );
5112        assert!(
5113            db.object_ids()
5114                .expect("test operation should succeed")
5115                .contains(&oid)
5116        );
5117        fs::remove_dir_all(root).expect("test operation should succeed");
5118    }
5119
5120    #[test]
5121    fn file_database_resolves_unique_packed_object_prefix() {
5122        let root = temp_root("sley-file-odb-prefix-packed");
5123        let git_dir = root.join(".git");
5124        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5125        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5126        let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
5127        let oid = object
5128            .object_id(ObjectFormat::Sha1)
5129            .expect("test operation should succeed");
5130        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5131            .expect("test operation should succeed");
5132        db.install_pack(&pack)
5133            .expect("test operation should succeed");
5134        let prefix = &oid.to_hex()[..8];
5135
5136        assert_eq!(
5137            db.resolve_prefix(prefix)
5138                .expect("test operation should succeed"),
5139            ObjectPrefixResolution::Unique(oid)
5140        );
5141        fs::remove_dir_all(root).expect("test operation should succeed");
5142    }
5143
5144    #[test]
5145    fn file_database_reports_ambiguous_object_prefix() {
5146        let root = temp_root("sley-file-odb-prefix-ambiguous");
5147        let git_dir = root.join(".git");
5148        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5149        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5150        let mut seen = HashMap::new();
5151        let (prefix, first, second) = (0..10_000)
5152            .find_map(|idx| {
5153                let object =
5154                    EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
5155                let oid = db
5156                    .write_object(object)
5157                    .expect("test operation should succeed");
5158                let prefix = oid.to_hex()[..4].to_string();
5159                seen.insert(prefix.clone(), oid)
5160                    .map(|first| (prefix, first, oid))
5161            })
5162            .expect("test should find a 4-hex collision");
5163
5164        let ObjectPrefixResolution::Ambiguous(mut matches) = db
5165            .resolve_prefix(&prefix)
5166            .expect("test operation should succeed")
5167        else {
5168            panic!("expected ambiguous prefix {prefix}");
5169        };
5170        matches.sort_by_key(ObjectId::to_hex);
5171        let mut expected = vec![first, second];
5172        expected.sort_by_key(ObjectId::to_hex);
5173        assert_eq!(matches, expected);
5174        fs::remove_dir_all(root).expect("test operation should succeed");
5175    }
5176
5177    #[test]
5178    fn file_database_rejects_too_short_object_prefix() {
5179        let root = temp_root("sley-file-odb-prefix-short");
5180        let git_dir = root.join(".git");
5181        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5182        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5183
5184        assert!(matches!(
5185            db.resolve_prefix("abc"),
5186            Err(GitError::InvalidObjectId(_))
5187        ));
5188        fs::remove_dir_all(root).expect("test operation should succeed");
5189    }
5190
5191    #[test]
5192    fn file_database_reads_sha256_object_from_pack_index() {
5193        let root = temp_root("sley-file-odb-pack-sha256");
5194        let git_dir = root.join(".git");
5195        let pack_dir = git_dir.join("objects").join("pack");
5196        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5197        let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
5198        let oid = object
5199            .object_id(ObjectFormat::Sha256)
5200            .expect("test operation should succeed");
5201        let written =
5202            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5203                .expect("test operation should succeed");
5204        let pack_name = written.checksum.to_hex();
5205        fs::write(
5206            pack_dir.join(format!("pack-{pack_name}.pack")),
5207            written.pack,
5208        )
5209        .expect("test operation should succeed");
5210        fs::write(
5211            pack_dir.join(format!("pack-{pack_name}.idx")),
5212            written.index,
5213        )
5214        .expect("test operation should succeed");
5215
5216        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
5217        assert!(db.contains(&oid).expect("test operation should succeed"));
5218        assert_eq!(read_object_for_assert(&db, &oid), object);
5219        fs::remove_dir_all(root).expect("test operation should succeed");
5220    }
5221
5222    #[test]
5223    fn file_database_installs_sha256_pack_without_loose_objects() {
5224        let root = temp_root("sley-file-odb-install-pack");
5225        let git_dir = root.join(".git");
5226        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5227        let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
5228        let oid = object
5229            .object_id(ObjectFormat::Sha256)
5230            .expect("test operation should succeed");
5231        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5232            .expect("test operation should succeed");
5233        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
5234
5235        let result = db
5236            .install_pack(&pack)
5237            .expect("test operation should succeed");
5238
5239        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
5240        assert_eq!(result.object_ids, vec![oid]);
5241        assert!(result.pack_path.exists());
5242        assert!(result.index_path.exists());
5243        assert_eq!(result.promisor_path, None);
5244        assert!(
5245            !db.loose()
5246                .object_path(&oid)
5247                .expect("test operation should succeed")
5248                .exists()
5249        );
5250        assert!(db.contains(&oid).expect("test operation should succeed"));
5251        assert_eq!(read_object_for_assert(&db, &oid), object);
5252        fs::remove_dir_all(root).expect("test operation should succeed");
5253    }
5254
5255    #[test]
5256    fn file_database_installs_raw_sha256_pack_without_loose_objects() {
5257        let root = temp_root("sley-file-odb-install-raw-pack");
5258        let git_dir = root.join(".git");
5259        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5260        let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
5261        let oid = object
5262            .object_id(ObjectFormat::Sha256)
5263            .expect("test operation should succeed");
5264        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5265            .expect("test operation should succeed");
5266        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
5267
5268        let result = db
5269            .install_raw_pack(&pack.pack)
5270            .expect("test operation should succeed");
5271
5272        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
5273        assert_eq!(result.object_ids, vec![oid]);
5274        assert!(result.pack_path.exists());
5275        assert!(result.index_path.exists());
5276        assert_eq!(result.promisor_path, None);
5277        assert!(
5278            !db.loose()
5279                .object_path(&oid)
5280                .expect("test operation should succeed")
5281                .exists()
5282        );
5283        assert!(db.contains(&oid).expect("test operation should succeed"));
5284        assert_eq!(read_object_for_assert(&db, &oid), object);
5285        fs::remove_dir_all(root).expect("test operation should succeed");
5286    }
5287
5288    #[test]
5289    fn file_database_rejects_noncanonical_pack_index() {
5290        let root = temp_root("sley-file-odb-install-bad-index");
5291        let git_dir = root.join(".git");
5292        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5293        let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
5294        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
5295            .expect("test operation should succeed");
5296        let mut entries = pack.entries.clone();
5297        entries[0].crc32 ^= 1;
5298        let mut bad_pack = pack.clone();
5299        bad_pack.index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack.checksum)
5300            .expect("test operation should succeed");
5301        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5302
5303        assert!(db.install_pack(&bad_pack).is_err());
5304
5305        fs::remove_dir_all(root).expect("test operation should succeed");
5306    }
5307
5308    #[test]
5309    fn file_database_installs_raw_promisor_pack_with_sidecar() {
5310        let root = temp_root("sley-file-odb-install-raw-promisor-pack");
5311        let git_dir = root.join(".git");
5312        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5313        let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
5314        let oid = object
5315            .object_id(ObjectFormat::Sha1)
5316            .expect("test operation should succeed");
5317        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
5318            .expect("test operation should succeed");
5319        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5320
5321        let result = db
5322            .install_raw_pack_with_options(&pack.pack, RawPackInstallOptions { promisor: true })
5323            .expect("test operation should succeed");
5324
5325        let promisor_path = result.promisor_path.expect("promisor sidecar");
5326        assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
5327        assert_eq!(
5328            promisor_path.extension().and_then(|ext| ext.to_str()),
5329            Some("promisor")
5330        );
5331        assert!(promisor_path.exists());
5332        assert_eq!(
5333            fs::read(&promisor_path).expect("test operation should succeed"),
5334            b""
5335        );
5336        assert!(result.pack_path.exists());
5337        assert!(result.index_path.exists());
5338        assert!(
5339            !db.loose()
5340                .object_path(&oid)
5341                .expect("test operation should succeed")
5342                .exists()
5343        );
5344        assert_eq!(read_object_for_assert(&db, &oid), object);
5345        fs::remove_dir_all(root).expect("test operation should succeed");
5346    }
5347
5348    #[test]
5349    fn repository_objects_dir_uses_linked_worktree_common_dir() {
5350        let root = temp_root("sley-odb-common-dir");
5351        let common = root.join(".git");
5352        let admin = common.join("worktrees").join("linked");
5353        fs::create_dir_all(&admin).expect("test operation should succeed");
5354        fs::write(admin.join("commondir"), "../..\n").expect("test operation should succeed");
5355
5356        let common = fs::canonicalize(common).expect("test operation should succeed");
5357        assert_eq!(repository_common_dir(&admin), common);
5358        assert_eq!(repository_objects_dir(&admin), common.join("objects"));
5359
5360        fs::remove_dir_all(root).expect("test operation should succeed");
5361    }
5362
5363    #[test]
5364    fn reachable_object_helpers_walk_graph_and_install_pack() {
5365        let root = temp_root("sley-reachable-pack");
5366        let source_git_dir = root.join("source.git");
5367        let destination_git_dir = root.join("destination.git");
5368        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5369        fs::create_dir_all(destination_git_dir.join("objects"))
5370            .expect("test operation should succeed");
5371        let format = ObjectFormat::Sha1;
5372        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5373        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5374
5375        let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
5376        let blob_oid = source
5377            .write_object(blob.clone())
5378            .expect("test operation should succeed");
5379        let tree = EncodedObject::new(
5380            ObjectType::Tree,
5381            Tree {
5382                entries: vec![TreeEntry {
5383                    mode: 0o100644,
5384                    name: BString::from(b"payload.txt"),
5385                    oid: blob_oid,
5386                }],
5387            }
5388            .write(),
5389        );
5390        let tree_oid = source
5391            .write_object(tree.clone())
5392            .expect("test operation should succeed");
5393        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5394        let commit = EncodedObject::new(
5395            ObjectType::Commit,
5396            Commit {
5397                tree: tree_oid,
5398                parents: Vec::new(),
5399                author: identity.clone(),
5400                committer: identity,
5401                encoding: None,
5402                message: b"initial\n".to_vec(),
5403            }
5404            .write(),
5405        );
5406        let commit_oid = source
5407            .write_object(commit.clone())
5408            .expect("test operation should succeed");
5409
5410        let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
5411            .expect("test operation should succeed");
5412        assert!(reachable.contains(&commit_oid));
5413        assert!(reachable.contains(&tree_oid));
5414        assert!(reachable.contains(&blob_oid));
5415
5416        let install =
5417            install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
5418                .expect("test operation should succeed")
5419                .expect("reachable pack should be written");
5420        assert_eq!(install.object_ids.len(), 3);
5421        for (oid, object) in [
5422            (&commit_oid, &commit),
5423            (&tree_oid, &tree),
5424            (&blob_oid, &blob),
5425        ] {
5426            assert!(
5427                !destination
5428                    .loose()
5429                    .object_path(oid)
5430                    .expect("test operation should succeed")
5431                    .exists()
5432            );
5433            assert!(
5434                destination
5435                    .contains(oid)
5436                    .expect("test operation should succeed")
5437            );
5438            assert_eq!(read_object_for_assert(&destination, oid), *object);
5439        }
5440        fs::remove_dir_all(root).expect("test operation should succeed");
5441    }
5442
5443    #[test]
5444    fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
5445        let root = temp_root("sley-reachable-exclusions");
5446        let git_dir = root.join("repo.git");
5447        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5448        let format = ObjectFormat::Sha1;
5449        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5450
5451        let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
5452        let blob_oid = db
5453            .write_object(blob)
5454            .expect("test operation should succeed");
5455        let tree = EncodedObject::new(
5456            ObjectType::Tree,
5457            Tree {
5458                entries: vec![TreeEntry {
5459                    mode: 0o100644,
5460                    name: BString::from(b"payload.txt"),
5461                    oid: blob_oid,
5462                }],
5463            }
5464            .write(),
5465        );
5466        let tree_oid = db
5467            .write_object(tree)
5468            .expect("test operation should succeed");
5469        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5470        let commit = EncodedObject::new(
5471            ObjectType::Commit,
5472            Commit {
5473                tree: tree_oid,
5474                parents: Vec::new(),
5475                author: identity.clone(),
5476                committer: identity,
5477                encoding: None,
5478                message: b"initial\n".to_vec(),
5479            }
5480            .write(),
5481        );
5482        let commit_oid = db
5483            .write_object(commit)
5484            .expect("test operation should succeed");
5485        let excluded = HashSet::from([tree_oid]);
5486
5487        let objects = collect_reachable_objects(&db, format, [commit_oid, commit_oid], &excluded)
5488            .expect("test operation should succeed");
5489
5490        assert_eq!(objects.len(), 1);
5491        assert_eq!(
5492            objects[0]
5493                .object_id(format)
5494                .expect("test operation should succeed"),
5495            commit_oid
5496        );
5497        fs::remove_dir_all(root).expect("test operation should succeed");
5498    }
5499
5500    #[test]
5501    fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
5502        let root = temp_root("sley-build-reachable-pack");
5503        let git_dir = root.join("repo.git");
5504        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5505        let format = ObjectFormat::Sha1;
5506        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5507
5508        let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
5509        let oid = db
5510            .write_object(object.clone())
5511            .expect("test operation should succeed");
5512        let pack = build_reachable_pack(&db, format, std::iter::once(oid), &HashSet::new())
5513            .expect("test operation should succeed")
5514            .expect("reachable pack should be built");
5515        assert!(pack.pack.starts_with(b"PACK"));
5516        assert_eq!(pack.entries.len(), 1);
5517        assert_eq!(pack.entries[0].oid, oid);
5518
5519        let excluded = HashSet::from([oid]);
5520        assert!(
5521            build_reachable_pack(
5522                &db,
5523                format,
5524                pack.entries.into_iter().map(|entry| entry.oid),
5525                &excluded
5526            )
5527            .expect("test operation should succeed")
5528            .is_none()
5529        );
5530        fs::remove_dir_all(root).expect("test operation should succeed");
5531    }
5532
5533    #[test]
5534    fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
5535        let root = temp_root("sley-reachable-tags");
5536        let git_dir = root.join("repo.git");
5537        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5538        let format = ObjectFormat::Sha1;
5539        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5540
5541        let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
5542        let blob_oid = db
5543            .write_object(blob)
5544            .expect("test operation should succeed");
5545        let tag = EncodedObject::new(
5546            ObjectType::Tag,
5547            Tag {
5548                object: blob_oid,
5549                object_type: ObjectType::Blob,
5550                name: b"v1".to_vec(),
5551                tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
5552                message: b"tag message\n".to_vec(),
5553                raw_body: None,
5554            }
5555            .write(),
5556        );
5557        let tag_oid = db.write_object(tag).expect("test operation should succeed");
5558
5559        let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
5560            .expect("test operation should succeed");
5561        assert!(reachable.contains(&tag_oid));
5562        assert!(reachable.contains(&blob_oid));
5563
5564        let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
5565            .expect("test operation should succeed");
5566        let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
5567            .expect_err("missing traversal root should error");
5568        let kind = err.not_found_kind().expect("typed not found");
5569        assert_eq!(kind.object_id(), Some(missing));
5570        assert_eq!(
5571            kind.missing_object_context(),
5572            Some(MissingObjectContext::Traversal)
5573        );
5574        fs::remove_dir_all(root).expect("test operation should succeed");
5575    }
5576
5577    #[test]
5578    fn install_reachable_pack_empty_starts_create_no_pack() {
5579        let root = temp_root("sley-reachable-empty");
5580        let source_git_dir = root.join("source.git");
5581        let destination_git_dir = root.join("destination.git");
5582        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5583        fs::create_dir_all(destination_git_dir.join("objects"))
5584            .expect("test operation should succeed");
5585        let format = ObjectFormat::Sha1;
5586        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5587        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5588
5589        let result = install_reachable_pack(&source, &destination, format, Vec::<ObjectId>::new())
5590            .expect("test operation should succeed");
5591
5592        assert!(result.is_none());
5593        assert!(!destination_git_dir.join("objects").join("pack").exists());
5594        fs::remove_dir_all(root).expect("test operation should succeed");
5595    }
5596
5597    #[test]
5598    fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
5599        let root = temp_root("sley-reachable-install-excluding");
5600        let source_git_dir = root.join("source.git");
5601        let destination_git_dir = root.join("destination.git");
5602        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5603        fs::create_dir_all(destination_git_dir.join("objects"))
5604            .expect("test operation should succeed");
5605        let format = ObjectFormat::Sha1;
5606        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5607        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5608        let object = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
5609        let oid = source
5610            .write_object(object)
5611            .expect("test operation should succeed");
5612        let excluded = HashSet::from([oid]);
5613
5614        let result = install_reachable_pack_excluding(
5615            &source,
5616            &destination,
5617            format,
5618            std::iter::once(oid),
5619            &excluded,
5620        )
5621        .expect("test operation should succeed");
5622
5623        assert!(result.is_none());
5624        assert!(!destination_git_dir.join("objects").join("pack").exists());
5625        fs::remove_dir_all(root).expect("test operation should succeed");
5626    }
5627
5628    #[test]
5629    fn install_reachable_pack_supports_sha256() {
5630        let root = temp_root("sley-reachable-pack-sha256");
5631        let source_git_dir = root.join("source.git");
5632        let destination_git_dir = root.join("destination.git");
5633        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5634        fs::create_dir_all(destination_git_dir.join("objects"))
5635            .expect("test operation should succeed");
5636        let format = ObjectFormat::Sha256;
5637        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5638        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5639        let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
5640        let oid = source
5641            .write_object(object.clone())
5642            .expect("test operation should succeed");
5643
5644        let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
5645            .expect("test operation should succeed")
5646            .expect("sha256 reachable pack should be built");
5647        assert!(pack.pack.starts_with(b"PACK"));
5648        assert_eq!(pack.entries[0].oid, oid);
5649
5650        let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
5651            .expect("test operation should succeed")
5652            .expect("sha256 reachable pack should be written");
5653
5654        assert_eq!(result.object_ids, vec![oid]);
5655        assert!(
5656            !destination
5657                .loose()
5658                .object_path(&oid)
5659                .expect("test operation should succeed")
5660                .exists()
5661        );
5662        assert_eq!(read_object_for_assert(&destination, &oid), object);
5663        fs::remove_dir_all(root).expect("test operation should succeed");
5664    }
5665
5666    #[test]
5667    fn install_helpers_accept_custom_raw_pack_installer() {
5668        #[derive(Default)]
5669        struct RecordingInstaller {
5670            packs: std::cell::RefCell<Vec<Vec<u8>>>,
5671            installed: std::cell::RefCell<Vec<ObjectId>>,
5672        }
5673
5674        impl RawPackInstaller for RecordingInstaller {
5675            fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
5676                self.packs.borrow_mut().push(pack_bytes.to_vec());
5677                let object_ids = self.installed.borrow().clone();
5678                Ok(RawPackInstallResult { object_ids })
5679            }
5680        }
5681
5682        let format = ObjectFormat::Sha1;
5683        let source = ObjectDatabase::new(format);
5684        let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
5685        let oid = source
5686            .write_object(object)
5687            .expect("test operation should succeed");
5688        let installer = RecordingInstaller::default();
5689        installer.installed.borrow_mut().push(oid);
5690
5691        let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
5692            .expect("test operation should succeed")
5693            .expect("custom installer should receive pack");
5694
5695        assert_eq!(result.object_ids, installer.installed.into_inner());
5696        let packs = installer.packs.into_inner();
5697        assert_eq!(packs.len(), 1);
5698        assert!(packs[0].starts_with(b"PACK"));
5699    }
5700
5701    #[test]
5702    fn file_database_reads_object_from_multi_pack_index() {
5703        let root = temp_root("sley-file-odb-midx");
5704        let git_dir = root.join(".git");
5705        let pack_dir = git_dir.join("objects").join("pack");
5706        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5707        let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
5708        let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
5709        let first_oid = first
5710            .object_id(ObjectFormat::Sha1)
5711            .expect("test operation should succeed");
5712        let second_oid = second
5713            .object_id(ObjectFormat::Sha1)
5714            .expect("test operation should succeed");
5715        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5716            .expect("test operation should succeed");
5717        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5718            .expect("test operation should succeed");
5719        let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
5720        let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
5721        fs::write(
5722            pack_dir.join(first_pack_name.replace(".idx", ".pack")),
5723            first_pack.pack,
5724        )
5725        .expect("test operation should succeed");
5726        fs::write(
5727            pack_dir.join(second_pack_name.replace(".idx", ".pack")),
5728            second_pack.pack,
5729        )
5730        .expect("test operation should succeed");
5731        let midx = MultiPackIndex::write(
5732            ObjectFormat::Sha1,
5733            2,
5734            &[first_pack_name, second_pack_name],
5735            &[
5736                sley_pack::MultiPackIndexEntry {
5737                    oid: first_oid,
5738                    pack_int_id: 0,
5739                    offset: first_pack.entries[0].offset,
5740                },
5741                sley_pack::MultiPackIndexEntry {
5742                    oid: second_oid,
5743                    pack_int_id: 1,
5744                    offset: second_pack.entries[0].offset,
5745                },
5746            ],
5747        )
5748        .expect("test operation should succeed");
5749        fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");
5750
5751        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5752        assert!(
5753            db.contains(&second_oid)
5754                .expect("test operation should succeed")
5755        );
5756        assert_eq!(
5757            db.resolve_prefix(&second_oid.to_hex()[..8])
5758                .expect("test operation should succeed"),
5759            ObjectPrefixResolution::Unique(second_oid)
5760        );
5761        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5762        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5763        fs::remove_dir_all(root).expect("test operation should succeed");
5764    }
5765
5766    #[test]
5767    fn file_database_finds_pack_added_after_registry_was_cached() {
5768        // Regression guard for the cached pack-directory registry: a pack written
5769        // after the registry was first cached (via a prior read) must still be
5770        // discovered by the same handle, because a miss triggers a re-scan.
5771        let root = temp_root("sley-file-odb-pack-added-late");
5772        let git_dir = root.join(".git");
5773        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5774        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5775
5776        // First pack + object; reading it populates the registry cache.
5777        let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
5778        let first_oid = first
5779            .object_id(ObjectFormat::Sha1)
5780            .expect("test operation should succeed");
5781        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5782            .expect("test operation should succeed");
5783        db.install_pack(&first_pack)
5784            .expect("test operation should succeed");
5785        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5786
5787        // A second object that the cached registry does not yet know about.
5788        let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
5789        let second_oid = second
5790            .object_id(ObjectFormat::Sha1)
5791            .expect("test operation should succeed");
5792        // It is genuinely absent right now.
5793        assert!(matches!(
5794            db.read_object(&second_oid),
5795            Err(GitError::NotFound(_))
5796        ));
5797
5798        // Install its pack through the same handle; the next read must find it via
5799        // a re-scan, not be masked by the stale registry.
5800        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5801            .expect("test operation should succeed");
5802        db.install_pack(&second_pack)
5803            .expect("test operation should succeed");
5804        assert!(
5805            db.contains(&second_oid)
5806                .expect("test operation should succeed")
5807        );
5808        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5809        // The original object still resolves too.
5810        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5811
5812        fs::remove_dir_all(root).expect("test operation should succeed");
5813    }
5814
5815    #[test]
5816    fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
5817        let root = temp_root("sley-presence-checker-pack-added-late");
5818        let git_dir = root.join(".git");
5819        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5820        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5821
5822        let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
5823        let first_oid = first
5824            .object_id(ObjectFormat::Sha1)
5825            .expect("test operation should succeed");
5826        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5827            .expect("test operation should succeed");
5828        db.install_pack(&first_pack)
5829            .expect("test operation should succeed");
5830
5831        let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
5832        let second_oid = second
5833            .object_id(ObjectFormat::Sha1)
5834            .expect("test operation should succeed");
5835        let mut checker = db.presence_checker();
5836        assert!(
5837            checker
5838                .contains(&first_oid)
5839                .expect("test operation should succeed")
5840        );
5841        assert!(
5842            !checker
5843                .contains(&second_oid)
5844                .expect("test operation should succeed")
5845        );
5846
5847        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5848            .expect("test operation should succeed");
5849        db.install_pack(&second_pack)
5850            .expect("test operation should succeed");
5851
5852        assert!(
5853            checker
5854                .contains(&second_oid)
5855                .expect("test operation should succeed")
5856        );
5857        fs::remove_dir_all(root).expect("test operation should succeed");
5858    }
5859
5860    #[test]
5861    fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
5862        let root = temp_root("sley-file-odb-pack-registry-refresh");
5863        let git_dir = root.join(".git");
5864        let pack_dir = git_dir.join("objects").join("pack");
5865        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5866        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5867
5868        let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
5869        let first_oid = first
5870            .object_id(ObjectFormat::Sha1)
5871            .expect("test operation should succeed");
5872        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5873            .expect("test operation should succeed");
5874        db.install_pack(&first_pack)
5875            .expect("test operation should succeed");
5876
5877        let first_registry = db
5878            .cached_pack_registry(&pack_dir, false)
5879            .expect("test operation should succeed");
5880        assert_eq!(first_registry.fingerprint.idx_count, 1);
5881        assert_eq!(first_registry.fingerprint.pack_count, 1);
5882        assert_eq!(first_registry.packs.len(), 1);
5883        assert!(
5884            first_registry.packs[0]
5885                .index
5886                .lock()
5887                .expect("test operation should succeed")
5888                .is_none()
5889        );
5890        assert!(
5891            first_registry.packs[0]
5892                .data
5893                .lock()
5894                .expect("test operation should succeed")
5895                .is_none()
5896        );
5897
5898        // Existence checks use the parsed index directly and do not load pack
5899        // bytes; a full read fills the registry-owned pack data handle.
5900        assert!(
5901            db.contains(&first_oid)
5902                .expect("test operation should succeed")
5903        );
5904        assert!(
5905            first_registry.packs[0]
5906                .index
5907                .lock()
5908                .expect("test operation should succeed")
5909                .is_some()
5910        );
5911        assert!(
5912            first_registry.packs[0]
5913                .data
5914                .lock()
5915                .expect("test operation should succeed")
5916                .is_none()
5917        );
5918        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5919        assert!(
5920            first_registry.packs[0]
5921                .data
5922                .lock()
5923                .expect("test operation should succeed")
5924                .is_some()
5925        );
5926
5927        let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
5928        let second_oid = second
5929            .object_id(ObjectFormat::Sha1)
5930            .expect("test operation should succeed");
5931        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5932            .expect("test operation should succeed");
5933        db.install_pack(&second_pack)
5934            .expect("test operation should succeed");
5935
5936        let refreshed = db
5937            .cached_pack_registry(&pack_dir, true)
5938            .expect("test operation should succeed");
5939        assert!(!Arc::ptr_eq(&first_registry, &refreshed));
5940        assert_eq!(refreshed.fingerprint.idx_count, 2);
5941        assert_eq!(refreshed.fingerprint.pack_count, 2);
5942        assert_eq!(refreshed.packs.len(), 2);
5943        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5944
5945        fs::remove_dir_all(root).expect("test operation should succeed");
5946    }
5947
5948    #[test]
5949    fn file_database_pack_search_hint_rebuilds_after_pack_added() {
5950        // Regression guard for the recent-pack search hint: it is tied to the
5951        // cached pack registry, so a miss followed by a changed registry must not
5952        // hide newly-added packs.
5953        let root = temp_root("sley-file-odb-pack-lookup-added-late");
5954        let git_dir = root.join(".git");
5955        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5956        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5957
5958        let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
5959        let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
5960        let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
5961        let first_oid = first
5962            .object_id(ObjectFormat::Sha1)
5963            .expect("test operation should succeed");
5964        let second_oid = second
5965            .object_id(ObjectFormat::Sha1)
5966            .expect("test operation should succeed");
5967        let third_oid = third
5968            .object_id(ObjectFormat::Sha1)
5969            .expect("test operation should succeed");
5970
5971        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5972            .expect("test operation should succeed");
5973        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5974            .expect("test operation should succeed");
5975        db.install_pack(&first_pack)
5976            .expect("test operation should succeed");
5977        db.install_pack(&second_pack)
5978            .expect("test operation should succeed");
5979
5980        // With two packs, these reads establish a cached registry and pack hint.
5981        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5982        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5983        assert!(matches!(
5984            db.read_object(&third_oid),
5985            Err(GitError::NotFound(_))
5986        ));
5987
5988        let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
5989            .expect("test operation should succeed");
5990        db.install_pack(&third_pack)
5991            .expect("test operation should succeed");
5992
5993        assert_eq!(read_object_for_assert(&db, &third_oid), third);
5994        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5995
5996        fs::remove_dir_all(root).expect("test operation should succeed");
5997    }
5998
5999    #[test]
6000    fn file_database_prefers_loose_object_over_packed_object() {
6001        let root = temp_root("sley-file-odb-prefer-loose");
6002        let git_dir = root.join(".git");
6003        let pack_dir = git_dir.join("objects").join("pack");
6004        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6005        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
6006        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6007            .expect("test operation should succeed");
6008        let pack_name = written.checksum.to_hex();
6009        fs::write(
6010            pack_dir.join(format!("pack-{pack_name}.pack")),
6011            written.pack,
6012        )
6013        .expect("test operation should succeed");
6014        fs::write(
6015            pack_dir.join(format!("pack-{pack_name}.idx")),
6016            written.index,
6017        )
6018        .expect("test operation should succeed");
6019
6020        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6021        let oid = db
6022            .write_object(object.clone())
6023            .expect("test operation should succeed");
6024        assert_eq!(read_object_for_assert(&db, &oid), object);
6025        fs::remove_dir_all(root).expect("test operation should succeed");
6026    }
6027
6028    #[test]
6029    fn bundle_prerequisite_verification_reads_existing_objects() {
6030        let db = ObjectDatabase::new(ObjectFormat::Sha1);
6031        let oid = db
6032            .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
6033            .expect("test operation should succeed");
6034        let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
6035        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6036            .expect("test operation should succeed");
6037
6038        verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
6039    }
6040
6041    #[test]
6042    fn bundle_prerequisite_verification_reports_missing_objects() {
6043        let db = ObjectDatabase::new(ObjectFormat::Sha1);
6044        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6045            .expect("test operation should succeed");
6046        let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
6047        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6048            .expect("test operation should succeed");
6049
6050        assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
6051    }
6052
6053    #[test]
6054    fn unbundle_objects_writes_pack_entries_and_returns_refs() {
6055        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6056        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6057        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6058        let oid = object
6059            .object_id(ObjectFormat::Sha1)
6060            .expect("test operation should succeed");
6061        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6062            .expect("test operation should succeed");
6063        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6064            .into_bytes()
6065            .into_iter()
6066            .chain(pack.pack)
6067            .collect::<Vec<_>>();
6068        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6069            .expect("test operation should succeed");
6070
6071        let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
6072            .expect("test operation should succeed");
6073        assert_eq!(result.written_objects, vec![oid]);
6074        assert_eq!(result.references, bundle.references);
6075        assert_eq!(read_object_for_assert(&writer, &oid), object);
6076    }
6077
6078    #[test]
6079    fn install_bundle_pack_writes_pack_and_returns_refs() {
6080        let root = temp_root("sley-install-bundle-pack");
6081        let git_dir = root.join(".git");
6082        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6083        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6084        let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6085        let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
6086        let oid = object
6087            .object_id(ObjectFormat::Sha1)
6088            .expect("test operation should succeed");
6089        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6090            .expect("test operation should succeed");
6091        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6092            .into_bytes()
6093            .into_iter()
6094            .chain(pack.pack)
6095            .collect::<Vec<_>>();
6096        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6097            .expect("test operation should succeed");
6098
6099        let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
6100            .expect("test operation should succeed");
6101
6102        assert_eq!(result.written_objects, vec![oid]);
6103        assert_eq!(result.references, bundle.references);
6104        assert!(
6105            database
6106                .contains(&oid)
6107                .expect("test operation should succeed")
6108        );
6109        assert_eq!(read_object_for_assert(&database, &oid), object);
6110        assert!(
6111            !database
6112                .loose()
6113                .object_path(&oid)
6114                .expect("test operation should succeed")
6115                .exists()
6116        );
6117        fs::remove_dir_all(root).expect("test operation should succeed");
6118    }
6119
6120    #[test]
6121    fn unpack_packfile_objects_writes_sha256_pack_entries() {
6122        let writer = ObjectDatabase::new(ObjectFormat::Sha256);
6123        let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
6124        let oid = object
6125            .object_id(ObjectFormat::Sha256)
6126            .expect("test operation should succeed");
6127        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6128            .expect("test operation should succeed");
6129
6130        let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
6131            .expect("test operation should succeed");
6132
6133        assert_eq!(result.written_objects, vec![oid]);
6134        assert_eq!(read_object_for_assert(&writer, &oid), object);
6135    }
6136
6137    #[test]
6138    fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
6139        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6140        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6141        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6142            .expect("test operation should succeed");
6143        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6144        let oid = object
6145            .object_id(ObjectFormat::Sha1)
6146            .expect("test operation should succeed");
6147        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6148            .expect("test operation should succeed");
6149        let bundle_bytes =
6150            format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
6151                .into_bytes()
6152                .into_iter()
6153                .chain(pack.pack)
6154                .collect::<Vec<_>>();
6155        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6156            .expect("test operation should succeed");
6157
6158        assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
6159        assert!(!writer.contains(&oid));
6160    }
6161
6162    /// Build a commit -> tree -> blob graph in `db`, returning the three object
6163    /// ids and their canonical encodings as `(oid, object)` pairs.
6164    fn write_commit_graph(
6165        db: &mut FileObjectDatabase,
6166        payload: &[u8],
6167    ) -> Vec<(ObjectId, EncodedObject)> {
6168        let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
6169        let blob_oid = db
6170            .write_object(blob.clone())
6171            .expect("test operation should succeed");
6172        let tree = EncodedObject::new(
6173            ObjectType::Tree,
6174            Tree {
6175                entries: vec![TreeEntry {
6176                    mode: 0o100644,
6177                    name: BString::from(b"payload.txt"),
6178                    oid: blob_oid,
6179                }],
6180            }
6181            .write(),
6182        );
6183        let tree_oid = db
6184            .write_object(tree.clone())
6185            .expect("test operation should succeed");
6186        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6187        let commit = EncodedObject::new(
6188            ObjectType::Commit,
6189            Commit {
6190                tree: tree_oid,
6191                parents: Vec::new(),
6192                author: identity.clone(),
6193                committer: identity,
6194                encoding: None,
6195                message: b"initial\n".to_vec(),
6196            }
6197            .write(),
6198        );
6199        let commit_oid = db
6200            .write_object(commit.clone())
6201            .expect("test operation should succeed");
6202        vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
6203    }
6204
6205    fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
6206        let root = temp_root("sley-repack-all");
6207        let git_dir = root.join(".git");
6208        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6209        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6210
6211        // A pre-existing pack holds one blob; the rest of the graph is loose.
6212        let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
6213        let packed_oid = packed_blob
6214            .object_id(format)
6215            .expect("test operation should succeed");
6216        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
6217            .expect("test operation should succeed");
6218        let existing = db
6219            .install_pack(&existing_pack)
6220            .expect("test operation should succeed");
6221
6222        let graph = write_commit_graph(&mut db, b"repack payload\n");
6223
6224        let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
6225        expected.insert(packed_oid, packed_blob.clone());
6226
6227        let result = repack_all_objects(&git_dir, format)
6228            .expect("test operation should succeed")
6229            .expect("repository has objects");
6230
6231        // The new pack round-trips and contains every original object byte-for-byte.
6232        assert_eq!(result.object_count, expected.len());
6233        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
6234        assert_eq!(parsed.entries.len(), expected.len());
6235        for entry in &parsed.entries {
6236            let want = expected
6237                .get(&entry.entry.oid)
6238                .expect("packed object was in the repository");
6239            assert_eq!(&entry.object, want);
6240            assert_eq!(
6241                entry
6242                    .object
6243                    .object_id(format)
6244                    .expect("test operation should succeed"),
6245                entry.entry.oid
6246            );
6247        }
6248        // The generated index parses and agrees with the pack checksum.
6249        let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
6250        assert_eq!(idx.pack_checksum, parsed.checksum);
6251        assert_eq!(idx.entries.len(), expected.len());
6252
6253        // The pre-existing pack is reported obsolete (by its .pack path).
6254        assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
6255        // Every loose object id is reported as now packed.
6256        let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
6257        want_loose.sort_by_key(ObjectId::to_hex);
6258        assert_eq!(result.packed_loose, want_loose);
6259        assert!(!result.packed_loose.contains(&packed_oid));
6260
6261        fs::remove_dir_all(root).expect("test operation should succeed");
6262    }
6263
6264    #[test]
6265    fn repack_all_objects_consolidates_loose_and_pack_sha1() {
6266        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
6267    }
6268
6269    #[test]
6270    fn repack_all_objects_consolidates_loose_and_pack_sha256() {
6271        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
6272    }
6273
6274    #[test]
6275    fn repack_all_objects_returns_none_for_empty_repository() {
6276        let root = temp_root("sley-repack-empty");
6277        let git_dir = root.join(".git");
6278        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6279
6280        assert!(
6281            repack_all_objects(&git_dir, ObjectFormat::Sha1)
6282                .expect("test operation should succeed")
6283                .is_none()
6284        );
6285
6286        fs::remove_dir_all(root).expect("test operation should succeed");
6287    }
6288
6289    #[test]
6290    fn install_repack_result_writes_pack_without_pruning_by_default() {
6291        let root = temp_root("sley-repack-install-nodelete");
6292        let git_dir = root.join(".git");
6293        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6294        let format = ObjectFormat::Sha1;
6295        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6296        let graph = write_commit_graph(&mut db, b"install no prune\n");
6297
6298        let result = repack_all_objects(&git_dir, format)
6299            .expect("test operation should succeed")
6300            .expect("test operation should succeed");
6301        install_repack_result(&git_dir, format, &result, false)
6302            .expect("test operation should succeed");
6303
6304        // New pack is on disk and readable.
6305        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
6306        let pack_dir = git_dir.join("objects").join("pack");
6307        let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
6308        let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
6309        assert!(pack_path.exists());
6310        assert!(idx_path.exists());
6311        // Loose objects survive because prune was not requested.
6312        for (oid, object) in &graph {
6313            assert!(
6314                db.loose()
6315                    .object_path(oid)
6316                    .expect("test operation should succeed")
6317                    .exists()
6318            );
6319            assert_eq!(read_object_for_assert(&db, oid), *object);
6320        }
6321
6322        fs::remove_dir_all(root).expect("test operation should succeed");
6323    }
6324
6325    #[test]
6326    fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
6327        let root = temp_root("sley-repack-install-prune");
6328        let git_dir = root.join(".git");
6329        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6330        let format = ObjectFormat::Sha1;
6331        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6332
6333        let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
6334        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
6335            .expect("test operation should succeed");
6336        let existing = db
6337            .install_pack(&existing_pack)
6338            .expect("test operation should succeed");
6339        let graph = write_commit_graph(&mut db, b"prune payload\n");
6340
6341        let result = repack_all_objects(&git_dir, format)
6342            .expect("test operation should succeed")
6343            .expect("test operation should succeed");
6344        let new_pack_checksum = PackFile::parse(&result.pack, format)
6345            .expect("test operation should succeed")
6346            .checksum;
6347        install_repack_result(&git_dir, format, &result, true)
6348            .expect("test operation should succeed");
6349
6350        // Obsolete pack and its index are gone.
6351        assert!(!existing.pack_path.exists());
6352        assert!(!existing.index_path.exists());
6353        // Packed loose objects are gone from disk.
6354        for (oid, _) in &graph {
6355            assert!(
6356                !db.loose()
6357                    .object_path(oid)
6358                    .expect("test operation should succeed")
6359                    .exists()
6360            );
6361        }
6362        // The new consolidated pack remains and still serves every object.
6363        let pack_dir = git_dir.join("objects").join("pack");
6364        assert!(
6365            pack_dir
6366                .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
6367                .exists()
6368        );
6369        let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
6370        for (oid, object) in &graph {
6371            assert!(
6372                reopened
6373                    .contains(oid)
6374                    .expect("test operation should succeed")
6375            );
6376            assert_eq!(read_object_for_assert(&reopened, oid), *object);
6377        }
6378        let packed_oid = packed_blob
6379            .object_id(format)
6380            .expect("test operation should succeed");
6381        assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
6382
6383        fs::remove_dir_all(root).expect("test operation should succeed");
6384    }
6385
6386    #[test]
6387    fn install_repack_result_preserves_keep_and_promisor_packs() {
6388        let root = temp_root("sley-repack-install-keep-promisor");
6389        let git_dir = root.join(".git");
6390        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6391        let format = ObjectFormat::Sha1;
6392        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6393
6394        let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
6395        let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
6396            .expect("test operation should succeed");
6397        let keep_install = db
6398            .install_pack(&keep_pack)
6399            .expect("test operation should succeed");
6400        let keep_sidecar = keep_install.pack_path.with_extension("keep");
6401        fs::write(&keep_sidecar, b"").expect("test operation should succeed");
6402
6403        let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
6404        let promisor_pack =
6405            PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
6406                .expect("test operation should succeed");
6407        let promisor_install = db
6408            .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
6409            .expect("test operation should succeed");
6410        let promisor_sidecar = promisor_install
6411            .promisor_path
6412            .clone()
6413            .expect("promisor sidecar");
6414
6415        let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
6416        let result = repack_all_objects(&git_dir, format)
6417            .expect("test operation should succeed")
6418            .expect("test operation should succeed");
6419        assert!(result.obsolete_packs.contains(&keep_install.pack_path));
6420        assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
6421
6422        install_repack_result(&git_dir, format, &result, true)
6423            .expect("test operation should succeed");
6424
6425        for path in [
6426            &keep_install.pack_path,
6427            &keep_install.index_path,
6428            &keep_sidecar,
6429            &promisor_install.pack_path,
6430            &promisor_install.index_path,
6431            &promisor_sidecar,
6432        ] {
6433            assert!(path.exists(), "{} should be preserved", path.display());
6434        }
6435        for (oid, _) in &graph {
6436            assert!(
6437                !db.loose()
6438                    .object_path(oid)
6439                    .expect("test operation should succeed")
6440                    .exists()
6441            );
6442        }
6443
6444        fs::remove_dir_all(root).expect("test operation should succeed");
6445    }
6446
6447    #[test]
6448    fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
6449        // Safety: a loose object whose id is not in the new pack must survive
6450        // pruning even if the caller lists it in `packed_loose`.
6451        let root = temp_root("sley-repack-install-safety");
6452        let git_dir = root.join(".git");
6453        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6454        let format = ObjectFormat::Sha1;
6455        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6456        let graph = write_commit_graph(&mut db, b"safety packed\n");
6457
6458        let mut result = repack_all_objects(&git_dir, format)
6459            .expect("test operation should succeed")
6460            .expect("test operation should succeed");
6461
6462        // A loose object that is NOT in the new pack, but mislabeled as packed.
6463        let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
6464        let stray_oid = db
6465            .write_object(stray.clone())
6466            .expect("test operation should succeed");
6467        assert!(!result.packed_loose.contains(&stray_oid));
6468        result.packed_loose.push(stray_oid);
6469
6470        install_repack_result(&git_dir, format, &result, true)
6471            .expect("test operation should succeed");
6472
6473        // The stray loose object is untouched because it is not in the new pack.
6474        assert!(
6475            db.loose()
6476                .object_path(&stray_oid)
6477                .expect("test operation should succeed")
6478                .exists()
6479        );
6480        assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
6481        // Genuinely packed loose objects were still removed.
6482        for (oid, _) in &graph {
6483            assert!(
6484                !db.loose()
6485                    .object_path(oid)
6486                    .expect("test operation should succeed")
6487                    .exists()
6488            );
6489        }
6490
6491        fs::remove_dir_all(root).expect("test operation should succeed");
6492    }
6493
6494    #[test]
6495    fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
6496        let root = temp_root("sley-prune-unreachable");
6497        let git_dir = root.join(".git");
6498        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6499        let format = ObjectFormat::Sha1;
6500        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6501        let graph = write_commit_graph(&mut db, b"reachable payload\n");
6502        let commit_oid = graph[0].0.clone();
6503
6504        // A dangling loose blob not referenced by the commit graph.
6505        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
6506        let dangling_oid = db
6507            .write_object(dangling)
6508            .expect("test operation should succeed");
6509
6510        // Report-only pass leaves everything on disk.
6511        let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
6512            .expect("test operation should succeed");
6513        assert_eq!(reported, vec![dangling_oid]);
6514        assert!(
6515            db.loose()
6516                .object_path(&dangling_oid)
6517                .expect("test operation should succeed")
6518                .exists()
6519        );
6520
6521        // Deleting pass removes only the unreachable object.
6522        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
6523            .expect("test operation should succeed");
6524        assert_eq!(deleted, vec![dangling_oid]);
6525        assert!(
6526            !db.loose()
6527                .object_path(&dangling_oid)
6528                .expect("test operation should succeed")
6529                .exists()
6530        );
6531        for (oid, object) in &graph {
6532            assert!(
6533                db.loose()
6534                    .object_path(oid)
6535                    .expect("test operation should succeed")
6536                    .exists()
6537            );
6538            assert_eq!(read_object_for_assert(&db, oid), *object);
6539        }
6540
6541        fs::remove_dir_all(root).expect("test operation should succeed");
6542    }
6543
6544    #[test]
6545    fn prune_unreachable_loose_ignores_gitlink_targets() {
6546        let root = temp_root("sley-prune-gitlink");
6547        let git_dir = root.join(".git");
6548        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6549        let format = ObjectFormat::Sha1;
6550        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
6551
6552        let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
6553            .expect("test operation should succeed");
6554        let tree = EncodedObject::new(
6555            ObjectType::Tree,
6556            Tree {
6557                entries: vec![TreeEntry {
6558                    mode: 0o160000,
6559                    name: BString::from(b"submodule"),
6560                    oid: submodule_oid,
6561                }],
6562            }
6563            .write(),
6564        );
6565        let tree_oid = db
6566            .write_object(tree)
6567            .expect("test operation should succeed");
6568        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6569        let commit = EncodedObject::new(
6570            ObjectType::Commit,
6571            Commit {
6572                tree: tree_oid,
6573                parents: Vec::new(),
6574                author: identity.clone(),
6575                committer: identity,
6576                encoding: None,
6577                message: b"gitlink\n".to_vec(),
6578            }
6579            .write(),
6580        );
6581        let commit_oid = db
6582            .write_object(commit)
6583            .expect("test operation should succeed");
6584        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
6585        let dangling_oid = db
6586            .write_object(dangling)
6587            .expect("test operation should succeed");
6588
6589        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
6590            .expect("test operation should succeed");
6591
6592        assert_eq!(deleted, vec![dangling_oid]);
6593        assert!(
6594            !db.loose()
6595                .object_path(&dangling_oid)
6596                .expect("test operation should succeed")
6597                .exists()
6598        );
6599
6600        fs::remove_dir_all(root).expect("test operation should succeed");
6601    }
6602
6603    fn temp_root(prefix: &str) -> PathBuf {
6604        std::env::temp_dir().join(format!(
6605            "{prefix}-{}-{}",
6606            std::process::id(),
6607            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
6608        ))
6609    }
6610}