Skip to main content

sley_odb/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
12use sley_pack::{
13    MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
14    PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
15};
16use std::collections::{HashMap, HashSet};
17use std::io::{Read, Write};
18use std::path::{Path, PathBuf};
19use std::sync::atomic::{AtomicU64, Ordering};
20use std::sync::{Arc, Mutex, OnceLock};
21use std::{env, fs};
22
// Process-wide monotonically usable counter, starting at zero.
// NOTE(review): presumably mixed into temporary file names to keep them
// unique within the process; its users are outside this view — confirm.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
24
25pub trait ObjectReader {
26    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;
27
28    /// Graft-points seam (shallow clones today, replace refs/grafts later):
29    /// `true` when history is cut at `oid`, so every walk must treat the
30    /// commit as parentless even though its raw body still names parents.
31    ///
32    /// [`FileObjectDatabase`] answers from `$GIT_DIR/shallow`; readers that
33    /// are not backed by a repository (in-memory stores, pack overlays)
34    /// keep the default "no grafts".
35    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
36        false
37    }
38
39    /// Whether this reader has any shallow/graft boundaries at all. Walkers can
40    /// use this to choose dense graph-only traversal when no boundary can cut
41    /// parent edges.
42    fn has_shallow_grafts(&self) -> bool {
43        false
44    }
45}
46
47fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
48    (*oid == ObjectId::empty_tree(format))
49        .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
50}
51
52fn with_missing_object_context(
53    err: GitError,
54    oid: ObjectId,
55    context: MissingObjectContext,
56) -> GitError {
57    let kind = err
58        .not_found_kind()
59        .and_then(sley_core::NotFoundKind::missing_object_kind);
60    match kind {
61        Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
62        None => err,
63    }
64}
65
66/// Parents of a parsed commit with the graft seam applied: empty when the
67/// reader cuts history at `oid` (shallow boundary), the raw parsed parents
68/// otherwise.
69pub fn grafted_parents<R: ObjectReader + ?Sized>(
70    reader: &R,
71    oid: &ObjectId,
72    parents: Vec<ObjectId>,
73) -> Vec<ObjectId> {
74    if reader.is_shallow_graft(oid) {
75        Vec::new()
76    } else {
77        parents
78    }
79}
80
81pub trait ObjectWriter {
82    /// Write `object`, returning its id. Takes `&self`: every implementation's
83    /// write state (in-memory map, loose-object cache) is behind interior
84    /// mutability, so a single handle can interleave reads and writes without a
85    /// `&mut` borrow. This lets the merge engine read and write through one `db`
86    /// instead of opening a second read-only handle that re-warms the caches.
87    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
88}
89
90#[derive(Debug, Clone, PartialEq, Eq)]
91pub struct BundleUnbundleResult {
92    pub written_objects: Vec<ObjectId>,
93    pub references: Vec<BundleReference>,
94}
95
96#[derive(Debug, Clone, PartialEq, Eq)]
97pub struct PackUnpackResult {
98    pub written_objects: Vec<ObjectId>,
99}
100
101#[derive(Debug, Clone, PartialEq, Eq)]
102pub struct PackInstallResult {
103    pub pack_name: String,
104    pub pack_path: PathBuf,
105    pub index_path: PathBuf,
106    pub promisor_path: Option<PathBuf>,
107    pub object_ids: Vec<ObjectId>,
108}
109
110#[derive(Debug, Clone, PartialEq, Eq)]
111pub struct RawPackInstallResult {
112    pub object_ids: Vec<ObjectId>,
113}
114
/// Knobs for installing a generated pack. `Default` leaves every flag off.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// NOTE(review): presumably requests a promisor marker next to the
    /// installed pack; the consumer is outside this view — confirm.
    pub promisor: bool,
}
119
120pub trait RawPackInstaller {
121    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
122}
123
124#[derive(Debug, Clone, PartialEq, Eq)]
125pub enum ObjectPrefixResolution {
126    Missing,
127    Unique(ObjectId),
128    Ambiguous(Vec<ObjectId>),
129}
130
131#[derive(Debug, Clone, PartialEq, Eq)]
132pub struct ObjectStorageInfo {
133    pub disk_size: u64,
134    pub deltabase: ObjectId,
135}
136
137impl RawPackInstaller for FileObjectDatabase {
138    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
139        let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
140        Ok(RawPackInstallResult {
141            object_ids: result.object_ids,
142        })
143    }
144}
145
146impl RawPackInstaller for ObjectDatabase {
147    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
148        let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
149        Ok(RawPackInstallResult {
150            object_ids: result.written_objects,
151        })
152    }
153}
154
155pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
156    let mut missing = Vec::new();
157    for prerequisite in &bundle.prerequisites {
158        match reader.read_object(&prerequisite.oid) {
159            Ok(object) => {
160                let actual = object.object_id(bundle.format)?;
161                if actual != prerequisite.oid {
162                    return Err(GitError::InvalidObject(format!(
163                        "bundle prerequisite {} hashes to {actual}",
164                        prerequisite.oid
165                    )));
166                }
167            }
168            Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
169            Err(err) => return Err(err),
170        }
171    }
172    if missing.is_empty() {
173        return Ok(());
174    }
175    Err(GitError::object_not_found_in(
176        missing[0],
177        MissingObjectContext::PackInstall,
178    ))
179}
180
181pub fn unbundle_objects<R, W>(
182    bundle: &Bundle,
183    prerequisite_reader: &R,
184    writer: &mut W,
185) -> Result<BundleUnbundleResult>
186where
187    R: ObjectReader,
188    W: ObjectWriter,
189{
190    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
191    let pack = PackFile::parse_bundle(bundle)?;
192    let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
193    Ok(BundleUnbundleResult {
194        written_objects,
195        references: bundle.references.clone(),
196    })
197}
198
199pub fn install_bundle_pack<R>(
200    bundle: &Bundle,
201    prerequisite_reader: &R,
202    destination: &impl RawPackInstaller,
203) -> Result<BundleUnbundleResult>
204where
205    R: ObjectReader,
206{
207    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
208    let install = destination.install_raw_pack(&bundle.pack)?;
209    Ok(BundleUnbundleResult {
210        written_objects: install.object_ids,
211        references: bundle.references.clone(),
212    })
213}
214
215pub fn unpack_packfile_objects<W>(
216    pack_bytes: &[u8],
217    format: ObjectFormat,
218    writer: &W,
219) -> Result<PackUnpackResult>
220where
221    W: ObjectWriter,
222{
223    let pack = PackFile::parse(pack_bytes, format)?;
224    write_pack_objects(pack, writer, "pack")
225}
226
227fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
228where
229    W: ObjectWriter,
230{
231    let mut written_objects = Vec::with_capacity(pack.entries.len());
232    for entry in pack.entries {
233        let expected = entry.entry.oid;
234        let actual = writer.write_object(entry.object)?;
235        if actual != expected {
236            return Err(GitError::InvalidObject(format!(
237                "{source} object id mismatch: expected {expected}, wrote {actual}"
238            )));
239        }
240        written_objects.push(actual);
241    }
242    Ok(PackUnpackResult { written_objects })
243}
244
245pub fn collect_reachable_object_ids<R, I>(
246    reader: &R,
247    format: ObjectFormat,
248    starts: I,
249) -> Result<HashSet<ObjectId>>
250where
251    R: ObjectReader,
252    I: IntoIterator<Item = ObjectId>,
253{
254    walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
255}
256
257/// [`collect_reachable_object_ids`] with a cut set: commits in `cut` are
258/// collected, but the walk does not continue to their parents — the view a
259/// shallow repository has of its own refs (`$GIT_DIR/shallow` of the *other*
260/// side, threaded explicitly because `reader` belongs to this side).
261pub fn collect_reachable_object_ids_with_cut<R, I>(
262    reader: &R,
263    format: ObjectFormat,
264    starts: I,
265    cut: &HashSet<ObjectId>,
266) -> Result<HashSet<ObjectId>>
267where
268    R: ObjectReader,
269    I: IntoIterator<Item = ObjectId>,
270{
271    walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
272}
273
274/// [`collect_reachable_object_ids`] with a stop set: objects in `excluded` are
275/// not visited and not expanded, so the walk never sees anything reachable only
276/// through them (used to truncate history at a shallow boundary).
277pub fn collect_reachable_object_ids_excluding<R, I>(
278    reader: &R,
279    format: ObjectFormat,
280    starts: I,
281    excluded: &HashSet<ObjectId>,
282) -> Result<HashSet<ObjectId>>
283where
284    R: ObjectReader,
285    I: IntoIterator<Item = ObjectId>,
286{
287    walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
288}
289
290pub fn collect_reachable_objects<R, I>(
291    reader: &R,
292    format: ObjectFormat,
293    starts: I,
294    excluded: &HashSet<ObjectId>,
295) -> Result<Vec<Arc<EncodedObject>>>
296where
297    R: ObjectReader,
298    I: IntoIterator<Item = ObjectId>,
299{
300    let mut objects = Vec::new();
301    walk_reachable_objects(reader, format, starts, excluded, |_, object| {
302        objects.push(Arc::clone(object));
303    })?;
304    Ok(objects)
305}
306
307#[derive(Debug, Clone)]
308struct ReachablePackObject {
309    oid: ObjectId,
310    object: Arc<EncodedObject>,
311}
312
313fn collect_reachable_pack_objects<R, I>(
314    reader: &R,
315    format: ObjectFormat,
316    starts: I,
317    excluded: &HashSet<ObjectId>,
318) -> Result<Vec<ReachablePackObject>>
319where
320    R: ObjectReader,
321    I: IntoIterator<Item = ObjectId>,
322{
323    let mut objects = Vec::new();
324    walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
325        objects.push(ReachablePackObject {
326            oid: *oid,
327            object: Arc::clone(object),
328        });
329    })?;
330    Ok(objects)
331}
332
333fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
334    objects
335        .iter()
336        .map(|entry| PackInput {
337            oid: &entry.oid,
338            object: &entry.object,
339        })
340        .collect()
341}
342
343pub fn install_reachable_pack<I>(
344    source: &impl ObjectReader,
345    destination: &impl RawPackInstaller,
346    format: ObjectFormat,
347    starts: I,
348) -> Result<Option<RawPackInstallResult>>
349where
350    I: IntoIterator<Item = ObjectId>,
351{
352    install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
353}
354
355pub fn install_reachable_pack_excluding<I>(
356    source: &impl ObjectReader,
357    destination: &impl RawPackInstaller,
358    format: ObjectFormat,
359    starts: I,
360    excluded: &HashSet<ObjectId>,
361) -> Result<Option<RawPackInstallResult>>
362where
363    I: IntoIterator<Item = ObjectId>,
364{
365    let pack = match build_reachable_pack(source, format, starts, excluded)? {
366        Some(pack) => pack,
367        None => return Ok(None),
368    };
369    destination.install_raw_pack(&pack.pack).map(Some)
370}
371
372pub fn build_reachable_pack<R, I>(
373    reader: &R,
374    format: ObjectFormat,
375    starts: I,
376    excluded: &HashSet<ObjectId>,
377) -> Result<Option<PackWrite>>
378where
379    R: ObjectReader,
380    I: IntoIterator<Item = ObjectId>,
381{
382    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
383    if objects.is_empty() {
384        return Ok(None);
385    }
386    // Delta-compress reachable packs (used by install/push/fetch) via git-pack's
387    // sliding-window selection. Self-contained, ofs-delta by default; round-trips
388    // through the existing parser. PackWrite shape is unchanged, so callers are
389    // unaffected.
390    let inputs = pack_inputs(&objects);
391    PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
392}
393
394pub fn build_and_install_reachable_pack<R, I>(
395    source: &R,
396    destination: &FileObjectDatabase,
397    format: ObjectFormat,
398    starts: I,
399    excluded: &HashSet<ObjectId>,
400    options: RawPackInstallOptions,
401) -> Result<Option<PackInstallResult>>
402where
403    R: ObjectReader,
404    I: IntoIterator<Item = ObjectId>,
405{
406    build_and_install_reachable_pack_filtered(
407        source,
408        destination,
409        format,
410        starts,
411        excluded,
412        options,
413        None,
414        None,
415    )
416}
417
/// Partial-clone object filter applied while a transfer pack is being built.
///
/// Covers the subset of upstream's `list-objects-filter` that the in-process
/// local server supports. Directly-wanted tips are always packed; the filter
/// prunes only objects reached *through* the traversal (upstream's
/// `filter_blobs_none` likewise runs on traversed blobs, never on wanted
/// tips).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// `blob:none` — leave out every blob reached through tree traversal.
    BlobNone,
}
429
430/// [`build_and_install_reachable_pack`] with an optional partial-clone
431/// `filter`. With `Some(BlobNone)`, blobs are dropped from the pack unless
432/// they are directly wanted (named in `starts`).
433#[allow(clippy::too_many_arguments)]
434pub fn build_and_install_reachable_pack_filtered<R, I>(
435    source: &R,
436    destination: &FileObjectDatabase,
437    format: ObjectFormat,
438    starts: I,
439    excluded: &HashSet<ObjectId>,
440    options: RawPackInstallOptions,
441    filter: Option<PackObjectFilter>,
442    unpack_limit: Option<usize>,
443) -> Result<Option<PackInstallResult>>
444where
445    R: ObjectReader,
446    I: IntoIterator<Item = ObjectId>,
447{
448    let starts: Vec<ObjectId> = starts.into_iter().collect();
449    let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
450    let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
451    match filter {
452        Some(PackObjectFilter::BlobNone) => {
453            objects.retain(|entry| {
454                entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
455            });
456        }
457        None => {}
458    }
459    if objects.is_empty() {
460        return Ok(None);
461    }
462    // Mirror fetch-pack's unpack-limit: small transfers are exploded into
463    // loose objects instead of landing as a pack (upstream `get_pack` picks
464    // unpack-objects when the header count is below fetch/transfer.unpackLimit).
465    if let Some(limit) = unpack_limit
466        && objects.len() < limit
467    {
468        for entry in &objects {
469            destination.loose().write_object((*entry.object).clone())?;
470        }
471        return Ok(None);
472    }
473    let inputs = pack_inputs(&objects);
474    let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
475    destination
476        .install_generated_pack_unchecked(&pack, options)
477        .map(Some)
478}
479
480/// Assemble a pack stream that reuses an existing pack's object data verbatim
481/// (upstream pack-objects' "pack reuse" fast path, full-pack case) and appends
482/// `appended` as freshly encoded undeltified entries.
483///
484/// The reused pack's entry bytes are copied as-is between our own header and
485/// trailer: a full-pack copy preserves every relative distance, so internal
486/// `OFS_DELTA` bases stay valid. The header object count covers both the
487/// reused and appended entries, and the trailing pack checksum is recomputed
488/// over the assembled stream.
489pub fn assemble_pack_with_verbatim_reuse(
490    format: ObjectFormat,
491    reused_pack_bytes: &[u8],
492    appended: &[PackInput<'_>],
493) -> Result<(Vec<u8>, u32)> {
494    assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
495}
496
497/// Like [`assemble_pack_with_verbatim_reuse`], but concatenates multiple whole
498/// packs before appending fresh entries.
499pub fn assemble_pack_with_verbatim_reuses(
500    format: ObjectFormat,
501    reused_packs: &[&[u8]],
502    appended: &[PackInput<'_>],
503) -> Result<(Vec<u8>, u32)> {
504    let hash_len = format.raw_len();
505    let mut reused_count = 0u32;
506    let mut capacity = 12 + hash_len + 64 * appended.len();
507    for reused_pack_bytes in reused_packs {
508        if reused_pack_bytes.len() < 12 + hash_len {
509            return Err(GitError::InvalidFormat("reused pack too short".into()));
510        }
511        if &reused_pack_bytes[..4] != b"PACK" {
512            return Err(GitError::InvalidFormat(
513                "reused pack has no signature".into(),
514            ));
515        }
516        let version = u32::from_be_bytes([
517            reused_pack_bytes[4],
518            reused_pack_bytes[5],
519            reused_pack_bytes[6],
520            reused_pack_bytes[7],
521        ]);
522        if version != 2 {
523            return Err(GitError::Unsupported(format!(
524                "reused pack version {version}"
525            )));
526        }
527        let count = u32::from_be_bytes([
528            reused_pack_bytes[8],
529            reused_pack_bytes[9],
530            reused_pack_bytes[10],
531            reused_pack_bytes[11],
532        ]);
533        reused_count = reused_count
534            .checked_add(count)
535            .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
536        capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
537    }
538    let total = reused_count
539        .checked_add(appended.len() as u32)
540        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
541
542    let mut out = Vec::with_capacity(capacity);
543    out.extend_from_slice(b"PACK");
544    out.extend_from_slice(&2u32.to_be_bytes());
545    out.extend_from_slice(&total.to_be_bytes());
546    for reused_pack_bytes in reused_packs {
547        out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
548    }
549    for input in appended {
550        write_undeltified_pack_entry(&mut out, input.object)?;
551    }
552    let checksum = sley_core::digest_bytes(format, &out)?;
553    out.extend_from_slice(checksum.as_bytes());
554    Ok((out, reused_count))
555}
556
557/// Assemble a pack stream by copying already-encoded pack entries verbatim and
558/// appending freshly encoded undeltified entries.
559pub fn assemble_pack_with_verbatim_entries(
560    format: ObjectFormat,
561    reused_entries: &[&[u8]],
562    appended: &[PackInput<'_>],
563) -> Result<(Vec<u8>, u32)> {
564    let reused_count = u32::try_from(reused_entries.len())
565        .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
566    let total = reused_count
567        .checked_add(appended.len() as u32)
568        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
569
570    let mut capacity = 12 + format.raw_len() + 64 * appended.len();
571    for entry in reused_entries {
572        capacity = capacity.saturating_add(entry.len());
573    }
574    let mut out = Vec::with_capacity(capacity);
575    out.extend_from_slice(b"PACK");
576    out.extend_from_slice(&2u32.to_be_bytes());
577    out.extend_from_slice(&total.to_be_bytes());
578    for entry in reused_entries {
579        out.extend_from_slice(entry);
580    }
581    for input in appended {
582        write_undeltified_pack_entry(&mut out, input.object)?;
583    }
584    let checksum = sley_core::digest_bytes(format, &out)?;
585    out.extend_from_slice(checksum.as_bytes());
586    Ok((out, reused_count))
587}
588
589/// Append one undeltified pack entry (type/size varint header + zlib body).
590fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
591    let type_bits: u8 = match object.object_type {
592        ObjectType::Commit => 1,
593        ObjectType::Tree => 2,
594        ObjectType::Blob => 3,
595        ObjectType::Tag => 4,
596    };
597    let mut size = object.body.len() as u64;
598    let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
599    size >>= 4;
600    while size > 0 {
601        out.push(byte | 0x80);
602        byte = (size & 0x7f) as u8;
603        size >>= 7;
604    }
605    out.push(byte);
606    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
607    encoder.write_all(&object.body)?;
608    out.extend_from_slice(&encoder.finish()?);
609    Ok(())
610}
611
612/// Outcome of consolidating every object in a repository into a single pack.
613///
614/// This is the engine for `git gc` / `git repack`: [`repack_all_objects`]
615/// produces the bytes for one new delta-compressed pack plus its index, and
616/// reports which on-disk artifacts the caller could now remove. No deletions
617/// are performed by the engine itself; the CLI decides reachability policy and
618/// performs any pruning (see [`install_repack_result`]).
619#[derive(Debug, Clone, PartialEq, Eq)]
620pub struct RepackResult {
621    /// Bytes of the freshly written `.pack` file.
622    pub pack: Vec<u8>,
623    /// Bytes of the matching `.idx` file for [`RepackResult::pack`].
624    pub idx: Vec<u8>,
625    /// Number of distinct objects contained in the new pack.
626    pub object_count: usize,
627    /// Absolute paths of pre-existing `*.pack` files now superseded by the new
628    /// pack (every object they hold is present in [`RepackResult::pack`]).
629    pub obsolete_packs: Vec<PathBuf>,
630    /// Loose object ids that are now also present in the new pack and therefore
631    /// redundant on disk.
632    pub packed_loose: Vec<ObjectId>,
633    pack_checksum: ObjectId,
634    index_entries: Vec<PackIndexEntry>,
635}
636
637/// Gather every object in `git_dir` (loose objects and every existing pack) and
638/// write them into a single new delta-compressed pack.
639///
640/// Returns the new pack/index bytes, the count of packed objects, the list of
641/// pre-existing pack files that the new pack supersedes, and the loose object
642/// ids that are now packed. Nothing is deleted: the caller (CLI) decides
643/// reachability policy and performs any pruning, optionally via
644/// [`install_repack_result`].
645///
646/// Returns `Ok(None)` when the repository contains no objects at all.
647/// `git repack -a`'s gathering rule: pack the reachability closure of `roots`
648/// (ref tips, `HEAD`, reflog entries, indexed objects) instead of everything
649/// on disk. Borrowed objects (alternates) reachable from the roots are packed
650/// into the new local pack like upstream `pack-objects --all` without
651/// `--local`; previously-packed objects that are no longer reachable are NOT
652/// carried forward (that is how `repack -a -d` drops them). Missing objects
653/// are tolerated (stale reflog entries may reference pruned history).
654///
655/// Returns `Ok(None)` when no roots resolve to any object.
656pub fn repack_reachable_objects(
657    git_dir: &Path,
658    format: ObjectFormat,
659    roots: &[ObjectId],
660) -> Result<Option<RepackResult>> {
661    let objects_dir = repository_objects_dir(git_dir);
662    let database = FileObjectDatabase::new(objects_dir.clone(), format);
663
664    let mut seen: HashSet<ObjectId> = HashSet::new();
665    let mut objects: Vec<ReachablePackObject> = Vec::new();
666    let mut pending: Vec<ObjectId> = roots.to_vec();
667    while let Some(oid) = pending.pop() {
668        if !seen.insert(oid) {
669            continue;
670        }
671        let object = match database.read_object(&oid) {
672            Ok(object) => object,
673            Err(GitError::NotFound(_)) => continue,
674            Err(err) => return Err(err),
675        };
676        match object.object_type {
677            ObjectType::Commit => {
678                let commit = Commit::parse_ref(format, &object.body)?;
679                pending.extend(grafted_parents(&database, &oid, commit.parents));
680                pending.push(commit.tree);
681            }
682            ObjectType::Tree => {
683                for entry in TreeEntries::new(format, &object.body) {
684                    let entry = entry?;
685                    if !entry.is_gitlink() {
686                        pending.push(entry.oid);
687                    }
688                }
689            }
690            ObjectType::Tag => {
691                let tag = Tag::parse_ref(format, &object.body)?;
692                pending.push(tag.object);
693            }
694            ObjectType::Blob => {}
695        }
696        objects.push(ReachablePackObject { oid, object });
697    }
698    if objects.is_empty() {
699        return Ok(None);
700    }
701
702    let inputs = pack_inputs(&objects);
703    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
704    let object_count = written.entries.len();
705
706    // Every pre-existing local pack is superseded under `-a` (their reachable
707    // objects are in the new pack; their unreachable ones are being dropped).
708    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
709    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
710        .into_iter()
711        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
712        .collect();
713
714    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
715    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
716        .into_iter()
717        .filter(|oid| packed_oid_set.contains(oid))
718        .collect();
719    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
720
721    let pack_checksum = written.checksum;
722    let index_entries = written.entries.clone();
723    Ok(Some(RepackResult {
724        pack: written.pack,
725        idx: written.index,
726        object_count,
727        obsolete_packs,
728        packed_loose,
729        pack_checksum,
730        index_entries,
731    }))
732}
733
734pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
735    let objects_dir = repository_objects_dir(git_dir);
736    let database = FileObjectDatabase::new(objects_dir.clone(), format);
737
738    // Enumerate every object id reachable on disk: loose objects, every pack
739    // index, and any multi-pack-index. `object_ids_in_objects_dir` already
740    // unions all of these and de-duplicates them.
741    let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
742    if all_oids.is_empty() {
743        return Ok(None);
744    }
745
746    // Read each object's canonical encoding so the new pack stores byte-for-byte
747    // identical payloads. Loose objects take precedence over packed copies in
748    // `FileObjectDatabase::read_object`, but both decode to the same bytes.
749    let mut objects = Vec::with_capacity(all_oids.len());
750    for oid in &all_oids {
751        objects.push(ReachablePackObject {
752            oid: *oid,
753            object: database.read_object(oid)?,
754        });
755    }
756
757    let inputs = pack_inputs(&objects);
758    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
759    let object_count = written.entries.len();
760
761    // The new pack contains every object on disk, so every pre-existing pack is
762    // fully superseded. We still record the exact pack paths (not the index
763    // paths) so the caller can delete the right files. The pack we are about to
764    // write is excluded by name in case its checksum collides with an existing
765    // pack (identical contents).
766    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
767    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
768        .into_iter()
769        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
770        .collect();
771
772    // Loose object ids that the new pack now also holds (which is all of them,
773    // since they were gathered into it).
774    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
775    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
776        .into_iter()
777        .filter(|oid| packed_oid_set.contains(oid))
778        .collect();
779    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
780
781    Ok(Some(RepackResult {
782        pack: written.pack,
783        idx: written.index,
784        object_count,
785        obsolete_packs,
786        packed_loose,
787        pack_checksum: written.checksum,
788        index_entries: written.entries,
789    }))
790}
791
792/// Gather only loose objects in `git_dir` and write them into a new pack.
793///
794/// This is the engine for plain `git repack -d` (without `-a`): existing packs
795/// remain in place, and pruning removes only the loose copies that the new pack
796/// now serves.
797pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
798    let objects_dir = repository_objects_dir(git_dir);
799    let database = FileObjectDatabase::new(objects_dir.clone(), format);
800    let loose_oids = loose_object_ids(&objects_dir, format)?;
801    if loose_oids.is_empty() {
802        return Ok(None);
803    }
804
805    let mut objects = Vec::with_capacity(loose_oids.len());
806    for oid in &loose_oids {
807        objects.push(ReachablePackObject {
808            oid: *oid,
809            object: database.read_object(oid)?,
810        });
811    }
812
813    let inputs = pack_inputs(&objects);
814    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
815    let object_count = written.entries.len();
816    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
817    let mut packed_loose: Vec<ObjectId> = loose_oids
818        .into_iter()
819        .filter(|oid| packed_oid_set.contains(oid))
820        .collect();
821    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
822
823    let pack_checksum = written.checksum;
824    let index_entries = written.entries.clone();
825    Ok(Some(RepackResult {
826        pack: written.pack,
827        idx: written.index,
828        object_count,
829        obsolete_packs: Vec::new(),
830        packed_loose,
831        pack_checksum,
832        index_entries,
833    }))
834}
835
836/// Write the consolidated pack from a [`RepackResult`] into
837/// `objects/pack/` and, when `prune` is set, remove the now-redundant
838/// pre-existing packs and packed loose objects.
839///
840/// Pruning is opt-in and deliberately conservative: an object or pack is only
841/// removed after verifying it is actually present in the freshly written pack
842/// on disk. Concretely:
843///
844/// * a loose object is removed only if its id appears in the new pack;
845/// * a pre-existing pack is removed only if it is not the pack we just wrote
846///   *and* every object listed in its `.idx` is present in the new pack (its
847///   `.idx` and known sidecars are removed alongside it);
848/// * a stale `multi-pack-index` is removed only if every pack it references is
849///   being removed, so no reader is ever left pointing at a deleted pack.
850pub fn install_repack_result(
851    git_dir: &Path,
852    format: ObjectFormat,
853    result: &RepackResult,
854    prune: bool,
855) -> Result<()> {
856    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
857}
858
859/// [`install_repack_result`] that additionally writes a `pack-<checksum>.bitmap`
860/// reachability bitmap alongside the new pack when `bitmap_tips` is `Some`.
861/// `bitmap_tips` carries the repository's ref tips (peeled to commits): they
862/// receive selection preference, mirroring upstream's `NEEDS_BITMAP` flagging of
863/// ref tips in `git repack -b` / `pack-objects --write-bitmap-index`.
864pub fn install_repack_result_with_bitmap(
865    git_dir: &Path,
866    format: ObjectFormat,
867    result: &RepackResult,
868    prune: bool,
869    bitmap_tips: Option<&HashSet<ObjectId>>,
870) -> Result<()> {
871    let objects_dir = repository_objects_dir(git_dir);
872    let pack_dir = objects_dir.join("pack");
873    fs::create_dir_all(&pack_dir)?;
874
875    // Validate the public bytes against the private provenance that
876    // `repack_all_objects` captured from `PackFile::write_packed`. This avoids
877    // inflating and resolving the freshly-written pack a second time while still
878    // catching caller mutations before anything is written or pruned.
879    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
880    let parsed_index = PackIndex::parse(&result.idx, format)?;
881    if parsed_index.pack_checksum != result.pack_checksum {
882        return Err(GitError::InvalidFormat(
883            "repack index checksum does not match the new pack".into(),
884        ));
885    }
886    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
887        return Err(GitError::InvalidFormat(
888            "repack index does not match the new pack contents".into(),
889        ));
890    }
891    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
892    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
893    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
894    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
895    // git writes a `.rev` alongside every repacked pack (`pack.writeReverseIndex`
896    // defaults to true). Write it before the `.idx` so the index never becomes
897    // visible ahead of its companions, mirroring upstream's finalize order.
898    let reverse_index = sley_pack::PackReverseIndex::write(
899        format,
900        &sley_pack::pack_order_index_positions(&parsed_index.entries),
901        &result.pack_checksum,
902    )?;
903    write_pack_component(&new_pack_path, &result.pack)?;
904    write_pack_component(&new_rev_path, &reverse_index)?;
905    write_pack_component(&new_index_path, &result.idx)?;
906
907    if let Some(tips) = bitmap_tips {
908        // Build before pruning: the closure walk reads objects through the
909        // pre-existing packs/loose store (the new pack holds the same bytes).
910        let database = FileObjectDatabase::new(objects_dir.clone(), format);
911        if let Some(bitmap) = build_pack_bitmap(
912            &database,
913            format,
914            &result.index_entries,
915            &result.pack_checksum,
916            tips,
917        )? {
918            // Unlike the pack/idx/rev (content-addressed by the pack
919            // checksum), the bitmap depends on selection inputs (e.g.
920            // pack.preferBitmapTips), so an existing file must be replaced —
921            // write_pack_component's exists-skip would keep a stale selection.
922            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
923            remove_file_if_exists(&bitmap_path)?;
924            write_pack_component(&bitmap_path, &bitmap)?;
925        }
926    }
927
928    if !prune {
929        return Ok(());
930    }
931
932    // Prune based on the objects the new pack's *index* can resolve (what reads use
933    // once the old packs are gone), not just what the pack contains — so a stale
934    // pack is never removed for an object the new index cannot serve.
935    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
936
937    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
938    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
939    Ok(())
940}
941
942fn validate_pack_checksum(
943    pack: &[u8],
944    format: ObjectFormat,
945    expected: &ObjectId,
946    context: &str,
947) -> Result<()> {
948    if expected.format() != format {
949        return Err(GitError::InvalidObjectId(format!(
950            "{context} checksum format does not match object format"
951        )));
952    }
953    let hash_len = format.raw_len();
954    if pack.len() < 12 + hash_len {
955        return Err(GitError::InvalidFormat(format!(
956            "{context} pack file too short"
957        )));
958    }
959    if &pack[..4] != b"PACK" {
960        return Err(GitError::InvalidFormat(format!(
961            "{context} pack file missing PACK signature"
962        )));
963    }
964    let trailer_offset = pack.len() - hash_len;
965    let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
966    let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
967    if &actual != expected || trailer != *expected {
968        return Err(GitError::InvalidFormat(format!(
969            "{context} pack checksum does not match generated pack"
970        )));
971    }
972    Ok(())
973}
974
975fn pack_index_entries_match_writer(
976    parsed: &[PackIndexEntry],
977    writer_entries: &[PackIndexEntry],
978) -> bool {
979    if parsed.len() != writer_entries.len() {
980        return false;
981    }
982    let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
983    writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
984    parsed.iter().zip(writer_entries).all(|(left, right)| {
985        left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
986    })
987}
988
989/// List loose objects under `git_dir` that are *not* reachable from `roots`,
990/// optionally deleting them.
991///
992/// Reachability is computed with [`collect_reachable_object_ids`] over the
993/// repository's object database, so trees, parents, and tag targets are all
994/// followed. When `delete` is `false` the returned ids are merely reported;
995/// when `true` each unreachable loose object file is removed (packed copies are
996/// never touched). Deletion is therefore opt-in.
997pub fn prune_unreachable_loose<I>(
998    git_dir: &Path,
999    format: ObjectFormat,
1000    roots: I,
1001    delete: bool,
1002) -> Result<Vec<ObjectId>>
1003where
1004    I: IntoIterator<Item = ObjectId>,
1005{
1006    let objects_dir = repository_objects_dir(git_dir);
1007    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1008    let reachable = collect_reachable_object_ids(&database, format, roots)?;
1009
1010    let store = LooseObjectStore::new(objects_dir.clone(), format);
1011    let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1012        .into_iter()
1013        .filter(|oid| !reachable.contains(oid))
1014        .collect();
1015    pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1016
1017    if delete {
1018        for oid in &pruned {
1019            let path = store.object_path(oid)?;
1020            match fs::remove_file(&path) {
1021                Ok(()) => {}
1022                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1023                Err(err) => return Err(GitError::Io(err.to_string())),
1024            }
1025        }
1026    }
1027    Ok(pruned)
1028}
1029
1030/// Loose object ids under `objects_dir`, sorted by hex, with packed objects
1031/// excluded.
1032fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1033    let oids = loose_object_id_set(objects_dir, format)?;
1034    let mut oids = oids.into_iter().collect::<Vec<_>>();
1035    oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1036    Ok(oids)
1037}
1038
1039fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1040    let mut oids = HashSet::new();
1041    collect_loose_object_ids(objects_dir, format, &mut oids)?;
1042    Ok(oids)
1043}
1044
1045/// Absolute paths of every `*.pack` file directly inside `pack_dir`, sorted for
1046/// deterministic output.
1047fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1048    if !pack_dir.exists() {
1049        return Ok(Vec::new());
1050    }
1051    let mut packs = Vec::new();
1052    for entry in fs::read_dir(pack_dir)? {
1053        let path = entry?.path();
1054        if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1055            packs.push(path);
1056        }
1057    }
1058    packs.sort();
1059    Ok(packs)
1060}
1061
1062/// Remove pre-existing packs whose every object is contained in `present`,
1063/// skipping `keep` (the pack just written), `.keep` packs, and `.promisor` packs.
1064/// A stale multi-pack-index that references any removed pack is removed too.
1065fn prune_packs_contained_in(
1066    objects_dir: &Path,
1067    format: ObjectFormat,
1068    present: &HashSet<ObjectId>,
1069    keep: &Path,
1070) -> Result<()> {
1071    let pack_dir = objects_dir.join("pack");
1072    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
1073    let mut removed_stems: HashSet<String> = HashSet::new();
1074
1075    for pack_path in existing_pack_files(&pack_dir)? {
1076        if pack_path == keep {
1077            continue;
1078        }
1079        let Some(stem) = pack_path.file_stem() else {
1080            continue;
1081        };
1082        if Some(stem) == keep_stem.as_deref() {
1083            continue;
1084        }
1085        if pack_path.with_extension("keep").exists()
1086            || pack_path.with_extension("promisor").exists()
1087        {
1088            continue;
1089        }
1090        let index_path = pack_path.with_extension("idx");
1091        if !index_path.exists() {
1092            // Without an index we cannot prove containment; leave it alone.
1093            continue;
1094        }
1095        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1096        if !index
1097            .entries
1098            .iter()
1099            .all(|entry| present.contains(&entry.oid))
1100        {
1101            continue;
1102        }
1103        // Every object in this pack is safely in the new pack and it has no Git
1104        // policy sidecar that says to keep it: remove the pack, its index, and
1105        // cache sidecars derived from them.
1106        remove_file_if_exists(&pack_path)?;
1107        remove_file_if_exists(&index_path)?;
1108        for ext in ["rev", "mtimes", "bitmap"] {
1109            remove_file_if_exists(&pack_path.with_extension(ext))?;
1110        }
1111        removed_stems.insert(stem.to_string_lossy().into_owned());
1112    }
1113
1114    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1115    Ok(())
1116}
1117
1118/// Remove a `multi-pack-index` if it names *any* pack that was removed.
1119///
1120/// A MIDX that still references a deleted pack makes reads fail (the lookup
1121/// resolves to a pack that is gone) before any fallback. Removing the whole MIDX
1122/// when even one of its packs is pruned forces readers back to the individual pack
1123/// indexes, which are correct; `multi-pack-index write` can rebuild it later.
1124fn prune_stale_multi_pack_index(
1125    pack_dir: &Path,
1126    format: ObjectFormat,
1127    removed_stems: &HashSet<String>,
1128) -> Result<()> {
1129    if removed_stems.is_empty() {
1130        return Ok(());
1131    }
1132    let midx_path = pack_dir.join("multi-pack-index");
1133    if !midx_path.exists() {
1134        return Ok(());
1135    }
1136    let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1137    let references_removed_pack = midx.pack_names.iter().any(|name| {
1138        let stem = name.strip_suffix(".idx").unwrap_or(name);
1139        removed_stems.contains(stem)
1140    });
1141    if references_removed_pack {
1142        remove_file_if_exists(&midx_path)?;
1143    }
1144    Ok(())
1145}
1146
1147/// Remove each loose object in `candidates` whose id is in `present`, leaving
1148/// any object not actually packed untouched.
1149fn prune_loose_objects<'a, I>(
1150    objects_dir: &Path,
1151    format: ObjectFormat,
1152    candidates: I,
1153    present: &HashSet<ObjectId>,
1154) -> Result<()>
1155where
1156    I: IntoIterator<Item = &'a ObjectId>,
1157{
1158    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1159    for oid in candidates {
1160        if !present.contains(oid) {
1161            continue;
1162        }
1163        remove_file_if_exists(&store.object_path(oid)?)?;
1164    }
1165    Ok(())
1166}
1167
/// How a delta entry in a pack names its base object, as decoded by
/// `pack_entry_delta_base`.
enum PackDeltaBase {
    /// Base given by position (entry type 6): the absolute pack offset
    /// obtained by subtracting the encoded distance from the entry's offset.
    Offset(u64),
    /// Base given by object id (entry type 7).
    Ref(ObjectId),
}
1172
/// Result of `scan_pack_index_offsets` for one target entry.
struct PackIndexOffsetInfo {
    /// Offset at which the target entry ends: the smallest index offset
    /// greater than the target's, or the trailer offset when there is none.
    end_offset: u64,
    /// Id of the index entry located at the requested delta-base offset;
    /// `None` when no base offset was requested.
    delta_base_oid: Option<ObjectId>,
}
1177
1178fn scan_pack_index_offsets(
1179    index: &PackIndex,
1180    target_offset: u64,
1181    trailer_offset: u64,
1182    delta_base_offset: Option<u64>,
1183) -> Result<PackIndexOffsetInfo> {
1184    let mut target_count = 0usize;
1185    let mut next_offset = None;
1186    let mut delta_base_oid = None;
1187
1188    for entry in &index.entries {
1189        if entry.offset == target_offset {
1190            target_count += 1;
1191        } else if entry.offset > target_offset {
1192            match next_offset {
1193                Some(current) if current <= entry.offset => {}
1194                _ => next_offset = Some(entry.offset),
1195            }
1196        }
1197        if Some(entry.offset) == delta_base_offset {
1198            delta_base_oid = Some(entry.oid);
1199        }
1200    }
1201
1202    if target_count == 0 {
1203        return Err(GitError::InvalidFormat(format!(
1204            "pack index offset {target_offset} not found"
1205        )));
1206    }
1207    if let Some(offset) = delta_base_offset
1208        && delta_base_oid.is_none()
1209    {
1210        return Err(GitError::InvalidFormat(format!(
1211            "ofs-delta base offset {offset} not found"
1212        )));
1213    }
1214
1215    Ok(PackIndexOffsetInfo {
1216        // Preserve the old sorted-vector behavior for malformed indexes with
1217        // duplicate offsets: the next sorted entry has the same offset.
1218        end_offset: if target_count > 1 {
1219            target_offset
1220        } else {
1221            next_offset.unwrap_or(trailer_offset)
1222        },
1223        delta_base_oid,
1224    })
1225}
1226
1227fn pack_entry_delta_base(
1228    format: ObjectFormat,
1229    pack: &[u8],
1230    entry_offset: u64,
1231) -> Result<Option<PackDeltaBase>> {
1232    let mut cursor = usize::try_from(entry_offset)
1233        .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1234    let first = pack_next_byte(pack, &mut cursor)?;
1235    let kind = (first >> 4) & 0x07;
1236    let mut byte = first;
1237    while byte & 0x80 != 0 {
1238        byte = pack_next_byte(pack, &mut cursor)?;
1239    }
1240    match kind {
1241        6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1242            pack,
1243            &mut cursor,
1244            entry_offset,
1245        )?))),
1246        7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1247            format,
1248            pack,
1249            &mut cursor,
1250        )?))),
1251        _ => Ok(None),
1252    }
1253}
1254
1255fn parse_ref_delta_base_oid(
1256    format: ObjectFormat,
1257    pack: &[u8],
1258    cursor: &mut usize,
1259) -> Result<ObjectId> {
1260    let raw_len = format.raw_len();
1261    if *cursor + raw_len > pack.len() {
1262        return Err(GitError::InvalidFormat(
1263            "truncated ref-delta base object id".into(),
1264        ));
1265    }
1266    let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1267    *cursor += raw_len;
1268    Ok(oid)
1269}
1270
1271fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
1272    let mut byte = pack_next_byte(pack, cursor)?;
1273    let mut relative = u64::from(byte & 0x7f);
1274    while byte & 0x80 != 0 {
1275        byte = pack_next_byte(pack, cursor)?;
1276        relative = relative
1277            .checked_add(1)
1278            .and_then(|value| value.checked_shl(7))
1279            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
1280            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
1281    }
1282    entry_offset
1283        .checked_sub(relative)
1284        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
1285}
1286
1287fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
1288    let Some(byte) = pack.get(*cursor).copied() else {
1289        return Err(GitError::InvalidFormat("truncated pack entry".into()));
1290    };
1291    *cursor += 1;
1292    Ok(byte)
1293}
1294
1295fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
1296    Ok(ObjectId::null(format))
1297}
1298
1299/// Remove `path` if it exists, treating a missing file as success.
1300fn remove_file_if_exists(path: &Path) -> Result<()> {
1301    match fs::remove_file(path) {
1302        Ok(()) => Ok(()),
1303        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1304        Err(err) => Err(GitError::Io(err.to_string())),
1305    }
1306}
1307
1308fn walk_reachable_objects<R, I, F>(
1309    reader: &R,
1310    format: ObjectFormat,
1311    starts: I,
1312    excluded: &HashSet<ObjectId>,
1313    visit: F,
1314) -> Result<HashSet<ObjectId>>
1315where
1316    R: ObjectReader,
1317    I: IntoIterator<Item = ObjectId>,
1318    F: FnMut(&ObjectId, &Arc<EncodedObject>),
1319{
1320    walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
1321}
1322
1323/// [`walk_reachable_objects`] with an additional `cut` set: commits in `cut`
1324/// are visited (their trees and blobs too) but their parents are not followed,
1325/// mirroring a shallow client's view of its own history during negotiation.
1326fn walk_reachable_objects_with_cut<R, I, F>(
1327    reader: &R,
1328    format: ObjectFormat,
1329    starts: I,
1330    excluded: &HashSet<ObjectId>,
1331    cut: &HashSet<ObjectId>,
1332    mut visit: F,
1333) -> Result<HashSet<ObjectId>>
1334where
1335    R: ObjectReader,
1336    I: IntoIterator<Item = ObjectId>,
1337    F: FnMut(&ObjectId, &Arc<EncodedObject>),
1338{
1339    let mut seen = HashSet::new();
1340    let mut pending = Vec::new();
1341    for start in starts {
1342        pending.push(start);
1343        while let Some(oid) = pending.pop() {
1344            if excluded.contains(&oid) {
1345                continue;
1346            }
1347            if !seen.insert(oid) {
1348                continue;
1349            }
1350            let object = reader.read_object(&oid).map_err(|err| {
1351                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
1352            })?;
1353            match object.object_type {
1354                ObjectType::Commit => {
1355                    let (tree, parents) = {
1356                        let commit = Commit::parse_ref(format, &object.body)?;
1357                        (commit.tree, commit.parents)
1358                    };
1359                    visit(&oid, &object);
1360                    if !cut.contains(&oid) {
1361                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
1362                            pending.push(parent);
1363                        }
1364                    }
1365                    pending.push(tree);
1366                }
1367                ObjectType::Tree => {
1368                    let mut child_oids = Vec::new();
1369                    for entry in TreeEntries::new(format, &object.body) {
1370                        let entry = entry?;
1371                        if entry.is_gitlink() {
1372                            continue;
1373                        }
1374                        child_oids.push(entry.oid);
1375                    }
1376                    visit(&oid, &object);
1377                    pending.extend(child_oids.into_iter().rev());
1378                }
1379                ObjectType::Tag => {
1380                    let target = {
1381                        let tag = Tag::parse_ref(format, &object.body)?;
1382                        tag.object
1383                    };
1384                    visit(&oid, &object);
1385                    pending.push(target);
1386                }
1387                ObjectType::Blob => visit(&oid, &object),
1388            }
1389        }
1390    }
1391    Ok(seen)
1392}
1393
1394// ===== reachability bitmaps (.bitmap write + consult) =====
1395
/// Read bit `position` of a `u64`-word bitset using git's bitmap convention:
/// bit `i` lives in word `i / 64` at bit `i % 64` (LSB-first within a word).
///
/// Positions beyond the end of `words` read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    words
        .get((position / 64) as usize)
        .is_some_and(|word| word & mask != 0)
}
1402
/// Set bit `position` (word `position / 64`, bit `position % 64`) in `words`.
///
/// A position beyond the end of `words` is silently ignored; the slice is
/// never grown.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
1409
/// OR `other` into `acc` word by word.
///
/// Only the words both slices have in common are combined: extra words of
/// `acc` stay as they are and extra words of `other` are ignored.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other)
        .for_each(|(target, source)| *target |= *source);
}
1415
/// Ascending positions of every set bit in `words` (the inverse of repeated
/// [`bitset_set`]): bit `b` of word `w` is reported as `w * 64 + b`.
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    let mut positions = Vec::new();
    for (word_index, &word) in words.iter().enumerate() {
        positions.extend(
            (0..64u32)
                .filter(|bit| word & (1u64 << bit) != 0)
                .map(|bit| word_index as u32 * 64 + bit),
        );
    }
    positions
}
1429
/// Timestamp (epoch seconds) of a commit identity line
/// (`Name <email> <timestamp> <tz>`): the second-to-last space-separated
/// field parsed as `i64`.
///
/// Yields 0 when that field is missing, not UTF-8, or not an integer,
/// matching git's tolerance for bogus dates during bitmap commit selection.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Counting from the right: field 0 is the timezone, field 1 the timestamp.
    let timestamp_field = identity.rsplit(|byte| *byte == b' ').nth(1);
    match timestamp_field.map(std::str::from_utf8) {
        Some(Ok(text)) => text.parse::<i64>().unwrap_or(0),
        _ => 0,
    }
}
1442
/// Upstream `next_commit_index` (pack-bitmap-write.c): the spacing schedule
/// for bitmap commit selection over the date-descending commit list.
///
/// * `idx <= 100`: 0;
/// * `100 < idx <= 20000`: `idx - 100`, capped at 100;
/// * above that: `idx - 20000`, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
1461
1462/// Builds a serialised `.bitmap` for the pack described by `index_entries` /
1463/// `pack_checksum`, mirroring upstream pack-bitmap-write.c:
1464///
1465/// * commit selection walks the pack's commits in committer-date-descending
1466///   order through [`bitmap_next_commit_index`]'s spacing schedule, preferring
1467///   `preferred_tips` (ref tips — upstream's `NEEDS_BITMAP`) and merge commits
1468///   inside each window;
1469/// * each selected commit stores its full reachability closure (commits, trees,
1470///   blobs) as pack-order bit positions (no XOR compression — `xor_offset` 0 is
1471///   valid on disk and what readers see after resolution anyway).
1472///
1473/// Returns `Ok(None)` — mirroring upstream's warn-and-skip — when the pack
1474/// lacks full closure (a reachable object is missing from it).
1475pub fn build_pack_bitmap(
1476    db: &FileObjectDatabase,
1477    format: ObjectFormat,
1478    index_entries: &[PackIndexEntry],
1479    pack_checksum: &ObjectId,
1480    preferred_tips: &HashSet<ObjectId>,
1481) -> Result<Option<Vec<u8>>> {
1482    // `index_entries` carries no ordering guarantee (writer provenance is in
1483    // pack-write order); bit numbering follows pack (offset) order.
1484    let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
1485    by_offset.sort_by_key(|&slot| index_entries[slot].offset);
1486    let bit_order: Vec<ObjectId> = by_offset
1487        .into_iter()
1488        .map(|slot| index_entries[slot].oid)
1489        .collect();
1490    build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
1491}
1492
1493/// [`build_pack_bitmap`]'s multi-pack sibling: builds the serialised
1494/// `multi-pack-index-<checksum>.bitmap` for `midx_entries`, with bits in
1495/// pseudo-pack order (preferred pack first, then pack id, then offset — the
1496/// same order [`MultiPackIndex::write_with_reverse_index`] records in `RIDX`)
1497/// and the midx checksum in the BITM checksum field.
1498pub fn build_midx_bitmap(
1499    db: &FileObjectDatabase,
1500    format: ObjectFormat,
1501    midx_entries: &[sley_pack::MultiPackIndexEntry],
1502    midx_checksum: &ObjectId,
1503    preferred_pack: u32,
1504    preferred_tips: &HashSet<ObjectId>,
1505) -> Result<Option<Vec<u8>>> {
1506    let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
1507    pseudo.sort_by_key(|&slot| {
1508        let entry = &midx_entries[slot];
1509        (
1510            entry.pack_int_id != preferred_pack,
1511            entry.pack_int_id,
1512            entry.offset,
1513        )
1514    });
1515    let bit_order: Vec<ObjectId> = pseudo
1516        .into_iter()
1517        .map(|slot| midx_entries[slot].oid)
1518        .collect();
1519    build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
1520}
1521
1522/// Upstream `bitmap_builder_init`'s `num_maximal` counter (pack-bitmap-write.c):
1523/// walk the first-parent ancestry of the selected commits, children before
1524/// parents, propagating per-commit "which selected commits reach me" masks.
1525/// A commit counts as maximal when it is selected, or when distinct selected
1526/// lineages converge on it (its mask gains bits its last contributing child
1527/// did not carry). Only the count is needed (for the trace2 data event), so no
1528/// reverse-edge bookkeeping is kept.
1529fn bitmap_num_maximal_commits(
1530    db: &FileObjectDatabase,
1531    format: ObjectFormat,
1532    selected: &[ObjectId],
1533) -> Result<usize> {
1534    // First-parent subgraph reachable from the selected commits.
1535    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
1536    let mut stack: Vec<ObjectId> = selected.to_vec();
1537    while let Some(oid) = stack.pop() {
1538        if first_parent.contains_key(&oid) {
1539            continue;
1540        }
1541        let object = db.read_object(&oid)?;
1542        let commit = Commit::parse_ref(format, &object.body)?;
1543        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
1544        first_parent.insert(oid, parent);
1545        if let Some(parent) = parent {
1546            stack.push(parent);
1547        }
1548    }
1549    // Children-before-parents order (Kahn over the single first-parent edge).
1550    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
1551    for parent in first_parent.values().flatten() {
1552        *pending_children.entry(*parent).or_default() += 1;
1553    }
1554    let word_count = selected.len().div_ceil(64);
1555    struct MaximalEnt {
1556        mask: Vec<u64>,
1557        maximal: bool,
1558    }
1559    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
1560    for (bit, oid) in selected.iter().enumerate() {
1561        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
1562            mask: vec![0u64; word_count],
1563            maximal: true,
1564        });
1565        ent.mask[bit / 64] |= 1u64 << (bit % 64);
1566        ent.maximal = true;
1567    }
1568    let mut queue: Vec<ObjectId> = first_parent
1569        .keys()
1570        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
1571        .copied()
1572        .collect();
1573    let mut num_maximal = 0usize;
1574    while let Some(oid) = queue.pop() {
1575        if let Some(ent) = ents.remove(&oid) {
1576            if ent.maximal {
1577                num_maximal += 1;
1578            }
1579            if let Some(Some(parent)) = first_parent.get(&oid) {
1580                match ents.entry(*parent) {
1581                    std::collections::hash_map::Entry::Vacant(vacant) => {
1582                        // Fresh parent mask: c_not_p, !p_not_c -> not maximal.
1583                        vacant.insert(MaximalEnt {
1584                            mask: ent.mask.clone(),
1585                            maximal: false,
1586                        });
1587                    }
1588                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
1589                        let parent_ent = occupied.get_mut();
1590                        let c_not_p = ent
1591                            .mask
1592                            .iter()
1593                            .zip(&parent_ent.mask)
1594                            .any(|(child, parent)| child & !parent != 0);
1595                        if c_not_p {
1596                            let p_not_c = parent_ent
1597                                .mask
1598                                .iter()
1599                                .zip(&ent.mask)
1600                                .any(|(parent, child)| parent & !child != 0);
1601                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
1602                                *parent |= child;
1603                            }
1604                            parent_ent.maximal = p_not_c;
1605                        }
1606                    }
1607                }
1608            }
1609        }
1610        if let Some(Some(parent)) = first_parent.get(&oid)
1611            && let Some(remaining) = pending_children.get_mut(parent)
1612        {
1613            *remaining -= 1;
1614            if *remaining == 0 {
1615                queue.push(*parent);
1616            }
1617        }
1618    }
1619    Ok(num_maximal)
1620}
1621
/// Shared write half: `bit_order` lists every covered object's oid in bit
/// order (pack order for a single pack, pseudo-pack order for a midx);
/// `checksum` fills the BITM checksum field (pack checksum / midx checksum).
///
/// `preferred_tips` biases commit selection: inside a selection window the
/// first commit found in this set is chosen over the window's default pick.
///
/// Returns `Ok(None)` (no bitmap produced) when `bit_order` is empty or holds
/// more than `u32::MAX` entries, and when a walked commit, tree or blob is not
/// part of `bit_order` (a warning is printed to stderr first). Otherwise
/// returns the bytes produced by `PackBitmapWriter::write`.
///
/// # Errors
///
/// Propagates failures from reading or parsing objects through `db`, from the
/// trace2-only maximal-commit count, and from `PackBitmapWriter`.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    // Positions are stored as `u32` below, so larger inputs cannot be encoded.
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // The on-disk entry position space is the oid-sorted lookup order (.idx /
    // midx OIDL); derive each bit-order slot's rank there.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    // `index_position[bit slot]` = rank of that object in oid-sorted order.
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    // oid -> bit position; doubles as the "is this object covered" test.
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    // Object types in bit order; commits also collect (date, parent count).
    let mut object_types = Vec::with_capacity(object_count);
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Type via the header fast path: blobs (the bulk of most packs) never
        // need their bodies inflated here.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            // No header available: fall back to a full object read.
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                // Parent count after graft handling, not the raw header count.
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Selection: date-descending, then the spacing schedule.
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Small histories: every commit gets a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            // `next` is the width of the window starting at `i`.
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default pick is the last commit of the window.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // A preferred tip wins immediately and ends the scan.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    // Otherwise the last merge commit seen in the window wins.
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // Trace2 selection counters (upstream bitmap_builder_init): emitted before
    // the closure walk, like upstream emits them before building the ewah
    // bitmaps. Computing num_maximal_commits needs its own first-parent walk,
    // so it only runs when the trace2 event target is active.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    // Reachability closures, oldest-first so newer walks stop at memoised
    // older selected commits.
    let word_count = object_count.div_ceil(64);
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                // Mirrors upstream's "Packfile doesn't have full closure".
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            // Already covered by this walk.
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            // A finished closure for this commit exists: merge it, stop here.
            if let Some(stored) = memo.get(&oid) {
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            // `false` means a tree/blob is outside `bit_order`; it already warned.
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    // Entries are added in selection order (not the reversed walk order).
    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
1773
1774/// Marks `tree` and everything below it (sub-trees, blobs) in `acc`, skipping
1775/// already-set bits (their closure is already covered). Returns `false` when an
1776/// object is missing from the pack (no full closure), after warning.
1777fn bitmap_mark_tree(
1778    db: &impl ObjectReader,
1779    format: ObjectFormat,
1780    tree: &ObjectId,
1781    oid_to_pack: &HashMap<ObjectId, u32>,
1782    acc: &mut [u64],
1783) -> Result<bool> {
1784    let Some(&pack_pos) = oid_to_pack.get(tree) else {
1785        eprintln!(
1786            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
1787        );
1788        return Ok(false);
1789    };
1790    if bitset_get(acc, pack_pos) {
1791        return Ok(true);
1792    }
1793    bitset_set(acc, pack_pos);
1794    let object = db.read_object(tree)?;
1795    for entry in TreeEntries::new(format, &object.body) {
1796        let entry = entry?;
1797        if entry.is_gitlink() {
1798            continue;
1799        }
1800        if entry.is_tree() {
1801            if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
1802                return Ok(false);
1803            }
1804        } else {
1805            let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
1806                eprintln!(
1807                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
1808                    entry.oid
1809                );
1810                return Ok(false);
1811            };
1812            bitset_set(acc, blob_pos);
1813        }
1814    }
1815    Ok(true)
1816}
1817
/// A pack's `.bitmap` loaded for consultation: oid <-> pack-position mappings,
/// resolved (XOR-expanded) per-commit reachability bitsets, and the four object
/// type bitmaps. Bit numbering follows pack order throughout.
pub struct LoadedPackBitmap {
    // Number of objects covered, i.e. the number of meaningful bits per bitset.
    object_count: u32,
    // Object id -> bit position.
    oid_to_pack: HashMap<ObjectId, u32>,
    // Bit position -> object id (inverse of `oid_to_pack`).
    pack_to_oid: Vec<ObjectId>,
    // Resolved reachability bitset for each commit that has a stored entry.
    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
    // Type bitmaps, one bit per position, expanded to plain 64-bit words.
    commits: Vec<u64>,
    trees: Vec<u64>,
    blobs: Vec<u64>,
    tags: Vec<u64>,
}
1831
1832impl LoadedPackBitmap {
1833    pub fn object_count(&self) -> u32 {
1834        self.object_count
1835    }
1836
1837    /// Pack-order position of `oid`, when the object is in the bitmapped pack.
1838    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
1839        self.oid_to_pack.get(oid).copied()
1840    }
1841
1842    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
1843        self.pack_to_oid.get(position as usize)
1844    }
1845
1846    /// The resolved reachability bitset stored for `oid`, when it was one of
1847    /// the writer's selected commits.
1848    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
1849        self.commit_words.get(oid)
1850    }
1851
1852    /// Oids of every commit with a stored bitmap entry (unordered).
1853    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
1854        self.commit_words.keys()
1855    }
1856
1857    /// The type bitmap for `object_type` (bit per pack position).
1858    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
1859        match object_type {
1860            ObjectType::Commit => &self.commits,
1861            ObjectType::Tree => &self.trees,
1862            ObjectType::Blob => &self.blobs,
1863            ObjectType::Tag => &self.tags,
1864        }
1865    }
1866
1867    fn word_count(&self) -> usize {
1868        (self.object_count as usize).div_ceil(64)
1869    }
1870}
1871
1872/// Loads the single-pack `.bitmap` of `objects_dir/pack`, if a valid one
1873/// exists. Scans `pack-*.bitmap` files (sorted, first valid wins, like
1874/// upstream's "first bitmap" behaviour), requires the sibling `.idx`, and
1875/// verifies the recorded pack checksum. Any unreadable/corrupt bitmap yields
1876/// `Ok(None)` — consumers fall back to a regular object walk, mirroring
1877/// upstream's warn-and-ignore on bitmap load failure.
1878pub fn load_pack_bitmap(
1879    objects_dir: &Path,
1880    format: ObjectFormat,
1881) -> Result<Option<LoadedPackBitmap>> {
1882    let pack_dir = objects_dir.join("pack");
1883    if !pack_dir.exists() {
1884        return Ok(None);
1885    }
1886    // A multi-pack bitmap wins over single-pack bitmaps, like upstream's
1887    // open_bitmap trying the midx first.
1888    if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
1889        return Ok(Some(bitmap));
1890    }
1891    let mut bitmap_paths = Vec::new();
1892    for entry in fs::read_dir(&pack_dir)? {
1893        let path = entry?.path();
1894        if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
1895            && path
1896                .file_name()
1897                .and_then(|name| name.to_str())
1898                .is_some_and(|name| name.starts_with("pack-"))
1899        {
1900            bitmap_paths.push(path);
1901        }
1902    }
1903    bitmap_paths.sort();
1904    for bitmap_path in bitmap_paths {
1905        match load_pack_bitmap_file(&bitmap_path, format) {
1906            Ok(Some(bitmap)) => return Ok(Some(bitmap)),
1907            Ok(None) | Err(_) => continue,
1908        }
1909    }
1910    Ok(None)
1911}
1912
1913/// Loads `multi-pack-index-<checksum>.bitmap` when the pack directory has a
1914/// multi-pack-index with a `RIDX` chunk (the bit-order permutation) and a
1915/// matching bitmap file. Returns `Ok(None)` — never an error — on any missing
1916/// or unusable piece, so callers fall through to single-pack bitmaps.
1917fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
1918    let midx_path = pack_dir.join("multi-pack-index");
1919    if !midx_path.exists() {
1920        return Ok(None);
1921    }
1922    let Ok(midx_bytes) = fs::read(&midx_path) else {
1923        return Ok(None);
1924    };
1925    let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
1926        return Ok(None);
1927    };
1928    let bitmap_path = pack_dir.join(format!(
1929        "multi-pack-index-{}.bitmap",
1930        midx.checksum.to_hex()
1931    ));
1932    if !bitmap_path.exists() {
1933        return Ok(None);
1934    }
1935    let object_count = midx.objects.len();
1936    // Upstream `load_midx_revindex`: prefer the midx's own RIDX chunk unless
1937    // GIT_TEST_MIDX_READ_RIDX=0 disables it, else fall back to the separate
1938    // `multi-pack-index-<checksum>.rev` file; a trace2 data event records
1939    // which source supplied the permutation.
1940    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
1941        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
1942        .unwrap_or(true);
1943    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
1944        (Some(chunk), true) => {
1945            sley_core::trace2::data("load_midx_revindex", "source", "midx");
1946            chunk.clone()
1947        }
1948        _ => {
1949            let rev_path =
1950                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
1951            let Ok(rev_bytes) = fs::read(&rev_path) else {
1952                // Without the RIDX permutation the bit numbering is unknown.
1953                return Ok(None);
1954            };
1955            let Ok(parsed_rev) =
1956                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
1957            else {
1958                return Ok(None);
1959            };
1960            sley_core::trace2::data("load_midx_revindex", "source", "rev");
1961            parsed_rev.positions
1962        }
1963    };
1964    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
1965        return Ok(None);
1966    };
1967    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
1968        Ok(parsed) => parsed,
1969        Err(_) => return Ok(None),
1970    };
1971    if parsed.pack_checksum != midx.checksum {
1972        return Ok(None);
1973    }
1974
1975    // midx.objects is in lookup (oid-sorted) order; RIDX maps bit positions
1976    // to lookup positions.
1977    let mut pack_to_oid = Vec::with_capacity(object_count);
1978    for &midx_pos in &reverse_index {
1979        let Some(entry) = midx.objects.get(midx_pos as usize) else {
1980            return Ok(None);
1981        };
1982        pack_to_oid.push(entry.oid);
1983    }
1984    let mut oid_to_pack = HashMap::with_capacity(object_count);
1985    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
1986        oid_to_pack.insert(*oid, pack_pos as u32);
1987    }
1988    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
1989        midx.objects.get(position).map(|entry| entry.oid)
1990    }) {
1991        Ok(loaded) => Ok(Some(loaded)),
1992        Err(_) => Ok(None),
1993    }
1994}
1995
1996fn load_pack_bitmap_file(
1997    bitmap_path: &Path,
1998    format: ObjectFormat,
1999) -> Result<Option<LoadedPackBitmap>> {
2000    let index_path = bitmap_path.with_extension("idx");
2001    if !index_path.exists() {
2002        return Ok(None);
2003    }
2004    let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
2005    let object_count = index.entries.len();
2006    let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
2007    if parsed.pack_checksum != index.pack_checksum {
2008        return Ok(None);
2009    }
2010
2011    let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2012    pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2013    let mut pack_to_oid = Vec::with_capacity(object_count);
2014    for index_pos in &pack_order {
2015        pack_to_oid.push(index.entries[*index_pos as usize].oid);
2016    }
2017    let mut oid_to_pack = HashMap::with_capacity(object_count);
2018    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2019        oid_to_pack.insert(*oid, pack_pos as u32);
2020    }
2021
2022    assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2023        index.entries.get(position).map(|entry| entry.oid)
2024    })
2025    .map(Some)
2026}
2027
2028/// Shared tail of the bitmap loaders: expands the type bitmaps, resolves the
2029/// per-commit entries (XOR offsets reference earlier entries in file order),
2030/// and maps each entry's lookup-order position back to a commit oid via
2031/// `lookup_oid`.
2032fn assemble_loaded_bitmap(
2033    parsed: PackBitmapIndex,
2034    object_count: usize,
2035    pack_to_oid: Vec<ObjectId>,
2036    oid_to_pack: HashMap<ObjectId, u32>,
2037    lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2038) -> Result<LoadedPackBitmap> {
2039    let word_count = object_count.div_ceil(64);
2040    let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2041        let mut words = bitmap.to_words()?;
2042        words.resize(word_count, 0);
2043        Ok(words)
2044    };
2045
2046    let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2047    let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2048    for (entry_index, entry) in parsed.entries.iter().enumerate() {
2049        let mut words = expand(&entry.bitmap)?;
2050        if entry.xor_offset > 0 {
2051            let base_index = entry_index - entry.xor_offset as usize;
2052            let base = &resolved[base_index];
2053            for (dst, src) in words.iter_mut().zip(base.iter()) {
2054                *dst ^= *src;
2055            }
2056        }
2057        let words = Arc::new(words);
2058        resolved.push(Arc::clone(&words));
2059        let commit_oid = lookup_oid(entry.object_position as usize)
2060            .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2061        commit_words.insert(commit_oid, words);
2062    }
2063
2064    Ok(LoadedPackBitmap {
2065        object_count: object_count as u32,
2066        oid_to_pack,
2067        pack_to_oid,
2068        commit_words,
2069        commits: expand(&parsed.type_bitmaps.commits)?,
2070        trees: expand(&parsed.type_bitmaps.trees)?,
2071        blobs: expand(&parsed.type_bitmaps.blobs)?,
2072        tags: expand(&parsed.type_bitmaps.tags)?,
2073    })
2074}
2075
/// Result of a bitmap-assisted reachability walk: pack-position bits for
/// in-pack objects plus the "extended" objects encountered outside the
/// bitmapped pack (in first-seen order, like upstream's extended index).
pub struct BitmapWalkResult {
    /// One bit per pack position; a set bit means that object is reachable.
    pub words: Vec<u64>,
    /// Reachable objects with no pack position, each with the type it was
    /// recorded under, in the order the walk first saw them.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
2083
2084impl BitmapWalkResult {
2085    /// Removes everything reachable in `haves` from this result.
2086    pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2087        for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2088            *dst &= !*src;
2089        }
2090        let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2091        self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2092    }
2093}
2094
2095/// Computes the set of objects reachable from `roots` using stored bitmaps
2096/// where available and a fill-in object walk where not — the consult half of
2097/// the bitmap engine (upstream `find_objects` + `fill_in_bitmap`).
2098///
2099/// Roots may be any object type; tag chains are peeled with every tag object
2100/// itself included, like the pending-object handling in
2101/// `prepare_bitmap_walk`. When `include_objects` is false only commits are
2102/// walked (tree contents of fill-in commits are not marked) — callers that
2103/// only count/enumerate commits mask with the commit type bitmap, so the
2104/// extra non-commit bits OR-ed in from stored (closed) bitmaps are harmless.
2105pub fn bitmap_reachable(
2106    bitmap: &LoadedPackBitmap,
2107    db: &impl ObjectReader,
2108    format: ObjectFormat,
2109    roots: &[ObjectId],
2110    include_objects: bool,
2111) -> Result<BitmapWalkResult> {
2112    let mut walk = BitmapFillWalk {
2113        bitmap,
2114        words: vec![0u64; bitmap.word_count()],
2115        extended: Vec::new(),
2116        extended_seen: HashSet::new(),
2117    };
2118    let mut commit_stack: Vec<ObjectId> = Vec::new();
2119
2120    for root in roots {
2121        let mut oid = *root;
2122        // Peel tag chains, marking each tag object on the way.
2123        loop {
2124            let object = db.read_object(&oid)?;
2125            match object.object_type {
2126                ObjectType::Tag => {
2127                    walk.mark(&oid, ObjectType::Tag);
2128                    let tag = Tag::parse_ref(format, &object.body)?;
2129                    oid = tag.object;
2130                }
2131                ObjectType::Commit => {
2132                    commit_stack.push(oid);
2133                    break;
2134                }
2135                ObjectType::Tree => {
2136                    walk.mark_tree_closure(db, format, &oid)?;
2137                    break;
2138                }
2139                ObjectType::Blob => {
2140                    walk.mark(&oid, ObjectType::Blob);
2141                    break;
2142                }
2143            }
2144        }
2145    }
2146
2147    while let Some(oid) = commit_stack.pop() {
2148        if let Some(position) = bitmap.pack_position(&oid) {
2149            if bitset_get(&walk.words, position) {
2150                continue;
2151            }
2152            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
2153                bitset_or(&mut walk.words, stored);
2154                continue;
2155            }
2156            bitset_set(&mut walk.words, position);
2157        } else {
2158            if walk.extended_seen.contains(&oid) {
2159                continue;
2160            }
2161            walk.extended_seen.insert(oid);
2162            walk.extended.push((oid, ObjectType::Commit));
2163        }
2164        let object = db.read_object(&oid)?;
2165        let commit = Commit::parse_ref(format, &object.body)?;
2166        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
2167        if include_objects {
2168            walk.mark_tree_closure(db, format, &commit.tree)?;
2169        }
2170    }
2171
2172    Ok(BitmapWalkResult {
2173        words: walk.words,
2174        extended: walk.extended,
2175    })
2176}
2177
/// Mutable state of one bitmap-assisted walk (see `bitmap_reachable`).
struct BitmapFillWalk<'a> {
    // The loaded bitmap being consulted for pack positions.
    bitmap: &'a LoadedPackBitmap,
    // Accumulated reachability bits, one per pack position.
    words: Vec<u64>,
    // Objects without a pack position, in first-seen order.
    extended: Vec<(ObjectId, ObjectType)>,
    // Oids already pushed to `extended`, for de-duplication.
    extended_seen: HashSet<ObjectId>,
}
2184
2185impl BitmapFillWalk<'_> {
2186    /// Marks one object; returns false when it was already marked.
2187    fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2188        if let Some(position) = self.bitmap.pack_position(oid) {
2189            if bitset_get(&self.words, position) {
2190                return false;
2191            }
2192            bitset_set(&mut self.words, position);
2193            true
2194        } else {
2195            if !self.extended_seen.insert(*oid) {
2196                return false;
2197            }
2198            self.extended.push((*oid, object_type));
2199            true
2200        }
2201    }
2202
2203    /// Marks `tree` and everything below it, skipping subtrees already marked
2204    /// (a set in-pack bit means its closure is covered: either it came from a
2205    /// stored — closed — bitmap, or this walk already expanded it).
2206    fn mark_tree_closure(
2207        &mut self,
2208        db: &impl ObjectReader,
2209        format: ObjectFormat,
2210        tree: &ObjectId,
2211    ) -> Result<()> {
2212        if !self.mark(tree, ObjectType::Tree) {
2213            return Ok(());
2214        }
2215        let object = db.read_object(tree)?;
2216        for entry in TreeEntries::new(format, &object.body) {
2217            let entry = entry?;
2218            if entry.is_gitlink() {
2219                continue;
2220            }
2221            if entry.is_tree() {
2222                self.mark_tree_closure(db, format, &entry.oid)?;
2223            } else {
2224                self.mark(&entry.oid, ObjectType::Blob);
2225            }
2226        }
2227        Ok(())
2228    }
2229}
2230
/// In-memory object store: encoded objects held in a map keyed by object id.
#[derive(Debug)]
pub struct ObjectDatabase {
    // Hash format used to compute and validate object ids.
    format: ObjectFormat,
    // Behind a `Mutex` so `write_object` can take `&self` (matching the
    // `ObjectWriter` trait) and a single handle can interleave reads and writes
    // without a `&mut` borrow — the same shared-by-`&` shape the file-backed
    // database uses for its caches. Removes the need for callers to wrap this in
    // a `RefCell`/`&mut` just to write (see sley-fetch's former `RefCell` dance).
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    // Set through `with_promisor`; defaults to `false`.
    // NOTE(review): no reader of this flag is visible in this part of the file.
    promisor: bool,
}
2242
2243impl ObjectDatabase {
2244    pub fn new(format: ObjectFormat) -> Self {
2245        Self {
2246            format,
2247            objects: Mutex::new(HashMap::new()),
2248            promisor: false,
2249        }
2250    }
2251
2252    pub fn with_promisor(mut self, promisor: bool) -> Self {
2253        self.promisor = promisor;
2254        self
2255    }
2256
2257    pub fn contains(&self, oid: &ObjectId) -> bool {
2258        self.objects
2259            .lock()
2260            .map(|objects| objects.contains_key(oid))
2261            .unwrap_or(false)
2262    }
2263
2264    pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2265        let object = self.read_object(oid)?;
2266        let actual = object.object_id(self.format)?;
2267        if &actual == oid {
2268            Ok(())
2269        } else {
2270            Err(GitError::InvalidObject(format!(
2271                "object id mismatch: expected {oid}, got {actual}"
2272            )))
2273        }
2274    }
2275}
2276
2277impl ObjectReader for ObjectDatabase {
2278    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
2279        self.objects
2280            .lock()
2281            .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
2282            .get(oid)
2283            .map(Arc::clone)
2284            .or_else(|| implied_empty_tree_object(self.format, oid))
2285            .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
2286    }
2287}
2288
2289impl ObjectWriter for ObjectDatabase {
2290    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
2291        let oid = object.object_id(self.format)?;
2292        self.objects
2293            .lock()
2294            .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
2295            .entry(oid)
2296            .or_insert_with(|| Arc::new(object));
2297        Ok(oid)
2298    }
2299}
2300
/// One alternate, identified by a filesystem path.
// NOTE(review): presumably an alternate object directory (git "alternates");
// confirm against the code that constructs and consumes this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
2305
/// Partial-clone settings carried as plain data.
// NOTE(review): the field meanings below are read off the names only; the
// consuming code is outside this part of the file — confirm there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether missing promised objects are tolerated.
    pub allow_missing_promised_objects: bool,
}
2311
/// Raw pack-file bytes keyed by pack path, shared across cloned handles. Loaded
/// once so individual objects can be decoded at their offsets (see
/// [`sley_pack::read_object_at`]) without re-reading the whole file per read.
///
/// Each pack's bytes sit behind their own `Arc`, so a reader can hold them
/// without keeping the map's mutex locked.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
2316
/// Bytes backing a pack file. With the `mmap` feature they may be a memory
/// map; otherwise (or as a fallback) they are a heap buffer. Either way the
/// value derefs to `&[u8]`, so decoding code never needs to distinguish them.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file contents (only with the `mmap` feature).
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read into memory.
    Heap(Vec<u8>),
}

impl std::ops::Deref for PackData {
    type Target = [u8];

    /// Borrows the underlying bytes, whichever variant holds them.
    fn deref(&self) -> &Self::Target {
        match self {
            #[cfg(feature = "mmap")]
            PackData::Mapped(mapped) => mapped,
            PackData::Heap(bytes) => bytes.as_slice(),
        }
    }
}
2337
2338/// Load a pack file's bytes: memory-mapped when the `mmap` feature is on (falling
2339/// back to a heap read if the map fails), otherwise read into the heap.
2340#[cfg(feature = "mmap")]
2341fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2342    match sley_mmap::MappedFile::open_pack(pack_path) {
2343        Ok(mapped) => Ok(PackData::Mapped(mapped)),
2344        Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
2345    }
2346}
2347
2348#[cfg(not(feature = "mmap"))]
2349fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2350    Ok(PackData::Heap(fs::read(pack_path)?))
2351}
2352
2353#[cfg(feature = "mmap")]
2354fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
2355    match sley_mmap::MappedFile::open_pack(index_path) {
2356        Ok(mapped) => Ok(Arc::new(mapped)),
2357        Err(_) => Ok(Arc::new(fs::read(index_path)?)),
2358    }
2359}
2360
2361#[cfg(not(feature = "mmap"))]
2362fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
2363    Ok(Arc::new(fs::read(index_path)?))
2364}
2365
/// Memory-capped LRU of recently decoded objects, shared across cloned handles,
/// so hot delta bases and repeated reads during a walk aren't re-decoded. The
/// cache is bounded by an approximate byte budget (not a fixed object count) so
/// it neither thrashes on bulk reads of small objects nor blows up on a few
/// large ones.
///
/// A single mutex guards the whole cache.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;
2372
/// Per-pack caches of objects decoded from a pack, keyed by pack path and then by
/// the in-pack byte offset of each object's entry. Shared across cloned handles.
/// This is the delta-base cache: resolving a delta chain by offset reuses already
/// decoded bases instead of re-inflating the whole chain on every read.
///
/// Two lock levels: the outer mutex guards the path -> cache map, and each
/// pack's cache sits behind its own mutex.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
2378
/// One pack's offset-keyed header memo (see [`PackHeaderTypeCaches`]): maps an
/// in-pack entry offset to the memoized `(ObjectType, u64)` header result.
// NOTE(review): the `u64` is presumably the object's size; confirm against
// the header reader that fills this memo.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Per-pack memo of `in-pack offset -> end-of-chain object type` for the
/// `cat-file --batch-check` header fast path. Resolving a packed delta's *type*
/// walks the delta chain to its base; without this memo every header read
/// re-walks (and re-inflates) the whole chain, so reading every object in a
/// deeply-deltified pack is super-linear (sley#26). The type only depends on the
/// chain base, so memoizing `offset -> type` lets each chain be walked at most
/// once across a batch. Keyed by pack path so an offset key is never applied to
/// the wrong pack's bytes; shared across cloned handles.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
2391
/// Default approximate byte budget for the decoded-object LRU: 96 MiB. Sized to
/// comfortably hold the working set of a history walk (commits/trees/blobs and
/// their delta bases) without growing without bound on large repositories.
/// Overridable via the `SLEY_OBJECT_CACHE_BYTES` environment variable; there is
/// currently no git-config hook threaded into the object database, so this
/// constant is the default.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;
2398
/// Default approximate byte budget (96 MiB) for each per-pack delta-base cache.
/// Holds the decoded bases of the delta chains being walked so neighboring reads
/// stay warm. The budget applies per pack, not across all packs combined.
/// Overridable via `SLEY_DELTA_BASE_CACHE_BYTES`.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
2403
2404/// Approximate heap cost of caching one [`EncodedObject`]: its body plus a fixed
2405/// allowance for the key, enum/`Vec` headers, and per-entry map overhead. Used
2406/// only to drive eviction, so an estimate is fine.
2407fn cached_object_cost(object: &EncodedObject) -> usize {
2408    object.body.len().saturating_add(64)
2409}
2410
/// Resolve a cache byte budget from the environment variable `var`.
///
/// The variable's value is trimmed and parsed as a `usize`. `default` is
/// returned when the variable cannot be read (unset or not valid Unicode) or
/// when the trimmed text does not parse. A parsed value of `0` is returned
/// as-is, which callers treat as "cache disabled".
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
2419
2420/// Approximate byte budget for the decoded-object LRU (see
2421/// [`DEFAULT_OBJECT_CACHE_BYTES`], `SLEY_OBJECT_CACHE_BYTES`).
2422///
2423/// Resolved once per process: the environment does not change under us, and a new
2424/// `FileObjectDatabase` is built often enough (e.g. once per revision resolved)
2425/// that re-reading the variable each time showed up as per-object overhead.
2426fn object_cache_budget() -> usize {
2427    static BUDGET: OnceLock<usize> = OnceLock::new();
2428    *BUDGET.get_or_init(|| {
2429        cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
2430    })
2431}
2432
2433/// Approximate byte budget for each per-pack delta-base cache (see
2434/// [`DEFAULT_DELTA_BASE_CACHE_BYTES`], `SLEY_DELTA_BASE_CACHE_BYTES`). Resolved
2435/// once per process for the same reason as [`object_cache_budget`].
2436fn delta_base_cache_budget() -> usize {
2437    static BUDGET: OnceLock<usize> = OnceLock::new();
2438    *BUDGET.get_or_init(|| {
2439        cache_budget_from_env(
2440            "SLEY_DELTA_BASE_CACHE_BYTES",
2441            DEFAULT_DELTA_BASE_CACHE_BYTES,
2442        )
2443    })
2444}
2445
/// Whether every object read should be re-hashed and compared to the requested id.
///
/// Off by default, matching git: reads trust the pack index → offset mapping and
/// the loose object's on-disk name, and object ids are verified where git verifies
/// them — when a pack is received (the index build re-hashes every object) and on
/// demand via [`FileObjectDatabase`]'s `validate`/fsck. Re-hashing on *every* read
/// dominated bulk-read cost (a scalar pure-Rust SHA-1 over each object's full
/// body), so the paranoid check is opt-in through `SLEY_VERIFY_READS`.
///
/// The variable enables verification when it is set and its trimmed value is
/// neither empty nor `0`. It is consulted once; later calls return the cached
/// answer, so changing the variable afterwards has no effect in this process.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|raw| {
                let setting = raw.trim();
                !(setting.is_empty() || setting == "0")
            })
            .unwrap_or(false)
    })
}
2463
/// A memory-capped LRU map from a key `K` to a decoded [`EncodedObject`].
///
/// Eviction is by approximate byte budget (gix-style), not object count, so the
/// cache adapts to object size. On access an entry is moved to most-recently-used;
/// on insert, least-recently-used entries are dropped until the budget holds. A
/// budget of `0` makes the cache inert. Generic over the key so it backs both the
/// oid-keyed decoded-object cache and the offset-keyed delta-base cache.
///
/// Recency is tracked as a doubly linked list threaded through the map entries
/// themselves: each [`LruEntry`] stores the keys of its neighbors, and `head` /
/// `tail` name the two ends.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    /// Maximum total cost (see `cached_object_cost`) the cache may hold.
    budget: usize,
    /// Sum of the costs of all entries currently in `map`.
    used: usize,
    /// Cached objects plus their recency links.
    map: HashMap<K, LruEntry<K>>,
    /// Least-recently-used key; eviction in `put` removes from this end.
    head: Option<K>,
    /// Most-recently-used key; `attach_back` appends at this end.
    tail: Option<K>,
}
2479
/// One cached object together with its links in the [`LruCache`] recency list.
#[derive(Debug)]
struct LruEntry<K> {
    /// The cached decoded object, shared with whoever read it out of the cache.
    object: Arc<EncodedObject>,
    /// Key of the neighbor toward the least-recently-used end (`None` at the head).
    prev: Option<K>,
    /// Key of the neighbor toward the most-recently-used end (`None` at the tail).
    next: Option<K>,
}
2486
2487impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
2488    fn new(budget: usize) -> Self {
2489        Self {
2490            budget,
2491            used: 0,
2492            map: HashMap::new(),
2493            head: None,
2494            tail: None,
2495        }
2496    }
2497
2498    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
2499        let object = Arc::clone(&self.map.get(key)?.object);
2500        self.touch(key);
2501        Some(object)
2502    }
2503
2504    /// Move `key` to the most-recently-used end in O(1).
2505    fn touch(&mut self, key: &K) {
2506        if self.tail.as_ref() == Some(key) {
2507            return;
2508        }
2509        if self.map.contains_key(key) {
2510            self.detach(key);
2511            self.attach_back(key.clone());
2512        }
2513    }
2514
2515    /// Drop `key` from both the map and the recency queue, releasing its budget.
2516    fn remove(&mut self, key: &K) {
2517        if let Some(entry) = self.map.get(key) {
2518            self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
2519        }
2520        self.detach(key);
2521        self.map.remove(key);
2522    }
2523
2524    fn detach(&mut self, key: &K) {
2525        let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
2526            let prev = entry.prev.take();
2527            let next = entry.next.take();
2528            (prev, next)
2529        }) else {
2530            return;
2531        };
2532
2533        match &prev {
2534            Some(prev_key) => {
2535                if let Some(prev_entry) = self.map.get_mut(prev_key) {
2536                    prev_entry.next = next.clone();
2537                }
2538            }
2539            None => self.head = next.clone(),
2540        }
2541        match &next {
2542            Some(next_key) => {
2543                if let Some(next_entry) = self.map.get_mut(next_key) {
2544                    next_entry.prev = prev.clone();
2545                }
2546            }
2547            None => self.tail = prev.clone(),
2548        }
2549    }
2550
2551    fn attach_back(&mut self, key: K) {
2552        let previous_tail = self.tail.replace(key.clone());
2553        match previous_tail {
2554            Some(tail_key) => {
2555                if let Some(tail_entry) = self.map.get_mut(&tail_key) {
2556                    tail_entry.next = Some(key.clone());
2557                }
2558                if let Some(entry) = self.map.get_mut(&key) {
2559                    entry.prev = Some(tail_key);
2560                    entry.next = None;
2561                }
2562            }
2563            None => {
2564                self.head = Some(key.clone());
2565                if let Some(entry) = self.map.get_mut(&key) {
2566                    entry.prev = None;
2567                    entry.next = None;
2568                }
2569            }
2570        }
2571    }
2572
2573    fn clear(&mut self) {
2574        self.map.clear();
2575        self.head = None;
2576        self.tail = None;
2577        self.used = 0;
2578    }
2579
2580    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
2581        if self.budget == 0 {
2582            return;
2583        }
2584        let cost = cached_object_cost(&object);
2585        // A single object larger than the whole budget is not worth caching; it
2586        // would immediately evict everything including itself. Drop any stale
2587        // smaller entry stored under the same key so accounting stays exact.
2588        if cost > self.budget {
2589            self.remove(&key);
2590            return;
2591        }
2592        if let Some(entry) = self.map.get_mut(&key) {
2593            let previous = std::mem::replace(&mut entry.object, object);
2594            // Replacing an existing entry: adjust accounting and refresh recency.
2595            self.used = self
2596                .used
2597                .saturating_sub(cached_object_cost(&previous))
2598                .saturating_add(cost);
2599            self.touch(&key);
2600        } else {
2601            self.used = self.used.saturating_add(cost);
2602            self.map.insert(
2603                key.clone(),
2604                LruEntry {
2605                    object,
2606                    prev: None,
2607                    next: None,
2608                },
2609            );
2610            self.attach_back(key);
2611        }
2612        while self.used > self.budget {
2613            let Some(evicted) = self.head.clone() else {
2614                break;
2615            };
2616            self.remove(&evicted);
2617        }
2618    }
2619}
2620
/// Decoded-object cache keyed by object id (loose + packed reads share it).
/// An [`LruCache`] instantiated with [`ObjectId`] keys.
type LruObjectCache = LruCache<ObjectId>;
/// Delta-base cache keyed by in-pack byte offset, scoped to one pack.
/// An [`LruCache`] instantiated with `u64` offset keys.
type LruOffsetCache = LruCache<u64>;
2625
2626/// Bridges the offset-keyed [`LruOffsetCache`] to [`sley_pack::PackDeltaCache`]
2627/// so the pack decoder can reuse decoded delta bases. Holds the shared cache
2628/// behind its mutex; a poisoned lock simply behaves as a cache miss/no-op, so a
2629/// decode still completes correctly (just without reuse).
2630struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
2631
2632impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
2633    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
2634        self.0.lock().ok()?.get(&offset)
2635    }
2636
2637    fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
2638        if let Ok(mut cache) = self.0.lock() {
2639            cache.put(offset, object);
2640        }
2641    }
2642}
2643
2644/// Bridges a per-pack `offset -> ObjectType` memo into the header fast path so
2645/// the ofs-delta chain walk is performed at most once per chain across a batch
2646/// of `read_object_header` calls (sley#26).
2647struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
2648
2649impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
2650    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
2651        self.0.lock().ok()?.get(&pack_offset).copied()
2652    }
2653
2654    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
2655        if let Ok(mut cache) = self.0.lock() {
2656            cache.insert(pack_offset, header);
2657        }
2658    }
2659}
2660
/// Parsed pack indexes keyed by `.idx` path, shared across cloned handles. This
/// remains for MIDX and path-only fallback lookups; normal pack-directory scans
/// use [`PackRegistrySnapshot`] so the lookup hot path can walk already-parsed
/// pack records directly. Each parsed index is held in an `Arc`, so a cached
/// index can be cloned out of the map cheaply.
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;
2666
/// Parsed multi-pack-index files keyed by path, shared across cloned handles.
/// Caches the MIDX parse so object lookups in repositories with a MIDX avoid
/// reparsing the same fanout/object tables for every read. Each parsed MIDX is
/// held in an `Arc`, so it can be cloned out of the map cheaply.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
2671
/// Raw multi-pack-index OID lookup tables keyed by path, shared across cloned
/// handles. These avoid hashing and materializing every MIDX object when a
/// command only needs point lookups (e.g. the presence checks done by
/// [`ObjectPresenceChecker`]).
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
2676
/// One registered `.idx`/`.pack` pair from a pack directory. The index is parsed
/// when the registry snapshot is built; pack bytes and per-pack decode/header
/// caches hang directly off this record so repeated object lookups do not bounce
/// through path-keyed maps.
// NOTE(review): `RegisteredPack::new` stores `None` in `index` and
// `RegisteredPack::index` parses on first use; whether the snapshot builder
// calls `index` eagerly is not visible here — confirm the "parsed when the
// registry snapshot is built" wording above.
#[derive(Debug)]
struct RegisteredPack {
    /// Path of the `.idx` file that `index` loads and parses.
    idx: PathBuf,
    /// Path of the `.pack` file that `bytes` loads.
    pack: PathBuf,
    /// Parsed index view; `None` until an `index` call has stored one.
    index: Mutex<Option<Arc<PackIndexViewData>>>,
    /// Loaded pack data; `None` until a `bytes` call has stored it.
    data: Mutex<Option<Arc<PackData>>>,
    /// This pack's offset-keyed delta-base cache.
    delta_cache: Arc<Mutex<LruOffsetCache>>,
    /// This pack's offset-keyed header memo.
    header_type_cache: PackHeaderTypeCache,
}
2690
2691impl RegisteredPack {
2692    fn new(idx: PathBuf, pack: PathBuf) -> Self {
2693        Self {
2694            idx,
2695            pack,
2696            index: Mutex::new(None),
2697            data: Mutex::new(None),
2698            delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
2699            header_type_cache: Arc::new(Mutex::new(HashMap::new())),
2700        }
2701    }
2702
2703    fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
2704        if let Ok(cache) = self.index.lock()
2705            && let Some(index) = cache.as_ref()
2706        {
2707            return Ok(Arc::clone(index));
2708        }
2709        let index_bytes = load_pack_index_data(&self.idx)?;
2710        let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
2711            index_bytes,
2712            format,
2713        )?);
2714        if let Ok(mut cache) = self.index.lock() {
2715            *cache = Some(Arc::clone(&index));
2716        }
2717        Ok(index)
2718    }
2719
2720    fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
2721        if let Ok(cache) = self.data.lock()
2722            && let Some(bytes) = cache.as_ref()
2723        {
2724            return Ok(Arc::clone(bytes));
2725        }
2726        if let Ok(cache) = pack_bytes.lock()
2727            && let Some(bytes) = cache.get(&self.pack)
2728        {
2729            let bytes = Arc::clone(bytes);
2730            if let Ok(mut local_cache) = self.data.lock() {
2731                *local_cache = Some(Arc::clone(&bytes));
2732            }
2733            return Ok(bytes);
2734        }
2735        let bytes = Arc::new(load_pack_data(&self.pack)?);
2736        if let Ok(mut local_cache) = self.data.lock() {
2737            *local_cache = Some(Arc::clone(&bytes));
2738        }
2739        if let Ok(mut cache) = pack_bytes.lock() {
2740            cache.insert(self.pack.clone(), Arc::clone(&bytes));
2741        }
2742        Ok(bytes)
2743    }
2744}
2745
/// Cheap summary of a pack directory, stored in [`PackRegistrySnapshot`] and
/// comparable for equality.
// NOTE(review): presumably compared against a freshly computed fingerprint to
// decide whether a cached snapshot is stale — confirm at the site that builds it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    /// A modification time, when one could be obtained.
    // NOTE(review): presumably the pack directory's own mtime — confirm.
    modified: Option<std::time::SystemTime>,
    /// Count of index files. NOTE(review): presumably `*.idx` entries — confirm.
    idx_count: usize,
    /// Count of pack files. NOTE(review): presumably `*.pack` entries — confirm.
    pack_count: usize,
}
2752
/// Snapshot of a pack directory's lookup state, shared across cloned handles.
/// New packs are still found: a lookup that misses every cached pack re-scans the
/// directory once before concluding the object is absent (see
/// [`FileObjectDatabase::find_pack_containing`]).
#[derive(Debug)]
struct PackRegistrySnapshot {
    /// Fingerprint recorded for the directory when this snapshot was created.
    fingerprint: PackDirFingerprint,
    /// The registered packs; indices into this vector identify packs in hints.
    packs: Vec<Arc<RegisteredPack>>,
    /// Index into `packs` last stored by `remember_hint`, if any.
    recent_pack: Mutex<Option<usize>>,
}
2763
2764impl PackRegistrySnapshot {
2765    fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
2766        Self {
2767            fingerprint,
2768            packs,
2769            recent_pack: Mutex::new(None),
2770        }
2771    }
2772
2773    fn cached_hint(&self) -> Option<usize> {
2774        self.recent_pack
2775            .lock()
2776            .ok()
2777            .and_then(|hint| *hint)
2778            .filter(|pack_index| *pack_index < self.packs.len())
2779    }
2780
2781    fn remember_hint(&self, pack_index: usize) {
2782        if let Ok(mut hint) = self.recent_pack.lock() {
2783            *hint = Some(pack_index);
2784        }
2785    }
2786}
2787
/// Cached pack-registry snapshot for this object directory, shared across cloned
/// handles. A `FileObjectDatabase` owns exactly one object directory, so this is
/// an `Option` instead of another path-keyed map. `None` means no snapshot is
/// currently cached (the initial state, and the state after
/// `FileObjectDatabase::refresh_read_cache`).
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
2792
/// A location inside a pack: which pack file and the byte offset within it.
///
/// When the pack came from a registry snapshot, `registered` carries its
/// [`RegisteredPack`] record so bytes and caches are taken from that record;
/// otherwise the accessors fall back to the database's path-keyed caches.
#[derive(Debug, Clone)]
struct PackLookup {
    /// Path of the `.pack` file.
    pack: PathBuf,
    /// The registry record for the pack, when the lookup came from one.
    registered: Option<Arc<RegisteredPack>>,
    /// Byte offset within the pack.
    // NOTE(review): presumably the offset of the object's entry — confirm at callers.
    offset: u64,
}
2799
2800impl PackLookup {
2801    fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
2802        Self {
2803            pack: pack.pack.clone(),
2804            registered: Some(pack),
2805            offset,
2806        }
2807    }
2808
2809    fn from_path(pack: PathBuf, offset: u64) -> Self {
2810        Self {
2811            pack,
2812            registered: None,
2813            offset,
2814        }
2815    }
2816
2817    fn pack_path(&self) -> &Path {
2818        &self.pack
2819    }
2820
2821    fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
2822        match &self.registered {
2823            Some(pack) => pack.bytes(&database.pack_bytes),
2824            None => database.cached_pack_bytes(&self.pack),
2825        }
2826    }
2827
2828    fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
2829        match &self.registered {
2830            Some(pack) => database.cached_pack_index(&pack.idx),
2831            None => database.cached_pack_index(&self.pack.with_extension("idx")),
2832        }
2833    }
2834
2835    fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
2836        match &self.registered {
2837            Some(pack) => Some(Arc::clone(&pack.delta_cache)),
2838            None => database.pack_delta_cache(&self.pack),
2839        }
2840    }
2841
2842    fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
2843        match &self.registered {
2844            Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
2845            None => database.pack_header_type_cache(&self.pack),
2846        }
2847    }
2848}
2849
/// Object database backed by an on-disk object directory: loose objects, packs
/// under `objects_dir/pack`, and alternates.
///
/// The cache fields are all `Arc`-wrapped, so cloning a handle shares the
/// caches with the original rather than copying them.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    /// Loose-object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    /// The object directory this database reads from.
    objects_dir: PathBuf,
    /// Alternate object directories; empty for handles built without alternates.
    alternates: Vec<PathBuf>,
    /// Object-id format (hash algorithm) the database was opened with.
    format: ObjectFormat,
    /// Loaded pack data, shared with [`RegisteredPack`] records.
    pack_bytes: PackBytesCache,
    /// Parsed pack indexes keyed by `.idx` path.
    pack_indexes: PackIndexCache,
    /// Parsed multi-pack-index files keyed by path.
    multi_pack_indexes: MultiPackIndexCache,
    /// Raw multi-pack-index OID lookup tables keyed by path.
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    /// Cached snapshot of the pack directory, if one has been built.
    pack_registry: PackRegistryCache,
    /// Byte-budgeted LRU of decoded objects keyed by object id.
    decoded: DecodedObjectCache,
    /// Per-pack delta-base caches keyed by pack path.
    pack_deltas: PackDeltaCaches,
    /// Per-pack header memos keyed by pack path.
    pack_header_types: PackHeaderTypeCaches,
    /// Graft points (`$GIT_DIR/shallow`), loaded lazily on the first
    /// [`ObjectReader::is_shallow_graft`] query. `$GIT_DIR` is taken to be
    /// the parent of `objects_dir`, matching the standard layout.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
2869
/// Stateful helper answering "does this object exist?" for many ids in a row.
///
/// It keeps the multi-pack-index lookup table, the pack registry snapshot, and
/// the per-pack index views it has already fetched, so consecutive `contains`
/// calls reuse them instead of re-resolving them per id.
#[derive(Debug)]
pub struct ObjectPresenceChecker {
    /// The database whose loose store, packs, and alternates are consulted.
    db: FileObjectDatabase,
    /// `db.objects_dir` joined with `pack`.
    pack_dir: PathBuf,
    /// Multi-pack-index OID lookup table, once `prepare_packs` has fetched one.
    midx: Option<Arc<MultiPackIndexOidLookup>>,
    /// Pack registry snapshot, once `prepare_registry` has fetched one.
    registry: Option<Arc<PackRegistrySnapshot>>,
    /// Index views parallel to `registry.packs`, filled in lazily per pack.
    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
    /// Index of the pack that answered the most recent scan hit; probed first.
    recent_pack: Option<usize>,
    /// Whether `prepare_packs` has run at least once.
    prepared_packs: bool,
    /// Whether `prepare_registry` has run at least once.
    prepared_registry: bool,
}
2881
2882impl ObjectPresenceChecker {
2883    fn new(db: FileObjectDatabase) -> Self {
2884        let pack_dir = db.objects_dir.join("pack");
2885        Self {
2886            db,
2887            pack_dir,
2888            midx: None,
2889            registry: None,
2890            registry_indexes: Vec::new(),
2891            recent_pack: None,
2892            prepared_packs: false,
2893            prepared_registry: false,
2894        }
2895    }
2896
2897    pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
2898        if oid.format() != self.db.format {
2899            return Err(GitError::InvalidObjectId(format!(
2900                "object {oid} uses {}, store uses {}",
2901                oid.format().name(),
2902                self.db.format.name()
2903            )));
2904        }
2905        if self.db.loose.exists(oid)? {
2906            return Ok(true);
2907        }
2908        if self.find_packed(oid, false)? {
2909            return Ok(true);
2910        }
2911        if self.find_packed(oid, true)? {
2912            return Ok(true);
2913        }
2914        for alternate in &self.db.alternates {
2915            if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
2916                return Ok(true);
2917            }
2918        }
2919        // Preserve the regular contains() reprepare-on-miss behavior for loose
2920        // objects that appeared after the fanout cache was populated.
2921        self.db.loose.invalidate_cache();
2922        self.db.loose.exists(oid)
2923    }
2924
2925    fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
2926        self.prepare_packs(force_rescan)?;
2927        if let Some(midx) = &self.midx
2928            && midx.contains(oid)
2929        {
2930            return Ok(true);
2931        }
2932        self.prepare_registry(force_rescan)?;
2933        self.find_in_registry(oid)
2934    }
2935
2936    fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
2937        if self.prepared_packs && !force_rescan {
2938            return Ok(());
2939        }
2940        let midx_path = self.pack_dir.join("multi-pack-index");
2941        self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
2942        self.prepared_packs = true;
2943        Ok(())
2944    }
2945
2946    fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
2947        if self.prepared_registry && !force_rescan {
2948            return Ok(());
2949        }
2950        let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
2951        let registry_changed = match self.registry.as_ref() {
2952            Some(cached) => !Arc::ptr_eq(cached, &registry),
2953            None => true,
2954        };
2955        if registry_changed {
2956            self.registry_indexes = vec![None; registry.packs.len()];
2957            self.recent_pack = None;
2958            self.registry = Some(registry);
2959        }
2960        self.prepared_registry = true;
2961        Ok(())
2962    }
2963
2964    fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
2965        let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
2966            return Ok(false);
2967        };
2968        if let Some(pack_index) = self
2969            .recent_pack
2970            .filter(|pack_index| *pack_index < registry.packs.len())
2971        {
2972            let index = self.registry_index(&registry, pack_index)?;
2973            if index.find(oid).is_some() {
2974                return Ok(true);
2975            }
2976        }
2977        for pack_index in 0..registry.packs.len() {
2978            if Some(pack_index) == self.recent_pack {
2979                continue;
2980            }
2981            let index = self.registry_index(&registry, pack_index)?;
2982            if index.find(oid).is_some() {
2983                self.recent_pack = Some(pack_index);
2984                return Ok(true);
2985            }
2986        }
2987        Ok(false)
2988    }
2989
2990    fn registry_index(
2991        &mut self,
2992        registry: &PackRegistrySnapshot,
2993        pack_index: usize,
2994    ) -> Result<Arc<PackIndexViewData>> {
2995        if self.registry_indexes.len() != registry.packs.len() {
2996            self.registry_indexes = vec![None; registry.packs.len()];
2997            self.recent_pack = None;
2998        }
2999        if let Some(index) = self
3000            .registry_indexes
3001            .get(pack_index)
3002            .and_then(|index| index.as_ref())
3003        {
3004            return Ok(Arc::clone(index));
3005        }
3006        let index = registry.packs[pack_index].index(self.db.format)?;
3007        if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
3008            *slot = Some(Arc::clone(&index));
3009        }
3010        Ok(index)
3011    }
3012}
3013
3014/// Parse `$GIT_DIR/shallow`: one hex object id per line. A missing file is an
3015/// empty set (the repository is not shallow); unparsable lines are ignored so
3016/// a torn write never poisons walks.
3017fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
3018    let Ok(contents) = std::fs::read_to_string(shallow_file) else {
3019        return HashSet::new();
3020    };
3021    contents
3022        .lines()
3023        .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
3024        .collect()
3025}
3026
3027pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
3028    env::var_os("GIT_OBJECT_DIRECTORY")
3029        .map(PathBuf::from)
3030        .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
3031}
3032
/// Locate the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is used verbatim.
/// 2. If `git_dir/commondir` is readable, its trimmed contents name the common
///    directory; a relative path is taken relative to `git_dir`. The result is
///    canonicalized when possible and returned as-is when that fails.
/// 3. Otherwise `git_dir` itself is the common directory.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(pointer.trim());
    let common = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    fs::canonicalize(&common).unwrap_or(common)
}
3050
3051pub fn repository_object_ids(
3052    git_dir: impl AsRef<Path>,
3053    format: ObjectFormat,
3054) -> Result<Vec<ObjectId>> {
3055    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
3056}
3057
3058pub fn object_ids_in_objects_dir(
3059    objects_dir: impl AsRef<Path>,
3060    format: ObjectFormat,
3061) -> Result<Vec<ObjectId>> {
3062    let objects_dir = objects_dir.as_ref();
3063    let mut oids = HashSet::new();
3064    collect_loose_object_ids(objects_dir, format, &mut oids)?;
3065    collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
3066    let mut oids = oids.into_iter().collect::<Vec<_>>();
3067    oids.sort_by_key(ObjectId::to_hex);
3068    Ok(oids)
3069}
3070
3071fn collect_loose_object_ids(
3072    objects_dir: &Path,
3073    format: ObjectFormat,
3074    oids: &mut HashSet<ObjectId>,
3075) -> Result<()> {
3076    if !objects_dir.exists() {
3077        return Ok(());
3078    }
3079    let hex_len = format.hex_len();
3080    for entry in fs::read_dir(objects_dir)? {
3081        let entry = entry?;
3082        if !entry.file_type()?.is_dir() {
3083            continue;
3084        }
3085        let name = entry.file_name();
3086        let Some(fanout) = name.to_str() else {
3087            continue;
3088        };
3089        if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3090            continue;
3091        }
3092        for object_entry in fs::read_dir(entry.path())? {
3093            let object_entry = object_entry?;
3094            if !object_entry.file_type()?.is_file() {
3095                continue;
3096            }
3097            let name = object_entry.file_name();
3098            let Some(suffix) = name.to_str() else {
3099                continue;
3100            };
3101            if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3102                continue;
3103            }
3104            oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
3105        }
3106    }
3107    Ok(())
3108}
3109
3110fn collect_loose_fanout_object_ids(
3111    objects_dir: &Path,
3112    format: ObjectFormat,
3113    fanout: u8,
3114    oids: &mut HashSet<ObjectId>,
3115) -> Result<()> {
3116    let fanout_hex = format!("{fanout:02x}");
3117    let fanout_dir = objects_dir.join(&fanout_hex);
3118    let entries = match fs::read_dir(&fanout_dir) {
3119        Ok(entries) => entries,
3120        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
3121        Err(err) => return Err(GitError::Io(err.to_string())),
3122    };
3123    let hex_len = format.hex_len();
3124    for object_entry in entries {
3125        let object_entry = object_entry?;
3126        let name = object_entry.file_name();
3127        let Some(suffix) = name.to_str() else {
3128            continue;
3129        };
3130        if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3131            continue;
3132        }
3133        oids.insert(ObjectId::from_hex(
3134            format,
3135            &format!("{fanout_hex}{suffix}"),
3136        )?);
3137    }
3138    Ok(())
3139}
3140
/// Cache of loose-object ids, starting out empty (`Default`).
// NOTE(review): usage is not visible in this part of the file; the field
// notes below are inferred from names and types — confirm against the
// loose-object store that owns this cache.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    /// Fanout bytes recorded as loaded.
    // NOTE(review): presumably the first id byte of each fanout directory
    // whose contents were already scanned into `objects` — confirm.
    loaded_fanouts: HashSet<u8>,
    /// Object ids collected so far.
    objects: HashSet<ObjectId>,
}
3146
3147/// Every object id resolvable through a pack (any `.idx` or the
3148/// multi-pack-index) under `objects_dir/pack`. Used by `--unpacked`
3149/// filtering: an object is "unpacked" when absent from this set, regardless
3150/// of a loose copy also existing.
3151pub fn packed_object_ids(
3152    objects_dir: impl AsRef<Path>,
3153    format: ObjectFormat,
3154) -> Result<HashSet<ObjectId>> {
3155    let mut oids = HashSet::new();
3156    collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
3157    Ok(oids)
3158}
3159
3160fn collect_packed_object_ids(
3161    pack_dir: &Path,
3162    format: ObjectFormat,
3163    oids: &mut HashSet<ObjectId>,
3164) -> Result<()> {
3165    if !pack_dir.exists() {
3166        return Ok(());
3167    }
3168    let midx_path = pack_dir.join("multi-pack-index");
3169    if midx_path.exists() {
3170        let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
3171        oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
3172    }
3173    for entry in fs::read_dir(pack_dir)? {
3174        let path = entry?.path();
3175        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3176            continue;
3177        }
3178        let index = PackIndex::parse(&fs::read(path)?, format)?;
3179        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
3180    }
3181    Ok(())
3182}
3183
3184impl FileObjectDatabase {
    /// The object-id format (hash algorithm) this database was opened with.
    ///
    /// This is the `format` value passed to the constructor, returned by value.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
3189
    /// The repository object directory this database reads from.
    ///
    /// This is the path the database was constructed with; alternates are not
    /// included.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
3194
3195    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3196        let objects_dir = objects_dir.into();
3197        Self {
3198            loose: LooseObjectStore::new(objects_dir.clone(), format),
3199            alternates: alternate_object_dirs(&objects_dir),
3200            objects_dir,
3201            format,
3202            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3203            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3204            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3205            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3206            pack_registry: Arc::new(Mutex::new(None)),
3207            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3208            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3209            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3210            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3211        }
3212    }
3213
3214    fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3215        let objects_dir = objects_dir.into();
3216        Self {
3217            loose: LooseObjectStore::new(objects_dir.clone(), format),
3218            alternates: Vec::new(),
3219            objects_dir,
3220            format,
3221            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3222            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3223            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3224            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3225            pack_registry: Arc::new(Mutex::new(None)),
3226            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3227            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3228            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3229            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3230        }
3231    }
3232
3233    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
3234        Self::new(repository_objects_dir(git_dir), format)
3235    }
3236
3237    /// Drop cached pack registries, indexes, and decoded objects so the next read
3238    /// sees packs/objects installed after this handle was created (e.g. after
3239    /// `fetch` or `install_pack`). Long-lived [`Repository`] sessions call this
3240    /// via the owning repository's `refresh_objects` hook.
3241    pub fn refresh_read_cache(&self) {
3242        if let Ok(mut cache) = self.pack_registry.lock() {
3243            *cache = None;
3244        }
3245        if let Ok(mut cache) = self.pack_indexes.lock() {
3246            cache.clear();
3247        }
3248        if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3249            cache.clear();
3250        }
3251        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3252            cache.clear();
3253        }
3254        if let Ok(mut cache) = self.pack_bytes.lock() {
3255            cache.clear();
3256        }
3257        if let Ok(mut cache) = self.pack_deltas.lock() {
3258            cache.clear();
3259        }
3260        if let Ok(mut cache) = self.pack_header_types.lock() {
3261            cache.clear();
3262        }
3263        if let Ok(mut cache) = self.decoded.lock() {
3264            cache.clear();
3265        }
3266        self.loose.invalidate_cache();
3267    }
3268
3269    pub fn loose(&self) -> &LooseObjectStore {
3270        &self.loose
3271    }
3272
3273    pub fn presence_checker(&self) -> ObjectPresenceChecker {
3274        ObjectPresenceChecker::new(self.clone())
3275    }
3276
3277    pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3278        self.install_pack_with_options(pack, RawPackInstallOptions::default())
3279    }
3280
3281    pub fn install_pack_with_options(
3282        &self,
3283        pack: &PackWrite,
3284        options: RawPackInstallOptions,
3285    ) -> Result<PackInstallResult> {
3286        if pack.checksum.format() != self.format {
3287            return Err(GitError::InvalidObjectId(format!(
3288                "pack checksum uses {}, store uses {}",
3289                pack.checksum.format().name(),
3290                self.format.name()
3291            )));
3292        }
3293        for entry in &pack.entries {
3294            if entry.oid.format() != self.format {
3295                return Err(GitError::InvalidObjectId(format!(
3296                    "pack entry {} uses {}, store uses {}",
3297                    entry.oid,
3298                    entry.oid.format().name(),
3299                    self.format.name()
3300                )));
3301            }
3302        }
3303        let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
3304        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3305        if canonical_index.pack_checksum != pack.checksum
3306            || parsed_index.pack_checksum != pack.checksum
3307        {
3308            return Err(GitError::InvalidFormat(
3309                "pack and index checksums do not match pack write".into(),
3310            ));
3311        }
3312        if pack.index != canonical_index.index {
3313            return Err(GitError::InvalidFormat(
3314                "pack index does not match pack contents".into(),
3315            ));
3316        }
3317
3318        let pack_dir = self.objects_dir.join("pack");
3319        fs::create_dir_all(&pack_dir)?;
3320        let pack_name = format!("pack-{}", pack.checksum.to_hex());
3321        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3322        let index_path = pack_dir.join(format!("{pack_name}.idx"));
3323        if !pack_path.exists() || !index_path.exists() {
3324            write_pack_component(&pack_path, &pack.pack)?;
3325            write_pack_component(&index_path, &pack.index)?;
3326        }
3327        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3328        Ok(PackInstallResult {
3329            pack_name,
3330            pack_path,
3331            index_path,
3332            promisor_path,
3333            object_ids: canonical_index
3334                .entries
3335                .iter()
3336                .map(|entry| entry.oid)
3337                .collect(),
3338        })
3339    }
3340
3341    /// Install a pack that was produced in this process by [`PackFile::write_packed`].
3342    ///
3343    /// Unlike [`Self::install_raw_pack_with_options`], this does not re-inflate
3344    /// every pack entry to rebuild the index. It validates the generated pack
3345    /// trailer and generated index against the writer's object ids, CRCs, and
3346    /// offsets, then writes those bytes directly. Use the raw installer for
3347    /// arbitrary pack bytes received from an untrusted transport.
3348    pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3349        self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
3350    }
3351
3352    pub fn install_written_pack_with_options(
3353        &self,
3354        pack: &PackWrite,
3355        options: RawPackInstallOptions,
3356    ) -> Result<PackInstallResult> {
3357        validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
3358        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3359        if parsed_index.pack_checksum != pack.checksum {
3360            return Err(GitError::InvalidFormat(
3361                "pack write index checksum does not match pack".into(),
3362            ));
3363        }
3364        if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
3365            return Err(GitError::InvalidFormat(
3366                "pack write index does not match generated entries".into(),
3367            ));
3368        }
3369        self.install_generated_pack_unchecked(pack, options)
3370    }
3371
3372    fn install_generated_pack_unchecked(
3373        &self,
3374        pack: &PackWrite,
3375        options: RawPackInstallOptions,
3376    ) -> Result<PackInstallResult> {
3377        let pack_dir = self.objects_dir.join("pack");
3378        fs::create_dir_all(&pack_dir)?;
3379        let pack_name = format!("pack-{}", pack.checksum.to_hex());
3380        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3381        let index_path = pack_dir.join(format!("{pack_name}.idx"));
3382        if !pack_path.exists() || !index_path.exists() {
3383            write_pack_component(&pack_path, &pack.pack)?;
3384            write_pack_component(&index_path, &pack.index)?;
3385        }
3386        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3387        Ok(PackInstallResult {
3388            pack_name,
3389            pack_path,
3390            index_path,
3391            promisor_path,
3392            object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
3393        })
3394    }
3395
3396    pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
3397        self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
3398    }
3399
3400    pub fn install_raw_pack_with_options(
3401        &self,
3402        pack_bytes: &[u8],
3403        options: RawPackInstallOptions,
3404    ) -> Result<PackInstallResult> {
3405        let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
3406        let pack_dir = self.objects_dir.join("pack");
3407        fs::create_dir_all(&pack_dir)?;
3408        let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
3409        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3410        let index_path = pack_dir.join(format!("{pack_name}.idx"));
3411        if !pack_path.exists() || !index_path.exists() {
3412            write_pack_component(&pack_path, pack_bytes)?;
3413            write_pack_component(&index_path, &built.index)?;
3414        }
3415        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3416        Ok(PackInstallResult {
3417            pack_name,
3418            pack_path,
3419            index_path,
3420            promisor_path,
3421            object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
3422        })
3423    }
3424
3425    pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
3426        if self.loose.exists(oid)? {
3427            return Ok(true);
3428        }
3429        if self.find_pack_containing(oid)?.is_some() {
3430            return Ok(true);
3431        }
3432        for alternate in &self.alternates {
3433            if Self::without_alternates(alternate, self.format).contains(oid)? {
3434                return Ok(true);
3435            }
3436        }
3437        // Reprepare-on-miss: a cached negative loose verdict may predate a
3438        // sibling write. Drop it and exact-probe once before reporting absence.
3439        self.loose.invalidate_cache();
3440        self.loose.exists(oid)
3441    }
3442
3443    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3444        let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
3445            .into_iter()
3446            .collect::<HashSet<_>>();
3447        for alternate in &self.alternates {
3448            oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
3449        }
3450        let mut oids = oids.into_iter().collect::<Vec<_>>();
3451        oids.sort_by_key(ObjectId::to_hex);
3452        Ok(oids)
3453    }
3454
3455    pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3456        if let Some(disk_size) = self.loose.disk_size(oid)? {
3457            return Ok(Some(ObjectStorageInfo {
3458                disk_size,
3459                deltabase: zero_oid(self.format)?,
3460            }));
3461        }
3462        if let Some(info) = self.packed_object_storage_info(oid)? {
3463            return Ok(Some(info));
3464        }
3465        for alternate in &self.alternates {
3466            if let Some(info) =
3467                Self::without_alternates(alternate, self.format).object_storage_info(oid)?
3468            {
3469                return Ok(Some(info));
3470            }
3471        }
3472        // Reprepare-on-miss: drop any stale negative loose cache and exact-probe
3473        // once before reporting absence (see `read_object`).
3474        self.loose.invalidate_cache();
3475        if let Some(disk_size) = self.loose.disk_size(oid)? {
3476            return Ok(Some(ObjectStorageInfo {
3477                disk_size,
3478                deltabase: zero_oid(self.format)?,
3479            }));
3480        }
3481        Ok(None)
3482    }
3483
3484    pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
3485        validate_object_id_prefix(self.format, prefix)?;
3486        let mut matches = Vec::new();
3487        for oid in self.object_ids()? {
3488            if object_id_matches_prefix(&oid, prefix) {
3489                matches.push(oid);
3490            }
3491        }
3492        Ok(match matches.len() {
3493            0 => ObjectPrefixResolution::Missing,
3494            1 => ObjectPrefixResolution::Unique(matches.remove(0)),
3495            _ => ObjectPrefixResolution::Ambiguous(matches),
3496        })
3497    }
3498
3499    /// The object type and content size of `oid` without decoding its full body —
3500    /// git's `cat-file --batch-check` fast path. Tries the decoded-object cache,
3501    /// then loose storage (inflating only the framing header), then packs (reading
3502    /// the entry header and, for deltas, only the delta's leading varints), then
3503    /// alternates. Returns `Ok(None)` if the object is not present.
3504    ///
3505    /// Unlike [`ObjectReader::read_object`], this never materializes the body, so it
3506    /// stays cheap on huge blobs and deep delta chains. It does not populate the
3507    /// decoded-object cache (nothing is decoded).
3508    pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
3509        if implied_empty_tree_object(self.format, oid).is_some() {
3510            return Ok(Some((ObjectType::Tree, 0)));
3511        }
3512        if let Ok(mut cache) = self.decoded.lock()
3513            && let Some(object) = cache.get(oid)
3514        {
3515            return Ok(Some((object.object_type, object.body.len() as u64)));
3516        }
3517        if let Some(header) = self.loose.read_header(oid)? {
3518            return Ok(Some(header));
3519        }
3520        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
3521            let bytes = pack_lookup.pack_bytes(self)?;
3522            // Per-pack offset->type memo so the ofs-delta chain walk that resolves
3523            // a packed object's type runs at most once per chain across the batch,
3524            // instead of re-walking (and re-inflating each link's leading varints)
3525            // on every header read — the sley#26 super-linear cat-file --batch-check.
3526            let type_cache = pack_lookup.header_type_cache(self);
3527            let resolve_ref_base = |base: &ObjectId| {
3528                self.read_object_header(base)
3529                    .map(|header| header.map(|(t, _)| t))
3530            };
3531            let header = match &type_cache {
3532                Some(cache) => {
3533                    let mut adapter = PackHeaderTypeCacheAdapter(cache);
3534                    sley_pack::read_object_header_at_with_cache(
3535                        &bytes,
3536                        pack_lookup.offset,
3537                        self.format,
3538                        resolve_ref_base,
3539                        &mut adapter,
3540                    )?
3541                }
3542                None => sley_pack::read_object_header_at(
3543                    &bytes,
3544                    pack_lookup.offset,
3545                    self.format,
3546                    resolve_ref_base,
3547                )?,
3548            };
3549            return Ok(Some(header));
3550        }
3551        for alternate in &self.alternates {
3552            if let Some(header) =
3553                Self::without_alternates(alternate, self.format).read_object_header(oid)?
3554            {
3555                return Ok(Some(header));
3556            }
3557        }
3558        // Reprepare-on-miss: discard any stale negative loose cache and retry an
3559        // exact path probe once before reporting absence (see `read_object`).
3560        self.loose.invalidate_cache();
3561        if let Some(header) = self.loose.read_header(oid)? {
3562            return Ok(Some(header));
3563        }
3564        Ok(None)
3565    }
3566
3567    fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
3568        // Memory-capped decoded-object cache first (delta-base reuse for ref-delta
3569        // bases that resolve back through the store + repeated whole-object reads).
3570        if let Ok(mut cache) = self.decoded.lock()
3571            && let Some(object) = cache.get(oid)
3572        {
3573            return Ok(Some(object));
3574        }
3575        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3576            return Ok(None);
3577        };
3578        self.read_packed_object_at_lookup(oid, &pack_lookup).map(Some)
3579    }
3580
3581    fn read_packed_object_at_lookup(
3582        &self,
3583        oid: &ObjectId,
3584        pack_lookup: &PackLookup,
3585    ) -> Result<Arc<EncodedObject>> {
3586        if let Ok(mut cache) = self.decoded.lock()
3587            && let Some(object) = cache.get(oid)
3588        {
3589            return Ok(object);
3590        }
3591        let bytes = pack_lookup.pack_bytes(self)?;
3592        // Per-pack delta-base cache (keyed by in-pack offset). Resolving an
3593        // ofs-delta chain reuses already-decoded bases instead of re-inflating the
3594        // whole chain on every read. Scoped to this pack's path so an offset key is
3595        // never applied to the wrong pack's bytes.
3596        let delta_cache = pack_lookup.delta_cache(self);
3597        let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
3598        // Decode only this object at its offset (plus its delta-base chain). A
3599        // ref-delta base resolves through the full store (loose / other packs) and
3600        // reuses the decoded-object cache. No cache lock is held across the decode,
3601        // so the recursive resolver re-entry (which may re-enter read_object) is
3602        // safe.
3603        let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
3604        let object = match &delta_adapter {
3605            Some(adapter) => sley_pack::read_object_at_with_cache_arc(
3606                &bytes,
3607                pack_lookup.offset,
3608                self.format,
3609                resolve_ref_base,
3610                adapter,
3611            )?,
3612            None => sley_pack::read_object_at_arc(
3613                &bytes,
3614                pack_lookup.offset,
3615                self.format,
3616                resolve_ref_base,
3617            )?,
3618        };
3619        // Trust the index → offset mapping rather than re-hashing every decoded
3620        // object on read (see `verify_reads_enabled`); this re-hash dominated
3621        // bulk-read cost. Opt back in with `SLEY_VERIFY_READS` for a paranoid check.
3622        if verify_reads_enabled() {
3623            let actual = object.object_id(self.format)?;
3624            if actual != *oid {
3625                return Err(GitError::InvalidObject(format!(
3626                    "pack object id mismatch: index says {oid}, decoded {actual}"
3627                )));
3628            }
3629        }
3630        if let Ok(mut cache) = self.decoded.lock() {
3631            cache.put(*oid, Arc::clone(&object));
3632        }
3633        Ok(object)
3634    }
3635
3636    /// The per-pack delta-base cache for `pack_path`, creating it on first use.
3637    /// Returns `None` only if the shared map's lock is poisoned, in which case the
3638    /// caller falls back to an uncached decode (correctness preserved).
3639    fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
3640        let mut caches = self.pack_deltas.lock().ok()?;
3641        let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
3642            Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
3643        });
3644        Some(Arc::clone(cache))
3645    }
3646
3647    /// The per-pack header-type memo for `pack_path`, creating it on first use.
3648    /// Returns `None` only if the shared map's lock is poisoned, in which case the
3649    /// caller falls back to an unmemoized header walk (correctness preserved).
3650    fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
3651        let mut caches = self.pack_header_types.lock().ok()?;
3652        let cache = caches
3653            .entry(pack_path.to_path_buf())
3654            .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
3655        Some(Arc::clone(cache))
3656    }
3657
3658    /// Backing bytes of the pack at `pack_path`, loaded at most once per database
3659    /// handle (cached, shared across clones). Memory-mapped under the `mmap` feature,
3660    /// otherwise read into the heap. On a poisoned lock it falls back to loading
3661    /// without caching, preserving correctness.
3662    fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
3663        if let Ok(cache) = self.pack_bytes.lock()
3664            && let Some(bytes) = cache.get(pack_path)
3665        {
3666            return Ok(Arc::clone(bytes));
3667        }
3668        let bytes = Arc::new(load_pack_data(pack_path)?);
3669        if let Ok(mut cache) = self.pack_bytes.lock() {
3670            cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
3671        }
3672        Ok(bytes)
3673    }
3674
3675    /// Parsed index for the `.idx` at `index_path`, parsed at most once per
3676    /// database handle. On a poisoned lock it falls back to parsing without
3677    /// caching, preserving correctness.
3678    fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
3679        if let Ok(cache) = self.pack_indexes.lock()
3680            && let Some(index) = cache.get(index_path)
3681        {
3682            return Ok(Arc::clone(index));
3683        }
3684        let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
3685        if let Ok(mut cache) = self.pack_indexes.lock() {
3686            cache.insert(index_path.to_path_buf(), Arc::clone(&index));
3687        }
3688        Ok(index)
3689    }
3690
3691    fn cached_multi_pack_index_oid_lookup(
3692        &self,
3693        midx_path: &Path,
3694    ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
3695        if !midx_path.exists() {
3696            return Ok(None);
3697        }
3698        if let Ok(cache) = self.multi_pack_oid_lookups.lock()
3699            && let Some(midx) = cache.get(midx_path)
3700        {
3701            return Ok(Some(Arc::clone(midx)));
3702        }
3703        let bytes = Arc::new(fs::read(midx_path)?);
3704        let midx = Arc::new(MultiPackIndexOidLookup::parse(bytes, self.format)?);
3705        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3706            cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
3707        }
3708        Ok(Some(midx))
3709    }
3710
3711    /// Registry snapshot for this database's pack directory. With `force_rescan`,
3712    /// the directory is re-read; when the fingerprint and pack set match the
3713    /// cached snapshot, the same `Arc` is returned so miss handling can tell that
3714    /// no new packs appeared.
3715    fn cached_pack_registry(
3716        &self,
3717        pack_dir: &Path,
3718        force_rescan: bool,
3719    ) -> Result<Arc<PackRegistrySnapshot>> {
3720        if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
3721            return Ok(registry);
3722        }
3723        let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
3724        if let Ok(mut cache) = self.pack_registry.lock() {
3725            match cache.as_ref() {
3726                Some(existing)
3727                    if existing.fingerprint == scanned.fingerprint
3728                        && same_registered_pack_set(&existing.packs, &scanned.packs) =>
3729                {
3730                    return Ok(Arc::clone(existing));
3731                }
3732                _ => {
3733                    *cache = Some(Arc::clone(&scanned));
3734                }
3735            }
3736        }
3737        Ok(scanned)
3738    }
3739
3740    fn find_in_pack_registry(
3741        &self,
3742        registry: Arc<PackRegistrySnapshot>,
3743        oid: &ObjectId,
3744    ) -> Result<Option<PackLookup>> {
3745        let hinted_pack_index = registry.cached_hint();
3746        if let Some(pack_index) = hinted_pack_index {
3747            let pack = &registry.packs[pack_index];
3748            let index = pack.index(self.format)?;
3749            if let Some(entry) = index.find(oid) {
3750                return Ok(Some(PackLookup::from_registered(
3751                    Arc::clone(pack),
3752                    entry.offset,
3753                )));
3754            }
3755        }
3756        for (pack_index, pack) in registry.packs.iter().enumerate() {
3757            if Some(pack_index) == hinted_pack_index {
3758                continue;
3759            }
3760            let index = pack.index(self.format)?;
3761            if let Some(entry) = index.find(oid) {
3762                registry.remember_hint(pack_index);
3763                return Ok(Some(PackLookup::from_registered(
3764                    Arc::clone(pack),
3765                    entry.offset,
3766                )));
3767            }
3768        }
3769        Ok(None)
3770    }
3771
3772    /// Read `oid` from any pack *other than* the one named by `exclude`, used as
3773    /// a corruption fallback: a redundant packed copy survives one pack's
3774    /// damage. Scans the on-disk `.idx` files directly (bypassing the registry
3775    /// cache, whose first hit is the excluded pack) and decodes from the first
3776    /// other pack that both indexes the object and parses cleanly.
3777    fn read_packed_object_from_other_packs(
3778        &self,
3779        oid: &ObjectId,
3780        exclude: &PackLookup,
3781    ) -> Result<Option<Arc<EncodedObject>>> {
3782        let pack_dir = self.objects_dir.join("pack");
3783        let Ok(entries) = fs::read_dir(&pack_dir) else {
3784            return Ok(None);
3785        };
3786        let excluded_pack = exclude.pack_path().to_path_buf();
3787        for entry in entries {
3788            let idx_path = entry?.path();
3789            if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3790                continue;
3791            }
3792            let pack_path = idx_path.with_extension("pack");
3793            if pack_path == excluded_pack {
3794                continue;
3795            }
3796            let Ok(idx_bytes) = fs::read(&idx_path) else {
3797                continue;
3798            };
3799            let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
3800                continue;
3801            };
3802            let Some(entry) = index.find(oid) else {
3803                continue;
3804            };
3805            let candidate = PackLookup::from_path(pack_path, entry.offset);
3806            if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
3807                return Ok(Some(object));
3808            }
3809        }
3810        Ok(None)
3811    }
3812
3813    fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
3814        if oid.format() != self.format {
3815            return Err(GitError::InvalidObjectId(format!(
3816                "object {oid} uses {}, store uses {}",
3817                oid.format().name(),
3818                self.format.name()
3819            )));
3820        }
3821        let pack_dir = self.objects_dir.join("pack");
3822        // Hot path: a previously cached pack registry or multi-pack-index already
3823        // names every pack, and locating `oid` in them is pure in-memory index
3824        // work. Try that first so a warm handle does not parse indexes or hash
3825        // pack paths on every lookup.
3826        if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
3827            && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
3828        {
3829            return Ok(Some(pack_paths));
3830        }
3831        if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
3832            && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
3833        {
3834            return Ok(Some(pack_paths));
3835        }
3836
3837        if !pack_dir.exists() {
3838            return Ok(None);
3839        }
3840        if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
3841            return Ok(Some(pack_paths));
3842        }
3843        // Search the cached registry first. On a complete miss, re-scan the
3844        // directory once (picking up any pack added since the registry was
3845        // cached) and search again, so newly written packs are still found.
3846        let registry = self.cached_pack_registry(&pack_dir, false)?;
3847        if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(&registry), oid)? {
3848            return Ok(Some(pack_paths));
3849        }
3850        let refreshed = self.cached_pack_registry(&pack_dir, true)?;
3851        if Arc::ptr_eq(&registry, &refreshed) {
3852            // The re-scan produced the same registry, so nothing new appeared.
3853            return Ok(None);
3854        }
3855        self.find_in_pack_registry(refreshed, oid)
3856    }
3857
3858    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3859        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3860            return Ok(None);
3861        };
3862        let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
3863        let trailer_offset = pack_len
3864            .checked_sub(self.format.raw_len() as u64)
3865            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
3866        let index = pack_lookup.pack_index(self)?;
3867        let pack = pack_lookup.pack_bytes(self)?;
3868        let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
3869        let delta_base_offset = match &delta_base {
3870            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
3871            Some(PackDeltaBase::Ref(_)) | None => None,
3872        };
3873        let offset_info = scan_pack_index_offsets(
3874            &index,
3875            pack_lookup.offset,
3876            trailer_offset,
3877            delta_base_offset,
3878        )?;
3879        let disk_size = offset_info
3880            .end_offset
3881            .checked_sub(pack_lookup.offset)
3882            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
3883        let deltabase = match delta_base {
3884            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
3885                // scan_pack_index_offsets returns Err when delta_base_offset is
3886                // Some but no matching entry is found, so this is unreachable for
3887                // valid packs; propagate as an error rather than panic to keep a
3888                // malformed pack from taking down the process if that invariant
3889                // ever drifts.
3890                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
3891            })?,
3892            Some(PackDeltaBase::Ref(oid)) => oid,
3893            None => zero_oid(self.format)?,
3894        };
3895        Ok(Some(ObjectStorageInfo {
3896            disk_size,
3897            deltabase,
3898        }))
3899    }
3900
3901    fn find_midx_pack_containing(
3902        &self,
3903        pack_dir: &Path,
3904        oid: &ObjectId,
3905    ) -> Result<Option<PackLookup>> {
3906        let midx_path = pack_dir.join("multi-pack-index");
3907        let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
3908            return Ok(None);
3909        };
3910        self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
3911    }
3912
3913    fn midx_oid_lookup_pack_paths(
3914        &self,
3915        pack_dir: &Path,
3916        midx: &MultiPackIndexOidLookup,
3917        oid: &ObjectId,
3918    ) -> Result<Option<PackLookup>> {
3919        let Some(entry) = midx.find(oid)? else {
3920            return Ok(None);
3921        };
3922        let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
3923            return Err(GitError::InvalidFormat(
3924                "multi-pack-index object points past pack table".into(),
3925            ));
3926        };
3927        let pack_file_name = pack_name
3928            .strip_suffix(".idx")
3929            .map(|stem| format!("{stem}.pack"))
3930            .unwrap_or_else(|| pack_name.to_string());
3931        let pack = pack_dir.join(pack_file_name);
3932        Ok(Some(PackLookup::from_path(pack, entry.offset)))
3933    }
3934
3935    fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
3936        let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
3937        let cache = self.multi_pack_oid_lookups.lock().ok()?;
3938        cache.get(&midx_path).map(Arc::clone)
3939    }
3940
3941    /// The pack registry for `pack_dir` *only if already scanned and cached* —
3942    /// never touches the filesystem. Used by the lookup hot path to skip
3943    /// per-object pack-dir metadata checks once a handle is warm. A cold cache
3944    /// returns `None`, so the caller falls back to the scanning path. A complete
3945    /// miss still forces one rescan, preserving the new-pack discovery semantics.
3946    fn cached_loaded_pack_registry(
3947        &self,
3948        _pack_dir: &Path,
3949    ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
3950        let cache = match self.pack_registry.lock() {
3951            Ok(cache) => cache,
3952            Err(_) => return Ok(None),
3953        };
3954        Ok(cache.as_ref().map(Arc::clone))
3955    }
3956}
3957
3958fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
3959    if prefix.len() < 4 || prefix.len() > format.hex_len() {
3960        return Err(GitError::InvalidObjectId(format!(
3961            "expected 4 to {} hex digits for {}, got {}",
3962            format.hex_len(),
3963            format.name(),
3964            prefix.len()
3965        )));
3966    }
3967    if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3968        return Err(GitError::InvalidObjectId(format!(
3969            "non-hex object id prefix {prefix}"
3970        )));
3971    }
3972    Ok(())
3973}
3974
3975fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
3976    oid.to_hex()
3977        .as_bytes()
3978        .iter()
3979        .zip(prefix.as_bytes())
3980        .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
3981}
3982
3983fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
3984    match fs::metadata(pack_dir) {
3985        Ok(metadata) => Ok(metadata.modified().ok()),
3986        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
3987        Err(err) => Err(GitError::Io(err.to_string())),
3988    }
3989}
3990
3991/// Scan `pack_dir` for `.idx` files that have a matching `.pack` sibling and
3992/// parse each index into a registered pack. An `.idx` without its `.pack` is
3993/// skipped (an orphan index cannot serve objects), matching the prior per-read
3994/// behavior.
3995fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
3996    let modified = pack_dir_modified(pack_dir)?;
3997    let entries = match fs::read_dir(pack_dir) {
3998        Ok(entries) => entries,
3999        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4000            return Ok(PackRegistrySnapshot::new(
4001                PackDirFingerprint {
4002                    modified,
4003                    idx_count: 0,
4004                    pack_count: 0,
4005                },
4006                Vec::new(),
4007            ));
4008        }
4009        Err(err) => return Err(GitError::Io(err.to_string())),
4010    };
4011
4012    let mut idx_paths = Vec::new();
4013    let mut idx_count = 0;
4014    let mut pack_count = 0;
4015    for entry in entries {
4016        let entry = entry?;
4017        let path = entry.path();
4018        match path.extension().and_then(|ext| ext.to_str()) {
4019            Some("idx") => {
4020                idx_count += 1;
4021                idx_paths.push(path);
4022            }
4023            Some("pack") => {
4024                pack_count += 1;
4025            }
4026            _ => {}
4027        }
4028    }
4029
4030    let mut packs = Vec::new();
4031    for idx in idx_paths {
4032        let pack = idx.with_extension("pack");
4033        let Ok(metadata) = fs::metadata(&pack) else {
4034            continue;
4035        };
4036        let modified = pack_sort_modified(&metadata);
4037        packs.push((modified, metadata.len(), Arc::new(RegisteredPack::new(idx, pack))));
4038    }
4039    // Git keeps a most-recently-used pack order; seed ours with newer/larger
4040    // packs before falling back to the path. In repositories with many packs,
4041    // this avoids parsing a long run of unrelated `.idx` files before the first
4042    // lookup establishes the recent-pack hint.
4043    packs.sort_by(|left, right| {
4044        right
4045            .0
4046            .cmp(&left.0)
4047            .then_with(|| right.1.cmp(&left.1))
4048            .then_with(|| left.2.idx.cmp(&right.2.idx))
4049    });
4050    let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
4051    Ok(PackRegistrySnapshot::new(
4052        PackDirFingerprint {
4053            modified,
4054            idx_count,
4055            pack_count,
4056        },
4057        packs,
4058    ))
4059}
4060
/// Sort key for a pack file's modification time: `(seconds, nanoseconds)`
/// since the Unix epoch. Falls back to `(0, 0)` when the platform cannot
/// report the time or the time predates the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let since_epoch = metadata
        .modified()
        .ok()
        .and_then(|modified| modified.duration_since(std::time::UNIX_EPOCH).ok());
    match since_epoch {
        Some(elapsed) => (elapsed.as_secs(), elapsed.subsec_nanos()),
        None => (0, 0),
    }
}
4073
4074/// Whether two pack registries reference the same pack/index paths (order is
4075/// already normalized by [`scan_pack_registry`]).
4076fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
4077    left.len() == right.len()
4078        && left
4079            .iter()
4080            .zip(right.iter())
4081            .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
4082}
4083
/// Collect the alternate object directories configured for `objects_dir`.
///
/// Two sources are consulted, in this order:
///
/// 1. The `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    the platform's path-list rules via [`env::split_paths`] (`:` on Unix,
///    `;` on Windows). Splitting on the raw `OsStr` keeps non-UTF-8 directory
///    names intact and does not cut Windows drive prefixes such as `C:\` in
///    half. Empty components are dropped.
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines and
///    lines starting with `#` are ignored, a trailing `\r` is stripped, and
///    relative paths are resolved against `objects_dir`. Lines that are not
///    valid UTF-8 are skipped. An unreadable or missing file contributes
///    nothing.
///
/// The function never fails: every problem simply yields fewer entries.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        alternates.extend(env::split_paths(&value).filter(|dir| !dir.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
4114
4115impl ObjectReader for FileObjectDatabase {
4116    fn has_shallow_grafts(&self) -> bool {
4117        !self
4118            .shallow_grafts
4119            .get_or_init(|| {
4120                let shallow_file = self
4121                    .objects_dir
4122                    .parent()
4123                    .map(|git_dir| git_dir.join("shallow"));
4124                match shallow_file {
4125                    Some(path) => read_shallow_grafts(&path, self.format),
4126                    None => HashSet::new(),
4127                }
4128            })
4129            .is_empty()
4130    }
4131
4132    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
4133        self.shallow_grafts
4134            .get_or_init(|| {
4135                let shallow_file = self
4136                    .objects_dir
4137                    .parent()
4138                    .map(|git_dir| git_dir.join("shallow"));
4139                match shallow_file {
4140                    Some(path) => read_shallow_grafts(&path, self.format),
4141                    None => HashSet::new(),
4142                }
4143            })
4144            .contains(oid)
4145    }
4146
4147    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4148        if let Some(object) = implied_empty_tree_object(self.format, oid) {
4149            return Ok(object);
4150        }
4151        // A corrupt loose copy must not shadow a good packed copy: git's
4152        // `oid_object_info_extended` consults every source, so a repacked object
4153        // whose loose file was later corrupted still reads fine from the pack. If
4154        // a packed copy exists, prefer it WITHOUT touching the corrupt loose file
4155        // (which would otherwise emit a spurious `inflate:` diagnostic on each
4156        // probe). Only when no pack copy exists do we read (and, if corrupt,
4157        // surface the error from) the loose file.
4158        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
4159            match self.read_packed_object_at_lookup(oid, &pack_lookup) {
4160                Ok(object) => return Ok(object),
4161                Err(GitError::NotFound(_)) => {}
4162                // A corrupt packed copy must not be fatal when another good copy
4163                // exists: git's `oid_object_info_extended` keeps consulting the
4164                // remaining sources (loose, other packs, alternates) when a pack
4165                // read fails. Fall through to the loose/other-pack probes and
4166                // only surface the packed error if every source comes up empty.
4167                Err(packed_err) => {
4168                    if let Ok(object) = self.loose.read_object(oid) {
4169                        return Ok(object);
4170                    }
4171                    // Try any *other* pack that also holds the object (a
4172                    // redundant copy survives one pack's corruption).
4173                    if let Some(object) =
4174                        self.read_packed_object_from_other_packs(oid, &pack_lookup)?
4175                    {
4176                        return Ok(object);
4177                    }
4178                    for alternate in &self.alternates {
4179                        if let Ok(object) =
4180                            Self::without_alternates(alternate, self.format).read_object(oid)
4181                        {
4182                            return Ok(object);
4183                        }
4184                    }
4185                    return Err(packed_err);
4186                }
4187            }
4188        }
4189        let loose_err = match self.loose.read_object(oid) {
4190            Ok(object) => return Ok(object),
4191            Err(GitError::NotFound(_)) => None,
4192            Err(err) => Some(err),
4193        };
4194        if let Some(object) = self.read_packed_object(oid)? {
4195            return Ok(object);
4196        }
4197        for alternate in &self.alternates {
4198            match Self::without_alternates(alternate, self.format).read_object(oid) {
4199                Ok(object) => return Ok(object),
4200                Err(GitError::NotFound(_)) => {}
4201                Err(err) => return Err(err),
4202            }
4203        }
4204        // Hard miss against every store. If an earlier enumeration built a loose
4205        // cache, an object written loose afterward by a sibling handle could have
4206        // been skipped above. Mirror git's `oid_object_info_extended`
4207        // reprepare-on-miss: drop stale cache state and retry an exact loose path
4208        // probe once before declaring the object missing.
4209        self.loose.invalidate_cache();
4210        match self.loose.read_object(oid) {
4211            Ok(object) => return Ok(object),
4212            Err(GitError::NotFound(_)) => {}
4213            Err(err) => return Err(err),
4214        }
4215        // No good copy in any store. If the local loose copy was corrupt (not
4216        // merely absent), surface that error — it is more specific than a plain
4217        // "not found".
4218        if let Some(err) = loose_err {
4219            return Err(err);
4220        }
4221        Err(GitError::object_not_found_in(
4222            *oid,
4223            MissingObjectContext::Read,
4224        ))
4225    }
4226}
4227
4228impl ObjectWriter for FileObjectDatabase {
4229    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4230        // Mirror git's freshen semantics (`write_object_file`:
4231        // `freshen_packed_object || freshen_loose_object`): an object already
4232        // present anywhere in the database — loose, packed, or through an
4233        // alternate — is not written again, so e.g. `git add` after
4234        // `git repack -ad` does not resurrect a loose copy of a packed object.
4235        let oid = object.object_id(self.format)?;
4236        if self.contains(&oid)? {
4237            return Ok(oid);
4238        }
4239        self.loose.write_object(object)
4240    }
4241}
4242
4243fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
4244    if path.exists() {
4245        return Ok(());
4246    }
4247    let parent = path
4248        .parent()
4249        .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
4250    fs::create_dir_all(parent)?;
4251    let temp_path = unique_temp_path(parent);
4252    let write_result = (|| -> Result<()> {
4253        {
4254            let mut file = fs::OpenOptions::new()
4255                .write(true)
4256                .create_new(true)
4257                .open(&temp_path)?;
4258            file.write_all(bytes)?;
4259            file.sync_all()?;
4260        }
4261        match fs::rename(&temp_path, path) {
4262            Ok(()) => Ok(()),
4263            Err(_) if path.exists() => {
4264                let _ = fs::remove_file(&temp_path);
4265                Ok(())
4266            }
4267            Err(err) => Err(GitError::Io(err.to_string())),
4268        }
4269    })();
4270    if write_result.is_err() {
4271        let _ = fs::remove_file(&temp_path);
4272    }
4273    write_result
4274}
4275
4276fn write_promisor_pack_sidecar(
4277    pack_dir: &Path,
4278    pack_name: &str,
4279    promisor: bool,
4280) -> Result<Option<PathBuf>> {
4281    if !promisor {
4282        return Ok(None);
4283    }
4284    let path = pack_dir.join(format!("{pack_name}.promisor"));
4285    write_pack_component(&path, b"")?;
4286    Ok(Some(path))
4287}
4288
/// Maximum number of bytes git will inflate when reading a loose object's
/// `"<type> <size>\0"` header (git's `MAX_HEADER_LEN` in object-file.c). The NUL
/// terminator must land within this window, so a header of 32 or more non-NUL
/// bytes is rejected as too long.
///
/// Referenced by `loose_header_too_long` (for the diagnostic text) and by
/// `framed_loose_header_terminated` (for the search window).
const MAX_LOOSE_HEADER_LEN: usize = 32;
4294
4295/// git's exact `error:`-level diagnostic for a loose object whose header overflows
4296/// `MAX_LOOSE_HEADER_LEN` (object-file.c: `error(_("header for %s too long, exceeds
4297/// %d bytes"), ...)`). Shared by the header-only and full-read paths so both surface
4298/// byte-identical text.
4299fn loose_header_too_long(oid: &ObjectId) -> GitError {
4300    GitError::InvalidObject(format!(
4301        "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
4302    ))
4303}
4304
4305/// git's `error:`-level diagnostic when the loose framing header cannot be inflated at
4306/// all (object-file.c `loose_object_info`, the `ULHR_BAD` arm: `error(_("unable to
4307/// unpack %s header"), ...)`).
4308fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
4309    GitError::InvalidObject(format!("unable to unpack {oid} header"))
4310}
4311
/// Classify an inflate failure from the two-byte zlib stream header, producing
/// the text of git-zlib.c's `error("inflate: %s (%s)", ...)` line.
///
/// The checks follow zlib's own `inflate()` HEAD-state validation order: the
/// FCHECK checksum over CMF+FLG, then the compression method, then the window
/// size, then the FDICT preset-dictionary bit (zlib reports `Z_NEED_DICT` with
/// a NULL `msg`, which git renders as "(no message)"). Inputs shorter than two
/// bytes, and headers that pass every check, return `None`: flate2 does not
/// surface zlib's per-case `msg` strings, so no diagnostic is fabricated for
/// failures past the stream header.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    // CMF is the high byte and FLG the low byte of the checked 16-bit value.
    let header_word = u16::from_be_bytes([cmf, flg]);
    let diagnostic = if header_word % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if cmf & 0x0f != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if cmf >> 4 > 7 {
        "inflate: data stream error (invalid window size)"
    } else if flg & 0x20 != 0 {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(diagnostic)
}
4335
4336/// Print the `error: inflate: ...` line git's zlib wrapper emits the moment
4337/// `inflate()` fails, when the failure is classifiable from the stream header.
4338fn emit_inflate_diagnostic(input: &[u8]) {
4339    if let Some(diagnostic) = inflate_header_diagnostic(input) {
4340        eprintln!("error: {diagnostic}");
4341    }
4342}
4343
/// Integrity verdict for a single loose object file, as classified by
/// [`LooseObjectStore::verify_object`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// Inflated, parsed, and re-hashed to its path-derived oid.
    Ok,
    /// Readable and well-formed, but its content hashes to a different oid
    /// (a loose file stored under the wrong path). `actual` is the oid the
    /// content really hashes to.
    HashMismatch { actual: ObjectId },
    /// Unreadable: corrupt zlib stream, truncated content, trailing garbage
    /// after the stream, or unparseable header.
    /// The `error:`-level diagnostics were already printed to stderr.
    Corrupt,
}
4357
/// Access to the loose objects stored under one objects directory, where each
/// object lives at `<objects_dir>/<first two hex digits>/<remaining digits>`.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Root directory that holds the two-hex-digit fanout subdirectories.
    objects_dir: PathBuf,
    /// Hash format every oid handed to this store must use.
    format: ObjectFormat,
    /// Lazily-populated set of loose object ids present on disk, mirroring git's
    /// `loose_objects_cache` (object-file.c). A lookup scans the queried
    /// `objects/XX/` fanout once; afterward misses in that fanout are in-memory
    /// checks instead of failed exact-path opens. Shared across
    /// `FileObjectDatabase` clones via `Arc` so a write through one handle is
    /// visible to reads through another; cleared by `refresh_read_cache` so
    /// objects installed out-of-band (fetch, repack) become visible. Writes
    /// extend the set in place rather than invalidating it.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
4372
4373impl LooseObjectStore {
4374    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4375        Self {
4376            objects_dir: objects_dir.into(),
4377            format,
4378            loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
4379        }
4380    }
4381
4382    /// Whether `oid` is present according to the loose-object cache, populating
4383    /// the cache on first use. Returns `None` when the lock cannot be trusted or
4384    /// the scan fails; callers should fall back to an exact filesystem probe in
4385    /// that case so a cache-building problem cannot change read semantics.
4386    fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
4387        let mut guard = self.loose_cache.lock().ok()?;
4388        let fanout = oid.as_bytes()[0];
4389        if !guard.loaded_fanouts.contains(&fanout) {
4390            collect_loose_fanout_object_ids(
4391                &self.objects_dir,
4392                self.format,
4393                fanout,
4394                &mut guard.objects,
4395            )
4396            .ok()?;
4397            guard.loaded_fanouts.insert(fanout);
4398        }
4399        Some(guard.objects.contains(oid))
4400    }
4401
4402    /// Populate the loose-object cache and return the sorted ids. This mirrors
4403    /// git's `odb_loose_cache` lazy fill and is reserved for operations that
4404    /// really need loose-object enumeration.
4405    fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
4406        if let Ok(mut guard) = self.loose_cache.lock() {
4407            guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
4408            guard.loaded_fanouts = (0..=u8::MAX).collect();
4409            let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
4410            ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
4411            return Ok(ids);
4412        }
4413        loose_object_ids(&self.objects_dir, self.format)
4414    }
4415
4416    /// Record `oid` as present in loose storage so subsequent reads find it
4417    /// without a rescan. A no-op when the cache has not been populated yet (the
4418    /// eventual lazy scan will pick the object up) or the lock is poisoned.
4419    fn note_loose_write(&self, oid: ObjectId) {
4420        if let Ok(mut guard) = self.loose_cache.lock() {
4421            guard.objects.insert(oid);
4422        }
4423    }
4424
4425    /// Drop the in-memory loose set so the next access rescans the fanout. Called
4426    /// by `FileObjectDatabase::refresh_read_cache` after out-of-band installs.
4427    pub(crate) fn invalidate_cache(&self) {
4428        if let Ok(mut guard) = self.loose_cache.lock() {
4429            *guard = LoosePresenceCache::default();
4430        }
4431    }
4432
4433    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
4434        Self::new(repository_objects_dir(git_dir), format)
4435    }
4436
4437    fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
4438        if oid.format() != self.format {
4439            return Err(GitError::InvalidObjectId(format!(
4440                "object {oid} uses {}, store uses {}",
4441                oid.format().name(),
4442                self.format.name()
4443            )));
4444        }
4445        Ok(())
4446    }
4447
4448    pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
4449        self.validate_oid_format(oid)?;
4450        let hex = oid.to_hex();
4451        Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
4452    }
4453
4454    pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
4455        self.validate_oid_format(oid)?;
4456        if self.cached_loose_presence(oid) == Some(false) {
4457            return Ok(false);
4458        }
4459        let path = self.object_path(oid)?;
4460        Ok(path.exists())
4461    }
4462
4463    pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
4464        self.validate_oid_format(oid)?;
4465        if self.cached_loose_presence(oid) == Some(false) {
4466            return Ok(None);
4467        }
4468        let path = self.object_path(oid)?;
4469        match fs::metadata(path) {
4470            Ok(metadata) => Ok(Some(metadata.len())),
4471            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
4472            Err(err) => Err(GitError::Io(err.to_string())),
4473        }
4474    }
4475
4476    /// The object type and content size of `oid` from loose storage, inflating only
4477    /// the framing header (`"<type> <size>\0"`) and not the body. Output-limited
4478    /// reads keep miniz from inflating past the header even for large objects.
4479    /// Returns `Ok(None)` when the loose object is absent.
4480    pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
4481        self.validate_oid_format(oid)?;
4482        if self.cached_loose_presence(oid) == Some(false) {
4483            return Ok(None);
4484        }
4485        let path = self.object_path(oid)?;
4486        let compressed = match fs::read(&path) {
4487            Ok(compressed) => compressed,
4488            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4489            Err(err) => return Err(GitError::Io(err.to_string())),
4490        };
4491        match inflate_loose_header(&compressed)? {
4492            LooseHeader::Ok(header) => {
4493                let header = std::str::from_utf8(&header)
4494                    .map_err(|err| GitError::InvalidObject(err.to_string()))?;
4495                let (kind, size) = header
4496                    .split_once(' ')
4497                    .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
4498                let object_type = kind.parse::<ObjectType>()?;
4499                let size = size
4500                    .parse::<u64>()
4501                    .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
4502                Ok(Some((object_type, size)))
4503            }
4504            LooseHeader::Bad => {
4505                // git's ULHR_BAD: the zlib wrapper's `error: inflate: ...` line, then
4506                // "unable to unpack <oid> header".
4507                emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
4508                Err(loose_unpack_header_failed(oid))
4509            }
4510            LooseHeader::TooLong => {
4511                // git inflates only the first `MAX_LOOSE_HEADER_LEN` bytes
4512                // (object-file.c `unpack_loose_header`) and reports ULHR_TOO_LONG when
4513                // no NUL terminator lands within them — whether the stream simply ends
4514                // early or overflows the window. Both collapse to the same diagnostic.
4515                Err(loose_header_too_long(oid))
4516            }
4517        }
4518    }
4519
4520    /// Loose object ids in this store, sorted by hex.
4521    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4522        self.loose_object_ids_cached()
4523    }
4524
4525    /// fsck's loose-object integrity probe, mirroring C git's `read_loose_object`
4526    /// (object-file.c) as called from `fsck_loose` (builtin/fsck.c): inflate and
4527    /// parse the file at `oid`'s loose path, then re-hash its content against the
4528    /// path-derived oid. `display_path` appears verbatim in the `error:`-level
4529    /// diagnostics — the path-form messages of `read_loose_object` ("unable to
4530    /// unpack header of <path>"), unlike the oid-form messages of the normal read
4531    /// path. Returns `Ok(None)` when no loose file exists for `oid`.
4532    pub fn verify_object(
4533        &self,
4534        oid: &ObjectId,
4535        display_path: &str,
4536    ) -> Result<Option<LooseObjectIntegrity>> {
4537        let path = self.object_path(oid)?;
4538        let compressed = match fs::read(&path) {
4539            Ok(compressed) => compressed,
4540            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4541            Err(err) => return Err(GitError::Io(err.to_string())),
4542        };
4543        let mut decoder = ZlibDecoder::new(compressed.as_slice());
4544        let mut framed = Vec::new();
4545        if decoder.read_to_end(&mut framed).is_err() {
4546            emit_inflate_diagnostic(&compressed);
4547            // git inflates the header first (`unpack_loose_header`), then the body
4548            // (`unpack_loose_rest`). If the header inflated (its NUL is visible in
4549            // the partial output) but the body broke, that is a *content*
4550            // corruption: git's `unpack_loose_rest` prints `corrupt loose object
4551            // '<oid>'` (status != Z_STREAM_END), then `read_loose_object` adds
4552            // `unable to unpack contents of <path>`. If inflation died before the
4553            // header materialized, only the header message fires.
4554            if framed_loose_header_terminated(&framed) {
4555                eprintln!("error: corrupt loose object '{oid}'");
4556                eprintln!("error: unable to unpack contents of {display_path}");
4557            } else {
4558                eprintln!("error: unable to unpack header of {display_path}");
4559            }
4560            return Ok(Some(LooseObjectIntegrity::Corrupt));
4561        }
4562        if !framed_loose_header_terminated(&framed) {
4563            // ULHR_TOO_LONG collapses into the same path-form message here: C's
4564            // `read_loose_object` treats every non-OK `unpack_loose_header` alike.
4565            eprintln!("error: unable to unpack header of {display_path}");
4566            return Ok(Some(LooseObjectIntegrity::Corrupt));
4567        }
4568        // git's `unpack_loose_rest`/`check_stream_oid` reject trailing bytes after
4569        // the zlib stream: a fully-inflated object whose compressed input was not
4570        // entirely consumed is `garbage at end of loose object '<oid>'`, then
4571        // `object corrupt or missing: <path>` from `fsck_loose`. (read_to_end
4572        // stops at Z_STREAM_END and silently ignores the trailing bytes, so we
4573        // compare consumed input against the file size ourselves.)
4574        if (decoder.total_in() as usize) < compressed.len() {
4575            // git's `unpack_loose_rest` prints `garbage at end of loose object`
4576            // then returns NULL, so `read_loose_object` also prints `unable to
4577            // unpack contents of <path>`.
4578            eprintln!("error: garbage at end of loose object '{oid}'");
4579            eprintln!("error: unable to unpack contents of {display_path}");
4580            return Ok(Some(LooseObjectIntegrity::Corrupt));
4581        }
4582        // A truncated object can inflate to a clean stream end yet yield fewer
4583        // body bytes than the header's declared size. git's `unpack_loose_rest`
4584        // inflates exactly `size` bytes and, finding the stream ends short,
4585        // prints `corrupt loose object '<oid>'`; `read_loose_object` then adds
4586        // `unable to unpack contents of <path>`. Detect the short body here so it
4587        // is not misreported as a header-parse failure.
4588        if let Some(declared) = loose_header_declared_size(&framed) {
4589            let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
4590            let body_len = framed.len() - (nul + 1).min(framed.len());
4591            if body_len < declared {
4592                eprintln!("error: corrupt loose object '{oid}'");
4593                eprintln!("error: unable to unpack contents of {display_path}");
4594                return Ok(Some(LooseObjectIntegrity::Corrupt));
4595            }
4596        }
4597        let Ok(object) = parse_framed_object(&framed) else {
4598            // Distinguish git's two header-parse failures: a structurally valid
4599            // `"<word> <size>\0"` header whose *type word* is not a known object
4600            // type yields `unable to parse type from header '<header>'`, while a
4601            // genuinely malformed header yields `unable to parse header`.
4602            if let Some(header) = loose_header_with_unknown_type(&framed) {
4603                eprintln!(
4604                    "error: unable to parse type from header '{header}' of {display_path}"
4605                );
4606            } else {
4607                eprintln!("error: unable to parse header of {display_path}");
4608            }
4609            return Ok(Some(LooseObjectIntegrity::Corrupt));
4610        };
4611        let actual = object.object_id(self.format)?;
4612        if &actual != oid {
4613            return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
4614        }
4615        Ok(Some(LooseObjectIntegrity::Ok))
4616    }
4617}
4618
4619/// Whether the inflated framing bytes contain the header's NUL terminator within
4620/// git's `MAX_HEADER_LEN` window (object-file.c `unpack_loose_header`'s success
4621/// condition).
4622fn framed_loose_header_terminated(framed: &[u8]) -> bool {
4623    framed
4624        .iter()
4625        .take(MAX_LOOSE_HEADER_LEN)
4626        .any(|byte| *byte == 0)
4627}
4628
4629/// If the framing has a structurally valid `"<word> <size>\0"` header whose body
4630/// length matches `<size>` but whose `<word>` is not a known object type, return
4631/// the header string (the bytes before the NUL). Mirrors git's
4632/// `parse_loose_header` reporting `unable to parse type from header '<header>'`.
4633fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
4634    let nul = framed.iter().position(|&b| b == 0)?;
4635    let header = std::str::from_utf8(&framed[..nul]).ok()?;
4636    let (kind, size) = header.split_once(' ')?;
4637    let size: usize = size.parse().ok()?;
4638    // Body length must match the declared size (otherwise it is a different
4639    // corruption, handled by the generic path).
4640    if framed.len() - (nul + 1) != size {
4641        return None;
4642    }
4643    // A known type word would have parsed successfully upstream; only return
4644    // when the word is genuinely unknown.
4645    if kind.parse::<ObjectType>().is_ok() {
4646        return None;
4647    }
4648    Some(header.to_string())
4649}
4650
/// The size declared in a loose object's `"<type> <size>\0"` header, if the
/// header is structurally a `<word> <decimal-size>` pair. Used to detect a body
/// inflated short of its declared length (a truncated object).
///
/// Returns `None` when there is no NUL, the header is not UTF-8, it has no
/// space separator, or `<size>` is not a run of bare ASCII digits that fits
/// in `usize`.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let nul = framed.iter().position(|&b| b == 0)?;
    let header = std::str::from_utf8(&framed[..nul]).ok()?;
    let (_kind, size) = header.split_once(' ')?;
    // `str::parse::<usize>` accepts an optional leading `+`; a header size is
    // decimal digits only, so a signed value is not a declared size at all.
    if size.is_empty() || !size.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    size.parse::<usize>().ok()
}
4660
/// Outcome of inflating a loose object's header, mirroring git's
/// `unpack_loose_header` result codes (object-file.c `enum
/// unpack_loose_header_result`).
enum LooseHeader {
    /// ULHR_OK: a NUL-terminated header was found within the window. Carries the
    /// header bytes up to (not including) the NUL.
    Ok(Vec<u8>),
    /// ULHR_BAD: the zlib stream would not inflate (status != Z_OK/Z_STREAM_END).
    Bad,
    /// ULHR_TOO_LONG: the inflated output filled the header window with no NUL.
    TooLong,
}
4675
4676/// Inflate a loose object's *header* exactly as git's `unpack_loose_header` does
4677/// (object-file.c): a single bounded inflate into a `MAX_LOOSE_HEADER_LEN`-byte
4678/// output buffer, then look for the header-terminating NUL in what came out.
4679///
4680/// The byte budget is load-bearing for corruption parity: git inflates only up to
4681/// `MAX_HEADER_LEN` (32) bytes of *output* before stopping, so a `cat-file -s`/`-t`
4682/// header read detects a zlib data error only when it lands within those first 32
4683/// inflated bytes (the header plus the start of the body for a small object) — and
4684/// silently returns the header for corruption buried deeper in the body, which the
4685/// full-object read path catches instead. A byte-by-byte loop that stopped at the
4686/// NUL would never inflate into the corrupt region and miss the bit-error case
4687/// (t1060 "getting type of a corrupt blob fails"); feeding too much output budget
4688/// would over-detect relative to git. So this matches git's exact window.
4689fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
4690    let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
4691    let mut decompress = Decompress::new(true);
4692    // git feeds the whole mapped file as `avail_in` and inflates once into a
4693    // 32-byte `avail_out`; zlib stops at the output limit (Z_OK with avail_out==0)
4694    // or at the stream's end, propagating Z_DATA_ERROR for a corrupt stream.
4695    let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
4696    let produced = decompress.total_out() as usize;
4697    match status {
4698        Ok(_) => {
4699            let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
4700            match window.iter().position(|&byte| byte == 0) {
4701                Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
4702                // No NUL within the window: either the stream ended early or the
4703                // header overflows `MAX_LOOSE_HEADER_LEN`. git collapses both into
4704                // ULHR_TOO_LONG (object-file.c `unpack_loose_header`).
4705                None => Ok(LooseHeader::TooLong),
4706            }
4707        }
4708        // Any zlib error before a NUL materializes is git's ULHR_BAD.
4709        Err(_) => Ok(LooseHeader::Bad),
4710    }
4711}
4712
4713impl ObjectReader for LooseObjectStore {
4714    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4715        self.validate_oid_format(oid)?;
4716        // Skip the `open()` (and its ENOENT) when an already-built loose cache
4717        // knows the id is absent. Without a cache, use an exact path probe; a
4718        // full fanout scan is far more expensive for one-shot packed-object reads.
4719        if self.cached_loose_presence(oid) == Some(false) {
4720            return Err(GitError::object_not_found_in(
4721                *oid,
4722                MissingObjectContext::Read,
4723            ));
4724        }
4725        let path = self.object_path(oid)?;
4726        let compressed = match fs::read(&path) {
4727            Ok(compressed) => compressed,
4728            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4729                return Err(GitError::object_not_found_in(
4730                    *oid,
4731                    MissingObjectContext::Read,
4732                ));
4733            }
4734            Err(err) => return Err(GitError::Io(err.to_string())),
4735        };
4736        let mut decoder = ZlibDecoder::new(compressed.as_slice());
4737        let mut framed = Vec::new();
4738        if decoder.read_to_end(&mut framed).is_err() {
4739            emit_inflate_diagnostic(&compressed);
4740            // A stream that dies before the framing header materializes is git's
4741            // ULHR_BAD ("unable to unpack <oid> header"); with the header intact,
4742            // the body is what broke (`unpack_loose_rest`'s "corrupt loose
4743            // object").
4744            if !framed_loose_header_terminated(&framed) {
4745                return Err(loose_unpack_header_failed(oid));
4746            }
4747            return Err(GitError::InvalidObject(format!(
4748                "corrupt loose object '{oid}'"
4749            )));
4750        }
4751        // git only inflates the first `MAX_LOOSE_HEADER_LEN` bytes looking for the
4752        // header's NUL terminator before parsing the type; an over-long header is
4753        // rejected here (with git's diagnostic) rather than failing later as an
4754        // "unknown object type". Mirror that so `cat-file -p` matches upstream.
4755        if framed
4756            .iter()
4757            .take(MAX_LOOSE_HEADER_LEN)
4758            .all(|byte| *byte != 0)
4759        {
4760            return Err(loose_header_too_long(oid));
4761        }
4762        let object = parse_framed_object(&framed)?;
4763        // Trust the loose object's on-disk name rather than re-hashing its full body
4764        // on every read (see `verify_reads_enabled`); use `validate`/fsck or
4765        // `SLEY_VERIFY_READS` for an explicit integrity check.
4766        if verify_reads_enabled() {
4767            let actual = object.object_id(self.format)?;
4768            if &actual != oid {
4769                return Err(GitError::InvalidObject(format!(
4770                    "loose object {} hashes to {actual}",
4771                    path.display()
4772                )));
4773            }
4774        }
4775        Ok(Arc::new(object))
4776    }
4777}
4778
4779impl ObjectWriter for LooseObjectStore {
4780    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4781        let oid = object.object_id(self.format)?;
4782        let path = self.object_path(&oid)?;
4783        if path.exists() {
4784            self.note_loose_write(oid);
4785            return Ok(oid);
4786        }
4787        let parent = path
4788            .parent()
4789            .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
4790        fs::create_dir_all(parent)?;
4791        let temp_path = unique_temp_path(parent);
4792        let write_result = (|| -> Result<()> {
4793            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4794            encoder.write_all(&object.framed_bytes())?;
4795            let compressed = encoder.finish()?;
4796            {
4797                let mut file = fs::OpenOptions::new()
4798                    .write(true)
4799                    .create_new(true)
4800                    .open(&temp_path)?;
4801                file.write_all(&compressed)?;
4802                // No fsync: git's default `core.fsync=none` fsyncs nothing on the
4803                // loose-object write path (object-file.c writes the temp file and
4804                // renames it without syncing unless `core.fsync` names
4805                // `loose-object`/`objects`/`all`, which it does not by default).
4806                // A per-object sync_all() here made `git add` of N files cost N
4807                // fsyncs — the dominant term in sley#27's 10x `add -u` slowdown —
4808                // for durability git itself does not provide by default. The
4809                // create_new temp + atomic rename below still guarantees the
4810                // object never appears half-written under its final name.
4811            }
4812            match fs::rename(&temp_path, &path) {
4813                Ok(()) => Ok(()),
4814                Err(_) if path.exists() => {
4815                    let _ = fs::remove_file(&temp_path);
4816                    Ok(())
4817                }
4818                Err(err) => Err(GitError::Io(err.to_string())),
4819            }
4820        })();
4821        if write_result.is_err() {
4822            let _ = fs::remove_file(&temp_path);
4823        }
4824        write_result?;
4825        self.note_loose_write(oid);
4826        Ok(oid)
4827    }
4828}
4829
4830fn unique_temp_path(parent: &Path) -> PathBuf {
4831    let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
4832    parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
4833}
4834
4835#[cfg(test)]
4836mod tests {
4837    use super::*;
4838    use sley_core::BString;
4839    use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
4840    use sley_pack::{PackFile, PackWriteOptions};
4841
4842    fn blob_of(byte: u8, len: usize) -> EncodedObject {
4843        EncodedObject::new(ObjectType::Blob, vec![byte; len])
4844    }
4845
4846    fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
4847        Arc::new(blob_of(byte, len))
4848    }
4849
4850    fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
4851        reader
4852            .read_object(oid)
4853            .expect("test operation should succeed")
4854            .as_ref()
4855            .clone()
4856    }
4857
4858    #[test]
4859    fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
4860        // Budget holds two ~1 KiB objects but not three.
4861        let one = cached_object_cost(&blob_of(0, 1000));
4862        let mut cache = LruCache::<u32>::new(one * 2 + 8);
4863        cache.put(1, cached_blob_of(b'a', 1000));
4864        cache.put(2, cached_blob_of(b'b', 1000));
4865        // Touch key 1 so key 2 becomes least-recently-used.
4866        assert!(cache.get(&1).is_some());
4867        cache.put(3, cached_blob_of(b'c', 1000));
4868        // Key 2 (LRU) is evicted; 1 and 3 remain.
4869        assert!(cache.get(&1).is_some());
4870        assert!(cache.get(&2).is_none());
4871        assert!(cache.get(&3).is_some());
4872    }
4873
4874    #[test]
4875    fn lru_cache_zero_budget_is_inert() {
4876        let mut cache = LruCache::<u32>::new(0);
4877        cache.put(1, cached_blob_of(b'a', 16));
4878        assert!(cache.get(&1).is_none());
4879    }
4880
4881    #[test]
4882    fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
4883        let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
4884        cache.put(1, cached_blob_of(b'a', 50));
4885        assert!(cache.get(&1).is_some());
4886        // An object that cannot fit is not cached, and it evicts the prior entry
4887        // stored under the same key (so we never serve a stale value for it).
4888        cache.put(1, cached_blob_of(b'b', 10_000));
4889        assert!(cache.get(&1).is_none());
4890        // A subsequent fitting insert under another key still works and accounting
4891        // is not corrupted by the oversized insert.
4892        cache.put(2, cached_blob_of(b'c', 50));
4893        assert!(cache.get(&2).is_some());
4894    }
4895
4896    #[test]
4897    fn lru_cache_replacing_entry_updates_byte_accounting() {
4898        // Budget holds two 500-byte objects (plus headroom) but not a 500 + a
4899        // ~1900-byte object.
4900        let small = cached_object_cost(&blob_of(0, 500));
4901        let mut cache = LruCache::<u32>::new(small * 2 + 200);
4902        cache.put(1, cached_blob_of(b'a', 500));
4903        cache.put(2, cached_blob_of(b'b', 500));
4904        assert!(cache.get(&1).is_some());
4905        assert!(cache.get(&2).is_some());
4906        // Replace key 2 (now MRU after the gets above re-ordered 1 then 2) with a
4907        // bigger value that still fits the budget alone but makes the running total
4908        // exceed it; the LRU (key 1) is evicted while the replaced key 2 stays.
4909        // This exercises the replace-path accounting.
4910        cache.put(2, cached_blob_of(b'b', 1000));
4911        assert!(cache.get(&2).is_some());
4912        assert!(cache.get(&1).is_none());
4913    }
4914
4915    #[test]
4916    fn write_and_validate_blob() {
4917        let db = ObjectDatabase::new(ObjectFormat::Sha1);
4918        let oid = db
4919            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
4920            .expect("test operation should succeed");
4921        assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
4922        db.validate(&oid).expect("test operation should succeed");
4923    }
4924
4925    #[test]
4926    fn loose_store_writes_and_reads_object() {
4927        let root = std::env::temp_dir().join(format!(
4928            "sley-loose-store-{}-{}",
4929            std::process::id(),
4930            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
4931        ));
4932        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
4933        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
4934        let oid = store
4935            .write_object(object.clone())
4936            .expect("test operation should succeed");
4937        assert_eq!(read_object_for_assert(&store, &oid), object);
4938        assert!(
4939            store
4940                .object_path(&oid)
4941                .expect("test operation should succeed")
4942                .exists()
4943        );
4944        fs::remove_dir_all(root).expect("test operation should succeed");
4945    }
4946
4947    #[test]
4948    fn read_header_detects_corruption_within_gits_header_window() {
4949        // git's `unpack_loose_header` inflates only the first MAX_HEADER_LEN (32)
4950        // bytes of output; a zlib data error inside that window makes `cat-file
4951        // -s`/`-t` fail (ULHR_BAD → "unable to unpack header"). A byte-by-byte
4952        // header read that stopped at the NUL would never inflate into the corrupt
4953        // region and would silently return a bogus size — the t1060 "getting type
4954        // of a corrupt blob fails" bug. Corrupt a byte inside the inflate stream of
4955        // a tiny object so the damage lands within the first 32 inflated bytes.
4956        let root = temp_root("sley-loose-header-corrupt");
4957        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
4958        let object = EncodedObject::new(ObjectType::Blob, b"content\n".to_vec());
4959        let oid = store
4960            .write_object(object)
4961            .expect("test operation should succeed");
4962        let path = store
4963            .object_path(&oid)
4964            .expect("test operation should succeed");
4965        let mut bytes = fs::read(&path).expect("test operation should succeed");
4966        // Offset 10 is inside the deflate stream (past the 2-byte zlib header) and,
4967        // for an 8-byte blob, decodes into the first 32 output bytes. Zero it to
4968        // break inflation, mirroring t1060's `corrupt_byte HEAD:content.t 10`.
4969        bytes[10] = 0;
4970        fs::write(&path, &bytes).expect("test operation should succeed");
4971        store.invalidate_cache();
4972        let err = store
4973            .read_header(&oid)
4974            .expect_err("corrupt loose header must fail like git's ULHR_BAD");
4975        let msg = err.to_string();
4976        assert!(
4977            msg.contains("unable to unpack") && msg.contains(&oid.to_hex()),
4978            "expected git's ULHR_BAD message, got: {msg}"
4979        );
4980        fs::remove_dir_all(root).expect("test operation should succeed");
4981    }
4982
4983    #[test]
4984    fn read_header_ignores_corruption_past_gits_header_window() {
4985        // Mirror git: corruption deeper than the 32-byte header window is NOT
4986        // detected by a header-only read (`cat-file -s` still returns the size);
4987        // the full-object read path catches it instead. Over-detecting here would
4988        // diverge from upstream on large objects with a clean header.
4989        let root = temp_root("sley-loose-header-deep-corrupt");
4990        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
4991        // Incompressible body so the deflate stream is long and a deep byte is well
4992        // past the 32 inflated header-window bytes.
4993        let body: Vec<u8> = (0..4096u32).map(|i| (i.wrapping_mul(2654435761)) as u8).collect();
4994        let object = EncodedObject::new(ObjectType::Blob, body.clone());
4995        let oid = store
4996            .write_object(object)
4997            .expect("test operation should succeed");
4998        let path = store
4999            .object_path(&oid)
5000            .expect("test operation should succeed");
5001        let mut bytes = fs::read(&path).expect("test operation should succeed");
5002        let deep = bytes.len() / 2;
5003        bytes[deep] ^= 0xff;
5004        fs::write(&path, &bytes).expect("test operation should succeed");
5005        store.invalidate_cache();
5006        let header = store
5007            .read_header(&oid)
5008            .expect("header-only read must still succeed for deep body corruption");
5009        assert_eq!(header, Some((ObjectType::Blob, body.len() as u64)));
5010        fs::remove_dir_all(root).expect("test operation should succeed");
5011    }
5012
5013    #[test]
5014    fn file_database_reads_object_from_pack_index() {
5015        let root = temp_root("sley-file-odb-pack");
5016        let git_dir = root.join(".git");
5017        let pack_dir = git_dir.join("objects").join("pack");
5018        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5019        let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
5020        let oid = object
5021            .object_id(ObjectFormat::Sha1)
5022            .expect("test operation should succeed");
5023        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5024            .expect("test operation should succeed");
5025        let pack_name = written.checksum.to_hex();
5026        fs::write(
5027            pack_dir.join(format!("pack-{pack_name}.pack")),
5028            written.pack,
5029        )
5030        .expect("test operation should succeed");
5031        fs::write(
5032            pack_dir.join(format!("pack-{pack_name}.idx")),
5033            written.index,
5034        )
5035        .expect("test operation should succeed");
5036
5037        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5038        assert!(db.contains(&oid).expect("test operation should succeed"));
5039        assert_eq!(read_object_for_assert(&db, &oid), object);
5040        fs::remove_dir_all(root).expect("test operation should succeed");
5041    }
5042
5043    #[test]
5044    fn file_database_loose_cache_observes_same_process_write_after_miss() {
5045        let root = temp_root("sley-file-odb-loose-cache-write");
5046        let git_dir = root.join(".git");
5047        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5048        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5049
5050        let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
5051        let oid = object
5052            .object_id(ObjectFormat::Sha1)
5053            .expect("test operation should succeed");
5054
5055        assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
5056        db.loose()
5057            .write_object(object.clone())
5058            .expect("test operation should succeed");
5059
5060        assert_eq!(read_object_for_assert(&db, &oid), object);
5061        fs::remove_dir_all(root).expect("test operation should succeed");
5062    }
5063
5064    #[test]
5065    fn object_presence_checker_observes_same_process_loose_write_after_miss() {
5066        let root = temp_root("sley-presence-checker-loose-cache-write");
5067        let git_dir = root.join(".git");
5068        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5069        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5070        let mut checker = db.presence_checker();
5071
5072        let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
5073        let oid = object
5074            .object_id(ObjectFormat::Sha1)
5075            .expect("test operation should succeed");
5076
5077        assert!(
5078            !checker
5079                .contains(&oid)
5080                .expect("test operation should succeed")
5081        );
5082        db.loose()
5083            .write_object(object)
5084            .expect("test operation should succeed");
5085
5086        assert!(
5087            checker
5088                .contains(&oid)
5089                .expect("test operation should succeed")
5090        );
5091        fs::remove_dir_all(root).expect("test operation should succeed");
5092    }
5093
5094    #[test]
    fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
        // Checks that `read_object_header` reports the same (type, size) as a
        // full `read_object` for one loose object and three packed objects,
        // and returns `None` for an id that was never stored.
        let root = temp_root("sley-read-object-header");
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        let format = ObjectFormat::Sha1;
        let db = FileObjectDatabase::from_git_dir(&git_dir, format);

        // Loose object: the header read inflates only the framing, not the body.
        let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
        let loose_oid = db
            .write_object(loose.clone())
            .expect("test operation should succeed");

        // Packed objects, including an ofs-delta whose *result* size lives in the
        // delta stream (not the pack entry header) and whose type is inherited from
        // its base at the end of the chain.
        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
        // The child shares the base's 4096-byte prefix and appends a short tail.
        let mut child_body = vec![b'a'; 4096];
        child_body.extend_from_slice(b" plus a deltified tail\n");
        let child = EncodedObject::new(ObjectType::Blob, child_body);
        // A commit-typed object whose body is just probe text; only its type
        // and length are compared below.
        let commitish =
            EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
        let base_oid = base
            .object_id(format)
            .expect("test operation should succeed");
        let child_oid = child
            .object_id(format)
            .expect("test operation should succeed");
        let commit_oid = commitish
            .object_id(format)
            .expect("test operation should succeed");
        // Ask the writer to prefer ofs-deltas and to keep the given object order.
        let options = PackWriteOptions::new()
            .with_prefer_ofs_delta(true)
            .with_reorder(false);
        let pack = PackFile::write_packed_with_options(
            &[base.clone(), child.clone(), commitish.clone()],
            format,
            &options,
        )
        .expect("test operation should succeed");
        db.install_pack(&pack)
            .expect("test operation should succeed");

        // The header read agrees with a full decode for every object and storage
        // class, without ever materializing the body.
        for (oid, want_type, want_len) in [
            (&loose_oid, ObjectType::Blob, loose.body.len()),
            (&base_oid, ObjectType::Blob, base.body.len()),
            (&child_oid, ObjectType::Blob, child.body.len()),
            (&commit_oid, ObjectType::Commit, commitish.body.len()),
        ] {
            // First against the values known from construction...
            assert_eq!(
                db.read_object_header(oid)
                    .expect("test operation should succeed"),
                Some((want_type, want_len as u64)),
                "header for {oid}"
            );
            // ...then against what a full read of the same id returns.
            let full = db.read_object(oid).expect("test operation should succeed");
            assert_eq!(
                db.read_object_header(oid)
                    .expect("test operation should succeed"),
                Some((full.object_type, full.body.len() as u64))
            );
        }

        // An id that was never written yields `None` rather than an error.
        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
            .expect("test operation should succeed");
        assert_eq!(
            db.read_object_header(&missing)
                .expect("test operation should succeed"),
            None
        );
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
5169
5170    #[test]
    fn object_storage_info_reports_loose_packed_and_delta_metadata() {
        // Checks `object_storage_info` for a loose object, a packed base, a
        // packed child expected to be stored as a delta, and a missing id.
        let root = temp_root("sley-object-storage-info");
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        let format = ObjectFormat::Sha1;
        let db = FileObjectDatabase::from_git_dir(&git_dir, format);

        // Loose object: `disk_size` must equal the on-disk file length and the
        // delta base must be the zero id.
        let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
        let loose_oid = db
            .write_object(loose)
            .expect("test operation should succeed");
        let loose_size = fs::metadata(
            db.loose()
                .object_path(&loose_oid)
                .expect("test operation should succeed"),
        )
        .expect("test operation should succeed")
        .len();
        // Outer `expect` unwraps the `Result`, inner one the `Option`.
        let loose_info = db
            .object_storage_info(&loose_oid)
            .expect("test operation should succeed")
            .expect("test operation should succeed");
        assert_eq!(loose_info.disk_size, loose_size);
        assert_eq!(
            loose_info.deltabase,
            zero_oid(format).expect("test operation should succeed")
        );

        // Packed pair: the child repeats the base's 4096-byte body and appends
        // a short tail.
        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
        let mut child_body = vec![b'a'; 4096];
        child_body.extend_from_slice(b" changed tail\n");
        let child = EncodedObject::new(ObjectType::Blob, child_body);
        let base_oid = base
            .object_id(format)
            .expect("test operation should succeed");
        let child_oid = child
            .object_id(format)
            .expect("test operation should succeed");
        // Ask the writer to prefer ofs-deltas and to keep the given object order.
        let options = PackWriteOptions::new()
            .with_prefer_ofs_delta(true)
            .with_reorder(false);
        let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
            .expect("test operation should succeed");
        db.install_pack(&pack)
            .expect("test operation should succeed");

        // The base occupies space in the pack and has no delta base (zero id).
        let base_info = db
            .object_storage_info(&base_oid)
            .expect("test operation should succeed")
            .expect("test operation should succeed");
        assert!(base_info.disk_size > 0);
        assert_eq!(
            base_info.deltabase,
            zero_oid(format).expect("test operation should succeed")
        );

        // The child must be reported as a delta against the base.
        let child_info = db
            .object_storage_info(&child_oid)
            .expect("test operation should succeed")
            .expect("test operation should succeed");
        assert!(child_info.disk_size > 0);
        assert_eq!(child_info.deltabase, base_oid);

        // An id that was never written yields `None` rather than an error.
        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
            .expect("test operation should succeed");
        assert_eq!(
            db.object_storage_info(&missing)
                .expect("test operation should succeed"),
            None
        );
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
5243
5244    #[test]
5245    fn file_database_resolves_unique_loose_object_prefix() {
5246        let root = temp_root("sley-file-odb-prefix-loose");
5247        let git_dir = root.join(".git");
5248        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5249        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5250        let object = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
5251        let oid = db
5252            .write_object(object)
5253            .expect("test operation should succeed");
5254        let prefix = &oid.to_hex()[..8];
5255
5256        assert_eq!(
5257            db.resolve_prefix(prefix)
5258                .expect("test operation should succeed"),
5259            ObjectPrefixResolution::Unique(oid)
5260        );
5261        assert!(
5262            db.object_ids()
5263                .expect("test operation should succeed")
5264                .contains(&oid)
5265        );
5266        fs::remove_dir_all(root).expect("test operation should succeed");
5267    }
5268
5269    #[test]
5270    fn file_database_resolves_unique_packed_object_prefix() {
5271        let root = temp_root("sley-file-odb-prefix-packed");
5272        let git_dir = root.join(".git");
5273        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5274        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5275        let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
5276        let oid = object
5277            .object_id(ObjectFormat::Sha1)
5278            .expect("test operation should succeed");
5279        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5280            .expect("test operation should succeed");
5281        db.install_pack(&pack)
5282            .expect("test operation should succeed");
5283        let prefix = &oid.to_hex()[..8];
5284
5285        assert_eq!(
5286            db.resolve_prefix(prefix)
5287                .expect("test operation should succeed"),
5288            ObjectPrefixResolution::Unique(oid)
5289        );
5290        fs::remove_dir_all(root).expect("test operation should succeed");
5291    }
5292
5293    #[test]
5294    fn file_database_reports_ambiguous_object_prefix() {
5295        let root = temp_root("sley-file-odb-prefix-ambiguous");
5296        let git_dir = root.join(".git");
5297        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5298        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5299        let mut seen = HashMap::new();
5300        let (prefix, first, second) = (0..10_000)
5301            .find_map(|idx| {
5302                let object =
5303                    EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
5304                let oid = db
5305                    .write_object(object)
5306                    .expect("test operation should succeed");
5307                let prefix = oid.to_hex()[..4].to_string();
5308                seen.insert(prefix.clone(), oid)
5309                    .map(|first| (prefix, first, oid))
5310            })
5311            .expect("test should find a 4-hex collision");
5312
5313        let ObjectPrefixResolution::Ambiguous(mut matches) = db
5314            .resolve_prefix(&prefix)
5315            .expect("test operation should succeed")
5316        else {
5317            panic!("expected ambiguous prefix {prefix}");
5318        };
5319        matches.sort_by_key(ObjectId::to_hex);
5320        let mut expected = vec![first, second];
5321        expected.sort_by_key(ObjectId::to_hex);
5322        assert_eq!(matches, expected);
5323        fs::remove_dir_all(root).expect("test operation should succeed");
5324    }
5325
5326    #[test]
5327    fn file_database_rejects_too_short_object_prefix() {
5328        let root = temp_root("sley-file-odb-prefix-short");
5329        let git_dir = root.join(".git");
5330        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5331        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5332
5333        assert!(matches!(
5334            db.resolve_prefix("abc"),
5335            Err(GitError::InvalidObjectId(_))
5336        ));
5337        fs::remove_dir_all(root).expect("test operation should succeed");
5338    }
5339
5340    #[test]
5341    fn file_database_reads_sha256_object_from_pack_index() {
5342        let root = temp_root("sley-file-odb-pack-sha256");
5343        let git_dir = root.join(".git");
5344        let pack_dir = git_dir.join("objects").join("pack");
5345        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5346        let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
5347        let oid = object
5348            .object_id(ObjectFormat::Sha256)
5349            .expect("test operation should succeed");
5350        let written =
5351            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5352                .expect("test operation should succeed");
5353        let pack_name = written.checksum.to_hex();
5354        fs::write(
5355            pack_dir.join(format!("pack-{pack_name}.pack")),
5356            written.pack,
5357        )
5358        .expect("test operation should succeed");
5359        fs::write(
5360            pack_dir.join(format!("pack-{pack_name}.idx")),
5361            written.index,
5362        )
5363        .expect("test operation should succeed");
5364
5365        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
5366        assert!(db.contains(&oid).expect("test operation should succeed"));
5367        assert_eq!(read_object_for_assert(&db, &oid), object);
5368        fs::remove_dir_all(root).expect("test operation should succeed");
5369    }
5370
5371    #[test]
5372    fn file_database_installs_sha256_pack_without_loose_objects() {
5373        let root = temp_root("sley-file-odb-install-pack");
5374        let git_dir = root.join(".git");
5375        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5376        let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
5377        let oid = object
5378            .object_id(ObjectFormat::Sha256)
5379            .expect("test operation should succeed");
5380        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5381            .expect("test operation should succeed");
5382        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
5383
5384        let result = db
5385            .install_pack(&pack)
5386            .expect("test operation should succeed");
5387
5388        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
5389        assert_eq!(result.object_ids, vec![oid]);
5390        assert!(result.pack_path.exists());
5391        assert!(result.index_path.exists());
5392        assert_eq!(result.promisor_path, None);
5393        assert!(
5394            !db.loose()
5395                .object_path(&oid)
5396                .expect("test operation should succeed")
5397                .exists()
5398        );
5399        assert!(db.contains(&oid).expect("test operation should succeed"));
5400        assert_eq!(read_object_for_assert(&db, &oid), object);
5401        fs::remove_dir_all(root).expect("test operation should succeed");
5402    }
5403
5404    #[test]
5405    fn file_database_installs_raw_sha256_pack_without_loose_objects() {
5406        let root = temp_root("sley-file-odb-install-raw-pack");
5407        let git_dir = root.join(".git");
5408        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5409        let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
5410        let oid = object
5411            .object_id(ObjectFormat::Sha256)
5412            .expect("test operation should succeed");
5413        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5414            .expect("test operation should succeed");
5415        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
5416
5417        let result = db
5418            .install_raw_pack(&pack.pack)
5419            .expect("test operation should succeed");
5420
5421        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
5422        assert_eq!(result.object_ids, vec![oid]);
5423        assert!(result.pack_path.exists());
5424        assert!(result.index_path.exists());
5425        assert_eq!(result.promisor_path, None);
5426        assert!(
5427            !db.loose()
5428                .object_path(&oid)
5429                .expect("test operation should succeed")
5430                .exists()
5431        );
5432        assert!(db.contains(&oid).expect("test operation should succeed"));
5433        assert_eq!(read_object_for_assert(&db, &oid), object);
5434        fs::remove_dir_all(root).expect("test operation should succeed");
5435    }
5436
5437    #[test]
5438    fn file_database_rejects_noncanonical_pack_index() {
5439        let root = temp_root("sley-file-odb-install-bad-index");
5440        let git_dir = root.join(".git");
5441        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5442        let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
5443        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
5444            .expect("test operation should succeed");
5445        let mut entries = pack.entries.clone();
5446        entries[0].crc32 ^= 1;
5447        let mut bad_pack = pack.clone();
5448        bad_pack.index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack.checksum)
5449            .expect("test operation should succeed");
5450        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5451
5452        assert!(db.install_pack(&bad_pack).is_err());
5453
5454        fs::remove_dir_all(root).expect("test operation should succeed");
5455    }
5456
5457    #[test]
5458    fn file_database_installs_raw_promisor_pack_with_sidecar() {
5459        let root = temp_root("sley-file-odb-install-raw-promisor-pack");
5460        let git_dir = root.join(".git");
5461        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5462        let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
5463        let oid = object
5464            .object_id(ObjectFormat::Sha1)
5465            .expect("test operation should succeed");
5466        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
5467            .expect("test operation should succeed");
5468        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5469
5470        let result = db
5471            .install_raw_pack_with_options(&pack.pack, RawPackInstallOptions { promisor: true })
5472            .expect("test operation should succeed");
5473
5474        let promisor_path = result.promisor_path.expect("promisor sidecar");
5475        assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
5476        assert_eq!(
5477            promisor_path.extension().and_then(|ext| ext.to_str()),
5478            Some("promisor")
5479        );
5480        assert!(promisor_path.exists());
5481        assert_eq!(
5482            fs::read(&promisor_path).expect("test operation should succeed"),
5483            b""
5484        );
5485        assert!(result.pack_path.exists());
5486        assert!(result.index_path.exists());
5487        assert!(
5488            !db.loose()
5489                .object_path(&oid)
5490                .expect("test operation should succeed")
5491                .exists()
5492        );
5493        assert_eq!(read_object_for_assert(&db, &oid), object);
5494        fs::remove_dir_all(root).expect("test operation should succeed");
5495    }
5496
5497    #[test]
5498    fn repository_objects_dir_uses_linked_worktree_common_dir() {
5499        let root = temp_root("sley-odb-common-dir");
5500        let common = root.join(".git");
5501        let admin = common.join("worktrees").join("linked");
5502        fs::create_dir_all(&admin).expect("test operation should succeed");
5503        fs::write(admin.join("commondir"), "../..\n").expect("test operation should succeed");
5504
5505        let common = fs::canonicalize(common).expect("test operation should succeed");
5506        assert_eq!(repository_common_dir(&admin), common);
5507        assert_eq!(repository_objects_dir(&admin), common.join("objects"));
5508
5509        fs::remove_dir_all(root).expect("test operation should succeed");
5510    }
5511
5512    #[test]
5513    fn reachable_object_helpers_walk_graph_and_install_pack() {
5514        let root = temp_root("sley-reachable-pack");
5515        let source_git_dir = root.join("source.git");
5516        let destination_git_dir = root.join("destination.git");
5517        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5518        fs::create_dir_all(destination_git_dir.join("objects"))
5519            .expect("test operation should succeed");
5520        let format = ObjectFormat::Sha1;
5521        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5522        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5523
5524        let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
5525        let blob_oid = source
5526            .write_object(blob.clone())
5527            .expect("test operation should succeed");
5528        let tree = EncodedObject::new(
5529            ObjectType::Tree,
5530            Tree {
5531                entries: vec![TreeEntry {
5532                    mode: 0o100644,
5533                    name: BString::from(b"payload.txt"),
5534                    oid: blob_oid,
5535                }],
5536            }
5537            .write(),
5538        );
5539        let tree_oid = source
5540            .write_object(tree.clone())
5541            .expect("test operation should succeed");
5542        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5543        let commit = EncodedObject::new(
5544            ObjectType::Commit,
5545            Commit {
5546                tree: tree_oid,
5547                parents: Vec::new(),
5548                author: identity.clone(),
5549                committer: identity,
5550                encoding: None,
5551                message: b"initial\n".to_vec(),
5552            }
5553            .write(),
5554        );
5555        let commit_oid = source
5556            .write_object(commit.clone())
5557            .expect("test operation should succeed");
5558
5559        let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
5560            .expect("test operation should succeed");
5561        assert!(reachable.contains(&commit_oid));
5562        assert!(reachable.contains(&tree_oid));
5563        assert!(reachable.contains(&blob_oid));
5564
5565        let install =
5566            install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
5567                .expect("test operation should succeed")
5568                .expect("reachable pack should be written");
5569        assert_eq!(install.object_ids.len(), 3);
5570        for (oid, object) in [
5571            (&commit_oid, &commit),
5572            (&tree_oid, &tree),
5573            (&blob_oid, &blob),
5574        ] {
5575            assert!(
5576                !destination
5577                    .loose()
5578                    .object_path(oid)
5579                    .expect("test operation should succeed")
5580                    .exists()
5581            );
5582            assert!(
5583                destination
5584                    .contains(oid)
5585                    .expect("test operation should succeed")
5586            );
5587            assert_eq!(read_object_for_assert(&destination, oid), *object);
5588        }
5589        fs::remove_dir_all(root).expect("test operation should succeed");
5590    }
5591
5592    #[test]
5593    fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
5594        let root = temp_root("sley-reachable-exclusions");
5595        let git_dir = root.join("repo.git");
5596        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5597        let format = ObjectFormat::Sha1;
5598        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5599
5600        let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
5601        let blob_oid = db
5602            .write_object(blob)
5603            .expect("test operation should succeed");
5604        let tree = EncodedObject::new(
5605            ObjectType::Tree,
5606            Tree {
5607                entries: vec![TreeEntry {
5608                    mode: 0o100644,
5609                    name: BString::from(b"payload.txt"),
5610                    oid: blob_oid,
5611                }],
5612            }
5613            .write(),
5614        );
5615        let tree_oid = db
5616            .write_object(tree)
5617            .expect("test operation should succeed");
5618        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5619        let commit = EncodedObject::new(
5620            ObjectType::Commit,
5621            Commit {
5622                tree: tree_oid,
5623                parents: Vec::new(),
5624                author: identity.clone(),
5625                committer: identity,
5626                encoding: None,
5627                message: b"initial\n".to_vec(),
5628            }
5629            .write(),
5630        );
5631        let commit_oid = db
5632            .write_object(commit)
5633            .expect("test operation should succeed");
5634        let excluded = HashSet::from([tree_oid]);
5635
5636        let objects = collect_reachable_objects(&db, format, [commit_oid, commit_oid], &excluded)
5637            .expect("test operation should succeed");
5638
5639        assert_eq!(objects.len(), 1);
5640        assert_eq!(
5641            objects[0]
5642                .object_id(format)
5643                .expect("test operation should succeed"),
5644            commit_oid
5645        );
5646        fs::remove_dir_all(root).expect("test operation should succeed");
5647    }
5648
5649    #[test]
5650    fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
5651        let root = temp_root("sley-build-reachable-pack");
5652        let git_dir = root.join("repo.git");
5653        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5654        let format = ObjectFormat::Sha1;
5655        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5656
5657        let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
5658        let oid = db
5659            .write_object(object.clone())
5660            .expect("test operation should succeed");
5661        let pack = build_reachable_pack(&db, format, std::iter::once(oid), &HashSet::new())
5662            .expect("test operation should succeed")
5663            .expect("reachable pack should be built");
5664        assert!(pack.pack.starts_with(b"PACK"));
5665        assert_eq!(pack.entries.len(), 1);
5666        assert_eq!(pack.entries[0].oid, oid);
5667
5668        let excluded = HashSet::from([oid]);
5669        assert!(
5670            build_reachable_pack(
5671                &db,
5672                format,
5673                pack.entries.into_iter().map(|entry| entry.oid),
5674                &excluded
5675            )
5676            .expect("test operation should succeed")
5677            .is_none()
5678        );
5679        fs::remove_dir_all(root).expect("test operation should succeed");
5680    }
5681
5682    #[test]
5683    fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
5684        let root = temp_root("sley-reachable-tags");
5685        let git_dir = root.join("repo.git");
5686        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5687        let format = ObjectFormat::Sha1;
5688        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5689
5690        let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
5691        let blob_oid = db
5692            .write_object(blob)
5693            .expect("test operation should succeed");
5694        let tag = EncodedObject::new(
5695            ObjectType::Tag,
5696            Tag {
5697                object: blob_oid,
5698                object_type: ObjectType::Blob,
5699                name: b"v1".to_vec(),
5700                tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
5701                message: b"tag message\n".to_vec(),
5702                raw_body: None,
5703            }
5704            .write(),
5705        );
5706        let tag_oid = db.write_object(tag).expect("test operation should succeed");
5707
5708        let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
5709            .expect("test operation should succeed");
5710        assert!(reachable.contains(&tag_oid));
5711        assert!(reachable.contains(&blob_oid));
5712
5713        let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
5714            .expect("test operation should succeed");
5715        let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
5716            .expect_err("missing traversal root should error");
5717        let kind = err.not_found_kind().expect("typed not found");
5718        assert_eq!(kind.object_id(), Some(missing));
5719        assert_eq!(
5720            kind.missing_object_context(),
5721            Some(MissingObjectContext::Traversal)
5722        );
5723        fs::remove_dir_all(root).expect("test operation should succeed");
5724    }
5725
5726    #[test]
5727    fn install_reachable_pack_empty_starts_create_no_pack() {
5728        let root = temp_root("sley-reachable-empty");
5729        let source_git_dir = root.join("source.git");
5730        let destination_git_dir = root.join("destination.git");
5731        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5732        fs::create_dir_all(destination_git_dir.join("objects"))
5733            .expect("test operation should succeed");
5734        let format = ObjectFormat::Sha1;
5735        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5736        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5737
5738        let result = install_reachable_pack(&source, &destination, format, Vec::<ObjectId>::new())
5739            .expect("test operation should succeed");
5740
5741        assert!(result.is_none());
5742        assert!(!destination_git_dir.join("objects").join("pack").exists());
5743        fs::remove_dir_all(root).expect("test operation should succeed");
5744    }
5745
5746    #[test]
5747    fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
5748        let root = temp_root("sley-reachable-install-excluding");
5749        let source_git_dir = root.join("source.git");
5750        let destination_git_dir = root.join("destination.git");
5751        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5752        fs::create_dir_all(destination_git_dir.join("objects"))
5753            .expect("test operation should succeed");
5754        let format = ObjectFormat::Sha1;
5755        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5756        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5757        let object = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
5758        let oid = source
5759            .write_object(object)
5760            .expect("test operation should succeed");
5761        let excluded = HashSet::from([oid]);
5762
5763        let result = install_reachable_pack_excluding(
5764            &source,
5765            &destination,
5766            format,
5767            std::iter::once(oid),
5768            &excluded,
5769        )
5770        .expect("test operation should succeed");
5771
5772        assert!(result.is_none());
5773        assert!(!destination_git_dir.join("objects").join("pack").exists());
5774        fs::remove_dir_all(root).expect("test operation should succeed");
5775    }
5776
5777    #[test]
5778    fn install_reachable_pack_supports_sha256() {
5779        let root = temp_root("sley-reachable-pack-sha256");
5780        let source_git_dir = root.join("source.git");
5781        let destination_git_dir = root.join("destination.git");
5782        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5783        fs::create_dir_all(destination_git_dir.join("objects"))
5784            .expect("test operation should succeed");
5785        let format = ObjectFormat::Sha256;
5786        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5787        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5788        let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
5789        let oid = source
5790            .write_object(object.clone())
5791            .expect("test operation should succeed");
5792
5793        let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
5794            .expect("test operation should succeed")
5795            .expect("sha256 reachable pack should be built");
5796        assert!(pack.pack.starts_with(b"PACK"));
5797        assert_eq!(pack.entries[0].oid, oid);
5798
5799        let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
5800            .expect("test operation should succeed")
5801            .expect("sha256 reachable pack should be written");
5802
5803        assert_eq!(result.object_ids, vec![oid]);
5804        assert!(
5805            !destination
5806                .loose()
5807                .object_path(&oid)
5808                .expect("test operation should succeed")
5809                .exists()
5810        );
5811        assert_eq!(read_object_for_assert(&destination, &oid), object);
5812        fs::remove_dir_all(root).expect("test operation should succeed");
5813    }
5814
5815    #[test]
5816    fn install_helpers_accept_custom_raw_pack_installer() {
5817        #[derive(Default)]
5818        struct RecordingInstaller {
5819            packs: std::cell::RefCell<Vec<Vec<u8>>>,
5820            installed: std::cell::RefCell<Vec<ObjectId>>,
5821        }
5822
5823        impl RawPackInstaller for RecordingInstaller {
5824            fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
5825                self.packs.borrow_mut().push(pack_bytes.to_vec());
5826                let object_ids = self.installed.borrow().clone();
5827                Ok(RawPackInstallResult { object_ids })
5828            }
5829        }
5830
5831        let format = ObjectFormat::Sha1;
5832        let source = ObjectDatabase::new(format);
5833        let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
5834        let oid = source
5835            .write_object(object)
5836            .expect("test operation should succeed");
5837        let installer = RecordingInstaller::default();
5838        installer.installed.borrow_mut().push(oid);
5839
5840        let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
5841            .expect("test operation should succeed")
5842            .expect("custom installer should receive pack");
5843
5844        assert_eq!(result.object_ids, installer.installed.into_inner());
5845        let packs = installer.packs.into_inner();
5846        assert_eq!(packs.len(), 1);
5847        assert!(packs[0].starts_with(b"PACK"));
5848    }
5849
5850    #[test]
5851    fn file_database_reads_object_from_multi_pack_index() {
5852        let root = temp_root("sley-file-odb-midx");
5853        let git_dir = root.join(".git");
5854        let pack_dir = git_dir.join("objects").join("pack");
5855        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5856        let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
5857        let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
5858        let first_oid = first
5859            .object_id(ObjectFormat::Sha1)
5860            .expect("test operation should succeed");
5861        let second_oid = second
5862            .object_id(ObjectFormat::Sha1)
5863            .expect("test operation should succeed");
5864        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5865            .expect("test operation should succeed");
5866        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5867            .expect("test operation should succeed");
5868        let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
5869        let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
5870        fs::write(
5871            pack_dir.join(first_pack_name.replace(".idx", ".pack")),
5872            first_pack.pack,
5873        )
5874        .expect("test operation should succeed");
5875        fs::write(
5876            pack_dir.join(second_pack_name.replace(".idx", ".pack")),
5877            second_pack.pack,
5878        )
5879        .expect("test operation should succeed");
5880        let midx = MultiPackIndex::write(
5881            ObjectFormat::Sha1,
5882            2,
5883            &[first_pack_name, second_pack_name],
5884            &[
5885                sley_pack::MultiPackIndexEntry {
5886                    oid: first_oid,
5887                    pack_int_id: 0,
5888                    offset: first_pack.entries[0].offset,
5889                },
5890                sley_pack::MultiPackIndexEntry {
5891                    oid: second_oid,
5892                    pack_int_id: 1,
5893                    offset: second_pack.entries[0].offset,
5894                },
5895            ],
5896        )
5897        .expect("test operation should succeed");
5898        fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");
5899
5900        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5901        assert!(
5902            db.contains(&second_oid)
5903                .expect("test operation should succeed")
5904        );
5905        assert_eq!(
5906            db.resolve_prefix(&second_oid.to_hex()[..8])
5907                .expect("test operation should succeed"),
5908            ObjectPrefixResolution::Unique(second_oid)
5909        );
5910        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5911        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5912        fs::remove_dir_all(root).expect("test operation should succeed");
5913    }
5914
5915    #[test]
5916    fn file_database_finds_pack_added_after_registry_was_cached() {
5917        // Regression guard for the cached pack-directory registry: a pack written
5918        // after the registry was first cached (via a prior read) must still be
5919        // discovered by the same handle, because a miss triggers a re-scan.
5920        let root = temp_root("sley-file-odb-pack-added-late");
5921        let git_dir = root.join(".git");
5922        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5923        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5924
5925        // First pack + object; reading it populates the registry cache.
5926        let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
5927        let first_oid = first
5928            .object_id(ObjectFormat::Sha1)
5929            .expect("test operation should succeed");
5930        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5931            .expect("test operation should succeed");
5932        db.install_pack(&first_pack)
5933            .expect("test operation should succeed");
5934        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5935
5936        // A second object that the cached registry does not yet know about.
5937        let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
5938        let second_oid = second
5939            .object_id(ObjectFormat::Sha1)
5940            .expect("test operation should succeed");
5941        // It is genuinely absent right now.
5942        assert!(matches!(
5943            db.read_object(&second_oid),
5944            Err(GitError::NotFound(_))
5945        ));
5946
5947        // Install its pack through the same handle; the next read must find it via
5948        // a re-scan, not be masked by the stale registry.
5949        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5950            .expect("test operation should succeed");
5951        db.install_pack(&second_pack)
5952            .expect("test operation should succeed");
5953        assert!(
5954            db.contains(&second_oid)
5955                .expect("test operation should succeed")
5956        );
5957        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5958        // The original object still resolves too.
5959        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5960
5961        fs::remove_dir_all(root).expect("test operation should succeed");
5962    }
5963
5964    #[test]
5965    fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
5966        let root = temp_root("sley-presence-checker-pack-added-late");
5967        let git_dir = root.join(".git");
5968        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5969        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5970
5971        let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
5972        let first_oid = first
5973            .object_id(ObjectFormat::Sha1)
5974            .expect("test operation should succeed");
5975        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5976            .expect("test operation should succeed");
5977        db.install_pack(&first_pack)
5978            .expect("test operation should succeed");
5979
5980        let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
5981        let second_oid = second
5982            .object_id(ObjectFormat::Sha1)
5983            .expect("test operation should succeed");
5984        let mut checker = db.presence_checker();
5985        assert!(
5986            checker
5987                .contains(&first_oid)
5988                .expect("test operation should succeed")
5989        );
5990        assert!(
5991            !checker
5992                .contains(&second_oid)
5993                .expect("test operation should succeed")
5994        );
5995
5996        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5997            .expect("test operation should succeed");
5998        db.install_pack(&second_pack)
5999            .expect("test operation should succeed");
6000
6001        assert!(
6002            checker
6003                .contains(&second_oid)
6004                .expect("test operation should succeed")
6005        );
6006        fs::remove_dir_all(root).expect("test operation should succeed");
6007    }
6008
6009    #[test]
6010    fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
6011        let root = temp_root("sley-file-odb-pack-registry-refresh");
6012        let git_dir = root.join(".git");
6013        let pack_dir = git_dir.join("objects").join("pack");
6014        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6015        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6016
6017        let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
6018        let first_oid = first
6019            .object_id(ObjectFormat::Sha1)
6020            .expect("test operation should succeed");
6021        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6022            .expect("test operation should succeed");
6023        db.install_pack(&first_pack)
6024            .expect("test operation should succeed");
6025
6026        let first_registry = db
6027            .cached_pack_registry(&pack_dir, false)
6028            .expect("test operation should succeed");
6029        assert_eq!(first_registry.fingerprint.idx_count, 1);
6030        assert_eq!(first_registry.fingerprint.pack_count, 1);
6031        assert_eq!(first_registry.packs.len(), 1);
6032        assert!(
6033            first_registry.packs[0]
6034                .index
6035                .lock()
6036                .expect("test operation should succeed")
6037                .is_none()
6038        );
6039        assert!(
6040            first_registry.packs[0]
6041                .data
6042                .lock()
6043                .expect("test operation should succeed")
6044                .is_none()
6045        );
6046
6047        // Existence checks use the parsed index directly and do not load pack
6048        // bytes; a full read fills the registry-owned pack data handle.
6049        assert!(
6050            db.contains(&first_oid)
6051                .expect("test operation should succeed")
6052        );
6053        assert!(
6054            first_registry.packs[0]
6055                .index
6056                .lock()
6057                .expect("test operation should succeed")
6058                .is_some()
6059        );
6060        assert!(
6061            first_registry.packs[0]
6062                .data
6063                .lock()
6064                .expect("test operation should succeed")
6065                .is_none()
6066        );
6067        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6068        assert!(
6069            first_registry.packs[0]
6070                .data
6071                .lock()
6072                .expect("test operation should succeed")
6073                .is_some()
6074        );
6075
6076        let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
6077        let second_oid = second
6078            .object_id(ObjectFormat::Sha1)
6079            .expect("test operation should succeed");
6080        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6081            .expect("test operation should succeed");
6082        db.install_pack(&second_pack)
6083            .expect("test operation should succeed");
6084
6085        let refreshed = db
6086            .cached_pack_registry(&pack_dir, true)
6087            .expect("test operation should succeed");
6088        assert!(!Arc::ptr_eq(&first_registry, &refreshed));
6089        assert_eq!(refreshed.fingerprint.idx_count, 2);
6090        assert_eq!(refreshed.fingerprint.pack_count, 2);
6091        assert_eq!(refreshed.packs.len(), 2);
6092        assert_eq!(read_object_for_assert(&db, &second_oid), second);
6093
6094        fs::remove_dir_all(root).expect("test operation should succeed");
6095    }
6096
6097    #[test]
6098    fn file_database_pack_search_hint_rebuilds_after_pack_added() {
6099        // Regression guard for the recent-pack search hint: it is tied to the
6100        // cached pack registry, so a miss followed by a changed registry must not
6101        // hide newly-added packs.
6102        let root = temp_root("sley-file-odb-pack-lookup-added-late");
6103        let git_dir = root.join(".git");
6104        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6105        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6106
6107        let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
6108        let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
6109        let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
6110        let first_oid = first
6111            .object_id(ObjectFormat::Sha1)
6112            .expect("test operation should succeed");
6113        let second_oid = second
6114            .object_id(ObjectFormat::Sha1)
6115            .expect("test operation should succeed");
6116        let third_oid = third
6117            .object_id(ObjectFormat::Sha1)
6118            .expect("test operation should succeed");
6119
6120        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6121            .expect("test operation should succeed");
6122        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6123            .expect("test operation should succeed");
6124        db.install_pack(&first_pack)
6125            .expect("test operation should succeed");
6126        db.install_pack(&second_pack)
6127            .expect("test operation should succeed");
6128
6129        // With two packs, these reads establish a cached registry and pack hint.
6130        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6131        assert_eq!(read_object_for_assert(&db, &second_oid), second);
6132        assert!(matches!(
6133            db.read_object(&third_oid),
6134            Err(GitError::NotFound(_))
6135        ));
6136
6137        let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
6138            .expect("test operation should succeed");
6139        db.install_pack(&third_pack)
6140            .expect("test operation should succeed");
6141
6142        assert_eq!(read_object_for_assert(&db, &third_oid), third);
6143        assert_eq!(read_object_for_assert(&db, &first_oid), first);
6144
6145        fs::remove_dir_all(root).expect("test operation should succeed");
6146    }
6147
6148    #[test]
6149    fn file_database_prefers_loose_object_over_packed_object() {
6150        let root = temp_root("sley-file-odb-prefer-loose");
6151        let git_dir = root.join(".git");
6152        let pack_dir = git_dir.join("objects").join("pack");
6153        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6154        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
6155        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6156            .expect("test operation should succeed");
6157        let pack_name = written.checksum.to_hex();
6158        fs::write(
6159            pack_dir.join(format!("pack-{pack_name}.pack")),
6160            written.pack,
6161        )
6162        .expect("test operation should succeed");
6163        fs::write(
6164            pack_dir.join(format!("pack-{pack_name}.idx")),
6165            written.index,
6166        )
6167        .expect("test operation should succeed");
6168
6169        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6170        let oid = db
6171            .write_object(object.clone())
6172            .expect("test operation should succeed");
6173        assert_eq!(read_object_for_assert(&db, &oid), object);
6174        fs::remove_dir_all(root).expect("test operation should succeed");
6175    }
6176
6177    #[test]
6178    fn bundle_prerequisite_verification_reads_existing_objects() {
6179        let db = ObjectDatabase::new(ObjectFormat::Sha1);
6180        let oid = db
6181            .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
6182            .expect("test operation should succeed");
6183        let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
6184        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6185            .expect("test operation should succeed");
6186
6187        verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
6188    }
6189
6190    #[test]
6191    fn bundle_prerequisite_verification_reports_missing_objects() {
6192        let db = ObjectDatabase::new(ObjectFormat::Sha1);
6193        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6194            .expect("test operation should succeed");
6195        let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
6196        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6197            .expect("test operation should succeed");
6198
6199        assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
6200    }
6201
6202    #[test]
6203    fn unbundle_objects_writes_pack_entries_and_returns_refs() {
6204        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6205        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6206        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6207        let oid = object
6208            .object_id(ObjectFormat::Sha1)
6209            .expect("test operation should succeed");
6210        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6211            .expect("test operation should succeed");
6212        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6213            .into_bytes()
6214            .into_iter()
6215            .chain(pack.pack)
6216            .collect::<Vec<_>>();
6217        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6218            .expect("test operation should succeed");
6219
6220        let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
6221            .expect("test operation should succeed");
6222        assert_eq!(result.written_objects, vec![oid]);
6223        assert_eq!(result.references, bundle.references);
6224        assert_eq!(read_object_for_assert(&writer, &oid), object);
6225    }
6226
6227    #[test]
6228    fn install_bundle_pack_writes_pack_and_returns_refs() {
6229        let root = temp_root("sley-install-bundle-pack");
6230        let git_dir = root.join(".git");
6231        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6232        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6233        let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6234        let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
6235        let oid = object
6236            .object_id(ObjectFormat::Sha1)
6237            .expect("test operation should succeed");
6238        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6239            .expect("test operation should succeed");
6240        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6241            .into_bytes()
6242            .into_iter()
6243            .chain(pack.pack)
6244            .collect::<Vec<_>>();
6245        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6246            .expect("test operation should succeed");
6247
6248        let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
6249            .expect("test operation should succeed");
6250
6251        assert_eq!(result.written_objects, vec![oid]);
6252        assert_eq!(result.references, bundle.references);
6253        assert!(
6254            database
6255                .contains(&oid)
6256                .expect("test operation should succeed")
6257        );
6258        assert_eq!(read_object_for_assert(&database, &oid), object);
6259        assert!(
6260            !database
6261                .loose()
6262                .object_path(&oid)
6263                .expect("test operation should succeed")
6264                .exists()
6265        );
6266        fs::remove_dir_all(root).expect("test operation should succeed");
6267    }
6268
6269    #[test]
6270    fn unpack_packfile_objects_writes_sha256_pack_entries() {
6271        let writer = ObjectDatabase::new(ObjectFormat::Sha256);
6272        let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
6273        let oid = object
6274            .object_id(ObjectFormat::Sha256)
6275            .expect("test operation should succeed");
6276        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6277            .expect("test operation should succeed");
6278
6279        let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
6280            .expect("test operation should succeed");
6281
6282        assert_eq!(result.written_objects, vec![oid]);
6283        assert_eq!(read_object_for_assert(&writer, &oid), object);
6284    }
6285
6286    #[test]
6287    fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
6288        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6289        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6290        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6291            .expect("test operation should succeed");
6292        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6293        let oid = object
6294            .object_id(ObjectFormat::Sha1)
6295            .expect("test operation should succeed");
6296        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6297            .expect("test operation should succeed");
6298        let bundle_bytes =
6299            format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
6300                .into_bytes()
6301                .into_iter()
6302                .chain(pack.pack)
6303                .collect::<Vec<_>>();
6304        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6305            .expect("test operation should succeed");
6306
6307        assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
6308        assert!(!writer.contains(&oid));
6309    }
6310
6311    /// Build a commit -> tree -> blob graph in `db`, returning the three object
6312    /// ids and their canonical encodings as `(oid, object)` pairs.
6313    fn write_commit_graph(
6314        db: &mut FileObjectDatabase,
6315        payload: &[u8],
6316    ) -> Vec<(ObjectId, EncodedObject)> {
6317        let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
6318        let blob_oid = db
6319            .write_object(blob.clone())
6320            .expect("test operation should succeed");
6321        let tree = EncodedObject::new(
6322            ObjectType::Tree,
6323            Tree {
6324                entries: vec![TreeEntry {
6325                    mode: 0o100644,
6326                    name: BString::from(b"payload.txt"),
6327                    oid: blob_oid,
6328                }],
6329            }
6330            .write(),
6331        );
6332        let tree_oid = db
6333            .write_object(tree.clone())
6334            .expect("test operation should succeed");
6335        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6336        let commit = EncodedObject::new(
6337            ObjectType::Commit,
6338            Commit {
6339                tree: tree_oid,
6340                parents: Vec::new(),
6341                author: identity.clone(),
6342                committer: identity,
6343                encoding: None,
6344                message: b"initial\n".to_vec(),
6345            }
6346            .write(),
6347        );
6348        let commit_oid = db
6349            .write_object(commit.clone())
6350            .expect("test operation should succeed");
6351        vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
6352    }
6353
6354    fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
6355        let root = temp_root("sley-repack-all");
6356        let git_dir = root.join(".git");
6357        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6358        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6359
6360        // A pre-existing pack holds one blob; the rest of the graph is loose.
6361        let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
6362        let packed_oid = packed_blob
6363            .object_id(format)
6364            .expect("test operation should succeed");
6365        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
6366            .expect("test operation should succeed");
6367        let existing = db
6368            .install_pack(&existing_pack)
6369            .expect("test operation should succeed");
6370
6371        let graph = write_commit_graph(&mut db, b"repack payload\n");
6372
6373        let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
6374        expected.insert(packed_oid, packed_blob.clone());
6375
6376        let result = repack_all_objects(&git_dir, format)
6377            .expect("test operation should succeed")
6378            .expect("repository has objects");
6379
6380        // The new pack round-trips and contains every original object byte-for-byte.
6381        assert_eq!(result.object_count, expected.len());
6382        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
6383        assert_eq!(parsed.entries.len(), expected.len());
6384        for entry in &parsed.entries {
6385            let want = expected
6386                .get(&entry.entry.oid)
6387                .expect("packed object was in the repository");
6388            assert_eq!(&entry.object, want);
6389            assert_eq!(
6390                entry
6391                    .object
6392                    .object_id(format)
6393                    .expect("test operation should succeed"),
6394                entry.entry.oid
6395            );
6396        }
6397        // The generated index parses and agrees with the pack checksum.
6398        let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
6399        assert_eq!(idx.pack_checksum, parsed.checksum);
6400        assert_eq!(idx.entries.len(), expected.len());
6401
6402        // The pre-existing pack is reported obsolete (by its .pack path).
6403        assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
6404        // Every loose object id is reported as now packed.
6405        let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
6406        want_loose.sort_by_key(ObjectId::to_hex);
6407        assert_eq!(result.packed_loose, want_loose);
6408        assert!(!result.packed_loose.contains(&packed_oid));
6409
6410        fs::remove_dir_all(root).expect("test operation should succeed");
6411    }
6412
6413    #[test]
6414    fn repack_all_objects_consolidates_loose_and_pack_sha1() {
6415        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
6416    }
6417
6418    #[test]
6419    fn repack_all_objects_consolidates_loose_and_pack_sha256() {
6420        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
6421    }
6422
6423    #[test]
6424    fn repack_all_objects_returns_none_for_empty_repository() {
6425        let root = temp_root("sley-repack-empty");
6426        let git_dir = root.join(".git");
6427        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6428
6429        assert!(
6430            repack_all_objects(&git_dir, ObjectFormat::Sha1)
6431                .expect("test operation should succeed")
6432                .is_none()
6433        );
6434
6435        fs::remove_dir_all(root).expect("test operation should succeed");
6436    }
6437
6438    #[test]
6439    fn install_repack_result_writes_pack_without_pruning_by_default() {
6440        let root = temp_root("sley-repack-install-nodelete");
6441        let git_dir = root.join(".git");
6442        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6443        let format = ObjectFormat::Sha1;
6444        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6445        let graph = write_commit_graph(&mut db, b"install no prune\n");
6446
6447        let result = repack_all_objects(&git_dir, format)
6448            .expect("test operation should succeed")
6449            .expect("test operation should succeed");
6450        install_repack_result(&git_dir, format, &result, false)
6451            .expect("test operation should succeed");
6452
6453        // New pack is on disk and readable.
6454        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
6455        let pack_dir = git_dir.join("objects").join("pack");
6456        let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
6457        let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
6458        assert!(pack_path.exists());
6459        assert!(idx_path.exists());
6460        // Loose objects survive because prune was not requested.
6461        for (oid, object) in &graph {
6462            assert!(
6463                db.loose()
6464                    .object_path(oid)
6465                    .expect("test operation should succeed")
6466                    .exists()
6467            );
6468            assert_eq!(read_object_for_assert(&db, oid), *object);
6469        }
6470
6471        fs::remove_dir_all(root).expect("test operation should succeed");
6472    }
6473
6474    #[test]
6475    fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
6476        let root = temp_root("sley-repack-install-prune");
6477        let git_dir = root.join(".git");
6478        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6479        let format = ObjectFormat::Sha1;
6480        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6481
6482        let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
6483        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
6484            .expect("test operation should succeed");
6485        let existing = db
6486            .install_pack(&existing_pack)
6487            .expect("test operation should succeed");
6488        let graph = write_commit_graph(&mut db, b"prune payload\n");
6489
6490        let result = repack_all_objects(&git_dir, format)
6491            .expect("test operation should succeed")
6492            .expect("test operation should succeed");
6493        let new_pack_checksum = PackFile::parse(&result.pack, format)
6494            .expect("test operation should succeed")
6495            .checksum;
6496        install_repack_result(&git_dir, format, &result, true)
6497            .expect("test operation should succeed");
6498
6499        // Obsolete pack and its index are gone.
6500        assert!(!existing.pack_path.exists());
6501        assert!(!existing.index_path.exists());
6502        // Packed loose objects are gone from disk.
6503        for (oid, _) in &graph {
6504            assert!(
6505                !db.loose()
6506                    .object_path(oid)
6507                    .expect("test operation should succeed")
6508                    .exists()
6509            );
6510        }
6511        // The new consolidated pack remains and still serves every object.
6512        let pack_dir = git_dir.join("objects").join("pack");
6513        assert!(
6514            pack_dir
6515                .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
6516                .exists()
6517        );
6518        let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
6519        for (oid, object) in &graph {
6520            assert!(
6521                reopened
6522                    .contains(oid)
6523                    .expect("test operation should succeed")
6524            );
6525            assert_eq!(read_object_for_assert(&reopened, oid), *object);
6526        }
6527        let packed_oid = packed_blob
6528            .object_id(format)
6529            .expect("test operation should succeed");
6530        assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
6531
6532        fs::remove_dir_all(root).expect("test operation should succeed");
6533    }
6534
6535    #[test]
6536    fn install_repack_result_preserves_keep_and_promisor_packs() {
6537        let root = temp_root("sley-repack-install-keep-promisor");
6538        let git_dir = root.join(".git");
6539        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6540        let format = ObjectFormat::Sha1;
6541        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6542
6543        let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
6544        let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
6545            .expect("test operation should succeed");
6546        let keep_install = db
6547            .install_pack(&keep_pack)
6548            .expect("test operation should succeed");
6549        let keep_sidecar = keep_install.pack_path.with_extension("keep");
6550        fs::write(&keep_sidecar, b"").expect("test operation should succeed");
6551
6552        let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
6553        let promisor_pack =
6554            PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
6555                .expect("test operation should succeed");
6556        let promisor_install = db
6557            .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
6558            .expect("test operation should succeed");
6559        let promisor_sidecar = promisor_install
6560            .promisor_path
6561            .clone()
6562            .expect("promisor sidecar");
6563
6564        let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
6565        let result = repack_all_objects(&git_dir, format)
6566            .expect("test operation should succeed")
6567            .expect("test operation should succeed");
6568        assert!(result.obsolete_packs.contains(&keep_install.pack_path));
6569        assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
6570
6571        install_repack_result(&git_dir, format, &result, true)
6572            .expect("test operation should succeed");
6573
6574        for path in [
6575            &keep_install.pack_path,
6576            &keep_install.index_path,
6577            &keep_sidecar,
6578            &promisor_install.pack_path,
6579            &promisor_install.index_path,
6580            &promisor_sidecar,
6581        ] {
6582            assert!(path.exists(), "{} should be preserved", path.display());
6583        }
6584        for (oid, _) in &graph {
6585            assert!(
6586                !db.loose()
6587                    .object_path(oid)
6588                    .expect("test operation should succeed")
6589                    .exists()
6590            );
6591        }
6592
6593        fs::remove_dir_all(root).expect("test operation should succeed");
6594    }
6595
6596    #[test]
6597    fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
6598        // Safety: a loose object whose id is not in the new pack must survive
6599        // pruning even if the caller lists it in `packed_loose`.
6600        let root = temp_root("sley-repack-install-safety");
6601        let git_dir = root.join(".git");
6602        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6603        let format = ObjectFormat::Sha1;
6604        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6605        let graph = write_commit_graph(&mut db, b"safety packed\n");
6606
6607        let mut result = repack_all_objects(&git_dir, format)
6608            .expect("test operation should succeed")
6609            .expect("test operation should succeed");
6610
6611        // A loose object that is NOT in the new pack, but mislabeled as packed.
6612        let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
6613        let stray_oid = db
6614            .write_object(stray.clone())
6615            .expect("test operation should succeed");
6616        assert!(!result.packed_loose.contains(&stray_oid));
6617        result.packed_loose.push(stray_oid);
6618
6619        install_repack_result(&git_dir, format, &result, true)
6620            .expect("test operation should succeed");
6621
6622        // The stray loose object is untouched because it is not in the new pack.
6623        assert!(
6624            db.loose()
6625                .object_path(&stray_oid)
6626                .expect("test operation should succeed")
6627                .exists()
6628        );
6629        assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
6630        // Genuinely packed loose objects were still removed.
6631        for (oid, _) in &graph {
6632            assert!(
6633                !db.loose()
6634                    .object_path(oid)
6635                    .expect("test operation should succeed")
6636                    .exists()
6637            );
6638        }
6639
6640        fs::remove_dir_all(root).expect("test operation should succeed");
6641    }
6642
6643    #[test]
6644    fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
6645        let root = temp_root("sley-prune-unreachable");
6646        let git_dir = root.join(".git");
6647        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6648        let format = ObjectFormat::Sha1;
6649        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6650        let graph = write_commit_graph(&mut db, b"reachable payload\n");
6651        let commit_oid = graph[0].0.clone();
6652
6653        // A dangling loose blob not referenced by the commit graph.
6654        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
6655        let dangling_oid = db
6656            .write_object(dangling)
6657            .expect("test operation should succeed");
6658
6659        // Report-only pass leaves everything on disk.
6660        let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
6661            .expect("test operation should succeed");
6662        assert_eq!(reported, vec![dangling_oid]);
6663        assert!(
6664            db.loose()
6665                .object_path(&dangling_oid)
6666                .expect("test operation should succeed")
6667                .exists()
6668        );
6669
6670        // Deleting pass removes only the unreachable object.
6671        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
6672            .expect("test operation should succeed");
6673        assert_eq!(deleted, vec![dangling_oid]);
6674        assert!(
6675            !db.loose()
6676                .object_path(&dangling_oid)
6677                .expect("test operation should succeed")
6678                .exists()
6679        );
6680        for (oid, object) in &graph {
6681            assert!(
6682                db.loose()
6683                    .object_path(oid)
6684                    .expect("test operation should succeed")
6685                    .exists()
6686            );
6687            assert_eq!(read_object_for_assert(&db, oid), *object);
6688        }
6689
6690        fs::remove_dir_all(root).expect("test operation should succeed");
6691    }
6692
6693    #[test]
6694    fn prune_unreachable_loose_ignores_gitlink_targets() {
6695        let root = temp_root("sley-prune-gitlink");
6696        let git_dir = root.join(".git");
6697        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6698        let format = ObjectFormat::Sha1;
6699        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
6700
6701        let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
6702            .expect("test operation should succeed");
6703        let tree = EncodedObject::new(
6704            ObjectType::Tree,
6705            Tree {
6706                entries: vec![TreeEntry {
6707                    mode: 0o160000,
6708                    name: BString::from(b"submodule"),
6709                    oid: submodule_oid,
6710                }],
6711            }
6712            .write(),
6713        );
6714        let tree_oid = db
6715            .write_object(tree)
6716            .expect("test operation should succeed");
6717        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6718        let commit = EncodedObject::new(
6719            ObjectType::Commit,
6720            Commit {
6721                tree: tree_oid,
6722                parents: Vec::new(),
6723                author: identity.clone(),
6724                committer: identity,
6725                encoding: None,
6726                message: b"gitlink\n".to_vec(),
6727            }
6728            .write(),
6729        );
6730        let commit_oid = db
6731            .write_object(commit)
6732            .expect("test operation should succeed");
6733        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
6734        let dangling_oid = db
6735            .write_object(dangling)
6736            .expect("test operation should succeed");
6737
6738        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
6739            .expect("test operation should succeed");
6740
6741        assert_eq!(deleted, vec![dangling_oid]);
6742        assert!(
6743            !db.loose()
6744                .object_path(&dangling_oid)
6745                .expect("test operation should succeed")
6746                .exists()
6747        );
6748
6749        fs::remove_dir_all(root).expect("test operation should succeed");
6750    }
6751
6752    fn temp_root(prefix: &str) -> PathBuf {
6753        std::env::temp_dir().join(format!(
6754            "{prefix}-{}-{}",
6755            std::process::id(),
6756            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
6757        ))
6758    }
6759}