Skip to main content

sley_odb/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
9use sley_formats::{Bundle, BundleReference};
10use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
11use sley_pack::{
12    MultiPackIndex, PackBitmapIndex, PackBitmapWriter, PackFile, PackIndex, PackIndexEntry,
13    PackInput, PackWrite,
14};
15use std::collections::{HashMap, HashSet, VecDeque};
16use std::io::{Read, Seek, SeekFrom, Write};
17use std::path::{Path, PathBuf};
18use std::sync::atomic::{AtomicU64, Ordering};
19use std::sync::{Arc, Mutex, OnceLock};
20use std::{env, fs};
21
/// Process-wide atomic counter, starting at zero.
///
/// NOTE(review): the name suggests it is used to make temporary file names
/// unique; its users are outside this part of the file — confirm there.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
23
/// Read-only access to an object store keyed by [`ObjectId`].
pub trait ObjectReader {
    /// Look up the object stored under `oid` and return a shared handle to it.
    ///
    /// Callers in this file treat `GitError::NotFound` as "the object is
    /// absent" and propagate every other error unchanged.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Graft-points seam (shallow clones today, replace refs/grafts later):
    /// `true` when history is cut at `oid`, so every walk must treat the
    /// commit as parentless even though its raw body still names parents.
    ///
    /// [`FileObjectDatabase`] answers from `$GIT_DIR/shallow`; readers that
    /// are not backed by a repository (in-memory stores, pack overlays)
    /// keep the default "no grafts".
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        // Default: no history cut anywhere.
        false
    }
}
38
39fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
40    (*oid == ObjectId::empty_tree(format))
41        .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
42}
43
44fn with_missing_object_context(
45    err: GitError,
46    oid: ObjectId,
47    context: MissingObjectContext,
48) -> GitError {
49    let kind = err
50        .not_found_kind()
51        .and_then(sley_core::NotFoundKind::missing_object_kind);
52    match kind {
53        Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
54        None => err,
55    }
56}
57
58/// Parents of a parsed commit with the graft seam applied: empty when the
59/// reader cuts history at `oid` (shallow boundary), the raw parsed parents
60/// otherwise.
61pub fn grafted_parents<R: ObjectReader + ?Sized>(
62    reader: &R,
63    oid: &ObjectId,
64    parents: Vec<ObjectId>,
65) -> Vec<ObjectId> {
66    if reader.is_shallow_graft(oid) {
67        Vec::new()
68    } else {
69        parents
70    }
71}
72
/// Write access to an object store: stores an [`EncodedObject`] and reports
/// the id it was stored under.
pub trait ObjectWriter {
    /// Write `object`, returning its id. Takes `&self`: every implementation's
    /// write state (in-memory map, loose-object cache) is behind interior
    /// mutability, so a single handle can interleave reads and writes without a
    /// `&mut` borrow. This lets the merge engine read and write through one `db`
    /// instead of opening a second read-only handle that re-warms the caches.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
81
/// Outcome of importing a bundle, as returned by [`unbundle_objects`] and
/// [`install_bundle_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids reported by the writer/installer for the bundle's pack objects.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the bundle's own reference list.
    pub references: Vec<BundleReference>,
}
87
/// Outcome of writing a parsed pack's objects through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids returned by the writer, in the order the pack entries were written.
    pub written_objects: Vec<ObjectId>,
}
92
/// Description of an installed pack, as returned by
/// [`build_and_install_reachable_pack`] and its `_filtered` variant.
///
/// NOTE(review): the code that fills these fields is outside this part of
/// the file; the per-field notes below are read off the names and types and
/// should be confirmed there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Presumably the pack's base name — confirm.
    pub pack_name: String,
    /// Presumably the location of the installed `.pack` file — confirm.
    pub pack_path: PathBuf,
    /// Presumably the location of the matching index file — confirm.
    pub index_path: PathBuf,
    /// Optional promisor marker path; `None` presumably means none was
    /// written — confirm.
    pub promisor_path: Option<PathBuf>,
    /// Ids of the objects associated with the installed pack.
    pub object_ids: Vec<ObjectId>,
}
101
/// Outcome of [`RawPackInstaller::install_raw_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Ids of the objects the installer reports for the pack it was given.
    pub object_ids: Vec<ObjectId>,
}
106
/// Options passed along when installing a generated pack.
///
/// The derived `Default` sets every flag to `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// NOTE(review): presumably requests that the pack be marked as a
    /// promisor pack (cf. `PackInstallResult::promisor_path`); the consumer
    /// is outside this view — confirm.
    pub promisor: bool,
}
111
/// A destination that can take the raw bytes of a pack and install the
/// objects it contains.
pub trait RawPackInstaller {
    /// Install the pack in `pack_bytes` and report the ids of its objects.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
115
/// Three-way outcome of resolving an object-id prefix.
///
/// NOTE(review): the resolver producing this value is outside this part of
/// the file; variant meanings are read off their names — confirm there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// No object matched.
    Missing,
    /// Exactly one object matched; carries its id.
    Unique(ObjectId),
    /// Several objects matched; carries the candidate ids.
    Ambiguous(Vec<ObjectId>),
}
122
/// Storage-level facts about a single object.
///
/// NOTE(review): the producer is outside this part of the file; the field
/// notes are read off the names — confirm units and conventions there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Presumably the on-disk size of the object's stored form, in bytes.
    pub disk_size: u64,
    /// Presumably the id of the delta base; how "not a delta" is encoded is
    /// not visible here — confirm.
    pub deltabase: ObjectId,
}
128
129impl RawPackInstaller for FileObjectDatabase {
130    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
131        let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
132        Ok(RawPackInstallResult {
133            object_ids: result.object_ids,
134        })
135    }
136}
137
138impl RawPackInstaller for ObjectDatabase {
139    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
140        let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
141        Ok(RawPackInstallResult {
142            object_ids: result.written_objects,
143        })
144    }
145}
146
147pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
148    let mut missing = Vec::new();
149    for prerequisite in &bundle.prerequisites {
150        match reader.read_object(&prerequisite.oid) {
151            Ok(object) => {
152                let actual = object.object_id(bundle.format)?;
153                if actual != prerequisite.oid {
154                    return Err(GitError::InvalidObject(format!(
155                        "bundle prerequisite {} hashes to {actual}",
156                        prerequisite.oid
157                    )));
158                }
159            }
160            Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
161            Err(err) => return Err(err),
162        }
163    }
164    if missing.is_empty() {
165        return Ok(());
166    }
167    Err(GitError::object_not_found_in(
168        missing[0],
169        MissingObjectContext::PackInstall,
170    ))
171}
172
173pub fn unbundle_objects<R, W>(
174    bundle: &Bundle,
175    prerequisite_reader: &R,
176    writer: &mut W,
177) -> Result<BundleUnbundleResult>
178where
179    R: ObjectReader,
180    W: ObjectWriter,
181{
182    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
183    let pack = PackFile::parse_bundle(bundle)?;
184    let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
185    Ok(BundleUnbundleResult {
186        written_objects,
187        references: bundle.references.clone(),
188    })
189}
190
191pub fn install_bundle_pack<R>(
192    bundle: &Bundle,
193    prerequisite_reader: &R,
194    destination: &impl RawPackInstaller,
195) -> Result<BundleUnbundleResult>
196where
197    R: ObjectReader,
198{
199    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
200    let install = destination.install_raw_pack(&bundle.pack)?;
201    Ok(BundleUnbundleResult {
202        written_objects: install.object_ids,
203        references: bundle.references.clone(),
204    })
205}
206
207pub fn unpack_packfile_objects<W>(
208    pack_bytes: &[u8],
209    format: ObjectFormat,
210    writer: &W,
211) -> Result<PackUnpackResult>
212where
213    W: ObjectWriter,
214{
215    let pack = PackFile::parse(pack_bytes, format)?;
216    write_pack_objects(pack, writer, "pack")
217}
218
219fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
220where
221    W: ObjectWriter,
222{
223    let mut written_objects = Vec::with_capacity(pack.entries.len());
224    for entry in pack.entries {
225        let expected = entry.entry.oid;
226        let actual = writer.write_object(entry.object)?;
227        if actual != expected {
228            return Err(GitError::InvalidObject(format!(
229                "{source} object id mismatch: expected {expected}, wrote {actual}"
230            )));
231        }
232        written_objects.push(actual);
233    }
234    Ok(PackUnpackResult { written_objects })
235}
236
237pub fn collect_reachable_object_ids<R, I>(
238    reader: &R,
239    format: ObjectFormat,
240    starts: I,
241) -> Result<HashSet<ObjectId>>
242where
243    R: ObjectReader,
244    I: IntoIterator<Item = ObjectId>,
245{
246    walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
247}
248
249/// [`collect_reachable_object_ids`] with a cut set: commits in `cut` are
250/// collected, but the walk does not continue to their parents — the view a
251/// shallow repository has of its own refs (`$GIT_DIR/shallow` of the *other*
252/// side, threaded explicitly because `reader` belongs to this side).
253pub fn collect_reachable_object_ids_with_cut<R, I>(
254    reader: &R,
255    format: ObjectFormat,
256    starts: I,
257    cut: &HashSet<ObjectId>,
258) -> Result<HashSet<ObjectId>>
259where
260    R: ObjectReader,
261    I: IntoIterator<Item = ObjectId>,
262{
263    walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
264}
265
266/// [`collect_reachable_object_ids`] with a stop set: objects in `excluded` are
267/// not visited and not expanded, so the walk never sees anything reachable only
268/// through them (used to truncate history at a shallow boundary).
269pub fn collect_reachable_object_ids_excluding<R, I>(
270    reader: &R,
271    format: ObjectFormat,
272    starts: I,
273    excluded: &HashSet<ObjectId>,
274) -> Result<HashSet<ObjectId>>
275where
276    R: ObjectReader,
277    I: IntoIterator<Item = ObjectId>,
278{
279    walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
280}
281
282pub fn collect_reachable_objects<R, I>(
283    reader: &R,
284    format: ObjectFormat,
285    starts: I,
286    excluded: &HashSet<ObjectId>,
287) -> Result<Vec<Arc<EncodedObject>>>
288where
289    R: ObjectReader,
290    I: IntoIterator<Item = ObjectId>,
291{
292    let mut objects = Vec::new();
293    walk_reachable_objects(reader, format, starts, excluded, |_, object| {
294        objects.push(Arc::clone(object));
295    })?;
296    Ok(objects)
297}
298
/// An object paired with its already-known id, so pack writing can reuse the
/// id (see [`pack_inputs`]).
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id under which `object` was found.
    oid: ObjectId,
    /// Shared handle to the object's encoded form.
    object: Arc<EncodedObject>,
}
304
305fn collect_reachable_pack_objects<R, I>(
306    reader: &R,
307    format: ObjectFormat,
308    starts: I,
309    excluded: &HashSet<ObjectId>,
310) -> Result<Vec<ReachablePackObject>>
311where
312    R: ObjectReader,
313    I: IntoIterator<Item = ObjectId>,
314{
315    let mut objects = Vec::new();
316    walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
317        objects.push(ReachablePackObject {
318            oid: *oid,
319            object: Arc::clone(object),
320        });
321    })?;
322    Ok(objects)
323}
324
325fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
326    objects
327        .iter()
328        .map(|entry| PackInput {
329            oid: &entry.oid,
330            object: &entry.object,
331        })
332        .collect()
333}
334
335pub fn install_reachable_pack<I>(
336    source: &impl ObjectReader,
337    destination: &impl RawPackInstaller,
338    format: ObjectFormat,
339    starts: I,
340) -> Result<Option<RawPackInstallResult>>
341where
342    I: IntoIterator<Item = ObjectId>,
343{
344    install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
345}
346
347pub fn install_reachable_pack_excluding<I>(
348    source: &impl ObjectReader,
349    destination: &impl RawPackInstaller,
350    format: ObjectFormat,
351    starts: I,
352    excluded: &HashSet<ObjectId>,
353) -> Result<Option<RawPackInstallResult>>
354where
355    I: IntoIterator<Item = ObjectId>,
356{
357    let pack = match build_reachable_pack(source, format, starts, excluded)? {
358        Some(pack) => pack,
359        None => return Ok(None),
360    };
361    destination.install_raw_pack(&pack.pack).map(Some)
362}
363
364pub fn build_reachable_pack<R, I>(
365    reader: &R,
366    format: ObjectFormat,
367    starts: I,
368    excluded: &HashSet<ObjectId>,
369) -> Result<Option<PackWrite>>
370where
371    R: ObjectReader,
372    I: IntoIterator<Item = ObjectId>,
373{
374    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
375    if objects.is_empty() {
376        return Ok(None);
377    }
378    // Delta-compress reachable packs (used by install/push/fetch) via git-pack's
379    // sliding-window selection. Self-contained, ofs-delta by default; round-trips
380    // through the existing parser. PackWrite shape is unchanged, so callers are
381    // unaffected.
382    let inputs = pack_inputs(&objects);
383    PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
384}
385
386pub fn build_and_install_reachable_pack<R, I>(
387    source: &R,
388    destination: &FileObjectDatabase,
389    format: ObjectFormat,
390    starts: I,
391    excluded: &HashSet<ObjectId>,
392    options: RawPackInstallOptions,
393) -> Result<Option<PackInstallResult>>
394where
395    R: ObjectReader,
396    I: IntoIterator<Item = ObjectId>,
397{
398    build_and_install_reachable_pack_filtered(
399        source,
400        destination,
401        format,
402        starts,
403        excluded,
404        options,
405        None,
406        None,
407    )
408}
409
/// A partial-clone object filter applied while building a transfer pack.
///
/// Mirrors the subset of upstream's `list-objects-filter` the in-process local
/// server supports: directly-wanted tips are always packed; the filter only
/// prunes objects reached *through* the traversal (upstream's
/// `filter_blobs_none` runs on traversed blobs, never on wanted tips).
///
/// The filter is applied in `build_and_install_reachable_pack_filtered` after
/// the full reachable set has been collected, by dropping entries from it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// `blob:none`: omit every blob reached through tree traversal.
    BlobNone,
}
421
422/// [`build_and_install_reachable_pack`] with an optional partial-clone
423/// `filter`. With `Some(BlobNone)`, blobs are dropped from the pack unless
424/// they are directly wanted (named in `starts`).
425#[allow(clippy::too_many_arguments)]
426pub fn build_and_install_reachable_pack_filtered<R, I>(
427    source: &R,
428    destination: &FileObjectDatabase,
429    format: ObjectFormat,
430    starts: I,
431    excluded: &HashSet<ObjectId>,
432    options: RawPackInstallOptions,
433    filter: Option<PackObjectFilter>,
434    unpack_limit: Option<usize>,
435) -> Result<Option<PackInstallResult>>
436where
437    R: ObjectReader,
438    I: IntoIterator<Item = ObjectId>,
439{
440    let starts: Vec<ObjectId> = starts.into_iter().collect();
441    let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
442    let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
443    match filter {
444        Some(PackObjectFilter::BlobNone) => {
445            objects.retain(|entry| {
446                entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
447            });
448        }
449        None => {}
450    }
451    if objects.is_empty() {
452        return Ok(None);
453    }
454    // Mirror fetch-pack's unpack-limit: small transfers are exploded into
455    // loose objects instead of landing as a pack (upstream `get_pack` picks
456    // unpack-objects when the header count is below fetch/transfer.unpackLimit).
457    if let Some(limit) = unpack_limit
458        && objects.len() < limit
459    {
460        for entry in &objects {
461            destination.loose().write_object((*entry.object).clone())?;
462        }
463        return Ok(None);
464    }
465    let inputs = pack_inputs(&objects);
466    let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
467    destination
468        .install_generated_pack_unchecked(&pack, options)
469        .map(Some)
470}
471
472/// Assemble a pack stream that reuses an existing pack's object data verbatim
473/// (upstream pack-objects' "pack reuse" fast path, full-pack case) and appends
474/// `appended` as freshly encoded undeltified entries.
475///
476/// The reused pack's entry bytes are copied as-is between our own header and
477/// trailer: a full-pack copy preserves every relative distance, so internal
478/// `OFS_DELTA` bases stay valid. The header object count covers both the
479/// reused and appended entries, and the trailing pack checksum is recomputed
480/// over the assembled stream.
481pub fn assemble_pack_with_verbatim_reuse(
482    format: ObjectFormat,
483    reused_pack_bytes: &[u8],
484    appended: &[PackInput<'_>],
485) -> Result<(Vec<u8>, u32)> {
486    assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
487}
488
489/// Like [`assemble_pack_with_verbatim_reuse`], but concatenates multiple whole
490/// packs before appending fresh entries.
491pub fn assemble_pack_with_verbatim_reuses(
492    format: ObjectFormat,
493    reused_packs: &[&[u8]],
494    appended: &[PackInput<'_>],
495) -> Result<(Vec<u8>, u32)> {
496    let hash_len = format.raw_len();
497    let mut reused_count = 0u32;
498    let mut capacity = 12 + hash_len + 64 * appended.len();
499    for reused_pack_bytes in reused_packs {
500        if reused_pack_bytes.len() < 12 + hash_len {
501            return Err(GitError::InvalidFormat("reused pack too short".into()));
502        }
503        if &reused_pack_bytes[..4] != b"PACK" {
504            return Err(GitError::InvalidFormat(
505                "reused pack has no signature".into(),
506            ));
507        }
508        let version = u32::from_be_bytes([
509            reused_pack_bytes[4],
510            reused_pack_bytes[5],
511            reused_pack_bytes[6],
512            reused_pack_bytes[7],
513        ]);
514        if version != 2 {
515            return Err(GitError::Unsupported(format!(
516                "reused pack version {version}"
517            )));
518        }
519        let count = u32::from_be_bytes([
520            reused_pack_bytes[8],
521            reused_pack_bytes[9],
522            reused_pack_bytes[10],
523            reused_pack_bytes[11],
524        ]);
525        reused_count = reused_count
526            .checked_add(count)
527            .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
528        capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
529    }
530    let total = reused_count
531        .checked_add(appended.len() as u32)
532        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
533
534    let mut out = Vec::with_capacity(capacity);
535    out.extend_from_slice(b"PACK");
536    out.extend_from_slice(&2u32.to_be_bytes());
537    out.extend_from_slice(&total.to_be_bytes());
538    for reused_pack_bytes in reused_packs {
539        out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
540    }
541    for input in appended {
542        write_undeltified_pack_entry(&mut out, input.object)?;
543    }
544    let checksum = sley_core::digest_bytes(format, &out)?;
545    out.extend_from_slice(checksum.as_bytes());
546    Ok((out, reused_count))
547}
548
549/// Assemble a pack stream by copying already-encoded pack entries verbatim and
550/// appending freshly encoded undeltified entries.
551pub fn assemble_pack_with_verbatim_entries(
552    format: ObjectFormat,
553    reused_entries: &[&[u8]],
554    appended: &[PackInput<'_>],
555) -> Result<(Vec<u8>, u32)> {
556    let reused_count = u32::try_from(reused_entries.len())
557        .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
558    let total = reused_count
559        .checked_add(appended.len() as u32)
560        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
561
562    let mut capacity = 12 + format.raw_len() + 64 * appended.len();
563    for entry in reused_entries {
564        capacity = capacity.saturating_add(entry.len());
565    }
566    let mut out = Vec::with_capacity(capacity);
567    out.extend_from_slice(b"PACK");
568    out.extend_from_slice(&2u32.to_be_bytes());
569    out.extend_from_slice(&total.to_be_bytes());
570    for entry in reused_entries {
571        out.extend_from_slice(entry);
572    }
573    for input in appended {
574        write_undeltified_pack_entry(&mut out, input.object)?;
575    }
576    let checksum = sley_core::digest_bytes(format, &out)?;
577    out.extend_from_slice(checksum.as_bytes());
578    Ok((out, reused_count))
579}
580
581/// Append one undeltified pack entry (type/size varint header + zlib body).
582fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
583    let type_bits: u8 = match object.object_type {
584        ObjectType::Commit => 1,
585        ObjectType::Tree => 2,
586        ObjectType::Blob => 3,
587        ObjectType::Tag => 4,
588    };
589    let mut size = object.body.len() as u64;
590    let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
591    size >>= 4;
592    while size > 0 {
593        out.push(byte | 0x80);
594        byte = (size & 0x7f) as u8;
595        size >>= 7;
596    }
597    out.push(byte);
598    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
599    encoder.write_all(&object.body)?;
600    out.extend_from_slice(&encoder.finish()?);
601    Ok(())
602}
603
/// Outcome of consolidating every object in a repository into a single pack.
///
/// This is the engine for `git gc` / `git repack`: [`repack_all_objects`]
/// produces the bytes for one new delta-compressed pack plus its index, and
/// reports which on-disk artifacts the caller could now remove. No deletions
/// are performed by the engine itself; the CLI decides reachability policy and
/// performs any pruning (see [`install_repack_result`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the freshly written `.pack` file.
    pub pack: Vec<u8>,
    /// Bytes of the matching `.idx` file for [`RepackResult::pack`].
    pub idx: Vec<u8>,
    /// Number of distinct objects contained in the new pack.
    pub object_count: usize,
    /// Absolute paths of pre-existing `*.pack` files now superseded by the new
    /// pack (every object they hold is present in [`RepackResult::pack`]).
    pub obsolete_packs: Vec<PathBuf>,
    /// Loose object ids that are now also present in the new pack and therefore
    /// redundant on disk.
    pub packed_loose: Vec<ObjectId>,
    /// Checksum reported by the pack writer. Private so callers cannot alter
    /// it: installation validates the public `pack` and `idx` bytes against it.
    pack_checksum: ObjectId,
    /// Index entries reported by the pack writer. Private for the same reason:
    /// installation compares the entries parsed from `idx` with these.
    index_entries: Vec<PackIndexEntry>,
}
628
629/// Gather every object in `git_dir` (loose objects and every existing pack) and
630/// write them into a single new delta-compressed pack.
631///
632/// Returns the new pack/index bytes, the count of packed objects, the list of
633/// pre-existing pack files that the new pack supersedes, and the loose object
634/// ids that are now packed. Nothing is deleted: the caller (CLI) decides
635/// reachability policy and performs any pruning, optionally via
636/// [`install_repack_result`].
637///
638/// Returns `Ok(None)` when the repository contains no objects at all.
639/// `git repack -a`'s gathering rule: pack the reachability closure of `roots`
640/// (ref tips, `HEAD`, reflog entries, indexed objects) instead of everything
641/// on disk. Borrowed objects (alternates) reachable from the roots are packed
642/// into the new local pack like upstream `pack-objects --all` without
643/// `--local`; previously-packed objects that are no longer reachable are NOT
644/// carried forward (that is how `repack -a -d` drops them). Missing objects
645/// are tolerated (stale reflog entries may reference pruned history).
646///
647/// Returns `Ok(None)` when no roots resolve to any object.
648pub fn repack_reachable_objects(
649    git_dir: &Path,
650    format: ObjectFormat,
651    roots: &[ObjectId],
652) -> Result<Option<RepackResult>> {
653    let objects_dir = repository_objects_dir(git_dir);
654    let database = FileObjectDatabase::new(objects_dir.clone(), format);
655
656    let mut seen: HashSet<ObjectId> = HashSet::new();
657    let mut objects: Vec<ReachablePackObject> = Vec::new();
658    let mut pending: Vec<ObjectId> = roots.to_vec();
659    while let Some(oid) = pending.pop() {
660        if !seen.insert(oid) {
661            continue;
662        }
663        let object = match database.read_object(&oid) {
664            Ok(object) => object,
665            Err(GitError::NotFound(_)) => continue,
666            Err(err) => return Err(err),
667        };
668        match object.object_type {
669            ObjectType::Commit => {
670                let commit = Commit::parse_ref(format, &object.body)?;
671                pending.extend(grafted_parents(&database, &oid, commit.parents));
672                pending.push(commit.tree);
673            }
674            ObjectType::Tree => {
675                for entry in TreeEntries::new(format, &object.body) {
676                    let entry = entry?;
677                    if !entry.is_gitlink() {
678                        pending.push(entry.oid);
679                    }
680                }
681            }
682            ObjectType::Tag => {
683                let tag = Tag::parse_ref(format, &object.body)?;
684                pending.push(tag.object);
685            }
686            ObjectType::Blob => {}
687        }
688        objects.push(ReachablePackObject { oid, object });
689    }
690    if objects.is_empty() {
691        return Ok(None);
692    }
693
694    let inputs = pack_inputs(&objects);
695    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
696    let object_count = written.entries.len();
697
698    // Every pre-existing local pack is superseded under `-a` (their reachable
699    // objects are in the new pack; their unreachable ones are being dropped).
700    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
701    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
702        .into_iter()
703        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
704        .collect();
705
706    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
707    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
708        .into_iter()
709        .filter(|oid| packed_oid_set.contains(oid))
710        .collect();
711    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
712
713    let pack_checksum = written.checksum;
714    let index_entries = written.entries.clone();
715    Ok(Some(RepackResult {
716        pack: written.pack,
717        idx: written.index,
718        object_count,
719        obsolete_packs,
720        packed_loose,
721        pack_checksum,
722        index_entries,
723    }))
724}
725
726pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
727    let objects_dir = repository_objects_dir(git_dir);
728    let database = FileObjectDatabase::new(objects_dir.clone(), format);
729
730    // Enumerate every object id reachable on disk: loose objects, every pack
731    // index, and any multi-pack-index. `object_ids_in_objects_dir` already
732    // unions all of these and de-duplicates them.
733    let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
734    if all_oids.is_empty() {
735        return Ok(None);
736    }
737
738    // Read each object's canonical encoding so the new pack stores byte-for-byte
739    // identical payloads. Loose objects take precedence over packed copies in
740    // `FileObjectDatabase::read_object`, but both decode to the same bytes.
741    let mut objects = Vec::with_capacity(all_oids.len());
742    for oid in &all_oids {
743        objects.push(ReachablePackObject {
744            oid: *oid,
745            object: database.read_object(oid)?,
746        });
747    }
748
749    let inputs = pack_inputs(&objects);
750    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
751    let object_count = written.entries.len();
752
753    // The new pack contains every object on disk, so every pre-existing pack is
754    // fully superseded. We still record the exact pack paths (not the index
755    // paths) so the caller can delete the right files. The pack we are about to
756    // write is excluded by name in case its checksum collides with an existing
757    // pack (identical contents).
758    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
759    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
760        .into_iter()
761        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
762        .collect();
763
764    // Loose object ids that the new pack now also holds (which is all of them,
765    // since they were gathered into it).
766    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
767    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
768        .into_iter()
769        .filter(|oid| packed_oid_set.contains(oid))
770        .collect();
771    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
772
773    Ok(Some(RepackResult {
774        pack: written.pack,
775        idx: written.index,
776        object_count,
777        obsolete_packs,
778        packed_loose,
779        pack_checksum: written.checksum,
780        index_entries: written.entries,
781    }))
782}
783
784/// Gather only loose objects in `git_dir` and write them into a new pack.
785///
786/// This is the engine for plain `git repack -d` (without `-a`): existing packs
787/// remain in place, and pruning removes only the loose copies that the new pack
788/// now serves.
789pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
790    let objects_dir = repository_objects_dir(git_dir);
791    let database = FileObjectDatabase::new(objects_dir.clone(), format);
792    let loose_oids = loose_object_ids(&objects_dir, format)?;
793    if loose_oids.is_empty() {
794        return Ok(None);
795    }
796
797    let mut objects = Vec::with_capacity(loose_oids.len());
798    for oid in &loose_oids {
799        objects.push(ReachablePackObject {
800            oid: *oid,
801            object: database.read_object(oid)?,
802        });
803    }
804
805    let inputs = pack_inputs(&objects);
806    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
807    let object_count = written.entries.len();
808    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
809    let mut packed_loose: Vec<ObjectId> = loose_oids
810        .into_iter()
811        .filter(|oid| packed_oid_set.contains(oid))
812        .collect();
813    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
814
815    let pack_checksum = written.checksum;
816    let index_entries = written.entries.clone();
817    Ok(Some(RepackResult {
818        pack: written.pack,
819        idx: written.index,
820        object_count,
821        obsolete_packs: Vec::new(),
822        packed_loose,
823        pack_checksum,
824        index_entries,
825    }))
826}
827
/// Write the consolidated pack from a [`RepackResult`] into
/// `objects/pack/` and, when `prune` is set, remove the now-redundant
/// pre-existing packs and packed loose objects.
///
/// This is [`install_repack_result_with_bitmap`] called without bitmap tips,
/// so no `.bitmap` file is written.
///
/// Pruning is opt-in and deliberately conservative: an object or pack is only
/// removed after verifying it is listed in the index of the freshly written
/// pack. Concretely:
///
/// * a loose object is removed only if its id appears in the new pack's index;
/// * a pre-existing pack is removed only if it is not the pack we just wrote,
///   has no `.keep` or `.promisor` sidecar, *and* every object listed in its
///   `.idx` is present in the new pack (its `.idx` and known sidecars are
///   removed alongside it);
/// * a stale `multi-pack-index` is removed as soon as it references *any*
///   pruned pack, so no reader is ever left pointing at a deleted pack.
pub fn install_repack_result(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
) -> Result<()> {
    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
}
850
851/// [`install_repack_result`] that additionally writes a `pack-<checksum>.bitmap`
852/// reachability bitmap alongside the new pack when `bitmap_tips` is `Some`.
853/// `bitmap_tips` carries the repository's ref tips (peeled to commits): they
854/// receive selection preference, mirroring upstream's `NEEDS_BITMAP` flagging of
855/// ref tips in `git repack -b` / `pack-objects --write-bitmap-index`.
856pub fn install_repack_result_with_bitmap(
857    git_dir: &Path,
858    format: ObjectFormat,
859    result: &RepackResult,
860    prune: bool,
861    bitmap_tips: Option<&HashSet<ObjectId>>,
862) -> Result<()> {
863    let objects_dir = repository_objects_dir(git_dir);
864    let pack_dir = objects_dir.join("pack");
865    fs::create_dir_all(&pack_dir)?;
866
867    // Validate the public bytes against the private provenance that
868    // `repack_all_objects` captured from `PackFile::write_packed`. This avoids
869    // inflating and resolving the freshly-written pack a second time while still
870    // catching caller mutations before anything is written or pruned.
871    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
872    let parsed_index = PackIndex::parse(&result.idx, format)?;
873    if parsed_index.pack_checksum != result.pack_checksum {
874        return Err(GitError::InvalidFormat(
875            "repack index checksum does not match the new pack".into(),
876        ));
877    }
878    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
879        return Err(GitError::InvalidFormat(
880            "repack index does not match the new pack contents".into(),
881        ));
882    }
883    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
884    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
885    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
886    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
887    // git writes a `.rev` alongside every repacked pack (`pack.writeReverseIndex`
888    // defaults to true). Write it before the `.idx` so the index never becomes
889    // visible ahead of its companions, mirroring upstream's finalize order.
890    let reverse_index = sley_pack::PackReverseIndex::write(
891        format,
892        &sley_pack::pack_order_index_positions(&parsed_index.entries),
893        &result.pack_checksum,
894    )?;
895    write_pack_component(&new_pack_path, &result.pack)?;
896    write_pack_component(&new_rev_path, &reverse_index)?;
897    write_pack_component(&new_index_path, &result.idx)?;
898
899    if let Some(tips) = bitmap_tips {
900        // Build before pruning: the closure walk reads objects through the
901        // pre-existing packs/loose store (the new pack holds the same bytes).
902        let database = FileObjectDatabase::new(objects_dir.clone(), format);
903        if let Some(bitmap) = build_pack_bitmap(
904            &database,
905            format,
906            &result.index_entries,
907            &result.pack_checksum,
908            tips,
909        )? {
910            // Unlike the pack/idx/rev (content-addressed by the pack
911            // checksum), the bitmap depends on selection inputs (e.g.
912            // pack.preferBitmapTips), so an existing file must be replaced —
913            // write_pack_component's exists-skip would keep a stale selection.
914            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
915            remove_file_if_exists(&bitmap_path)?;
916            write_pack_component(&bitmap_path, &bitmap)?;
917        }
918    }
919
920    if !prune {
921        return Ok(());
922    }
923
924    // Prune based on the objects the new pack's *index* can resolve (what reads use
925    // once the old packs are gone), not just what the pack contains — so a stale
926    // pack is never removed for an object the new index cannot serve.
927    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
928
929    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
930    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
931    Ok(())
932}
933
934fn validate_pack_checksum(
935    pack: &[u8],
936    format: ObjectFormat,
937    expected: &ObjectId,
938    context: &str,
939) -> Result<()> {
940    if expected.format() != format {
941        return Err(GitError::InvalidObjectId(format!(
942            "{context} checksum format does not match object format"
943        )));
944    }
945    let hash_len = format.raw_len();
946    if pack.len() < 12 + hash_len {
947        return Err(GitError::InvalidFormat(format!(
948            "{context} pack file too short"
949        )));
950    }
951    if &pack[..4] != b"PACK" {
952        return Err(GitError::InvalidFormat(format!(
953            "{context} pack file missing PACK signature"
954        )));
955    }
956    let trailer_offset = pack.len() - hash_len;
957    let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
958    let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
959    if &actual != expected || trailer != *expected {
960        return Err(GitError::InvalidFormat(format!(
961            "{context} pack checksum does not match generated pack"
962        )));
963    }
964    Ok(())
965}
966
967fn pack_index_entries_match_writer(
968    parsed: &[PackIndexEntry],
969    writer_entries: &[PackIndexEntry],
970) -> bool {
971    if parsed.len() != writer_entries.len() {
972        return false;
973    }
974    let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
975    writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
976    parsed.iter().zip(writer_entries).all(|(left, right)| {
977        left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
978    })
979}
980
981/// List loose objects under `git_dir` that are *not* reachable from `roots`,
982/// optionally deleting them.
983///
984/// Reachability is computed with [`collect_reachable_object_ids`] over the
985/// repository's object database, so trees, parents, and tag targets are all
986/// followed. When `delete` is `false` the returned ids are merely reported;
987/// when `true` each unreachable loose object file is removed (packed copies are
988/// never touched). Deletion is therefore opt-in.
989pub fn prune_unreachable_loose<I>(
990    git_dir: &Path,
991    format: ObjectFormat,
992    roots: I,
993    delete: bool,
994) -> Result<Vec<ObjectId>>
995where
996    I: IntoIterator<Item = ObjectId>,
997{
998    let objects_dir = repository_objects_dir(git_dir);
999    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1000    let reachable = collect_reachable_object_ids(&database, format, roots)?;
1001
1002    let store = LooseObjectStore::new(objects_dir.clone(), format);
1003    let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1004        .into_iter()
1005        .filter(|oid| !reachable.contains(oid))
1006        .collect();
1007    pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1008
1009    if delete {
1010        for oid in &pruned {
1011            let path = store.object_path(oid)?;
1012            match fs::remove_file(&path) {
1013                Ok(()) => {}
1014                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1015                Err(err) => return Err(GitError::Io(err.to_string())),
1016            }
1017        }
1018    }
1019    Ok(pruned)
1020}
1021
1022/// Loose object ids under `objects_dir`, sorted by hex, with packed objects
1023/// excluded.
1024fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1025    let oids = loose_object_id_set(objects_dir, format)?;
1026    let mut oids = oids.into_iter().collect::<Vec<_>>();
1027    oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1028    Ok(oids)
1029}
1030
1031fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1032    let mut oids = HashSet::new();
1033    collect_loose_object_ids(objects_dir, format, &mut oids)?;
1034    Ok(oids)
1035}
1036
1037/// Absolute paths of every `*.pack` file directly inside `pack_dir`, sorted for
1038/// deterministic output.
1039fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1040    if !pack_dir.exists() {
1041        return Ok(Vec::new());
1042    }
1043    let mut packs = Vec::new();
1044    for entry in fs::read_dir(pack_dir)? {
1045        let path = entry?.path();
1046        if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1047            packs.push(path);
1048        }
1049    }
1050    packs.sort();
1051    Ok(packs)
1052}
1053
1054/// Remove pre-existing packs whose every object is contained in `present`,
1055/// skipping `keep` (the pack just written), `.keep` packs, and `.promisor` packs.
1056/// A stale multi-pack-index that references any removed pack is removed too.
1057fn prune_packs_contained_in(
1058    objects_dir: &Path,
1059    format: ObjectFormat,
1060    present: &HashSet<ObjectId>,
1061    keep: &Path,
1062) -> Result<()> {
1063    let pack_dir = objects_dir.join("pack");
1064    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
1065    let mut removed_stems: HashSet<String> = HashSet::new();
1066
1067    for pack_path in existing_pack_files(&pack_dir)? {
1068        if pack_path == keep {
1069            continue;
1070        }
1071        let Some(stem) = pack_path.file_stem() else {
1072            continue;
1073        };
1074        if Some(stem) == keep_stem.as_deref() {
1075            continue;
1076        }
1077        if pack_path.with_extension("keep").exists()
1078            || pack_path.with_extension("promisor").exists()
1079        {
1080            continue;
1081        }
1082        let index_path = pack_path.with_extension("idx");
1083        if !index_path.exists() {
1084            // Without an index we cannot prove containment; leave it alone.
1085            continue;
1086        }
1087        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1088        if !index
1089            .entries
1090            .iter()
1091            .all(|entry| present.contains(&entry.oid))
1092        {
1093            continue;
1094        }
1095        // Every object in this pack is safely in the new pack and it has no Git
1096        // policy sidecar that says to keep it: remove the pack, its index, and
1097        // cache sidecars derived from them.
1098        remove_file_if_exists(&pack_path)?;
1099        remove_file_if_exists(&index_path)?;
1100        for ext in ["rev", "mtimes", "bitmap"] {
1101            remove_file_if_exists(&pack_path.with_extension(ext))?;
1102        }
1103        removed_stems.insert(stem.to_string_lossy().into_owned());
1104    }
1105
1106    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1107    Ok(())
1108}
1109
1110/// Remove a `multi-pack-index` if it names *any* pack that was removed.
1111///
1112/// A MIDX that still references a deleted pack makes reads fail (the lookup
1113/// resolves to a pack that is gone) before any fallback. Removing the whole MIDX
1114/// when even one of its packs is pruned forces readers back to the individual pack
1115/// indexes, which are correct; `multi-pack-index write` can rebuild it later.
1116fn prune_stale_multi_pack_index(
1117    pack_dir: &Path,
1118    format: ObjectFormat,
1119    removed_stems: &HashSet<String>,
1120) -> Result<()> {
1121    if removed_stems.is_empty() {
1122        return Ok(());
1123    }
1124    let midx_path = pack_dir.join("multi-pack-index");
1125    if !midx_path.exists() {
1126        return Ok(());
1127    }
1128    let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1129    let references_removed_pack = midx.pack_names.iter().any(|name| {
1130        let stem = name.strip_suffix(".idx").unwrap_or(name);
1131        removed_stems.contains(stem)
1132    });
1133    if references_removed_pack {
1134        remove_file_if_exists(&midx_path)?;
1135    }
1136    Ok(())
1137}
1138
1139/// Remove each loose object in `candidates` whose id is in `present`, leaving
1140/// any object not actually packed untouched.
1141fn prune_loose_objects<'a, I>(
1142    objects_dir: &Path,
1143    format: ObjectFormat,
1144    candidates: I,
1145    present: &HashSet<ObjectId>,
1146) -> Result<()>
1147where
1148    I: IntoIterator<Item = &'a ObjectId>,
1149{
1150    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1151    for oid in candidates {
1152        if !present.contains(oid) {
1153            continue;
1154        }
1155        remove_file_if_exists(&store.object_path(oid)?)?;
1156    }
1157    Ok(())
1158}
1159
/// How a delta pack entry names its base, as decoded by
/// `pack_entry_delta_base`.
enum PackDeltaBase {
    /// Entry type 6: the base's pack offset, computed as the entry's own
    /// offset minus the encoded relative distance.
    Offset(u64),
    /// Entry type 7: the base's object id, read from the bytes that follow
    /// the entry header.
    Ref(ObjectId),
}
1164
/// Result of one `scan_pack_index_offsets` pass over a pack index.
struct PackIndexOffsetInfo {
    /// Smallest indexed offset greater than the target offset, or the trailer
    /// offset when no entry follows. For an index that lists the target offset
    /// more than once this is the target offset itself.
    end_offset: u64,
    /// Object id of the index entry located at the requested delta base
    /// offset; `None` when no base offset was requested.
    delta_base_oid: Option<ObjectId>,
}
1169
1170fn scan_pack_index_offsets(
1171    index: &PackIndex,
1172    target_offset: u64,
1173    trailer_offset: u64,
1174    delta_base_offset: Option<u64>,
1175) -> Result<PackIndexOffsetInfo> {
1176    let mut target_count = 0usize;
1177    let mut next_offset = None;
1178    let mut delta_base_oid = None;
1179
1180    for entry in &index.entries {
1181        if entry.offset == target_offset {
1182            target_count += 1;
1183        } else if entry.offset > target_offset {
1184            match next_offset {
1185                Some(current) if current <= entry.offset => {}
1186                _ => next_offset = Some(entry.offset),
1187            }
1188        }
1189        if Some(entry.offset) == delta_base_offset {
1190            delta_base_oid = Some(entry.oid);
1191        }
1192    }
1193
1194    if target_count == 0 {
1195        return Err(GitError::InvalidFormat(format!(
1196            "pack index offset {target_offset} not found"
1197        )));
1198    }
1199    if let Some(offset) = delta_base_offset
1200        && delta_base_oid.is_none()
1201    {
1202        return Err(GitError::InvalidFormat(format!(
1203            "ofs-delta base offset {offset} not found"
1204        )));
1205    }
1206
1207    Ok(PackIndexOffsetInfo {
1208        // Preserve the old sorted-vector behavior for malformed indexes with
1209        // duplicate offsets: the next sorted entry has the same offset.
1210        end_offset: if target_count > 1 {
1211            target_offset
1212        } else {
1213            next_offset.unwrap_or(trailer_offset)
1214        },
1215        delta_base_oid,
1216    })
1217}
1218
1219fn pack_entry_delta_base(
1220    format: ObjectFormat,
1221    pack: &[u8],
1222    entry_offset: u64,
1223) -> Result<Option<PackDeltaBase>> {
1224    let mut cursor = usize::try_from(entry_offset)
1225        .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1226    let first = pack_next_byte(pack, &mut cursor)?;
1227    let kind = (first >> 4) & 0x07;
1228    let mut byte = first;
1229    while byte & 0x80 != 0 {
1230        byte = pack_next_byte(pack, &mut cursor)?;
1231    }
1232    match kind {
1233        6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1234            pack,
1235            &mut cursor,
1236            entry_offset,
1237        )?))),
1238        7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1239            format,
1240            pack,
1241            &mut cursor,
1242        )?))),
1243        _ => Ok(None),
1244    }
1245}
1246
1247fn parse_ref_delta_base_oid(
1248    format: ObjectFormat,
1249    pack: &[u8],
1250    cursor: &mut usize,
1251) -> Result<ObjectId> {
1252    let raw_len = format.raw_len();
1253    if *cursor + raw_len > pack.len() {
1254        return Err(GitError::InvalidFormat(
1255            "truncated ref-delta base object id".into(),
1256        ));
1257    }
1258    let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1259    *cursor += raw_len;
1260    Ok(oid)
1261}
1262
1263fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
1264    let mut byte = pack_next_byte(pack, cursor)?;
1265    let mut relative = u64::from(byte & 0x7f);
1266    while byte & 0x80 != 0 {
1267        byte = pack_next_byte(pack, cursor)?;
1268        relative = relative
1269            .checked_add(1)
1270            .and_then(|value| value.checked_shl(7))
1271            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
1272            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
1273    }
1274    entry_offset
1275        .checked_sub(relative)
1276        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
1277}
1278
1279fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
1280    let Some(byte) = pack.get(*cursor).copied() else {
1281        return Err(GitError::InvalidFormat("truncated pack entry".into()));
1282    };
1283    *cursor += 1;
1284    Ok(byte)
1285}
1286
/// Returns `ObjectId::null(format)` wrapped in `Ok`; this function itself
/// never produces an error.
fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
    Ok(ObjectId::null(format))
}
1290
1291/// Remove `path` if it exists, treating a missing file as success.
1292fn remove_file_if_exists(path: &Path) -> Result<()> {
1293    match fs::remove_file(path) {
1294        Ok(()) => Ok(()),
1295        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1296        Err(err) => Err(GitError::Io(err.to_string())),
1297    }
1298}
1299
1300fn walk_reachable_objects<R, I, F>(
1301    reader: &R,
1302    format: ObjectFormat,
1303    starts: I,
1304    excluded: &HashSet<ObjectId>,
1305    visit: F,
1306) -> Result<HashSet<ObjectId>>
1307where
1308    R: ObjectReader,
1309    I: IntoIterator<Item = ObjectId>,
1310    F: FnMut(&ObjectId, &Arc<EncodedObject>),
1311{
1312    walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
1313}
1314
/// [`walk_reachable_objects`] with an additional `cut` set: commits in `cut`
/// are visited (their trees and blobs too) but their parents are not followed,
/// mirroring a shallow client's view of its own history during negotiation.
///
/// The walk is depth-first over an explicit stack and fully drains one start
/// before moving on to the next. Ids in `excluded` are neither read nor
/// recorded. `visit` is called at most once per object, after the object has
/// been read and (for commits, trees and tags) parsed successfully.
///
/// Returns the set of every id that was visited.
///
/// # Errors
///
/// A failed read is passed through `with_missing_object_context` with
/// `MissingObjectContext::Traversal`; parse failures of commits, trees and
/// tags are propagated as-is. Either aborts the walk.
fn walk_reachable_objects_with_cut<R, I, F>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
    cut: &HashSet<ObjectId>,
    mut visit: F,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
    F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
    let mut seen = HashSet::new();
    let mut pending = Vec::new();
    for start in starts {
        pending.push(start);
        while let Some(oid) = pending.pop() {
            // Excluded ids are skipped without being marked as seen.
            if excluded.contains(&oid) {
                continue;
            }
            if !seen.insert(oid) {
                continue;
            }
            let object = reader.read_object(&oid).map_err(|err| {
                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
            })?;
            match object.object_type {
                ObjectType::Commit => {
                    let (tree, parents) = {
                        let commit = Commit::parse_ref(format, &object.body)?;
                        (commit.tree, commit.parents)
                    };
                    visit(&oid, &object);
                    if !cut.contains(&oid) {
                        // Parents are pushed in reverse so the first one ends
                        // up nearest the top of the stack.
                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
                            pending.push(parent);
                        }
                    }
                    // Pushed last, so the commit's tree is popped before any
                    // of its parents.
                    pending.push(tree);
                }
                ObjectType::Tree => {
                    let mut child_oids = Vec::new();
                    for entry in TreeEntries::new(format, &object.body) {
                        let entry = entry?;
                        // Gitlink entries are not followed.
                        if entry.is_gitlink() {
                            continue;
                        }
                        child_oids.push(entry.oid);
                    }
                    visit(&oid, &object);
                    // Reversed so children are popped in tree-entry order.
                    pending.extend(child_oids.into_iter().rev());
                }
                ObjectType::Tag => {
                    let target = {
                        let tag = Tag::parse_ref(format, &object.body)?;
                        tag.object
                    };
                    visit(&oid, &object);
                    pending.push(target);
                }
                ObjectType::Blob => visit(&oid, &object),
            }
        }
    }
    Ok(seen)
}
1385
1386// ===== reachability bitmaps (.bitmap write + consult) =====
1387
/// Bit accessors over a `Vec<u64>` bitset using git's bitmap convention:
/// bit `i` lives in word `i / 64` at bit `i % 64` (LSB-first within a word).
///
/// Reports whether bit `position` is set; positions beyond the last word read
/// as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    words
        .get((position / 64) as usize)
        .is_some_and(|word| (word >> (position % 64)) & 1 != 0)
}
1394
/// Sets bit `position` (word `position / 64`, bit `position % 64`). A position
/// beyond the last word is ignored; the slice is never grown.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
1401
/// ORs `other` into `acc` word by word. Only the words both slices share are
/// combined; any extra words on either side are left as they are.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other)
        .for_each(|(dst, &src)| *dst |= src);
}
1407
/// Lists the positions of all set bits in ascending order (the inverse of
/// repeated [`bitset_set`]).
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    let mut positions = Vec::new();
    for (word_index, &word) in words.iter().enumerate() {
        let mut bits = word;
        while bits != 0 {
            let lowest = bits.trailing_zeros();
            positions.push(word_index as u32 * 64 + lowest);
            // Clear the bit that was just recorded.
            bits ^= 1u64 << lowest;
        }
    }
    positions
}
1421
/// Extracts the timestamp (epoch seconds) from a commit identity line of the
/// form `Name <email> <timestamp> <tz>`: the second space-separated field
/// counted from the end.
///
/// Yields 0 when that field is missing, not UTF-8, or not an integer, matching
/// git's tolerance for bogus dates during bitmap commit selection.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Counting from the right, field 0 is the timezone and field 1 the
    // timestamp.
    let Some(raw) = identity.rsplit(|byte| *byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
1434
/// Upstream `next_commit_index` (pack-bitmap-write.c): the spacing schedule for
/// bitmap commit selection over the date-descending commit list.
///
/// * `idx <= 100`: 0;
/// * `100 < idx <= 20000`: `idx - 100`, capped at 100;
/// * `idx > 20000`: `idx - 20000`, held within `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).max(MIN_COMMITS).min(MAX_COMMITS),
    }
}
1453
1454/// Builds a serialised `.bitmap` for the pack described by `index_entries` /
1455/// `pack_checksum`, mirroring upstream pack-bitmap-write.c:
1456///
1457/// * commit selection walks the pack's commits in committer-date-descending
1458///   order through [`bitmap_next_commit_index`]'s spacing schedule, preferring
1459///   `preferred_tips` (ref tips — upstream's `NEEDS_BITMAP`) and merge commits
1460///   inside each window;
1461/// * each selected commit stores its full reachability closure (commits, trees,
1462///   blobs) as pack-order bit positions (no XOR compression — `xor_offset` 0 is
1463///   valid on disk and what readers see after resolution anyway).
1464///
1465/// Returns `Ok(None)` — mirroring upstream's warn-and-skip — when the pack
1466/// lacks full closure (a reachable object is missing from it).
1467pub fn build_pack_bitmap(
1468    db: &FileObjectDatabase,
1469    format: ObjectFormat,
1470    index_entries: &[PackIndexEntry],
1471    pack_checksum: &ObjectId,
1472    preferred_tips: &HashSet<ObjectId>,
1473) -> Result<Option<Vec<u8>>> {
1474    // `index_entries` carries no ordering guarantee (writer provenance is in
1475    // pack-write order); bit numbering follows pack (offset) order.
1476    let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
1477    by_offset.sort_by_key(|&slot| index_entries[slot].offset);
1478    let bit_order: Vec<ObjectId> = by_offset
1479        .into_iter()
1480        .map(|slot| index_entries[slot].oid)
1481        .collect();
1482    build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
1483}
1484
1485/// [`build_pack_bitmap`]'s multi-pack sibling: builds the serialised
1486/// `multi-pack-index-<checksum>.bitmap` for `midx_entries`, with bits in
1487/// pseudo-pack order (preferred pack first, then pack id, then offset — the
1488/// same order [`MultiPackIndex::write_with_reverse_index`] records in `RIDX`)
1489/// and the midx checksum in the BITM checksum field.
1490pub fn build_midx_bitmap(
1491    db: &FileObjectDatabase,
1492    format: ObjectFormat,
1493    midx_entries: &[sley_pack::MultiPackIndexEntry],
1494    midx_checksum: &ObjectId,
1495    preferred_pack: u32,
1496    preferred_tips: &HashSet<ObjectId>,
1497) -> Result<Option<Vec<u8>>> {
1498    let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
1499    pseudo.sort_by_key(|&slot| {
1500        let entry = &midx_entries[slot];
1501        (
1502            entry.pack_int_id != preferred_pack,
1503            entry.pack_int_id,
1504            entry.offset,
1505        )
1506    });
1507    let bit_order: Vec<ObjectId> = pseudo
1508        .into_iter()
1509        .map(|slot| midx_entries[slot].oid)
1510        .collect();
1511    build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
1512}
1513
/// Upstream `bitmap_builder_init`'s `num_maximal` counter (pack-bitmap-write.c):
/// walk the first-parent ancestry of the selected commits, children before
/// parents, propagating per-commit "which selected commits reach me" masks.
/// A commit counts as maximal when it is selected, or when distinct selected
/// lineages converge on it (its mask gains bits its last contributing child
/// did not carry). Only the count is needed (for the trace2 data event), so no
/// reverse-edge bookkeeping is kept.
///
/// `selected[i]` owns mask bit `i`.
///
/// # Errors
///
/// Propagates the failure when a commit on the first-parent chain cannot be
/// read or parsed.
fn bitmap_num_maximal_commits(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    selected: &[ObjectId],
) -> Result<usize> {
    // First-parent subgraph reachable from the selected commits.
    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
    let mut stack: Vec<ObjectId> = selected.to_vec();
    while let Some(oid) = stack.pop() {
        if first_parent.contains_key(&oid) {
            continue;
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        // Only the first of the (graft-adjusted) parents is followed.
        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
        first_parent.insert(oid, parent);
        if let Some(parent) = parent {
            stack.push(parent);
        }
    }
    // Children-before-parents order (Kahn over the single first-parent edge).
    // `pending_children[p]` counts the commits whose first parent is `p`.
    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
    for parent in first_parent.values().flatten() {
        *pending_children.entry(*parent).or_default() += 1;
    }
    let word_count = selected.len().div_ceil(64);
    // Per-commit propagation state: the selected commits known to reach it and
    // whether it currently counts as maximal.
    struct MaximalEnt {
        mask: Vec<u64>,
        maximal: bool,
    }
    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
    // Seed every selected commit with its own bit; selected commits are
    // always maximal.
    for (bit, oid) in selected.iter().enumerate() {
        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
            mask: vec![0u64; word_count],
            maximal: true,
        });
        ent.mask[bit / 64] |= 1u64 << (bit % 64);
        ent.maximal = true;
    }
    // Start from the commits that are nobody's first parent.
    let mut queue: Vec<ObjectId> = first_parent
        .keys()
        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
        .copied()
        .collect();
    let mut num_maximal = 0usize;
    while let Some(oid) = queue.pop() {
        // Each commit is finalised (removed from `ents`) exactly when it is
        // dequeued, i.e. after all of its first-parent children.
        if let Some(ent) = ents.remove(&oid) {
            if ent.maximal {
                num_maximal += 1;
            }
            if let Some(Some(parent)) = first_parent.get(&oid) {
                match ents.entry(*parent) {
                    std::collections::hash_map::Entry::Vacant(vacant) => {
                        // Fresh parent mask: c_not_p, !p_not_c -> not maximal.
                        vacant.insert(MaximalEnt {
                            mask: ent.mask.clone(),
                            maximal: false,
                        });
                    }
                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
                        let parent_ent = occupied.get_mut();
                        // Does the child carry bits the parent lacks?
                        let c_not_p = ent
                            .mask
                            .iter()
                            .zip(&parent_ent.mask)
                            .any(|(child, parent)| child & !parent != 0);
                        if c_not_p {
                            // Did the parent already hold bits the child
                            // lacks? Evaluated before the masks are merged.
                            let p_not_c = parent_ent
                                .mask
                                .iter()
                                .zip(&ent.mask)
                                .any(|(parent, child)| parent & !child != 0);
                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
                                *parent |= child;
                            }
                            parent_ent.maximal = p_not_c;
                        }
                    }
                }
            }
        }
        // This child is done; release the parent once all its children are.
        if let Some(Some(parent)) = first_parent.get(&oid)
            && let Some(remaining) = pending_children.get_mut(parent)
        {
            *remaining -= 1;
            if *remaining == 0 {
                queue.push(*parent);
            }
        }
    }
    Ok(num_maximal)
}
1613
1614/// Shared write half: `bit_order` lists every covered object's oid in bit
1615/// order (pack order for a single pack, pseudo-pack order for a midx);
1616/// `checksum` fills the BITM checksum field (pack checksum / midx checksum).
1617fn build_reachability_bitmap(
1618    db: &FileObjectDatabase,
1619    format: ObjectFormat,
1620    checksum: &ObjectId,
1621    bit_order: &[ObjectId],
1622    preferred_tips: &HashSet<ObjectId>,
1623) -> Result<Option<Vec<u8>>> {
1624    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
1625        return Ok(None);
1626    }
1627    let object_count = bit_order.len();
1628
1629    // The on-disk entry position space is the oid-sorted lookup order (.idx /
1630    // midx OIDL); derive each bit-order slot's rank there.
1631    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
1632    oid_sorted.sort_by(|&left, &right| {
1633        bit_order[left as usize]
1634            .as_bytes()
1635            .cmp(bit_order[right as usize].as_bytes())
1636    });
1637    let mut index_position = vec![0u32; object_count];
1638    for (position, &slot) in oid_sorted.iter().enumerate() {
1639        index_position[slot as usize] = position as u32;
1640    }
1641    let mut oid_to_pack = HashMap::with_capacity(object_count);
1642    for (pack_pos, oid) in bit_order.iter().enumerate() {
1643        oid_to_pack.insert(*oid, pack_pos as u32);
1644    }
1645
1646    // Object types in bit order; commits also collect (date, parent count).
1647    let mut object_types = Vec::with_capacity(object_count);
1648    struct IndexedCommit {
1649        oid: ObjectId,
1650        pack_pos: u32,
1651        index_pos: u32,
1652        date: i64,
1653        parent_count: usize,
1654    }
1655    let mut indexed_commits = Vec::new();
1656    for (pack_pos, oid) in bit_order.iter().enumerate() {
1657        // Type via the header fast path: blobs (the bulk of most packs) never
1658        // need their bodies inflated here.
1659        let object_type = match db.read_object_header(oid)? {
1660            Some((object_type, _)) => object_type,
1661            None => db.read_object(oid)?.object_type,
1662        };
1663        object_types.push(object_type);
1664        if object_type == ObjectType::Commit {
1665            let object = db.read_object(oid)?;
1666            let commit = Commit::parse_ref(format, &object.body)?;
1667            indexed_commits.push(IndexedCommit {
1668                oid: *oid,
1669                pack_pos: pack_pos as u32,
1670                index_pos: index_position[pack_pos],
1671                date: commit_identity_timestamp(commit.committer),
1672                parent_count: grafted_parents(db, oid, commit.parents).len(),
1673            });
1674        }
1675    }
1676
1677    // Selection: date-descending, then the spacing schedule.
1678    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
1679    let mut selected: Vec<&IndexedCommit> = Vec::new();
1680    let commit_count = indexed_commits.len() as u32;
1681    if commit_count < 100 {
1682        selected.extend(indexed_commits.iter());
1683    } else {
1684        let mut i = 0u32;
1685        loop {
1686            let next = bitmap_next_commit_index(i);
1687            if i + next >= commit_count {
1688                break;
1689            }
1690            let mut chosen = &indexed_commits[(i + next) as usize];
1691            if next > 0 {
1692                for j in 0..=next {
1693                    let candidate = &indexed_commits[(i + j) as usize];
1694                    if preferred_tips.contains(&candidate.oid) {
1695                        chosen = candidate;
1696                        break;
1697                    }
1698                    if candidate.parent_count >= 2 {
1699                        chosen = candidate;
1700                    }
1701                }
1702            }
1703            selected.push(chosen);
1704            i += next + 1;
1705        }
1706    }
1707
1708    // Trace2 selection counters (upstream bitmap_builder_init): emitted before
1709    // the closure walk, like upstream emits them before building the ewah
1710    // bitmaps. Computing num_maximal_commits needs its own first-parent walk,
1711    // so it only runs when the trace2 event target is active.
1712    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
1713        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
1714        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
1715        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
1716        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
1717    }
1718
1719    // Reachability closures, oldest-first so newer walks stop at memoised
1720    // older selected commits.
1721    let word_count = object_count.div_ceil(64);
1722    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
1723    for commit in selected.iter().rev() {
1724        let mut acc = vec![0u64; word_count];
1725        let mut pending = vec![commit.oid];
1726        while let Some(oid) = pending.pop() {
1727            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
1728                // Mirrors upstream's "Packfile doesn't have full closure".
1729                eprintln!(
1730                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
1731                );
1732                return Ok(None);
1733            };
1734            if bitset_get(&acc, pack_pos) {
1735                continue;
1736            }
1737            if let Some(stored) = memo.get(&oid) {
1738                bitset_or(&mut acc, stored);
1739                continue;
1740            }
1741            bitset_set(&mut acc, pack_pos);
1742            let object = db.read_object(&oid)?;
1743            let tree = {
1744                let parsed = Commit::parse_ref(format, &object.body)?;
1745                pending.extend(grafted_parents(db, &oid, parsed.parents));
1746                parsed.tree
1747            };
1748            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
1749                return Ok(None);
1750            }
1751        }
1752        memo.insert(commit.oid, Arc::new(acc));
1753    }
1754
1755    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
1756    for commit in &selected {
1757        let words = match memo.get(&commit.oid) {
1758            Some(words) => words,
1759            None => continue,
1760        };
1761        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
1762    }
1763    writer.write().map(Some)
1764}
1765
1766/// Marks `tree` and everything below it (sub-trees, blobs) in `acc`, skipping
1767/// already-set bits (their closure is already covered). Returns `false` when an
1768/// object is missing from the pack (no full closure), after warning.
1769fn bitmap_mark_tree(
1770    db: &impl ObjectReader,
1771    format: ObjectFormat,
1772    tree: &ObjectId,
1773    oid_to_pack: &HashMap<ObjectId, u32>,
1774    acc: &mut [u64],
1775) -> Result<bool> {
1776    let Some(&pack_pos) = oid_to_pack.get(tree) else {
1777        eprintln!(
1778            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
1779        );
1780        return Ok(false);
1781    };
1782    if bitset_get(acc, pack_pos) {
1783        return Ok(true);
1784    }
1785    bitset_set(acc, pack_pos);
1786    let object = db.read_object(tree)?;
1787    for entry in TreeEntries::new(format, &object.body) {
1788        let entry = entry?;
1789        if entry.is_gitlink() {
1790            continue;
1791        }
1792        if entry.is_tree() {
1793            if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
1794                return Ok(false);
1795            }
1796        } else {
1797            let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
1798                eprintln!(
1799                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
1800                    entry.oid
1801                );
1802                return Ok(false);
1803            };
1804            bitset_set(acc, blob_pos);
1805        }
1806    }
1807    Ok(true)
1808}
1809
1810/// A pack's `.bitmap` loaded for consultation: oid <-> pack-position mappings,
1811/// resolved (XOR-expanded) per-commit reachability bitsets, and the four object
1812/// type bitmaps. Bit numbering follows pack order throughout.
1813pub struct LoadedPackBitmap {
1814    object_count: u32,
1815    oid_to_pack: HashMap<ObjectId, u32>,
1816    pack_to_oid: Vec<ObjectId>,
1817    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
1818    commits: Vec<u64>,
1819    trees: Vec<u64>,
1820    blobs: Vec<u64>,
1821    tags: Vec<u64>,
1822}
1823
1824impl LoadedPackBitmap {
1825    pub fn object_count(&self) -> u32 {
1826        self.object_count
1827    }
1828
1829    /// Pack-order position of `oid`, when the object is in the bitmapped pack.
1830    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
1831        self.oid_to_pack.get(oid).copied()
1832    }
1833
1834    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
1835        self.pack_to_oid.get(position as usize)
1836    }
1837
1838    /// The resolved reachability bitset stored for `oid`, when it was one of
1839    /// the writer's selected commits.
1840    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
1841        self.commit_words.get(oid)
1842    }
1843
1844    /// Oids of every commit with a stored bitmap entry (unordered).
1845    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
1846        self.commit_words.keys()
1847    }
1848
1849    /// The type bitmap for `object_type` (bit per pack position).
1850    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
1851        match object_type {
1852            ObjectType::Commit => &self.commits,
1853            ObjectType::Tree => &self.trees,
1854            ObjectType::Blob => &self.blobs,
1855            ObjectType::Tag => &self.tags,
1856        }
1857    }
1858
1859    fn word_count(&self) -> usize {
1860        (self.object_count as usize).div_ceil(64)
1861    }
1862}
1863
1864/// Loads the single-pack `.bitmap` of `objects_dir/pack`, if a valid one
1865/// exists. Scans `pack-*.bitmap` files (sorted, first valid wins, like
1866/// upstream's "first bitmap" behaviour), requires the sibling `.idx`, and
1867/// verifies the recorded pack checksum. Any unreadable/corrupt bitmap yields
1868/// `Ok(None)` — consumers fall back to a regular object walk, mirroring
1869/// upstream's warn-and-ignore on bitmap load failure.
1870pub fn load_pack_bitmap(
1871    objects_dir: &Path,
1872    format: ObjectFormat,
1873) -> Result<Option<LoadedPackBitmap>> {
1874    let pack_dir = objects_dir.join("pack");
1875    if !pack_dir.exists() {
1876        return Ok(None);
1877    }
1878    // A multi-pack bitmap wins over single-pack bitmaps, like upstream's
1879    // open_bitmap trying the midx first.
1880    if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
1881        return Ok(Some(bitmap));
1882    }
1883    let mut bitmap_paths = Vec::new();
1884    for entry in fs::read_dir(&pack_dir)? {
1885        let path = entry?.path();
1886        if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
1887            && path
1888                .file_name()
1889                .and_then(|name| name.to_str())
1890                .is_some_and(|name| name.starts_with("pack-"))
1891        {
1892            bitmap_paths.push(path);
1893        }
1894    }
1895    bitmap_paths.sort();
1896    for bitmap_path in bitmap_paths {
1897        match load_pack_bitmap_file(&bitmap_path, format) {
1898            Ok(Some(bitmap)) => return Ok(Some(bitmap)),
1899            Ok(None) | Err(_) => continue,
1900        }
1901    }
1902    Ok(None)
1903}
1904
1905/// Loads `multi-pack-index-<checksum>.bitmap` when the pack directory has a
1906/// multi-pack-index with a `RIDX` chunk (the bit-order permutation) and a
1907/// matching bitmap file. Returns `Ok(None)` — never an error — on any missing
1908/// or unusable piece, so callers fall through to single-pack bitmaps.
1909fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
1910    let midx_path = pack_dir.join("multi-pack-index");
1911    if !midx_path.exists() {
1912        return Ok(None);
1913    }
1914    let Ok(midx_bytes) = fs::read(&midx_path) else {
1915        return Ok(None);
1916    };
1917    let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
1918        return Ok(None);
1919    };
1920    let bitmap_path = pack_dir.join(format!(
1921        "multi-pack-index-{}.bitmap",
1922        midx.checksum.to_hex()
1923    ));
1924    if !bitmap_path.exists() {
1925        return Ok(None);
1926    }
1927    let object_count = midx.objects.len();
1928    // Upstream `load_midx_revindex`: prefer the midx's own RIDX chunk unless
1929    // GIT_TEST_MIDX_READ_RIDX=0 disables it, else fall back to the separate
1930    // `multi-pack-index-<checksum>.rev` file; a trace2 data event records
1931    // which source supplied the permutation.
1932    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
1933        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
1934        .unwrap_or(true);
1935    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
1936        (Some(chunk), true) => {
1937            sley_core::trace2::data("load_midx_revindex", "source", "midx");
1938            chunk.clone()
1939        }
1940        _ => {
1941            let rev_path =
1942                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
1943            let Ok(rev_bytes) = fs::read(&rev_path) else {
1944                // Without the RIDX permutation the bit numbering is unknown.
1945                return Ok(None);
1946            };
1947            let Ok(parsed_rev) =
1948                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
1949            else {
1950                return Ok(None);
1951            };
1952            sley_core::trace2::data("load_midx_revindex", "source", "rev");
1953            parsed_rev.positions
1954        }
1955    };
1956    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
1957        return Ok(None);
1958    };
1959    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
1960        Ok(parsed) => parsed,
1961        Err(_) => return Ok(None),
1962    };
1963    if parsed.pack_checksum != midx.checksum {
1964        return Ok(None);
1965    }
1966
1967    // midx.objects is in lookup (oid-sorted) order; RIDX maps bit positions
1968    // to lookup positions.
1969    let mut pack_to_oid = Vec::with_capacity(object_count);
1970    for &midx_pos in &reverse_index {
1971        let Some(entry) = midx.objects.get(midx_pos as usize) else {
1972            return Ok(None);
1973        };
1974        pack_to_oid.push(entry.oid);
1975    }
1976    let mut oid_to_pack = HashMap::with_capacity(object_count);
1977    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
1978        oid_to_pack.insert(*oid, pack_pos as u32);
1979    }
1980    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
1981        midx.objects.get(position).map(|entry| entry.oid)
1982    }) {
1983        Ok(loaded) => Ok(Some(loaded)),
1984        Err(_) => Ok(None),
1985    }
1986}
1987
1988fn load_pack_bitmap_file(
1989    bitmap_path: &Path,
1990    format: ObjectFormat,
1991) -> Result<Option<LoadedPackBitmap>> {
1992    let index_path = bitmap_path.with_extension("idx");
1993    if !index_path.exists() {
1994        return Ok(None);
1995    }
1996    let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1997    let object_count = index.entries.len();
1998    let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
1999    if parsed.pack_checksum != index.pack_checksum {
2000        return Ok(None);
2001    }
2002
2003    let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2004    pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2005    let mut pack_to_oid = Vec::with_capacity(object_count);
2006    for index_pos in &pack_order {
2007        pack_to_oid.push(index.entries[*index_pos as usize].oid);
2008    }
2009    let mut oid_to_pack = HashMap::with_capacity(object_count);
2010    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2011        oid_to_pack.insert(*oid, pack_pos as u32);
2012    }
2013
2014    assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2015        index.entries.get(position).map(|entry| entry.oid)
2016    })
2017    .map(Some)
2018}
2019
2020/// Shared tail of the bitmap loaders: expands the type bitmaps, resolves the
2021/// per-commit entries (XOR offsets reference earlier entries in file order),
2022/// and maps each entry's lookup-order position back to a commit oid via
2023/// `lookup_oid`.
2024fn assemble_loaded_bitmap(
2025    parsed: PackBitmapIndex,
2026    object_count: usize,
2027    pack_to_oid: Vec<ObjectId>,
2028    oid_to_pack: HashMap<ObjectId, u32>,
2029    lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2030) -> Result<LoadedPackBitmap> {
2031    let word_count = object_count.div_ceil(64);
2032    let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2033        let mut words = bitmap.to_words()?;
2034        words.resize(word_count, 0);
2035        Ok(words)
2036    };
2037
2038    let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2039    let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2040    for (entry_index, entry) in parsed.entries.iter().enumerate() {
2041        let mut words = expand(&entry.bitmap)?;
2042        if entry.xor_offset > 0 {
2043            let base_index = entry_index - entry.xor_offset as usize;
2044            let base = &resolved[base_index];
2045            for (dst, src) in words.iter_mut().zip(base.iter()) {
2046                *dst ^= *src;
2047            }
2048        }
2049        let words = Arc::new(words);
2050        resolved.push(Arc::clone(&words));
2051        let commit_oid = lookup_oid(entry.object_position as usize)
2052            .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2053        commit_words.insert(commit_oid, words);
2054    }
2055
2056    Ok(LoadedPackBitmap {
2057        object_count: object_count as u32,
2058        oid_to_pack,
2059        pack_to_oid,
2060        commit_words,
2061        commits: expand(&parsed.type_bitmaps.commits)?,
2062        trees: expand(&parsed.type_bitmaps.trees)?,
2063        blobs: expand(&parsed.type_bitmaps.blobs)?,
2064        tags: expand(&parsed.type_bitmaps.tags)?,
2065    })
2066}
2067
2068/// Result of a bitmap-assisted reachability walk: pack-position bits for
2069/// in-pack objects plus the "extended" objects encountered outside the
2070/// bitmapped pack (in first-seen order, like upstream's extended index).
2071pub struct BitmapWalkResult {
2072    pub words: Vec<u64>,
2073    pub extended: Vec<(ObjectId, ObjectType)>,
2074}
2075
2076impl BitmapWalkResult {
2077    /// Removes everything reachable in `haves` from this result.
2078    pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2079        for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2080            *dst &= !*src;
2081        }
2082        let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2083        self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2084    }
2085}
2086
2087/// Computes the set of objects reachable from `roots` using stored bitmaps
2088/// where available and a fill-in object walk where not — the consult half of
2089/// the bitmap engine (upstream `find_objects` + `fill_in_bitmap`).
2090///
2091/// Roots may be any object type; tag chains are peeled with every tag object
2092/// itself included, like the pending-object handling in
2093/// `prepare_bitmap_walk`. When `include_objects` is false only commits are
2094/// walked (tree contents of fill-in commits are not marked) — callers that
2095/// only count/enumerate commits mask with the commit type bitmap, so the
2096/// extra non-commit bits OR-ed in from stored (closed) bitmaps are harmless.
2097pub fn bitmap_reachable(
2098    bitmap: &LoadedPackBitmap,
2099    db: &impl ObjectReader,
2100    format: ObjectFormat,
2101    roots: &[ObjectId],
2102    include_objects: bool,
2103) -> Result<BitmapWalkResult> {
2104    let mut walk = BitmapFillWalk {
2105        bitmap,
2106        words: vec![0u64; bitmap.word_count()],
2107        extended: Vec::new(),
2108        extended_seen: HashSet::new(),
2109    };
2110    let mut commit_stack: Vec<ObjectId> = Vec::new();
2111
2112    for root in roots {
2113        let mut oid = *root;
2114        // Peel tag chains, marking each tag object on the way.
2115        loop {
2116            let object = db.read_object(&oid)?;
2117            match object.object_type {
2118                ObjectType::Tag => {
2119                    walk.mark(&oid, ObjectType::Tag);
2120                    let tag = Tag::parse_ref(format, &object.body)?;
2121                    oid = tag.object;
2122                }
2123                ObjectType::Commit => {
2124                    commit_stack.push(oid);
2125                    break;
2126                }
2127                ObjectType::Tree => {
2128                    walk.mark_tree_closure(db, format, &oid)?;
2129                    break;
2130                }
2131                ObjectType::Blob => {
2132                    walk.mark(&oid, ObjectType::Blob);
2133                    break;
2134                }
2135            }
2136        }
2137    }
2138
2139    while let Some(oid) = commit_stack.pop() {
2140        if let Some(position) = bitmap.pack_position(&oid) {
2141            if bitset_get(&walk.words, position) {
2142                continue;
2143            }
2144            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
2145                bitset_or(&mut walk.words, stored);
2146                continue;
2147            }
2148            bitset_set(&mut walk.words, position);
2149        } else {
2150            if walk.extended_seen.contains(&oid) {
2151                continue;
2152            }
2153            walk.extended_seen.insert(oid);
2154            walk.extended.push((oid, ObjectType::Commit));
2155        }
2156        let object = db.read_object(&oid)?;
2157        let commit = Commit::parse_ref(format, &object.body)?;
2158        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
2159        if include_objects {
2160            walk.mark_tree_closure(db, format, &commit.tree)?;
2161        }
2162    }
2163
2164    Ok(BitmapWalkResult {
2165        words: walk.words,
2166        extended: walk.extended,
2167    })
2168}
2169
2170struct BitmapFillWalk<'a> {
2171    bitmap: &'a LoadedPackBitmap,
2172    words: Vec<u64>,
2173    extended: Vec<(ObjectId, ObjectType)>,
2174    extended_seen: HashSet<ObjectId>,
2175}
2176
2177impl BitmapFillWalk<'_> {
2178    /// Marks one object; returns false when it was already marked.
2179    fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2180        if let Some(position) = self.bitmap.pack_position(oid) {
2181            if bitset_get(&self.words, position) {
2182                return false;
2183            }
2184            bitset_set(&mut self.words, position);
2185            true
2186        } else {
2187            if !self.extended_seen.insert(*oid) {
2188                return false;
2189            }
2190            self.extended.push((*oid, object_type));
2191            true
2192        }
2193    }
2194
2195    /// Marks `tree` and everything below it, skipping subtrees already marked
2196    /// (a set in-pack bit means its closure is covered: either it came from a
2197    /// stored — closed — bitmap, or this walk already expanded it).
2198    fn mark_tree_closure(
2199        &mut self,
2200        db: &impl ObjectReader,
2201        format: ObjectFormat,
2202        tree: &ObjectId,
2203    ) -> Result<()> {
2204        if !self.mark(tree, ObjectType::Tree) {
2205            return Ok(());
2206        }
2207        let object = db.read_object(tree)?;
2208        for entry in TreeEntries::new(format, &object.body) {
2209            let entry = entry?;
2210            if entry.is_gitlink() {
2211                continue;
2212            }
2213            if entry.is_tree() {
2214                self.mark_tree_closure(db, format, &entry.oid)?;
2215            } else {
2216                self.mark(&entry.oid, ObjectType::Blob);
2217            }
2218        }
2219        Ok(())
2220    }
2221}
2222
2223#[derive(Debug)]
2224pub struct ObjectDatabase {
2225    format: ObjectFormat,
2226    // Behind a `Mutex` so `write_object` can take `&self` (matching the
2227    // `ObjectWriter` trait) and a single handle can interleave reads and writes
2228    // without a `&mut` borrow — the same shared-by-`&` shape the file-backed
2229    // database uses for its caches. Removes the need for callers to wrap this in
2230    // a `RefCell`/`&mut` just to write (see sley-fetch's former `RefCell` dance).
2231    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
2232    promisor: bool,
2233}
2234
2235impl ObjectDatabase {
2236    pub fn new(format: ObjectFormat) -> Self {
2237        Self {
2238            format,
2239            objects: Mutex::new(HashMap::new()),
2240            promisor: false,
2241        }
2242    }
2243
2244    pub fn with_promisor(mut self, promisor: bool) -> Self {
2245        self.promisor = promisor;
2246        self
2247    }
2248
2249    pub fn contains(&self, oid: &ObjectId) -> bool {
2250        self.objects
2251            .lock()
2252            .map(|objects| objects.contains_key(oid))
2253            .unwrap_or(false)
2254    }
2255
2256    pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2257        let object = self.read_object(oid)?;
2258        let actual = object.object_id(self.format)?;
2259        if &actual == oid {
2260            Ok(())
2261        } else {
2262            Err(GitError::InvalidObject(format!(
2263                "object id mismatch: expected {oid}, got {actual}"
2264            )))
2265        }
2266    }
2267}
2268
2269impl ObjectReader for ObjectDatabase {
2270    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
2271        self.objects
2272            .lock()
2273            .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
2274            .get(oid)
2275            .map(Arc::clone)
2276            .or_else(|| implied_empty_tree_object(self.format, oid))
2277            .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
2278    }
2279}
2280
2281impl ObjectWriter for ObjectDatabase {
2282    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
2283        let oid = object.object_id(self.format)?;
2284        self.objects
2285            .lock()
2286            .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
2287            .entry(oid)
2288            .or_insert_with(|| Arc::new(object));
2289        Ok(oid)
2290    }
2291}
2292
/// A single alternate entry, identified only by its filesystem `path`.
///
/// NOTE(review): presumably the location of an alternate object directory
/// (as listed in git's `objects/info/alternates`) — confirm against the code
/// that constructs it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path carried by this entry.
    pub path: std::path::PathBuf,
}
2297
/// Partial-clone settings: an optional promisor remote name plus a flag for
/// tolerating missing promised objects.
///
/// NOTE(review): how these fields are consumed is not visible in this part of
/// the file — confirm the exact semantics at the use sites.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, when one is configured (presumably the
    /// remote that promised objects can be fetched from — TODO confirm).
    pub promisor_remote: Option<String>,
    /// Presumably: whether a read may tolerate an object that is missing
    /// locally but promised by the remote — TODO confirm.
    pub allow_missing_promised_objects: bool,
}
2303
/// Raw pack-file bytes keyed by pack path, shared across cloned handles. Loaded
/// once so individual objects can be decoded at their offsets (see
/// [`sley_pack::read_object_at`]) without re-reading the whole file per read.
///
/// The map itself sits behind `Arc<Mutex<..>>`; each pack's bytes are stored as
/// an `Arc<PackData>`, so handing them out is a reference-count bump rather
/// than a copy of the pack.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
2308
/// Backing bytes of a pack file: either memory-mapped (under the `mmap` feature)
/// or read into the heap. Both deref to `&[u8]`, so the decode path is identical.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped pack file; this variant only exists when the `mmap`
    /// feature is enabled.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// Whole pack file held in a heap-allocated buffer.
    Heap(Vec<u8>),
}

/// Lets any `PackData` be used wherever a byte slice is expected, regardless
/// of which backing store holds the bytes.
impl std::ops::Deref for PackData {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        match self {
            // Relies on `&MappedFile` coercing to `&[u8]`.
            #[cfg(feature = "mmap")]
            Self::Mapped(mapped) => mapped,
            Self::Heap(bytes) => bytes,
        }
    }
}
2329
2330/// Load a pack file's bytes: memory-mapped when the `mmap` feature is on (falling
2331/// back to a heap read if the map fails), otherwise read into the heap.
2332#[cfg(feature = "mmap")]
2333fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2334    match sley_mmap::MappedFile::open_pack(pack_path) {
2335        Ok(mapped) => Ok(PackData::Mapped(mapped)),
2336        Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
2337    }
2338}
2339
2340#[cfg(not(feature = "mmap"))]
2341fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2342    Ok(PackData::Heap(fs::read(pack_path)?))
2343}
2344
/// Shared, memory-capped LRU of recently decoded objects.
///
/// Shared across cloned handles, so hot delta bases and repeated reads during
/// a walk aren't re-decoded. The cache is bounded by an approximate byte
/// budget (not a fixed object count) so it neither thrashes on bulk reads of
/// small objects nor blows up on a few large ones.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;
2351
/// Per-pack caches of objects decoded from a pack, keyed by pack path and then by
/// the in-pack byte offset of each object's entry. Shared across cloned handles.
/// This is the delta-base cache: resolving a delta chain by offset reuses already
/// decoded bases instead of re-inflating the whole chain on every read.
///
/// Each pack's cache has its own `Mutex`, separate from the one guarding the
/// outer path-keyed map.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
2357
/// One pack's offset-keyed header memo (see [`PackHeaderTypeCaches`]): maps an
/// in-pack entry offset to the memoized `(ObjectType, u64)` header result.
///
/// NOTE(review): the `u64` is presumably the object's size — confirm against
/// the header fast path that fills this map.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Per-pack memo of `in-pack offset -> end-of-chain object type` for the
/// `cat-file --batch-check` header fast path. Resolving a packed delta's *type*
/// walks the delta chain to its base; without this memo every header read
/// re-walks (and re-inflates) the whole chain, so reading every object in a
/// deeply-deltified pack is super-linear (sley#26). The type only depends on the
/// chain base, so memoizing `offset -> type` lets each chain be walked at most
/// once across a batch. Keyed by pack path so an offset key is never applied to
/// the wrong pack's bytes; shared across cloned handles.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
2370
/// Default approximate byte budget (96 MiB) for the decoded-object LRU. Sized to
/// comfortably hold the working set of a history walk (commits/trees/blobs and
/// their delta bases) without growing without bound on large repositories.
/// Overridable via the `SLEY_OBJECT_CACHE_BYTES` environment variable; there is
/// currently no git-config hook threaded into the object database, so this
/// constant is the default.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;
2377
/// Default approximate byte budget (96 MiB) for each per-pack delta-base cache.
/// Holds the decoded bases of the delta chains being walked so neighboring reads
/// stay warm. Overridable via `SLEY_DELTA_BASE_CACHE_BYTES`.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
2382
2383/// Approximate heap cost of caching one [`EncodedObject`]: its body plus a fixed
2384/// allowance for the key, enum/`Vec` headers, and per-entry map overhead. Used
2385/// only to drive eviction, so an estimate is fine.
2386fn cached_object_cost(object: &EncodedObject) -> usize {
2387    object.body.len().saturating_add(64)
2388}
2389
/// Reads an approximate byte budget from the environment variable `var`.
///
/// Surrounding whitespace is ignored. `default` is returned when the variable
/// is unset, not valid Unicode, or does not parse as a `usize`. A value of
/// `0` is returned as-is and disables the cache.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
2398
2399/// Approximate byte budget for the decoded-object LRU (see
2400/// [`DEFAULT_OBJECT_CACHE_BYTES`], `SLEY_OBJECT_CACHE_BYTES`).
2401///
2402/// Resolved once per process: the environment does not change under us, and a new
2403/// `FileObjectDatabase` is built often enough (e.g. once per revision resolved)
2404/// that re-reading the variable each time showed up as per-object overhead.
2405fn object_cache_budget() -> usize {
2406    static BUDGET: OnceLock<usize> = OnceLock::new();
2407    *BUDGET.get_or_init(|| {
2408        cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
2409    })
2410}
2411
2412/// Approximate byte budget for each per-pack delta-base cache (see
2413/// [`DEFAULT_DELTA_BASE_CACHE_BYTES`], `SLEY_DELTA_BASE_CACHE_BYTES`). Resolved
2414/// once per process for the same reason as [`object_cache_budget`].
2415fn delta_base_cache_budget() -> usize {
2416    static BUDGET: OnceLock<usize> = OnceLock::new();
2417    *BUDGET.get_or_init(|| {
2418        cache_budget_from_env(
2419            "SLEY_DELTA_BASE_CACHE_BYTES",
2420            DEFAULT_DELTA_BASE_CACHE_BYTES,
2421        )
2422    })
2423}
2424
/// Whether every object read should be re-hashed and compared to the requested id.
///
/// Disabled by default, matching git: reads trust the pack index → offset mapping
/// and a loose object's on-disk name, while ids are verified where git verifies
/// them — when a pack is received (the index build re-hashes every object) and on
/// demand via [`FileObjectDatabase`]'s `validate`/fsck. Re-hashing on every read
/// dominated bulk-read cost, so the paranoid check is opt-in: set
/// `SLEY_VERIFY_READS` to anything other than an empty string or `0` (after
/// trimming whitespace). The variable is consulted once and the answer is
/// memoized for the lifetime of the process.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|raw| !matches!(raw.trim(), "" | "0"))
            .unwrap_or(false)
    })
}
2442
2443/// A memory-capped LRU map from a key `K` to a decoded [`EncodedObject`].
2444///
2445/// Eviction is by approximate byte budget (gix-style), not object count, so the
2446/// cache adapts to object size. On access an entry is moved to most-recently-used;
2447/// on insert, least-recently-used entries are dropped until the budget holds. A
2448/// budget of `0` makes the cache inert. Generic over the key so it backs both the
2449/// oid-keyed decoded-object cache and the offset-keyed delta-base cache.
2450#[derive(Debug)]
2451struct LruCache<K: std::hash::Hash + Eq + Clone> {
2452    budget: usize,
2453    used: usize,
2454    map: HashMap<K, Arc<EncodedObject>>,
2455    order: VecDeque<K>,
2456}
2457
2458impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
2459    fn new(budget: usize) -> Self {
2460        Self {
2461            budget,
2462            used: 0,
2463            map: HashMap::new(),
2464            order: VecDeque::new(),
2465        }
2466    }
2467
2468    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
2469        let object = Arc::clone(self.map.get(key)?);
2470        self.touch(key);
2471        Some(object)
2472    }
2473
2474    /// Move `key` to the most-recently-used end. Linear in the recency queue, but
2475    /// the queue is bounded by the byte budget and this only runs on cache hits.
2476    fn touch(&mut self, key: &K) {
2477        if let Some(position) = self.order.iter().position(|existing| existing == key)
2478            && let Some(found) = self.order.remove(position)
2479        {
2480            self.order.push_back(found);
2481        }
2482    }
2483
2484    /// Drop `key` from both the map and the recency queue, releasing its budget.
2485    fn remove(&mut self, key: &K) {
2486        if let Some(object) = self.map.remove(key) {
2487            self.used = self.used.saturating_sub(cached_object_cost(&object));
2488        }
2489        if let Some(position) = self.order.iter().position(|existing| existing == key) {
2490            self.order.remove(position);
2491        }
2492    }
2493
2494    fn clear(&mut self) {
2495        self.map.clear();
2496        self.order.clear();
2497        self.used = 0;
2498    }
2499
2500    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
2501        if self.budget == 0 {
2502            return;
2503        }
2504        let cost = cached_object_cost(&object);
2505        // A single object larger than the whole budget is not worth caching; it
2506        // would immediately evict everything including itself. Drop any stale
2507        // smaller entry stored under the same key so accounting stays exact.
2508        if cost > self.budget {
2509            self.remove(&key);
2510            return;
2511        }
2512        if let Some(previous) = self.map.insert(key.clone(), object) {
2513            // Replacing an existing entry: adjust accounting and refresh recency.
2514            self.used = self
2515                .used
2516                .saturating_sub(cached_object_cost(&previous))
2517                .saturating_add(cost);
2518            self.touch(&key);
2519        } else {
2520            self.used = self.used.saturating_add(cost);
2521            self.order.push_back(key);
2522        }
2523        while self.used > self.budget {
2524            let Some(evicted) = self.order.pop_front() else {
2525                break;
2526            };
2527            if let Some(object) = self.map.remove(&evicted) {
2528                self.used = self.used.saturating_sub(cached_object_cost(&object));
2529            }
2530        }
2531    }
2532}
2533
/// Decoded-object cache keyed by object id (loose + packed reads share it).
/// [`FileObjectDatabase`] sizes it from `object_cache_budget()`.
type LruObjectCache = LruCache<ObjectId>;
/// Delta-base cache keyed by in-pack byte offset, scoped to one pack. Exposed to
/// the pack decoder through [`PackDeltaCacheAdapter`].
type LruOffsetCache = LruCache<u64>;
2538
2539/// Bridges the offset-keyed [`LruOffsetCache`] to [`sley_pack::PackDeltaCache`]
2540/// so the pack decoder can reuse decoded delta bases. Holds the shared cache
2541/// behind its mutex; a poisoned lock simply behaves as a cache miss/no-op, so a
2542/// decode still completes correctly (just without reuse).
2543struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
2544
2545impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
2546    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
2547        self.0.lock().ok()?.get(&offset)
2548    }
2549
2550    fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
2551        if let Ok(mut cache) = self.0.lock() {
2552            cache.put(offset, object);
2553        }
2554    }
2555}
2556
2557/// Bridges a per-pack `offset -> ObjectType` memo into the header fast path so
2558/// the ofs-delta chain walk is performed at most once per chain across a batch
2559/// of `read_object_header` calls (sley#26).
2560struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
2561
2562impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
2563    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
2564        self.0.lock().ok()?.get(&pack_offset).copied()
2565    }
2566
2567    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
2568        if let Ok(mut cache) = self.0.lock() {
2569            cache.insert(pack_offset, header);
2570        }
2571    }
2572}
2573
/// Parsed pack indexes keyed by `.idx` path, shared across cloned handles. Caches
/// the index parse so locating a packed object doesn't re-parse every `.idx` on
/// each read. Emptied by [`FileObjectDatabase::refresh_read_cache`].
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;

/// Parsed multi-pack-index files keyed by path, shared across cloned handles.
/// Caches the MIDX parse so object lookups in repositories with a MIDX avoid
/// reparsing the same fanout/object tables for every read. Emptied by
/// [`FileObjectDatabase::refresh_read_cache`].
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
2583
/// A `.idx`/`.pack` pair discovered in a pack directory.
#[derive(Debug, Clone)]
struct DiscoveredPack {
    /// Path of the pack index (`.idx`) file.
    idx: PathBuf,
    /// Path of the pack data (`.pack`) file paired with `idx`.
    pack: PathBuf,
}
2590
/// The discovered `.idx`/`.pack` pairs in each pack directory, keyed by the pack
/// directory and shared across cloned handles. Caches the directory scan so a
/// bulk read (e.g. `cat-file --batch`) does not `read_dir` the pack directory on
/// every object lookup. New packs are still found: a lookup that misses every
/// cached pack re-scans the directory once before concluding the object is absent
/// (see [`FileObjectDatabase::find_pack_containing`]). The whole listing is also
/// dropped by [`FileObjectDatabase::refresh_read_cache`].
type PackListingCache = Arc<Mutex<HashMap<PathBuf, Arc<Vec<DiscoveredPack>>>>>;
2598
/// Object database backed by an on-disk `objects` directory: loose objects,
/// packs under `objects_dir/pack`, and any alternate object directories.
///
/// The `Arc`-wrapped caches are documented as shared across cloned handles, so
/// cloning a database keeps the clones' caches in common.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    /// Loose-object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    /// The object directory this handle reads from and installs packs into.
    objects_dir: PathBuf,
    /// Alternate object directories, consulted after the local loose and packed
    /// lookups miss. Empty for handles built with `without_alternates`.
    alternates: Vec<PathBuf>,
    /// Object-id format (hash algorithm) this database was opened with.
    format: ObjectFormat,
    /// Cached pack file contents (read through `cached_pack_bytes`).
    // NOTE(review): `PackBytesCache` is defined outside this view — presumably
    // keyed by pack path; confirm.
    pack_bytes: PackBytesCache,
    /// Parsed `.idx` files, keyed by index path.
    pack_indexes: PackIndexCache,
    /// Parsed multi-pack-index files, keyed by path.
    multi_pack_indexes: MultiPackIndexCache,
    /// Cached pack-directory scans, keyed by pack directory.
    pack_listing: PackListingCache,
    /// Byte-budgeted LRU of decoded objects, sized from `object_cache_budget()`.
    decoded: DecodedObjectCache,
    /// Per-pack delta-base caches.
    // NOTE(review): `PackDeltaCaches` is defined outside this view — presumably
    // keyed by pack path like `pack_header_types`; confirm.
    pack_deltas: PackDeltaCaches,
    /// Per-pack header memos used by the `read_object_header` fast path.
    pack_header_types: PackHeaderTypeCaches,
    /// Graft points (`$GIT_DIR/shallow`), loaded lazily on the first
    /// [`ObjectReader::is_shallow_graft`] query. `$GIT_DIR` is taken to be
    /// the parent of `objects_dir`, matching the standard layout.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
2617
2618/// Parse `$GIT_DIR/shallow`: one hex object id per line. A missing file is an
2619/// empty set (the repository is not shallow); unparsable lines are ignored so
2620/// a torn write never poisons walks.
2621fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
2622    let Ok(contents) = std::fs::read_to_string(shallow_file) else {
2623        return HashSet::new();
2624    };
2625    contents
2626        .lines()
2627        .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
2628        .collect()
2629}
2630
2631pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
2632    env::var_os("GIT_OBJECT_DIRECTORY")
2633        .map(PathBuf::from)
2634        .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
2635}
2636
/// The common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is returned verbatim.
/// 2. If `git_dir/commondir` is readable, its trimmed contents name the common
///    directory; a relative value is resolved against `git_dir`. The result is
///    canonicalized when possible and returned un-canonicalized otherwise.
/// 3. Otherwise `git_dir` itself is the common directory.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(pointer.trim());
    let common = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    // Canonicalization fails when the target does not exist; keep the raw path then.
    fs::canonicalize(&common).unwrap_or(common)
}
2654
2655pub fn repository_object_ids(
2656    git_dir: impl AsRef<Path>,
2657    format: ObjectFormat,
2658) -> Result<Vec<ObjectId>> {
2659    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
2660}
2661
2662pub fn object_ids_in_objects_dir(
2663    objects_dir: impl AsRef<Path>,
2664    format: ObjectFormat,
2665) -> Result<Vec<ObjectId>> {
2666    let objects_dir = objects_dir.as_ref();
2667    let mut oids = HashSet::new();
2668    collect_loose_object_ids(objects_dir, format, &mut oids)?;
2669    collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
2670    let mut oids = oids.into_iter().collect::<Vec<_>>();
2671    oids.sort_by_key(ObjectId::to_hex);
2672    Ok(oids)
2673}
2674
2675fn collect_loose_object_ids(
2676    objects_dir: &Path,
2677    format: ObjectFormat,
2678    oids: &mut HashSet<ObjectId>,
2679) -> Result<()> {
2680    if !objects_dir.exists() {
2681        return Ok(());
2682    }
2683    let hex_len = format.hex_len();
2684    for entry in fs::read_dir(objects_dir)? {
2685        let entry = entry?;
2686        if !entry.file_type()?.is_dir() {
2687            continue;
2688        }
2689        let name = entry.file_name();
2690        let Some(fanout) = name.to_str() else {
2691            continue;
2692        };
2693        if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
2694            continue;
2695        }
2696        for object_entry in fs::read_dir(entry.path())? {
2697            let object_entry = object_entry?;
2698            if !object_entry.file_type()?.is_file() {
2699                continue;
2700            }
2701            let name = object_entry.file_name();
2702            let Some(suffix) = name.to_str() else {
2703                continue;
2704            };
2705            if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
2706                continue;
2707            }
2708            oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
2709        }
2710    }
2711    Ok(())
2712}
2713
2714fn collect_loose_fanout_object_ids(
2715    objects_dir: &Path,
2716    format: ObjectFormat,
2717    fanout: u8,
2718    oids: &mut HashSet<ObjectId>,
2719) -> Result<()> {
2720    let fanout_hex = format!("{fanout:02x}");
2721    let fanout_dir = objects_dir.join(&fanout_hex);
2722    let entries = match fs::read_dir(&fanout_dir) {
2723        Ok(entries) => entries,
2724        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
2725        Err(err) => return Err(GitError::Io(err.to_string())),
2726    };
2727    let hex_len = format.hex_len();
2728    for object_entry in entries {
2729        let object_entry = object_entry?;
2730        let name = object_entry.file_name();
2731        let Some(suffix) = name.to_str() else {
2732            continue;
2733        };
2734        if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
2735            continue;
2736        }
2737        oids.insert(ObjectId::from_hex(format, &format!("{fanout_hex}{suffix}"))?);
2738    }
2739    Ok(())
2740}
2741
/// Record of which loose objects are known to exist, filled in per fanout byte.
// NOTE(review): the users of this type are outside this view; the field
// descriptions below are inferred from the names and types — confirm against
// `LooseObjectStore`.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    /// Fanout bytes (first byte of the object id) that have presumably already
    /// been scanned into `objects`.
    loaded_fanouts: HashSet<u8>,
    /// Loose object ids recorded so far.
    objects: HashSet<ObjectId>,
}
2747
2748/// Every object id resolvable through a pack (any `.idx` or the
2749/// multi-pack-index) under `objects_dir/pack`. Used by `--unpacked`
2750/// filtering: an object is "unpacked" when absent from this set, regardless
2751/// of a loose copy also existing.
2752pub fn packed_object_ids(
2753    objects_dir: impl AsRef<Path>,
2754    format: ObjectFormat,
2755) -> Result<HashSet<ObjectId>> {
2756    let mut oids = HashSet::new();
2757    collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
2758    Ok(oids)
2759}
2760
2761fn collect_packed_object_ids(
2762    pack_dir: &Path,
2763    format: ObjectFormat,
2764    oids: &mut HashSet<ObjectId>,
2765) -> Result<()> {
2766    if !pack_dir.exists() {
2767        return Ok(());
2768    }
2769    let midx_path = pack_dir.join("multi-pack-index");
2770    if midx_path.exists() {
2771        let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
2772        oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
2773    }
2774    for entry in fs::read_dir(pack_dir)? {
2775        let path = entry?.path();
2776        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
2777            continue;
2778        }
2779        let index = PackIndex::parse(&fs::read(path)?, format)?;
2780        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
2781    }
2782    Ok(())
2783}
2784
2785impl FileObjectDatabase {
    /// The object-id format (hash algorithm) this database was opened with.
    ///
    /// Pack installers check incoming packs against this format, and it is
    /// passed to every index and object-id parse.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
2790
    /// The repository object directory this database reads from. Packs are
    /// installed into its `pack` sub-directory.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
2795
2796    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
2797        let objects_dir = objects_dir.into();
2798        Self {
2799            loose: LooseObjectStore::new(objects_dir.clone(), format),
2800            alternates: alternate_object_dirs(&objects_dir),
2801            objects_dir,
2802            format,
2803            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
2804            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2805            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2806            pack_listing: Arc::new(Mutex::new(HashMap::new())),
2807            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
2808            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
2809            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
2810            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
2811        }
2812    }
2813
2814    fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
2815        let objects_dir = objects_dir.into();
2816        Self {
2817            loose: LooseObjectStore::new(objects_dir.clone(), format),
2818            alternates: Vec::new(),
2819            objects_dir,
2820            format,
2821            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
2822            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2823            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2824            pack_listing: Arc::new(Mutex::new(HashMap::new())),
2825            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
2826            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
2827            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
2828            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
2829        }
2830    }
2831
2832    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
2833        Self::new(repository_objects_dir(git_dir), format)
2834    }
2835
2836    /// Drop cached pack listings, indexes, and decoded objects so the next read
2837    /// sees packs/objects installed after this handle was created (e.g. after
2838    /// `fetch` or `install_pack`). Long-lived [`Repository`] sessions call this
2839    /// via the owning repository's `refresh_objects` hook.
2840    pub fn refresh_read_cache(&self) {
2841        if let Ok(mut cache) = self.pack_listing.lock() {
2842            cache.clear();
2843        }
2844        if let Ok(mut cache) = self.pack_indexes.lock() {
2845            cache.clear();
2846        }
2847        if let Ok(mut cache) = self.multi_pack_indexes.lock() {
2848            cache.clear();
2849        }
2850        if let Ok(mut cache) = self.pack_bytes.lock() {
2851            cache.clear();
2852        }
2853        if let Ok(mut cache) = self.pack_deltas.lock() {
2854            cache.clear();
2855        }
2856        if let Ok(mut cache) = self.pack_header_types.lock() {
2857            cache.clear();
2858        }
2859        if let Ok(mut cache) = self.decoded.lock() {
2860            cache.clear();
2861        }
2862        self.loose.invalidate_cache();
2863    }
2864
    /// The loose-object store backing this database (rooted at the same
    /// object directory).
    pub fn loose(&self) -> &LooseObjectStore {
        &self.loose
    }
2868
2869    pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
2870        self.install_pack_with_options(pack, RawPackInstallOptions::default())
2871    }
2872
2873    pub fn install_pack_with_options(
2874        &self,
2875        pack: &PackWrite,
2876        options: RawPackInstallOptions,
2877    ) -> Result<PackInstallResult> {
2878        if pack.checksum.format() != self.format {
2879            return Err(GitError::InvalidObjectId(format!(
2880                "pack checksum uses {}, store uses {}",
2881                pack.checksum.format().name(),
2882                self.format.name()
2883            )));
2884        }
2885        for entry in &pack.entries {
2886            if entry.oid.format() != self.format {
2887                return Err(GitError::InvalidObjectId(format!(
2888                    "pack entry {} uses {}, store uses {}",
2889                    entry.oid,
2890                    entry.oid.format().name(),
2891                    self.format.name()
2892                )));
2893            }
2894        }
2895        let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
2896        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
2897        if canonical_index.pack_checksum != pack.checksum
2898            || parsed_index.pack_checksum != pack.checksum
2899        {
2900            return Err(GitError::InvalidFormat(
2901                "pack and index checksums do not match pack write".into(),
2902            ));
2903        }
2904        if pack.index != canonical_index.index {
2905            return Err(GitError::InvalidFormat(
2906                "pack index does not match pack contents".into(),
2907            ));
2908        }
2909
2910        let pack_dir = self.objects_dir.join("pack");
2911        fs::create_dir_all(&pack_dir)?;
2912        let pack_name = format!("pack-{}", pack.checksum.to_hex());
2913        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
2914        let index_path = pack_dir.join(format!("{pack_name}.idx"));
2915        if !pack_path.exists() || !index_path.exists() {
2916            write_pack_component(&pack_path, &pack.pack)?;
2917            write_pack_component(&index_path, &pack.index)?;
2918        }
2919        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
2920        Ok(PackInstallResult {
2921            pack_name,
2922            pack_path,
2923            index_path,
2924            promisor_path,
2925            object_ids: canonical_index
2926                .entries
2927                .iter()
2928                .map(|entry| entry.oid)
2929                .collect(),
2930        })
2931    }
2932
2933    /// Install a pack that was produced in this process by [`PackFile::write_packed`].
2934    ///
2935    /// Unlike [`Self::install_raw_pack_with_options`], this does not re-inflate
2936    /// every pack entry to rebuild the index. It validates the generated pack
2937    /// trailer and generated index against the writer's object ids, CRCs, and
2938    /// offsets, then writes those bytes directly. Use the raw installer for
2939    /// arbitrary pack bytes received from an untrusted transport.
2940    pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
2941        self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
2942    }
2943
2944    pub fn install_written_pack_with_options(
2945        &self,
2946        pack: &PackWrite,
2947        options: RawPackInstallOptions,
2948    ) -> Result<PackInstallResult> {
2949        validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
2950        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
2951        if parsed_index.pack_checksum != pack.checksum {
2952            return Err(GitError::InvalidFormat(
2953                "pack write index checksum does not match pack".into(),
2954            ));
2955        }
2956        if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
2957            return Err(GitError::InvalidFormat(
2958                "pack write index does not match generated entries".into(),
2959            ));
2960        }
2961        self.install_generated_pack_unchecked(pack, options)
2962    }
2963
2964    fn install_generated_pack_unchecked(
2965        &self,
2966        pack: &PackWrite,
2967        options: RawPackInstallOptions,
2968    ) -> Result<PackInstallResult> {
2969        let pack_dir = self.objects_dir.join("pack");
2970        fs::create_dir_all(&pack_dir)?;
2971        let pack_name = format!("pack-{}", pack.checksum.to_hex());
2972        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
2973        let index_path = pack_dir.join(format!("{pack_name}.idx"));
2974        if !pack_path.exists() || !index_path.exists() {
2975            write_pack_component(&pack_path, &pack.pack)?;
2976            write_pack_component(&index_path, &pack.index)?;
2977        }
2978        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
2979        Ok(PackInstallResult {
2980            pack_name,
2981            pack_path,
2982            index_path,
2983            promisor_path,
2984            object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
2985        })
2986    }
2987
2988    pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
2989        self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
2990    }
2991
2992    pub fn install_raw_pack_with_options(
2993        &self,
2994        pack_bytes: &[u8],
2995        options: RawPackInstallOptions,
2996    ) -> Result<PackInstallResult> {
2997        let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
2998        let pack_dir = self.objects_dir.join("pack");
2999        fs::create_dir_all(&pack_dir)?;
3000        let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
3001        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3002        let index_path = pack_dir.join(format!("{pack_name}.idx"));
3003        if !pack_path.exists() || !index_path.exists() {
3004            write_pack_component(&pack_path, pack_bytes)?;
3005            write_pack_component(&index_path, &built.index)?;
3006        }
3007        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3008        Ok(PackInstallResult {
3009            pack_name,
3010            pack_path,
3011            index_path,
3012            promisor_path,
3013            object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
3014        })
3015    }
3016
3017    pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
3018        if self.loose.exists(oid)? {
3019            return Ok(true);
3020        }
3021        if self.find_pack_containing(oid)?.is_some() {
3022            return Ok(true);
3023        }
3024        for alternate in &self.alternates {
3025            if Self::without_alternates(alternate, self.format).contains(oid)? {
3026                return Ok(true);
3027            }
3028        }
3029        // Reprepare-on-miss: a cached negative loose verdict may predate a
3030        // sibling write. Drop it and exact-probe once before reporting absence.
3031        self.loose.invalidate_cache();
3032        self.loose.exists(oid)
3033    }
3034
3035    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3036        let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
3037            .into_iter()
3038            .collect::<HashSet<_>>();
3039        for alternate in &self.alternates {
3040            oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
3041        }
3042        let mut oids = oids.into_iter().collect::<Vec<_>>();
3043        oids.sort_by_key(ObjectId::to_hex);
3044        Ok(oids)
3045    }
3046
3047    pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3048        if let Some(disk_size) = self.loose.disk_size(oid)? {
3049            return Ok(Some(ObjectStorageInfo {
3050                disk_size,
3051                deltabase: zero_oid(self.format)?,
3052            }));
3053        }
3054        if let Some(info) = self.packed_object_storage_info(oid)? {
3055            return Ok(Some(info));
3056        }
3057        for alternate in &self.alternates {
3058            if let Some(info) =
3059                Self::without_alternates(alternate, self.format).object_storage_info(oid)?
3060            {
3061                return Ok(Some(info));
3062            }
3063        }
3064        // Reprepare-on-miss: drop any stale negative loose cache and exact-probe
3065        // once before reporting absence (see `read_object`).
3066        self.loose.invalidate_cache();
3067        if let Some(disk_size) = self.loose.disk_size(oid)? {
3068            return Ok(Some(ObjectStorageInfo {
3069                disk_size,
3070                deltabase: zero_oid(self.format)?,
3071            }));
3072        }
3073        Ok(None)
3074    }
3075
3076    pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
3077        validate_object_id_prefix(self.format, prefix)?;
3078        let mut matches = Vec::new();
3079        for oid in self.object_ids()? {
3080            if object_id_matches_prefix(&oid, prefix) {
3081                matches.push(oid);
3082            }
3083        }
3084        Ok(match matches.len() {
3085            0 => ObjectPrefixResolution::Missing,
3086            1 => ObjectPrefixResolution::Unique(matches.remove(0)),
3087            _ => ObjectPrefixResolution::Ambiguous(matches),
3088        })
3089    }
3090
3091    /// The object type and content size of `oid` without decoding its full body —
3092    /// git's `cat-file --batch-check` fast path. Tries the decoded-object cache,
3093    /// then loose storage (inflating only the framing header), then packs (reading
3094    /// the entry header and, for deltas, only the delta's leading varints), then
3095    /// alternates. Returns `Ok(None)` if the object is not present.
3096    ///
3097    /// Unlike [`ObjectReader::read_object`], this never materializes the body, so it
3098    /// stays cheap on huge blobs and deep delta chains. It does not populate the
3099    /// decoded-object cache (nothing is decoded).
    pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
        // The empty tree is answered directly as a zero-length tree; the check
        // only compares `oid` against the format's empty-tree id.
        if implied_empty_tree_object(self.format, oid).is_some() {
            return Ok(Some((ObjectType::Tree, 0)));
        }
        // A body already held by the decoded-object cache yields its type and
        // length without any further lookup. A poisoned lock simply skips this step.
        if let Ok(mut cache) = self.decoded.lock()
            && let Some(object) = cache.get(oid)
        {
            return Ok(Some((object.object_type, object.body.len() as u64)));
        }
        // Loose storage is consulted before packs.
        if let Some(header) = self.loose.read_header(oid)? {
            return Ok(Some(header));
        }
        if let Some(pack_paths) = self.find_pack_containing(oid)? {
            let bytes = self.cached_pack_bytes(&pack_paths.pack)?;
            // Per-pack offset->type memo so the ofs-delta chain walk that resolves
            // a packed object's type runs at most once per chain across the batch,
            // instead of re-walking (and re-inflating each link's leading varints)
            // on every header read — the sley#26 super-linear cat-file --batch-check.
            let type_cache = self.pack_header_type_cache(&pack_paths.pack);
            // Base-type resolver handed to the pack reader: it recurses into this
            // method and keeps only the type, so a base stored anywhere in the
            // database (loose, another pack, an alternate) can be resolved.
            let resolve_ref_base = |base: &ObjectId| {
                self.read_object_header(base)
                    .map(|header| header.map(|(t, _)| t))
            };
            // `type_cache` is `None` only when the memo map could not be locked;
            // the unmemoized reader is used in that case.
            let header = match &type_cache {
                Some(cache) => {
                    let mut adapter = PackHeaderTypeCacheAdapter(cache);
                    sley_pack::read_object_header_at_with_cache(
                        &bytes,
                        pack_paths.offset,
                        self.format,
                        resolve_ref_base,
                        &mut adapter,
                    )?
                }
                None => sley_pack::read_object_header_at(
                    &bytes,
                    pack_paths.offset,
                    self.format,
                    resolve_ref_base,
                )?,
            };
            return Ok(Some(header));
        }
        // Each alternate is probed through a fresh handle from `without_alternates`
        // (presumably one that does not follow the alternate's own alternates —
        // TODO confirm against that constructor).
        for alternate in &self.alternates {
            if let Some(header) =
                Self::without_alternates(alternate, self.format).read_object_header(oid)?
            {
                return Ok(Some(header));
            }
        }
        // Reprepare-on-miss: discard any stale negative loose cache and retry an
        // exact path probe once before reporting absence (see `read_object`).
        self.loose.invalidate_cache();
        if let Some(header) = self.loose.read_header(oid)? {
            return Ok(Some(header));
        }
        Ok(None)
    }
3158
    /// Read `oid` from pack storage only (this method never looks at loose
    /// objects or alternates itself).
    ///
    /// Returns `Ok(None)` when [`Self::find_pack_containing`] reports no pack for
    /// `oid`. A successfully decoded object is stored in the decoded-object cache
    /// before it is returned.
    ///
    /// # Errors
    ///
    /// Propagates lookup, pack-loading and decode errors. When
    /// `verify_reads_enabled()` is true, also returns `GitError::InvalidObject`
    /// if the decoded object hashes to an id other than `oid`.
    fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
        // Memory-capped decoded-object cache first (delta-base reuse for ref-delta
        // bases that resolve back through the store + repeated whole-object reads).
        if let Ok(mut cache) = self.decoded.lock()
            && let Some(object) = cache.get(oid)
        {
            return Ok(Some(object));
        }
        let Some(pack_paths) = self.find_pack_containing(oid)? else {
            return Ok(None);
        };
        let bytes = self.cached_pack_bytes(&pack_paths.pack)?;
        // Per-pack delta-base cache (keyed by in-pack offset). Resolving an
        // ofs-delta chain reuses already-decoded bases instead of re-inflating the
        // whole chain on every read. Scoped to this pack's path so an offset key is
        // never applied to the wrong pack's bytes.
        let delta_cache = self.pack_delta_cache(&pack_paths.pack);
        let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
        // Decode only this object at its offset (plus its delta-base chain). A
        // ref-delta base resolves through the full store (loose / other packs) and
        // reuses the decoded-object cache. No cache lock is held across the decode,
        // so the recursive resolver re-entry (which may re-enter read_object) is
        // safe.
        let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
        // Without a delta cache (its map lock was poisoned) the uncached decoder
        // is used instead.
        let object = match &delta_adapter {
            Some(adapter) => sley_pack::read_object_at_with_cache_arc(
                &bytes,
                pack_paths.offset,
                self.format,
                resolve_ref_base,
                adapter,
            )?,
            None => sley_pack::read_object_at_arc(
                &bytes,
                pack_paths.offset,
                self.format,
                resolve_ref_base,
            )?,
        };
        // Trust the index → offset mapping rather than re-hashing every decoded
        // object on read (see `verify_reads_enabled`); this re-hash dominated
        // bulk-read cost. Opt back in with `SLEY_VERIFY_READS` for a paranoid check.
        if verify_reads_enabled() {
            let actual = object.object_id(self.format)?;
            if actual != *oid {
                return Err(GitError::InvalidObject(format!(
                    "pack object id mismatch: index says {oid}, decoded {actual}"
                )));
            }
        }
        // Best effort: a poisoned cache lock just means the object is not cached.
        if let Ok(mut cache) = self.decoded.lock() {
            cache.put(*oid, Arc::clone(&object));
        }
        Ok(Some(object))
    }
3214
3215    /// The per-pack delta-base cache for `pack_path`, creating it on first use.
3216    /// Returns `None` only if the shared map's lock is poisoned, in which case the
3217    /// caller falls back to an uncached decode (correctness preserved).
3218    fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
3219        let mut caches = self.pack_deltas.lock().ok()?;
3220        let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
3221            Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
3222        });
3223        Some(Arc::clone(cache))
3224    }
3225
3226    /// The per-pack header-type memo for `pack_path`, creating it on first use.
3227    /// Returns `None` only if the shared map's lock is poisoned, in which case the
3228    /// caller falls back to an unmemoized header walk (correctness preserved).
3229    fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
3230        let mut caches = self.pack_header_types.lock().ok()?;
3231        let cache = caches
3232            .entry(pack_path.to_path_buf())
3233            .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
3234        Some(Arc::clone(cache))
3235    }
3236
3237    /// Backing bytes of the pack at `pack_path`, loaded at most once per database
3238    /// handle (cached, shared across clones). Memory-mapped under the `mmap` feature,
3239    /// otherwise read into the heap. On a poisoned lock it falls back to loading
3240    /// without caching, preserving correctness.
3241    fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
3242        if let Ok(cache) = self.pack_bytes.lock()
3243            && let Some(bytes) = cache.get(pack_path)
3244        {
3245            return Ok(Arc::clone(bytes));
3246        }
3247        let bytes = Arc::new(load_pack_data(pack_path)?);
3248        if let Ok(mut cache) = self.pack_bytes.lock() {
3249            cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
3250        }
3251        Ok(bytes)
3252    }
3253
3254    /// Parsed index for the `.idx` at `index_path`, parsed at most once per
3255    /// database handle. On a poisoned lock it falls back to parsing without
3256    /// caching, preserving correctness.
3257    fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
3258        if let Ok(cache) = self.pack_indexes.lock()
3259            && let Some(index) = cache.get(index_path)
3260        {
3261            return Ok(Arc::clone(index));
3262        }
3263        let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
3264        if let Ok(mut cache) = self.pack_indexes.lock() {
3265            cache.insert(index_path.to_path_buf(), Arc::clone(&index));
3266        }
3267        Ok(index)
3268    }
3269
3270    /// Parsed multi-pack-index at `midx_path`, parsed at most once per database
3271    /// handle. Returns `Ok(None)` when no MIDX exists. On a poisoned lock it
3272    /// falls back to parsing without caching, preserving correctness.
3273    fn cached_multi_pack_index(&self, midx_path: &Path) -> Result<Option<Arc<MultiPackIndex>>> {
3274        if !midx_path.exists() {
3275            return Ok(None);
3276        }
3277        if let Ok(cache) = self.multi_pack_indexes.lock()
3278            && let Some(midx) = cache.get(midx_path)
3279        {
3280            return Ok(Some(Arc::clone(midx)));
3281        }
3282        let midx = Arc::new(MultiPackIndex::parse(&fs::read(midx_path)?, self.format)?);
3283        if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3284            cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
3285        }
3286        Ok(Some(midx))
3287    }
3288
3289    /// The discovered `.idx`/`.pack` pairs in `pack_dir`, cached and shared across
3290    /// clones. With `force_rescan`, the directory is re-read; the freshly scanned
3291    /// listing is only stored (and returned as a new `Arc`) when its set of `.idx`
3292    /// files actually differs from the cached one, so an unchanged directory keeps
3293    /// the same `Arc` (letting callers detect "nothing new" cheaply). On a poisoned
3294    /// lock it scans without caching, preserving correctness.
3295    fn cached_pack_listing(
3296        &self,
3297        pack_dir: &Path,
3298        force_rescan: bool,
3299    ) -> Result<Arc<Vec<DiscoveredPack>>> {
3300        if !force_rescan
3301            && let Ok(cache) = self.pack_listing.lock()
3302            && let Some(listing) = cache.get(pack_dir)
3303        {
3304            return Ok(Arc::clone(listing));
3305        }
3306        let scanned = Arc::new(scan_pack_listing(pack_dir)?);
3307        if let Ok(mut cache) = self.pack_listing.lock() {
3308            match cache.get(pack_dir) {
3309                // Keep the existing Arc when the scan found the same set of packs,
3310                // so repeated misses don't churn the cache or callers' pointers.
3311                Some(existing) if same_pack_set(existing, &scanned) => {
3312                    return Ok(Arc::clone(existing));
3313                }
3314                _ => {
3315                    cache.insert(pack_dir.to_path_buf(), Arc::clone(&scanned));
3316                }
3317            }
3318        }
3319        Ok(scanned)
3320    }
3321
3322    /// Find `oid` among a cached pack listing, returning its pack path and offset.
3323    /// Uses the parsed-index cache, so this performs no directory I/O.
3324    fn find_in_pack_listing(
3325        &self,
3326        listing: &[DiscoveredPack],
3327        oid: &ObjectId,
3328    ) -> Result<Option<PackPaths>> {
3329        for pack in listing {
3330            let index = self.cached_pack_index(&pack.idx)?;
3331            if let Some(entry) = index.find(oid) {
3332                return Ok(Some(PackPaths {
3333                    pack: pack.pack.clone(),
3334                    offset: entry.offset,
3335                }));
3336            }
3337        }
3338        Ok(None)
3339    }
3340
    /// Locate the pack that stores `oid`, returning the pack path and the
    /// object's offset inside it.
    ///
    /// Lookup order: an already-cached multi-pack-index, an already-cached pack
    /// listing, then (if the `pack` directory exists) the multi-pack-index on
    /// disk, the pack listing, and finally one forced re-scan of the directory.
    /// Returns `Ok(None)` when none of these name the object.
    ///
    /// # Errors
    ///
    /// Returns `GitError::InvalidObjectId` when `oid` uses a different hash
    /// format than this store, and propagates index-loading errors.
    fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackPaths>> {
        if oid.format() != self.format {
            return Err(GitError::InvalidObjectId(format!(
                "object {oid} uses {}, store uses {}",
                oid.format().name(),
                self.format.name()
            )));
        }
        let pack_dir = self.objects_dir.join("pack");
        // Hot path: a previously cached pack listing or multi-pack-index already
        // names every pack, and locating `oid` in them is pure in-memory index
        // work (no directory I/O). Try that first so a warm handle doesn't stat
        // the pack dir / multi-pack-index on every single lookup — that redundant
        // per-object FS probing is what made `cat-file --batch-check` scale poorly
        // versus git, which resolves through its in-memory index (sley#26).
        if let Some(midx) = self.cached_loaded_multi_pack_index()
            && let Some(pack_paths) = self.midx_pack_paths(&pack_dir, &midx, oid)?
        {
            return Ok(Some(pack_paths));
        }
        if let Some(listing) = self.cached_loaded_pack_listing(&pack_dir)
            && let Some(pack_paths) = self.find_in_pack_listing(&listing, oid)?
        {
            return Ok(Some(pack_paths));
        }

        // Cold path from here on: these steps may touch the filesystem.
        if !pack_dir.exists() {
            return Ok(None);
        }
        if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
            return Ok(Some(pack_paths));
        }
        // Search the cached directory listing first. On a complete miss, re-scan
        // the directory once (picking up any pack added since the listing was
        // cached) and search again, so newly written packs are still found.
        let listing = self.cached_pack_listing(&pack_dir, false)?;
        if let Some(pack_paths) = self.find_in_pack_listing(&listing, oid)? {
            return Ok(Some(pack_paths));
        }
        let refreshed = self.cached_pack_listing(&pack_dir, true)?;
        // `cached_pack_listing` returns the previously cached `Arc` when the
        // forced scan found the same packs, so pointer equality means "unchanged".
        if Arc::ptr_eq(&listing, &refreshed) {
            // The re-scan produced the same listing, so nothing new appeared.
            return Ok(None);
        }
        self.find_in_pack_listing(&refreshed, oid)
    }
3387
    /// On-disk size and delta base of the packed object `oid`.
    ///
    /// Returns `Ok(None)` when no pack contains the object. Otherwise:
    /// - `disk_size` is `end_offset - offset`, where `end_offset` is reported by
    ///   `scan_pack_index_offsets` (presumably the start of the next entry, or
    ///   the trailer — confirm against that helper);
    /// - `deltabase` is the base's oid for an ofs-delta (looked up by offset in
    ///   the pack index), the named oid for a ref-delta, and `zero_oid` for a
    ///   non-delta entry.
    ///
    /// # Errors
    ///
    /// Returns `GitError::InvalidFormat` when the pack is shorter than its
    /// checksum trailer, when the computed end offset precedes the entry, or when
    /// an ofs-delta base has no matching index entry; other I/O and parse errors
    /// are propagated.
    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
        let Some(pack_paths) = self.find_pack_containing(oid)? else {
            return Ok(None);
        };
        // The pack ends with a checksum of `raw_len` bytes; entries stop there.
        let pack_len = fs::metadata(&pack_paths.pack)?.len();
        let trailer_offset = pack_len
            .checked_sub(self.format.raw_len() as u64)
            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
        let index_path = pack_paths.pack.with_extension("idx");
        let index = self.cached_pack_index(&index_path)?;
        let pack = self.cached_pack_bytes(&pack_paths.pack)?;
        let delta_base = pack_entry_delta_base(self.format, &pack, pack_paths.offset)?;
        // Only an offset-style base needs to be translated to an oid by the scan.
        let delta_base_offset = match &delta_base {
            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
            Some(PackDeltaBase::Ref(_)) | None => None,
        };
        let offset_info =
            scan_pack_index_offsets(&index, pack_paths.offset, trailer_offset, delta_base_offset)?;
        let disk_size = offset_info
            .end_offset
            .checked_sub(pack_paths.offset)
            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
        let deltabase = match delta_base {
            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
                // scan_pack_index_offsets returns Err when delta_base_offset is
                // Some but no matching entry is found, so this is unreachable for
                // valid packs; propagate as an error rather than panic to keep a
                // malformed pack from taking down the process if that invariant
                // ever drifts.
                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
            })?,
            Some(PackDeltaBase::Ref(oid)) => oid,
            None => zero_oid(self.format)?,
        };
        Ok(Some(ObjectStorageInfo {
            disk_size,
            deltabase,
        }))
    }
3427
3428    fn find_midx_pack_containing(
3429        &self,
3430        pack_dir: &Path,
3431        oid: &ObjectId,
3432    ) -> Result<Option<PackPaths>> {
3433        let midx_path = pack_dir.join("multi-pack-index");
3434        let Some(midx) = self.cached_multi_pack_index(&midx_path)? else {
3435            return Ok(None);
3436        };
3437        self.midx_pack_paths(pack_dir, &midx, oid)
3438    }
3439
3440    /// Resolve `oid` against an already-loaded multi-pack-index, returning the pack
3441    /// path and in-pack offset. Pure in-memory index work; performs no filesystem
3442    /// access. The named pack's existence was established when the midx was parsed
3443    /// and cached, so the hot lookup path no longer re-`stat()`s it on every call —
3444    /// a missing pack surfaces as an `open()` failure when the bytes are actually
3445    /// read (`cached_pack_bytes`), not as a redundant per-lookup existence probe.
3446    fn midx_pack_paths(
3447        &self,
3448        pack_dir: &Path,
3449        midx: &MultiPackIndex,
3450        oid: &ObjectId,
3451    ) -> Result<Option<PackPaths>> {
3452        let Some(entry) = midx.find(oid) else {
3453            return Ok(None);
3454        };
3455        let Some(pack_name) = midx.pack_names.get(entry.pack_int_id as usize) else {
3456            return Err(GitError::InvalidFormat(
3457                "multi-pack-index object points past pack table".into(),
3458            ));
3459        };
3460        let pack_file_name = pack_name
3461            .strip_suffix(".idx")
3462            .map(|stem| format!("{stem}.pack"))
3463            .unwrap_or_else(|| pack_name.clone());
3464        let pack = pack_dir.join(pack_file_name);
3465        Ok(Some(PackPaths {
3466            pack,
3467            offset: entry.offset,
3468        }))
3469    }
3470
3471    /// The multi-pack-index for this object store *only if already parsed and
3472    /// cached* — never touches the filesystem. Used by the lookup hot path to skip
3473    /// the per-call `multi-pack-index` existence stat when a handle is warm.
3474    fn cached_loaded_multi_pack_index(&self) -> Option<Arc<MultiPackIndex>> {
3475        let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
3476        let cache = self.multi_pack_indexes.lock().ok()?;
3477        cache.get(&midx_path).map(Arc::clone)
3478    }
3479
3480    /// The discovered pack listing for `pack_dir` *only if already scanned and
3481    /// cached* — never touches the filesystem. Used by the lookup hot path to skip
3482    /// the per-call pack-dir existence stat when a handle is warm. A cold cache (or
3483    /// a poisoned lock) returns `None`, so the caller falls back to the scanning
3484    /// path that establishes the cache and preserves the new-pack rescan semantics.
3485    fn cached_loaded_pack_listing(&self, pack_dir: &Path) -> Option<Arc<Vec<DiscoveredPack>>> {
3486        let cache = self.pack_listing.lock().ok()?;
3487        cache.get(pack_dir).map(Arc::clone)
3488    }
3489}
3490
3491fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
3492    if prefix.len() < 4 || prefix.len() > format.hex_len() {
3493        return Err(GitError::InvalidObjectId(format!(
3494            "expected 4 to {} hex digits for {}, got {}",
3495            format.hex_len(),
3496            format.name(),
3497            prefix.len()
3498        )));
3499    }
3500    if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3501        return Err(GitError::InvalidObjectId(format!(
3502            "non-hex object id prefix {prefix}"
3503        )));
3504    }
3505    Ok(())
3506}
3507
3508fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
3509    oid.to_hex()
3510        .as_bytes()
3511        .iter()
3512        .zip(prefix.as_bytes())
3513        .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
3514}
3515
3516/// Scan `pack_dir` for `.idx` files that have a matching `.pack` sibling,
3517/// returning the discovered pairs. An `.idx` without its `.pack` is skipped (an
3518/// orphan index cannot serve objects), matching the prior per-read behavior.
3519fn scan_pack_listing(pack_dir: &Path) -> Result<Vec<DiscoveredPack>> {
3520    let mut packs = Vec::new();
3521    for entry in fs::read_dir(pack_dir)? {
3522        let entry = entry?;
3523        let idx = entry.path();
3524        if idx.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3525            continue;
3526        }
3527        let Some(stem) = idx.file_stem() else {
3528            continue;
3529        };
3530        let pack = idx.with_file_name(format!("{}.pack", stem.to_string_lossy()));
3531        if !pack.exists() {
3532            continue;
3533        }
3534        packs.push(DiscoveredPack { idx, pack });
3535    }
3536    // Deterministic order so lookups and set comparison are stable.
3537    packs.sort_by(|left, right| left.idx.cmp(&right.idx));
3538    Ok(packs)
3539}
3540
3541/// Whether two pack listings reference the same set of `.idx` files (order is
3542/// already normalized by [`scan_pack_listing`]).
3543fn same_pack_set(left: &[DiscoveredPack], right: &[DiscoveredPack]) -> bool {
3544    left.len() == right.len()
3545        && left
3546            .iter()
3547            .zip(right.iter())
3548            .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
3549}
3550
/// Collect the alternate object directories configured for `objects_dir`.
///
/// Two sources are read, in this order:
///
/// 1. The `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    [`env::split_paths`] so the platform's path-list separator is honoured and
///    entries that are not valid UTF-8 are kept byte-for-byte. Empty entries are
///    dropped.
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines and lines
///    starting with `#` are ignored, a trailing `\r` is stripped, and relative
///    paths are resolved against `objects_dir`. Lines that are not valid UTF-8
///    are skipped. An unreadable or missing file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
3581
3582impl ObjectReader for FileObjectDatabase {
3583    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
3584        self.shallow_grafts
3585            .get_or_init(|| {
3586                let shallow_file = self
3587                    .objects_dir
3588                    .parent()
3589                    .map(|git_dir| git_dir.join("shallow"));
3590                match shallow_file {
3591                    Some(path) => read_shallow_grafts(&path, self.format),
3592                    None => HashSet::new(),
3593                }
3594            })
3595            .contains(oid)
3596    }
3597
3598    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3599        if let Some(object) = implied_empty_tree_object(self.format, oid) {
3600            return Ok(object);
3601        }
3602        match self.loose.read_object(oid) {
3603            Ok(object) => return Ok(object),
3604            Err(GitError::NotFound(_)) => {}
3605            Err(err) => return Err(err),
3606        }
3607        if let Some(object) = self.read_packed_object(oid)? {
3608            return Ok(object);
3609        }
3610        for alternate in &self.alternates {
3611            match Self::without_alternates(alternate, self.format).read_object(oid) {
3612                Ok(object) => return Ok(object),
3613                Err(GitError::NotFound(_)) => {}
3614                Err(err) => return Err(err),
3615            }
3616        }
3617        // Hard miss against every store. If an earlier enumeration built a loose
3618        // cache, an object written loose afterward by a sibling handle could have
3619        // been skipped above. Mirror git's `oid_object_info_extended`
3620        // reprepare-on-miss: drop stale cache state and retry an exact loose path
3621        // probe once before declaring the object missing.
3622        self.loose.invalidate_cache();
3623        match self.loose.read_object(oid) {
3624            Ok(object) => return Ok(object),
3625            Err(GitError::NotFound(_)) => {}
3626            Err(err) => return Err(err),
3627        }
3628        Err(GitError::object_not_found_in(
3629            *oid,
3630            MissingObjectContext::Read,
3631        ))
3632    }
3633}
3634
3635impl ObjectWriter for FileObjectDatabase {
3636    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3637        // Mirror git's freshen semantics (`write_object_file`:
3638        // `freshen_packed_object || freshen_loose_object`): an object already
3639        // present anywhere in the database — loose, packed, or through an
3640        // alternate — is not written again, so e.g. `git add` after
3641        // `git repack -ad` does not resurrect a loose copy of a packed object.
3642        let oid = object.object_id(self.format)?;
3643        if self.contains(&oid)? {
3644            return Ok(oid);
3645        }
3646        self.loose.write_object(object)
3647    }
3648}
3649
/// Where a packed object lives: which pack file, and at what offset inside it.
/// Produced by the pack-listing and multi-pack-index lookups.
#[derive(Debug, Clone)]
struct PackPaths {
    /// Filesystem path of the `.pack` file holding the object.
    pack: PathBuf,
    /// In-pack offset of the object's entry, copied from the index entry that
    /// matched the lookup.
    offset: u64,
}
3655
3656fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
3657    if path.exists() {
3658        return Ok(());
3659    }
3660    let parent = path
3661        .parent()
3662        .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
3663    fs::create_dir_all(parent)?;
3664    let temp_path = unique_temp_path(parent);
3665    let write_result = (|| -> Result<()> {
3666        {
3667            let mut file = fs::OpenOptions::new()
3668                .write(true)
3669                .create_new(true)
3670                .open(&temp_path)?;
3671            file.write_all(bytes)?;
3672            file.sync_all()?;
3673        }
3674        match fs::rename(&temp_path, path) {
3675            Ok(()) => Ok(()),
3676            Err(_) if path.exists() => {
3677                let _ = fs::remove_file(&temp_path);
3678                Ok(())
3679            }
3680            Err(err) => Err(GitError::Io(err.to_string())),
3681        }
3682    })();
3683    if write_result.is_err() {
3684        let _ = fs::remove_file(&temp_path);
3685    }
3686    write_result
3687}
3688
3689fn write_promisor_pack_sidecar(
3690    pack_dir: &Path,
3691    pack_name: &str,
3692    promisor: bool,
3693) -> Result<Option<PathBuf>> {
3694    if !promisor {
3695        return Ok(None);
3696    }
3697    let path = pack_dir.join(format!("{pack_name}.promisor"));
3698    write_pack_component(&path, b"")?;
3699    Ok(Some(path))
3700}
3701
/// Maximum number of bytes git will inflate when reading a loose object's
/// `"<type> <size>\0"` header (git's `MAX_HEADER_LEN` in object-file.c). The NUL
/// terminator must land within this window, so a header of 32 or more non-NUL
/// bytes is rejected as too long.
///
/// The value is also interpolated into the "header ... too long" diagnostic
/// built by [`loose_header_too_long`].
const MAX_LOOSE_HEADER_LEN: usize = 32;
3707
3708/// git's exact `error:`-level diagnostic for a loose object whose header overflows
3709/// `MAX_LOOSE_HEADER_LEN` (object-file.c: `error(_("header for %s too long, exceeds
3710/// %d bytes"), ...)`). Shared by the header-only and full-read paths so both surface
3711/// byte-identical text.
3712fn loose_header_too_long(oid: &ObjectId) -> GitError {
3713    GitError::InvalidObject(format!(
3714        "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
3715    ))
3716}
3717
3718/// git's `error:`-level diagnostic when the loose framing header cannot be inflated at
3719/// all (object-file.c `loose_object_info`, the `ULHR_BAD` arm: `error(_("unable to
3720/// unpack %s header"), ...)`).
3721fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
3722    GitError::InvalidObject(format!("unable to unpack {oid} header"))
3723}
3724
/// Classify an inflate failure from the two-byte zlib stream header, producing
/// the text git-zlib.c prints via `error("inflate: %s (%s)", ...)`.
///
/// The checks run in the same order as zlib's own `inflate()` HEAD-state
/// validation:
///
/// 1. the FCHECK checksum over CMF+FLG (the 16-bit big-endian value must be a
///    multiple of 31),
/// 2. the compression method (low nibble of CMF must be 8),
/// 3. the window size (high nibble of CMF must not exceed 7),
/// 4. the FDICT preset-dictionary bit (zlib reports `Z_NEED_DICT` with a NULL
///    `msg`, which git renders as "(no message)").
///
/// `None` is returned when fewer than two bytes are available or when the
/// header is valid: flate2 does not expose zlib's per-case `msg` strings for
/// failures past the header, so nothing is invented for them.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let header_word = u16::from_be_bytes([cmf, flg]);
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let wants_dictionary = flg & 0x20 != 0;

    if header_word % 31 != 0 {
        Some("inflate: data stream error (incorrect header check)")
    } else if method != 8 {
        Some("inflate: data stream error (unknown compression method)")
    } else if window_bits > 7 {
        Some("inflate: data stream error (invalid window size)")
    } else if wants_dictionary {
        Some("inflate: needs dictionary (no message)")
    } else {
        None
    }
}
3748
3749/// Print the `error: inflate: ...` line git's zlib wrapper emits the moment
3750/// `inflate()` fails, when the failure is classifiable from the stream header.
3751fn emit_inflate_diagnostic(input: &[u8]) {
3752    if let Some(diagnostic) = inflate_header_diagnostic(input) {
3753        eprintln!("error: {diagnostic}");
3754    }
3755}
3756
/// Integrity verdict for a single loose object file, as classified by
/// [`LooseObjectStore::verify_object`].
///
/// The three variants are mutually exclusive outcomes: the file is sound, it is
/// readable but stored under the wrong id, or it cannot be read at all.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// Inflated, parsed, and re-hashed to its path-derived oid.
    Ok,
    /// Readable and well-formed, but its content hashes to a different oid
    /// (a loose file stored under the wrong path). `actual` carries the oid the
    /// content hashes to.
    HashMismatch { actual: ObjectId },
    /// Unreadable: corrupt zlib stream, truncated content, or unparseable header.
    /// The `error:`-level diagnostics were already printed to stderr.
    Corrupt,
}
3770
/// Handle on the loose-object part of an object directory.
///
/// Cloning the store shares its presence cache, because the cache sits behind
/// an `Arc`.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Directory under which loose objects are stored as
    /// `<first two hex digits>/<remaining hex digits>` (see `object_path`).
    objects_dir: PathBuf,
    /// Hash format of this store; `object_path` rejects ids of any other format.
    format: ObjectFormat,
    /// Lazily-populated set of loose object ids present on disk, mirroring git's
    /// `loose_objects_cache` (object-file.c). A lookup scans the queried
    /// `objects/XX/` fanout once; afterward misses in that fanout are in-memory
    /// checks instead of failed exact-path opens. Shared across
    /// `FileObjectDatabase` clones via `Arc` so a write through one handle is
    /// visible to reads through another; cleared by `refresh_read_cache` so
    /// objects installed out-of-band (fetch, repack) become visible. Writes
    /// extend the set in place rather than invalidating it.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
3785
3786impl LooseObjectStore {
3787    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3788        Self {
3789            objects_dir: objects_dir.into(),
3790            format,
3791            loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
3792        }
3793    }
3794
3795    /// Whether `oid` is present according to the loose-object cache, populating
3796    /// the cache on first use. Returns `None` when the lock cannot be trusted or
3797    /// the scan fails; callers should fall back to an exact filesystem probe in
3798    /// that case so a cache-building problem cannot change read semantics.
3799    fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
3800        let mut guard = self.loose_cache.lock().ok()?;
3801        let fanout = oid.as_bytes()[0];
3802        if !guard.loaded_fanouts.contains(&fanout) {
3803            collect_loose_fanout_object_ids(
3804                &self.objects_dir,
3805                self.format,
3806                fanout,
3807                &mut guard.objects,
3808            )
3809            .ok()?;
3810            guard.loaded_fanouts.insert(fanout);
3811        }
3812        Some(guard.objects.contains(oid))
3813    }
3814
3815    /// Populate the loose-object cache and return the sorted ids. This mirrors
3816    /// git's `odb_loose_cache` lazy fill and is reserved for operations that
3817    /// really need loose-object enumeration.
3818    fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
3819        if let Ok(mut guard) = self.loose_cache.lock() {
3820            guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
3821            guard.loaded_fanouts = (0..=u8::MAX).collect();
3822            let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
3823            ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
3824            return Ok(ids);
3825        }
3826        loose_object_ids(&self.objects_dir, self.format)
3827    }
3828
3829    /// Record `oid` as present in loose storage so subsequent reads find it
3830    /// without a rescan. A no-op when the cache has not been populated yet (the
3831    /// eventual lazy scan will pick the object up) or the lock is poisoned.
3832    fn note_loose_write(&self, oid: ObjectId) {
3833        if let Ok(mut guard) = self.loose_cache.lock() {
3834            guard.objects.insert(oid);
3835        }
3836    }
3837
3838    /// Drop the in-memory loose set so the next access rescans the fanout. Called
3839    /// by `FileObjectDatabase::refresh_read_cache` after out-of-band installs.
3840    pub(crate) fn invalidate_cache(&self) {
3841        if let Ok(mut guard) = self.loose_cache.lock() {
3842            *guard = LoosePresenceCache::default();
3843        }
3844    }
3845
3846    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
3847        Self::new(repository_objects_dir(git_dir), format)
3848    }
3849
3850    pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
3851        if oid.format() != self.format {
3852            return Err(GitError::InvalidObjectId(format!(
3853                "object {oid} uses {}, store uses {}",
3854                oid.format().name(),
3855                self.format.name()
3856            )));
3857        }
3858        let hex = oid.to_hex();
3859        Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
3860    }
3861
3862    pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
3863        let path = self.object_path(oid)?;
3864        if self.cached_loose_presence(oid) == Some(false) {
3865            return Ok(false);
3866        }
3867        Ok(path.exists())
3868    }
3869
3870    pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
3871        let path = self.object_path(oid)?;
3872        if self.cached_loose_presence(oid) == Some(false) {
3873            return Ok(None);
3874        }
3875        match fs::metadata(path) {
3876            Ok(metadata) => Ok(Some(metadata.len())),
3877            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
3878            Err(err) => Err(GitError::Io(err.to_string())),
3879        }
3880    }
3881
3882    /// The object type and content size of `oid` from loose storage, inflating only
3883    /// the framing header (`"<type> <size>\0"`) and not the body. Output-limited
3884    /// reads keep miniz from inflating past the header even for large objects.
3885    /// Returns `Ok(None)` when the loose object is absent.
3886    pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
3887        let path = self.object_path(oid)?;
3888        if self.cached_loose_presence(oid) == Some(false) {
3889            return Ok(None);
3890        }
3891        let mut file = match fs::File::open(&path) {
3892            Ok(file) => file,
3893            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3894            Err(err) => return Err(GitError::Io(err.to_string())),
3895        };
3896        // Capture the zlib stream's 2-byte header before inflating: when the stream
3897        // is corrupt, those bytes identify zlib's diagnostic (incorrect header
3898        // check, needs dictionary, ...) exactly as zlib's `inflate()` would report
3899        // it through git's wrapper.
3900        let mut stream_prefix = [0u8; 2];
3901        let prefix_len = read_full_prefix(&mut file, &mut stream_prefix)?;
3902        file.seek(SeekFrom::Start(0))
3903            .map_err(|err| GitError::Io(err.to_string()))?;
3904        let mut decoder = ZlibDecoder::new(file);
3905        let mut header = Vec::new();
3906        let mut byte = [0u8; 1];
3907        loop {
3908            // git inflates only the first `MAX_LOOSE_HEADER_LEN` bytes
3909            // (object-file.c `unpack_loose_header`) and reports ULHR_TOO_LONG when no
3910            // NUL terminator lands within them — whether the stream simply ends early
3911            // or overflows the window. Both collapse to the same `error:`-level
3912            // diagnostic, so a header that ends before its NUL is "too long" too.
3913            // A stream that won't inflate at all is git's ULHR_BAD instead: the
3914            // zlib wrapper's `error: inflate: ...` line, then "unable to unpack
3915            // <oid> header".
3916            let read = match decoder.read(&mut byte) {
3917                Ok(read) => read,
3918                Err(_) => {
3919                    emit_inflate_diagnostic(&stream_prefix[..prefix_len]);
3920                    return Err(loose_unpack_header_failed(oid));
3921                }
3922            };
3923            if read == 0 {
3924                return Err(loose_header_too_long(oid));
3925            }
3926            if byte[0] == 0 {
3927                break;
3928            }
3929            header.push(byte[0]);
3930            // A 31-byte header (NUL at the 32nd byte) is the longest that fits; 32
3931            // non-NUL bytes overflow the window.
3932            if header.len() >= MAX_LOOSE_HEADER_LEN {
3933                return Err(loose_header_too_long(oid));
3934            }
3935        }
3936        let header =
3937            std::str::from_utf8(&header).map_err(|err| GitError::InvalidObject(err.to_string()))?;
3938        let (kind, size) = header
3939            .split_once(' ')
3940            .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
3941        let object_type = kind.parse::<ObjectType>()?;
3942        let size = size
3943            .parse::<u64>()
3944            .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
3945        Ok(Some((object_type, size)))
3946    }
3947
3948    /// Loose object ids in this store, sorted by hex.
3949    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3950        self.loose_object_ids_cached()
3951    }
3952
3953    /// fsck's loose-object integrity probe, mirroring C git's `read_loose_object`
3954    /// (object-file.c) as called from `fsck_loose` (builtin/fsck.c): inflate and
3955    /// parse the file at `oid`'s loose path, then re-hash its content against the
3956    /// path-derived oid. `display_path` appears verbatim in the `error:`-level
3957    /// diagnostics — the path-form messages of `read_loose_object` ("unable to
3958    /// unpack header of <path>"), unlike the oid-form messages of the normal read
3959    /// path. Returns `Ok(None)` when no loose file exists for `oid`.
3960    pub fn verify_object(
3961        &self,
3962        oid: &ObjectId,
3963        display_path: &str,
3964    ) -> Result<Option<LooseObjectIntegrity>> {
3965        let path = self.object_path(oid)?;
3966        let compressed = match fs::read(&path) {
3967            Ok(compressed) => compressed,
3968            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3969            Err(err) => return Err(GitError::Io(err.to_string())),
3970        };
3971        let mut decoder = ZlibDecoder::new(compressed.as_slice());
3972        let mut framed = Vec::new();
3973        if decoder.read_to_end(&mut framed).is_err() {
3974            emit_inflate_diagnostic(&compressed);
3975            // No NUL inside the header window means inflation died before the
3976            // framing header materialized (`unpack_loose_header` != ULHR_OK);
3977            // with the header intact it is the body that broke
3978            // (`unpack_loose_rest`).
3979            if framed_loose_header_terminated(&framed) {
3980                eprintln!("error: unable to unpack contents of {display_path}");
3981            } else {
3982                eprintln!("error: unable to unpack header of {display_path}");
3983            }
3984            return Ok(Some(LooseObjectIntegrity::Corrupt));
3985        }
3986        if !framed_loose_header_terminated(&framed) {
3987            // ULHR_TOO_LONG collapses into the same path-form message here: C's
3988            // `read_loose_object` treats every non-OK `unpack_loose_header` alike.
3989            eprintln!("error: unable to unpack header of {display_path}");
3990            return Ok(Some(LooseObjectIntegrity::Corrupt));
3991        }
3992        let Ok(object) = parse_framed_object(&framed) else {
3993            eprintln!("error: unable to parse header of {display_path}");
3994            return Ok(Some(LooseObjectIntegrity::Corrupt));
3995        };
3996        let actual = object.object_id(self.format)?;
3997        if &actual != oid {
3998            return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
3999        }
4000        Ok(Some(LooseObjectIntegrity::Ok))
4001    }
4002}
4003
4004/// Whether the inflated framing bytes contain the header's NUL terminator within
4005/// git's `MAX_HEADER_LEN` window (object-file.c `unpack_loose_header`'s success
4006/// condition).
4007fn framed_loose_header_terminated(framed: &[u8]) -> bool {
4008    framed
4009        .iter()
4010        .take(MAX_LOOSE_HEADER_LEN)
4011        .any(|byte| *byte == 0)
4012}
4013
4014/// Read up to `prefix.len()` bytes from the start of `file`, returning how many
4015/// were available (short only when the file itself is shorter).
4016fn read_full_prefix(file: &mut fs::File, prefix: &mut [u8]) -> Result<usize> {
4017    let mut len = 0;
4018    while len < prefix.len() {
4019        let read = file
4020            .read(&mut prefix[len..])
4021            .map_err(|err| GitError::Io(err.to_string()))?;
4022        if read == 0 {
4023            break;
4024        }
4025        len += read;
4026    }
4027    Ok(len)
4028}
4029
4030impl ObjectReader for LooseObjectStore {
4031    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4032        let path = self.object_path(oid)?;
4033        // Skip the `open()` (and its ENOENT) when an already-built loose cache
4034        // knows the id is absent. Without a cache, use an exact path probe; a
4035        // full fanout scan is far more expensive for one-shot packed-object reads.
4036        if self.cached_loose_presence(oid) == Some(false) {
4037            return Err(GitError::object_not_found_in(
4038                *oid,
4039                MissingObjectContext::Read,
4040            ));
4041        }
4042        let compressed = match fs::read(&path) {
4043            Ok(compressed) => compressed,
4044            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4045                return Err(GitError::object_not_found_in(
4046                    *oid,
4047                    MissingObjectContext::Read,
4048                ));
4049            }
4050            Err(err) => return Err(GitError::Io(err.to_string())),
4051        };
4052        let mut decoder = ZlibDecoder::new(compressed.as_slice());
4053        let mut framed = Vec::new();
4054        if decoder.read_to_end(&mut framed).is_err() {
4055            emit_inflate_diagnostic(&compressed);
4056            // A stream that dies before the framing header materializes is git's
4057            // ULHR_BAD ("unable to unpack <oid> header"); with the header intact,
4058            // the body is what broke (`unpack_loose_rest`'s "corrupt loose
4059            // object").
4060            if !framed_loose_header_terminated(&framed) {
4061                return Err(loose_unpack_header_failed(oid));
4062            }
4063            return Err(GitError::InvalidObject(format!(
4064                "corrupt loose object '{oid}'"
4065            )));
4066        }
4067        // git only inflates the first `MAX_LOOSE_HEADER_LEN` bytes looking for the
4068        // header's NUL terminator before parsing the type; an over-long header is
4069        // rejected here (with git's diagnostic) rather than failing later as an
4070        // "unknown object type". Mirror that so `cat-file -p` matches upstream.
4071        if framed
4072            .iter()
4073            .take(MAX_LOOSE_HEADER_LEN)
4074            .all(|byte| *byte != 0)
4075        {
4076            return Err(loose_header_too_long(oid));
4077        }
4078        let object = parse_framed_object(&framed)?;
4079        // Trust the loose object's on-disk name rather than re-hashing its full body
4080        // on every read (see `verify_reads_enabled`); use `validate`/fsck or
4081        // `SLEY_VERIFY_READS` for an explicit integrity check.
4082        if verify_reads_enabled() {
4083            let actual = object.object_id(self.format)?;
4084            if &actual != oid {
4085                return Err(GitError::InvalidObject(format!(
4086                    "loose object {} hashes to {actual}",
4087                    path.display()
4088                )));
4089            }
4090        }
4091        Ok(Arc::new(object))
4092    }
4093}
4094
4095impl ObjectWriter for LooseObjectStore {
4096    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4097        let oid = object.object_id(self.format)?;
4098        let path = self.object_path(&oid)?;
4099        if path.exists() {
4100            self.note_loose_write(oid);
4101            return Ok(oid);
4102        }
4103        let parent = path
4104            .parent()
4105            .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
4106        fs::create_dir_all(parent)?;
4107        let temp_path = unique_temp_path(parent);
4108        let write_result = (|| -> Result<()> {
4109            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4110            encoder.write_all(&object.framed_bytes())?;
4111            let compressed = encoder.finish()?;
4112            {
4113                let mut file = fs::OpenOptions::new()
4114                    .write(true)
4115                    .create_new(true)
4116                    .open(&temp_path)?;
4117                file.write_all(&compressed)?;
4118                // No fsync: git's default `core.fsync=none` fsyncs nothing on the
4119                // loose-object write path (object-file.c writes the temp file and
4120                // renames it without syncing unless `core.fsync` names
4121                // `loose-object`/`objects`/`all`, which it does not by default).
4122                // A per-object sync_all() here made `git add` of N files cost N
4123                // fsyncs — the dominant term in sley#27's 10x `add -u` slowdown —
4124                // for durability git itself does not provide by default. The
4125                // create_new temp + atomic rename below still guarantees the
4126                // object never appears half-written under its final name.
4127            }
4128            match fs::rename(&temp_path, &path) {
4129                Ok(()) => Ok(()),
4130                Err(_) if path.exists() => {
4131                    let _ = fs::remove_file(&temp_path);
4132                    Ok(())
4133                }
4134                Err(err) => Err(GitError::Io(err.to_string())),
4135            }
4136        })();
4137        if write_result.is_err() {
4138            let _ = fs::remove_file(&temp_path);
4139        }
4140        write_result?;
4141        self.note_loose_write(oid);
4142        Ok(oid)
4143    }
4144}
4145
4146fn unique_temp_path(parent: &Path) -> PathBuf {
4147    let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
4148    parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
4149}
4150
4151#[cfg(test)]
4152mod tests {
4153    use super::*;
4154    use sley_core::BString;
4155    use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
4156    use sley_pack::{PackFile, PackWriteOptions};
4157
4158    fn blob_of(byte: u8, len: usize) -> EncodedObject {
4159        EncodedObject::new(ObjectType::Blob, vec![byte; len])
4160    }
4161
4162    fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
4163        Arc::new(blob_of(byte, len))
4164    }
4165
4166    fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
4167        reader
4168            .read_object(oid)
4169            .expect("test operation should succeed")
4170            .as_ref()
4171            .clone()
4172    }
4173
4174    #[test]
4175    fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
4176        // Budget holds two ~1 KiB objects but not three.
4177        let one = cached_object_cost(&blob_of(0, 1000));
4178        let mut cache = LruCache::<u32>::new(one * 2 + 8);
4179        cache.put(1, cached_blob_of(b'a', 1000));
4180        cache.put(2, cached_blob_of(b'b', 1000));
4181        // Touch key 1 so key 2 becomes least-recently-used.
4182        assert!(cache.get(&1).is_some());
4183        cache.put(3, cached_blob_of(b'c', 1000));
4184        // Key 2 (LRU) is evicted; 1 and 3 remain.
4185        assert!(cache.get(&1).is_some());
4186        assert!(cache.get(&2).is_none());
4187        assert!(cache.get(&3).is_some());
4188    }
4189
4190    #[test]
4191    fn lru_cache_zero_budget_is_inert() {
4192        let mut cache = LruCache::<u32>::new(0);
4193        cache.put(1, cached_blob_of(b'a', 16));
4194        assert!(cache.get(&1).is_none());
4195    }
4196
4197    #[test]
4198    fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
4199        let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
4200        cache.put(1, cached_blob_of(b'a', 50));
4201        assert!(cache.get(&1).is_some());
4202        // An object that cannot fit is not cached, and it evicts the prior entry
4203        // stored under the same key (so we never serve a stale value for it).
4204        cache.put(1, cached_blob_of(b'b', 10_000));
4205        assert!(cache.get(&1).is_none());
4206        // A subsequent fitting insert under another key still works and accounting
4207        // is not corrupted by the oversized insert.
4208        cache.put(2, cached_blob_of(b'c', 50));
4209        assert!(cache.get(&2).is_some());
4210    }
4211
4212    #[test]
4213    fn lru_cache_replacing_entry_updates_byte_accounting() {
4214        // Budget holds two 500-byte objects (plus headroom) but not a 500 + a
4215        // ~1900-byte object.
4216        let small = cached_object_cost(&blob_of(0, 500));
4217        let mut cache = LruCache::<u32>::new(small * 2 + 200);
4218        cache.put(1, cached_blob_of(b'a', 500));
4219        cache.put(2, cached_blob_of(b'b', 500));
4220        assert!(cache.get(&1).is_some());
4221        assert!(cache.get(&2).is_some());
4222        // Replace key 2 (now MRU after the gets above re-ordered 1 then 2) with a
4223        // bigger value that still fits the budget alone but makes the running total
4224        // exceed it; the LRU (key 1) is evicted while the replaced key 2 stays.
4225        // This exercises the replace-path accounting.
4226        cache.put(2, cached_blob_of(b'b', 1000));
4227        assert!(cache.get(&2).is_some());
4228        assert!(cache.get(&1).is_none());
4229    }
4230
4231    #[test]
4232    fn write_and_validate_blob() {
4233        let db = ObjectDatabase::new(ObjectFormat::Sha1);
4234        let oid = db
4235            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
4236            .expect("test operation should succeed");
4237        assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
4238        db.validate(&oid).expect("test operation should succeed");
4239    }
4240
4241    #[test]
4242    fn loose_store_writes_and_reads_object() {
4243        let root = std::env::temp_dir().join(format!(
4244            "sley-loose-store-{}-{}",
4245            std::process::id(),
4246            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
4247        ));
4248        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
4249        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
4250        let oid = store
4251            .write_object(object.clone())
4252            .expect("test operation should succeed");
4253        assert_eq!(read_object_for_assert(&store, &oid), object);
4254        assert!(
4255            store
4256                .object_path(&oid)
4257                .expect("test operation should succeed")
4258                .exists()
4259        );
4260        fs::remove_dir_all(root).expect("test operation should succeed");
4261    }
4262
4263    #[test]
4264    fn file_database_reads_object_from_pack_index() {
4265        let root = temp_root("sley-file-odb-pack");
4266        let git_dir = root.join(".git");
4267        let pack_dir = git_dir.join("objects").join("pack");
4268        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
4269        let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
4270        let oid = object
4271            .object_id(ObjectFormat::Sha1)
4272            .expect("test operation should succeed");
4273        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
4274            .expect("test operation should succeed");
4275        let pack_name = written.checksum.to_hex();
4276        fs::write(
4277            pack_dir.join(format!("pack-{pack_name}.pack")),
4278            written.pack,
4279        )
4280        .expect("test operation should succeed");
4281        fs::write(
4282            pack_dir.join(format!("pack-{pack_name}.idx")),
4283            written.index,
4284        )
4285        .expect("test operation should succeed");
4286
4287        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4288        assert!(db.contains(&oid).expect("test operation should succeed"));
4289        assert_eq!(read_object_for_assert(&db, &oid), object);
4290        fs::remove_dir_all(root).expect("test operation should succeed");
4291    }
4292
4293    #[test]
4294    fn file_database_loose_cache_observes_same_process_write_after_miss() {
4295        let root = temp_root("sley-file-odb-loose-cache-write");
4296        let git_dir = root.join(".git");
4297        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4298        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4299
4300        let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
4301        let oid = object
4302            .object_id(ObjectFormat::Sha1)
4303            .expect("test operation should succeed");
4304
4305        assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
4306        db.loose()
4307            .write_object(object.clone())
4308            .expect("test operation should succeed");
4309
4310        assert_eq!(read_object_for_assert(&db, &oid), object);
4311        fs::remove_dir_all(root).expect("test operation should succeed");
4312    }
4313
4314    #[test]
4315    fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
4316        let root = temp_root("sley-read-object-header");
4317        let git_dir = root.join(".git");
4318        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4319        let format = ObjectFormat::Sha1;
4320        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4321
4322        // Loose object: the header read inflates only the framing, not the body.
4323        let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
4324        let loose_oid = db
4325            .write_object(loose.clone())
4326            .expect("test operation should succeed");
4327
4328        // Packed objects, including an ofs-delta whose *result* size lives in the
4329        // delta stream (not the pack entry header) and whose type is inherited from
4330        // its base at the end of the chain.
4331        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
4332        let mut child_body = vec![b'a'; 4096];
4333        child_body.extend_from_slice(b" plus a deltified tail\n");
4334        let child = EncodedObject::new(ObjectType::Blob, child_body);
4335        let commitish =
4336            EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
4337        let base_oid = base
4338            .object_id(format)
4339            .expect("test operation should succeed");
4340        let child_oid = child
4341            .object_id(format)
4342            .expect("test operation should succeed");
4343        let commit_oid = commitish
4344            .object_id(format)
4345            .expect("test operation should succeed");
4346        let options = PackWriteOptions::new()
4347            .with_prefer_ofs_delta(true)
4348            .with_reorder(false);
4349        let pack = PackFile::write_packed_with_options(
4350            &[base.clone(), child.clone(), commitish.clone()],
4351            format,
4352            &options,
4353        )
4354        .expect("test operation should succeed");
4355        db.install_pack(&pack)
4356            .expect("test operation should succeed");
4357
4358        // The header read agrees with a full decode for every object and storage
4359        // class, without ever materializing the body.
4360        for (oid, want_type, want_len) in [
4361            (&loose_oid, ObjectType::Blob, loose.body.len()),
4362            (&base_oid, ObjectType::Blob, base.body.len()),
4363            (&child_oid, ObjectType::Blob, child.body.len()),
4364            (&commit_oid, ObjectType::Commit, commitish.body.len()),
4365        ] {
4366            assert_eq!(
4367                db.read_object_header(oid)
4368                    .expect("test operation should succeed"),
4369                Some((want_type, want_len as u64)),
4370                "header for {oid}"
4371            );
4372            let full = db.read_object(oid).expect("test operation should succeed");
4373            assert_eq!(
4374                db.read_object_header(oid)
4375                    .expect("test operation should succeed"),
4376                Some((full.object_type, full.body.len() as u64))
4377            );
4378        }
4379
4380        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
4381            .expect("test operation should succeed");
4382        assert_eq!(
4383            db.read_object_header(&missing)
4384                .expect("test operation should succeed"),
4385            None
4386        );
4387        fs::remove_dir_all(root).expect("test operation should succeed");
4388    }
4389
4390    #[test]
4391    fn object_storage_info_reports_loose_packed_and_delta_metadata() {
4392        let root = temp_root("sley-object-storage-info");
4393        let git_dir = root.join(".git");
4394        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4395        let format = ObjectFormat::Sha1;
4396        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4397
4398        let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
4399        let loose_oid = db
4400            .write_object(loose)
4401            .expect("test operation should succeed");
4402        let loose_size = fs::metadata(
4403            db.loose()
4404                .object_path(&loose_oid)
4405                .expect("test operation should succeed"),
4406        )
4407        .expect("test operation should succeed")
4408        .len();
4409        let loose_info = db
4410            .object_storage_info(&loose_oid)
4411            .expect("test operation should succeed")
4412            .expect("test operation should succeed");
4413        assert_eq!(loose_info.disk_size, loose_size);
4414        assert_eq!(
4415            loose_info.deltabase,
4416            zero_oid(format).expect("test operation should succeed")
4417        );
4418
4419        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
4420        let mut child_body = vec![b'a'; 4096];
4421        child_body.extend_from_slice(b" changed tail\n");
4422        let child = EncodedObject::new(ObjectType::Blob, child_body);
4423        let base_oid = base
4424            .object_id(format)
4425            .expect("test operation should succeed");
4426        let child_oid = child
4427            .object_id(format)
4428            .expect("test operation should succeed");
4429        let options = PackWriteOptions::new()
4430            .with_prefer_ofs_delta(true)
4431            .with_reorder(false);
4432        let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
4433            .expect("test operation should succeed");
4434        db.install_pack(&pack)
4435            .expect("test operation should succeed");
4436
4437        let base_info = db
4438            .object_storage_info(&base_oid)
4439            .expect("test operation should succeed")
4440            .expect("test operation should succeed");
4441        assert!(base_info.disk_size > 0);
4442        assert_eq!(
4443            base_info.deltabase,
4444            zero_oid(format).expect("test operation should succeed")
4445        );
4446
4447        let child_info = db
4448            .object_storage_info(&child_oid)
4449            .expect("test operation should succeed")
4450            .expect("test operation should succeed");
4451        assert!(child_info.disk_size > 0);
4452        assert_eq!(child_info.deltabase, base_oid);
4453
4454        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
4455            .expect("test operation should succeed");
4456        assert_eq!(
4457            db.object_storage_info(&missing)
4458                .expect("test operation should succeed"),
4459            None
4460        );
4461        fs::remove_dir_all(root).expect("test operation should succeed");
4462    }
4463
4464    #[test]
4465    fn file_database_resolves_unique_loose_object_prefix() {
4466        let root = temp_root("sley-file-odb-prefix-loose");
4467        let git_dir = root.join(".git");
4468        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4469        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4470        let object = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
4471        let oid = db
4472            .write_object(object)
4473            .expect("test operation should succeed");
4474        let prefix = &oid.to_hex()[..8];
4475
4476        assert_eq!(
4477            db.resolve_prefix(prefix)
4478                .expect("test operation should succeed"),
4479            ObjectPrefixResolution::Unique(oid)
4480        );
4481        assert!(
4482            db.object_ids()
4483                .expect("test operation should succeed")
4484                .contains(&oid)
4485        );
4486        fs::remove_dir_all(root).expect("test operation should succeed");
4487    }
4488
4489    #[test]
4490    fn file_database_resolves_unique_packed_object_prefix() {
4491        let root = temp_root("sley-file-odb-prefix-packed");
4492        let git_dir = root.join(".git");
4493        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4494        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4495        let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
4496        let oid = object
4497            .object_id(ObjectFormat::Sha1)
4498            .expect("test operation should succeed");
4499        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
4500            .expect("test operation should succeed");
4501        db.install_pack(&pack)
4502            .expect("test operation should succeed");
4503        let prefix = &oid.to_hex()[..8];
4504
4505        assert_eq!(
4506            db.resolve_prefix(prefix)
4507                .expect("test operation should succeed"),
4508            ObjectPrefixResolution::Unique(oid)
4509        );
4510        fs::remove_dir_all(root).expect("test operation should succeed");
4511    }
4512
4513    #[test]
4514    fn file_database_reports_ambiguous_object_prefix() {
4515        let root = temp_root("sley-file-odb-prefix-ambiguous");
4516        let git_dir = root.join(".git");
4517        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4518        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4519        let mut seen = HashMap::new();
4520        let (prefix, first, second) = (0..10_000)
4521            .find_map(|idx| {
4522                let object =
4523                    EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
4524                let oid = db
4525                    .write_object(object)
4526                    .expect("test operation should succeed");
4527                let prefix = oid.to_hex()[..4].to_string();
4528                seen.insert(prefix.clone(), oid)
4529                    .map(|first| (prefix, first, oid))
4530            })
4531            .expect("test should find a 4-hex collision");
4532
4533        let ObjectPrefixResolution::Ambiguous(mut matches) = db
4534            .resolve_prefix(&prefix)
4535            .expect("test operation should succeed")
4536        else {
4537            panic!("expected ambiguous prefix {prefix}");
4538        };
4539        matches.sort_by_key(ObjectId::to_hex);
4540        let mut expected = vec![first, second];
4541        expected.sort_by_key(ObjectId::to_hex);
4542        assert_eq!(matches, expected);
4543        fs::remove_dir_all(root).expect("test operation should succeed");
4544    }
4545
4546    #[test]
4547    fn file_database_rejects_too_short_object_prefix() {
4548        let root = temp_root("sley-file-odb-prefix-short");
4549        let git_dir = root.join(".git");
4550        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4551        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4552
4553        assert!(matches!(
4554            db.resolve_prefix("abc"),
4555            Err(GitError::InvalidObjectId(_))
4556        ));
4557        fs::remove_dir_all(root).expect("test operation should succeed");
4558    }
4559
4560    #[test]
4561    fn file_database_reads_sha256_object_from_pack_index() {
4562        let root = temp_root("sley-file-odb-pack-sha256");
4563        let git_dir = root.join(".git");
4564        let pack_dir = git_dir.join("objects").join("pack");
4565        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
4566        let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
4567        let oid = object
4568            .object_id(ObjectFormat::Sha256)
4569            .expect("test operation should succeed");
4570        let written =
4571            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4572                .expect("test operation should succeed");
4573        let pack_name = written.checksum.to_hex();
4574        fs::write(
4575            pack_dir.join(format!("pack-{pack_name}.pack")),
4576            written.pack,
4577        )
4578        .expect("test operation should succeed");
4579        fs::write(
4580            pack_dir.join(format!("pack-{pack_name}.idx")),
4581            written.index,
4582        )
4583        .expect("test operation should succeed");
4584
4585        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
4586        assert!(db.contains(&oid).expect("test operation should succeed"));
4587        assert_eq!(read_object_for_assert(&db, &oid), object);
4588        fs::remove_dir_all(root).expect("test operation should succeed");
4589    }
4590
4591    #[test]
4592    fn file_database_installs_sha256_pack_without_loose_objects() {
4593        let root = temp_root("sley-file-odb-install-pack");
4594        let git_dir = root.join(".git");
4595        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4596        let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
4597        let oid = object
4598            .object_id(ObjectFormat::Sha256)
4599            .expect("test operation should succeed");
4600        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4601            .expect("test operation should succeed");
4602        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
4603
4604        let result = db
4605            .install_pack(&pack)
4606            .expect("test operation should succeed");
4607
4608        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
4609        assert_eq!(result.object_ids, vec![oid]);
4610        assert!(result.pack_path.exists());
4611        assert!(result.index_path.exists());
4612        assert_eq!(result.promisor_path, None);
4613        assert!(
4614            !db.loose()
4615                .object_path(&oid)
4616                .expect("test operation should succeed")
4617                .exists()
4618        );
4619        assert!(db.contains(&oid).expect("test operation should succeed"));
4620        assert_eq!(read_object_for_assert(&db, &oid), object);
4621        fs::remove_dir_all(root).expect("test operation should succeed");
4622    }
4623
4624    #[test]
4625    fn file_database_installs_raw_sha256_pack_without_loose_objects() {
4626        let root = temp_root("sley-file-odb-install-raw-pack");
4627        let git_dir = root.join(".git");
4628        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4629        let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
4630        let oid = object
4631            .object_id(ObjectFormat::Sha256)
4632            .expect("test operation should succeed");
4633        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4634            .expect("test operation should succeed");
4635        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
4636
4637        let result = db
4638            .install_raw_pack(&pack.pack)
4639            .expect("test operation should succeed");
4640
4641        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
4642        assert_eq!(result.object_ids, vec![oid]);
4643        assert!(result.pack_path.exists());
4644        assert!(result.index_path.exists());
4645        assert_eq!(result.promisor_path, None);
4646        assert!(
4647            !db.loose()
4648                .object_path(&oid)
4649                .expect("test operation should succeed")
4650                .exists()
4651        );
4652        assert!(db.contains(&oid).expect("test operation should succeed"));
4653        assert_eq!(read_object_for_assert(&db, &oid), object);
4654        fs::remove_dir_all(root).expect("test operation should succeed");
4655    }
4656
4657    #[test]
4658    fn file_database_rejects_noncanonical_pack_index() {
4659        let root = temp_root("sley-file-odb-install-bad-index");
4660        let git_dir = root.join(".git");
4661        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4662        let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
4663        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
4664            .expect("test operation should succeed");
4665        let mut entries = pack.entries.clone();
4666        entries[0].crc32 ^= 1;
4667        let mut bad_pack = pack.clone();
4668        bad_pack.index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack.checksum)
4669            .expect("test operation should succeed");
4670        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4671
4672        assert!(db.install_pack(&bad_pack).is_err());
4673
4674        fs::remove_dir_all(root).expect("test operation should succeed");
4675    }
4676
4677    #[test]
4678    fn file_database_installs_raw_promisor_pack_with_sidecar() {
4679        let root = temp_root("sley-file-odb-install-raw-promisor-pack");
4680        let git_dir = root.join(".git");
4681        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4682        let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
4683        let oid = object
4684            .object_id(ObjectFormat::Sha1)
4685            .expect("test operation should succeed");
4686        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
4687            .expect("test operation should succeed");
4688        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4689
4690        let result = db
4691            .install_raw_pack_with_options(&pack.pack, RawPackInstallOptions { promisor: true })
4692            .expect("test operation should succeed");
4693
4694        let promisor_path = result.promisor_path.expect("promisor sidecar");
4695        assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
4696        assert_eq!(
4697            promisor_path.extension().and_then(|ext| ext.to_str()),
4698            Some("promisor")
4699        );
4700        assert!(promisor_path.exists());
4701        assert_eq!(
4702            fs::read(&promisor_path).expect("test operation should succeed"),
4703            b""
4704        );
4705        assert!(result.pack_path.exists());
4706        assert!(result.index_path.exists());
4707        assert!(
4708            !db.loose()
4709                .object_path(&oid)
4710                .expect("test operation should succeed")
4711                .exists()
4712        );
4713        assert_eq!(read_object_for_assert(&db, &oid), object);
4714        fs::remove_dir_all(root).expect("test operation should succeed");
4715    }
4716
4717    #[test]
4718    fn repository_objects_dir_uses_linked_worktree_common_dir() {
4719        let root = temp_root("sley-odb-common-dir");
4720        let common = root.join(".git");
4721        let admin = common.join("worktrees").join("linked");
4722        fs::create_dir_all(&admin).expect("test operation should succeed");
4723        fs::write(admin.join("commondir"), "../..\n").expect("test operation should succeed");
4724
4725        let common = fs::canonicalize(common).expect("test operation should succeed");
4726        assert_eq!(repository_common_dir(&admin), common);
4727        assert_eq!(repository_objects_dir(&admin), common.join("objects"));
4728
4729        fs::remove_dir_all(root).expect("test operation should succeed");
4730    }
4731
4732    #[test]
4733    fn reachable_object_helpers_walk_graph_and_install_pack() {
4734        let root = temp_root("sley-reachable-pack");
4735        let source_git_dir = root.join("source.git");
4736        let destination_git_dir = root.join("destination.git");
4737        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
4738        fs::create_dir_all(destination_git_dir.join("objects"))
4739            .expect("test operation should succeed");
4740        let format = ObjectFormat::Sha1;
4741        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
4742        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
4743
4744        let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
4745        let blob_oid = source
4746            .write_object(blob.clone())
4747            .expect("test operation should succeed");
4748        let tree = EncodedObject::new(
4749            ObjectType::Tree,
4750            Tree {
4751                entries: vec![TreeEntry {
4752                    mode: 0o100644,
4753                    name: BString::from(b"payload.txt"),
4754                    oid: blob_oid,
4755                }],
4756            }
4757            .write(),
4758        );
4759        let tree_oid = source
4760            .write_object(tree.clone())
4761            .expect("test operation should succeed");
4762        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
4763        let commit = EncodedObject::new(
4764            ObjectType::Commit,
4765            Commit {
4766                tree: tree_oid,
4767                parents: Vec::new(),
4768                author: identity.clone(),
4769                committer: identity,
4770                encoding: None,
4771                message: b"initial\n".to_vec(),
4772            }
4773            .write(),
4774        );
4775        let commit_oid = source
4776            .write_object(commit.clone())
4777            .expect("test operation should succeed");
4778
4779        let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
4780            .expect("test operation should succeed");
4781        assert!(reachable.contains(&commit_oid));
4782        assert!(reachable.contains(&tree_oid));
4783        assert!(reachable.contains(&blob_oid));
4784
4785        let install =
4786            install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
4787                .expect("test operation should succeed")
4788                .expect("reachable pack should be written");
4789        assert_eq!(install.object_ids.len(), 3);
4790        for (oid, object) in [
4791            (&commit_oid, &commit),
4792            (&tree_oid, &tree),
4793            (&blob_oid, &blob),
4794        ] {
4795            assert!(
4796                !destination
4797                    .loose()
4798                    .object_path(oid)
4799                    .expect("test operation should succeed")
4800                    .exists()
4801            );
4802            assert!(
4803                destination
4804                    .contains(oid)
4805                    .expect("test operation should succeed")
4806            );
4807            assert_eq!(read_object_for_assert(&destination, oid), *object);
4808        }
4809        fs::remove_dir_all(root).expect("test operation should succeed");
4810    }
4811
4812    #[test]
4813    fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
4814        let root = temp_root("sley-reachable-exclusions");
4815        let git_dir = root.join("repo.git");
4816        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4817        let format = ObjectFormat::Sha1;
4818        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4819
4820        let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
4821        let blob_oid = db
4822            .write_object(blob)
4823            .expect("test operation should succeed");
4824        let tree = EncodedObject::new(
4825            ObjectType::Tree,
4826            Tree {
4827                entries: vec![TreeEntry {
4828                    mode: 0o100644,
4829                    name: BString::from(b"payload.txt"),
4830                    oid: blob_oid,
4831                }],
4832            }
4833            .write(),
4834        );
4835        let tree_oid = db
4836            .write_object(tree)
4837            .expect("test operation should succeed");
4838        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
4839        let commit = EncodedObject::new(
4840            ObjectType::Commit,
4841            Commit {
4842                tree: tree_oid,
4843                parents: Vec::new(),
4844                author: identity.clone(),
4845                committer: identity,
4846                encoding: None,
4847                message: b"initial\n".to_vec(),
4848            }
4849            .write(),
4850        );
4851        let commit_oid = db
4852            .write_object(commit)
4853            .expect("test operation should succeed");
4854        let excluded = HashSet::from([tree_oid]);
4855
4856        let objects = collect_reachable_objects(&db, format, [commit_oid, commit_oid], &excluded)
4857            .expect("test operation should succeed");
4858
4859        assert_eq!(objects.len(), 1);
4860        assert_eq!(
4861            objects[0]
4862                .object_id(format)
4863                .expect("test operation should succeed"),
4864            commit_oid
4865        );
4866        fs::remove_dir_all(root).expect("test operation should succeed");
4867    }
4868
4869    #[test]
4870    fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
4871        let root = temp_root("sley-build-reachable-pack");
4872        let git_dir = root.join("repo.git");
4873        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4874        let format = ObjectFormat::Sha1;
4875        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4876
4877        let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
4878        let oid = db
4879            .write_object(object.clone())
4880            .expect("test operation should succeed");
4881        let pack = build_reachable_pack(&db, format, std::iter::once(oid), &HashSet::new())
4882            .expect("test operation should succeed")
4883            .expect("reachable pack should be built");
4884        assert!(pack.pack.starts_with(b"PACK"));
4885        assert_eq!(pack.entries.len(), 1);
4886        assert_eq!(pack.entries[0].oid, oid);
4887
4888        let excluded = HashSet::from([oid]);
4889        assert!(
4890            build_reachable_pack(
4891                &db,
4892                format,
4893                pack.entries.into_iter().map(|entry| entry.oid),
4894                &excluded
4895            )
4896            .expect("test operation should succeed")
4897            .is_none()
4898        );
4899        fs::remove_dir_all(root).expect("test operation should succeed");
4900    }
4901
4902    #[test]
4903    fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
4904        let root = temp_root("sley-reachable-tags");
4905        let git_dir = root.join("repo.git");
4906        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4907        let format = ObjectFormat::Sha1;
4908        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4909
4910        let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
4911        let blob_oid = db
4912            .write_object(blob)
4913            .expect("test operation should succeed");
4914        let tag = EncodedObject::new(
4915            ObjectType::Tag,
4916            Tag {
4917                object: blob_oid,
4918                object_type: ObjectType::Blob,
4919                name: b"v1".to_vec(),
4920                tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
4921                message: b"tag message\n".to_vec(),
4922                raw_body: None,
4923            }
4924            .write(),
4925        );
4926        let tag_oid = db.write_object(tag).expect("test operation should succeed");
4927
4928        let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
4929            .expect("test operation should succeed");
4930        assert!(reachable.contains(&tag_oid));
4931        assert!(reachable.contains(&blob_oid));
4932
4933        let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
4934            .expect("test operation should succeed");
4935        let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
4936            .expect_err("missing traversal root should error");
4937        let kind = err.not_found_kind().expect("typed not found");
4938        assert_eq!(kind.object_id(), Some(missing));
4939        assert_eq!(
4940            kind.missing_object_context(),
4941            Some(MissingObjectContext::Traversal)
4942        );
4943        fs::remove_dir_all(root).expect("test operation should succeed");
4944    }
4945
4946    #[test]
4947    fn install_reachable_pack_empty_starts_create_no_pack() {
4948        let root = temp_root("sley-reachable-empty");
4949        let source_git_dir = root.join("source.git");
4950        let destination_git_dir = root.join("destination.git");
4951        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
4952        fs::create_dir_all(destination_git_dir.join("objects"))
4953            .expect("test operation should succeed");
4954        let format = ObjectFormat::Sha1;
4955        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
4956        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
4957
4958        let result = install_reachable_pack(&source, &destination, format, Vec::<ObjectId>::new())
4959            .expect("test operation should succeed");
4960
4961        assert!(result.is_none());
4962        assert!(!destination_git_dir.join("objects").join("pack").exists());
4963        fs::remove_dir_all(root).expect("test operation should succeed");
4964    }
4965
4966    #[test]
4967    fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
4968        let root = temp_root("sley-reachable-install-excluding");
4969        let source_git_dir = root.join("source.git");
4970        let destination_git_dir = root.join("destination.git");
4971        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
4972        fs::create_dir_all(destination_git_dir.join("objects"))
4973            .expect("test operation should succeed");
4974        let format = ObjectFormat::Sha1;
4975        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
4976        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
4977        let object = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
4978        let oid = source
4979            .write_object(object)
4980            .expect("test operation should succeed");
4981        let excluded = HashSet::from([oid]);
4982
4983        let result = install_reachable_pack_excluding(
4984            &source,
4985            &destination,
4986            format,
4987            std::iter::once(oid),
4988            &excluded,
4989        )
4990        .expect("test operation should succeed");
4991
4992        assert!(result.is_none());
4993        assert!(!destination_git_dir.join("objects").join("pack").exists());
4994        fs::remove_dir_all(root).expect("test operation should succeed");
4995    }
4996
4997    #[test]
4998    fn install_reachable_pack_supports_sha256() {
4999        let root = temp_root("sley-reachable-pack-sha256");
5000        let source_git_dir = root.join("source.git");
5001        let destination_git_dir = root.join("destination.git");
5002        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5003        fs::create_dir_all(destination_git_dir.join("objects"))
5004            .expect("test operation should succeed");
5005        let format = ObjectFormat::Sha256;
5006        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5007        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5008        let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
5009        let oid = source
5010            .write_object(object.clone())
5011            .expect("test operation should succeed");
5012
5013        let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
5014            .expect("test operation should succeed")
5015            .expect("sha256 reachable pack should be built");
5016        assert!(pack.pack.starts_with(b"PACK"));
5017        assert_eq!(pack.entries[0].oid, oid);
5018
5019        let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
5020            .expect("test operation should succeed")
5021            .expect("sha256 reachable pack should be written");
5022
5023        assert_eq!(result.object_ids, vec![oid]);
5024        assert!(
5025            !destination
5026                .loose()
5027                .object_path(&oid)
5028                .expect("test operation should succeed")
5029                .exists()
5030        );
5031        assert_eq!(read_object_for_assert(&destination, &oid), object);
5032        fs::remove_dir_all(root).expect("test operation should succeed");
5033    }
5034
5035    #[test]
5036    fn install_helpers_accept_custom_raw_pack_installer() {
5037        #[derive(Default)]
5038        struct RecordingInstaller {
5039            packs: std::cell::RefCell<Vec<Vec<u8>>>,
5040            installed: std::cell::RefCell<Vec<ObjectId>>,
5041        }
5042
5043        impl RawPackInstaller for RecordingInstaller {
5044            fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
5045                self.packs.borrow_mut().push(pack_bytes.to_vec());
5046                let object_ids = self.installed.borrow().clone();
5047                Ok(RawPackInstallResult { object_ids })
5048            }
5049        }
5050
5051        let format = ObjectFormat::Sha1;
5052        let source = ObjectDatabase::new(format);
5053        let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
5054        let oid = source
5055            .write_object(object)
5056            .expect("test operation should succeed");
5057        let installer = RecordingInstaller::default();
5058        installer.installed.borrow_mut().push(oid);
5059
5060        let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
5061            .expect("test operation should succeed")
5062            .expect("custom installer should receive pack");
5063
5064        assert_eq!(result.object_ids, installer.installed.into_inner());
5065        let packs = installer.packs.into_inner();
5066        assert_eq!(packs.len(), 1);
5067        assert!(packs[0].starts_with(b"PACK"));
5068    }
5069
5070    #[test]
5071    fn file_database_reads_object_from_multi_pack_index() {
5072        let root = temp_root("sley-file-odb-midx");
5073        let git_dir = root.join(".git");
5074        let pack_dir = git_dir.join("objects").join("pack");
5075        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5076        let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
5077        let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
5078        let first_oid = first
5079            .object_id(ObjectFormat::Sha1)
5080            .expect("test operation should succeed");
5081        let second_oid = second
5082            .object_id(ObjectFormat::Sha1)
5083            .expect("test operation should succeed");
5084        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5085            .expect("test operation should succeed");
5086        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5087            .expect("test operation should succeed");
5088        let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
5089        let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
5090        fs::write(
5091            pack_dir.join(first_pack_name.replace(".idx", ".pack")),
5092            first_pack.pack,
5093        )
5094        .expect("test operation should succeed");
5095        fs::write(
5096            pack_dir.join(second_pack_name.replace(".idx", ".pack")),
5097            second_pack.pack,
5098        )
5099        .expect("test operation should succeed");
5100        let midx = MultiPackIndex::write(
5101            ObjectFormat::Sha1,
5102            2,
5103            &[first_pack_name, second_pack_name],
5104            &[
5105                sley_pack::MultiPackIndexEntry {
5106                    oid: first_oid,
5107                    pack_int_id: 0,
5108                    offset: first_pack.entries[0].offset,
5109                },
5110                sley_pack::MultiPackIndexEntry {
5111                    oid: second_oid,
5112                    pack_int_id: 1,
5113                    offset: second_pack.entries[0].offset,
5114                },
5115            ],
5116        )
5117        .expect("test operation should succeed");
5118        fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");
5119
5120        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5121        assert!(
5122            db.contains(&second_oid)
5123                .expect("test operation should succeed")
5124        );
5125        assert_eq!(
5126            db.resolve_prefix(&second_oid.to_hex()[..8])
5127                .expect("test operation should succeed"),
5128            ObjectPrefixResolution::Unique(second_oid)
5129        );
5130        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5131        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5132        fs::remove_dir_all(root).expect("test operation should succeed");
5133    }
5134
5135    #[test]
5136    fn file_database_finds_pack_added_after_listing_was_cached() {
5137        // Regression guard for the cached pack-directory listing: a pack written
5138        // after the listing was first cached (via a prior read) must still be
5139        // discovered by the same handle, because a miss triggers a re-scan.
5140        let root = temp_root("sley-file-odb-pack-added-late");
5141        let git_dir = root.join(".git");
5142        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5143        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5144
5145        // First pack + object; reading it populates the listing cache.
5146        let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
5147        let first_oid = first
5148            .object_id(ObjectFormat::Sha1)
5149            .expect("test operation should succeed");
5150        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5151            .expect("test operation should succeed");
5152        db.install_pack(&first_pack)
5153            .expect("test operation should succeed");
5154        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5155
5156        // A second object that the cached listing does not yet know about.
5157        let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
5158        let second_oid = second
5159            .object_id(ObjectFormat::Sha1)
5160            .expect("test operation should succeed");
5161        // It is genuinely absent right now.
5162        assert!(matches!(
5163            db.read_object(&second_oid),
5164            Err(GitError::NotFound(_))
5165        ));
5166
5167        // Install its pack through the same handle; the next read must find it via
5168        // a re-scan, not be masked by the stale listing.
5169        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5170            .expect("test operation should succeed");
5171        db.install_pack(&second_pack)
5172            .expect("test operation should succeed");
5173        assert!(
5174            db.contains(&second_oid)
5175                .expect("test operation should succeed")
5176        );
5177        assert_eq!(read_object_for_assert(&db, &second_oid), second);
5178        // The original object still resolves too.
5179        assert_eq!(read_object_for_assert(&db, &first_oid), first);
5180
5181        fs::remove_dir_all(root).expect("test operation should succeed");
5182    }
5183
5184    #[test]
5185    fn file_database_prefers_loose_object_over_packed_object() {
5186        let root = temp_root("sley-file-odb-prefer-loose");
5187        let git_dir = root.join(".git");
5188        let pack_dir = git_dir.join("objects").join("pack");
5189        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5190        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
5191        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5192            .expect("test operation should succeed");
5193        let pack_name = written.checksum.to_hex();
5194        fs::write(
5195            pack_dir.join(format!("pack-{pack_name}.pack")),
5196            written.pack,
5197        )
5198        .expect("test operation should succeed");
5199        fs::write(
5200            pack_dir.join(format!("pack-{pack_name}.idx")),
5201            written.index,
5202        )
5203        .expect("test operation should succeed");
5204
5205        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5206        let oid = db
5207            .write_object(object.clone())
5208            .expect("test operation should succeed");
5209        assert_eq!(read_object_for_assert(&db, &oid), object);
5210        fs::remove_dir_all(root).expect("test operation should succeed");
5211    }
5212
5213    #[test]
5214    fn bundle_prerequisite_verification_reads_existing_objects() {
5215        let db = ObjectDatabase::new(ObjectFormat::Sha1);
5216        let oid = db
5217            .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
5218            .expect("test operation should succeed");
5219        let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
5220        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5221            .expect("test operation should succeed");
5222
5223        verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
5224    }
5225
5226    #[test]
5227    fn bundle_prerequisite_verification_reports_missing_objects() {
5228        let db = ObjectDatabase::new(ObjectFormat::Sha1);
5229        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
5230            .expect("test operation should succeed");
5231        let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
5232        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5233            .expect("test operation should succeed");
5234
5235        assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
5236    }
5237
5238    #[test]
5239    fn unbundle_objects_writes_pack_entries_and_returns_refs() {
5240        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
5241        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
5242        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
5243        let oid = object
5244            .object_id(ObjectFormat::Sha1)
5245            .expect("test operation should succeed");
5246        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5247            .expect("test operation should succeed");
5248        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5249            .into_bytes()
5250            .into_iter()
5251            .chain(pack.pack)
5252            .collect::<Vec<_>>();
5253        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5254            .expect("test operation should succeed");
5255
5256        let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
5257            .expect("test operation should succeed");
5258        assert_eq!(result.written_objects, vec![oid]);
5259        assert_eq!(result.references, bundle.references);
5260        assert_eq!(read_object_for_assert(&writer, &oid), object);
5261    }
5262
5263    #[test]
5264    fn install_bundle_pack_writes_pack_and_returns_refs() {
5265        let root = temp_root("sley-install-bundle-pack");
5266        let git_dir = root.join(".git");
5267        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5268        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
5269        let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5270        let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
5271        let oid = object
5272            .object_id(ObjectFormat::Sha1)
5273            .expect("test operation should succeed");
5274        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5275            .expect("test operation should succeed");
5276        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5277            .into_bytes()
5278            .into_iter()
5279            .chain(pack.pack)
5280            .collect::<Vec<_>>();
5281        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5282            .expect("test operation should succeed");
5283
5284        let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
5285            .expect("test operation should succeed");
5286
5287        assert_eq!(result.written_objects, vec![oid]);
5288        assert_eq!(result.references, bundle.references);
5289        assert!(
5290            database
5291                .contains(&oid)
5292                .expect("test operation should succeed")
5293        );
5294        assert_eq!(read_object_for_assert(&database, &oid), object);
5295        assert!(
5296            !database
5297                .loose()
5298                .object_path(&oid)
5299                .expect("test operation should succeed")
5300                .exists()
5301        );
5302        fs::remove_dir_all(root).expect("test operation should succeed");
5303    }
5304
5305    #[test]
5306    fn unpack_packfile_objects_writes_sha256_pack_entries() {
5307        let writer = ObjectDatabase::new(ObjectFormat::Sha256);
5308        let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
5309        let oid = object
5310            .object_id(ObjectFormat::Sha256)
5311            .expect("test operation should succeed");
5312        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5313            .expect("test operation should succeed");
5314
5315        let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
5316            .expect("test operation should succeed");
5317
5318        assert_eq!(result.written_objects, vec![oid]);
5319        assert_eq!(read_object_for_assert(&writer, &oid), object);
5320    }
5321
5322    #[test]
5323    fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
5324        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
5325        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
5326        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
5327            .expect("test operation should succeed");
5328        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
5329        let oid = object
5330            .object_id(ObjectFormat::Sha1)
5331            .expect("test operation should succeed");
5332        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5333            .expect("test operation should succeed");
5334        let bundle_bytes =
5335            format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
5336                .into_bytes()
5337                .into_iter()
5338                .chain(pack.pack)
5339                .collect::<Vec<_>>();
5340        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5341            .expect("test operation should succeed");
5342
5343        assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
5344        assert!(!writer.contains(&oid));
5345    }
5346
5347    /// Build a commit -> tree -> blob graph in `db`, returning the three object
5348    /// ids and their canonical encodings as `(oid, object)` pairs.
5349    fn write_commit_graph(
5350        db: &mut FileObjectDatabase,
5351        payload: &[u8],
5352    ) -> Vec<(ObjectId, EncodedObject)> {
5353        let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
5354        let blob_oid = db
5355            .write_object(blob.clone())
5356            .expect("test operation should succeed");
5357        let tree = EncodedObject::new(
5358            ObjectType::Tree,
5359            Tree {
5360                entries: vec![TreeEntry {
5361                    mode: 0o100644,
5362                    name: BString::from(b"payload.txt"),
5363                    oid: blob_oid,
5364                }],
5365            }
5366            .write(),
5367        );
5368        let tree_oid = db
5369            .write_object(tree.clone())
5370            .expect("test operation should succeed");
5371        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5372        let commit = EncodedObject::new(
5373            ObjectType::Commit,
5374            Commit {
5375                tree: tree_oid,
5376                parents: Vec::new(),
5377                author: identity.clone(),
5378                committer: identity,
5379                encoding: None,
5380                message: b"initial\n".to_vec(),
5381            }
5382            .write(),
5383        );
5384        let commit_oid = db
5385            .write_object(commit.clone())
5386            .expect("test operation should succeed");
5387        vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
5388    }
5389
5390    fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
5391        let root = temp_root("sley-repack-all");
5392        let git_dir = root.join(".git");
5393        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5394        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5395
5396        // A pre-existing pack holds one blob; the rest of the graph is loose.
5397        let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
5398        let packed_oid = packed_blob
5399            .object_id(format)
5400            .expect("test operation should succeed");
5401        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
5402            .expect("test operation should succeed");
5403        let existing = db
5404            .install_pack(&existing_pack)
5405            .expect("test operation should succeed");
5406
5407        let graph = write_commit_graph(&mut db, b"repack payload\n");
5408
5409        let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
5410        expected.insert(packed_oid, packed_blob.clone());
5411
5412        let result = repack_all_objects(&git_dir, format)
5413            .expect("test operation should succeed")
5414            .expect("repository has objects");
5415
5416        // The new pack round-trips and contains every original object byte-for-byte.
5417        assert_eq!(result.object_count, expected.len());
5418        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
5419        assert_eq!(parsed.entries.len(), expected.len());
5420        for entry in &parsed.entries {
5421            let want = expected
5422                .get(&entry.entry.oid)
5423                .expect("packed object was in the repository");
5424            assert_eq!(&entry.object, want);
5425            assert_eq!(
5426                entry
5427                    .object
5428                    .object_id(format)
5429                    .expect("test operation should succeed"),
5430                entry.entry.oid
5431            );
5432        }
5433        // The generated index parses and agrees with the pack checksum.
5434        let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
5435        assert_eq!(idx.pack_checksum, parsed.checksum);
5436        assert_eq!(idx.entries.len(), expected.len());
5437
5438        // The pre-existing pack is reported obsolete (by its .pack path).
5439        assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
5440        // Every loose object id is reported as now packed.
5441        let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
5442        want_loose.sort_by_key(ObjectId::to_hex);
5443        assert_eq!(result.packed_loose, want_loose);
5444        assert!(!result.packed_loose.contains(&packed_oid));
5445
5446        fs::remove_dir_all(root).expect("test operation should succeed");
5447    }
5448
5449    #[test]
5450    fn repack_all_objects_consolidates_loose_and_pack_sha1() {
5451        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
5452    }
5453
5454    #[test]
5455    fn repack_all_objects_consolidates_loose_and_pack_sha256() {
5456        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
5457    }
5458
5459    #[test]
5460    fn repack_all_objects_returns_none_for_empty_repository() {
5461        let root = temp_root("sley-repack-empty");
5462        let git_dir = root.join(".git");
5463        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5464
5465        assert!(
5466            repack_all_objects(&git_dir, ObjectFormat::Sha1)
5467                .expect("test operation should succeed")
5468                .is_none()
5469        );
5470
5471        fs::remove_dir_all(root).expect("test operation should succeed");
5472    }
5473
5474    #[test]
5475    fn install_repack_result_writes_pack_without_pruning_by_default() {
5476        let root = temp_root("sley-repack-install-nodelete");
5477        let git_dir = root.join(".git");
5478        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5479        let format = ObjectFormat::Sha1;
5480        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5481        let graph = write_commit_graph(&mut db, b"install no prune\n");
5482
5483        let result = repack_all_objects(&git_dir, format)
5484            .expect("test operation should succeed")
5485            .expect("test operation should succeed");
5486        install_repack_result(&git_dir, format, &result, false)
5487            .expect("test operation should succeed");
5488
5489        // New pack is on disk and readable.
5490        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
5491        let pack_dir = git_dir.join("objects").join("pack");
5492        let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
5493        let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
5494        assert!(pack_path.exists());
5495        assert!(idx_path.exists());
5496        // Loose objects survive because prune was not requested.
5497        for (oid, object) in &graph {
5498            assert!(
5499                db.loose()
5500                    .object_path(oid)
5501                    .expect("test operation should succeed")
5502                    .exists()
5503            );
5504            assert_eq!(read_object_for_assert(&db, oid), *object);
5505        }
5506
5507        fs::remove_dir_all(root).expect("test operation should succeed");
5508    }
5509
5510    #[test]
5511    fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
5512        let root = temp_root("sley-repack-install-prune");
5513        let git_dir = root.join(".git");
5514        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5515        let format = ObjectFormat::Sha1;
5516        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5517
5518        let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
5519        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
5520            .expect("test operation should succeed");
5521        let existing = db
5522            .install_pack(&existing_pack)
5523            .expect("test operation should succeed");
5524        let graph = write_commit_graph(&mut db, b"prune payload\n");
5525
5526        let result = repack_all_objects(&git_dir, format)
5527            .expect("test operation should succeed")
5528            .expect("test operation should succeed");
5529        let new_pack_checksum = PackFile::parse(&result.pack, format)
5530            .expect("test operation should succeed")
5531            .checksum;
5532        install_repack_result(&git_dir, format, &result, true)
5533            .expect("test operation should succeed");
5534
5535        // Obsolete pack and its index are gone.
5536        assert!(!existing.pack_path.exists());
5537        assert!(!existing.index_path.exists());
5538        // Packed loose objects are gone from disk.
5539        for (oid, _) in &graph {
5540            assert!(
5541                !db.loose()
5542                    .object_path(oid)
5543                    .expect("test operation should succeed")
5544                    .exists()
5545            );
5546        }
5547        // The new consolidated pack remains and still serves every object.
5548        let pack_dir = git_dir.join("objects").join("pack");
5549        assert!(
5550            pack_dir
5551                .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
5552                .exists()
5553        );
5554        let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
5555        for (oid, object) in &graph {
5556            assert!(
5557                reopened
5558                    .contains(oid)
5559                    .expect("test operation should succeed")
5560            );
5561            assert_eq!(read_object_for_assert(&reopened, oid), *object);
5562        }
5563        let packed_oid = packed_blob
5564            .object_id(format)
5565            .expect("test operation should succeed");
5566        assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
5567
5568        fs::remove_dir_all(root).expect("test operation should succeed");
5569    }
5570
5571    #[test]
5572    fn install_repack_result_preserves_keep_and_promisor_packs() {
5573        let root = temp_root("sley-repack-install-keep-promisor");
5574        let git_dir = root.join(".git");
5575        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5576        let format = ObjectFormat::Sha1;
5577        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5578
5579        let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
5580        let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
5581            .expect("test operation should succeed");
5582        let keep_install = db
5583            .install_pack(&keep_pack)
5584            .expect("test operation should succeed");
5585        let keep_sidecar = keep_install.pack_path.with_extension("keep");
5586        fs::write(&keep_sidecar, b"").expect("test operation should succeed");
5587
5588        let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
5589        let promisor_pack =
5590            PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
5591                .expect("test operation should succeed");
5592        let promisor_install = db
5593            .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
5594            .expect("test operation should succeed");
5595        let promisor_sidecar = promisor_install
5596            .promisor_path
5597            .clone()
5598            .expect("promisor sidecar");
5599
5600        let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
5601        let result = repack_all_objects(&git_dir, format)
5602            .expect("test operation should succeed")
5603            .expect("test operation should succeed");
5604        assert!(result.obsolete_packs.contains(&keep_install.pack_path));
5605        assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
5606
5607        install_repack_result(&git_dir, format, &result, true)
5608            .expect("test operation should succeed");
5609
5610        for path in [
5611            &keep_install.pack_path,
5612            &keep_install.index_path,
5613            &keep_sidecar,
5614            &promisor_install.pack_path,
5615            &promisor_install.index_path,
5616            &promisor_sidecar,
5617        ] {
5618            assert!(path.exists(), "{} should be preserved", path.display());
5619        }
5620        for (oid, _) in &graph {
5621            assert!(
5622                !db.loose()
5623                    .object_path(oid)
5624                    .expect("test operation should succeed")
5625                    .exists()
5626            );
5627        }
5628
5629        fs::remove_dir_all(root).expect("test operation should succeed");
5630    }
5631
5632    #[test]
5633    fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
5634        // Safety: a loose object whose id is not in the new pack must survive
5635        // pruning even if the caller lists it in `packed_loose`.
5636        let root = temp_root("sley-repack-install-safety");
5637        let git_dir = root.join(".git");
5638        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5639        let format = ObjectFormat::Sha1;
5640        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5641        let graph = write_commit_graph(&mut db, b"safety packed\n");
5642
5643        let mut result = repack_all_objects(&git_dir, format)
5644            .expect("test operation should succeed")
5645            .expect("test operation should succeed");
5646
5647        // A loose object that is NOT in the new pack, but mislabeled as packed.
5648        let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
5649        let stray_oid = db
5650            .write_object(stray.clone())
5651            .expect("test operation should succeed");
5652        assert!(!result.packed_loose.contains(&stray_oid));
5653        result.packed_loose.push(stray_oid);
5654
5655        install_repack_result(&git_dir, format, &result, true)
5656            .expect("test operation should succeed");
5657
5658        // The stray loose object is untouched because it is not in the new pack.
5659        assert!(
5660            db.loose()
5661                .object_path(&stray_oid)
5662                .expect("test operation should succeed")
5663                .exists()
5664        );
5665        assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
5666        // Genuinely packed loose objects were still removed.
5667        for (oid, _) in &graph {
5668            assert!(
5669                !db.loose()
5670                    .object_path(oid)
5671                    .expect("test operation should succeed")
5672                    .exists()
5673            );
5674        }
5675
5676        fs::remove_dir_all(root).expect("test operation should succeed");
5677    }
5678
5679    #[test]
5680    fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
5681        let root = temp_root("sley-prune-unreachable");
5682        let git_dir = root.join(".git");
5683        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5684        let format = ObjectFormat::Sha1;
5685        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5686        let graph = write_commit_graph(&mut db, b"reachable payload\n");
5687        let commit_oid = graph[0].0.clone();
5688
5689        // A dangling loose blob not referenced by the commit graph.
5690        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
5691        let dangling_oid = db
5692            .write_object(dangling)
5693            .expect("test operation should succeed");
5694
5695        // Report-only pass leaves everything on disk.
5696        let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
5697            .expect("test operation should succeed");
5698        assert_eq!(reported, vec![dangling_oid]);
5699        assert!(
5700            db.loose()
5701                .object_path(&dangling_oid)
5702                .expect("test operation should succeed")
5703                .exists()
5704        );
5705
5706        // Deleting pass removes only the unreachable object.
5707        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
5708            .expect("test operation should succeed");
5709        assert_eq!(deleted, vec![dangling_oid]);
5710        assert!(
5711            !db.loose()
5712                .object_path(&dangling_oid)
5713                .expect("test operation should succeed")
5714                .exists()
5715        );
5716        for (oid, object) in &graph {
5717            assert!(
5718                db.loose()
5719                    .object_path(oid)
5720                    .expect("test operation should succeed")
5721                    .exists()
5722            );
5723            assert_eq!(read_object_for_assert(&db, oid), *object);
5724        }
5725
5726        fs::remove_dir_all(root).expect("test operation should succeed");
5727    }
5728
5729    #[test]
5730    fn prune_unreachable_loose_ignores_gitlink_targets() {
5731        let root = temp_root("sley-prune-gitlink");
5732        let git_dir = root.join(".git");
5733        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5734        let format = ObjectFormat::Sha1;
5735        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5736
5737        let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
5738            .expect("test operation should succeed");
5739        let tree = EncodedObject::new(
5740            ObjectType::Tree,
5741            Tree {
5742                entries: vec![TreeEntry {
5743                    mode: 0o160000,
5744                    name: BString::from(b"submodule"),
5745                    oid: submodule_oid,
5746                }],
5747            }
5748            .write(),
5749        );
5750        let tree_oid = db
5751            .write_object(tree)
5752            .expect("test operation should succeed");
5753        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5754        let commit = EncodedObject::new(
5755            ObjectType::Commit,
5756            Commit {
5757                tree: tree_oid,
5758                parents: Vec::new(),
5759                author: identity.clone(),
5760                committer: identity,
5761                encoding: None,
5762                message: b"gitlink\n".to_vec(),
5763            }
5764            .write(),
5765        );
5766        let commit_oid = db
5767            .write_object(commit)
5768            .expect("test operation should succeed");
5769        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
5770        let dangling_oid = db
5771            .write_object(dangling)
5772            .expect("test operation should succeed");
5773
5774        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
5775            .expect("test operation should succeed");
5776
5777        assert_eq!(deleted, vec![dangling_oid]);
5778        assert!(
5779            !db.loose()
5780                .object_path(&dangling_oid)
5781                .expect("test operation should succeed")
5782                .exists()
5783        );
5784
5785        fs::remove_dir_all(root).expect("test operation should succeed");
5786    }
5787
5788    fn temp_root(prefix: &str) -> PathBuf {
5789        std::env::temp_dir().join(format!(
5790            "{prefix}-{}-{}",
5791            std::process::id(),
5792            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
5793        ))
5794    }
5795}