Skip to main content

sley_odb/
lib.rs

1// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
2// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{
12    Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object,
13    tree_entry_object_type,
14};
15use sley_pack::{
16    MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
17    PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
18    PackWriteOptions,
19};
20use std::collections::{HashMap, HashSet};
21use std::io::{Read, Write};
22use std::path::{Path, PathBuf};
23use std::sync::atomic::{AtomicU64, Ordering};
24use std::sync::{Arc, Mutex, OnceLock};
25use std::{env, fs};
26
// Process-wide monotonically usable counter, starting at zero.
// NOTE(review): no use site is visible in this part of the file; presumably it
// disambiguates temporary file names — confirm against the callers.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
28
29pub trait ObjectReader {
30    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;
31
32    /// Graft-points seam (shallow clones today, replace refs/grafts later):
33    /// `true` when history is cut at `oid`, so every walk must treat the
34    /// commit as parentless even though its raw body still names parents.
35    ///
36    /// [`FileObjectDatabase`] answers from `$GIT_DIR/shallow`; readers that
37    /// are not backed by a repository (in-memory stores, pack overlays)
38    /// keep the default "no grafts".
39    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
40        false
41    }
42
43    /// Whether this reader has any shallow/graft boundaries at all. Walkers can
44    /// use this to choose dense graph-only traversal when no boundary can cut
45    /// parent edges.
46    fn has_shallow_grafts(&self) -> bool {
47        false
48    }
49
50    /// True when `oid` is covered by a promisor pack. Partial clones are
51    /// allowed to omit promised objects until a later on-demand fetch hydrates
52    /// them; ordinary readers keep the default "no promised objects".
53    fn is_promised_object(&self, _oid: &ObjectId) -> bool {
54        false
55    }
56}
57
58fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
59    (*oid == ObjectId::empty_tree(format))
60        .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
61}
62
63fn with_missing_object_context(
64    err: GitError,
65    oid: ObjectId,
66    context: MissingObjectContext,
67) -> GitError {
68    let kind = err
69        .not_found_kind()
70        .and_then(sley_core::NotFoundKind::missing_object_kind);
71    match kind {
72        Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
73        None => err,
74    }
75}
76
77/// Parents of a parsed commit with the graft seam applied: empty when the
78/// reader cuts history at `oid` (shallow boundary), the raw parsed parents
79/// otherwise.
80pub fn grafted_parents<R: ObjectReader + ?Sized>(
81    reader: &R,
82    oid: &ObjectId,
83    parents: Vec<ObjectId>,
84) -> Vec<ObjectId> {
85    if reader.is_shallow_graft(oid) {
86        Vec::new()
87    } else {
88        parents
89    }
90}
91
92pub trait ObjectWriter {
93    /// Write `object`, returning its id. Takes `&self`: every implementation's
94    /// write state (in-memory map, loose-object cache) is behind interior
95    /// mutability, so a single handle can interleave reads and writes without a
96    /// `&mut` borrow. This lets the merge engine read and write through one `db`
97    /// instead of opening a second read-only handle that re-warms the caches.
98    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
99}
100
101#[derive(Debug, Clone, PartialEq, Eq)]
102pub struct BundleUnbundleResult {
103    pub written_objects: Vec<ObjectId>,
104    pub references: Vec<BundleReference>,
105}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
108pub struct PackUnpackResult {
109    pub written_objects: Vec<ObjectId>,
110}
111
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub struct PackInstallResult {
114    pub pack_name: String,
115    pub pack_path: PathBuf,
116    pub index_path: PathBuf,
117    pub promisor_path: Option<PathBuf>,
118    pub object_ids: Vec<ObjectId>,
119}
120
121#[derive(Debug, Clone, PartialEq, Eq)]
122pub struct RawPackInstallResult {
123    pub object_ids: Vec<ObjectId>,
124}
125
/// Knobs for installing a generated pack; the default leaves `promisor` off.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// Request promisor treatment for the installed pack.
    /// NOTE(review): the effect is implemented outside this part of the file
    /// (presumably a `.promisor` marker is written) — confirm.
    pub promisor: bool,
}
130
131pub trait RawPackInstaller {
132    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
133}
134
135#[derive(Debug, Clone, PartialEq, Eq)]
136pub enum ObjectPrefixResolution {
137    Missing,
138    Unique(ObjectId),
139    Ambiguous(Vec<ObjectId>),
140}
141
142#[derive(Debug, Clone, PartialEq, Eq)]
143pub struct ObjectStorageInfo {
144    pub disk_size: u64,
145    pub deltabase: ObjectId,
146}
147
148impl RawPackInstaller for FileObjectDatabase {
149    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
150        let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
151        Ok(RawPackInstallResult {
152            object_ids: result.object_ids,
153        })
154    }
155}
156
157impl RawPackInstaller for ObjectDatabase {
158    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
159        let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
160        Ok(RawPackInstallResult {
161            object_ids: result.written_objects,
162        })
163    }
164}
165
166pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
167    let mut missing = Vec::new();
168    for prerequisite in &bundle.prerequisites {
169        match reader.read_object(&prerequisite.oid) {
170            Ok(object) => {
171                let actual = object.object_id(bundle.format)?;
172                if actual != prerequisite.oid {
173                    return Err(GitError::InvalidObject(format!(
174                        "bundle prerequisite {} hashes to {actual}",
175                        prerequisite.oid
176                    )));
177                }
178            }
179            Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
180            Err(err) => return Err(err),
181        }
182    }
183    if missing.is_empty() {
184        return Ok(());
185    }
186    Err(GitError::object_not_found_in(
187        missing[0],
188        MissingObjectContext::PackInstall,
189    ))
190}
191
192pub fn unbundle_objects<R, W>(
193    bundle: &Bundle,
194    prerequisite_reader: &R,
195    writer: &mut W,
196) -> Result<BundleUnbundleResult>
197where
198    R: ObjectReader,
199    W: ObjectWriter,
200{
201    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
202    let pack = PackFile::parse_bundle(bundle)?;
203    let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
204    Ok(BundleUnbundleResult {
205        written_objects,
206        references: bundle.references.clone(),
207    })
208}
209
210pub fn install_bundle_pack<R>(
211    bundle: &Bundle,
212    prerequisite_reader: &R,
213    destination: &impl RawPackInstaller,
214) -> Result<BundleUnbundleResult>
215where
216    R: ObjectReader,
217{
218    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
219    let install = destination.install_raw_pack(&bundle.pack)?;
220    Ok(BundleUnbundleResult {
221        written_objects: install.object_ids,
222        references: bundle.references.clone(),
223    })
224}
225
226pub fn unpack_packfile_objects<W>(
227    pack_bytes: &[u8],
228    format: ObjectFormat,
229    writer: &W,
230) -> Result<PackUnpackResult>
231where
232    W: ObjectWriter,
233{
234    let pack = PackFile::parse(pack_bytes, format)?;
235    write_pack_objects(pack, writer, "pack")
236}
237
238fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
239where
240    W: ObjectWriter,
241{
242    let mut written_objects = Vec::with_capacity(pack.entries.len());
243    for entry in pack.entries {
244        let expected = entry.entry.oid;
245        let actual = writer.write_object(entry.object)?;
246        if actual != expected {
247            return Err(GitError::InvalidObject(format!(
248                "{source} object id mismatch: expected {expected}, wrote {actual}"
249            )));
250        }
251        written_objects.push(actual);
252    }
253    Ok(PackUnpackResult { written_objects })
254}
255
256pub fn collect_reachable_object_ids<R, I>(
257    reader: &R,
258    format: ObjectFormat,
259    starts: I,
260) -> Result<HashSet<ObjectId>>
261where
262    R: ObjectReader,
263    I: IntoIterator<Item = ObjectId>,
264{
265    walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
266}
267
268/// [`collect_reachable_object_ids`] with a cut set: commits in `cut` are
269/// collected, but the walk does not continue to their parents — the view a
270/// shallow repository has of its own refs (`$GIT_DIR/shallow` of the *other*
271/// side, threaded explicitly because `reader` belongs to this side).
272pub fn collect_reachable_object_ids_with_cut<R, I>(
273    reader: &R,
274    format: ObjectFormat,
275    starts: I,
276    cut: &HashSet<ObjectId>,
277) -> Result<HashSet<ObjectId>>
278where
279    R: ObjectReader,
280    I: IntoIterator<Item = ObjectId>,
281{
282    walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
283}
284
285/// [`collect_reachable_object_ids`] with a stop set: objects in `excluded` are
286/// not visited and not expanded, so the walk never sees anything reachable only
287/// through them (used to truncate history at a shallow boundary).
288pub fn collect_reachable_object_ids_excluding<R, I>(
289    reader: &R,
290    format: ObjectFormat,
291    starts: I,
292    excluded: &HashSet<ObjectId>,
293) -> Result<HashSet<ObjectId>>
294where
295    R: ObjectReader,
296    I: IntoIterator<Item = ObjectId>,
297{
298    walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
299}
300
301pub fn collect_reachable_objects<R, I>(
302    reader: &R,
303    format: ObjectFormat,
304    starts: I,
305    excluded: &HashSet<ObjectId>,
306) -> Result<Vec<Arc<EncodedObject>>>
307where
308    R: ObjectReader,
309    I: IntoIterator<Item = ObjectId>,
310{
311    let mut objects = Vec::new();
312    walk_reachable_objects(reader, format, starts, excluded, |_, object| {
313        objects.push(Arc::clone(object));
314    })?;
315    Ok(objects)
316}
317
318#[derive(Debug, Clone)]
319struct ReachablePackObject {
320    oid: ObjectId,
321    object: Arc<EncodedObject>,
322}
323
324fn collect_reachable_pack_objects<R, I>(
325    reader: &R,
326    format: ObjectFormat,
327    starts: I,
328    excluded: &HashSet<ObjectId>,
329) -> Result<Vec<ReachablePackObject>>
330where
331    R: ObjectReader,
332    I: IntoIterator<Item = ObjectId>,
333{
334    let mut objects = Vec::new();
335    walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
336        objects.push(ReachablePackObject {
337            oid: *oid,
338            object: Arc::clone(object),
339        });
340    })?;
341    Ok(objects)
342}
343
344fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
345    objects
346        .iter()
347        .map(|entry| PackInput {
348            oid: &entry.oid,
349            object: &entry.object,
350        })
351        .collect()
352}
353
354pub fn install_reachable_pack<I>(
355    source: &impl ObjectReader,
356    destination: &impl RawPackInstaller,
357    format: ObjectFormat,
358    starts: I,
359) -> Result<Option<RawPackInstallResult>>
360where
361    I: IntoIterator<Item = ObjectId>,
362{
363    install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
364}
365
366pub fn install_reachable_pack_excluding<I>(
367    source: &impl ObjectReader,
368    destination: &impl RawPackInstaller,
369    format: ObjectFormat,
370    starts: I,
371    excluded: &HashSet<ObjectId>,
372) -> Result<Option<RawPackInstallResult>>
373where
374    I: IntoIterator<Item = ObjectId>,
375{
376    let pack = match build_reachable_pack(source, format, starts, excluded)? {
377        Some(pack) => pack,
378        None => return Ok(None),
379    };
380    destination.install_raw_pack(&pack.pack).map(Some)
381}
382
383pub fn build_reachable_pack<R, I>(
384    reader: &R,
385    format: ObjectFormat,
386    starts: I,
387    excluded: &HashSet<ObjectId>,
388) -> Result<Option<PackWrite>>
389where
390    R: ObjectReader,
391    I: IntoIterator<Item = ObjectId>,
392{
393    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
394    if objects.is_empty() {
395        return Ok(None);
396    }
397    // Delta-compress reachable packs (used by install/push/fetch) via git-pack's
398    // sliding-window selection. Self-contained, ofs-delta by default; round-trips
399    // through the existing parser. PackWrite shape is unchanged, so callers are
400    // unaffected.
401    let inputs = pack_inputs(&objects);
402    PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
403}
404
405pub fn build_and_install_reachable_pack<R, I>(
406    source: &R,
407    destination: &FileObjectDatabase,
408    format: ObjectFormat,
409    starts: I,
410    excluded: &HashSet<ObjectId>,
411    options: RawPackInstallOptions,
412) -> Result<Option<PackInstallResult>>
413where
414    R: ObjectReader,
415    I: IntoIterator<Item = ObjectId>,
416{
417    build_and_install_reachable_pack_filtered(
418        source,
419        destination,
420        format,
421        starts,
422        excluded,
423        options,
424        None,
425        None,
426    )
427}
428
/// Partial-clone object filter applied while a transfer pack is being built.
///
/// Covers the part of upstream's `list-objects-filter` that the in-process
/// local server supports. Directly wanted tips are always packed; a filter
/// only prunes objects reached *through* the traversal (upstream's
/// `filter_blobs_none` likewise runs on traversed blobs, never on wanted tips).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// `blob:none` — drop every blob reached through tree traversal.
    BlobNone,
    /// `blob:limit=<n>` — drop traversed blobs whose body is `n` bytes or more.
    BlobLimit(u64),
    /// `tree:<n>` — keep only trees at depth below `n`, and drop traversed blobs.
    TreeDepth(u32),
    /// `sparse:oid=<blob>` — keep only blobs whose repository path is listed.
    SparsePathSet(Vec<String>),
}
446
447/// [`build_and_install_reachable_pack`] with an optional partial-clone
448/// `filter`. With `Some(BlobNone)`, blobs are dropped from the pack unless
449/// they are directly wanted (named in `starts`).
450#[allow(clippy::too_many_arguments)]
451pub fn build_and_install_reachable_pack_filtered<R, I>(
452    source: &R,
453    destination: &FileObjectDatabase,
454    format: ObjectFormat,
455    starts: I,
456    excluded: &HashSet<ObjectId>,
457    options: RawPackInstallOptions,
458    filter: Option<PackObjectFilter>,
459    unpack_limit: Option<usize>,
460) -> Result<Option<PackInstallResult>>
461where
462    R: ObjectReader,
463    I: IntoIterator<Item = ObjectId>,
464{
465    let starts: Vec<ObjectId> = starts.into_iter().collect();
466    let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
467    let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
468    match filter {
469        Some(PackObjectFilter::BlobNone) => {
470            objects.retain(|entry| {
471                entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
472            });
473        }
474        Some(PackObjectFilter::BlobLimit(limit)) => {
475            objects.retain(|entry| {
476                entry.object.object_type != ObjectType::Blob
477                    || wanted.contains(&entry.oid)
478                    || (entry.object.body.len() as u64) < limit
479            });
480        }
481        Some(PackObjectFilter::TreeDepth(depth)) => {
482            let tree_depths = collect_tree_filter_depths(source, format, &objects)?;
483            objects.retain(|entry| {
484                if wanted.contains(&entry.oid) {
485                    return true;
486                }
487                match entry.object.object_type {
488                    ObjectType::Blob => false,
489                    ObjectType::Tree => tree_depths
490                        .get(&entry.oid)
491                        .is_some_and(|tree_depth| *tree_depth < depth),
492                    _ => true,
493                }
494            });
495        }
496        Some(PackObjectFilter::SparsePathSet(paths)) => {
497            let allowed_blobs = collect_sparse_filter_blobs(source, format, &objects, &paths)?;
498            objects.retain(|entry| {
499                entry.object.object_type != ObjectType::Blob
500                    || wanted.contains(&entry.oid)
501                    || allowed_blobs.contains(&entry.oid)
502            });
503        }
504        None => {}
505    }
506    if objects.is_empty() {
507        return Ok(None);
508    }
509    // Mirror fetch-pack's unpack-limit: small transfers are exploded into
510    // loose objects instead of landing as a pack (upstream `get_pack` picks
511    // unpack-objects when the header count is below fetch/transfer.unpackLimit).
512    if let Some(limit) = unpack_limit
513        && objects.len() < limit
514    {
515        for entry in &objects {
516            destination.loose().write_object((*entry.object).clone())?;
517        }
518        return Ok(None);
519    }
520    let inputs = pack_inputs(&objects);
521    let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
522    trace_packfile(&pack.pack)?;
523    destination
524        .install_generated_pack_unchecked(&pack, options)
525        .map(Some)
526}
527
528fn trace_packfile(pack: &[u8]) -> Result<()> {
529    let Some(path) = env::var_os("GIT_TRACE_PACKFILE").filter(|value| !value.is_empty()) else {
530        return Ok(());
531    };
532    fs::write(path, pack)?;
533    Ok(())
534}
535
536fn collect_tree_filter_depths<R>(
537    reader: &R,
538    format: ObjectFormat,
539    objects: &[ReachablePackObject],
540) -> Result<HashMap<ObjectId, u32>>
541where
542    R: ObjectReader,
543{
544    let available: HashSet<ObjectId> = objects.iter().map(|entry| entry.oid).collect();
545    let mut depths = HashMap::new();
546    let mut stack = Vec::new();
547    for entry in objects {
548        if entry.object.object_type != ObjectType::Commit {
549            continue;
550        }
551        let commit = Commit::parse(format, &entry.object.body)?;
552        if available.contains(&commit.tree) {
553            stack.push((commit.tree, 0u32));
554        }
555    }
556    while let Some((tree_oid, depth)) = stack.pop() {
557        if depths
558            .get(&tree_oid)
559            .is_some_and(|old_depth| *old_depth <= depth)
560        {
561            continue;
562        }
563        depths.insert(tree_oid, depth);
564        let tree = reader.read_object(&tree_oid)?;
565        if tree.object_type != ObjectType::Tree {
566            continue;
567        }
568        let child_depth = depth.saturating_add(1);
569        for entry in TreeEntries::new(format, &tree.body) {
570            let entry = entry?;
571            if tree_entry_object_type(entry.mode) == ObjectType::Tree
572                && available.contains(&entry.oid)
573            {
574                stack.push((entry.oid, child_depth));
575            }
576        }
577    }
578    Ok(depths)
579}
580
581fn collect_sparse_filter_blobs<R>(
582    reader: &R,
583    format: ObjectFormat,
584    objects: &[ReachablePackObject],
585    paths: &[String],
586) -> Result<HashSet<ObjectId>>
587where
588    R: ObjectReader,
589{
590    let wanted_paths: HashSet<&str> = paths.iter().map(String::as_str).collect();
591    let mut allowed = HashSet::new();
592    let mut seen_trees = HashSet::new();
593    for entry in objects {
594        if entry.object.object_type != ObjectType::Commit {
595            continue;
596        }
597        let commit = Commit::parse(format, &entry.object.body)?;
598        collect_sparse_tree_blobs(
599            reader,
600            format,
601            &commit.tree,
602            "",
603            &wanted_paths,
604            &mut seen_trees,
605            &mut allowed,
606        )?;
607    }
608    Ok(allowed)
609}
610
611fn collect_sparse_tree_blobs<R>(
612    reader: &R,
613    format: ObjectFormat,
614    tree_oid: &ObjectId,
615    prefix: &str,
616    wanted_paths: &HashSet<&str>,
617    seen_trees: &mut HashSet<ObjectId>,
618    allowed: &mut HashSet<ObjectId>,
619) -> Result<()>
620where
621    R: ObjectReader,
622{
623    if !seen_trees.insert(*tree_oid) {
624        return Ok(());
625    }
626    let tree = reader.read_object(tree_oid)?;
627    if tree.object_type != ObjectType::Tree {
628        return Ok(());
629    }
630    for entry in TreeEntries::new(format, &tree.body) {
631        let entry = entry?;
632        let name = String::from_utf8_lossy(entry.name);
633        let path = if prefix.is_empty() {
634            name.into_owned()
635        } else {
636            format!("{prefix}/{name}")
637        };
638        if tree_entry_object_type(entry.mode) == ObjectType::Tree {
639            collect_sparse_tree_blobs(
640                reader,
641                format,
642                &entry.oid,
643                &path,
644                wanted_paths,
645                seen_trees,
646                allowed,
647            )?;
648        } else if wanted_paths.contains(path.as_str()) {
649            allowed.insert(entry.oid);
650        }
651    }
652    Ok(())
653}
654
655/// Assemble a pack stream that reuses an existing pack's object data verbatim
656/// (upstream pack-objects' "pack reuse" fast path, full-pack case) and appends
657/// `appended` as freshly encoded undeltified entries.
658///
659/// The reused pack's entry bytes are copied as-is between our own header and
660/// trailer: a full-pack copy preserves every relative distance, so internal
661/// `OFS_DELTA` bases stay valid. The header object count covers both the
662/// reused and appended entries, and the trailing pack checksum is recomputed
663/// over the assembled stream.
664pub fn assemble_pack_with_verbatim_reuse(
665    format: ObjectFormat,
666    reused_pack_bytes: &[u8],
667    appended: &[PackInput<'_>],
668) -> Result<(Vec<u8>, u32)> {
669    assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
670}
671
672/// Like [`assemble_pack_with_verbatim_reuse`], but concatenates multiple whole
673/// packs before appending fresh entries.
674pub fn assemble_pack_with_verbatim_reuses(
675    format: ObjectFormat,
676    reused_packs: &[&[u8]],
677    appended: &[PackInput<'_>],
678) -> Result<(Vec<u8>, u32)> {
679    let hash_len = format.raw_len();
680    let mut reused_count = 0u32;
681    let mut capacity = 12 + hash_len + 64 * appended.len();
682    for reused_pack_bytes in reused_packs {
683        if reused_pack_bytes.len() < 12 + hash_len {
684            return Err(GitError::InvalidFormat("reused pack too short".into()));
685        }
686        if &reused_pack_bytes[..4] != b"PACK" {
687            return Err(GitError::InvalidFormat(
688                "reused pack has no signature".into(),
689            ));
690        }
691        let version = u32::from_be_bytes([
692            reused_pack_bytes[4],
693            reused_pack_bytes[5],
694            reused_pack_bytes[6],
695            reused_pack_bytes[7],
696        ]);
697        if version != 2 {
698            return Err(GitError::Unsupported(format!(
699                "reused pack version {version}"
700            )));
701        }
702        let count = u32::from_be_bytes([
703            reused_pack_bytes[8],
704            reused_pack_bytes[9],
705            reused_pack_bytes[10],
706            reused_pack_bytes[11],
707        ]);
708        reused_count = reused_count
709            .checked_add(count)
710            .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
711        capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
712    }
713    let total = reused_count
714        .checked_add(appended.len() as u32)
715        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
716
717    let mut out = Vec::with_capacity(capacity);
718    out.extend_from_slice(b"PACK");
719    out.extend_from_slice(&2u32.to_be_bytes());
720    out.extend_from_slice(&total.to_be_bytes());
721    for reused_pack_bytes in reused_packs {
722        out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
723    }
724    for input in appended {
725        write_undeltified_pack_entry(&mut out, input.object)?;
726    }
727    let checksum = sley_core::digest_bytes(format, &out)?;
728    out.extend_from_slice(checksum.as_bytes());
729    Ok((out, reused_count))
730}
731
732/// Assemble a pack stream by copying already-encoded pack entries verbatim and
733/// appending freshly encoded undeltified entries.
734pub fn assemble_pack_with_verbatim_entries(
735    format: ObjectFormat,
736    reused_entries: &[&[u8]],
737    appended: &[PackInput<'_>],
738) -> Result<(Vec<u8>, u32)> {
739    let reused_count = u32::try_from(reused_entries.len())
740        .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
741    let total = reused_count
742        .checked_add(appended.len() as u32)
743        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
744
745    let mut capacity = 12 + format.raw_len() + 64 * appended.len();
746    for entry in reused_entries {
747        capacity = capacity.saturating_add(entry.len());
748    }
749    let mut out = Vec::with_capacity(capacity);
750    out.extend_from_slice(b"PACK");
751    out.extend_from_slice(&2u32.to_be_bytes());
752    out.extend_from_slice(&total.to_be_bytes());
753    for entry in reused_entries {
754        out.extend_from_slice(entry);
755    }
756    for input in appended {
757        write_undeltified_pack_entry(&mut out, input.object)?;
758    }
759    let checksum = sley_core::digest_bytes(format, &out)?;
760    out.extend_from_slice(checksum.as_bytes());
761    Ok((out, reused_count))
762}
763
764/// Append one undeltified pack entry (type/size varint header + zlib body).
765fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
766    let type_bits: u8 = match object.object_type {
767        ObjectType::Commit => 1,
768        ObjectType::Tree => 2,
769        ObjectType::Blob => 3,
770        ObjectType::Tag => 4,
771    };
772    let mut size = object.body.len() as u64;
773    let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
774    size >>= 4;
775    while size > 0 {
776        out.push(byte | 0x80);
777        byte = (size & 0x7f) as u8;
778        size >>= 7;
779    }
780    out.push(byte);
781    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
782    encoder.write_all(&object.body)?;
783    out.extend_from_slice(&encoder.finish()?);
784    Ok(())
785}
786
787/// Outcome of consolidating every object in a repository into a single pack.
788///
789/// This is the engine for `git gc` / `git repack`: [`repack_all_objects`]
790/// produces the bytes for one new delta-compressed pack plus its index, and
791/// reports which on-disk artifacts the caller could now remove. No deletions
792/// are performed by the engine itself; the CLI decides reachability policy and
793/// performs any pruning (see [`install_repack_result`]).
794#[derive(Debug, Clone, PartialEq, Eq)]
795pub struct RepackResult {
796    /// Bytes of the freshly written `.pack` file.
797    pub pack: Vec<u8>,
798    /// Bytes of the matching `.idx` file for [`RepackResult::pack`].
799    pub idx: Vec<u8>,
800    /// Number of distinct objects contained in the new pack.
801    pub object_count: usize,
802    /// Absolute paths of pre-existing `*.pack` files now superseded by the new
803    /// pack (every object they hold is present in [`RepackResult::pack`]).
804    pub obsolete_packs: Vec<PathBuf>,
805    /// Loose object ids that are now also present in the new pack and therefore
806    /// redundant on disk.
807    pub packed_loose: Vec<ObjectId>,
808    /// Pack stems (`pack-<checksum>`) that policy says must survive pruning
809    /// even if the new pack contains all of their objects.
810    retained_pack_stems: Vec<String>,
811    pack_checksum: ObjectId,
812    index_entries: Vec<PackIndexEntry>,
813}
814
/// Policy switches for a repack. The default enables none of them.
#[derive(Debug, Clone, Default)]
pub struct RepackOptions {
    /// Leave alternates alone: do not borrow their objects
    /// (`git repack --local`).
    pub local: bool,
    /// Also repack objects that already live in `.keep` / `--keep-pack` packs.
    pub pack_kept_objects: bool,
    /// Pack stems (`pack-<checksum>`) named explicitly via `--keep-pack=<name>`.
    pub keep_pack_stems: HashSet<String>,
}
824
825/// Gather every object in `git_dir` (loose objects and every existing pack) and
826/// write them into a single new delta-compressed pack.
827///
828/// Returns the new pack/index bytes, the count of packed objects, the list of
829/// pre-existing pack files that the new pack supersedes, and the loose object
830/// ids that are now packed. Nothing is deleted: the caller (CLI) decides
831/// reachability policy and performs any pruning, optionally via
832/// [`install_repack_result`].
833///
834/// Returns `Ok(None)` when the repository contains no objects at all.
835/// `git repack -a`'s gathering rule: pack the reachability closure of `roots`
836/// (ref tips, `HEAD`, reflog entries, indexed objects) instead of everything
837/// on disk. Borrowed objects (alternates) reachable from the roots are packed
838/// into the new local pack like upstream `pack-objects --all` without
839/// `--local`; previously-packed objects that are no longer reachable are NOT
840/// carried forward (that is how `repack -a -d` drops them). Missing objects
841/// are tolerated (stale reflog entries may reference pruned history).
842///
843/// Returns `Ok(None)` when no roots resolve to any object.
844pub fn repack_reachable_objects(
845    git_dir: &Path,
846    format: ObjectFormat,
847    roots: &[ObjectId],
848) -> Result<Option<RepackResult>> {
849    repack_reachable_objects_with_options(git_dir, format, roots, &RepackOptions::default())
850}
851
852pub fn repack_reachable_objects_with_options(
853    git_dir: &Path,
854    format: ObjectFormat,
855    roots: &[ObjectId],
856    options: &RepackOptions,
857) -> Result<Option<RepackResult>> {
858    let objects_dir = repository_objects_dir(git_dir);
859    let database = if options.local {
860        FileObjectDatabase::without_alternates(objects_dir.clone(), format)
861    } else {
862        FileObjectDatabase::new(objects_dir.clone(), format)
863    };
864    let retained_pack_stems = repack_retained_pack_stems(
865        &objects_dir.join("pack"),
866        &options.keep_pack_stems,
867        !options.pack_kept_objects,
868    )?;
869    let excluded_oids = if options.pack_kept_objects {
870        HashSet::new()
871    } else {
872        pack_oids_for_stems(&objects_dir.join("pack"), format, &retained_pack_stems)?
873    };
874
875    let mut seen: HashSet<ObjectId> = HashSet::new();
876    let mut objects: Vec<ReachablePackObject> = Vec::new();
877    let mut pending: Vec<ObjectId> = roots.to_vec();
878    while let Some(oid) = pending.pop() {
879        if !seen.insert(oid) {
880            continue;
881        }
882        let object = match database.read_object(&oid) {
883            Ok(object) => object,
884            Err(GitError::NotFound(_)) => continue,
885            Err(err) => return Err(err),
886        };
887        match object.object_type {
888            ObjectType::Commit => {
889                let commit = Commit::parse_ref(format, &object.body)?;
890                pending.extend(grafted_parents(&database, &oid, commit.parents));
891                pending.push(commit.tree);
892            }
893            ObjectType::Tree => {
894                for entry in TreeEntries::new(format, &object.body) {
895                    let entry = entry?;
896                    if !entry.is_gitlink() {
897                        pending.push(entry.oid);
898                    }
899                }
900            }
901            ObjectType::Tag => {
902                let tag = Tag::parse_ref(format, &object.body)?;
903                pending.push(tag.object);
904            }
905            ObjectType::Blob => {}
906        }
907        if !excluded_oids.contains(&oid) {
908            objects.push(ReachablePackObject { oid, object });
909        }
910    }
911
912    // Non-local repacks borrow packed objects from alternates as complete pack
913    // sources, while still leaving loose-only alternate objects alone. This
914    // matches `pack-objects --all` without `--local`: packed alternate objects
915    // are copied into the local consolidated pack, but a loose object in an
916    // alternate ODB is not duplicated just because a local tree points at it.
917    if !options.local {
918        for (alternate, oid) in alternate_packed_object_ids(&objects_dir, format)? {
919            if excluded_oids.contains(&oid) || !seen.insert(oid) {
920                continue;
921            }
922            let alternate_db = FileObjectDatabase::without_alternates(alternate, format);
923            match alternate_db.read_object(&oid) {
924                Ok(object) => objects.push(ReachablePackObject { oid, object }),
925                Err(GitError::NotFound(_)) => {}
926                Err(err) => return Err(err),
927            }
928        }
929    }
930
931    if objects.is_empty() {
932        return Ok(None);
933    }
934
935    let inputs = pack_inputs(&objects);
936    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
937    let object_count = written.entries.len();
938
939    // Every pre-existing local pack is superseded under `-a` (their reachable
940    // objects are in the new pack; their unreachable ones are being dropped).
941    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
942    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
943        .into_iter()
944        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
945        .collect();
946
947    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
948    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
949        .into_iter()
950        .filter(|oid| packed_oid_set.contains(oid))
951        .collect();
952    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
953
954    let pack_checksum = written.checksum;
955    let index_entries = written.entries.clone();
956    Ok(Some(RepackResult {
957        pack: written.pack,
958        idx: written.index,
959        object_count,
960        obsolete_packs,
961        packed_loose,
962        retained_pack_stems,
963        pack_checksum,
964        index_entries,
965    }))
966}
967
968fn repack_retained_pack_stems(
969    pack_dir: &Path,
970    explicit: &HashSet<String>,
971    keep_dot_keep: bool,
972) -> Result<Vec<String>> {
973    let mut stems = explicit.clone();
974    if keep_dot_keep {
975        for pack_path in existing_pack_files(pack_dir)? {
976            if pack_path.with_extension("keep").exists()
977                && let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
978            {
979                stems.insert(stem.to_string());
980            }
981        }
982    }
983    let mut stems = stems.into_iter().collect::<Vec<_>>();
984    stems.sort();
985    Ok(stems)
986}
987
988fn pack_oids_for_stems(
989    pack_dir: &Path,
990    format: ObjectFormat,
991    stems: &[String],
992) -> Result<HashSet<ObjectId>> {
993    let wanted: HashSet<&str> = stems.iter().map(String::as_str).collect();
994    if wanted.is_empty() {
995        return Ok(HashSet::new());
996    }
997    let mut oids = HashSet::new();
998    for pack_path in existing_pack_files(pack_dir)? {
999        let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1000            continue;
1001        };
1002        if !wanted.contains(stem) {
1003            continue;
1004        }
1005        let index_path = pack_path.with_extension("idx");
1006        if !index_path.exists() {
1007            continue;
1008        }
1009        let index = PackIndex::parse(&fs::read(index_path)?, format)?;
1010        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
1011    }
1012    Ok(oids)
1013}
1014
1015fn alternate_packed_object_ids(
1016    objects_dir: &Path,
1017    format: ObjectFormat,
1018) -> Result<Vec<(PathBuf, ObjectId)>> {
1019    let mut oids = Vec::new();
1020    for alternate in alternate_object_dirs(objects_dir) {
1021        let mut alternate_oids = HashSet::new();
1022        collect_packed_object_ids(&alternate.join("pack"), format, &mut alternate_oids)?;
1023        oids.extend(
1024            alternate_oids
1025                .into_iter()
1026                .map(|oid| (alternate.clone(), oid)),
1027        );
1028    }
1029    oids.sort_by(|left, right| {
1030        left.0
1031            .cmp(&right.0)
1032            .then(left.1.as_bytes().cmp(right.1.as_bytes()))
1033    });
1034    Ok(oids)
1035}
1036
1037pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1038    let objects_dir = repository_objects_dir(git_dir);
1039    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1040
1041    // Enumerate every object id reachable on disk: loose objects, every pack
1042    // index, and any multi-pack-index. `object_ids_in_objects_dir` already
1043    // unions all of these and de-duplicates them.
1044    let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
1045    if all_oids.is_empty() {
1046        return Ok(None);
1047    }
1048
1049    // Read each object's canonical encoding so the new pack stores byte-for-byte
1050    // identical payloads. Loose objects take precedence over packed copies in
1051    // `FileObjectDatabase::read_object`, but both decode to the same bytes.
1052    let mut objects = Vec::with_capacity(all_oids.len());
1053    for oid in &all_oids {
1054        objects.push(ReachablePackObject {
1055            oid: *oid,
1056            object: database.read_object(oid)?,
1057        });
1058    }
1059
1060    let inputs = pack_inputs(&objects);
1061    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1062    let object_count = written.entries.len();
1063
1064    // The new pack contains every object on disk, so every pre-existing pack is
1065    // fully superseded. We still record the exact pack paths (not the index
1066    // paths) so the caller can delete the right files. The pack we are about to
1067    // write is excluded by name in case its checksum collides with an existing
1068    // pack (identical contents).
1069    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
1070    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
1071        .into_iter()
1072        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
1073        .collect();
1074
1075    // Loose object ids that the new pack now also holds (which is all of them,
1076    // since they were gathered into it).
1077    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1078    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1079        .into_iter()
1080        .filter(|oid| packed_oid_set.contains(oid))
1081        .collect();
1082    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1083
1084    Ok(Some(RepackResult {
1085        pack: written.pack,
1086        idx: written.index,
1087        object_count,
1088        obsolete_packs,
1089        packed_loose,
1090        retained_pack_stems: Vec::new(),
1091        pack_checksum: written.checksum,
1092        index_entries: written.entries,
1093    }))
1094}
1095
1096/// Gather only loose objects in `git_dir` and write them into a new pack.
1097///
1098/// This is the engine for plain `git repack -d` (without `-a`): existing packs
1099/// remain in place, and pruning removes only the loose copies that the new pack
1100/// now serves.
1101pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1102    let objects_dir = repository_objects_dir(git_dir);
1103    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1104    let loose_oids = loose_object_ids(&objects_dir, format)?;
1105    if loose_oids.is_empty() {
1106        return Ok(None);
1107    }
1108
1109    let mut objects = Vec::with_capacity(loose_oids.len());
1110    for oid in &loose_oids {
1111        objects.push(ReachablePackObject {
1112            oid: *oid,
1113            object: database.read_object(oid)?,
1114        });
1115    }
1116
1117    let inputs = pack_inputs(&objects);
1118    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1119    let object_count = written.entries.len();
1120    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1121    let mut packed_loose: Vec<ObjectId> = loose_oids
1122        .into_iter()
1123        .filter(|oid| packed_oid_set.contains(oid))
1124        .collect();
1125    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1126
1127    let pack_checksum = written.checksum;
1128    let index_entries = written.entries.clone();
1129    Ok(Some(RepackResult {
1130        pack: written.pack,
1131        idx: written.index,
1132        object_count,
1133        obsolete_packs: Vec::new(),
1134        packed_loose,
1135        retained_pack_stems: Vec::new(),
1136        pack_checksum,
1137        index_entries,
1138    }))
1139}
1140
1141/// A local, non-kept, non-cruft pack considered for a geometric rollup,
1142/// paired with the object count that orders it in the progression.
1143#[derive(Debug, Clone)]
1144struct GeometryPack {
1145    /// Absolute path to the `.pack` file.
1146    pack_path: PathBuf,
1147    /// Object ids the pack holds (from its `.idx`).
1148    oids: Vec<ObjectId>,
1149    /// `num_objects` weight used to order the progression.
1150    weight: u64,
1151    /// True when this pack is a promisor pack (`.promisor` sidecar).
1152    is_promisor: bool,
1153}
1154
1155/// The outcome of a geometric rollup: the new pack (if one was written) plus
1156/// the rolled-up packs whose objects it now serves.
1157#[derive(Debug, Clone)]
1158pub struct GeometricRepackResult {
1159    /// `Some` when a new pack was written; `None` when nothing needed packing.
1160    pub result: Option<RepackResult>,
1161    /// Pack `.pack` paths below the split that may now be removed under `-d`.
1162    pub rolled_up_packs: Vec<PathBuf>,
1163}
1164
1165/// Collect the local non-cruft, non-kept packs eligible for geometric rollup,
1166/// keyed by promisor-ness, ordered ascending by object count.
1167fn collect_geometry_packs(
1168    objects_dir: &Path,
1169    format: ObjectFormat,
1170    kept_pack_stems: &HashSet<String>,
1171) -> Result<Vec<GeometryPack>> {
1172    let pack_dir = objects_dir.join("pack");
1173    let mut packs = Vec::new();
1174    for pack_path in existing_pack_files(&pack_dir)? {
1175        // Cruft packs (`.mtimes` sidecar) and kept packs are excluded from the
1176        // progression, matching `pack_geometry_init` in repack-geometry.c.
1177        if pack_path.with_extension("mtimes").exists() {
1178            continue;
1179        }
1180        if pack_path.with_extension("keep").exists() {
1181            continue;
1182        }
1183        let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1184            continue;
1185        };
1186        if kept_pack_stems.contains(stem) {
1187            continue;
1188        }
1189        let index_path = pack_path.with_extension("idx");
1190        if !index_path.exists() {
1191            continue;
1192        }
1193        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1194        let oids: Vec<ObjectId> = index.entries.iter().map(|entry| entry.oid).collect();
1195        let weight = oids.len() as u64;
1196        packs.push(GeometryPack {
1197            is_promisor: pack_path.with_extension("promisor").exists(),
1198            pack_path,
1199            oids,
1200            weight,
1201        });
1202    }
1203    // Ascending by weight; pack_path breaks ties deterministically.
1204    packs.sort_by(|a, b| a.weight.cmp(&b.weight).then(a.pack_path.cmp(&b.pack_path)));
1205    Ok(packs)
1206}
1207
1208/// Port of `compute_pack_geometry_split` (repack-geometry.c): given packs in
1209/// ascending weight order, return the split index — packs `[0..split)` roll up
1210/// into one new pack, packs `[split..)` are left alone.
1211fn compute_geometry_split(packs: &[GeometryPack], split_factor: u64) -> usize {
1212    let pack_nr = packs.len();
1213    if pack_nr == 0 {
1214        return 0;
1215    }
1216    // Count packs (descending size) that already form a geometric progression.
1217    let mut i = pack_nr - 1;
1218    while i > 0 {
1219        let ours = packs[i].weight;
1220        let prev = packs[i - 1].weight;
1221        if ours < split_factor.saturating_mul(prev) {
1222            break;
1223        }
1224        i -= 1;
1225    }
1226    let mut split = i;
1227    if split != 0 {
1228        // The top of the last-compared pair can't be in the progression.
1229        split += 1;
1230    }
1231
1232    // Roll up everything below `split`; pulling those into a new pack may break
1233    // the progression in the heavy half, so absorb heavy-half packs until it
1234    // holds again.
1235    let mut total_size: u64 = packs[..split].iter().map(|p| p.weight).sum();
1236    for pack in &packs[split..] {
1237        if pack.weight < split_factor.saturating_mul(total_size) {
1238            split += 1;
1239            total_size = total_size.saturating_add(pack.weight);
1240        } else {
1241            break;
1242        }
1243    }
1244    split
1245}
1246
1247/// `git repack --geometric=<factor>`: roll up the smallest packs (plus loose
1248/// unpacked objects) so the surviving packs form a geometric progression by
1249/// object count. Objects in the rolled-up packs and loose objects are gathered
1250/// into one new pack; packs at/above the split are left in place. The new pack
1251/// excludes objects already served by a left-alone pack.
1252///
1253/// Returns the new pack plus the rolled-up pack paths the caller may delete
1254/// under `-d`. Returns an all-`None`/empty result when nothing needs packing
1255/// ("Nothing new to pack").
1256pub fn repack_geometric(
1257    git_dir: &Path,
1258    format: ObjectFormat,
1259    split_factor: u64,
1260    kept_pack_stems: &HashSet<String>,
1261) -> Result<GeometricRepackResult> {
1262    let objects_dir = repository_objects_dir(git_dir);
1263    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1264
1265    // Promisor packs follow their own progression; the non-promisor packs are
1266    // the common case the test-suite exercises. Build the rollup from the
1267    // non-promisor packs plus loose objects.
1268    let all_packs = collect_geometry_packs(&objects_dir, format, kept_pack_stems)?;
1269    let packs: Vec<GeometryPack> = all_packs
1270        .into_iter()
1271        .filter(|pack| !pack.is_promisor)
1272        .collect();
1273
1274    let split = compute_geometry_split(&packs, split_factor);
1275
1276    let loose_oids = loose_object_ids(&objects_dir, format)?;
1277
1278    // The objects that end up in the new pack: every object in a rolled-up pack,
1279    // plus every loose object — but NOT objects already served by a pack left in
1280    // place (those above the split). This mirrors the `^pack` exclusion markers
1281    // that repack.c feeds to `pack-objects --stdin-packs`.
1282    let mut excluded_oids: HashSet<ObjectId> = HashSet::new();
1283    for pack in &packs[split..] {
1284        excluded_oids.extend(pack.oids.iter().copied());
1285    }
1286
1287    let mut included: Vec<ObjectId> = Vec::new();
1288    let mut seen: HashSet<ObjectId> = HashSet::new();
1289    for pack in &packs[..split] {
1290        for oid in &pack.oids {
1291            if excluded_oids.contains(oid) {
1292                continue;
1293            }
1294            if seen.insert(*oid) {
1295                included.push(*oid);
1296            }
1297        }
1298    }
1299    for oid in &loose_oids {
1300        if excluded_oids.contains(oid) {
1301            continue;
1302        }
1303        if seen.insert(*oid) {
1304            included.push(*oid);
1305        }
1306    }
1307
1308    // "Nothing new to pack": no packs roll up and no loose objects need packing.
1309    if included.is_empty() {
1310        return Ok(GeometricRepackResult {
1311            result: None,
1312            rolled_up_packs: Vec::new(),
1313        });
1314    }
1315
1316    included.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1317    let mut objects = Vec::with_capacity(included.len());
1318    for oid in &included {
1319        objects.push(ReachablePackObject {
1320            oid: *oid,
1321            object: database.read_object(oid)?,
1322        });
1323    }
1324
1325    let inputs = pack_inputs(&objects);
1326    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1327    let object_count = written.entries.len();
1328
1329    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1330    let mut packed_loose: Vec<ObjectId> = loose_oids
1331        .into_iter()
1332        .filter(|oid| packed_oid_set.contains(oid))
1333        .collect();
1334    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1335
1336    let rolled_up_packs: Vec<PathBuf> = packs[..split]
1337        .iter()
1338        .map(|pack| pack.pack_path.clone())
1339        .collect();
1340
1341    let pack_checksum = written.checksum;
1342    let index_entries = written.entries.clone();
1343    Ok(GeometricRepackResult {
1344        result: Some(RepackResult {
1345            pack: written.pack,
1346            idx: written.index,
1347            object_count,
1348            obsolete_packs: rolled_up_packs.clone(),
1349            packed_loose,
1350            retained_pack_stems: Vec::new(),
1351            pack_checksum,
1352            index_entries,
1353        }),
1354        rolled_up_packs,
1355    })
1356}
1357
1358/// Write the consolidated pack from a [`RepackResult`] into
1359/// `objects/pack/` and, when `prune` is set, remove the now-redundant
1360/// pre-existing packs and packed loose objects.
1361///
1362/// Pruning is opt-in and deliberately conservative: an object or pack is only
1363/// removed after verifying it is actually present in the freshly written pack
1364/// on disk. Concretely:
1365///
1366/// * a loose object is removed only if its id appears in the new pack;
1367/// * a pre-existing pack is removed only if it is not the pack we just wrote
1368///   *and* every object listed in its `.idx` is present in the new pack (its
1369///   `.idx` and known sidecars are removed alongside it);
1370/// * a stale `multi-pack-index` is removed only if every pack it references is
1371///   being removed, so no reader is ever left pointing at a deleted pack.
1372pub fn install_repack_result(
1373    git_dir: &Path,
1374    format: ObjectFormat,
1375    result: &RepackResult,
1376    prune: bool,
1377) -> Result<()> {
1378    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
1379}
1380
1381/// [`install_repack_result`] that additionally writes a `pack-<checksum>.bitmap`
1382/// reachability bitmap alongside the new pack when `bitmap_tips` is `Some`.
1383/// `bitmap_tips` carries the repository's ref tips (peeled to commits): they
1384/// receive selection preference, mirroring upstream's `NEEDS_BITMAP` flagging of
1385/// ref tips in `git repack -b` / `pack-objects --write-bitmap-index`.
1386pub fn install_repack_result_with_bitmap(
1387    git_dir: &Path,
1388    format: ObjectFormat,
1389    result: &RepackResult,
1390    prune: bool,
1391    bitmap_tips: Option<&HashSet<ObjectId>>,
1392) -> Result<()> {
1393    let objects_dir = repository_objects_dir(git_dir);
1394    let pack_dir = objects_dir.join("pack");
1395    fs::create_dir_all(&pack_dir)?;
1396
1397    // Validate the public bytes against the private provenance that
1398    // `repack_all_objects` captured from `PackFile::write_packed`. This avoids
1399    // inflating and resolving the freshly-written pack a second time while still
1400    // catching caller mutations before anything is written or pruned.
1401    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1402    let parsed_index = PackIndex::parse(&result.idx, format)?;
1403    if parsed_index.pack_checksum != result.pack_checksum {
1404        return Err(GitError::InvalidFormat(
1405            "repack index checksum does not match the new pack".into(),
1406        ));
1407    }
1408    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1409        return Err(GitError::InvalidFormat(
1410            "repack index does not match the new pack contents".into(),
1411        ));
1412    }
1413    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1414    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1415    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1416    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1417    // git writes a `.rev` alongside every repacked pack (`pack.writeReverseIndex`
1418    // defaults to true). Write it before the `.idx` so the index never becomes
1419    // visible ahead of its companions, mirroring upstream's finalize order.
1420    let reverse_index = sley_pack::PackReverseIndex::write(
1421        format,
1422        &sley_pack::pack_order_index_positions(&parsed_index.entries),
1423        &result.pack_checksum,
1424    )?;
1425    write_pack_component(&new_pack_path, &result.pack)?;
1426    write_pack_component(&new_rev_path, &reverse_index)?;
1427    write_pack_component(&new_index_path, &result.idx)?;
1428
1429    if let Some(tips) = bitmap_tips {
1430        // Build before pruning: the closure walk reads objects through the
1431        // pre-existing packs/loose store (the new pack holds the same bytes).
1432        let database = FileObjectDatabase::new(objects_dir.clone(), format);
1433        if let Some(bitmap) = build_pack_bitmap(
1434            &database,
1435            format,
1436            &result.index_entries,
1437            &result.pack_checksum,
1438            tips,
1439        )? {
1440            // Unlike the pack/idx/rev (content-addressed by the pack
1441            // checksum), the bitmap depends on selection inputs (e.g.
1442            // pack.preferBitmapTips), so an existing file must be replaced —
1443            // write_pack_component's exists-skip would keep a stale selection.
1444            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1445            remove_file_if_exists(&bitmap_path)?;
1446            write_pack_component(&bitmap_path, &bitmap)?;
1447        }
1448    }
1449
1450    if !prune {
1451        return Ok(());
1452    }
1453
1454    // Prune based on the objects the new pack's *index* can resolve (what reads use
1455    // once the old packs are gone), not just what the pack contains — so a stale
1456    // pack is never removed for an object the new index cannot serve.
1457    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1458
1459    prune_obsolete_pack_paths(
1460        &objects_dir,
1461        format,
1462        &result.obsolete_packs,
1463        &new_pack_path,
1464        &result.retained_pack_stems,
1465    )?;
1466    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1467    Ok(())
1468}
1469
1470/// Install a [`repack_geometric`] result: write the new pack, then under `prune`
1471/// remove EXACTLY the rolled-up packs (those below the geometric split) plus the
1472/// loose objects now packed. Unlike [`install_repack_result`], packs left in
1473/// place above the split are never removed even though some of their objects may
1474/// also live in the new pack.
1475pub fn install_geometric_repack_result(
1476    git_dir: &Path,
1477    format: ObjectFormat,
1478    geometric: &GeometricRepackResult,
1479    prune: bool,
1480    bitmap_tips: Option<&HashSet<ObjectId>>,
1481) -> Result<()> {
1482    let Some(result) = geometric.result.as_ref() else {
1483        return Ok(());
1484    };
1485    let objects_dir = repository_objects_dir(git_dir);
1486    let pack_dir = objects_dir.join("pack");
1487    fs::create_dir_all(&pack_dir)?;
1488
1489    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1490    let parsed_index = PackIndex::parse(&result.idx, format)?;
1491    if parsed_index.pack_checksum != result.pack_checksum {
1492        return Err(GitError::InvalidFormat(
1493            "repack index checksum does not match the new pack".into(),
1494        ));
1495    }
1496    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1497        return Err(GitError::InvalidFormat(
1498            "repack index does not match the new pack contents".into(),
1499        ));
1500    }
1501    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1502    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1503    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1504    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1505    let reverse_index = sley_pack::PackReverseIndex::write(
1506        format,
1507        &sley_pack::pack_order_index_positions(&parsed_index.entries),
1508        &result.pack_checksum,
1509    )?;
1510    write_pack_component(&new_pack_path, &result.pack)?;
1511    write_pack_component(&new_rev_path, &reverse_index)?;
1512    write_pack_component(&new_index_path, &result.idx)?;
1513
1514    if let Some(tips) = bitmap_tips {
1515        let database = FileObjectDatabase::new(objects_dir.clone(), format);
1516        if let Some(bitmap) = build_pack_bitmap(
1517            &database,
1518            format,
1519            &result.index_entries,
1520            &result.pack_checksum,
1521            tips,
1522        )? {
1523            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1524            remove_file_if_exists(&bitmap_path)?;
1525            write_pack_component(&bitmap_path, &bitmap)?;
1526        }
1527    }
1528
1529    if !prune {
1530        return Ok(());
1531    }
1532
1533    // Remove exactly the rolled-up packs (below the split). Never touch packs
1534    // left in place above the split.
1535    for pack_path in &geometric.rolled_up_packs {
1536        if *pack_path == new_pack_path {
1537            continue;
1538        }
1539        if pack_path.with_extension("keep").exists() {
1540            continue;
1541        }
1542        remove_file_if_exists(pack_path)?;
1543        remove_file_if_exists(&pack_path.with_extension("idx"))?;
1544        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
1545            remove_file_if_exists(&pack_path.with_extension(ext))?;
1546        }
1547    }
1548
1549    // Drop loose copies now served by the new pack.
1550    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1551    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1552
1553    // A multi-pack-index that references any removed pack is now stale.
1554    let removed_stems: HashSet<String> = geometric
1555        .rolled_up_packs
1556        .iter()
1557        .filter_map(|p| p.file_stem().map(|s| s.to_string_lossy().into_owned()))
1558        .collect();
1559    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1560    Ok(())
1561}
1562
1563fn validate_pack_checksum(
1564    pack: &[u8],
1565    format: ObjectFormat,
1566    expected: &ObjectId,
1567    context: &str,
1568) -> Result<()> {
1569    if expected.format() != format {
1570        return Err(GitError::InvalidObjectId(format!(
1571            "{context} checksum format does not match object format"
1572        )));
1573    }
1574    let hash_len = format.raw_len();
1575    if pack.len() < 12 + hash_len {
1576        return Err(GitError::InvalidFormat(format!(
1577            "{context} pack file too short"
1578        )));
1579    }
1580    if &pack[..4] != b"PACK" {
1581        return Err(GitError::InvalidFormat(format!(
1582            "{context} pack file missing PACK signature"
1583        )));
1584    }
1585    let trailer_offset = pack.len() - hash_len;
1586    let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
1587    let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
1588    if &actual != expected || trailer != *expected {
1589        return Err(GitError::InvalidFormat(format!(
1590            "{context} pack checksum does not match generated pack"
1591        )));
1592    }
1593    Ok(())
1594}
1595
/// The UNIX-seconds mtime of a path, or `0` when unavailable.
///
/// "Unavailable" covers a path whose metadata or modification time cannot be
/// read, and a modification time before the UNIX epoch. A modification time
/// too far in the future to fit in `u32` saturates at `u32::MAX` instead of
/// wrapping around to a small (ancient-looking) value.
fn path_mtime_secs(path: &Path) -> u32 {
    let Ok(modified) = fs::metadata(path).and_then(|metadata| metadata.modified()) else {
        return 0;
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => u32::try_from(since_epoch.as_secs()).unwrap_or(u32::MAX),
        Err(_) => 0,
    }
}
1605
1606/// The bytes of one cruft `.mtimes` pack plus its sidecars and checksum, ready
1607/// to install under `objects/pack/`.
1608#[derive(Debug, Clone)]
1609pub struct CruftPack {
1610    pub pack: Vec<u8>,
1611    pub idx: Vec<u8>,
1612    pub rev: Vec<u8>,
1613    pub mtimes: Vec<u8>,
1614    pub checksum: ObjectId,
1615    /// Object ids the cruft pack holds (its surviving unreachable set).
1616    pub oids: Vec<ObjectId>,
1617}
1618
1619/// Outcome of `git repack --cruft`: the reachable pack (if any) plus the cruft
1620/// `.mtimes` pack of surviving unreachable objects.
1621#[derive(Debug, Clone)]
1622pub struct CruftRepackResult {
1623    /// The all-into-one reachable pack, or `None` when nothing is reachable.
1624    pub reachable: Option<RepackResult>,
1625    /// The cruft pack of unreachable objects, or `None` when there are none.
1626    pub cruft: Option<CruftPack>,
1627    /// Pre-existing non-cruft, non-kept pack `.pack` paths superseded by the
1628    /// reachable pack (removed under `-d`).
1629    pub obsolete_packs: Vec<PathBuf>,
1630    /// Pre-existing cruft `.pack` paths whose objects are now in the new cruft
1631    /// pack (removed under `-d`).
1632    pub obsolete_cruft_packs: Vec<PathBuf>,
1633    retained_pack_stems: Vec<String>,
1634}
1635
1636/// Gather every object id on disk together with the best (max) mtime of any
1637/// copy: a packed object contributes its pack's mtime (or its own recorded
1638/// mtime inside a cruft pack), a loose object contributes its file mtime.
1639pub fn object_mtimes_on_disk_pub(
1640    objects_dir: &Path,
1641    format: ObjectFormat,
1642) -> Result<HashMap<ObjectId, u32>> {
1643    object_mtimes_on_disk(objects_dir, format)
1644}
1645
1646fn object_mtimes_on_disk(
1647    objects_dir: &Path,
1648    format: ObjectFormat,
1649) -> Result<HashMap<ObjectId, u32>> {
1650    let mut mtimes: HashMap<ObjectId, u32> = HashMap::new();
1651    let mut record = |oid: ObjectId, mtime: u32| {
1652        mtimes
1653            .entry(oid)
1654            .and_modify(|existing| {
1655                if mtime > *existing {
1656                    *existing = mtime;
1657                }
1658            })
1659            .or_insert(mtime);
1660    };
1661
1662    let pack_dir = objects_dir.join("pack");
1663    if let Ok(entries) = fs::read_dir(&pack_dir) {
1664        let mut idx_paths: Vec<PathBuf> = Vec::new();
1665        for entry in entries {
1666            let path = entry?.path();
1667            if path.extension().and_then(|ext| ext.to_str()) == Some("idx") {
1668                idx_paths.push(path);
1669            }
1670        }
1671        idx_paths.sort();
1672        for idx_path in idx_paths {
1673            let pack_path = idx_path.with_extension("pack");
1674            if !pack_path.exists() {
1675                continue;
1676            }
1677            let index = PackIndex::parse(&fs::read(&idx_path)?, format)?;
1678            let mtimes_path = idx_path.with_extension("mtimes");
1679            let pack_object_mtimes: Option<Vec<u32>> =
1680                fs::read(&mtimes_path).ok().and_then(|bytes| {
1681                    sley_pack::PackMtimes::parse(&bytes, format, index.entries.len())
1682                        .ok()
1683                        .map(|parsed| parsed.mtimes)
1684                });
1685            let pack_mtime = path_mtime_secs(&pack_path);
1686            for (pos, entry) in index.entries.iter().enumerate() {
1687                let mtime = pack_object_mtimes
1688                    .as_ref()
1689                    .and_then(|table| table.get(pos).copied())
1690                    .unwrap_or(pack_mtime);
1691                record(entry.oid, mtime);
1692            }
1693        }
1694    }
1695
1696    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1697    for oid in loose_object_ids(objects_dir, format)? {
1698        let path = store.object_path(&oid)?;
1699        record(oid, path_mtime_secs(&path));
1700    }
1701    Ok(mtimes)
1702}
1703
1704/// Public wrapper over [`build_cruft_pack`] for the `--expire-to` limbo pack.
1705pub fn build_cruft_pack_pub(
1706    database: &FileObjectDatabase,
1707    format: ObjectFormat,
1708    survivors: &HashMap<ObjectId, u32>,
1709) -> Result<Option<CruftPack>> {
1710    build_cruft_pack(database, format, survivors)
1711}
1712
1713/// Build the cruft `.mtimes` pack from the surviving unreachable objects and
1714/// their timestamps.
1715fn build_cruft_pack(
1716    database: &FileObjectDatabase,
1717    format: ObjectFormat,
1718    survivors: &HashMap<ObjectId, u32>,
1719) -> Result<Option<CruftPack>> {
1720    if survivors.is_empty() {
1721        return Ok(None);
1722    }
1723    let mut ordered: Vec<(ObjectId, u32)> = survivors.iter().map(|(o, m)| (*o, *m)).collect();
1724    ordered.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
1725
1726    let mut oids: Vec<ObjectId> = Vec::with_capacity(ordered.len());
1727    let mut objects: Vec<Arc<EncodedObject>> = Vec::with_capacity(ordered.len());
1728    let mut mtime_by_oid: HashMap<ObjectId, u32> = HashMap::with_capacity(ordered.len());
1729    for (oid, mtime) in ordered {
1730        match database.read_object(&oid) {
1731            Ok(object) => {
1732                oids.push(oid);
1733                objects.push(object);
1734                mtime_by_oid.insert(oid, mtime);
1735            }
1736            Err(GitError::NotFound(_)) => {}
1737            Err(err) => return Err(err),
1738        }
1739    }
1740    if oids.is_empty() {
1741        return Ok(None);
1742    }
1743
1744    let inputs: Vec<PackInput<'_>> = oids
1745        .iter()
1746        .zip(&objects)
1747        .map(|(oid, object)| PackInput {
1748            oid,
1749            object: object.as_ref(),
1750        })
1751        .collect();
1752    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1753
1754    // `.mtimes` table is in lexicographic (index/fanout) order.
1755    let mut sorted_entries: Vec<&sley_pack::PackIndexEntry> = written.entries.iter().collect();
1756    sorted_entries.sort_by(|a, b| a.oid.as_bytes().cmp(b.oid.as_bytes()));
1757    let mtimes_table: Vec<u32> = sorted_entries
1758        .iter()
1759        .map(|entry| mtime_by_oid.get(&entry.oid).copied().unwrap_or(0))
1760        .collect();
1761    let positions = sley_pack::pack_order_index_positions(&written.entries);
1762    let rev = sley_pack::PackReverseIndex::write(format, &positions, &written.checksum)?;
1763    let mtimes = sley_pack::PackMtimes::write(format, &mtimes_table, &written.checksum)?;
1764
1765    let mut cruft_oids: Vec<ObjectId> = sorted_entries.iter().map(|e| e.oid).collect();
1766    cruft_oids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1767    Ok(Some(CruftPack {
1768        pack: written.pack,
1769        idx: written.index,
1770        rev,
1771        mtimes,
1772        checksum: written.checksum,
1773        oids: cruft_oids,
1774    }))
1775}
1776
1777/// `git repack --cruft [--cruft-expiration=<t>] [-d]`: pack the reachable
1778/// closure of `roots` into one new pack, then collect every unreachable object
1779/// into a `.mtimes`-stamped cruft pack (honouring `cruft_expiration`). The
1780/// caller installs the result and, under `-d`, removes the superseded non-cruft
1781/// and old cruft packs.
1782///
1783/// Mirrors builtin/repack.c's PACK_CRUFT path + repack-cruft.c `write_cruft_pack`
1784/// without the per-pack stdin protocol: unreachable objects are everything on
1785/// disk minus the reachable set.
1786pub fn repack_cruft(
1787    git_dir: &Path,
1788    format: ObjectFormat,
1789    roots: &[ObjectId],
1790    cruft_expiration: Option<u32>,
1791) -> Result<CruftRepackResult> {
1792    repack_cruft_with_options(
1793        git_dir,
1794        format,
1795        roots,
1796        cruft_expiration,
1797        &RepackOptions::default(),
1798    )
1799}
1800
1801pub fn repack_cruft_with_options(
1802    git_dir: &Path,
1803    format: ObjectFormat,
1804    roots: &[ObjectId],
1805    cruft_expiration: Option<u32>,
1806    options: &RepackOptions,
1807) -> Result<CruftRepackResult> {
1808    let objects_dir = repository_objects_dir(git_dir);
1809    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1810    let pack_dir = objects_dir.join("pack");
1811    let retained_pack_stems = repack_retained_pack_stems(
1812        &pack_dir,
1813        &options.keep_pack_stems,
1814        !options.pack_kept_objects,
1815    )?;
1816    let excluded_oids = if options.pack_kept_objects {
1817        HashSet::new()
1818    } else {
1819        pack_oids_for_stems(&pack_dir, format, &retained_pack_stems)?
1820    };
1821
1822    // Reachable closure → the new "reachable" pack.
1823    let mut reachable_ids = collect_reachable_object_ids(&database, format, roots.iter().copied())?;
1824    reachable_ids.retain(|oid| !excluded_oids.contains(oid));
1825    let reachable_result = if reachable_ids.is_empty() {
1826        None
1827    } else {
1828        let mut ids: Vec<ObjectId> = reachable_ids.iter().copied().collect();
1829        ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1830        let mut objects = Vec::with_capacity(ids.len());
1831        for oid in &ids {
1832            match database.read_object(oid) {
1833                Ok(object) => objects.push(ReachablePackObject { oid: *oid, object }),
1834                Err(GitError::NotFound(_)) => {}
1835                Err(err) => return Err(err),
1836            }
1837        }
1838        if objects.is_empty() {
1839            None
1840        } else {
1841            let inputs = pack_inputs(&objects);
1842            let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1843            let packed_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1844            let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1845                .into_iter()
1846                .filter(|oid| packed_set.contains(oid))
1847                .collect();
1848            packed_loose.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1849            Some(RepackResult {
1850                pack: written.pack,
1851                idx: written.index,
1852                object_count: written.entries.len(),
1853                obsolete_packs: Vec::new(),
1854                packed_loose,
1855                retained_pack_stems: Vec::new(),
1856                pack_checksum: written.checksum,
1857                index_entries: written.entries,
1858            })
1859        }
1860    };
1861
1862    // Unreachable objects = everything on disk minus the reachable set, stamped
1863    // with their best mtime.
1864    let mut survivors: HashMap<ObjectId, u32> = object_mtimes_on_disk(&objects_dir, format)?
1865        .into_iter()
1866        .filter(|(oid, _)| !reachable_ids.contains(oid) && !excluded_oids.contains(oid))
1867        .collect();
1868
1869    // Expiration: rescue older objects reachable from a recent one, drop the rest.
1870    if let Some(expiration) = cruft_expiration {
1871        rescue_and_expire_cruft_objects(&database, format, &mut survivors, expiration)?;
1872    }
1873
1874    let cruft = build_cruft_pack(&database, format, &survivors)?;
1875
1876    // The packs the reachable+cruft packs supersede: every pre-existing
1877    // non-kept pack. Cruft packs are tracked separately.
1878    let mut obsolete_packs = Vec::new();
1879    let mut obsolete_cruft_packs = Vec::new();
1880    for pack_path in existing_pack_files(&pack_dir)? {
1881        if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
1882            && retained_pack_stems.iter().any(|retained| retained == stem)
1883        {
1884            continue;
1885        }
1886        if pack_path.with_extension("keep").exists() {
1887            continue;
1888        }
1889        if pack_path.with_extension("mtimes").exists() {
1890            obsolete_cruft_packs.push(pack_path);
1891        } else {
1892            obsolete_packs.push(pack_path);
1893        }
1894    }
1895
1896    Ok(CruftRepackResult {
1897        reachable: reachable_result,
1898        cruft,
1899        obsolete_packs,
1900        obsolete_cruft_packs,
1901        retained_pack_stems,
1902    })
1903}
1904
1905/// Apply `--cruft-expiration` over the survivor map in place: starting from the
1906/// recent candidates (mtime strictly newer than `expiration`), walk reachability
1907/// and rescue every dependency at the cutoff mtime; drop older candidates that
1908/// no recent object reaches. Mirrors the pack-objects cruft expiry traversal.
1909fn rescue_and_expire_cruft_objects(
1910    database: &FileObjectDatabase,
1911    format: ObjectFormat,
1912    survivors: &mut HashMap<ObjectId, u32>,
1913    expiration: u32,
1914) -> Result<()> {
1915    let recent: Vec<ObjectId> = survivors
1916        .iter()
1917        .filter(|(_, mtime)| **mtime > expiration)
1918        .map(|(oid, _)| *oid)
1919        .collect();
1920
1921    let mut keep: HashSet<ObjectId> = HashSet::new();
1922    let mut pending: Vec<ObjectId> = recent.clone();
1923    while let Some(oid) = pending.pop() {
1924        if !keep.insert(oid) {
1925            continue;
1926        }
1927        let Ok(object) = database.read_object(&oid) else {
1928            continue;
1929        };
1930        match object.object_type {
1931            ObjectType::Commit => {
1932                if let Ok(commit) = Commit::parse_ref(format, &object.body) {
1933                    pending.extend(commit.parents.iter().copied());
1934                    pending.push(commit.tree);
1935                }
1936            }
1937            ObjectType::Tree => {
1938                for entry in TreeEntries::new(format, &object.body).flatten() {
1939                    if !entry.is_gitlink() {
1940                        pending.push(entry.oid);
1941                    }
1942                }
1943            }
1944            ObjectType::Tag => {
1945                if let Ok(tag) = Tag::parse_ref(format, &object.body) {
1946                    pending.push(tag.object);
1947                }
1948            }
1949            ObjectType::Blob => {}
1950        }
1951    }
1952
1953    // Drop any survivor that is neither recent nor rescued; rescued-but-older
1954    // objects keep their recorded mtime (already >= 0), recent ones unchanged.
1955    survivors.retain(|oid, mtime| *mtime > expiration || keep.contains(oid));
1956    Ok(())
1957}
1958
1959/// Install a [`repack_cruft`] result: write the reachable pack and the cruft
1960/// `.mtimes` pack, then under `prune` remove the superseded non-cruft packs, old
1961/// cruft packs, and the loose objects now served.
1962pub fn install_cruft_repack_result(
1963    git_dir: &Path,
1964    format: ObjectFormat,
1965    result: &CruftRepackResult,
1966    prune: bool,
1967) -> Result<()> {
1968    let objects_dir = repository_objects_dir(git_dir);
1969    let pack_dir = objects_dir.join("pack");
1970    fs::create_dir_all(&pack_dir)?;
1971
1972    // Names of packs we are about to remove (so we never delete the new ones).
1973    let new_reachable_name = result
1974        .reachable
1975        .as_ref()
1976        .map(|r| format!("pack-{}.pack", r.pack_checksum.to_hex()));
1977    let new_cruft_name = result
1978        .cruft
1979        .as_ref()
1980        .map(|c| format!("pack-{}.pack", c.checksum.to_hex()));
1981
1982    // Write the reachable pack (idx + rev + pack), content-addressed.
1983    if let Some(reachable) = result.reachable.as_ref() {
1984        let parsed_index = PackIndex::parse(&reachable.idx, format)?;
1985        let pack_name = format!("pack-{}", reachable.pack_checksum.to_hex());
1986        let reverse_index = sley_pack::PackReverseIndex::write(
1987            format,
1988            &sley_pack::pack_order_index_positions(&parsed_index.entries),
1989            &reachable.pack_checksum,
1990        )?;
1991        write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &reachable.pack)?;
1992        write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &reverse_index)?;
1993        write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &reachable.idx)?;
1994    }
1995
1996    // Write the cruft pack (pack + rev + mtimes + idx).
1997    if let Some(cruft) = result.cruft.as_ref() {
1998        let pack_name = format!("pack-{}", cruft.checksum.to_hex());
1999        write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &cruft.pack)?;
2000        write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &cruft.rev)?;
2001        write_pack_component(&pack_dir.join(format!("{pack_name}.mtimes")), &cruft.mtimes)?;
2002        write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &cruft.idx)?;
2003    }
2004
2005    if !prune {
2006        return Ok(());
2007    }
2008
2009    // Objects now served by the new packs.
2010    let mut present: HashSet<ObjectId> = HashSet::new();
2011    if let Some(reachable) = result.reachable.as_ref() {
2012        present.extend(reachable.index_entries.iter().map(|e| e.oid));
2013    }
2014    if let Some(cruft) = result.cruft.as_ref() {
2015        present.extend(cruft.oids.iter().copied());
2016    }
2017
2018    // Remove superseded non-cruft + old cruft packs (skip the new ones).
2019    let mut removed_stems: HashSet<String> = HashSet::new();
2020    for pack_path in result
2021        .obsolete_packs
2022        .iter()
2023        .chain(result.obsolete_cruft_packs.iter())
2024    {
2025        let file_name = pack_path.file_name().and_then(|n| n.to_str());
2026        if file_name == new_reachable_name.as_deref() || file_name == new_cruft_name.as_deref() {
2027            continue;
2028        }
2029        if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
2030            && result
2031                .retained_pack_stems
2032                .iter()
2033                .any(|retained| retained == stem)
2034        {
2035            continue;
2036        }
2037        if pack_path.with_extension("keep").exists() {
2038            continue;
2039        }
2040        if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) {
2041            removed_stems.insert(stem.to_string());
2042        }
2043        remove_file_if_exists(pack_path)?;
2044        remove_file_if_exists(&pack_path.with_extension("idx"))?;
2045        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
2046            remove_file_if_exists(&pack_path.with_extension(ext))?;
2047        }
2048    }
2049
2050    // Drop loose objects now in a new pack.
2051    let loose_now_packed: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2052        .into_iter()
2053        .filter(|oid| present.contains(oid))
2054        .collect();
2055    prune_loose_objects(&objects_dir, format, loose_now_packed.iter(), &present)?;
2056
2057    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2058    Ok(())
2059}
2060
2061fn pack_index_entries_match_writer(
2062    parsed: &[PackIndexEntry],
2063    writer_entries: &[PackIndexEntry],
2064) -> bool {
2065    if parsed.len() != writer_entries.len() {
2066        return false;
2067    }
2068    let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
2069    writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2070    parsed.iter().zip(writer_entries).all(|(left, right)| {
2071        left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
2072    })
2073}
2074
2075/// List loose objects under `git_dir` that are *not* reachable from `roots`,
2076/// optionally deleting them.
2077///
2078/// Reachability is computed with [`collect_reachable_object_ids`] over the
2079/// repository's object database, so trees, parents, and tag targets are all
2080/// followed. When `delete` is `false` the returned ids are merely reported;
2081/// when `true` each unreachable loose object file is removed (packed copies are
2082/// never touched). Deletion is therefore opt-in.
2083pub fn prune_unreachable_loose<I>(
2084    git_dir: &Path,
2085    format: ObjectFormat,
2086    roots: I,
2087    delete: bool,
2088) -> Result<Vec<ObjectId>>
2089where
2090    I: IntoIterator<Item = ObjectId>,
2091{
2092    let objects_dir = repository_objects_dir(git_dir);
2093    let database = FileObjectDatabase::new(objects_dir.clone(), format);
2094    let reachable = collect_reachable_object_ids(&database, format, roots)?;
2095
2096    let store = LooseObjectStore::new(objects_dir.clone(), format);
2097    let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2098        .into_iter()
2099        .filter(|oid| !reachable.contains(oid))
2100        .collect();
2101    pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2102
2103    if delete {
2104        for oid in &pruned {
2105            let path = store.object_path(oid)?;
2106            match fs::remove_file(&path) {
2107                Ok(()) => {}
2108                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
2109                Err(err) => return Err(GitError::Io(err.to_string())),
2110            }
2111        }
2112    }
2113    Ok(pruned)
2114}
2115
2116/// Loose object ids under `objects_dir`, sorted by hex, with packed objects
2117/// excluded.
2118fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
2119    let oids = loose_object_id_set(objects_dir, format)?;
2120    let mut oids = oids.into_iter().collect::<Vec<_>>();
2121    oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2122    Ok(oids)
2123}
2124
2125fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
2126    let mut oids = HashSet::new();
2127    collect_loose_object_ids(objects_dir, format, &mut oids)?;
2128    Ok(oids)
2129}
2130
2131/// Absolute paths of every `*.pack` file directly inside `pack_dir`, sorted for
2132/// deterministic output.
2133fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
2134    if !pack_dir.exists() {
2135        return Ok(Vec::new());
2136    }
2137    let mut packs = Vec::new();
2138    for entry in fs::read_dir(pack_dir)? {
2139        let path = entry?.path();
2140        if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
2141            packs.push(path);
2142        }
2143    }
2144    packs.sort();
2145    Ok(packs)
2146}
2147
2148/// Remove pre-existing packs whose every object is contained in `present`,
2149/// skipping `keep` (the pack just written), `.keep` packs, and `.promisor` packs.
2150/// A stale multi-pack-index that references any removed pack is removed too.
2151fn prune_obsolete_pack_paths(
2152    objects_dir: &Path,
2153    format: ObjectFormat,
2154    packs: &[PathBuf],
2155    keep: &Path,
2156    retained_pack_stems: &[String],
2157) -> Result<()> {
2158    prune_pack_paths_matching(objects_dir, format, packs.iter(), keep, retained_pack_stems, |_| Ok(true))
2159}
2160
2161fn prune_pack_paths_matching<'a>(
2162    objects_dir: &Path,
2163    format: ObjectFormat,
2164    packs: impl IntoIterator<Item = &'a PathBuf>,
2165    keep: &Path,
2166    retained_pack_stems: &[String],
2167    mut should_prune: impl FnMut(&Path) -> Result<bool>,
2168) -> Result<()> {
2169    let pack_dir = objects_dir.join("pack");
2170    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
2171    let retained_pack_stems: HashSet<&str> =
2172        retained_pack_stems.iter().map(String::as_str).collect();
2173    let mut removed_stems: HashSet<String> = HashSet::new();
2174
2175    for pack_path in packs {
2176        if pack_path == keep {
2177            continue;
2178        }
2179        let Some(stem) = pack_path.file_stem() else {
2180            continue;
2181        };
2182        if Some(stem) == keep_stem.as_deref() {
2183            continue;
2184        }
2185        if let Some(stem) = stem.to_str()
2186            && retained_pack_stems.contains(stem)
2187        {
2188            continue;
2189        }
2190        if pack_path.with_extension("keep").exists()
2191            || pack_path.with_extension("promisor").exists()
2192        {
2193            continue;
2194        }
2195        if !should_prune(pack_path)? {
2196            continue;
2197        }
2198        remove_file_if_exists(pack_path)?;
2199        remove_file_if_exists(&pack_path.with_extension("idx"))?;
2200        for ext in ["rev", "mtimes", "bitmap"] {
2201            remove_file_if_exists(&pack_path.with_extension(ext))?;
2202        }
2203        removed_stems.insert(stem.to_string_lossy().into_owned());
2204    }
2205
2206    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2207    Ok(())
2208}
2209
2210/// Remove a `multi-pack-index` if it names *any* pack that was removed.
2211///
2212/// A MIDX that still references a deleted pack makes reads fail (the lookup
2213/// resolves to a pack that is gone) before any fallback. Removing the whole MIDX
2214/// when even one of its packs is pruned forces readers back to the individual pack
2215/// indexes, which are correct; `multi-pack-index write` can rebuild it later.
2216fn prune_stale_multi_pack_index(
2217    pack_dir: &Path,
2218    format: ObjectFormat,
2219    removed_stems: &HashSet<String>,
2220) -> Result<()> {
2221    if removed_stems.is_empty() {
2222        return Ok(());
2223    }
2224    let midx_path = pack_dir.join("multi-pack-index");
2225    if !midx_path.exists() {
2226        return Ok(());
2227    }
2228    let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
2229    let references_removed_pack = midx.pack_names.iter().any(|name| {
2230        let stem = name.strip_suffix(".idx").unwrap_or(name);
2231        removed_stems.contains(stem)
2232    });
2233    if references_removed_pack {
2234        remove_file_if_exists(&midx_path)?;
2235    }
2236    Ok(())
2237}
2238
2239/// Remove each loose object in `candidates` whose id is in `present`, leaving
2240/// any object not actually packed untouched.
2241fn prune_loose_objects<'a, I>(
2242    objects_dir: &Path,
2243    format: ObjectFormat,
2244    candidates: I,
2245    present: &HashSet<ObjectId>,
2246) -> Result<()>
2247where
2248    I: IntoIterator<Item = &'a ObjectId>,
2249{
2250    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
2251    for oid in candidates {
2252        if !present.contains(oid) {
2253            continue;
2254        }
2255        remove_file_if_exists(&store.object_path(oid)?)?;
2256    }
2257    Ok(())
2258}
2259
2260enum PackDeltaBase {
2261    Offset(u64),
2262    Ref(ObjectId),
2263}
2264
2265struct PackIndexOffsetInfo {
2266    end_offset: u64,
2267    delta_base_oid: Option<ObjectId>,
2268}
2269
2270fn scan_pack_index_offsets(
2271    index: &PackIndex,
2272    target_offset: u64,
2273    trailer_offset: u64,
2274    delta_base_offset: Option<u64>,
2275) -> Result<PackIndexOffsetInfo> {
2276    let mut target_count = 0usize;
2277    let mut next_offset = None;
2278    let mut delta_base_oid = None;
2279
2280    for entry in &index.entries {
2281        if entry.offset == target_offset {
2282            target_count += 1;
2283        } else if entry.offset > target_offset {
2284            match next_offset {
2285                Some(current) if current <= entry.offset => {}
2286                _ => next_offset = Some(entry.offset),
2287            }
2288        }
2289        if Some(entry.offset) == delta_base_offset {
2290            delta_base_oid = Some(entry.oid);
2291        }
2292    }
2293
2294    if target_count == 0 {
2295        return Err(GitError::InvalidFormat(format!(
2296            "pack index offset {target_offset} not found"
2297        )));
2298    }
2299    if let Some(offset) = delta_base_offset
2300        && delta_base_oid.is_none()
2301    {
2302        return Err(GitError::InvalidFormat(format!(
2303            "ofs-delta base offset {offset} not found"
2304        )));
2305    }
2306
2307    Ok(PackIndexOffsetInfo {
2308        // Preserve the old sorted-vector behavior for malformed indexes with
2309        // duplicate offsets: the next sorted entry has the same offset.
2310        end_offset: if target_count > 1 {
2311            target_offset
2312        } else {
2313            next_offset.unwrap_or(trailer_offset)
2314        },
2315        delta_base_oid,
2316    })
2317}
2318
2319fn pack_entry_delta_base(
2320    format: ObjectFormat,
2321    pack: &[u8],
2322    entry_offset: u64,
2323) -> Result<Option<PackDeltaBase>> {
2324    let mut cursor = usize::try_from(entry_offset)
2325        .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
2326    let first = pack_next_byte(pack, &mut cursor)?;
2327    let kind = (first >> 4) & 0x07;
2328    let mut byte = first;
2329    while byte & 0x80 != 0 {
2330        byte = pack_next_byte(pack, &mut cursor)?;
2331    }
2332    match kind {
2333        6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
2334            pack,
2335            &mut cursor,
2336            entry_offset,
2337        )?))),
2338        7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
2339            format,
2340            pack,
2341            &mut cursor,
2342        )?))),
2343        _ => Ok(None),
2344    }
2345}
2346
2347fn parse_ref_delta_base_oid(
2348    format: ObjectFormat,
2349    pack: &[u8],
2350    cursor: &mut usize,
2351) -> Result<ObjectId> {
2352    let raw_len = format.raw_len();
2353    if *cursor + raw_len > pack.len() {
2354        return Err(GitError::InvalidFormat(
2355            "truncated ref-delta base object id".into(),
2356        ));
2357    }
2358    let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
2359    *cursor += raw_len;
2360    Ok(oid)
2361}
2362
2363fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
2364    let mut byte = pack_next_byte(pack, cursor)?;
2365    let mut relative = u64::from(byte & 0x7f);
2366    while byte & 0x80 != 0 {
2367        byte = pack_next_byte(pack, cursor)?;
2368        relative = relative
2369            .checked_add(1)
2370            .and_then(|value| value.checked_shl(7))
2371            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2372            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2373    }
2374    entry_offset
2375        .checked_sub(relative)
2376        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2377}
2378
2379fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
2380    let Some(byte) = pack.get(*cursor).copied() else {
2381        return Err(GitError::InvalidFormat("truncated pack entry".into()));
2382    };
2383    *cursor += 1;
2384    Ok(byte)
2385}
2386
2387fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
2388    Ok(ObjectId::null(format))
2389}
2390
2391/// Remove `path` if it exists, treating a missing file as success.
2392fn remove_file_if_exists(path: &Path) -> Result<()> {
2393    match fs::remove_file(path) {
2394        Ok(()) => Ok(()),
2395        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
2396        Err(err) => Err(GitError::Io(err.to_string())),
2397    }
2398}
2399
2400fn walk_reachable_objects<R, I, F>(
2401    reader: &R,
2402    format: ObjectFormat,
2403    starts: I,
2404    excluded: &HashSet<ObjectId>,
2405    visit: F,
2406) -> Result<HashSet<ObjectId>>
2407where
2408    R: ObjectReader,
2409    I: IntoIterator<Item = ObjectId>,
2410    F: FnMut(&ObjectId, &Arc<EncodedObject>),
2411{
2412    walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
2413}
2414
2415/// [`walk_reachable_objects`] with an additional `cut` set: commits in `cut`
2416/// are visited (their trees and blobs too) but their parents are not followed,
2417/// mirroring a shallow client's view of its own history during negotiation.
2418fn walk_reachable_objects_with_cut<R, I, F>(
2419    reader: &R,
2420    format: ObjectFormat,
2421    starts: I,
2422    excluded: &HashSet<ObjectId>,
2423    cut: &HashSet<ObjectId>,
2424    mut visit: F,
2425) -> Result<HashSet<ObjectId>>
2426where
2427    R: ObjectReader,
2428    I: IntoIterator<Item = ObjectId>,
2429    F: FnMut(&ObjectId, &Arc<EncodedObject>),
2430{
2431    let mut seen = HashSet::new();
2432    let mut pending = Vec::new();
2433    for start in starts {
2434        pending.push(start);
2435        while let Some(oid) = pending.pop() {
2436            if excluded.contains(&oid) {
2437                continue;
2438            }
2439            if !seen.insert(oid) {
2440                continue;
2441            }
2442            let object = reader.read_object(&oid).map_err(|err| {
2443                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
2444            })?;
2445            match object.object_type {
2446                ObjectType::Commit => {
2447                    let (tree, parents) = {
2448                        let commit = Commit::parse_ref(format, &object.body)?;
2449                        (commit.tree, commit.parents)
2450                    };
2451                    visit(&oid, &object);
2452                    if !cut.contains(&oid) {
2453                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
2454                            pending.push(parent);
2455                        }
2456                    }
2457                    pending.push(tree);
2458                }
2459                ObjectType::Tree => {
2460                    let mut child_oids = Vec::new();
2461                    for entry in TreeEntries::new(format, &object.body) {
2462                        let entry = entry?;
2463                        if entry.is_gitlink() {
2464                            continue;
2465                        }
2466                        child_oids.push(entry.oid);
2467                    }
2468                    visit(&oid, &object);
2469                    pending.extend(child_oids.into_iter().rev());
2470                }
2471                ObjectType::Tag => {
2472                    let target = {
2473                        let tag = Tag::parse_ref(format, &object.body)?;
2474                        tag.object
2475                    };
2476                    visit(&oid, &object);
2477                    pending.push(target);
2478                }
2479                ObjectType::Blob => visit(&oid, &object),
2480            }
2481        }
2482    }
2483    Ok(seen)
2484}
2485
2486// ===== reachability bitmaps (.bitmap write + consult) =====
2487
/// Reads one bit of a `[u64]` bitset laid out in git's bitmap convention:
/// bit `i` lives in word `i / 64` at bit `i % 64` (LSB-first within a word).
/// Positions beyond the end of `words` read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    words
        .get((position / 64) as usize)
        .is_some_and(|word| word & mask != 0)
}
2494
/// Sets bit `position` (word `position / 64`, bit `position % 64`).
/// Positions beyond the end of `words` are silently ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
2501
/// ORs `other` into `acc` word by word. Only the words present in both
/// slices are combined; any surplus on either side is left untouched.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other.iter())
        .for_each(|(target, source)| *target |= *source);
}
2507
/// Lists the positions of all set bits in ascending order — the inverse of
/// calling [`bitset_set`] once per position.
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    let mut positions = Vec::new();
    for (word_index, &word) in words.iter().enumerate() {
        let mut rest = word;
        while rest != 0 {
            // Record the lowest set bit, then clear exactly that bit.
            let lowest = rest.trailing_zeros();
            positions.push(word_index as u32 * 64 + lowest);
            rest ^= 1u64 << lowest;
        }
    }
    positions
}
2521
/// Extracts the epoch-seconds timestamp from a commit identity line of the
/// form `Name <email> <timestamp> <tz>`.
///
/// Returns 0 whenever the timestamp field is absent, not UTF-8, or not a
/// valid `i64`, matching git's tolerance for bogus dates during bitmap commit
/// selection.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Splitting from the right yields the timezone first, the timestamp second.
    let timestamp_field = identity.rsplitn(3, |byte| *byte == b' ').nth(1);
    let Some(raw) = timestamp_field else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
2534
/// Spacing schedule for bitmap commit selection over the date-descending
/// commit list, after upstream `next_commit_index` (pack-bitmap-write.c).
///
/// Returns how many commits to skip ahead from `idx`:
/// * 0 for the first `MUST_REGION` positions,
/// * then the distance past `MUST_REGION`, capped at `MIN_COMMITS`, up to
///   `MIN_REGION`,
/// * then the distance past `MIN_REGION`, held within
///   `MIN_COMMITS..=MAX_COMMITS`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    if idx <= MUST_REGION {
        0
    } else if idx <= MIN_REGION {
        MIN_COMMITS.min(idx - MUST_REGION)
    } else {
        (idx - MIN_REGION).max(MIN_COMMITS).min(MAX_COMMITS)
    }
}
2553
2554/// Builds a serialised `.bitmap` for the pack described by `index_entries` /
2555/// `pack_checksum`, mirroring upstream pack-bitmap-write.c:
2556///
2557/// * commit selection walks the pack's commits in committer-date-descending
2558///   order through [`bitmap_next_commit_index`]'s spacing schedule, preferring
2559///   `preferred_tips` (ref tips — upstream's `NEEDS_BITMAP`) and merge commits
2560///   inside each window;
2561/// * each selected commit stores its full reachability closure (commits, trees,
2562///   blobs) as pack-order bit positions (no XOR compression — `xor_offset` 0 is
2563///   valid on disk and what readers see after resolution anyway).
2564///
2565/// Returns `Ok(None)` — mirroring upstream's warn-and-skip — when the pack
2566/// lacks full closure (a reachable object is missing from it).
2567pub fn build_pack_bitmap(
2568    db: &FileObjectDatabase,
2569    format: ObjectFormat,
2570    index_entries: &[PackIndexEntry],
2571    pack_checksum: &ObjectId,
2572    preferred_tips: &HashSet<ObjectId>,
2573) -> Result<Option<Vec<u8>>> {
2574    // `index_entries` carries no ordering guarantee (writer provenance is in
2575    // pack-write order); bit numbering follows pack (offset) order.
2576    let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
2577    by_offset.sort_by_key(|&slot| index_entries[slot].offset);
2578    let bit_order: Vec<ObjectId> = by_offset
2579        .into_iter()
2580        .map(|slot| index_entries[slot].oid)
2581        .collect();
2582    build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
2583}
2584
2585/// [`build_pack_bitmap`]'s multi-pack sibling: builds the serialised
2586/// `multi-pack-index-<checksum>.bitmap` for `midx_entries`, with bits in
2587/// pseudo-pack order (preferred pack first, then pack id, then offset — the
2588/// same order [`MultiPackIndex::write_with_reverse_index`] records in `RIDX`)
2589/// and the midx checksum in the BITM checksum field.
2590pub fn build_midx_bitmap(
2591    db: &FileObjectDatabase,
2592    format: ObjectFormat,
2593    midx_entries: &[sley_pack::MultiPackIndexEntry],
2594    midx_checksum: &ObjectId,
2595    preferred_pack: u32,
2596    preferred_tips: &HashSet<ObjectId>,
2597) -> Result<Option<Vec<u8>>> {
2598    let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
2599    pseudo.sort_by_key(|&slot| {
2600        let entry = &midx_entries[slot];
2601        (
2602            entry.pack_int_id != preferred_pack,
2603            entry.pack_int_id,
2604            entry.offset,
2605        )
2606    });
2607    let bit_order: Vec<ObjectId> = pseudo
2608        .into_iter()
2609        .map(|slot| midx_entries[slot].oid)
2610        .collect();
2611    build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
2612}
2613
/// Upstream `bitmap_builder_init`'s `num_maximal` counter (pack-bitmap-write.c):
/// walk the first-parent ancestry of the selected commits, children before
/// parents, propagating per-commit "which selected commits reach me" masks.
/// A commit counts as maximal when it is selected, or when distinct selected
/// lineages converge on it (its mask gains bits its last contributing child
/// did not carry). Only the count is needed (for the trace2 data event), so no
/// reverse-edge bookkeeping is kept.
///
/// `selected` assigns the mask bits: the commit at index `i` owns bit `i`.
///
/// # Errors
/// Propagates any failure to read or parse a commit on the first-parent
/// chains.
fn bitmap_num_maximal_commits(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    selected: &[ObjectId],
) -> Result<usize> {
    // First-parent subgraph reachable from the selected commits.
    // Maps each visited commit to its first parent (after grafts), or `None`
    // when it has no parent left to follow.
    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
    let mut stack: Vec<ObjectId> = selected.to_vec();
    while let Some(oid) = stack.pop() {
        if first_parent.contains_key(&oid) {
            continue;
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
        first_parent.insert(oid, parent);
        if let Some(parent) = parent {
            stack.push(parent);
        }
    }
    // Children-before-parents order (Kahn over the single first-parent edge).
    // `pending_children[p]` counts the commits whose first parent is `p` and
    // that have not been processed yet.
    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
    for parent in first_parent.values().flatten() {
        *pending_children.entry(*parent).or_default() += 1;
    }
    // One mask bit per selected commit.
    let word_count = selected.len().div_ceil(64);
    struct MaximalEnt {
        mask: Vec<u64>,
        maximal: bool,
    }
    // Seed: every selected commit carries its own bit and starts out maximal.
    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
    for (bit, oid) in selected.iter().enumerate() {
        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
            mask: vec![0u64; word_count],
            maximal: true,
        });
        ent.mask[bit / 64] |= 1u64 << (bit % 64);
        ent.maximal = true;
    }
    // Start from the commits that are nobody's first parent in the subgraph.
    let mut queue: Vec<ObjectId> = first_parent
        .keys()
        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
        .copied()
        .collect();
    let mut num_maximal = 0usize;
    while let Some(oid) = queue.pop() {
        // A commit's entry is consumed here: it is counted once and its mask
        // is handed on to its first parent.
        if let Some(ent) = ents.remove(&oid) {
            if ent.maximal {
                num_maximal += 1;
            }
            if let Some(Some(parent)) = first_parent.get(&oid) {
                match ents.entry(*parent) {
                    std::collections::hash_map::Entry::Vacant(vacant) => {
                        // Fresh parent mask: c_not_p, !p_not_c -> not maximal.
                        vacant.insert(MaximalEnt {
                            mask: ent.mask.clone(),
                            maximal: false,
                        });
                    }
                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
                        let parent_ent = occupied.get_mut();
                        // c_not_p: the child carries bits the parent lacks.
                        let c_not_p = ent
                            .mask
                            .iter()
                            .zip(&parent_ent.mask)
                            .any(|(child, parent)| child & !parent != 0);
                        if c_not_p {
                            // p_not_c: the parent carries bits the child lacks,
                            // evaluated before the masks are merged.
                            let p_not_c = parent_ent
                                .mask
                                .iter()
                                .zip(&ent.mask)
                                .any(|(parent, child)| parent & !child != 0);
                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
                                *parent |= child;
                            }
                            // Maximal only when the merge joined bits from
                            // both sides; otherwise the child already covered
                            // the parent's mask.
                            parent_ent.maximal = p_not_c;
                        }
                    }
                }
            }
        }
        // Release the parent once its last outstanding child has been handled.
        if let Some(Some(parent)) = first_parent.get(&oid)
            && let Some(remaining) = pending_children.get_mut(parent)
        {
            *remaining -= 1;
            if *remaining == 0 {
                queue.push(*parent);
            }
        }
    }
    Ok(num_maximal)
}
2713
/// Shared write half: `bit_order` lists every covered object's oid in bit
/// order (pack order for a single pack, pseudo-pack order for a midx);
/// `checksum` fills the BITM checksum field (pack checksum / midx checksum).
///
/// Steps: rank every object in oid-sorted order, record each object's type,
/// select commits with [`bitmap_next_commit_index`]'s schedule, compute each
/// selected commit's reachability closure as a bitset, and hand everything to
/// [`PackBitmapWriter`].
///
/// Returns `Ok(None)` when `bit_order` is empty or longer than `u32::MAX`,
/// or when a reachable object is absent from `bit_order` (no full closure).
///
/// # Errors
/// Propagates object read / parse failures and writer errors.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    // Positions are stored as `u32` below, so larger inputs are refused.
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // The on-disk entry position space is the oid-sorted lookup order (.idx /
    // midx OIDL); derive each bit-order slot's rank there.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    // index_position[bit slot] = rank of that object in oid-sorted order.
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    // Reverse lookup used by the closure walk: oid -> bit position.
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    // Object types in bit order; commits also collect (date, parent count).
    let mut object_types = Vec::with_capacity(object_count);
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Type via the header fast path: blobs (the bulk of most packs) never
        // need their bodies inflated here.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                // Counted after grafts, so a shallow boundary commit reports
                // only the parents the walk would actually follow.
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Selection: date-descending, then the spacing schedule.
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Small histories: every commit gets a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default pick is the far end of the window `i..=i + next`.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // A preferred tip wins outright and ends the scan.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    // Otherwise the last merge commit in the window wins.
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // Trace2 selection counters (upstream bitmap_builder_init): emitted before
    // the closure walk, like upstream emits them before building the ewah
    // bitmaps. Computing num_maximal_commits needs its own first-parent walk,
    // so it only runs when the trace2 event target is active.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    // Reachability closures, oldest-first so newer walks stop at memoised
    // older selected commits.
    let word_count = object_count.div_ceil(64);
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        // `pending` only ever holds commit oids: the start commit and parents.
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                // Mirrors upstream's "Packfile doesn't have full closure".
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            // Already covered by this walk (directly or via a merged memo).
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            // A previously finished selected commit: reuse its whole closure.
            if let Some(stored) = memo.get(&oid) {
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            // `false` means an object under the tree is missing (already warned).
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    // Entries are written in selection (date-descending) order.
    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
2865
2866/// Marks `tree` and everything below it (sub-trees, blobs) in `acc`, skipping
2867/// already-set bits (their closure is already covered). Returns `false` when an
2868/// object is missing from the pack (no full closure), after warning.
2869fn bitmap_mark_tree(
2870    db: &impl ObjectReader,
2871    format: ObjectFormat,
2872    tree: &ObjectId,
2873    oid_to_pack: &HashMap<ObjectId, u32>,
2874    acc: &mut [u64],
2875) -> Result<bool> {
2876    let Some(&pack_pos) = oid_to_pack.get(tree) else {
2877        eprintln!(
2878            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
2879        );
2880        return Ok(false);
2881    };
2882    if bitset_get(acc, pack_pos) {
2883        return Ok(true);
2884    }
2885    bitset_set(acc, pack_pos);
2886    let object = db.read_object(tree)?;
2887    for entry in TreeEntries::new(format, &object.body) {
2888        let entry = entry?;
2889        if entry.is_gitlink() {
2890            continue;
2891        }
2892        if entry.is_tree() {
2893            if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
2894                return Ok(false);
2895            }
2896        } else {
2897            let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
2898                eprintln!(
2899                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
2900                    entry.oid
2901                );
2902                return Ok(false);
2903            };
2904            bitset_set(acc, blob_pos);
2905        }
2906    }
2907    Ok(true)
2908}
2909
2910/// A pack's `.bitmap` loaded for consultation: oid <-> pack-position mappings,
2911/// resolved (XOR-expanded) per-commit reachability bitsets, and the four object
2912/// type bitmaps. Bit numbering follows pack order throughout.
2913pub struct LoadedPackBitmap {
2914    object_count: u32,
2915    oid_to_pack: HashMap<ObjectId, u32>,
2916    pack_to_oid: Vec<ObjectId>,
2917    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
2918    commits: Vec<u64>,
2919    trees: Vec<u64>,
2920    blobs: Vec<u64>,
2921    tags: Vec<u64>,
2922}
2923
2924impl LoadedPackBitmap {
2925    pub fn object_count(&self) -> u32 {
2926        self.object_count
2927    }
2928
2929    /// Pack-order position of `oid`, when the object is in the bitmapped pack.
2930    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
2931        self.oid_to_pack.get(oid).copied()
2932    }
2933
2934    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
2935        self.pack_to_oid.get(position as usize)
2936    }
2937
2938    /// The resolved reachability bitset stored for `oid`, when it was one of
2939    /// the writer's selected commits.
2940    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
2941        self.commit_words.get(oid)
2942    }
2943
2944    /// Oids of every commit with a stored bitmap entry (unordered).
2945    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
2946        self.commit_words.keys()
2947    }
2948
2949    /// The type bitmap for `object_type` (bit per pack position).
2950    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
2951        match object_type {
2952            ObjectType::Commit => &self.commits,
2953            ObjectType::Tree => &self.trees,
2954            ObjectType::Blob => &self.blobs,
2955            ObjectType::Tag => &self.tags,
2956        }
2957    }
2958
2959    fn word_count(&self) -> usize {
2960        (self.object_count as usize).div_ceil(64)
2961    }
2962}
2963
2964/// Loads the single-pack `.bitmap` of `objects_dir/pack`, if a valid one
2965/// exists. Scans `pack-*.bitmap` files (sorted, first valid wins, like
2966/// upstream's "first bitmap" behaviour), requires the sibling `.idx`, and
2967/// verifies the recorded pack checksum. Any unreadable/corrupt bitmap yields
2968/// `Ok(None)` — consumers fall back to a regular object walk, mirroring
2969/// upstream's warn-and-ignore on bitmap load failure.
2970pub fn load_pack_bitmap(
2971    objects_dir: &Path,
2972    format: ObjectFormat,
2973) -> Result<Option<LoadedPackBitmap>> {
2974    let pack_dir = objects_dir.join("pack");
2975    if !pack_dir.exists() {
2976        return Ok(None);
2977    }
2978    // A multi-pack bitmap wins over single-pack bitmaps, like upstream's
2979    // open_bitmap trying the midx first.
2980    if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
2981        return Ok(Some(bitmap));
2982    }
2983    let mut bitmap_paths = Vec::new();
2984    for entry in fs::read_dir(&pack_dir)? {
2985        let path = entry?.path();
2986        if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
2987            && path
2988                .file_name()
2989                .and_then(|name| name.to_str())
2990                .is_some_and(|name| name.starts_with("pack-"))
2991        {
2992            bitmap_paths.push(path);
2993        }
2994    }
2995    bitmap_paths.sort();
2996    for bitmap_path in bitmap_paths {
2997        match load_pack_bitmap_file(&bitmap_path, format) {
2998            Ok(Some(bitmap)) => return Ok(Some(bitmap)),
2999            Ok(None) | Err(_) => continue,
3000        }
3001    }
3002    Ok(None)
3003}
3004
3005/// Loads `multi-pack-index-<checksum>.bitmap` when the pack directory has a
3006/// multi-pack-index with a `RIDX` chunk (the bit-order permutation) and a
3007/// matching bitmap file. Returns `Ok(None)` — never an error — on any missing
3008/// or unusable piece, so callers fall through to single-pack bitmaps.
3009fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
3010    let midx_path = pack_dir.join("multi-pack-index");
3011    if !midx_path.exists() {
3012        return Ok(None);
3013    }
3014    let Ok(midx_bytes) = fs::read(&midx_path) else {
3015        return Ok(None);
3016    };
3017    if midx_has_bad_ridx_chunk(&midx_bytes, format) {
3018        eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
3019        eprintln!("warning: multi-pack bitmap is missing required reverse index");
3020        return Ok(None);
3021    }
3022    let midx = match MultiPackIndex::parse(&midx_bytes, format) {
3023        Ok(midx) => midx,
3024        Err(GitError::InvalidFormat(message))
3025            if message == "multi-pack-index reverse-index chunk is the wrong size" =>
3026        {
3027            eprintln!("error: {message}");
3028            eprintln!("warning: multi-pack bitmap is missing required reverse index");
3029            return Ok(None);
3030        }
3031        Err(_) => return Ok(None),
3032    };
3033    let bitmap_path = pack_dir.join(format!(
3034        "multi-pack-index-{}.bitmap",
3035        midx.checksum.to_hex()
3036    ));
3037    if !bitmap_path.exists() {
3038        return Ok(None);
3039    }
3040    let object_count = midx.objects.len();
3041    // Upstream `load_midx_revindex`: prefer the midx's own RIDX chunk unless
3042    // GIT_TEST_MIDX_READ_RIDX=0 disables it, else fall back to the separate
3043    // `multi-pack-index-<checksum>.rev` file; a trace2 data event records
3044    // which source supplied the permutation.
3045    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
3046        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
3047        .unwrap_or(true);
3048    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
3049        (Some(chunk), true) => {
3050            sley_core::trace2::data("load_midx_revindex", "source", "midx");
3051            chunk.clone()
3052        }
3053        _ => {
3054            let rev_path =
3055                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
3056            let Ok(rev_bytes) = fs::read(&rev_path) else {
3057                // Without the RIDX permutation the bit numbering is unknown.
3058                return Ok(None);
3059            };
3060            let Ok(parsed_rev) =
3061                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
3062            else {
3063                return Ok(None);
3064            };
3065            sley_core::trace2::data("load_midx_revindex", "source", "rev");
3066            parsed_rev.positions
3067        }
3068    };
3069    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
3070        return Ok(None);
3071    };
3072    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
3073        Ok(parsed) => parsed,
3074        Err(_) => return Ok(None),
3075    };
3076    if parsed.pack_checksum != midx.checksum {
3077        return Ok(None);
3078    }
3079
3080    // midx.objects is in lookup (oid-sorted) order; RIDX maps bit positions
3081    // to lookup positions.
3082    let mut pack_to_oid = Vec::with_capacity(object_count);
3083    for &midx_pos in &reverse_index {
3084        let Some(entry) = midx.objects.get(midx_pos as usize) else {
3085            return Ok(None);
3086        };
3087        pack_to_oid.push(entry.oid);
3088    }
3089    let mut oid_to_pack = HashMap::with_capacity(object_count);
3090    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
3091        oid_to_pack.insert(*oid, pack_pos as u32);
3092    }
3093    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
3094        midx.objects.get(position).map(|entry| entry.oid)
3095    }) {
3096        Ok(loaded) => Ok(Some(loaded)),
3097        Err(_) => Ok(None),
3098    }
3099}
3100
3101fn midx_has_bad_ridx_chunk(bytes: &[u8], format: ObjectFormat) -> bool {
3102    let hash_len = format.raw_len();
3103    if bytes.len() < 12 + 12 + hash_len || &bytes[..4] != b"MIDX" {
3104        return false;
3105    }
3106    let chunk_count = bytes[6] as usize;
3107    let table_len = match (chunk_count + 1).checked_mul(12) {
3108        Some(table_len) => table_len,
3109        None => return false,
3110    };
3111    let table_end = match 12usize.checked_add(table_len) {
3112        Some(table_end) if table_end <= bytes.len().saturating_sub(hash_len) => table_end,
3113        _ => return false,
3114    };
3115    let mut entries = Vec::with_capacity(chunk_count + 1);
3116    let mut cursor = 12usize;
3117    while cursor < table_end {
3118        let id = [
3119            bytes[cursor],
3120            bytes[cursor + 1],
3121            bytes[cursor + 2],
3122            bytes[cursor + 3],
3123        ];
3124        let mut raw_offset = [0u8; 8];
3125        raw_offset.copy_from_slice(&bytes[cursor + 4..cursor + 12]);
3126        entries.push((id, u64::from_be_bytes(raw_offset) as usize));
3127        cursor += 12;
3128    }
3129    let mut oidf = None;
3130    let mut ridx = None;
3131    for pair in entries.windows(2) {
3132        let start = pair[0].1;
3133        let end = pair[1].1;
3134        if end < start || end > bytes.len().saturating_sub(hash_len) {
3135            return false;
3136        }
3137        match &pair[0].0 {
3138            b"OIDF" => oidf = Some((start, end)),
3139            b"RIDX" => ridx = Some((start, end)),
3140            _ => {}
3141        }
3142    }
3143    let Some((oidf_start, oidf_end)) = oidf else {
3144        return false;
3145    };
3146    let Some((ridx_start, ridx_end)) = ridx else {
3147        return false;
3148    };
3149    if oidf_end.saturating_sub(oidf_start) != 256 * 4 {
3150        return false;
3151    }
3152    let object_count_start = oidf_end - 4;
3153    let object_count = u32::from_be_bytes([
3154        bytes[object_count_start],
3155        bytes[object_count_start + 1],
3156        bytes[object_count_start + 2],
3157        bytes[object_count_start + 3],
3158    ]) as usize;
3159    ridx_end.saturating_sub(ridx_start) != object_count.saturating_mul(4)
3160}
3161
3162fn load_pack_bitmap_file(
3163    bitmap_path: &Path,
3164    format: ObjectFormat,
3165) -> Result<Option<LoadedPackBitmap>> {
3166    let index_path = bitmap_path.with_extension("idx");
3167    if !index_path.exists() {
3168        return Ok(None);
3169    }
3170    let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
3171    let object_count = index.entries.len();
3172    let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
3173    if parsed.pack_checksum != index.pack_checksum {
3174        return Ok(None);
3175    }
3176
3177    let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
3178    pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
3179    let mut pack_to_oid = Vec::with_capacity(object_count);
3180    for index_pos in &pack_order {
3181        pack_to_oid.push(index.entries[*index_pos as usize].oid);
3182    }
3183    let mut oid_to_pack = HashMap::with_capacity(object_count);
3184    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
3185        oid_to_pack.insert(*oid, pack_pos as u32);
3186    }
3187
3188    assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
3189        index.entries.get(position).map(|entry| entry.oid)
3190    })
3191    .map(Some)
3192}
3193
3194/// Shared tail of the bitmap loaders: expands the type bitmaps, resolves the
3195/// per-commit entries (XOR offsets reference earlier entries in file order),
3196/// and maps each entry's lookup-order position back to a commit oid via
3197/// `lookup_oid`.
3198fn assemble_loaded_bitmap(
3199    parsed: PackBitmapIndex,
3200    object_count: usize,
3201    pack_to_oid: Vec<ObjectId>,
3202    oid_to_pack: HashMap<ObjectId, u32>,
3203    lookup_oid: impl Fn(usize) -> Option<ObjectId>,
3204) -> Result<LoadedPackBitmap> {
3205    let word_count = object_count.div_ceil(64);
3206    let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
3207        let mut words = bitmap.to_words()?;
3208        words.resize(word_count, 0);
3209        Ok(words)
3210    };
3211
3212    let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
3213    let mut commit_words = HashMap::with_capacity(parsed.entries.len());
3214    for (entry_index, entry) in parsed.entries.iter().enumerate() {
3215        let mut words = expand(&entry.bitmap)?;
3216        if entry.xor_offset > 0 {
3217            let base_index = entry_index - entry.xor_offset as usize;
3218            let base = &resolved[base_index];
3219            for (dst, src) in words.iter_mut().zip(base.iter()) {
3220                *dst ^= *src;
3221            }
3222        }
3223        let words = Arc::new(words);
3224        resolved.push(Arc::clone(&words));
3225        let commit_oid = lookup_oid(entry.object_position as usize)
3226            .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
3227        commit_words.insert(commit_oid, words);
3228    }
3229
3230    Ok(LoadedPackBitmap {
3231        object_count: object_count as u32,
3232        oid_to_pack,
3233        pack_to_oid,
3234        commit_words,
3235        commits: expand(&parsed.type_bitmaps.commits)?,
3236        trees: expand(&parsed.type_bitmaps.trees)?,
3237        blobs: expand(&parsed.type_bitmaps.blobs)?,
3238        tags: expand(&parsed.type_bitmaps.tags)?,
3239    })
3240}
3241
/// Result of a bitmap-assisted reachability walk: pack-position bits for
/// in-pack objects plus the "extended" objects encountered outside the
/// bitmapped pack (in first-seen order, like upstream's extended index).
pub struct BitmapWalkResult {
    /// Bitset over pack positions, packed 64 positions per word; a set bit
    /// means the object at that position was reached.
    pub words: Vec<u64>,
    /// Reached objects that have no position in the bitmapped pack, each with
    /// the type it was recorded under, in the order they were first seen.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
3249
3250impl BitmapWalkResult {
3251    /// Removes everything reachable in `haves` from this result.
3252    pub fn subtract(&mut self, haves: &BitmapWalkResult) {
3253        for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
3254            *dst &= !*src;
3255        }
3256        let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
3257        self.extended.retain(|(oid, _)| !have_ext.contains(oid));
3258    }
3259}
3260
3261/// Computes the set of objects reachable from `roots` using stored bitmaps
3262/// where available and a fill-in object walk where not — the consult half of
3263/// the bitmap engine (upstream `find_objects` + `fill_in_bitmap`).
3264///
3265/// Roots may be any object type; tag chains are peeled with every tag object
3266/// itself included, like the pending-object handling in
3267/// `prepare_bitmap_walk`. When `include_objects` is false only commits are
3268/// walked (tree contents of fill-in commits are not marked) — callers that
3269/// only count/enumerate commits mask with the commit type bitmap, so the
3270/// extra non-commit bits OR-ed in from stored (closed) bitmaps are harmless.
3271pub fn bitmap_reachable(
3272    bitmap: &LoadedPackBitmap,
3273    db: &impl ObjectReader,
3274    format: ObjectFormat,
3275    roots: &[ObjectId],
3276    include_objects: bool,
3277) -> Result<BitmapWalkResult> {
3278    let mut walk = BitmapFillWalk {
3279        bitmap,
3280        words: vec![0u64; bitmap.word_count()],
3281        extended: Vec::new(),
3282        extended_seen: HashSet::new(),
3283    };
3284    let mut commit_stack: Vec<ObjectId> = Vec::new();
3285
3286    for root in roots {
3287        let mut oid = *root;
3288        // Peel tag chains, marking each tag object on the way.
3289        loop {
3290            let object = db.read_object(&oid)?;
3291            match object.object_type {
3292                ObjectType::Tag => {
3293                    walk.mark(&oid, ObjectType::Tag);
3294                    let tag = Tag::parse_ref(format, &object.body)?;
3295                    oid = tag.object;
3296                }
3297                ObjectType::Commit => {
3298                    commit_stack.push(oid);
3299                    break;
3300                }
3301                ObjectType::Tree => {
3302                    walk.mark_tree_closure(db, format, &oid)?;
3303                    break;
3304                }
3305                ObjectType::Blob => {
3306                    walk.mark(&oid, ObjectType::Blob);
3307                    break;
3308                }
3309            }
3310        }
3311    }
3312
3313    while let Some(oid) = commit_stack.pop() {
3314        if let Some(position) = bitmap.pack_position(&oid) {
3315            if bitset_get(&walk.words, position) {
3316                continue;
3317            }
3318            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
3319                bitset_or(&mut walk.words, stored);
3320                continue;
3321            }
3322            bitset_set(&mut walk.words, position);
3323        } else {
3324            if walk.extended_seen.contains(&oid) {
3325                continue;
3326            }
3327            walk.extended_seen.insert(oid);
3328            walk.extended.push((oid, ObjectType::Commit));
3329        }
3330        let object = db.read_object(&oid)?;
3331        let commit = Commit::parse_ref(format, &object.body)?;
3332        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
3333        if include_objects {
3334            walk.mark_tree_closure(db, format, &commit.tree)?;
3335        }
3336    }
3337
3338    Ok(BitmapWalkResult {
3339        words: walk.words,
3340        extended: walk.extended,
3341    })
3342}
3343
/// Mutable state of one `bitmap_reachable` walk.
struct BitmapFillWalk<'a> {
    /// Loaded bitmap used to translate object ids into pack positions.
    bitmap: &'a LoadedPackBitmap,
    /// Bitset over pack positions of the objects reached so far.
    words: Vec<u64>,
    /// Reached objects without a pack position, in first-seen order.
    extended: Vec<(ObjectId, ObjectType)>,
    /// Ids already pushed to `extended`, so each is recorded only once.
    extended_seen: HashSet<ObjectId>,
}
3350
3351impl BitmapFillWalk<'_> {
3352    /// Marks one object; returns false when it was already marked.
3353    fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
3354        if let Some(position) = self.bitmap.pack_position(oid) {
3355            if bitset_get(&self.words, position) {
3356                return false;
3357            }
3358            bitset_set(&mut self.words, position);
3359            true
3360        } else {
3361            if !self.extended_seen.insert(*oid) {
3362                return false;
3363            }
3364            self.extended.push((*oid, object_type));
3365            true
3366        }
3367    }
3368
3369    /// Marks `tree` and everything below it, skipping subtrees already marked
3370    /// (a set in-pack bit means its closure is covered: either it came from a
3371    /// stored — closed — bitmap, or this walk already expanded it).
3372    fn mark_tree_closure(
3373        &mut self,
3374        db: &impl ObjectReader,
3375        format: ObjectFormat,
3376        tree: &ObjectId,
3377    ) -> Result<()> {
3378        if !self.mark(tree, ObjectType::Tree) {
3379            return Ok(());
3380        }
3381        let object = db.read_object(tree)?;
3382        for entry in TreeEntries::new(format, &object.body) {
3383            let entry = entry?;
3384            if entry.is_gitlink() {
3385                continue;
3386            }
3387            if entry.is_tree() {
3388                self.mark_tree_closure(db, format, &entry.oid)?;
3389            } else {
3390                self.mark(&entry.oid, ObjectType::Blob);
3391            }
3392        }
3393        Ok(())
3394    }
3395}
3396
/// In-memory object store: a map from object id to encoded object, with no
/// backing files.
#[derive(Debug)]
pub struct ObjectDatabase {
    /// Object format used when computing ids for stored objects.
    format: ObjectFormat,
    // Behind a `Mutex` so `write_object` can take `&self` (matching the
    // `ObjectWriter` trait) and a single handle can interleave reads and writes
    // without a `&mut` borrow — the same shared-by-`&` shape the file-backed
    // database uses for its caches. Removes the need for callers to wrap this in
    // a `RefCell`/`&mut` just to write (see sley-fetch's former `RefCell` dance).
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    /// Flag set through `with_promisor` (defaults to `false`).
    /// NOTE(review): not read anywhere in this section — confirm where it is consumed.
    promisor: bool,
}
3408
3409impl ObjectDatabase {
3410    pub fn new(format: ObjectFormat) -> Self {
3411        Self {
3412            format,
3413            objects: Mutex::new(HashMap::new()),
3414            promisor: false,
3415        }
3416    }
3417
3418    pub fn with_promisor(mut self, promisor: bool) -> Self {
3419        self.promisor = promisor;
3420        self
3421    }
3422
3423    pub fn contains(&self, oid: &ObjectId) -> bool {
3424        self.objects
3425            .lock()
3426            .map(|objects| objects.contains_key(oid))
3427            .unwrap_or(false)
3428    }
3429
3430    pub fn validate(&self, oid: &ObjectId) -> Result<()> {
3431        let object = self.read_object(oid)?;
3432        let actual = object.object_id(self.format)?;
3433        if &actual == oid {
3434            Ok(())
3435        } else {
3436            Err(GitError::InvalidObject(format!(
3437                "object id mismatch: expected {oid}, got {actual}"
3438            )))
3439        }
3440    }
3441}
3442
3443impl ObjectReader for ObjectDatabase {
3444    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3445        self.objects
3446            .lock()
3447            .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
3448            .get(oid)
3449            .map(Arc::clone)
3450            .or_else(|| implied_empty_tree_object(self.format, oid))
3451            .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
3452    }
3453}
3454
3455impl ObjectWriter for ObjectDatabase {
3456    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3457        let oid = object.object_id(self.format)?;
3458        self.objects
3459            .lock()
3460            .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
3461            .entry(oid)
3462            .or_insert_with(|| Arc::new(object));
3463        Ok(oid)
3464    }
3465}
3466
/// A single alternate, identified by filesystem path.
/// NOTE(review): presumably an alternate object directory (as listed in
/// `objects/info/alternates`) — confirm against the code that builds these.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
3471
/// Settings describing how a partial clone treats objects that are absent
/// locally.
/// NOTE(review): semantics inferred from the field names only — confirm
/// against the consumers of this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether a missing object that the promisor is expected to provide is
    /// tolerated (presumably instead of being reported as an error).
    pub allow_missing_promised_objects: bool,
}
3477
/// Raw pack-file bytes keyed by pack path, shared across cloned handles. Loaded
/// once so individual objects can be decoded at their offsets (see
/// [`sley_pack::read_object_at`]) without re-reading the whole file per read.
///
/// Entries are `Arc`-shared, so a lookup hands out a cheap clone of the same
/// backing bytes rather than a copy.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
3482
/// Backing storage for one pack file's bytes.
///
/// With the `mmap` feature the bytes may be memory-mapped; otherwise (or as a
/// fallback) they live in a heap buffer. Either way the value dereferences to
/// `&[u8]`, so decoding code does not care which variant it holds.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file contents.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read into an owned buffer.
    Heap(Vec<u8>),
}

impl std::ops::Deref for PackData {
    type Target = [u8];

    /// Borrows the pack bytes irrespective of how they are stored.
    fn deref(&self) -> &[u8] {
        match self {
            Self::Heap(bytes) => bytes.as_slice(),
            #[cfg(feature = "mmap")]
            Self::Mapped(mapped) => mapped,
        }
    }
}
3503
3504/// Load a pack file's bytes: memory-mapped when the `mmap` feature is on (falling
3505/// back to a heap read if the map fails), otherwise read into the heap.
3506#[cfg(feature = "mmap")]
3507fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3508    match sley_mmap::MappedFile::open_pack(pack_path) {
3509        Ok(mapped) => Ok(PackData::Mapped(mapped)),
3510        Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
3511    }
3512}
3513
3514#[cfg(not(feature = "mmap"))]
3515fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3516    Ok(PackData::Heap(fs::read(pack_path)?))
3517}
3518
3519#[cfg(feature = "mmap")]
3520fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3521    match sley_mmap::MappedFile::open_pack(index_path) {
3522        Ok(mapped) => Ok(Arc::new(mapped)),
3523        Err(_) => Ok(Arc::new(fs::read(index_path)?)),
3524    }
3525}
3526
3527#[cfg(not(feature = "mmap"))]
3528fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3529    Ok(Arc::new(fs::read(index_path)?))
3530}
3531
3532#[cfg(feature = "mmap")]
3533fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3534    match sley_mmap::MappedFile::open_multi_pack_index(midx_path) {
3535        Ok(mapped) => Ok(Arc::new(mapped)),
3536        Err(_) => Ok(Arc::new(fs::read(midx_path)?)),
3537    }
3538}
3539
3540#[cfg(not(feature = "mmap"))]
3541fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3542    Ok(Arc::new(fs::read(midx_path)?))
3543}
3544
/// Memory-capped LRU of recently decoded objects, keyed by object id and
/// shared across cloned handles, so hot delta bases and repeated reads during
/// a walk aren't re-decoded. The cache is bounded by an approximate byte
/// budget (not a fixed object count) so it neither thrashes on bulk reads of
/// small objects nor blows up on a few large ones.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;
3551
/// Per-pack caches of objects decoded from a pack, keyed by pack path and then by
/// the in-pack byte offset of each object's entry. Shared across cloned handles.
/// This is the delta-base cache: resolving a delta chain by offset reuses already
/// decoded bases instead of re-inflating the whole chain on every read.
///
/// Each pack's cache sits behind its own `Mutex`, separate from the lock on
/// the outer path-keyed map.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
3557
/// One pack's offset-keyed header memo (see [`PackHeaderTypeCaches`]): maps an
/// in-pack offset to the resolved `(ObjectType, u64)` header.
/// NOTE(review): the `u64` is presumably the object's size — confirm against
/// `sley_pack::HeaderTypeCache`.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Per-pack memo of `in-pack offset -> end-of-chain object type` for the
/// `cat-file --batch-check` header fast path. Resolving a packed delta's *type*
/// walks the delta chain to its base; without this memo every header read
/// re-walks (and re-inflates) the whole chain, so reading every object in a
/// deeply-deltified pack is super-linear (sley#26). The type only depends on the
/// chain base, so memoizing `offset -> type` lets each chain be walked at most
/// once across a batch. Keyed by pack path so an offset key is never applied to
/// the wrong pack's bytes; shared across cloned handles.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
3570
/// Default approximate byte budget for the decoded-object LRU (96 MiB). Sized to
/// comfortably hold the working set of a history walk (commits/trees/blobs and
/// their delta bases) without growing without bound on large repositories.
/// Overridable via the `SLEY_OBJECT_CACHE_BYTES` environment variable; there is
/// currently no git-config hook threaded into the object database, so this
/// constant is the default.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;
3577
/// Default approximate byte budget for each per-pack delta-base cache (96 MiB
/// per pack, not in total). Holds the decoded bases of the delta chains being
/// walked so neighboring reads stay warm.
/// Overridable via `SLEY_DELTA_BASE_CACHE_BYTES`.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
3582
3583/// Approximate heap cost of caching one [`EncodedObject`]: its body plus a fixed
3584/// allowance for the key, enum/`Vec` headers, and per-entry map overhead. Used
3585/// only to drive eviction, so an estimate is fine.
3586fn cached_object_cost(object: &EncodedObject) -> usize {
3587    object.body.len().saturating_add(64)
3588}
3589
/// Reads a byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as a `usize`. `default` is returned when
/// the variable is unset, is not valid Unicode, or does not parse. A parsed
/// value of `0` is returned as-is; callers treat it as "cache disabled".
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
3598
3599/// Approximate byte budget for the decoded-object LRU (see
3600/// [`DEFAULT_OBJECT_CACHE_BYTES`], `SLEY_OBJECT_CACHE_BYTES`).
3601///
3602/// Resolved once per process: the environment does not change under us, and a new
3603/// `FileObjectDatabase` is built often enough (e.g. once per revision resolved)
3604/// that re-reading the variable each time showed up as per-object overhead.
3605fn object_cache_budget() -> usize {
3606    static BUDGET: OnceLock<usize> = OnceLock::new();
3607    *BUDGET.get_or_init(|| {
3608        cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
3609    })
3610}
3611
3612/// Approximate byte budget for each per-pack delta-base cache (see
3613/// [`DEFAULT_DELTA_BASE_CACHE_BYTES`], `SLEY_DELTA_BASE_CACHE_BYTES`). Resolved
3614/// once per process for the same reason as [`object_cache_budget`].
3615fn delta_base_cache_budget() -> usize {
3616    static BUDGET: OnceLock<usize> = OnceLock::new();
3617    *BUDGET.get_or_init(|| {
3618        cache_budget_from_env(
3619            "SLEY_DELTA_BASE_CACHE_BYTES",
3620            DEFAULT_DELTA_BASE_CACHE_BYTES,
3621        )
3622    })
3623}
3624
/// Whether every object should be re-hashed on read and compared to the
/// requested id.
///
/// Off by default, matching git: reads trust the pack index → offset mapping
/// and the loose object's on-disk name, and ids are verified where git
/// verifies them — when a pack is received (the index build re-hashes every
/// object) and on demand via [`FileObjectDatabase`]'s `validate`/fsck.
/// Re-hashing on *every* read dominated bulk-read cost (a scalar pure-Rust
/// SHA-1 over each object's full body), so it is opt-in: set
/// `SLEY_VERIFY_READS` to anything other than the empty string or `0`
/// (surrounding whitespace is ignored) to turn the paranoid check back on.
///
/// The variable is read on the first call only; the answer is cached for the
/// lifetime of the process.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|raw| {
                let setting = raw.trim();
                !(setting.is_empty() || setting == "0")
            })
            .unwrap_or(false)
    })
}
3642
/// A memory-capped LRU map from a key `K` to a decoded [`EncodedObject`].
///
/// Eviction is by approximate byte budget (gix-style), not object count, so the
/// cache adapts to object size. On access an entry is moved to most-recently-used;
/// on insert, least-recently-used entries are dropped until the budget holds. A
/// budget of `0` makes the cache inert. Generic over the key so it backs both the
/// oid-keyed decoded-object cache and the offset-keyed delta-base cache.
///
/// Recency is tracked as a doubly linked list threaded through the map's
/// entries by key (each entry stores the keys of its neighbors), so there is
/// no separate queue structure.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    /// Maximum total cost (see `cached_object_cost`) the cache may hold.
    budget: usize,
    /// Sum of the costs of all entries currently held.
    used: usize,
    /// Entries by key; each also carries its recency-list links.
    map: HashMap<K, LruEntry<K>>,
    /// Least-recently-used key — the next eviction victim.
    head: Option<K>,
    /// Most-recently-used key — where touched/inserted entries are attached.
    tail: Option<K>,
}
3658
/// One cached object plus its links in the owning cache's recency list.
#[derive(Debug)]
struct LruEntry<K> {
    /// The cached object.
    object: Arc<EncodedObject>,
    /// Key of the neighbor toward the least-recently-used end (`None` at the head).
    prev: Option<K>,
    /// Key of the neighbor toward the most-recently-used end (`None` at the tail).
    next: Option<K>,
}
3665
3666impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
3667    fn new(budget: usize) -> Self {
3668        Self {
3669            budget,
3670            used: 0,
3671            map: HashMap::new(),
3672            head: None,
3673            tail: None,
3674        }
3675    }
3676
3677    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
3678        let object = Arc::clone(&self.map.get(key)?.object);
3679        self.touch(key);
3680        Some(object)
3681    }
3682
3683    /// Move `key` to the most-recently-used end in O(1).
3684    fn touch(&mut self, key: &K) {
3685        if self.tail.as_ref() == Some(key) {
3686            return;
3687        }
3688        if self.map.contains_key(key) {
3689            self.detach(key);
3690            self.attach_back(key.clone());
3691        }
3692    }
3693
3694    /// Drop `key` from both the map and the recency queue, releasing its budget.
3695    fn remove(&mut self, key: &K) {
3696        if let Some(entry) = self.map.get(key) {
3697            self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
3698        }
3699        self.detach(key);
3700        self.map.remove(key);
3701    }
3702
3703    fn detach(&mut self, key: &K) {
3704        let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
3705            let prev = entry.prev.take();
3706            let next = entry.next.take();
3707            (prev, next)
3708        }) else {
3709            return;
3710        };
3711
3712        match &prev {
3713            Some(prev_key) => {
3714                if let Some(prev_entry) = self.map.get_mut(prev_key) {
3715                    prev_entry.next = next.clone();
3716                }
3717            }
3718            None => self.head = next.clone(),
3719        }
3720        match &next {
3721            Some(next_key) => {
3722                if let Some(next_entry) = self.map.get_mut(next_key) {
3723                    next_entry.prev = prev.clone();
3724                }
3725            }
3726            None => self.tail = prev.clone(),
3727        }
3728    }
3729
3730    fn attach_back(&mut self, key: K) {
3731        let previous_tail = self.tail.replace(key.clone());
3732        match previous_tail {
3733            Some(tail_key) => {
3734                if let Some(tail_entry) = self.map.get_mut(&tail_key) {
3735                    tail_entry.next = Some(key.clone());
3736                }
3737                if let Some(entry) = self.map.get_mut(&key) {
3738                    entry.prev = Some(tail_key);
3739                    entry.next = None;
3740                }
3741            }
3742            None => {
3743                self.head = Some(key.clone());
3744                if let Some(entry) = self.map.get_mut(&key) {
3745                    entry.prev = None;
3746                    entry.next = None;
3747                }
3748            }
3749        }
3750    }
3751
3752    fn clear(&mut self) {
3753        self.map.clear();
3754        self.head = None;
3755        self.tail = None;
3756        self.used = 0;
3757    }
3758
3759    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
3760        if self.budget == 0 {
3761            return;
3762        }
3763        let cost = cached_object_cost(&object);
3764        // A single object larger than the whole budget is not worth caching; it
3765        // would immediately evict everything including itself. Drop any stale
3766        // smaller entry stored under the same key so accounting stays exact.
3767        if cost > self.budget {
3768            self.remove(&key);
3769            return;
3770        }
3771        if let Some(entry) = self.map.get_mut(&key) {
3772            let previous = std::mem::replace(&mut entry.object, object);
3773            // Replacing an existing entry: adjust accounting and refresh recency.
3774            self.used = self
3775                .used
3776                .saturating_sub(cached_object_cost(&previous))
3777                .saturating_add(cost);
3778            self.touch(&key);
3779        } else {
3780            self.used = self.used.saturating_add(cost);
3781            self.map.insert(
3782                key.clone(),
3783                LruEntry {
3784                    object,
3785                    prev: None,
3786                    next: None,
3787                },
3788            );
3789            self.attach_back(key);
3790        }
3791        while self.used > self.budget {
3792            let Some(evicted) = self.head.clone() else {
3793                break;
3794            };
3795            self.remove(&evicted);
3796        }
3797    }
3798}
3799
/// Decoded-object cache keyed by object id (loose + packed reads share it).
/// A plain instantiation of [`LruCache`]; all eviction rules live there.
type LruObjectCache = LruCache<ObjectId>;
/// Delta-base cache keyed by in-pack byte offset, scoped to one pack.
/// A plain instantiation of [`LruCache`]; all eviction rules live there.
type LruOffsetCache = LruCache<u64>;
3804
/// Bridges the offset-keyed [`LruOffsetCache`] to [`sley_pack::PackDeltaCache`]
/// so the pack decoder can reuse decoded delta bases. Holds the shared cache
/// behind its mutex; a poisoned lock simply behaves as a cache miss/no-op, so a
/// decode still completes correctly (just without reuse).
///
/// The adapter only borrows the cache, so it is meant to be created per
/// decode call rather than stored.
struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
3810
3811impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
3812    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
3813        self.0.lock().ok()?.get(&offset)
3814    }
3815
3816    fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
3817        if let Ok(mut cache) = self.0.lock() {
3818            cache.put(offset, object);
3819        }
3820    }
3821}
3822
/// Bridges a per-pack `offset -> (ObjectType, u64)` header memo into the
/// header fast path so the ofs-delta chain walk is performed at most once per
/// chain across a batch of `read_object_header` calls (sley#26).
///
/// Borrows the pack's shared memo; a poisoned lock behaves as a miss/no-op.
struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
3827
3828impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
3829    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
3830        self.0.lock().ok()?.get(&pack_offset).copied()
3831    }
3832
3833    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
3834        if let Ok(mut cache) = self.0.lock() {
3835            cache.insert(pack_offset, header);
3836        }
3837    }
3838}
3839
/// Parsed pack indexes keyed by `.idx` path, shared across cloned handles. This
/// remains for MIDX and path-only fallback lookups; normal pack-directory scans
/// use [`PackRegistrySnapshot`] so the lookup hot path can walk already-parsed
/// pack records directly.
///
/// Holds fully parsed [`PackIndex`] values, unlike the registry records, which
/// hold [`PackIndexViewData`].
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;
3845
/// Parsed multi-pack-index files keyed by MIDX file path, shared across cloned
/// handles. Caches the MIDX parse so object lookups in repositories with a
/// MIDX avoid reparsing the same fanout/object tables for every read.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
3850
/// Raw multi-pack-index OID lookup tables keyed by MIDX file path, shared
/// across cloned handles. These avoid hashing and materializing every MIDX
/// object when a command only needs point lookups.
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
3855
/// One registered `.idx`/`.pack` pair from a pack directory. Pack bytes and
/// per-pack decode/header caches hang directly off this record so repeated
/// object lookups do not bounce through path-keyed maps.
///
/// Both the index and the pack bytes start out unloaded (`None`) and are
/// filled in by the first `index()` / `bytes()` call.
/// NOTE(review): the registry builder may call `index()` eagerly when a
/// snapshot is built — confirm there.
#[derive(Debug)]
struct RegisteredPack {
    /// Path of the `.idx` file.
    idx: PathBuf,
    /// Path of the `.pack` file.
    pack: PathBuf,
    /// Lazily loaded index view; `None` until first requested.
    index: Mutex<Option<Arc<PackIndexViewData>>>,
    /// Lazily loaded pack bytes; `None` until first requested.
    data: Mutex<Option<Arc<PackData>>>,
    /// Delta-base cache for this pack, keyed by in-pack offset.
    delta_cache: Arc<Mutex<LruOffsetCache>>,
    /// Header memo for this pack, keyed by in-pack offset.
    header_type_cache: PackHeaderTypeCache,
}
3869
3870impl RegisteredPack {
3871    fn new(idx: PathBuf, pack: PathBuf) -> Self {
3872        Self {
3873            idx,
3874            pack,
3875            index: Mutex::new(None),
3876            data: Mutex::new(None),
3877            delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
3878            header_type_cache: Arc::new(Mutex::new(HashMap::new())),
3879        }
3880    }
3881
3882    fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
3883        if let Ok(cache) = self.index.lock()
3884            && let Some(index) = cache.as_ref()
3885        {
3886            return Ok(Arc::clone(index));
3887        }
3888        let index_bytes = load_pack_index_data(&self.idx)?;
3889        let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
3890            index_bytes,
3891            format,
3892        )?);
3893        if let Ok(mut cache) = self.index.lock() {
3894            *cache = Some(Arc::clone(&index));
3895        }
3896        Ok(index)
3897    }
3898
3899    fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
3900        if let Ok(cache) = self.data.lock()
3901            && let Some(bytes) = cache.as_ref()
3902        {
3903            return Ok(Arc::clone(bytes));
3904        }
3905        if let Ok(cache) = pack_bytes.lock()
3906            && let Some(bytes) = cache.get(&self.pack)
3907        {
3908            let bytes = Arc::clone(bytes);
3909            if let Ok(mut local_cache) = self.data.lock() {
3910                *local_cache = Some(Arc::clone(&bytes));
3911            }
3912            return Ok(bytes);
3913        }
3914        let bytes = Arc::new(load_pack_data(&self.pack)?);
3915        if let Ok(mut local_cache) = self.data.lock() {
3916            *local_cache = Some(Arc::clone(&bytes));
3917        }
3918        if let Ok(mut cache) = pack_bytes.lock() {
3919            cache.insert(self.pack.clone(), Arc::clone(&bytes));
3920        }
3921        Ok(bytes)
3922    }
3923}
3924
/// Cheap summary of a pack directory's state; two fingerprints compare equal
/// when all three fields match.
/// NOTE(review): presumably compared against a fresh scan to decide whether a
/// cached registry snapshot is stale — confirm at the call sites.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    /// Modification time, when one is available.
    /// NOTE(review): presumably the pack directory's own mtime — confirm.
    modified: Option<std::time::SystemTime>,
    /// Number of `.idx` files counted.
    idx_count: usize,
    /// Number of `.pack` files counted.
    pack_count: usize,
}
3931
/// Snapshot of a pack directory's lookup state, shared across cloned handles.
/// New packs are still found: a lookup that misses every cached pack re-scans the
/// directory once before concluding the object is absent (see
/// [`FileObjectDatabase::find_pack_containing`]).
#[derive(Debug)]
struct PackRegistrySnapshot {
    /// Fingerprint of the directory this snapshot was built from.
    fingerprint: PackDirFingerprint,
    /// The registered packs; positions in this vector are what `recent_pack`
    /// refers to.
    packs: Vec<Arc<RegisteredPack>>,
    /// Position in `packs` most recently recorded via `remember_hint`, if any.
    recent_pack: Mutex<Option<usize>>,
}
3942
3943impl PackRegistrySnapshot {
3944    fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
3945        Self {
3946            fingerprint,
3947            packs,
3948            recent_pack: Mutex::new(None),
3949        }
3950    }
3951
3952    fn cached_hint(&self) -> Option<usize> {
3953        self.recent_pack
3954            .lock()
3955            .ok()
3956            .and_then(|hint| *hint)
3957            .filter(|pack_index| *pack_index < self.packs.len())
3958    }
3959
3960    fn remember_hint(&self, pack_index: usize) {
3961        if let Ok(mut hint) = self.recent_pack.lock() {
3962            *hint = Some(pack_index);
3963        }
3964    }
3965}
3966
/// Cached pack-registry snapshot for this object directory, shared across cloned
/// handles. A `FileObjectDatabase` owns exactly one object directory, so this is
/// an `Option` instead of another path-keyed map; `None` means no snapshot is
/// currently cached.
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
3971
/// Where an object was found: which pack, and at what byte offset inside it.
///
/// When the pack came from the registry, `registered` carries its record so
/// bytes and caches are taken from there; otherwise only the path is known and
/// the database's path-keyed caches are used instead.
#[derive(Debug, Clone)]
struct PackLookup {
    /// Path of the `.pack` file.
    pack: PathBuf,
    /// Registry record for the pack, when the lookup came from the registry.
    registered: Option<Arc<RegisteredPack>>,
    /// Byte offset of the object's entry inside the pack.
    offset: u64,
}
3978
3979impl PackLookup {
3980    fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
3981        Self {
3982            pack: pack.pack.clone(),
3983            registered: Some(pack),
3984            offset,
3985        }
3986    }
3987
3988    fn from_path(pack: PathBuf, offset: u64) -> Self {
3989        Self {
3990            pack,
3991            registered: None,
3992            offset,
3993        }
3994    }
3995
3996    fn pack_path(&self) -> &Path {
3997        &self.pack
3998    }
3999
4000    fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
4001        match &self.registered {
4002            Some(pack) => pack.bytes(&database.pack_bytes),
4003            None => database.cached_pack_bytes(&self.pack),
4004        }
4005    }
4006
4007    fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
4008        match &self.registered {
4009            Some(pack) => database.cached_pack_index(&pack.idx),
4010            None => database.cached_pack_index(&self.pack.with_extension("idx")),
4011        }
4012    }
4013
4014    fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
4015        match &self.registered {
4016            Some(pack) => Some(Arc::clone(&pack.delta_cache)),
4017            None => database.pack_delta_cache(&self.pack),
4018        }
4019    }
4020
4021    fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
4022        match &self.registered {
4023            Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
4024            None => database.pack_header_type_cache(&self.pack),
4025        }
4026    }
4027}
4028
/// Handle onto one on-disk object directory: loose objects, packs under
/// `objects_dir/pack`, and any alternates. The `Arc`-wrapped caches are shared
/// between clones of a handle.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    // Loose-object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    // Object directory this database reads from and installs packs into.
    objects_dir: PathBuf,
    // Alternate object directories; consulted after local loose objects and
    // packs. Empty for handles built by `without_alternates`.
    alternates: Vec<PathBuf>,
    // Object-id format this database was opened with.
    format: ObjectFormat,
    // Read-side caches. Each of the following is emptied by
    // `refresh_read_cache`.
    pack_bytes: PackBytesCache,
    pack_indexes: PackIndexCache,
    multi_pack_indexes: MultiPackIndexCache,
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    pack_registry: PackRegistryCache,
    decoded: DecodedObjectCache,
    pack_deltas: PackDeltaCaches,
    pack_header_types: PackHeaderTypeCaches,
    // Lazily initialised set of object ids; not reset by `refresh_read_cache`.
    // NOTE(review): presumably the promisor-pack objects — confirm where it is
    // filled.
    promisor_objects: Arc<OnceLock<HashSet<ObjectId>>>,
    /// Graft points (`$GIT_DIR/shallow`), loaded lazily on the first
    /// [`ObjectReader::is_shallow_graft`] query. `$GIT_DIR` is taken to be
    /// the parent of `objects_dir`, matching the standard layout.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
4049
/// Reusable presence probe created by [`FileObjectDatabase::presence_checker`].
/// Between `contains` calls it keeps its own multi-pack-index lookup, registry
/// snapshot, per-pack index views, and last-hit pack position.
#[derive(Debug)]
pub struct ObjectPresenceChecker {
    // Database handle used for loose lookups, alternates, and shared caches.
    db: FileObjectDatabase,
    // `objects_dir/pack` of `db`.
    pack_dir: PathBuf,
    // Multi-pack-index OID lookup loaded by `prepare_packs`, if one was found.
    midx: Option<Arc<MultiPackIndexOidLookup>>,
    // Registry snapshot loaded by `prepare_registry`.
    registry: Option<Arc<PackRegistrySnapshot>>,
    // One slot per pack in `registry`, filled lazily by `registry_index`.
    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
    // Position of the pack that satisfied the most recent full scan.
    recent_pack: Option<usize>,
    // Whether `prepare_packs` has completed at least once.
    prepared_packs: bool,
    // Whether `prepare_registry` has completed at least once.
    prepared_registry: bool,
}
4061
4062impl ObjectPresenceChecker {
4063    fn new(db: FileObjectDatabase) -> Self {
4064        let pack_dir = db.objects_dir.join("pack");
4065        Self {
4066            db,
4067            pack_dir,
4068            midx: None,
4069            registry: None,
4070            registry_indexes: Vec::new(),
4071            recent_pack: None,
4072            prepared_packs: false,
4073            prepared_registry: false,
4074        }
4075    }
4076
4077    pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
4078        if oid.format() != self.db.format {
4079            return Err(GitError::InvalidObjectId(format!(
4080                "object {oid} uses {}, store uses {}",
4081                oid.format().name(),
4082                self.db.format.name()
4083            )));
4084        }
4085        if self.db.loose.exists(oid)? {
4086            return Ok(true);
4087        }
4088        if self.find_packed(oid, false)? {
4089            return Ok(true);
4090        }
4091        if self.find_packed(oid, true)? {
4092            return Ok(true);
4093        }
4094        for alternate in &self.db.alternates {
4095            if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
4096                return Ok(true);
4097            }
4098        }
4099        // Preserve the regular contains() reprepare-on-miss behavior for loose
4100        // objects that appeared after the fanout cache was populated.
4101        self.db.loose.invalidate_cache();
4102        self.db.loose.exists(oid)
4103    }
4104
4105    fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
4106        self.prepare_packs(force_rescan)?;
4107        if let Some(midx) = &self.midx
4108            && midx.contains(oid)
4109        {
4110            return Ok(true);
4111        }
4112        self.prepare_registry(force_rescan)?;
4113        self.find_in_registry(oid)
4114    }
4115
4116    fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
4117        if self.prepared_packs && !force_rescan {
4118            return Ok(());
4119        }
4120        let midx_path = self.pack_dir.join("multi-pack-index");
4121        self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
4122        self.prepared_packs = true;
4123        Ok(())
4124    }
4125
4126    fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
4127        if self.prepared_registry && !force_rescan {
4128            return Ok(());
4129        }
4130        let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
4131        let registry_changed = match self.registry.as_ref() {
4132            Some(cached) => !Arc::ptr_eq(cached, &registry),
4133            None => true,
4134        };
4135        if registry_changed {
4136            self.registry_indexes = vec![None; registry.packs.len()];
4137            self.recent_pack = None;
4138            self.registry = Some(registry);
4139        }
4140        self.prepared_registry = true;
4141        Ok(())
4142    }
4143
4144    fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
4145        let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
4146            return Ok(false);
4147        };
4148        if let Some(pack_index) = self
4149            .recent_pack
4150            .filter(|pack_index| *pack_index < registry.packs.len())
4151        {
4152            let index = self.registry_index(&registry, pack_index)?;
4153            if index.find(oid).is_some() {
4154                return Ok(true);
4155            }
4156        }
4157        for pack_index in 0..registry.packs.len() {
4158            if Some(pack_index) == self.recent_pack {
4159                continue;
4160            }
4161            let index = self.registry_index(&registry, pack_index)?;
4162            if index.find(oid).is_some() {
4163                self.recent_pack = Some(pack_index);
4164                return Ok(true);
4165            }
4166        }
4167        Ok(false)
4168    }
4169
4170    fn registry_index(
4171        &mut self,
4172        registry: &PackRegistrySnapshot,
4173        pack_index: usize,
4174    ) -> Result<Arc<PackIndexViewData>> {
4175        if self.registry_indexes.len() != registry.packs.len() {
4176            self.registry_indexes = vec![None; registry.packs.len()];
4177            self.recent_pack = None;
4178        }
4179        if let Some(index) = self
4180            .registry_indexes
4181            .get(pack_index)
4182            .and_then(|index| index.as_ref())
4183        {
4184            return Ok(Arc::clone(index));
4185        }
4186        let index = registry.packs[pack_index].index(self.db.format)?;
4187        if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
4188            *slot = Some(Arc::clone(&index));
4189        }
4190        Ok(index)
4191    }
4192}
4193
4194/// Parse `$GIT_DIR/shallow`: one hex object id per line. A missing file is an
4195/// empty set (the repository is not shallow); unparsable lines are ignored so
4196/// a torn write never poisons walks.
4197fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
4198    let Ok(contents) = std::fs::read_to_string(shallow_file) else {
4199        return HashSet::new();
4200    };
4201    contents
4202        .lines()
4203        .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
4204        .collect()
4205}
4206
4207pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
4208    env::var_os("GIT_OBJECT_DIRECTORY")
4209        .map(PathBuf::from)
4210        .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
4211}
4212
/// Common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, used verbatim when set.
/// 2. The trimmed contents of `git_dir/commondir`; a relative value is
///    resolved against `git_dir`. The result is canonicalized when possible
///    and returned un-canonicalized otherwise.
/// 3. `git_dir` itself when no `commondir` file can be read.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return overridden.into();
    }
    let git_dir = git_dir.as_ref();
    match fs::read_to_string(git_dir.join("commondir")) {
        Ok(contents) => {
            let target = Path::new(contents.trim());
            let resolved = if target.is_absolute() {
                target.to_path_buf()
            } else {
                git_dir.join(target)
            };
            fs::canonicalize(&resolved).unwrap_or(resolved)
        }
        Err(_) => git_dir.to_path_buf(),
    }
}
4230
4231pub fn repository_object_ids(
4232    git_dir: impl AsRef<Path>,
4233    format: ObjectFormat,
4234) -> Result<Vec<ObjectId>> {
4235    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
4236}
4237
4238pub fn object_ids_in_objects_dir(
4239    objects_dir: impl AsRef<Path>,
4240    format: ObjectFormat,
4241) -> Result<Vec<ObjectId>> {
4242    let objects_dir = objects_dir.as_ref();
4243    let mut oids = HashSet::new();
4244    collect_loose_object_ids(objects_dir, format, &mut oids)?;
4245    collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
4246    let mut oids = oids.into_iter().collect::<Vec<_>>();
4247    oids.sort_by_key(ObjectId::to_hex);
4248    Ok(oids)
4249}
4250
4251fn collect_loose_object_ids(
4252    objects_dir: &Path,
4253    format: ObjectFormat,
4254    oids: &mut HashSet<ObjectId>,
4255) -> Result<()> {
4256    if !objects_dir.exists() {
4257        return Ok(());
4258    }
4259    let hex_len = format.hex_len();
4260    for entry in fs::read_dir(objects_dir)? {
4261        let entry = entry?;
4262        if !entry.file_type()?.is_dir() {
4263            continue;
4264        }
4265        let name = entry.file_name();
4266        let Some(fanout) = name.to_str() else {
4267            continue;
4268        };
4269        if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4270            continue;
4271        }
4272        for object_entry in fs::read_dir(entry.path())? {
4273            let object_entry = object_entry?;
4274            if !object_entry.file_type()?.is_file() {
4275                continue;
4276            }
4277            let name = object_entry.file_name();
4278            let Some(suffix) = name.to_str() else {
4279                continue;
4280            };
4281            if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4282                continue;
4283            }
4284            oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
4285        }
4286    }
4287    Ok(())
4288}
4289
4290fn collect_loose_fanout_object_ids(
4291    objects_dir: &Path,
4292    format: ObjectFormat,
4293    fanout: u8,
4294    oids: &mut HashSet<ObjectId>,
4295) -> Result<()> {
4296    let fanout_hex = format!("{fanout:02x}");
4297    let fanout_dir = objects_dir.join(&fanout_hex);
4298    let entries = match fs::read_dir(&fanout_dir) {
4299        Ok(entries) => entries,
4300        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
4301        Err(err) => return Err(GitError::Io(err.to_string())),
4302    };
4303    let hex_len = format.hex_len();
4304    for object_entry in entries {
4305        let object_entry = object_entry?;
4306        let name = object_entry.file_name();
4307        let Some(suffix) = name.to_str() else {
4308            continue;
4309        };
4310        if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4311            continue;
4312        }
4313        oids.insert(ObjectId::from_hex(
4314            format,
4315            &format!("{fanout_hex}{suffix}"),
4316        )?);
4317    }
4318    Ok(())
4319}
4320
// Presence cache for loose objects.
// NOTE(review): usage is not visible in this part of the file. Presumably
// `loaded_fanouts` records which fanout bytes have been scanned and `objects`
// holds the ids found in them — confirm against the loose store.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    // Fanout bytes (first byte of the id) tracked by this cache.
    loaded_fanouts: HashSet<u8>,
    // Object ids tracked by this cache.
    objects: HashSet<ObjectId>,
}
4326
4327/// Every object id resolvable through a pack (any `.idx` or the
4328/// multi-pack-index) under `objects_dir/pack`. Used by `--unpacked`
4329/// filtering: an object is "unpacked" when absent from this set, regardless
4330/// of a loose copy also existing.
4331pub fn packed_object_ids(
4332    objects_dir: impl AsRef<Path>,
4333    format: ObjectFormat,
4334) -> Result<HashSet<ObjectId>> {
4335    let mut oids = HashSet::new();
4336    collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
4337    Ok(oids)
4338}
4339
4340fn collect_packed_object_ids(
4341    pack_dir: &Path,
4342    format: ObjectFormat,
4343    oids: &mut HashSet<ObjectId>,
4344) -> Result<()> {
4345    if !pack_dir.exists() {
4346        return Ok(());
4347    }
4348    let mut midx_pack_names = HashSet::new();
4349    let midx_path = pack_dir.join("multi-pack-index");
4350    if midx_path.exists() {
4351        let midx = MultiPackIndex::parse_without_checksum(&fs::read(&midx_path)?, format)?;
4352        midx_pack_names.extend(midx.pack_names.iter().cloned());
4353        oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
4354    }
4355    for entry in fs::read_dir(pack_dir)? {
4356        let path = entry?.path();
4357        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
4358            continue;
4359        }
4360        if !path.with_extension("pack").exists() {
4361            continue;
4362        }
4363        let index = match PackIndex::parse(&fs::read(&path)?, format) {
4364            Ok(index) => index,
4365            Err(_err)
4366                if path
4367                    .file_name()
4368                    .and_then(|name| name.to_str())
4369                    .is_some_and(|name| midx_pack_names.contains(name)) =>
4370            {
4371                eprintln!(
4372                    "error: packfile {} index unavailable",
4373                    path.with_extension("pack").display()
4374                );
4375                continue;
4376            }
4377            Err(err) => return Err(err),
4378        };
4379        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
4380    }
4381    Ok(())
4382}
4383
4384impl FileObjectDatabase {
    /// The object-id format (hash algorithm) this database was opened with.
    ///
    /// This is the `format` value handed to the constructor, returned by value.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
4389
    /// The repository object directory this database reads from.
    ///
    /// Packs are looked up and installed under its `pack` subdirectory.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
4394
4395    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4396        let objects_dir = objects_dir.into();
4397        Self {
4398            loose: LooseObjectStore::new(objects_dir.clone(), format),
4399            alternates: alternate_object_dirs(&objects_dir),
4400            objects_dir,
4401            format,
4402            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4403            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4404            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4405            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4406            pack_registry: Arc::new(Mutex::new(None)),
4407            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4408            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4409            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4410            promisor_objects: Arc::new(OnceLock::new()),
4411            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4412        }
4413    }
4414
4415    fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4416        let objects_dir = objects_dir.into();
4417        Self {
4418            loose: LooseObjectStore::new(objects_dir.clone(), format),
4419            alternates: Vec::new(),
4420            objects_dir,
4421            format,
4422            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4423            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4424            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4425            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4426            pack_registry: Arc::new(Mutex::new(None)),
4427            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4428            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4429            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4430            promisor_objects: Arc::new(OnceLock::new()),
4431            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4432        }
4433    }
4434
4435    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
4436        Self::new(repository_objects_dir(git_dir), format)
4437    }
4438
4439    /// Drop cached pack registries, indexes, and decoded objects so the next read
4440    /// sees packs/objects installed after this handle was created (e.g. after
4441    /// `fetch` or `install_pack`). Long-lived [`Repository`] sessions call this
4442    /// via the owning repository's `refresh_objects` hook.
4443    pub fn refresh_read_cache(&self) {
4444        if let Ok(mut cache) = self.pack_registry.lock() {
4445            *cache = None;
4446        }
4447        if let Ok(mut cache) = self.pack_indexes.lock() {
4448            cache.clear();
4449        }
4450        if let Ok(mut cache) = self.multi_pack_indexes.lock() {
4451            cache.clear();
4452        }
4453        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
4454            cache.clear();
4455        }
4456        if let Ok(mut cache) = self.pack_bytes.lock() {
4457            cache.clear();
4458        }
4459        if let Ok(mut cache) = self.pack_deltas.lock() {
4460            cache.clear();
4461        }
4462        if let Ok(mut cache) = self.pack_header_types.lock() {
4463            cache.clear();
4464        }
4465        if let Ok(mut cache) = self.decoded.lock() {
4466            cache.clear();
4467        }
4468        self.loose.invalidate_cache();
4469    }
4470
    /// Borrow the loose-object store backing this database.
    pub fn loose(&self) -> &LooseObjectStore {
        &self.loose
    }
4474
4475    pub fn presence_checker(&self) -> ObjectPresenceChecker {
4476        ObjectPresenceChecker::new(self.clone())
4477    }
4478
4479    pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
4480        self.install_pack_with_options(pack, RawPackInstallOptions::default())
4481    }
4482
4483    pub fn write_blob_as_pack(
4484        &self,
4485        oid: ObjectId,
4486        object: &EncodedObject,
4487        compression_level: u32,
4488    ) -> Result<ObjectId> {
4489        if object.object_type != ObjectType::Blob {
4490            return Err(GitError::InvalidObject(
4491                "write_blob_as_pack requires a blob object".into(),
4492            ));
4493        }
4494        if oid.format() != self.format {
4495            return Err(GitError::InvalidObjectId(format!(
4496                "object {oid} uses {}, store uses {}",
4497                oid.format().name(),
4498                self.format.name()
4499            )));
4500        }
4501        if self.contains(&oid)? {
4502            return Ok(oid);
4503        }
4504        let input = [PackInput {
4505            oid: &oid,
4506            object,
4507        }];
4508        let options = PackWriteOptions::new()
4509            .with_window(0)
4510            .with_depth(0)
4511            .with_reorder(false)
4512            .with_compression_level(compression_level);
4513        let pack = PackFile::write_packed_with_known_ids_and_options(&input, self.format, &options)?;
4514        self.install_pack(&pack)?;
4515        Ok(oid)
4516    }
4517
4518    pub fn write_blobs_as_pack(
4519        &self,
4520        objects: &[(ObjectId, EncodedObject)],
4521        compression_level: u32,
4522    ) -> Result<()> {
4523        let mut seen = HashSet::with_capacity(objects.len());
4524        let mut inputs = Vec::new();
4525        for (oid, object) in objects {
4526            if object.object_type != ObjectType::Blob {
4527                return Err(GitError::InvalidObject(
4528                    "write_blobs_as_pack requires blob objects".into(),
4529                ));
4530            }
4531            if oid.format() != self.format {
4532                return Err(GitError::InvalidObjectId(format!(
4533                    "object {oid} uses {}, store uses {}",
4534                    oid.format().name(),
4535                    self.format.name()
4536                )));
4537            }
4538            if seen.insert(*oid) && !self.contains(oid)? {
4539                inputs.push(PackInput { oid, object });
4540            }
4541        }
4542        if inputs.is_empty() {
4543            return Ok(());
4544        }
4545        let options = PackWriteOptions::new()
4546            .with_window(0)
4547            .with_depth(0)
4548            .with_reorder(false)
4549            .with_compression_level(compression_level);
4550        let pack = PackFile::write_packed_with_known_ids_and_options(&inputs, self.format, &options)?;
4551        self.install_pack(&pack)?;
4552        Ok(())
4553    }
4554
4555    pub fn install_pack_with_options(
4556        &self,
4557        pack: &PackWrite,
4558        options: RawPackInstallOptions,
4559    ) -> Result<PackInstallResult> {
4560        if pack.checksum.format() != self.format {
4561            return Err(GitError::InvalidObjectId(format!(
4562                "pack checksum uses {}, store uses {}",
4563                pack.checksum.format().name(),
4564                self.format.name()
4565            )));
4566        }
4567        for entry in &pack.entries {
4568            if entry.oid.format() != self.format {
4569                return Err(GitError::InvalidObjectId(format!(
4570                    "pack entry {} uses {}, store uses {}",
4571                    entry.oid,
4572                    entry.oid.format().name(),
4573                    self.format.name()
4574                )));
4575            }
4576        }
4577        let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
4578        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4579        if canonical_index.pack_checksum != pack.checksum
4580            || parsed_index.pack_checksum != pack.checksum
4581        {
4582            return Err(GitError::InvalidFormat(
4583                "pack and index checksums do not match pack write".into(),
4584            ));
4585        }
4586        if pack.index != canonical_index.index {
4587            return Err(GitError::InvalidFormat(
4588                "pack index does not match pack contents".into(),
4589            ));
4590        }
4591
4592        let pack_dir = self.objects_dir.join("pack");
4593        fs::create_dir_all(&pack_dir)?;
4594        let pack_name = format!("pack-{}", pack.checksum.to_hex());
4595        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4596        let index_path = pack_dir.join(format!("{pack_name}.idx"));
4597        if !pack_path.exists() || !index_path.exists() {
4598            write_pack_component(&pack_path, &pack.pack)?;
4599            write_pack_component(&index_path, &pack.index)?;
4600        }
4601        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4602        Ok(PackInstallResult {
4603            pack_name,
4604            pack_path,
4605            index_path,
4606            promisor_path,
4607            object_ids: canonical_index
4608                .entries
4609                .iter()
4610                .map(|entry| entry.oid)
4611                .collect(),
4612        })
4613    }
4614
4615    /// Install a pack that was produced in this process by [`PackFile::write_packed`].
4616    ///
4617    /// Unlike [`Self::install_raw_pack_with_options`], this does not re-inflate
4618    /// every pack entry to rebuild the index. It validates the generated pack
4619    /// trailer and generated index against the writer's object ids, CRCs, and
4620    /// offsets, then writes those bytes directly. Use the raw installer for
4621    /// arbitrary pack bytes received from an untrusted transport.
4622    pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
4623        self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
4624    }
4625
4626    pub fn install_written_pack_with_options(
4627        &self,
4628        pack: &PackWrite,
4629        options: RawPackInstallOptions,
4630    ) -> Result<PackInstallResult> {
4631        validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
4632        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4633        if parsed_index.pack_checksum != pack.checksum {
4634            return Err(GitError::InvalidFormat(
4635                "pack write index checksum does not match pack".into(),
4636            ));
4637        }
4638        if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
4639            return Err(GitError::InvalidFormat(
4640                "pack write index does not match generated entries".into(),
4641            ));
4642        }
4643        self.install_generated_pack_unchecked(pack, options)
4644    }
4645
4646    fn install_generated_pack_unchecked(
4647        &self,
4648        pack: &PackWrite,
4649        options: RawPackInstallOptions,
4650    ) -> Result<PackInstallResult> {
4651        let pack_dir = self.objects_dir.join("pack");
4652        fs::create_dir_all(&pack_dir)?;
4653        let pack_name = format!("pack-{}", pack.checksum.to_hex());
4654        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4655        let index_path = pack_dir.join(format!("{pack_name}.idx"));
4656        if !pack_path.exists() || !index_path.exists() {
4657            write_pack_component(&pack_path, &pack.pack)?;
4658            write_pack_component(&index_path, &pack.index)?;
4659        }
4660        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4661        Ok(PackInstallResult {
4662            pack_name,
4663            pack_path,
4664            index_path,
4665            promisor_path,
4666            object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
4667        })
4668    }
4669
4670    pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
4671        self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
4672    }
4673
4674    pub fn install_raw_pack_with_options(
4675        &self,
4676        pack_bytes: &[u8],
4677        options: RawPackInstallOptions,
4678    ) -> Result<PackInstallResult> {
4679        let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
4680        let pack_dir = self.objects_dir.join("pack");
4681        fs::create_dir_all(&pack_dir)?;
4682        let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
4683        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4684        let index_path = pack_dir.join(format!("{pack_name}.idx"));
4685        if !pack_path.exists() || !index_path.exists() {
4686            write_pack_component(&pack_path, pack_bytes)?;
4687            write_pack_component(&index_path, &built.index)?;
4688        }
4689        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4690        Ok(PackInstallResult {
4691            pack_name,
4692            pack_path,
4693            index_path,
4694            promisor_path,
4695            object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
4696        })
4697    }
4698
4699    pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
4700        if self.loose.exists(oid)? {
4701            return Ok(true);
4702        }
4703        if self.find_pack_containing(oid)?.is_some() {
4704            return Ok(true);
4705        }
4706        for alternate in &self.alternates {
4707            if Self::without_alternates(alternate, self.format).contains(oid)? {
4708                return Ok(true);
4709            }
4710        }
4711        // Reprepare-on-miss: a cached negative loose verdict may predate a
4712        // sibling write. Drop it and exact-probe once before reporting absence.
4713        self.loose.invalidate_cache();
4714        self.loose.exists(oid)
4715    }
4716
4717    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4718        let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
4719            .into_iter()
4720            .collect::<HashSet<_>>();
4721        for alternate in &self.alternates {
4722            oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
4723        }
4724        let mut oids = oids.into_iter().collect::<Vec<_>>();
4725        oids.sort_by_key(ObjectId::to_hex);
4726        Ok(oids)
4727    }
4728
4729    pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
4730        if let Some(disk_size) = self.loose.disk_size(oid)? {
4731            return Ok(Some(ObjectStorageInfo {
4732                disk_size,
4733                deltabase: zero_oid(self.format)?,
4734            }));
4735        }
4736        if let Some(info) = self.packed_object_storage_info(oid)? {
4737            return Ok(Some(info));
4738        }
4739        for alternate in &self.alternates {
4740            if let Some(info) =
4741                Self::without_alternates(alternate, self.format).object_storage_info(oid)?
4742            {
4743                return Ok(Some(info));
4744            }
4745        }
4746        // Reprepare-on-miss: drop any stale negative loose cache and exact-probe
4747        // once before reporting absence (see `read_object`).
4748        self.loose.invalidate_cache();
4749        if let Some(disk_size) = self.loose.disk_size(oid)? {
4750            return Ok(Some(ObjectStorageInfo {
4751                disk_size,
4752                deltabase: zero_oid(self.format)?,
4753            }));
4754        }
4755        Ok(None)
4756    }
4757
4758    pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
4759        let mut matches = self.object_ids_with_prefix(prefix)?;
4760        Ok(match matches.len() {
4761            0 => ObjectPrefixResolution::Missing,
4762            1 => ObjectPrefixResolution::Unique(matches.remove(0)),
4763            _ => ObjectPrefixResolution::Ambiguous(matches),
4764        })
4765    }
4766
4767    pub fn object_ids_with_prefix(&self, prefix: &str) -> Result<Vec<ObjectId>> {
4768        validate_object_id_prefix(self.format, prefix)?;
4769        let mut matches = Vec::new();
4770        for oid in self.object_ids()? {
4771            if object_id_matches_prefix(&oid, prefix) {
4772                matches.push(oid);
4773            }
4774        }
4775        Ok(matches)
4776    }
4777
4778    /// The object type and content size of `oid` without decoding its full body —
4779    /// git's `cat-file --batch-check` fast path. Tries the decoded-object cache,
4780    /// then loose storage (inflating only the framing header), then packs (reading
4781    /// the entry header and, for deltas, only the delta's leading varints), then
4782    /// alternates. Returns `Ok(None)` if the object is not present.
4783    ///
4784    /// Unlike [`ObjectReader::read_object`], this never materializes the body, so it
4785    /// stays cheap on huge blobs and deep delta chains. It does not populate the
4786    /// decoded-object cache (nothing is decoded).
4787    pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
4788        if implied_empty_tree_object(self.format, oid).is_some() {
4789            return Ok(Some((ObjectType::Tree, 0)));
4790        }
4791        if let Ok(mut cache) = self.decoded.lock()
4792            && let Some(object) = cache.get(oid)
4793        {
4794            return Ok(Some((object.object_type, object.body.len() as u64)));
4795        }
4796        if let Some(header) = self.loose.read_header(oid)? {
4797            return Ok(Some(header));
4798        }
4799        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
4800            let bytes = pack_lookup.pack_bytes(self)?;
4801            // Per-pack offset->type memo so the ofs-delta chain walk that resolves
4802            // a packed object's type runs at most once per chain across the batch,
4803            // instead of re-walking (and re-inflating each link's leading varints)
4804            // on every header read — the sley#26 super-linear cat-file --batch-check.
4805            let type_cache = pack_lookup.header_type_cache(self);
4806            let resolve_ref_base = |base: &ObjectId| {
4807                self.read_object_header(base)
4808                    .map(|header| header.map(|(t, _)| t))
4809            };
4810            let header = match &type_cache {
4811                Some(cache) => {
4812                    let mut adapter = PackHeaderTypeCacheAdapter(cache);
4813                    sley_pack::read_object_header_at_with_cache(
4814                        &bytes,
4815                        pack_lookup.offset,
4816                        self.format,
4817                        resolve_ref_base,
4818                        &mut adapter,
4819                    )?
4820                }
4821                None => sley_pack::read_object_header_at(
4822                    &bytes,
4823                    pack_lookup.offset,
4824                    self.format,
4825                    resolve_ref_base,
4826                )?,
4827            };
4828            return Ok(Some(header));
4829        }
4830        for alternate in &self.alternates {
4831            if let Some(header) =
4832                Self::without_alternates(alternate, self.format).read_object_header(oid)?
4833            {
4834                return Ok(Some(header));
4835            }
4836        }
4837        // Reprepare-on-miss: discard any stale negative loose cache and retry an
4838        // exact path probe once before reporting absence (see `read_object`).
4839        self.loose.invalidate_cache();
4840        if let Some(header) = self.loose.read_header(oid)? {
4841            return Ok(Some(header));
4842        }
4843        Ok(None)
4844    }
4845
4846    fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
4847        // Memory-capped decoded-object cache first (delta-base reuse for ref-delta
4848        // bases that resolve back through the store + repeated whole-object reads).
4849        if let Ok(mut cache) = self.decoded.lock()
4850            && let Some(object) = cache.get(oid)
4851        {
4852            return Ok(Some(object));
4853        }
4854        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
4855            return Ok(None);
4856        };
4857        self.read_packed_object_at_lookup(oid, &pack_lookup)
4858            .map(Some)
4859    }
4860
4861    fn read_packed_object_at_lookup(
4862        &self,
4863        oid: &ObjectId,
4864        pack_lookup: &PackLookup,
4865    ) -> Result<Arc<EncodedObject>> {
4866        if let Ok(mut cache) = self.decoded.lock()
4867            && let Some(object) = cache.get(oid)
4868        {
4869            return Ok(object);
4870        }
4871        let bytes = pack_lookup.pack_bytes(self)?;
4872        // Per-pack delta-base cache (keyed by in-pack offset). Resolving an
4873        // ofs-delta chain reuses already-decoded bases instead of re-inflating the
4874        // whole chain on every read. Scoped to this pack's path so an offset key is
4875        // never applied to the wrong pack's bytes.
4876        let delta_cache = pack_lookup.delta_cache(self);
4877        let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
4878        // Decode only this object at its offset (plus its delta-base chain). A
4879        // ref-delta base resolves through the full store (loose / other packs) and
4880        // reuses the decoded-object cache. No cache lock is held across the decode,
4881        // so the recursive resolver re-entry (which may re-enter read_object) is
4882        // safe.
4883        let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
4884        let object = match &delta_adapter {
4885            Some(adapter) => sley_pack::read_object_at_with_cache_arc(
4886                &bytes,
4887                pack_lookup.offset,
4888                self.format,
4889                resolve_ref_base,
4890                adapter,
4891            )?,
4892            None => sley_pack::read_object_at_arc(
4893                &bytes,
4894                pack_lookup.offset,
4895                self.format,
4896                resolve_ref_base,
4897            )?,
4898        };
4899        // Trust the index → offset mapping rather than re-hashing every decoded
4900        // object on read (see `verify_reads_enabled`); this re-hash dominated
4901        // bulk-read cost. Opt back in with `SLEY_VERIFY_READS` for a paranoid check.
4902        if verify_reads_enabled() {
4903            let actual = object.object_id(self.format)?;
4904            if actual != *oid {
4905                return Err(GitError::InvalidObject(format!(
4906                    "pack object id mismatch: index says {oid}, decoded {actual}"
4907                )));
4908            }
4909        }
4910        if let Ok(mut cache) = self.decoded.lock() {
4911            cache.put(*oid, Arc::clone(&object));
4912        }
4913        Ok(object)
4914    }
4915
4916    /// The per-pack delta-base cache for `pack_path`, creating it on first use.
4917    /// Returns `None` only if the shared map's lock is poisoned, in which case the
4918    /// caller falls back to an uncached decode (correctness preserved).
4919    fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
4920        let mut caches = self.pack_deltas.lock().ok()?;
4921        let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
4922            Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
4923        });
4924        Some(Arc::clone(cache))
4925    }
4926
4927    /// The per-pack header-type memo for `pack_path`, creating it on first use.
4928    /// Returns `None` only if the shared map's lock is poisoned, in which case the
4929    /// caller falls back to an unmemoized header walk (correctness preserved).
4930    fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
4931        let mut caches = self.pack_header_types.lock().ok()?;
4932        let cache = caches
4933            .entry(pack_path.to_path_buf())
4934            .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
4935        Some(Arc::clone(cache))
4936    }
4937
4938    /// Backing bytes of the pack at `pack_path`, loaded at most once per database
4939    /// handle (cached, shared across clones). Memory-mapped under the `mmap` feature,
4940    /// otherwise read into the heap. On a poisoned lock it falls back to loading
4941    /// without caching, preserving correctness.
4942    fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
4943        if let Ok(cache) = self.pack_bytes.lock()
4944            && let Some(bytes) = cache.get(pack_path)
4945        {
4946            return Ok(Arc::clone(bytes));
4947        }
4948        let bytes = Arc::new(load_pack_data(pack_path)?);
4949        if let Ok(mut cache) = self.pack_bytes.lock() {
4950            cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
4951        }
4952        Ok(bytes)
4953    }
4954
4955    /// Parsed index for the `.idx` at `index_path`, parsed at most once per
4956    /// database handle. On a poisoned lock it falls back to parsing without
4957    /// caching, preserving correctness.
4958    fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
4959        if let Ok(cache) = self.pack_indexes.lock()
4960            && let Some(index) = cache.get(index_path)
4961        {
4962            return Ok(Arc::clone(index));
4963        }
4964        let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
4965        if let Ok(mut cache) = self.pack_indexes.lock() {
4966            cache.insert(index_path.to_path_buf(), Arc::clone(&index));
4967        }
4968        Ok(index)
4969    }
4970
4971    fn cached_multi_pack_index_oid_lookup(
4972        &self,
4973        midx_path: &Path,
4974    ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
4975        if !midx_path.exists() {
4976            return Ok(None);
4977        }
4978        if let Ok(cache) = self.multi_pack_oid_lookups.lock()
4979            && let Some(midx) = cache.get(midx_path)
4980        {
4981            return Ok(Some(Arc::clone(midx)));
4982        }
4983        let bytes = load_multi_pack_index_lookup_data(midx_path)?;
4984        let midx = match MultiPackIndexOidLookup::parse(bytes, self.format) {
4985            Ok(midx) => Arc::new(midx),
4986            Err(GitError::InvalidFormat(message))
4987                if message.starts_with("multi-pack-index hash id ") =>
4988            {
4989                let actual = message
4990                    .strip_prefix("multi-pack-index hash id ")
4991                    .and_then(|rest| rest.split_whitespace().next())
4992                    .unwrap_or("0");
4993                let expected = match self.format {
4994                    ObjectFormat::Sha1 => 1,
4995                    ObjectFormat::Sha256 => 2,
4996                };
4997                eprintln!(
4998                    "error: multi-pack-index hash version {actual} does not match version {expected}"
4999                );
5000                return Ok(None);
5001            }
5002            Err(err) => return Err(err),
5003        };
5004        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
5005            cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
5006        }
5007        Ok(Some(midx))
5008    }
5009
5010    /// Registry snapshot for this database's pack directory. With `force_rescan`,
5011    /// the directory is re-read; when the fingerprint and pack set match the
5012    /// cached snapshot, the same `Arc` is returned so miss handling can tell that
5013    /// no new packs appeared.
5014    fn cached_pack_registry(
5015        &self,
5016        pack_dir: &Path,
5017        force_rescan: bool,
5018    ) -> Result<Arc<PackRegistrySnapshot>> {
5019        if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
5020            return Ok(registry);
5021        }
5022        let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
5023        if let Ok(mut cache) = self.pack_registry.lock() {
5024            match cache.as_ref() {
5025                Some(existing)
5026                    if existing.fingerprint == scanned.fingerprint
5027                        && same_registered_pack_set(&existing.packs, &scanned.packs) =>
5028                {
5029                    return Ok(Arc::clone(existing));
5030                }
5031                _ => {
5032                    *cache = Some(Arc::clone(&scanned));
5033                }
5034            }
5035        }
5036        Ok(scanned)
5037    }
5038
5039    fn find_in_pack_registry(
5040        &self,
5041        registry: Arc<PackRegistrySnapshot>,
5042        oid: &ObjectId,
5043    ) -> Result<Option<PackLookup>> {
5044        let hinted_pack_index = registry.cached_hint();
5045        if let Some(pack_index) = hinted_pack_index {
5046            let pack = &registry.packs[pack_index];
5047            match pack.index(self.format) {
5048                Ok(index) => {
5049                    if let Some(entry) = index.find(oid) {
5050                        return Ok(Some(PackLookup::from_registered(
5051                            Arc::clone(pack),
5052                            entry.offset,
5053                        )));
5054                    }
5055                }
5056                Err(_) => {
5057                    eprintln!("error: packfile {} index unavailable", pack.pack.display());
5058                }
5059            }
5060        }
5061        for (pack_index, pack) in registry.packs.iter().enumerate() {
5062            if Some(pack_index) == hinted_pack_index {
5063                continue;
5064            }
5065            let index = match pack.index(self.format) {
5066                Ok(index) => index,
5067                Err(_) => {
5068                    eprintln!("error: packfile {} index unavailable", pack.pack.display());
5069                    continue;
5070                }
5071            };
5072            if let Some(entry) = index.find(oid) {
5073                registry.remember_hint(pack_index);
5074                return Ok(Some(PackLookup::from_registered(
5075                    Arc::clone(pack),
5076                    entry.offset,
5077                )));
5078            }
5079        }
5080        Ok(None)
5081    }
5082
5083    /// Read `oid` from any pack *other than* the one named by `exclude`, used as
5084    /// a corruption fallback: a redundant packed copy survives one pack's
5085    /// damage. Scans the on-disk `.idx` files directly (bypassing the registry
5086    /// cache, whose first hit is the excluded pack) and decodes from the first
5087    /// other pack that both indexes the object and parses cleanly.
5088    fn read_packed_object_from_other_packs(
5089        &self,
5090        oid: &ObjectId,
5091        exclude: &PackLookup,
5092    ) -> Result<Option<Arc<EncodedObject>>> {
5093        let pack_dir = self.objects_dir.join("pack");
5094        let Ok(entries) = fs::read_dir(&pack_dir) else {
5095            return Ok(None);
5096        };
5097        let excluded_pack = exclude.pack_path().to_path_buf();
5098        for entry in entries {
5099            let idx_path = entry?.path();
5100            if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
5101                continue;
5102            }
5103            let pack_path = idx_path.with_extension("pack");
5104            if pack_path == excluded_pack {
5105                continue;
5106            }
5107            let Ok(idx_bytes) = fs::read(&idx_path) else {
5108                continue;
5109            };
5110            let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
5111                continue;
5112            };
5113            let Some(entry) = index.find(oid) else {
5114                continue;
5115            };
5116            let candidate = PackLookup::from_path(pack_path, entry.offset);
5117            if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
5118                return Ok(Some(object));
5119            }
5120        }
5121        Ok(None)
5122    }
5123
    /// Locate the pack (and in-pack offset) that stores `oid`, or `Ok(None)` when
    /// no pack of this database indexes it.
    ///
    /// Lookup order:
    /// 1. the already-loaded multi-pack-index and pack registry (memory only);
    /// 2. the on-disk multi-pack-index, if the pack directory exists;
    /// 3. the pack registry, scanning the directory if nothing is cached yet;
    /// 4. one forced rescan of the pack directory, searched only when it
    ///    produced a snapshot different from the one already searched.
    ///
    /// # Errors
    ///
    /// Returns `GitError::InvalidObjectId` when `oid` uses a different object
    /// format than this store, and propagates errors from the index lookups.
    fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
        if oid.format() != self.format {
            return Err(GitError::InvalidObjectId(format!(
                "object {oid} uses {}, store uses {}",
                oid.format().name(),
                self.format.name()
            )));
        }
        let pack_dir = self.objects_dir.join("pack");
        // Hot path: a previously cached pack registry or multi-pack-index already
        // names every pack, and locating `oid` in them is pure in-memory index
        // work. Try that first so a warm handle does not parse indexes or hash
        // pack paths on every lookup.
        if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
            && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
        {
            return Ok(Some(pack_paths));
        }
        if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
            && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
        {
            return Ok(Some(pack_paths));
        }

        // Without a pack directory there is nothing left to load or scan.
        if !pack_dir.exists() {
            return Ok(None);
        }
        if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
            return Ok(Some(pack_paths));
        }
        // Search the cached registry first. On a complete miss, re-scan the
        // directory once (picking up any pack added since the registry was
        // cached) and search again, so newly written packs are still found.
        let registry = self.cached_pack_registry(&pack_dir, false)?;
        if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(&registry), oid)? {
            return Ok(Some(pack_paths));
        }
        let refreshed = self.cached_pack_registry(&pack_dir, true)?;
        if Arc::ptr_eq(&registry, &refreshed) {
            // The re-scan produced the same registry, so nothing new appeared.
            return Ok(None);
        }
        self.find_in_pack_registry(refreshed, oid)
    }
5168
5169    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
5170        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
5171            return Ok(None);
5172        };
5173        let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
5174        let trailer_offset = pack_len
5175            .checked_sub(self.format.raw_len() as u64)
5176            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
5177        let index = pack_lookup.pack_index(self)?;
5178        let pack = pack_lookup.pack_bytes(self)?;
5179        let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
5180        let delta_base_offset = match &delta_base {
5181            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
5182            Some(PackDeltaBase::Ref(_)) | None => None,
5183        };
5184        let offset_info = scan_pack_index_offsets(
5185            &index,
5186            pack_lookup.offset,
5187            trailer_offset,
5188            delta_base_offset,
5189        )?;
5190        let disk_size = offset_info
5191            .end_offset
5192            .checked_sub(pack_lookup.offset)
5193            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
5194        let deltabase = match delta_base {
5195            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
5196                // scan_pack_index_offsets returns Err when delta_base_offset is
5197                // Some but no matching entry is found, so this is unreachable for
5198                // valid packs; propagate as an error rather than panic to keep a
5199                // malformed pack from taking down the process if that invariant
5200                // ever drifts.
5201                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
5202            })?,
5203            Some(PackDeltaBase::Ref(oid)) => oid,
5204            None => zero_oid(self.format)?,
5205        };
5206        Ok(Some(ObjectStorageInfo {
5207            disk_size,
5208            deltabase,
5209        }))
5210    }
5211
5212    fn find_midx_pack_containing(
5213        &self,
5214        pack_dir: &Path,
5215        oid: &ObjectId,
5216    ) -> Result<Option<PackLookup>> {
5217        let midx_path = pack_dir.join("multi-pack-index");
5218        let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
5219            return Ok(None);
5220        };
5221        self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
5222    }
5223
5224    fn midx_oid_lookup_pack_paths(
5225        &self,
5226        pack_dir: &Path,
5227        midx: &MultiPackIndexOidLookup,
5228        oid: &ObjectId,
5229    ) -> Result<Option<PackLookup>> {
5230        let Some(entry) = midx.find(oid)? else {
5231            return Ok(None);
5232        };
5233        let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
5234            return Err(GitError::InvalidFormat(
5235                "multi-pack-index object points past pack table".into(),
5236            ));
5237        };
5238        let pack_file_name = pack_name
5239            .strip_suffix(".idx")
5240            .map(|stem| format!("{stem}.pack"))
5241            .unwrap_or_else(|| pack_name.to_string());
5242        let pack = pack_dir.join(pack_file_name);
5243        Ok(Some(PackLookup::from_path(pack, entry.offset)))
5244    }
5245
5246    fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
5247        let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
5248        let cache = self.multi_pack_oid_lookups.lock().ok()?;
5249        cache.get(&midx_path).map(Arc::clone)
5250    }
5251
5252    /// The pack registry for `pack_dir` *only if already scanned and cached* —
5253    /// never touches the filesystem. Used by the lookup hot path to skip
5254    /// per-object pack-dir metadata checks once a handle is warm. A cold cache
5255    /// returns `None`, so the caller falls back to the scanning path. A complete
5256    /// miss still forces one rescan, preserving the new-pack discovery semantics.
5257    fn cached_loaded_pack_registry(
5258        &self,
5259        _pack_dir: &Path,
5260    ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
5261        let cache = match self.pack_registry.lock() {
5262            Ok(cache) => cache,
5263            Err(_) => return Ok(None),
5264        };
5265        Ok(cache.as_ref().map(Arc::clone))
5266    }
5267}
5268
5269fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
5270    if prefix.len() < 4 || prefix.len() > format.hex_len() {
5271        return Err(GitError::InvalidObjectId(format!(
5272            "expected 4 to {} hex digits for {}, got {}",
5273            format.hex_len(),
5274            format.name(),
5275            prefix.len()
5276        )));
5277    }
5278    if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
5279        return Err(GitError::InvalidObjectId(format!(
5280            "non-hex object id prefix {prefix}"
5281        )));
5282    }
5283    Ok(())
5284}
5285
5286fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
5287    oid.to_hex()
5288        .as_bytes()
5289        .iter()
5290        .zip(prefix.as_bytes())
5291        .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
5292}
5293
5294fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
5295    match fs::metadata(pack_dir) {
5296        Ok(metadata) => Ok(metadata.modified().ok()),
5297        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
5298        Err(err) => Err(GitError::Io(err.to_string())),
5299    }
5300}
5301
5302/// Scan `pack_dir` for `.idx` files that have a matching `.pack` sibling and
5303/// parse each index into a registered pack. An `.idx` without its `.pack` is
5304/// skipped (an orphan index cannot serve objects), matching the prior per-read
5305/// behavior.
5306fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
5307    let modified = pack_dir_modified(pack_dir)?;
5308    let entries = match fs::read_dir(pack_dir) {
5309        Ok(entries) => entries,
5310        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
5311            return Ok(PackRegistrySnapshot::new(
5312                PackDirFingerprint {
5313                    modified,
5314                    idx_count: 0,
5315                    pack_count: 0,
5316                },
5317                Vec::new(),
5318            ));
5319        }
5320        Err(err) => return Err(GitError::Io(err.to_string())),
5321    };
5322
5323    let mut idx_paths = Vec::new();
5324    let mut idx_count = 0;
5325    let mut pack_count = 0;
5326    for entry in entries {
5327        let entry = entry?;
5328        let path = entry.path();
5329        match path.extension().and_then(|ext| ext.to_str()) {
5330            Some("idx") => {
5331                idx_count += 1;
5332                idx_paths.push(path);
5333            }
5334            Some("pack") => {
5335                pack_count += 1;
5336            }
5337            _ => {}
5338        }
5339    }
5340
5341    let mut packs = Vec::new();
5342    for idx in idx_paths {
5343        let pack = idx.with_extension("pack");
5344        let Ok(metadata) = fs::metadata(&pack) else {
5345            continue;
5346        };
5347        let modified = pack_sort_modified(&metadata);
5348        packs.push((
5349            modified,
5350            metadata.len(),
5351            Arc::new(RegisteredPack::new(idx, pack)),
5352        ));
5353    }
5354    // Git keeps a most-recently-used pack order; seed ours with newer/larger
5355    // packs before falling back to the path. In repositories with many packs,
5356    // this avoids parsing a long run of unrelated `.idx` files before the first
5357    // lookup establishes the recent-pack hint.
5358    packs.sort_by(|left, right| {
5359        right
5360            .0
5361            .cmp(&left.0)
5362            .then_with(|| right.1.cmp(&left.1))
5363            .then_with(|| left.2.idx.cmp(&right.2.idx))
5364    });
5365    let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
5366    Ok(PackRegistrySnapshot::new(
5367        PackDirFingerprint {
5368            modified,
5369            idx_count,
5370            pack_count,
5371        },
5372        packs,
5373    ))
5374}
5375
/// Sort key derived from a file's modification time: whole seconds and the
/// sub-second nanoseconds since the Unix epoch.
///
/// Falls back to `(0, 0)` when the timestamp is unavailable or lies before the
/// epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(modified) = metadata.modified() else {
        return (0, 0);
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
5388
5389/// Whether two pack registries reference the same pack/index paths (order is
5390/// already normalized by [`scan_pack_registry`]).
5391fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
5392    left.len() == right.len()
5393        && left
5394            .iter()
5395            .zip(right.iter())
5396            .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
5397}
5398
/// Collect the alternate object directories configured for `objects_dir`.
///
/// Sources, in the order their entries appear in the result:
///
/// 1. The `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    the platform's path-list separator (`:` on Unix, `;` on Windows) via
///    [`env::split_paths`]. Paths are kept as raw OS strings, so non-UTF-8
///    directories survive intact. Empty entries are ignored.
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines and
///    lines starting with `#` are skipped, a trailing `\r` is stripped, and
///    relative paths are resolved against `objects_dir`. Lines that are not
///    valid UTF-8 are skipped.
///
/// A missing or unreadable alternates file simply contributes no entries.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        // `split_paths` works on the raw OS string: no lossy UTF-8 conversion
        // and the correct separator on every platform.
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            // Tolerate CRLF line endings.
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
5429
/// [`ObjectReader`] backed by an on-disk object directory: loose objects,
/// packs, and alternates, plus the shallow-graft and promisor-object sets.
impl ObjectReader for FileObjectDatabase {
    /// Whether `oid` is a member of the set returned by `promisor_objects()`.
    fn is_promised_object(&self, oid: &ObjectId) -> bool {
        self.promisor_objects().contains(oid)
    }

    /// Whether any shallow graft is recorded for this database.
    ///
    /// The graft set is read once from the `shallow` file next to the objects
    /// directory (via `read_shallow_grafts`) and memoized in `shallow_grafts`;
    /// when the objects directory has no parent the set is empty.
    fn has_shallow_grafts(&self) -> bool {
        !self
            .shallow_grafts
            .get_or_init(|| {
                let shallow_file = self
                    .objects_dir
                    .parent()
                    .map(|git_dir| git_dir.join("shallow"));
                match shallow_file {
                    Some(path) => read_shallow_grafts(&path, self.format),
                    None => HashSet::new(),
                }
            })
            .is_empty()
    }

    /// Whether `oid` is listed in the shallow-graft set.
    ///
    /// Uses the same lazily initialized, memoized set as
    /// `has_shallow_grafts`.
    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
        self.shallow_grafts
            .get_or_init(|| {
                let shallow_file = self
                    .objects_dir
                    .parent()
                    .map(|git_dir| git_dir.join("shallow"));
                match shallow_file {
                    Some(path) => read_shallow_grafts(&path, self.format),
                    None => HashSet::new(),
                }
            })
            .contains(oid)
    }

    /// Read and decode `oid`, consulting every available source before giving
    /// up.
    ///
    /// Order of probes: the implied empty tree; a packed copy (preferred over
    /// loose storage); on a failed pack read, the loose copy, other packs and
    /// alternates; otherwise loose storage, packs again, alternates, and one
    /// final loose probe after invalidating the loose cache.
    ///
    /// # Errors
    ///
    /// When a packed copy was found but unreadable and no other source has the
    /// object, the pack error is returned. When nothing is found, a non-
    /// `NotFound` error from the first loose read is returned if there was
    /// one; otherwise the object-not-found error for a read is returned.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
        if let Some(object) = implied_empty_tree_object(self.format, oid) {
            return Ok(object);
        }
        // A corrupt loose copy must not shadow a good packed copy: git's
        // `oid_object_info_extended` consults every source, so a repacked object
        // whose loose file was later corrupted still reads fine from the pack. If
        // a packed copy exists, prefer it WITHOUT touching the corrupt loose file
        // (which would otherwise emit a spurious `inflate:` diagnostic on each
        // probe). Only when no pack copy exists do we read (and, if corrupt,
        // surface the error from) the loose file.
        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
            match self.read_packed_object_at_lookup(oid, &pack_lookup) {
                Ok(object) => return Ok(object),
                Err(GitError::NotFound(_)) => {}
                // A corrupt packed copy must not be fatal when another good copy
                // exists: git's `oid_object_info_extended` keeps consulting the
                // remaining sources (loose, other packs, alternates) when a pack
                // read fails. Fall through to the loose/other-pack probes and
                // only surface the packed error if every source comes up empty.
                Err(packed_err) => {
                    if let Ok(object) = self.loose.read_object(oid) {
                        return Ok(object);
                    }
                    // Try any *other* pack that also holds the object (a
                    // redundant copy survives one pack's corruption).
                    if let Some(object) =
                        self.read_packed_object_from_other_packs(oid, &pack_lookup)?
                    {
                        return Ok(object);
                    }
                    for alternate in &self.alternates {
                        if let Ok(object) =
                            Self::without_alternates(alternate, self.format).read_object(oid)
                        {
                            return Ok(object);
                        }
                    }
                    return Err(packed_err);
                }
            }
        }
        // Remember a non-`NotFound` loose failure so it can be reported if no
        // other source produces the object.
        let loose_err = match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => None,
            Err(err) => Some(err),
        };
        if let Some(object) = self.read_packed_object(oid)? {
            return Ok(object);
        }
        for alternate in &self.alternates {
            match Self::without_alternates(alternate, self.format).read_object(oid) {
                Ok(object) => return Ok(object),
                Err(GitError::NotFound(_)) => {}
                Err(err) => return Err(err),
            }
        }
        // Hard miss against every store. If an earlier enumeration built a loose
        // cache, an object written loose afterward by a sibling handle could have
        // been skipped above. Mirror git's `oid_object_info_extended`
        // reprepare-on-miss: drop stale cache state and retry an exact loose path
        // probe once before declaring the object missing.
        self.loose.invalidate_cache();
        match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => {}
            Err(err) => return Err(err),
        }
        // No good copy in any store. If the local loose copy was corrupt (not
        // merely absent), surface that error — it is more specific than a plain
        // "not found".
        if let Some(err) = loose_err {
            return Err(err);
        }
        Err(GitError::object_not_found_in(
            *oid,
            MissingObjectContext::Read,
        ))
    }
}
5546
5547impl FileObjectDatabase {
5548    fn promisor_objects(&self) -> &HashSet<ObjectId> {
5549        self.promisor_objects.get_or_init(|| {
5550            let mut promised =
5551                promisor_pack_object_ids(&self.objects_dir, self.format).unwrap_or_default();
5552            let mut pending = promised.iter().copied().collect::<Vec<_>>();
5553            while let Some(oid) = pending.pop() {
5554                let Ok(object) = self.read_object(&oid) else {
5555                    continue;
5556                };
5557                for link in promisor_object_links(self.format, &object) {
5558                    if promised.insert(link) {
5559                        pending.push(link);
5560                    }
5561                }
5562            }
5563            promised
5564        })
5565    }
5566}
5567
5568fn promisor_pack_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
5569    let pack_dir = objects_dir.join("pack");
5570    let mut oids = HashSet::new();
5571    if !pack_dir.exists() {
5572        return Ok(oids);
5573    }
5574    for entry in fs::read_dir(pack_dir)? {
5575        let path = entry?.path();
5576        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
5577            continue;
5578        }
5579        if !path.with_extension("pack").exists() || !path.with_extension("promisor").exists() {
5580            continue;
5581        }
5582        let index = PackIndex::parse(&fs::read(path)?, format)?;
5583        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
5584    }
5585    Ok(oids)
5586}
5587
5588fn promisor_object_links(format: ObjectFormat, object: &EncodedObject) -> Vec<ObjectId> {
5589    match object.object_type {
5590        ObjectType::Commit => Commit::parse_ref(format, &object.body)
5591            .map(|commit| {
5592                let mut links = Vec::with_capacity(commit.parents.len() + 1);
5593                links.push(commit.tree);
5594                links.extend(commit.parents);
5595                links
5596            })
5597            .unwrap_or_default(),
5598        ObjectType::Tree => TreeEntries::new(format, &object.body)
5599            .filter_map(|entry| entry.ok().map(|entry| entry.oid))
5600            .collect(),
5601        ObjectType::Tag => Tag::parse_ref(format, &object.body)
5602            .map(|tag| vec![tag.object])
5603            .unwrap_or_default(),
5604        ObjectType::Blob => Vec::new(),
5605    }
5606}
5607
5608impl ObjectWriter for FileObjectDatabase {
5609    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
5610        // Mirror git's freshen semantics (`write_object_file`:
5611        // `freshen_packed_object || freshen_loose_object`): an object already
5612        // present anywhere in the database — loose, packed, or through an
5613        // alternate — is not written again, so e.g. `git add` after
5614        // `git repack -ad` does not resurrect a loose copy of a packed object.
5615        let oid = object.object_id(self.format)?;
5616        if self.contains(&oid)? {
5617            return Ok(oid);
5618        }
5619        self.loose.write_object(object)
5620    }
5621}
5622
5623fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
5624    if path.exists() {
5625        return Ok(());
5626    }
5627    let parent = path
5628        .parent()
5629        .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
5630    fs::create_dir_all(parent)?;
5631    let temp_path = unique_temp_path(parent);
5632    let write_result = (|| -> Result<()> {
5633        {
5634            let mut file = fs::OpenOptions::new()
5635                .write(true)
5636                .create_new(true)
5637                .open(&temp_path)?;
5638            file.write_all(bytes)?;
5639            file.sync_all()?;
5640        }
5641        match fs::rename(&temp_path, path) {
5642            Ok(()) => Ok(()),
5643            Err(_) if path.exists() => {
5644                let _ = fs::remove_file(&temp_path);
5645                Ok(())
5646            }
5647            Err(err) => Err(GitError::Io(err.to_string())),
5648        }
5649    })();
5650    if write_result.is_err() {
5651        let _ = fs::remove_file(&temp_path);
5652    }
5653    write_result
5654}
5655
5656fn write_promisor_pack_sidecar(
5657    pack_dir: &Path,
5658    pack_name: &str,
5659    promisor: bool,
5660) -> Result<Option<PathBuf>> {
5661    if !promisor {
5662        return Ok(None);
5663    }
5664    let path = pack_dir.join(format!("{pack_name}.promisor"));
5665    write_pack_component(&path, b"")?;
5666    Ok(Some(path))
5667}
5668
/// Maximum number of bytes git will inflate when reading a loose object's
/// `"<type> <size>\0"` header (git's `MAX_HEADER_LEN` in object-file.c). The NUL
/// terminator must land within this window, so a header of 32 or more non-NUL
/// bytes is rejected as too long.
///
/// In this file the value sizes the output buffer of `inflate_loose_header`,
/// bounds the NUL search in `framed_loose_header_terminated`, and appears in
/// the text produced by `loose_header_too_long`.
const MAX_LOOSE_HEADER_LEN: usize = 32;
5674
5675/// git's exact `error:`-level diagnostic for a loose object whose header overflows
5676/// `MAX_LOOSE_HEADER_LEN` (object-file.c: `error(_("header for %s too long, exceeds
5677/// %d bytes"), ...)`). Shared by the header-only and full-read paths so both surface
5678/// byte-identical text.
5679fn loose_header_too_long(oid: &ObjectId) -> GitError {
5680    GitError::InvalidObject(format!(
5681        "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
5682    ))
5683}
5684
5685/// git's `error:`-level diagnostic when the loose framing header cannot be inflated at
5686/// all (object-file.c `loose_object_info`, the `ULHR_BAD` arm: `error(_("unable to
5687/// unpack %s header"), ...)`).
5688fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
5689    GitError::InvalidObject(format!("unable to unpack {oid} header"))
5690}
5691
/// Classify an inflate failure from the two-byte zlib stream header, returning
/// the text git-zlib.c would print via `error("inflate: %s (%s)", ...)`.
///
/// The checks follow zlib's own `inflate()` HEAD-state validation, in order:
///
/// 1. the FCHECK checksum over CMF+FLG (big-endian value divisible by 31);
/// 2. the compression method (low nibble of CMF must be 8);
/// 3. the window size (high nibble of CMF must not exceed 7);
/// 4. the FDICT preset-dictionary bit (zlib reports `Z_NEED_DICT` with a NULL
///    `msg`, which git renders as "(no message)").
///
/// Returns `None` when `input` has fewer than two bytes or the header passes
/// every check: flate2 does not surface zlib's per-case `msg` strings for
/// failures past the stream header, so no diagnostic is fabricated for them.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let header_word = u16::from_be_bytes([cmf, flg]);
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let wants_dictionary = flg & 0x20 != 0;

    if header_word % 31 != 0 {
        Some("inflate: data stream error (incorrect header check)")
    } else if method != 8 {
        Some("inflate: data stream error (unknown compression method)")
    } else if window_bits > 7 {
        Some("inflate: data stream error (invalid window size)")
    } else if wants_dictionary {
        Some("inflate: needs dictionary (no message)")
    } else {
        None
    }
}
5715
5716/// Print the `error: inflate: ...` line git's zlib wrapper emits the moment
5717/// `inflate()` fails, when the failure is classifiable from the stream header.
5718fn emit_inflate_diagnostic(input: &[u8]) {
5719    if let Some(diagnostic) = inflate_header_diagnostic(input) {
5720        eprintln!("error: {diagnostic}");
5721    }
5722}
5723
/// Integrity verdict for a single loose object file, as classified by
/// [`LooseObjectStore::verify_object`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// Inflated, parsed, and re-hashed to its path-derived oid.
    Ok,
    /// Readable and well-formed, but its content hashes to a different oid
    /// (a loose file stored under the wrong path). `actual` is the oid the
    /// content really hashes to.
    HashMismatch { actual: ObjectId },
    /// Unreadable: corrupt zlib stream, truncated content, trailing garbage
    /// after the stream, or unparseable header.
    /// The `error:`-level diagnostics were already printed to stderr.
    Corrupt,
}
5737
/// Loose-object storage rooted at an `objects` directory.
///
/// Each object lives in its own zlib-compressed file at
/// `<objects_dir>/<first two hex digits>/<remaining hex digits>` (see
/// `object_path`). Clones share one presence cache through the `Arc`.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Directory that holds the two-hex-digit fanout subdirectories.
    objects_dir: PathBuf,
    /// Object format every oid handled by this store must use (enforced by
    /// `validate_oid_format`).
    format: ObjectFormat,
    /// Lazily-populated set of loose object ids present on disk, mirroring git's
    /// `loose_objects_cache` (object-file.c). A lookup scans the queried
    /// `objects/XX/` fanout once; afterward misses in that fanout are in-memory
    /// checks instead of failed exact-path opens. Shared across
    /// `FileObjectDatabase` clones via `Arc` so a write through one handle is
    /// visible to reads through another; cleared by `refresh_read_cache` so
    /// objects installed out-of-band (fetch, repack) become visible. Writes
    /// extend the set in place rather than invalidating it.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
5752
5753impl LooseObjectStore {
5754    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
5755        Self {
5756            objects_dir: objects_dir.into(),
5757            format,
5758            loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
5759        }
5760    }
5761
5762    /// Whether `oid` is present according to the loose-object cache, populating
5763    /// the cache on first use. Returns `None` when the lock cannot be trusted or
5764    /// the scan fails; callers should fall back to an exact filesystem probe in
5765    /// that case so a cache-building problem cannot change read semantics.
5766    fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
5767        let mut guard = self.loose_cache.lock().ok()?;
5768        let fanout = oid.as_bytes()[0];
5769        if !guard.loaded_fanouts.contains(&fanout) {
5770            collect_loose_fanout_object_ids(
5771                &self.objects_dir,
5772                self.format,
5773                fanout,
5774                &mut guard.objects,
5775            )
5776            .ok()?;
5777            guard.loaded_fanouts.insert(fanout);
5778        }
5779        Some(guard.objects.contains(oid))
5780    }
5781
5782    /// Populate the loose-object cache and return the sorted ids. This mirrors
5783    /// git's `odb_loose_cache` lazy fill and is reserved for operations that
5784    /// really need loose-object enumeration.
5785    fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
5786        if let Ok(mut guard) = self.loose_cache.lock() {
5787            guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
5788            guard.loaded_fanouts = (0..=u8::MAX).collect();
5789            let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
5790            ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
5791            return Ok(ids);
5792        }
5793        loose_object_ids(&self.objects_dir, self.format)
5794    }
5795
5796    /// Record `oid` as present in loose storage so subsequent reads find it
5797    /// without a rescan. A no-op when the cache has not been populated yet (the
5798    /// eventual lazy scan will pick the object up) or the lock is poisoned.
5799    fn note_loose_write(&self, oid: ObjectId) {
5800        if let Ok(mut guard) = self.loose_cache.lock() {
5801            guard.objects.insert(oid);
5802        }
5803    }
5804
5805    /// Drop the in-memory loose set so the next access rescans the fanout. Called
5806    /// by `FileObjectDatabase::refresh_read_cache` after out-of-band installs.
5807    pub(crate) fn invalidate_cache(&self) {
5808        if let Ok(mut guard) = self.loose_cache.lock() {
5809            *guard = LoosePresenceCache::default();
5810        }
5811    }
5812
5813    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
5814        Self::new(repository_objects_dir(git_dir), format)
5815    }
5816
5817    fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
5818        if oid.format() != self.format {
5819            return Err(GitError::InvalidObjectId(format!(
5820                "object {oid} uses {}, store uses {}",
5821                oid.format().name(),
5822                self.format.name()
5823            )));
5824        }
5825        Ok(())
5826    }
5827
5828    pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
5829        self.validate_oid_format(oid)?;
5830        let hex = oid.to_hex();
5831        Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
5832    }
5833
5834    pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
5835        self.validate_oid_format(oid)?;
5836        if self.cached_loose_presence(oid) == Some(false) {
5837            return Ok(false);
5838        }
5839        let path = self.object_path(oid)?;
5840        Ok(path.exists())
5841    }
5842
5843    pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
5844        self.validate_oid_format(oid)?;
5845        if self.cached_loose_presence(oid) == Some(false) {
5846            return Ok(None);
5847        }
5848        let path = self.object_path(oid)?;
5849        match fs::metadata(path) {
5850            Ok(metadata) => Ok(Some(metadata.len())),
5851            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
5852            Err(err) => Err(GitError::Io(err.to_string())),
5853        }
5854    }
5855
5856    /// The object type and content size of `oid` from loose storage, inflating only
5857    /// the framing header (`"<type> <size>\0"`) and not the body. Output-limited
5858    /// reads keep miniz from inflating past the header even for large objects.
5859    /// Returns `Ok(None)` when the loose object is absent.
5860    pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
5861        self.validate_oid_format(oid)?;
5862        if self.cached_loose_presence(oid) == Some(false) {
5863            return Ok(None);
5864        }
5865        let path = self.object_path(oid)?;
5866        let compressed = match fs::read(&path) {
5867            Ok(compressed) => compressed,
5868            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
5869            Err(err) => return Err(GitError::Io(err.to_string())),
5870        };
5871        match inflate_loose_header(&compressed)? {
5872            LooseHeader::Ok(header) => {
5873                let header = std::str::from_utf8(&header)
5874                    .map_err(|err| GitError::InvalidObject(err.to_string()))?;
5875                let (kind, size) = header
5876                    .split_once(' ')
5877                    .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
5878                let object_type = kind.parse::<ObjectType>()?;
5879                let size = size
5880                    .parse::<u64>()
5881                    .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
5882                Ok(Some((object_type, size)))
5883            }
5884            LooseHeader::Bad => {
5885                // git's ULHR_BAD: the zlib wrapper's `error: inflate: ...` line, then
5886                // "unable to unpack <oid> header".
5887                emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
5888                Err(loose_unpack_header_failed(oid))
5889            }
5890            LooseHeader::TooLong => {
5891                // git inflates only the first `MAX_LOOSE_HEADER_LEN` bytes
5892                // (object-file.c `unpack_loose_header`) and reports ULHR_TOO_LONG when
5893                // no NUL terminator lands within them — whether the stream simply ends
5894                // early or overflows the window. Both collapse to the same diagnostic.
5895                Err(loose_header_too_long(oid))
5896            }
5897        }
5898    }
5899
    /// Loose object ids in this store, sorted by hex.
    ///
    /// Delegates to `loose_object_ids_cached`, which orders ids by their raw
    /// bytes and, when the cache lock is usable, refreshes the presence cache
    /// from a full scan as a side effect.
    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
        self.loose_object_ids_cached()
    }
5904
    /// fsck's loose-object integrity probe, mirroring C git's `read_loose_object`
    /// (object-file.c) as called from `fsck_loose` (builtin/fsck.c): inflate and
    /// parse the file at `oid`'s loose path, then re-hash its content against the
    /// path-derived oid. `display_path` appears verbatim in the `error:`-level
    /// diagnostics — the path-form messages of `read_loose_object` ("unable to
    /// unpack header of <path>"), unlike the oid-form messages of the normal read
    /// path. Returns `Ok(None)` when no loose file exists for `oid`.
    ///
    /// The checks run in this order, and the first failure decides the verdict:
    /// inflate failure, missing header terminator, trailing bytes after the zlib
    /// stream, body shorter than the declared size, header/body parse failure,
    /// and finally the hash comparison. Every `Corrupt` verdict has already
    /// printed its diagnostics to stderr; `HashMismatch` prints nothing here.
    ///
    /// # Errors
    /// Returns `Err` only for a format mismatch on `oid`, a read failure other
    /// than "not found", or a failure while re-hashing the parsed object.
    /// Corruption is reported through the `Ok(Some(..))` verdict instead.
    pub fn verify_object(
        &self,
        oid: &ObjectId,
        display_path: &str,
    ) -> Result<Option<LooseObjectIntegrity>> {
        // The file is read directly; the presence cache is not consulted here.
        let path = self.object_path(oid)?;
        let compressed = match fs::read(&path) {
            Ok(compressed) => compressed,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
            Err(err) => return Err(GitError::Io(err.to_string())),
        };
        let mut decoder = ZlibDecoder::new(compressed.as_slice());
        let mut framed = Vec::new();
        if decoder.read_to_end(&mut framed).is_err() {
            emit_inflate_diagnostic(&compressed);
            // git inflates the header first (`unpack_loose_header`), then the body
            // (`unpack_loose_rest`). If the header inflated (its NUL is visible in
            // the partial output) but the body broke, that is a *content*
            // corruption: git's `unpack_loose_rest` prints `corrupt loose object
            // '<oid>'` (status != Z_STREAM_END), then `read_loose_object` adds
            // `unable to unpack contents of <path>`. If inflation died before the
            // header materialized, only the header message fires.
            if framed_loose_header_terminated(&framed) {
                eprintln!("error: corrupt loose object '{oid}'");
                eprintln!("error: unable to unpack contents of {display_path}");
            } else {
                eprintln!("error: unable to unpack header of {display_path}");
            }
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        if !framed_loose_header_terminated(&framed) {
            // ULHR_TOO_LONG collapses into the same path-form message here: C's
            // `read_loose_object` treats every non-OK `unpack_loose_header` alike.
            eprintln!("error: unable to unpack header of {display_path}");
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        // git's `unpack_loose_rest`/`check_stream_oid` reject trailing bytes after
        // the zlib stream: a fully-inflated object whose compressed input was not
        // entirely consumed is `garbage at end of loose object '<oid>'`, then
        // `object corrupt or missing: <path>` from `fsck_loose`. (read_to_end
        // stops at Z_STREAM_END and silently ignores the trailing bytes, so we
        // compare consumed input against the file size ourselves.)
        if (decoder.total_in() as usize) < compressed.len() {
            // git's `unpack_loose_rest` prints `garbage at end of loose object`
            // then returns NULL, so `read_loose_object` also prints `unable to
            // unpack contents of <path>`.
            eprintln!("error: garbage at end of loose object '{oid}'");
            eprintln!("error: unable to unpack contents of {display_path}");
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        // A truncated object can inflate to a clean stream end yet yield fewer
        // body bytes than the header's declared size. git's `unpack_loose_rest`
        // inflates exactly `size` bytes and, finding the stream ends short,
        // prints `corrupt loose object '<oid>'`; `read_loose_object` then adds
        // `unable to unpack contents of <path>`. Detect the short body here so it
        // is not misreported as a header-parse failure.
        if let Some(declared) = loose_header_declared_size(&framed) {
            // Body length is everything after the header's NUL; the `min` keeps
            // the subtraction from underflowing.
            let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
            let body_len = framed.len() - (nul + 1).min(framed.len());
            if body_len < declared {
                eprintln!("error: corrupt loose object '{oid}'");
                eprintln!("error: unable to unpack contents of {display_path}");
                return Ok(Some(LooseObjectIntegrity::Corrupt));
            }
        }
        let Ok(object) = parse_framed_object(&framed) else {
            // Distinguish git's two header-parse failures: a structurally valid
            // `"<word> <size>\0"` header whose *type word* is not a known object
            // type yields `unable to parse type from header '<header>'`, while a
            // genuinely malformed header yields `unable to parse header`.
            if let Some(header) = loose_header_with_unknown_type(&framed) {
                eprintln!("error: unable to parse type from header '{header}' of {display_path}");
            } else {
                eprintln!("error: unable to parse header of {display_path}");
            }
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        };
        // Re-hash the parsed object and compare with the oid implied by the path.
        let actual = object.object_id(self.format)?;
        if &actual != oid {
            return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
        }
        Ok(Some(LooseObjectIntegrity::Ok))
    }
5995}
5996
5997/// Whether the inflated framing bytes contain the header's NUL terminator within
5998/// git's `MAX_HEADER_LEN` window (object-file.c `unpack_loose_header`'s success
5999/// condition).
6000fn framed_loose_header_terminated(framed: &[u8]) -> bool {
6001    framed
6002        .iter()
6003        .take(MAX_LOOSE_HEADER_LEN)
6004        .any(|byte| *byte == 0)
6005}
6006
6007/// If the framing has a structurally valid `"<word> <size>\0"` header whose body
6008/// length matches `<size>` but whose `<word>` is not a known object type, return
6009/// the header string (the bytes before the NUL). Mirrors git's
6010/// `parse_loose_header` reporting `unable to parse type from header '<header>'`.
6011fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
6012    let nul = framed.iter().position(|&b| b == 0)?;
6013    let header = std::str::from_utf8(&framed[..nul]).ok()?;
6014    let (kind, size) = header.split_once(' ')?;
6015    let size: usize = size.parse().ok()?;
6016    // Body length must match the declared size (otherwise it is a different
6017    // corruption, handled by the generic path).
6018    if framed.len() - (nul + 1) != size {
6019        return None;
6020    }
6021    // A known type word would have parsed successfully upstream; only return
6022    // when the word is genuinely unknown.
6023    if kind.parse::<ObjectType>().is_ok() {
6024        return None;
6025    }
6026    Some(header.to_string())
6027}
6028
/// Extract the size a loose object's `"<type> <size>\0"` header declares.
///
/// Returns `None` when there is no NUL terminator, the header is not UTF-8,
/// it contains no space, or the text after the first space is not a valid
/// `usize`. Callers use the value to spot a body that inflated short of its
/// declared length (a truncated object).
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let nul = framed.iter().position(|&byte| byte == 0)?;
    std::str::from_utf8(&framed[..nul])
        .ok()
        .and_then(|header| header.split_once(' '))
        .and_then(|(_kind, size)| size.parse::<usize>().ok())
}
6038
/// Outcome of inflating a loose object's header, mirroring git's
/// `unpack_loose_header` result codes (object-file.c `enum
/// unpack_loose_header_result`).
enum LooseHeader {
    /// ULHR_OK: a NUL-terminated header was found within the window. Carries the
    /// header bytes up to (not including) the NUL.
    Ok(Vec<u8>),
    /// ULHR_BAD: the zlib stream would not inflate (status != Z_OK/Z_STREAM_END).
    Bad,
    /// ULHR_TOO_LONG: no NUL was found in the inflated header window, either
    /// because the output filled the window or because the stream ended early.
    TooLong,
}
6053
6054/// Inflate a loose object's *header* exactly as git's `unpack_loose_header` does
6055/// (object-file.c): a single bounded inflate into a `MAX_LOOSE_HEADER_LEN`-byte
6056/// output buffer, then look for the header-terminating NUL in what came out.
6057///
6058/// The byte budget is load-bearing for corruption parity: git inflates only up to
6059/// `MAX_HEADER_LEN` (32) bytes of *output* before stopping, so a `cat-file -s`/`-t`
6060/// header read detects a zlib data error only when it lands within those first 32
6061/// inflated bytes (the header plus the start of the body for a small object) — and
6062/// silently returns the header for corruption buried deeper in the body, which the
6063/// full-object read path catches instead. A byte-by-byte loop that stopped at the
6064/// NUL would never inflate into the corrupt region and miss the bit-error case
6065/// (t1060 "getting type of a corrupt blob fails"); feeding too much output budget
6066/// would over-detect relative to git. So this matches git's exact window.
6067fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
6068    let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
6069    let mut decompress = Decompress::new(true);
6070    // git feeds the whole mapped file as `avail_in` and inflates once into a
6071    // 32-byte `avail_out`; zlib stops at the output limit (Z_OK with avail_out==0)
6072    // or at the stream's end, propagating Z_DATA_ERROR for a corrupt stream.
6073    let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
6074    let produced = decompress.total_out() as usize;
6075    match status {
6076        Ok(_) => {
6077            let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
6078            match window.iter().position(|&byte| byte == 0) {
6079                Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
6080                // No NUL within the window: either the stream ended early or the
6081                // header overflows `MAX_LOOSE_HEADER_LEN`. git collapses both into
6082                // ULHR_TOO_LONG (object-file.c `unpack_loose_header`).
6083                None => Ok(LooseHeader::TooLong),
6084            }
6085        }
6086        // Any zlib error before a NUL materializes is git's ULHR_BAD.
6087        Err(_) => Ok(LooseHeader::Bad),
6088    }
6089}
6090
6091impl ObjectReader for LooseObjectStore {
6092    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
6093        self.validate_oid_format(oid)?;
6094        // Skip the `open()` (and its ENOENT) when an already-built loose cache
6095        // knows the id is absent. Without a cache, use an exact path probe; a
6096        // full fanout scan is far more expensive for one-shot packed-object reads.
6097        if self.cached_loose_presence(oid) == Some(false) {
6098            return Err(GitError::object_not_found_in(
6099                *oid,
6100                MissingObjectContext::Read,
6101            ));
6102        }
6103        let path = self.object_path(oid)?;
6104        let compressed = match fs::read(&path) {
6105            Ok(compressed) => compressed,
6106            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
6107                return Err(GitError::object_not_found_in(
6108                    *oid,
6109                    MissingObjectContext::Read,
6110                ));
6111            }
6112            Err(err) => return Err(GitError::Io(err.to_string())),
6113        };
6114        let mut decoder = ZlibDecoder::new(compressed.as_slice());
6115        let mut framed = Vec::new();
6116        if decoder.read_to_end(&mut framed).is_err() {
6117            emit_inflate_diagnostic(&compressed);
6118            // A stream that dies before the framing header materializes is git's
6119            // ULHR_BAD ("unable to unpack <oid> header"); with the header intact,
6120            // the body is what broke (`unpack_loose_rest`'s "corrupt loose
6121            // object").
6122            if !framed_loose_header_terminated(&framed) {
6123                return Err(loose_unpack_header_failed(oid));
6124            }
6125            return Err(GitError::InvalidObject(format!(
6126                "corrupt loose object '{oid}'"
6127            )));
6128        }
6129        // git only inflates the first `MAX_LOOSE_HEADER_LEN` bytes looking for the
6130        // header's NUL terminator before parsing the type; an over-long header is
6131        // rejected here (with git's diagnostic) rather than failing later as an
6132        // "unknown object type". Mirror that so `cat-file -p` matches upstream.
6133        if framed
6134            .iter()
6135            .take(MAX_LOOSE_HEADER_LEN)
6136            .all(|byte| *byte != 0)
6137        {
6138            return Err(loose_header_too_long(oid));
6139        }
6140        let object = parse_framed_object(&framed)?;
6141        // Trust the loose object's on-disk name rather than re-hashing its full body
6142        // on every read (see `verify_reads_enabled`); use `validate`/fsck or
6143        // `SLEY_VERIFY_READS` for an explicit integrity check.
6144        if verify_reads_enabled() {
6145            let actual = object.object_id(self.format)?;
6146            if &actual != oid {
6147                return Err(GitError::InvalidObject(format!(
6148                    "loose object {} hashes to {actual}",
6149                    path.display()
6150                )));
6151            }
6152        }
6153        Ok(Arc::new(object))
6154    }
6155}
6156
6157impl ObjectWriter for LooseObjectStore {
6158    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
6159        let oid = object.object_id(self.format)?;
6160        let path = self.object_path(&oid)?;
6161        if path.exists() {
6162            self.note_loose_write(oid);
6163            return Ok(oid);
6164        }
6165        let parent = path
6166            .parent()
6167            .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
6168        fs::create_dir_all(parent)?;
6169        let temp_path = unique_temp_path(parent);
6170        let write_result = (|| -> Result<()> {
6171            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6172            encoder.write_all(&object.framed_bytes())?;
6173            let compressed = encoder.finish()?;
6174            {
6175                let mut file = fs::OpenOptions::new()
6176                    .write(true)
6177                    .create_new(true)
6178                    .open(&temp_path)?;
6179                file.write_all(&compressed)?;
6180                // No fsync: git's default `core.fsync=none` fsyncs nothing on the
6181                // loose-object write path (object-file.c writes the temp file and
6182                // renames it without syncing unless `core.fsync` names
6183                // `loose-object`/`objects`/`all`, which it does not by default).
6184                // A per-object sync_all() here made `git add` of N files cost N
6185                // fsyncs — the dominant term in sley#27's 10x `add -u` slowdown —
6186                // for durability git itself does not provide by default. The
6187                // create_new temp + atomic rename below still guarantees the
6188                // object never appears half-written under its final name.
6189            }
6190            match fs::rename(&temp_path, &path) {
6191                Ok(()) => Ok(()),
6192                Err(_) if path.exists() => {
6193                    let _ = fs::remove_file(&temp_path);
6194                    Ok(())
6195                }
6196                Err(err) => Err(GitError::Io(err.to_string())),
6197            }
6198        })();
6199        if write_result.is_err() {
6200            let _ = fs::remove_file(&temp_path);
6201        }
6202        write_result?;
6203        self.note_loose_write(oid);
6204        Ok(oid)
6205    }
6206}
6207
6208fn unique_temp_path(parent: &Path) -> PathBuf {
6209    let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
6210    parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
6211}
6212
6213#[cfg(test)]
6214mod tests {
6215    use super::*;
6216    use sley_core::BString;
6217    use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
6218    use sley_pack::{PackFile, PackWriteOptions};
6219
6220    fn blob_of(byte: u8, len: usize) -> EncodedObject {
6221        EncodedObject::new(ObjectType::Blob, vec![byte; len])
6222    }
6223
6224    fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
6225        Arc::new(blob_of(byte, len))
6226    }
6227
6228    fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
6229        reader
6230            .read_object(oid)
6231            .expect("test operation should succeed")
6232            .as_ref()
6233            .clone()
6234    }
6235
#[test]
fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
    // Room for two ~1 KiB entries, but a third does not fit.
    let per_entry = cached_object_cost(&blob_of(0, 1000));
    let budget = per_entry * 2 + 8;
    let mut lru = LruCache::<u32>::new(budget);
    for (key, fill) in [(1, b'a'), (2, b'b')] {
        lru.put(key, cached_blob_of(fill, 1000));
    }
    // Reading key 1 leaves key 2 as the least-recently-used entry.
    assert!(lru.get(&1).is_some());
    lru.put(3, cached_blob_of(b'c', 1000));
    // Only key 2 (the LRU) is gone; keys 1 and 3 are still cached.
    for (key, expect_present) in [(1, true), (2, false), (3, true)] {
        assert_eq!(lru.get(&key).is_some(), expect_present);
    }
}
6251
#[test]
fn lru_cache_zero_budget_is_inert() {
    // With a zero-byte budget a put must not make the key readable.
    let mut lru = LruCache::<u32>::new(0);
    let blob = cached_blob_of(b'a', 16);
    lru.put(1, blob);
    let lookup = lru.get(&1);
    assert!(lookup.is_none());
}
6258
#[test]
fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
    let budget = cached_object_cost(&blob_of(0, 100));
    let mut lru = LruCache::<u32>::new(budget);

    let fitting = cached_blob_of(b'a', 50);
    lru.put(1, fitting);
    assert!(lru.get(&1).is_some());

    // An object too large for the budget is not stored, and the value that was
    // previously cached under the same key goes away with it, so a stale value
    // is never served for that key.
    let oversized = cached_blob_of(b'b', 10_000);
    lru.put(1, oversized);
    assert!(lru.get(&1).is_none());

    // The rejected insert must not corrupt the accounting: a later fitting
    // insert under a different key still lands.
    let another = cached_blob_of(b'c', 50);
    lru.put(2, another);
    assert!(lru.get(&2).is_some());
}
6273
#[test]
fn lru_cache_replacing_entry_updates_byte_accounting() {
    // The budget fits two 500-byte objects with some headroom, but not a
    // 500-byte object together with a ~1900-byte one.
    let small_cost = cached_object_cost(&blob_of(0, 500));
    let budget = small_cost * 2 + 200;
    let mut lru = LruCache::<u32>::new(budget);
    for (key, fill) in [(1, b'a'), (2, b'b')] {
        lru.put(key, cached_blob_of(fill, 500));
    }
    // Touch 1 and then 2, which leaves key 2 as the most recently used.
    for key in [1, 2] {
        assert!(lru.get(&key).is_some());
    }
    // Overwrite key 2 with a larger value: it fits the budget by itself but
    // pushes the running total over, so the LRU entry (key 1) has to go while
    // the replaced key 2 stays. This covers the replace-path accounting.
    let bigger = cached_blob_of(b'b', 1000);
    lru.put(2, bigger);
    assert!(lru.get(&2).is_some());
    assert!(lru.get(&1).is_none());
}
6292
#[test]
fn write_and_validate_blob() {
    // Writing "hello\n" as a SHA-1 blob must yield the pinned object id, and
    // the stored object must pass validation.
    let store = ObjectDatabase::new(ObjectFormat::Sha1);
    let hello = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let written = store
        .write_object(hello)
        .expect("test operation should succeed");
    assert_eq!(written.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
    store
        .validate(&written)
        .expect("test operation should succeed");
}
6302
#[test]
fn loose_store_writes_and_reads_object() {
    // Scratch directory made unique by the process id and the shared counter.
    let pid = std::process::id();
    let serial = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
    let scratch = std::env::temp_dir().join(format!("sley-loose-store-{}-{}", pid, serial));
    let store = LooseObjectStore::new(scratch.join("objects"), ObjectFormat::Sha1);

    let blob = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = store
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // The object round-trips through the store and exists at its loose path.
    let round_tripped = read_object_for_assert(&store, &oid);
    assert_eq!(round_tripped, blob);
    let on_disk = store
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(on_disk.exists());

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
6324
#[test]
fn read_header_detects_corruption_within_gits_header_window() {
    // git's `unpack_loose_header` inflates just the first MAX_HEADER_LEN (32)
    // output bytes, and a zlib data error inside that window makes `cat-file
    // -s`/`-t` fail (ULHR_BAD → "unable to unpack header"). A header read that
    // went byte by byte and stopped at the NUL would never inflate into the
    // damaged region and would quietly report a bogus size — the t1060
    // "getting type of a corrupt blob fails" bug. So: damage one byte of the
    // deflate stream of a tiny object, where the damage falls within the first
    // 32 inflated bytes.
    let root = temp_root("sley-loose-header-corrupt");
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
    let tiny_blob = EncodedObject::new(ObjectType::Blob, b"content\n".to_vec());
    let oid = store
        .write_object(tiny_blob)
        .expect("test operation should succeed");
    let loose_path = store
        .object_path(&oid)
        .expect("test operation should succeed");

    // Offset 10 lies inside the deflate stream (beyond the 2-byte zlib header)
    // and, for an 8-byte blob, decodes into the first 32 output bytes. Zeroing
    // it breaks inflation, like t1060's `corrupt_byte HEAD:content.t 10`.
    let mut compressed = fs::read(&loose_path).expect("test operation should succeed");
    compressed[10] = 0;
    fs::write(&loose_path, &compressed).expect("test operation should succeed");
    store.invalidate_cache();

    let failure = store
        .read_header(&oid)
        .expect_err("corrupt loose header must fail like git's ULHR_BAD");
    let msg = failure.to_string();
    let looks_like_ulhr_bad = msg.contains("unable to unpack") && msg.contains(&oid.to_hex());
    assert!(
        looks_like_ulhr_bad,
        "expected git's ULHR_BAD message, got: {msg}"
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6360
#[test]
fn read_header_ignores_corruption_past_gits_header_window() {
    // Same as git: damage deeper than the 32-byte header window is NOT noticed
    // by a header-only read (`cat-file -s` still reports the size); it is the
    // full-object read path that catches it. Detecting more here would diverge
    // from upstream for large objects whose header is clean.
    let root = temp_root("sley-loose-header-deep-corrupt");
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);

    // Incompressible body: the deflate stream is long, so a byte in its middle
    // is far beyond the 32 inflated header-window bytes.
    let mut body = Vec::with_capacity(4096);
    for i in 0..4096u32 {
        body.push(i.wrapping_mul(2654435761) as u8);
    }
    let expected_size = body.len() as u64;
    let oid = store
        .write_object(EncodedObject::new(ObjectType::Blob, body))
        .expect("test operation should succeed");
    let loose_path = store
        .object_path(&oid)
        .expect("test operation should succeed");

    // Flip every bit of the byte halfway through the compressed file.
    let mut compressed = fs::read(&loose_path).expect("test operation should succeed");
    let midpoint = compressed.len() / 2;
    compressed[midpoint] ^= 0xff;
    fs::write(&loose_path, &compressed).expect("test operation should succeed");
    store.invalidate_cache();

    let header = store
        .read_header(&oid)
        .expect("header-only read must still succeed for deep body corruption");
    assert_eq!(header, Some((ObjectType::Blob, expected_size)));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6392
#[test]
fn file_database_reads_object_from_pack_index() {
    // Drop a hand-written pack + index pair into objects/pack and check that
    // a freshly opened database finds and decodes the object.
    let root = temp_root("sley-file-odb-pack");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let packed_blob = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
    let oid = packed_blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&packed_blob))
        .expect("test operation should succeed");

    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    let decoded = read_object_for_assert(&db, &oid);
    assert_eq!(decoded, packed_blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6422
#[test]
fn file_database_loose_cache_observes_same_process_write_after_miss() {
    let root = temp_root("sley-file-odb-loose-cache-write");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let late_blob = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
    let oid = late_blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // First read: the object does not exist yet, so the lookup is a miss.
    let first_attempt = db.read_object(&oid);
    assert!(matches!(first_attempt, Err(GitError::NotFound(_))));

    // After writing through the loose store, the very same database handle
    // must now return the object.
    db.loose()
        .write_object(late_blob.clone())
        .expect("test operation should succeed");
    let second_attempt = read_object_for_assert(&db, &oid);
    assert_eq!(second_attempt, late_blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6443
#[test]
fn object_presence_checker_observes_same_process_loose_write_after_miss() {
    let root = temp_root("sley-presence-checker-loose-cache-write");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let mut checker = db.presence_checker();

    let late_blob =
        EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
    let oid = late_blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // The checker reports absence before the object is written...
    let present_before = checker
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(!present_before);

    db.loose()
        .write_object(late_blob)
        .expect("test operation should succeed");

    // ...and presence afterwards, using the same checker instance.
    let present_after = checker
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(present_after);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6473
#[test]
fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
    let root = temp_root("sley-read-object-header");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Loose case: a header read inflates the framing only, never the body.
    let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
    let loose_oid = db
        .write_object(loose.clone())
        .expect("test operation should succeed");

    // Packed cases. One of them is an ofs-delta: its *result* size is stored
    // in the delta stream rather than the pack entry header, and its type
    // comes from the base at the end of the chain.
    let shared_prefix = vec![b'a'; 4096];
    let base = EncodedObject::new(ObjectType::Blob, shared_prefix.clone());
    let mut child_body = shared_prefix;
    child_body.extend_from_slice(b" plus a deltified tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let commitish =
        EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());

    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let commit_oid = commitish
        .object_id(format)
        .expect("test operation should succeed");

    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack_members = [base.clone(), child.clone(), commitish.clone()];
    let pack = PackFile::write_packed_with_options(&pack_members, format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    // For every object and storage class the header read has to agree with a
    // full decode, without the body ever being materialized.
    let expectations = [
        (&loose_oid, (ObjectType::Blob, loose.body.len() as u64)),
        (&base_oid, (ObjectType::Blob, base.body.len() as u64)),
        (&child_oid, (ObjectType::Blob, child.body.len() as u64)),
        (&commit_oid, (ObjectType::Commit, commitish.body.len() as u64)),
    ];
    for (oid, want) in expectations {
        let header = db
            .read_object_header(oid)
            .expect("test operation should succeed");
        assert_eq!(header, Some(want), "header for {oid}");

        let full = db.read_object(oid).expect("test operation should succeed");
        let header_again = db
            .read_object_header(oid)
            .expect("test operation should succeed");
        assert_eq!(
            header_again,
            Some((full.object_type, full.body.len() as u64))
        );
    }

    // An id that was never written has no header at all.
    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_header = db
        .read_object_header(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_header, None);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6549
#[test]
fn object_storage_info_reports_loose_packed_and_delta_metadata() {
    let root = temp_root("sley-object-storage-info");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Loose object: disk size equals the file length, delta base is the zero id.
    let loose_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"loose storage object\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let loose_path = db
        .loose()
        .object_path(&loose_oid)
        .expect("test operation should succeed");
    let loose_file_len = fs::metadata(loose_path)
        .expect("test operation should succeed")
        .len();
    let loose_info = db
        .object_storage_info(&loose_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert_eq!(loose_info.disk_size, loose_file_len);
    let loose_no_base = zero_oid(format).expect("test operation should succeed");
    assert_eq!(loose_info.deltabase, loose_no_base);

    // Pack a base blob plus a child that extends it, preferring ofs-deltas and
    // keeping the given order.
    let shared_prefix = vec![b'a'; 4096];
    let base = EncodedObject::new(ObjectType::Blob, shared_prefix.clone());
    let mut child_body = shared_prefix;
    child_body.extend_from_slice(b" changed tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    // Packed base: occupies space and reports the zero id as its delta base.
    let base_info = db
        .object_storage_info(&base_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(base_info.disk_size > 0);
    let base_no_base = zero_oid(format).expect("test operation should succeed");
    assert_eq!(base_info.deltabase, base_no_base);

    // Packed child: occupies space and names the base as its delta base.
    let child_info = db
        .object_storage_info(&child_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(child_info.disk_size > 0);
    assert_eq!(child_info.deltabase, base_oid);

    // Unknown id: no storage info.
    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_info = db
        .object_storage_info(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_info, None);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6623
#[test]
fn file_database_resolves_unique_loose_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-loose");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"prefix loose\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let hex = oid.to_hex();
    let prefix = &hex[..8];

    // The 8-hex prefix of the only object resolves to exactly that object...
    let resolution = db
        .resolve_prefix(prefix)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    // ...and the object shows up in the database's id listing.
    let all_ids = db.object_ids().expect("test operation should succeed");
    assert!(all_ids.contains(&oid));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6648
#[test]
fn file_database_resolves_unique_packed_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-packed");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // The object is only ever stored in an installed pack.
    let packed_blob = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
    let oid = packed_blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&packed_blob))
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    let hex = oid.to_hex();
    let prefix = &hex[..8];
    let resolution = db
        .resolve_prefix(prefix)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6672
#[test]
fn file_database_reports_ambiguous_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-ambiguous");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Keep writing distinct blobs until two of them share a 4-hex prefix.
    let mut by_prefix = HashMap::new();
    let mut collision = None;
    for idx in 0..10_000 {
        let object =
            EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
        let oid = db
            .write_object(object)
            .expect("test operation should succeed");
        let prefix = oid.to_hex()[..4].to_string();
        if let Some(first) = by_prefix.insert(prefix.clone(), oid) {
            collision = Some((prefix, first, oid));
            break;
        }
    }
    let (prefix, first, second) = collision.expect("test should find a 4-hex collision");

    // Resolving the shared prefix must report both ids (order-insensitive).
    let mut found = match db
        .resolve_prefix(&prefix)
        .expect("test operation should succeed")
    {
        ObjectPrefixResolution::Ambiguous(candidates) => candidates,
        _ => panic!("expected ambiguous prefix {prefix}"),
    };
    found.sort_by_key(|id| id.to_hex());
    let mut expected = vec![first, second];
    expected.sort_by_key(|id| id.to_hex());
    assert_eq!(found, expected);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6705
#[test]
fn file_database_rejects_too_short_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-short");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // A 3-character prefix is refused as an invalid object id.
    let outcome = db.resolve_prefix("abc");
    assert!(matches!(outcome, Err(GitError::InvalidObjectId(_))));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6719
#[test]
fn file_database_reads_sha256_object_from_pack_index() {
    // SHA-256 variant: place a hand-written pack + index pair in objects/pack
    // and read the object back through a freshly opened database.
    let root = temp_root("sley-file-odb-pack-sha256");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let packed_blob = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
    let oid = packed_blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let written =
        PackFile::write_undeltified(std::slice::from_ref(&packed_blob), ObjectFormat::Sha256)
            .expect("test operation should succeed");

    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    let decoded = read_object_for_assert(&db, &oid);
    assert_eq!(decoded, packed_blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6750
#[test]
fn file_database_installs_sha256_pack_without_loose_objects() {
    let root = temp_root("sley-file-odb-install-pack");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");

    let packed_blob =
        EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
    let oid = packed_blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let pack =
        PackFile::write_undeltified(std::slice::from_ref(&packed_blob), ObjectFormat::Sha256)
            .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);

    let result = db
        .install_pack(&pack)
        .expect("test operation should succeed");

    // The install report names the pack after its checksum, lists the single
    // object, points at files that exist, and carries no promisor sidecar.
    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    // Nothing was exploded into a loose file, yet the object is readable.
    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    let decoded = read_object_for_assert(&db, &oid);
    assert_eq!(decoded, packed_blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6783
#[test]
fn file_database_installs_raw_sha256_pack_without_loose_objects() {
    let root = temp_root("sley-file-odb-install-raw-pack");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");

    let packed_blob =
        EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
    let oid = packed_blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let pack =
        PackFile::write_undeltified(std::slice::from_ref(&packed_blob), ObjectFormat::Sha256)
            .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);

    // Only the raw pack data is handed over, not the prebuilt `PackFile`.
    let result = db
        .install_raw_pack(&pack.pack)
        .expect("test operation should succeed");

    // The report must match what the prebuilt pack says: same checksum-derived
    // name, same single object, existing files, no promisor sidecar.
    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    // No loose copy was written, but the object is present and readable.
    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    let decoded = read_object_for_assert(&db, &oid);
    assert_eq!(decoded, packed_blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6816
#[test]
fn file_database_rejects_noncanonical_pack_index() {
    let root = temp_root("sley-file-odb-install-bad-index");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
    let pack = PackFile::write_undeltified(std::slice::from_ref(&blob), ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Rebuild the index from entries whose first CRC32 has its low bit
    // flipped, and pair that index with the otherwise untouched pack.
    let mut tampered_entries = pack.entries.clone();
    tampered_entries[0].crc32 ^= 1;
    let tampered_index =
        PackIndex::write_v2(ObjectFormat::Sha1, &tampered_entries, &pack.checksum)
            .expect("test operation should succeed");
    let mut bad_pack = pack.clone();
    bad_pack.index = tampered_index;

    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let outcome = db.install_pack(&bad_pack);
    assert!(outcome.is_err());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6836
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
    // Installing raw pack bytes with `promisor: true` must report a sidecar
    // path that shares the pack's file stem, ends in `.promisor`, and is empty.
    let root = temp_root("sley-file-odb-install-raw-promisor-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
    let blob_oid = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified(std::slice::from_ref(&blob), ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let options = RawPackInstallOptions { promisor: true };
    let installed = db
        .install_raw_pack_with_options(&written.pack, options)
        .expect("test operation should succeed");

    let sidecar = installed.promisor_path.expect("promisor sidecar");
    assert_eq!(sidecar.file_stem(), installed.pack_path.file_stem());
    let sidecar_extension = sidecar.extension().and_then(|ext| ext.to_str());
    assert_eq!(sidecar_extension, Some("promisor"));
    assert!(sidecar.exists());
    let sidecar_bytes = fs::read(&sidecar).expect("test operation should succeed");
    assert_eq!(sidecar_bytes, b"");

    assert!(installed.pack_path.exists());
    assert!(installed.index_path.exists());

    // The object must be served from the pack, not from a loose file.
    let loose_exists = db
        .loose()
        .object_path(&blob_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!loose_exists);
    assert_eq!(read_object_for_assert(&db, &blob_oid), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6876
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    // A worktree admin directory whose `commondir` file contains `../..` must
    // resolve to the canonicalized main `.git` directory and its `objects`.
    let root = temp_root("sley-odb-common-dir");
    let main_git_dir = root.join(".git");
    let worktree_admin = main_git_dir.join("worktrees").join("linked");
    fs::create_dir_all(&worktree_admin).expect("test operation should succeed");
    let commondir_file = worktree_admin.join("commondir");
    fs::write(commondir_file, "../..\n").expect("test operation should succeed");

    let canonical_common = fs::canonicalize(main_git_dir).expect("test operation should succeed");
    assert_eq!(repository_common_dir(&worktree_admin), canonical_common);
    let expected_objects_dir = canonical_common.join("objects");
    assert_eq!(repository_objects_dir(&worktree_admin), expected_objects_dir);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6891
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    // Builds a commit -> tree -> blob chain in a source database, checks that
    // the walk from the commit reports all three ids, then installs the
    // reachable set into a second database as a pack.
    let root = temp_root("sley-reachable-pack");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let tree_bytes = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_bytes);
    let tree_oid = source
        .write_object(tree.clone())
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_bytes = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_bytes);
    let commit_oid = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
        .expect("test operation should succeed");
    for expected_oid in [&commit_oid, &tree_oid, &blob_oid] {
        assert!(reachable.contains(expected_oid));
    }

    let install =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(install.object_ids.len(), 3);

    // Each object must be absent as a loose file yet present and readable
    // through the destination database.
    let assert_packed_only = |oid: &ObjectId, expected: &EncodedObject| {
        let loose_exists = destination
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!loose_exists);
        let present = destination
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&destination, oid), *expected);
    };
    assert_packed_only(&commit_oid, &commit);
    assert_packed_only(&tree_oid, &tree);
    assert_packed_only(&blob_oid, &blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6971
#[test]
fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
    // Starting twice from the same commit with its tree excluded must yield
    // exactly one object: the commit itself.
    let root = temp_root("sley-reachable-exclusions");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
    let blob_oid = db
        .write_object(blob)
        .expect("test operation should succeed");

    let tree_bytes = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_bytes))
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_bytes = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_bytes))
        .expect("test operation should succeed");

    let mut excluded = HashSet::new();
    excluded.insert(tree_oid);

    let starts = [commit_oid, commit_oid];
    let objects = collect_reachable_objects(&db, format, starts, &excluded)
        .expect("test operation should succeed");

    assert_eq!(objects.len(), 1);
    let only_oid = objects[0]
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(only_oid, commit_oid);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7028
#[test]
fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
    // With no exclusions a single-blob pack is produced; excluding that blob
    // makes the builder return `None`.
    let root = temp_root("sley-build-reachable-pack");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
    let blob_oid = db
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let no_exclusions = HashSet::new();
    let built = build_reachable_pack(&db, format, std::iter::once(blob_oid), &no_exclusions)
        .expect("test operation should succeed")
        .expect("reachable pack should be built");
    assert!(built.pack.starts_with(b"PACK"));
    assert_eq!(built.entries.len(), 1);
    assert_eq!(built.entries[0].oid, blob_oid);

    // Reuse the ids recorded in the first pack as the starting points.
    let mut excluded = HashSet::new();
    excluded.insert(blob_oid);
    let packed_ids = built.entries.into_iter().map(|entry| entry.oid);
    let rebuilt = build_reachable_pack(&db, format, packed_ids, &excluded)
        .expect("test operation should succeed");
    assert!(rebuilt.is_none());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7061
#[test]
fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
    // A walk from an annotated tag must include the tagged blob, and a walk
    // from an id absent from the database must fail with a typed not-found
    // error carrying that id and the traversal context.
    let root = temp_root("sley-reachable-tags");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
    let blob_oid = db
        .write_object(blob)
        .expect("test operation should succeed");

    let tag_bytes = Tag {
        object: blob_oid,
        object_type: ObjectType::Blob,
        name: b"v1".to_vec(),
        tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
        message: b"tag message\n".to_vec(),
        raw_body: None,
    }
    .write();
    let tag_oid = db
        .write_object(EncodedObject::new(ObjectType::Tag, tag_bytes))
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
        .expect("test operation should succeed");
    for expected_oid in [&tag_oid, &blob_oid] {
        assert!(reachable.contains(expected_oid));
    }

    let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        .expect("test operation should succeed");
    let walk_error = collect_reachable_object_ids(&db, format, std::iter::once(missing))
        .expect_err("missing traversal root should error");
    let not_found = walk_error.not_found_kind().expect("typed not found");
    assert_eq!(not_found.object_id(), Some(missing));
    let reported_context = not_found.missing_object_context();
    assert_eq!(reported_context, Some(MissingObjectContext::Traversal));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7105
#[test]
fn install_reachable_pack_empty_starts_create_no_pack() {
    // With no starting ids nothing is installed and no `objects/pack`
    // directory appears in the destination.
    let root = temp_root("sley-reachable-empty");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let no_starts = Vec::<ObjectId>::new();
    let outcome = install_reachable_pack(&source, &destination, format, no_starts)
        .expect("test operation should succeed");

    assert!(outcome.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7125
#[test]
fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
    // When the only starting id is also excluded, no pack is installed and the
    // destination gets no `objects/pack` directory.
    let root = temp_root("sley-reachable-install-excluding");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
    let blob_oid = source
        .write_object(blob)
        .expect("test operation should succeed");
    let mut excluded = HashSet::new();
    excluded.insert(blob_oid);

    let starts = std::iter::once(blob_oid);
    let outcome =
        install_reachable_pack_excluding(&source, &destination, format, starts, &excluded)
            .expect("test operation should succeed");

    assert!(outcome.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7156
#[test]
fn install_reachable_pack_supports_sha256() {
    // Same build-then-install flow as the SHA-1 tests, run with
    // `ObjectFormat::Sha256` on both databases.
    let root = temp_root("sley-reachable-pack-sha256");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let format = ObjectFormat::Sha256;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let no_exclusions = HashSet::new();
    let built = build_reachable_pack(&source, format, std::iter::once(blob_oid), &no_exclusions)
        .expect("test operation should succeed")
        .expect("sha256 reachable pack should be built");
    assert!(built.pack.starts_with(b"PACK"));
    assert_eq!(built.entries[0].oid, blob_oid);

    let installed =
        install_reachable_pack(&source, &destination, format, std::iter::once(blob_oid))
            .expect("test operation should succeed")
            .expect("sha256 reachable pack should be written");
    assert_eq!(installed.object_ids, vec![blob_oid]);

    // The blob is readable from the destination without a loose copy.
    let loose_exists = destination
        .loose()
        .object_path(&blob_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!loose_exists);
    assert_eq!(read_object_for_assert(&destination, &blob_oid), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7194
#[test]
fn install_helpers_accept_custom_raw_pack_installer() {
    use std::cell::RefCell;

    /// Test double that records every pack it is handed and answers with a
    /// preset list of object ids.
    #[derive(Default)]
    struct RecordingInstaller {
        received_packs: RefCell<Vec<Vec<u8>>>,
        reported_ids: RefCell<Vec<ObjectId>>,
    }

    impl RawPackInstaller for RecordingInstaller {
        fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
            self.received_packs.borrow_mut().push(pack_bytes.to_vec());
            Ok(RawPackInstallResult {
                object_ids: self.reported_ids.borrow().clone(),
            })
        }
    }

    let format = ObjectFormat::Sha1;
    let source = ObjectDatabase::new(format);
    let blob = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
    let blob_oid = source
        .write_object(blob)
        .expect("test operation should succeed");
    let installer = RecordingInstaller {
        reported_ids: RefCell::new(vec![blob_oid]),
        ..Default::default()
    };

    let outcome = install_reachable_pack(&source, &installer, format, std::iter::once(blob_oid))
        .expect("test operation should succeed")
        .expect("custom installer should receive pack");

    // The helper must hand back the installer's own answer and must have
    // delivered exactly one pack stream to it.
    let RecordingInstaller {
        received_packs,
        reported_ids,
    } = installer;
    assert_eq!(outcome.object_ids, reported_ids.into_inner());
    let received_packs = received_packs.into_inner();
    assert_eq!(received_packs.len(), 1);
    assert!(received_packs[0].starts_with(b"PACK"));
}
7229
#[test]
fn file_database_reads_object_from_multi_pack_index() {
    // Two single-object packs are written without `.idx` files; a hand-built
    // `multi-pack-index` is the only index on disk, so lookups, prefix
    // resolution and reads all have to go through it.
    let root = temp_root("sley-file-odb-midx");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
    let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
    let first_oid = first
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let second_oid = second
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");

    let first_hex = first_pack.checksum.to_hex();
    let second_hex = second_pack.checksum.to_hex();
    let first_offset = first_pack.entries[0].offset;
    let second_offset = second_pack.entries[0].offset;

    let first_pack_path = pack_dir.join(format!("pack-{first_hex}.pack"));
    fs::write(first_pack_path, first_pack.pack).expect("test operation should succeed");
    let second_pack_path = pack_dir.join(format!("pack-{second_hex}.pack"));
    fs::write(second_pack_path, second_pack.pack).expect("test operation should succeed");

    // The multi-pack-index is given the `.idx` names; `pack_int_id` is each
    // name's position in this list.
    let index_names = [
        format!("pack-{first_hex}.idx"),
        format!("pack-{second_hex}.idx"),
    ];
    let midx_entries = [
        sley_pack::MultiPackIndexEntry {
            oid: first_oid,
            pack_int_id: 0,
            offset: first_offset,
            force_large_offset: false,
        },
        sley_pack::MultiPackIndexEntry {
            oid: second_oid,
            pack_int_id: 1,
            offset: second_offset,
            force_large_offset: false,
        },
    ];
    let midx = MultiPackIndex::write(ObjectFormat::Sha1, 2, &index_names, &midx_entries)
        .expect("test operation should succeed");
    fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let second_present = db
        .contains(&second_oid)
        .expect("test operation should succeed");
    assert!(second_present);

    let second_oid_hex = second_oid.to_hex();
    let resolution = db
        .resolve_prefix(&second_oid_hex[..8])
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(second_oid));

    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7296
#[test]
fn file_database_finds_pack_added_after_registry_was_cached() {
    // Regression guard for the cached pack-directory registry. Once a read has
    // cached the registry, a pack installed later through the same handle must
    // still be found, because a lookup miss is expected to trigger a re-scan.
    let root = temp_root("sley-file-odb-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Install the first pack and read from it so the registry gets cached.
    let early = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
    let early_oid = early
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let early_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&early))
        .expect("test operation should succeed");
    db.install_pack(&early_pack)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &early_oid), early);

    // The second object has not been written anywhere yet, so a read misses.
    let late = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
    let late_oid = late
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let read_before_install = db.read_object(&late_oid);
    assert!(matches!(read_before_install, Err(GitError::NotFound(_))));

    // After installing its pack through the same handle, the stale registry
    // must not hide it.
    let late_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&late))
        .expect("test operation should succeed");
    db.install_pack(&late_pack)
        .expect("test operation should succeed");
    let late_present = db
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(late_present);
    assert_eq!(read_object_for_assert(&db, &late_oid), late);
    // The object from the first pack is still readable as well.
    assert_eq!(read_object_for_assert(&db, &early_oid), early);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7345
#[test]
fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
    // A presence checker created before the second pack exists must still
    // report the second object once that pack has been installed.
    let root = temp_root("sley-presence-checker-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let early = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
    let early_oid = early
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let early_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&early))
        .expect("test operation should succeed");
    db.install_pack(&early_pack)
        .expect("test operation should succeed");

    let late = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
    let late_oid = late
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Query both ids while only the first pack is installed.
    let mut checker = db.presence_checker();
    let early_seen = checker
        .contains(&early_oid)
        .expect("test operation should succeed");
    assert!(early_seen);
    let late_seen_before_install = checker
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(!late_seen_before_install);

    let late_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&late))
        .expect("test operation should succeed");
    db.install_pack(&late_pack)
        .expect("test operation should succeed");

    // The same checker instance is asked again after the install.
    let late_seen_after_install = checker
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(late_seen_after_install);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7390
#[test]
fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
    // Checks two properties of the cached pack registry: its per-pack index
    // and data slots start empty and are filled on demand, and a refresh after
    // a second pack is installed yields a new registry with updated counts.
    let root = temp_root("sley-file-odb-pack-registry-refresh");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
    let first_oid = first
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");

    let first_registry = db
        .cached_pack_registry(&pack_dir, false)
        .expect("test operation should succeed");
    assert_eq!(first_registry.fingerprint.idx_count, 1);
    assert_eq!(first_registry.fingerprint.pack_count, 1);
    assert_eq!(first_registry.packs.len(), 1);

    // Probes for the two lazily-filled slots of the only registered pack.
    let only_pack = &first_registry.packs[0];
    let index_loaded = || {
        only_pack
            .index
            .lock()
            .expect("test operation should succeed")
            .is_some()
    };
    let data_loaded = || {
        only_pack
            .data
            .lock()
            .expect("test operation should succeed")
            .is_some()
    };

    // Nothing is loaded right after the registry is obtained.
    assert!(!index_loaded());
    assert!(!data_loaded());

    // An existence check fills the index slot but leaves the pack data slot
    // empty; only a full object read fills the data slot.
    let first_present = db
        .contains(&first_oid)
        .expect("test operation should succeed");
    assert!(first_present);
    assert!(index_loaded());
    assert!(!data_loaded());
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    assert!(data_loaded());

    let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
    let second_oid = second
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    // Requesting the registry with the flag set to `true` must hand back a
    // different allocation that knows about both packs.
    let refreshed = db
        .cached_pack_registry(&pack_dir, true)
        .expect("test operation should succeed");
    assert!(!Arc::ptr_eq(&first_registry, &refreshed));
    assert_eq!(refreshed.fingerprint.idx_count, 2);
    assert_eq!(refreshed.fingerprint.pack_count, 2);
    assert_eq!(refreshed.packs.len(), 2);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7478
#[test]
fn file_database_pack_search_hint_rebuilds_after_pack_added() {
    // Regression guard for the recent-pack search hint. The hint is tied to
    // the cached pack registry, so after a miss and a registry change it must
    // not keep a newly added pack hidden.
    let root = temp_root("sley-file-odb-pack-lookup-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
    let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
    let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
    let id_of = |object: &EncodedObject| {
        object
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed")
    };
    let first_oid = id_of(&first);
    let second_oid = id_of(&second);
    let third_oid = id_of(&third);

    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    // Reading from both installed packs sets up the cached registry and the
    // pack hint; the third object is not installed yet, so its read misses.
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    let read_before_install = db.read_object(&third_oid);
    assert!(matches!(read_before_install, Err(GitError::NotFound(_))));

    let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
        .expect("test operation should succeed");
    db.install_pack(&third_pack)
        .expect("test operation should succeed");

    // Both the newly packed object and an earlier one must resolve.
    assert_eq!(read_object_for_assert(&db, &third_oid), third);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7529
7530    #[test]
7531    fn file_database_prefers_loose_object_over_packed_object() {
7532        let root = temp_root("sley-file-odb-prefer-loose");
7533        let git_dir = root.join(".git");
7534        let pack_dir = git_dir.join("objects").join("pack");
7535        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
7536        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7537        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7538            .expect("test operation should succeed");
7539        let pack_name = written.checksum.to_hex();
7540        fs::write(
7541            pack_dir.join(format!("pack-{pack_name}.pack")),
7542            written.pack,
7543        )
7544        .expect("test operation should succeed");
7545        fs::write(
7546            pack_dir.join(format!("pack-{pack_name}.idx")),
7547            written.index,
7548        )
7549        .expect("test operation should succeed");
7550
7551        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7552        let oid = db
7553            .write_object(object.clone())
7554            .expect("test operation should succeed");
7555        assert_eq!(read_object_for_assert(&db, &oid), object);
7556        fs::remove_dir_all(root).expect("test operation should succeed");
7557    }
7558
7559    #[test]
7560    fn bundle_prerequisite_verification_reads_existing_objects() {
7561        let db = ObjectDatabase::new(ObjectFormat::Sha1);
7562        let oid = db
7563            .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
7564            .expect("test operation should succeed");
7565        let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
7566        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7567            .expect("test operation should succeed");
7568
7569        verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
7570    }
7571
7572    #[test]
7573    fn bundle_prerequisite_verification_reports_missing_objects() {
7574        let db = ObjectDatabase::new(ObjectFormat::Sha1);
7575        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
7576            .expect("test operation should succeed");
7577        let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
7578        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7579            .expect("test operation should succeed");
7580
7581        assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
7582    }
7583
7584    #[test]
7585    fn unbundle_objects_writes_pack_entries_and_returns_refs() {
7586        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7587        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
7588        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
7589        let oid = object
7590            .object_id(ObjectFormat::Sha1)
7591            .expect("test operation should succeed");
7592        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7593            .expect("test operation should succeed");
7594        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
7595            .into_bytes()
7596            .into_iter()
7597            .chain(pack.pack)
7598            .collect::<Vec<_>>();
7599        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7600            .expect("test operation should succeed");
7601
7602        let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
7603            .expect("test operation should succeed");
7604        assert_eq!(result.written_objects, vec![oid]);
7605        assert_eq!(result.references, bundle.references);
7606        assert_eq!(read_object_for_assert(&writer, &oid), object);
7607    }
7608
7609    #[test]
7610    fn install_bundle_pack_writes_pack_and_returns_refs() {
7611        let root = temp_root("sley-install-bundle-pack");
7612        let git_dir = root.join(".git");
7613        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7614        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7615        let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7616        let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
7617        let oid = object
7618            .object_id(ObjectFormat::Sha1)
7619            .expect("test operation should succeed");
7620        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7621            .expect("test operation should succeed");
7622        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
7623            .into_bytes()
7624            .into_iter()
7625            .chain(pack.pack)
7626            .collect::<Vec<_>>();
7627        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7628            .expect("test operation should succeed");
7629
7630        let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
7631            .expect("test operation should succeed");
7632
7633        assert_eq!(result.written_objects, vec![oid]);
7634        assert_eq!(result.references, bundle.references);
7635        assert!(
7636            database
7637                .contains(&oid)
7638                .expect("test operation should succeed")
7639        );
7640        assert_eq!(read_object_for_assert(&database, &oid), object);
7641        assert!(
7642            !database
7643                .loose()
7644                .object_path(&oid)
7645                .expect("test operation should succeed")
7646                .exists()
7647        );
7648        fs::remove_dir_all(root).expect("test operation should succeed");
7649    }
7650
7651    #[test]
7652    fn unpack_packfile_objects_writes_sha256_pack_entries() {
7653        let writer = ObjectDatabase::new(ObjectFormat::Sha256);
7654        let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
7655        let oid = object
7656            .object_id(ObjectFormat::Sha256)
7657            .expect("test operation should succeed");
7658        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7659            .expect("test operation should succeed");
7660
7661        let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
7662            .expect("test operation should succeed");
7663
7664        assert_eq!(result.written_objects, vec![oid]);
7665        assert_eq!(read_object_for_assert(&writer, &oid), object);
7666    }
7667
7668    #[test]
7669    fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
7670        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7671        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
7672        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
7673            .expect("test operation should succeed");
7674        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
7675        let oid = object
7676            .object_id(ObjectFormat::Sha1)
7677            .expect("test operation should succeed");
7678        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7679            .expect("test operation should succeed");
7680        let bundle_bytes =
7681            format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
7682                .into_bytes()
7683                .into_iter()
7684                .chain(pack.pack)
7685                .collect::<Vec<_>>();
7686        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7687            .expect("test operation should succeed");
7688
7689        assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
7690        assert!(!writer.contains(&oid));
7691    }
7692
7693    /// Build a commit -> tree -> blob graph in `db`, returning the three object
7694    /// ids and their canonical encodings as `(oid, object)` pairs.
7695    fn write_commit_graph(
7696        db: &mut FileObjectDatabase,
7697        payload: &[u8],
7698    ) -> Vec<(ObjectId, EncodedObject)> {
7699        let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
7700        let blob_oid = db
7701            .write_object(blob.clone())
7702            .expect("test operation should succeed");
7703        let tree = EncodedObject::new(
7704            ObjectType::Tree,
7705            Tree {
7706                entries: vec![TreeEntry {
7707                    mode: 0o100644,
7708                    name: BString::from(b"payload.txt"),
7709                    oid: blob_oid,
7710                }],
7711            }
7712            .write(),
7713        );
7714        let tree_oid = db
7715            .write_object(tree.clone())
7716            .expect("test operation should succeed");
7717        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7718        let commit = EncodedObject::new(
7719            ObjectType::Commit,
7720            Commit {
7721                tree: tree_oid,
7722                parents: Vec::new(),
7723                author: identity.clone(),
7724                committer: identity,
7725                encoding: None,
7726                message: b"initial\n".to_vec(),
7727            }
7728            .write(),
7729        );
7730        let commit_oid = db
7731            .write_object(commit.clone())
7732            .expect("test operation should succeed");
7733        vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
7734    }
7735
7736    fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
7737        let root = temp_root("sley-repack-all");
7738        let git_dir = root.join(".git");
7739        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7740        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7741
7742        // A pre-existing pack holds one blob; the rest of the graph is loose.
7743        let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
7744        let packed_oid = packed_blob
7745            .object_id(format)
7746            .expect("test operation should succeed");
7747        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
7748            .expect("test operation should succeed");
7749        let existing = db
7750            .install_pack(&existing_pack)
7751            .expect("test operation should succeed");
7752
7753        let graph = write_commit_graph(&mut db, b"repack payload\n");
7754
7755        let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
7756        expected.insert(packed_oid, packed_blob.clone());
7757
7758        let result = repack_all_objects(&git_dir, format)
7759            .expect("test operation should succeed")
7760            .expect("repository has objects");
7761
7762        // The new pack round-trips and contains every original object byte-for-byte.
7763        assert_eq!(result.object_count, expected.len());
7764        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
7765        assert_eq!(parsed.entries.len(), expected.len());
7766        for entry in &parsed.entries {
7767            let want = expected
7768                .get(&entry.entry.oid)
7769                .expect("packed object was in the repository");
7770            assert_eq!(&entry.object, want);
7771            assert_eq!(
7772                entry
7773                    .object
7774                    .object_id(format)
7775                    .expect("test operation should succeed"),
7776                entry.entry.oid
7777            );
7778        }
7779        // The generated index parses and agrees with the pack checksum.
7780        let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
7781        assert_eq!(idx.pack_checksum, parsed.checksum);
7782        assert_eq!(idx.entries.len(), expected.len());
7783
7784        // The pre-existing pack is reported obsolete (by its .pack path).
7785        assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
7786        // Every loose object id is reported as now packed.
7787        let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
7788        want_loose.sort_by_key(ObjectId::to_hex);
7789        assert_eq!(result.packed_loose, want_loose);
7790        assert!(!result.packed_loose.contains(&packed_oid));
7791
7792        fs::remove_dir_all(root).expect("test operation should succeed");
7793    }
7794
7795    #[test]
7796    fn repack_all_objects_consolidates_loose_and_pack_sha1() {
7797        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
7798    }
7799
7800    #[test]
7801    fn repack_all_objects_consolidates_loose_and_pack_sha256() {
7802        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
7803    }
7804
7805    #[test]
7806    fn repack_all_objects_returns_none_for_empty_repository() {
7807        let root = temp_root("sley-repack-empty");
7808        let git_dir = root.join(".git");
7809        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7810
7811        assert!(
7812            repack_all_objects(&git_dir, ObjectFormat::Sha1)
7813                .expect("test operation should succeed")
7814                .is_none()
7815        );
7816
7817        fs::remove_dir_all(root).expect("test operation should succeed");
7818    }
7819
7820    #[test]
7821    fn install_repack_result_writes_pack_without_pruning_by_default() {
7822        let root = temp_root("sley-repack-install-nodelete");
7823        let git_dir = root.join(".git");
7824        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7825        let format = ObjectFormat::Sha1;
7826        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7827        let graph = write_commit_graph(&mut db, b"install no prune\n");
7828
7829        let result = repack_all_objects(&git_dir, format)
7830            .expect("test operation should succeed")
7831            .expect("test operation should succeed");
7832        install_repack_result(&git_dir, format, &result, false)
7833            .expect("test operation should succeed");
7834
7835        // New pack is on disk and readable.
7836        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
7837        let pack_dir = git_dir.join("objects").join("pack");
7838        let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
7839        let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
7840        assert!(pack_path.exists());
7841        assert!(idx_path.exists());
7842        // Loose objects survive because prune was not requested.
7843        for (oid, object) in &graph {
7844            assert!(
7845                db.loose()
7846                    .object_path(oid)
7847                    .expect("test operation should succeed")
7848                    .exists()
7849            );
7850            assert_eq!(read_object_for_assert(&db, oid), *object);
7851        }
7852
7853        fs::remove_dir_all(root).expect("test operation should succeed");
7854    }
7855
7856    #[test]
7857    fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
7858        let root = temp_root("sley-repack-install-prune");
7859        let git_dir = root.join(".git");
7860        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7861        let format = ObjectFormat::Sha1;
7862        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7863
7864        let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
7865        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
7866            .expect("test operation should succeed");
7867        let existing = db
7868            .install_pack(&existing_pack)
7869            .expect("test operation should succeed");
7870        let graph = write_commit_graph(&mut db, b"prune payload\n");
7871
7872        let result = repack_all_objects(&git_dir, format)
7873            .expect("test operation should succeed")
7874            .expect("test operation should succeed");
7875        let new_pack_checksum = PackFile::parse(&result.pack, format)
7876            .expect("test operation should succeed")
7877            .checksum;
7878        install_repack_result(&git_dir, format, &result, true)
7879            .expect("test operation should succeed");
7880
7881        // Obsolete pack and its index are gone.
7882        assert!(!existing.pack_path.exists());
7883        assert!(!existing.index_path.exists());
7884        // Packed loose objects are gone from disk.
7885        for (oid, _) in &graph {
7886            assert!(
7887                !db.loose()
7888                    .object_path(oid)
7889                    .expect("test operation should succeed")
7890                    .exists()
7891            );
7892        }
7893        // The new consolidated pack remains and still serves every object.
7894        let pack_dir = git_dir.join("objects").join("pack");
7895        assert!(
7896            pack_dir
7897                .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
7898                .exists()
7899        );
7900        let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
7901        for (oid, object) in &graph {
7902            assert!(
7903                reopened
7904                    .contains(oid)
7905                    .expect("test operation should succeed")
7906            );
7907            assert_eq!(read_object_for_assert(&reopened, oid), *object);
7908        }
7909        let packed_oid = packed_blob
7910            .object_id(format)
7911            .expect("test operation should succeed");
7912        assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
7913
7914        fs::remove_dir_all(root).expect("test operation should succeed");
7915    }
7916
7917    #[test]
7918    fn install_repack_result_preserves_keep_and_promisor_packs() {
7919        let root = temp_root("sley-repack-install-keep-promisor");
7920        let git_dir = root.join(".git");
7921        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7922        let format = ObjectFormat::Sha1;
7923        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7924
7925        let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
7926        let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
7927            .expect("test operation should succeed");
7928        let keep_install = db
7929            .install_pack(&keep_pack)
7930            .expect("test operation should succeed");
7931        let keep_sidecar = keep_install.pack_path.with_extension("keep");
7932        fs::write(&keep_sidecar, b"").expect("test operation should succeed");
7933
7934        let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
7935        let promisor_pack =
7936            PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
7937                .expect("test operation should succeed");
7938        let promisor_install = db
7939            .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
7940            .expect("test operation should succeed");
7941        let promisor_sidecar = promisor_install
7942            .promisor_path
7943            .clone()
7944            .expect("promisor sidecar");
7945
7946        let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
7947        let result = repack_all_objects(&git_dir, format)
7948            .expect("test operation should succeed")
7949            .expect("test operation should succeed");
7950        assert!(result.obsolete_packs.contains(&keep_install.pack_path));
7951        assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
7952
7953        install_repack_result(&git_dir, format, &result, true)
7954            .expect("test operation should succeed");
7955
7956        for path in [
7957            &keep_install.pack_path,
7958            &keep_install.index_path,
7959            &keep_sidecar,
7960            &promisor_install.pack_path,
7961            &promisor_install.index_path,
7962            &promisor_sidecar,
7963        ] {
7964            assert!(path.exists(), "{} should be preserved", path.display());
7965        }
7966        for (oid, _) in &graph {
7967            assert!(
7968                !db.loose()
7969                    .object_path(oid)
7970                    .expect("test operation should succeed")
7971                    .exists()
7972            );
7973        }
7974
7975        fs::remove_dir_all(root).expect("test operation should succeed");
7976    }
7977
7978    #[test]
7979    fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
7980        // Safety: a loose object whose id is not in the new pack must survive
7981        // pruning even if the caller lists it in `packed_loose`.
7982        let root = temp_root("sley-repack-install-safety");
7983        let git_dir = root.join(".git");
7984        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7985        let format = ObjectFormat::Sha1;
7986        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7987        let graph = write_commit_graph(&mut db, b"safety packed\n");
7988
7989        let mut result = repack_all_objects(&git_dir, format)
7990            .expect("test operation should succeed")
7991            .expect("test operation should succeed");
7992
7993        // A loose object that is NOT in the new pack, but mislabeled as packed.
7994        let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
7995        let stray_oid = db
7996            .write_object(stray.clone())
7997            .expect("test operation should succeed");
7998        assert!(!result.packed_loose.contains(&stray_oid));
7999        result.packed_loose.push(stray_oid);
8000
8001        install_repack_result(&git_dir, format, &result, true)
8002            .expect("test operation should succeed");
8003
8004        // The stray loose object is untouched because it is not in the new pack.
8005        assert!(
8006            db.loose()
8007                .object_path(&stray_oid)
8008                .expect("test operation should succeed")
8009                .exists()
8010        );
8011        assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
8012        // Genuinely packed loose objects were still removed.
8013        for (oid, _) in &graph {
8014            assert!(
8015                !db.loose()
8016                    .object_path(oid)
8017                    .expect("test operation should succeed")
8018                    .exists()
8019            );
8020        }
8021
8022        fs::remove_dir_all(root).expect("test operation should succeed");
8023    }
8024
8025    #[test]
8026    fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
8027        let root = temp_root("sley-prune-unreachable");
8028        let git_dir = root.join(".git");
8029        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8030        let format = ObjectFormat::Sha1;
8031        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8032        let graph = write_commit_graph(&mut db, b"reachable payload\n");
8033        let commit_oid = graph[0].0.clone();
8034
8035        // A dangling loose blob not referenced by the commit graph.
8036        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
8037        let dangling_oid = db
8038            .write_object(dangling)
8039            .expect("test operation should succeed");
8040
8041        // Report-only pass leaves everything on disk.
8042        let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
8043            .expect("test operation should succeed");
8044        assert_eq!(reported, vec![dangling_oid]);
8045        assert!(
8046            db.loose()
8047                .object_path(&dangling_oid)
8048                .expect("test operation should succeed")
8049                .exists()
8050        );
8051
8052        // Deleting pass removes only the unreachable object.
8053        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
8054            .expect("test operation should succeed");
8055        assert_eq!(deleted, vec![dangling_oid]);
8056        assert!(
8057            !db.loose()
8058                .object_path(&dangling_oid)
8059                .expect("test operation should succeed")
8060                .exists()
8061        );
8062        for (oid, object) in &graph {
8063            assert!(
8064                db.loose()
8065                    .object_path(oid)
8066                    .expect("test operation should succeed")
8067                    .exists()
8068            );
8069            assert_eq!(read_object_for_assert(&db, oid), *object);
8070        }
8071
8072        fs::remove_dir_all(root).expect("test operation should succeed");
8073    }
8074
8075    #[test]
8076    fn prune_unreachable_loose_ignores_gitlink_targets() {
8077        let root = temp_root("sley-prune-gitlink");
8078        let git_dir = root.join(".git");
8079        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8080        let format = ObjectFormat::Sha1;
8081        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
8082
8083        let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
8084            .expect("test operation should succeed");
8085        let tree = EncodedObject::new(
8086            ObjectType::Tree,
8087            Tree {
8088                entries: vec![TreeEntry {
8089                    mode: 0o160000,
8090                    name: BString::from(b"submodule"),
8091                    oid: submodule_oid,
8092                }],
8093            }
8094            .write(),
8095        );
8096        let tree_oid = db
8097            .write_object(tree)
8098            .expect("test operation should succeed");
8099        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
8100        let commit = EncodedObject::new(
8101            ObjectType::Commit,
8102            Commit {
8103                tree: tree_oid,
8104                parents: Vec::new(),
8105                author: identity.clone(),
8106                committer: identity,
8107                encoding: None,
8108                message: b"gitlink\n".to_vec(),
8109            }
8110            .write(),
8111        );
8112        let commit_oid = db
8113            .write_object(commit)
8114            .expect("test operation should succeed");
8115        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
8116        let dangling_oid = db
8117            .write_object(dangling)
8118            .expect("test operation should succeed");
8119
8120        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
8121            .expect("test operation should succeed");
8122
8123        assert_eq!(deleted, vec![dangling_oid]);
8124        assert!(
8125            !db.loose()
8126                .object_path(&dangling_oid)
8127                .expect("test operation should succeed")
8128                .exists()
8129        );
8130
8131        fs::remove_dir_all(root).expect("test operation should succeed");
8132    }
8133
8134    fn temp_root(prefix: &str) -> PathBuf {
8135        std::env::temp_dir().join(format!(
8136            "{prefix}-{}-{}",
8137            std::process::id(),
8138            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
8139        ))
8140    }
8141}