// sley_odb/lib.rs

// sley#7: untrusted-input parsing crate — fallible ops propagate errors;
// the only retained `expect`s would be documented compile-time invariants.
3#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{
12    Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object,
13    tree_entry_object_type,
14};
15use sley_pack::{
16    MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
17    PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput,
18    PackStreamIndexBuild, PackWrite, PackWriteOptions, PackWriteSummary,
19};
20use std::collections::{HashMap, HashSet};
21use std::io::{Read, Write};
22use std::path::{Path, PathBuf};
23use std::sync::atomic::{AtomicU64, Ordering};
24use std::sync::{Arc, Mutex, OnceLock};
25use std::{env, fs};
26
/// Process-wide atomic counter, initialised to zero.
// NOTE(review): presumably used to make temp-file names unique — confirm at the use site.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
28
29pub trait ObjectReader {
30    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;
31
32    /// Graft-points seam (shallow clones today, replace refs/grafts later):
33    /// `true` when history is cut at `oid`, so every walk must treat the
34    /// commit as parentless even though its raw body still names parents.
35    ///
36    /// [`FileObjectDatabase`] answers from `$GIT_DIR/shallow`; readers that
37    /// are not backed by a repository (in-memory stores, pack overlays)
38    /// keep the default "no grafts".
39    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
40        false
41    }
42
43    /// Whether this reader has any shallow/graft boundaries at all. Walkers can
44    /// use this to choose dense graph-only traversal when no boundary can cut
45    /// parent edges.
46    fn has_shallow_grafts(&self) -> bool {
47        false
48    }
49
50    /// True when `oid` is covered by a promisor pack. Partial clones are
51    /// allowed to omit promised objects until a later on-demand fetch hydrates
52    /// them; ordinary readers keep the default "no promised objects".
53    fn is_promised_object(&self, _oid: &ObjectId) -> bool {
54        false
55    }
56}
57
58fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
59    (*oid == ObjectId::empty_tree(format))
60        .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
61}
62
63fn with_missing_object_context(
64    err: GitError,
65    oid: ObjectId,
66    context: MissingObjectContext,
67) -> GitError {
68    let kind = err
69        .not_found_kind()
70        .and_then(sley_core::NotFoundKind::missing_object_kind);
71    match kind {
72        Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
73        None => err,
74    }
75}
76
77/// Parents of a parsed commit with the graft seam applied: empty when the
78/// reader cuts history at `oid` (shallow boundary), the raw parsed parents
79/// otherwise.
80pub fn grafted_parents<R: ObjectReader + ?Sized>(
81    reader: &R,
82    oid: &ObjectId,
83    parents: Vec<ObjectId>,
84) -> Vec<ObjectId> {
85    if reader.is_shallow_graft(oid) {
86        Vec::new()
87    } else {
88        parents
89    }
90}
91
92pub trait ObjectWriter {
93    /// Write `object`, returning its id. Takes `&self`: every implementation's
94    /// write state (in-memory map, loose-object cache) is behind interior
95    /// mutability, so a single handle can interleave reads and writes without a
96    /// `&mut` borrow. This lets the merge engine read and write through one `db`
97    /// instead of opening a second read-only handle that re-warms the caches.
98    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
99}
100
101#[derive(Debug, Clone, PartialEq, Eq)]
102pub struct BundleUnbundleResult {
103    pub written_objects: Vec<ObjectId>,
104    pub references: Vec<BundleReference>,
105}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
108pub struct PackUnpackResult {
109    pub written_objects: Vec<ObjectId>,
110}
111
112#[derive(Debug, Clone, PartialEq, Eq)]
113pub struct PackInstallResult {
114    pub pack_name: String,
115    pub pack_path: PathBuf,
116    pub index_path: PathBuf,
117    pub promisor_path: Option<PathBuf>,
118    pub object_ids: Vec<ObjectId>,
119}
120
121#[derive(Debug)]
122pub struct RawPackStreamingInstall {
123    format: ObjectFormat,
124    expected_pack_id: ObjectId,
125    expected_pack_size: u64,
126    options: RawPackInstallOptions,
127    pack_dir: PathBuf,
128    pack_name: String,
129    pack_path: PathBuf,
130    index_path: PathBuf,
131    temp_pack_path: PathBuf,
132    file: Option<fs::File>,
133    written: u64,
134    finished: bool,
135}
136
137#[derive(Debug, Clone, PartialEq, Eq)]
138pub struct RawPackInstallResult {
139    pub object_ids: Vec<ObjectId>,
140}
141
142#[derive(Debug, Clone, PartialEq, Eq)]
143pub struct RawPackIndexResult {
144    pub pack_id: ObjectId,
145    pub index: Vec<u8>,
146    pub objects: Vec<RawPackIndexedObject>,
147}
148
149#[derive(Debug, Clone, PartialEq, Eq)]
150pub struct RawPackIndexedObject {
151    pub oid: ObjectId,
152    pub object_type: ObjectType,
153    pub size: u64,
154    pub offset: u64,
155}
156
/// `Read` adapter that copies every byte it reads from `reader` into `writer`.
struct PackInstallTeeReader<'a, R, W> {
    /// Source of the bytes.
    reader: &'a mut R,
    /// Sink that receives a copy of everything read.
    writer: &'a mut W,
}

impl<R, W> Read for PackInstallTeeReader<'_, R, W>
where
    R: Read,
    W: Write,
{
    /// Reads into `buf`, then mirrors exactly the bytes that were read into
    /// the writer before reporting the count.
    ///
    /// # Errors
    /// Propagates errors from the underlying read or from the mirrored write.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let len = self.reader.read(buf)?;
        // A zero-length read means nothing to mirror.
        if len > 0 {
            self.writer.write_all(&buf[..len])?;
        }
        Ok(len)
    }
}
175
176#[derive(Debug, Clone, PartialEq, Eq)]
177pub struct ReachablePackFile {
178    pub pack_path: PathBuf,
179    pub pack_size: u64,
180    pub checksum: ObjectId,
181    pub object_count: usize,
182    pub delta_count: u32,
183}
184
185#[derive(Debug, Clone, PartialEq, Eq)]
186pub struct ReachablePackWriteSummary {
187    pub index: Vec<u8>,
188    pub checksum: ObjectId,
189    pub object_count: usize,
190    pub delta_count: u32,
191    pub pack_size: u64,
192}
193
/// Options controlling how a raw pack is installed. Defaults to all-off.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// Forwarded to the promisor sidecar writer when the pack is installed.
    pub promisor: bool,
}
198
199pub trait RawPackInstaller {
200    fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<RawPackInstallResult>
201    where
202        R: Read;
203}
204
205#[derive(Debug, Clone, PartialEq, Eq)]
206pub enum ObjectPrefixResolution {
207    Missing,
208    Unique(ObjectId),
209    Ambiguous(Vec<ObjectId>),
210}
211
212#[derive(Debug, Clone, PartialEq, Eq)]
213pub struct ObjectStorageInfo {
214    pub disk_size: u64,
215    pub deltabase: ObjectId,
216}
217
218impl RawPackInstaller for FileObjectDatabase {
219    fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<RawPackInstallResult>
220    where
221        R: Read,
222    {
223        let result = FileObjectDatabase::install_raw_pack_from_reader(self, reader)?;
224        Ok(RawPackInstallResult {
225            object_ids: result.object_ids,
226        })
227    }
228}
229
230impl RawPackInstaller for ObjectDatabase {
231    fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<RawPackInstallResult>
232    where
233        R: Read,
234    {
235        let mut pack_bytes = Vec::new();
236        reader.read_to_end(&mut pack_bytes)?;
237        let result = unpack_packfile_objects(&pack_bytes, self.format, self)?;
238        Ok(RawPackInstallResult {
239            object_ids: result.written_objects,
240        })
241    }
242}
243
244impl RawPackStreamingInstall {
245    pub fn bytes_written(&self) -> u64 {
246        self.written
247    }
248
249    pub fn pack_path(&self) -> &Path {
250        &self.pack_path
251    }
252
253    pub fn index_path(&self) -> &Path {
254        &self.index_path
255    }
256
257    pub fn finish(mut self) -> Result<PackInstallResult> {
258        let result = (|| -> Result<PackInstallResult> {
259            let mut file = self.file.take().ok_or_else(|| {
260                GitError::InvalidFormat("raw pack stream already finished".into())
261            })?;
262            file.flush()?;
263            file.sync_all()?;
264            drop(file);
265
266            if self.written != self.expected_pack_size {
267                return Err(GitError::InvalidFormat(format!(
268                    "raw pack stream length mismatch: expected {}, got {}",
269                    self.expected_pack_size, self.written
270                )));
271            }
272
273            let built = PackIndex::write_v2_for_pack_path(&self.temp_pack_path, self.format)?;
274            if built.pack_checksum != self.expected_pack_id {
275                return Err(GitError::InvalidFormat(format!(
276                    "raw pack stream checksum mismatch: expected {}, got {}",
277                    self.expected_pack_id, built.pack_checksum
278                )));
279            }
280
281            match fs::rename(&self.temp_pack_path, &self.pack_path) {
282                Ok(()) => {}
283                Err(_) if self.pack_path.exists() => {
284                    let _ = fs::remove_file(&self.temp_pack_path);
285                }
286                Err(err) => return Err(GitError::Io(err.to_string())),
287            }
288            write_pack_component(&self.index_path, &built.index)?;
289            let promisor_path = write_promisor_pack_sidecar(
290                &self.pack_dir,
291                &self.pack_name,
292                self.options.promisor,
293            )?;
294            Ok(PackInstallResult {
295                pack_name: self.pack_name.clone(),
296                pack_path: self.pack_path.clone(),
297                index_path: self.index_path.clone(),
298                promisor_path,
299                object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
300            })
301        })();
302
303        if result.is_ok() {
304            self.finished = true;
305        } else {
306            let _ = fs::remove_file(&self.temp_pack_path);
307        }
308        result
309    }
310}
311
312impl Write for RawPackStreamingInstall {
313    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
314        let next_written = self.written.checked_add(buf.len() as u64).ok_or_else(|| {
315            std::io::Error::new(std::io::ErrorKind::InvalidData, "pack size overflow")
316        })?;
317        if next_written > self.expected_pack_size {
318            return Err(std::io::Error::new(
319                std::io::ErrorKind::InvalidData,
320                format!(
321                    "raw pack stream exceeds expected size {}; got at least {}",
322                    self.expected_pack_size, next_written
323                ),
324            ));
325        }
326        let file = self.file.as_mut().ok_or_else(|| {
327            std::io::Error::new(
328                std::io::ErrorKind::BrokenPipe,
329                "raw pack stream already finished",
330            )
331        })?;
332        let written = file.write(buf)?;
333        self.written = self.written.checked_add(written as u64).ok_or_else(|| {
334            std::io::Error::new(std::io::ErrorKind::InvalidData, "pack size overflow")
335        })?;
336        Ok(written)
337    }
338
339    fn flush(&mut self) -> std::io::Result<()> {
340        match self.file.as_mut() {
341            Some(file) => file.flush(),
342            None => Ok(()),
343        }
344    }
345}
346
347impl Drop for RawPackStreamingInstall {
348    fn drop(&mut self) {
349        if !self.finished {
350            let _ = self.file.take();
351            let _ = fs::remove_file(&self.temp_pack_path);
352        }
353    }
354}
355
356pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
357    let mut missing = Vec::new();
358    for prerequisite in &bundle.prerequisites {
359        match reader.read_object(&prerequisite.oid) {
360            Ok(object) => {
361                let actual = object.object_id(bundle.format)?;
362                if actual != prerequisite.oid {
363                    return Err(GitError::InvalidObject(format!(
364                        "bundle prerequisite {} hashes to {actual}",
365                        prerequisite.oid
366                    )));
367                }
368            }
369            Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
370            Err(err) => return Err(err),
371        }
372    }
373    if missing.is_empty() {
374        return Ok(());
375    }
376    Err(GitError::object_not_found_in(
377        missing[0],
378        MissingObjectContext::PackInstall,
379    ))
380}
381
382pub fn unbundle_objects<R, W>(
383    bundle: &Bundle,
384    prerequisite_reader: &R,
385    writer: &mut W,
386) -> Result<BundleUnbundleResult>
387where
388    R: ObjectReader,
389    W: ObjectWriter,
390{
391    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
392    let pack = PackFile::parse_bundle(bundle)?;
393    let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
394    Ok(BundleUnbundleResult {
395        written_objects,
396        references: bundle.references.clone(),
397    })
398}
399
400pub fn install_bundle_pack<R>(
401    bundle: &Bundle,
402    prerequisite_reader: &R,
403    destination: &impl RawPackInstaller,
404) -> Result<BundleUnbundleResult>
405where
406    R: ObjectReader,
407{
408    verify_bundle_prerequisites(bundle, prerequisite_reader)?;
409    let mut reader = bundle.pack.as_slice();
410    let install = destination.install_raw_pack_from_reader(&mut reader)?;
411    Ok(BundleUnbundleResult {
412        written_objects: install.object_ids,
413        references: bundle.references.clone(),
414    })
415}
416
417pub fn unpack_packfile_objects<W>(
418    pack_bytes: &[u8],
419    format: ObjectFormat,
420    writer: &W,
421) -> Result<PackUnpackResult>
422where
423    W: ObjectWriter,
424{
425    let pack = PackFile::parse(pack_bytes, format)?;
426    write_pack_objects(pack, writer, "pack")
427}
428
429pub fn index_raw_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<RawPackIndexResult> {
430    let pack = PackFile::parse(pack_bytes, format)?;
431    let built = PackIndex::write_v2_for_pack(pack_bytes, format)?;
432    if built.pack_checksum != pack.checksum {
433        return Err(GitError::InvalidFormat(
434            "pack index checksum does not match parsed pack checksum".to_string(),
435        ));
436    }
437
438    let offsets = built
439        .entries
440        .iter()
441        .map(|entry| (entry.oid, entry.offset))
442        .collect::<HashMap<_, _>>();
443    let mut objects = Vec::with_capacity(pack.entries.len());
444    for object in pack.entries {
445        let offset = offsets.get(&object.entry.oid).copied().ok_or_else(|| {
446            GitError::InvalidFormat(format!(
447                "pack index is missing object {}",
448                object.entry.oid.to_hex()
449            ))
450        })?;
451        objects.push(RawPackIndexedObject {
452            oid: object.entry.oid,
453            object_type: object.object.object_type,
454            size: object.object.body.len() as u64,
455            offset,
456        });
457    }
458
459    Ok(RawPackIndexResult {
460        pack_id: built.pack_checksum,
461        index: built.index,
462        objects,
463    })
464}
465
466pub fn index_raw_pack_from_reader<R>(
467    reader: &mut R,
468    format: ObjectFormat,
469) -> Result<RawPackIndexResult>
470where
471    R: Read,
472{
473    Ok(stream_index_build_to_raw_result(
474        PackIndex::write_v2_for_pack_reader_to_trailer(reader, format)?,
475    ))
476}
477
478pub fn index_raw_pack_from_reader_with_len<R>(
479    reader: &mut R,
480    format: ObjectFormat,
481    pack_len: u64,
482) -> Result<RawPackIndexResult>
483where
484    R: Read,
485{
486    Ok(stream_index_build_to_raw_result(
487        PackIndex::write_v2_for_pack_reader_with_len(reader, format, pack_len)?,
488    ))
489}
490
491pub fn index_raw_pack_file(
492    path: impl AsRef<Path>,
493    format: ObjectFormat,
494) -> Result<RawPackIndexResult> {
495    Ok(stream_index_build_to_raw_result(
496        PackIndex::write_v2_for_pack_path(path, format)?,
497    ))
498}
499
500fn stream_index_build_to_raw_result(built: PackStreamIndexBuild) -> RawPackIndexResult {
501    let objects = built
502        .objects
503        .into_iter()
504        .map(|object| RawPackIndexedObject {
505            oid: object.oid,
506            object_type: object.object_type,
507            size: object.size,
508            offset: object.offset,
509        })
510        .collect::<Vec<_>>();
511    RawPackIndexResult {
512        pack_id: built.pack_checksum,
513        index: built.index,
514        objects,
515    }
516}
517
518fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
519where
520    W: ObjectWriter,
521{
522    let mut written_objects = Vec::with_capacity(pack.entries.len());
523    for entry in pack.entries {
524        let expected = entry.entry.oid;
525        let actual = writer.write_object(entry.object)?;
526        if actual != expected {
527            return Err(GitError::InvalidObject(format!(
528                "{source} object id mismatch: expected {expected}, wrote {actual}"
529            )));
530        }
531        written_objects.push(actual);
532    }
533    Ok(PackUnpackResult { written_objects })
534}
535
536pub fn collect_reachable_object_ids<R, I>(
537    reader: &R,
538    format: ObjectFormat,
539    starts: I,
540) -> Result<HashSet<ObjectId>>
541where
542    R: ObjectReader,
543    I: IntoIterator<Item = ObjectId>,
544{
545    walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
546}
547
548/// [`collect_reachable_object_ids`] with a cut set: commits in `cut` are
549/// collected, but the walk does not continue to their parents — the view a
550/// shallow repository has of its own refs (`$GIT_DIR/shallow` of the *other*
551/// side, threaded explicitly because `reader` belongs to this side).
552pub fn collect_reachable_object_ids_with_cut<R, I>(
553    reader: &R,
554    format: ObjectFormat,
555    starts: I,
556    cut: &HashSet<ObjectId>,
557) -> Result<HashSet<ObjectId>>
558where
559    R: ObjectReader,
560    I: IntoIterator<Item = ObjectId>,
561{
562    walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
563}
564
565/// [`collect_reachable_object_ids`] with a stop set: objects in `excluded` are
566/// not visited and not expanded, so the walk never sees anything reachable only
567/// through them (used to truncate history at a shallow boundary).
568pub fn collect_reachable_object_ids_excluding<R, I>(
569    reader: &R,
570    format: ObjectFormat,
571    starts: I,
572    excluded: &HashSet<ObjectId>,
573) -> Result<HashSet<ObjectId>>
574where
575    R: ObjectReader,
576    I: IntoIterator<Item = ObjectId>,
577{
578    walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
579}
580
581pub fn collect_reachable_objects<R, I>(
582    reader: &R,
583    format: ObjectFormat,
584    starts: I,
585    excluded: &HashSet<ObjectId>,
586) -> Result<Vec<Arc<EncodedObject>>>
587where
588    R: ObjectReader,
589    I: IntoIterator<Item = ObjectId>,
590{
591    let mut objects = Vec::new();
592    walk_reachable_objects(reader, format, starts, excluded, |_, object| {
593        objects.push(Arc::clone(object));
594    })?;
595    Ok(objects)
596}
597
598#[derive(Debug, Clone)]
599struct ReachablePackObject {
600    oid: ObjectId,
601    object: Arc<EncodedObject>,
602}
603
604fn collect_reachable_pack_objects<R, I>(
605    reader: &R,
606    format: ObjectFormat,
607    starts: I,
608    excluded: &HashSet<ObjectId>,
609) -> Result<Vec<ReachablePackObject>>
610where
611    R: ObjectReader,
612    I: IntoIterator<Item = ObjectId>,
613{
614    let mut objects = Vec::new();
615    walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
616        objects.push(ReachablePackObject {
617            oid: *oid,
618            object: Arc::clone(object),
619        });
620    })?;
621    Ok(objects)
622}
623
624fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
625    objects
626        .iter()
627        .map(|entry| PackInput {
628            oid: &entry.oid,
629            object: &entry.object,
630        })
631        .collect()
632}
633
634pub fn install_reachable_pack<I>(
635    source: &impl ObjectReader,
636    destination: &impl RawPackInstaller,
637    format: ObjectFormat,
638    starts: I,
639) -> Result<Option<RawPackInstallResult>>
640where
641    I: IntoIterator<Item = ObjectId>,
642{
643    install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
644}
645
646pub fn install_reachable_pack_excluding<I>(
647    source: &impl ObjectReader,
648    destination: &impl RawPackInstaller,
649    format: ObjectFormat,
650    starts: I,
651    excluded: &HashSet<ObjectId>,
652) -> Result<Option<RawPackInstallResult>>
653where
654    I: IntoIterator<Item = ObjectId>,
655{
656    let pack = match build_reachable_pack(source, format, starts, excluded)? {
657        Some(pack) => pack,
658        None => return Ok(None),
659    };
660    let mut reader = pack.pack.as_slice();
661    destination
662        .install_raw_pack_from_reader(&mut reader)
663        .map(Some)
664}
665
666pub fn build_reachable_pack<R, I>(
667    reader: &R,
668    format: ObjectFormat,
669    starts: I,
670    excluded: &HashSet<ObjectId>,
671) -> Result<Option<PackWrite>>
672where
673    R: ObjectReader,
674    I: IntoIterator<Item = ObjectId>,
675{
676    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
677    if objects.is_empty() {
678        return Ok(None);
679    }
680    // Delta-compress reachable packs (used by install/push/fetch) via git-pack's
681    // sliding-window selection. Self-contained, ofs-delta by default; round-trips
682    // through the existing parser. PackWrite shape is unchanged, so callers are
683    // unaffected.
684    let inputs = pack_inputs(&objects);
685    PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
686}
687
688pub fn build_reachable_pack_file<R, I>(
689    reader: &R,
690    format: ObjectFormat,
691    starts: I,
692    excluded: &HashSet<ObjectId>,
693    pack_path: impl AsRef<Path>,
694) -> Result<Option<ReachablePackFile>>
695where
696    R: ObjectReader,
697    I: IntoIterator<Item = ObjectId>,
698{
699    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
700    if objects.is_empty() {
701        return Ok(None);
702    }
703    let inputs = pack_inputs(&objects);
704    let pack_path = pack_path.as_ref();
705    if let Some(parent) = pack_path.parent() {
706        fs::create_dir_all(parent)?;
707    }
708    let mut file = fs::OpenOptions::new()
709        .write(true)
710        .create(true)
711        .truncate(true)
712        .open(pack_path)?;
713    let summary = PackFile::write_packed_with_known_ids_to_writer(
714        &inputs,
715        format,
716        &PackWriteOptions::new(),
717        &mut file,
718    )?;
719    file.sync_all()?;
720    Ok(Some(reachable_pack_file_result(pack_path, summary)))
721}
722
723pub fn write_reachable_pack_to_writer<R, I, W>(
724    reader: &R,
725    format: ObjectFormat,
726    starts: I,
727    excluded: &HashSet<ObjectId>,
728    writer: &mut W,
729) -> Result<Option<ReachablePackWriteSummary>>
730where
731    R: ObjectReader,
732    I: IntoIterator<Item = ObjectId>,
733    W: Write,
734{
735    let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
736    if objects.is_empty() {
737        return Ok(None);
738    }
739    let inputs = pack_inputs(&objects);
740    let summary = PackFile::write_packed_with_known_ids_to_writer(
741        &inputs,
742        format,
743        &PackWriteOptions::new(),
744        writer,
745    )?;
746    Ok(Some(reachable_pack_write_summary(summary)))
747}
748
749fn reachable_pack_file_result(path: &Path, summary: PackWriteSummary) -> ReachablePackFile {
750    ReachablePackFile {
751        pack_path: path.to_path_buf(),
752        pack_size: summary.pack_size,
753        checksum: summary.checksum,
754        object_count: summary.entries.len(),
755        delta_count: summary.delta_count,
756    }
757}
758
759fn reachable_pack_write_summary(summary: PackWriteSummary) -> ReachablePackWriteSummary {
760    ReachablePackWriteSummary {
761        index: summary.index,
762        checksum: summary.checksum,
763        object_count: summary.entries.len(),
764        delta_count: summary.delta_count,
765        pack_size: summary.pack_size,
766    }
767}
768
769pub fn build_and_install_reachable_pack<R, I>(
770    source: &R,
771    destination: &FileObjectDatabase,
772    format: ObjectFormat,
773    starts: I,
774    excluded: &HashSet<ObjectId>,
775    options: RawPackInstallOptions,
776) -> Result<Option<PackInstallResult>>
777where
778    R: ObjectReader,
779    I: IntoIterator<Item = ObjectId>,
780{
781    build_and_install_reachable_pack_filtered(
782        source,
783        destination,
784        format,
785        starts,
786        excluded,
787        options,
788        None,
789        None,
790    )
791}
792
/// A partial-clone object filter applied while building a transfer pack.
///
/// Mirrors the subset of upstream's `list-objects-filter` the in-process local
/// server supports: directly-wanted tips are always packed; the filter only
/// prunes objects reached *through* the traversal (upstream's
/// `filter_blobs_none` runs on traversed blobs, never on wanted tips).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// `blob:none`: omit every blob reached through tree traversal.
    BlobNone,
    /// `blob:limit=<n>`: omit traversed blobs whose body is at least `n` bytes.
    BlobLimit(u64),
    /// `tree:<n>`: keep only trees shallower than `n`, and omit traversed blobs.
    TreeDepth(u32),
    /// `sparse:oid=<blob>`: keep only blobs whose repo path is listed.
    SparsePathSet(Vec<String>),
}
810
811/// [`build_and_install_reachable_pack`] with an optional partial-clone
812/// `filter`. With `Some(BlobNone)`, blobs are dropped from the pack unless
813/// they are directly wanted (named in `starts`).
814#[allow(clippy::too_many_arguments)]
815pub fn build_and_install_reachable_pack_filtered<R, I>(
816    source: &R,
817    destination: &FileObjectDatabase,
818    format: ObjectFormat,
819    starts: I,
820    excluded: &HashSet<ObjectId>,
821    options: RawPackInstallOptions,
822    filter: Option<PackObjectFilter>,
823    unpack_limit: Option<usize>,
824) -> Result<Option<PackInstallResult>>
825where
826    R: ObjectReader,
827    I: IntoIterator<Item = ObjectId>,
828{
829    let starts: Vec<ObjectId> = starts.into_iter().collect();
830    let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
831    let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
832    match filter {
833        Some(PackObjectFilter::BlobNone) => {
834            objects.retain(|entry| {
835                entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
836            });
837        }
838        Some(PackObjectFilter::BlobLimit(limit)) => {
839            objects.retain(|entry| {
840                entry.object.object_type != ObjectType::Blob
841                    || wanted.contains(&entry.oid)
842                    || (entry.object.body.len() as u64) < limit
843            });
844        }
845        Some(PackObjectFilter::TreeDepth(depth)) => {
846            let tree_depths = collect_tree_filter_depths(source, format, &objects)?;
847            objects.retain(|entry| {
848                if wanted.contains(&entry.oid) {
849                    return true;
850                }
851                match entry.object.object_type {
852                    ObjectType::Blob => false,
853                    ObjectType::Tree => tree_depths
854                        .get(&entry.oid)
855                        .is_some_and(|tree_depth| *tree_depth < depth),
856                    _ => true,
857                }
858            });
859        }
860        Some(PackObjectFilter::SparsePathSet(paths)) => {
861            let allowed_blobs = collect_sparse_filter_blobs(source, format, &objects, &paths)?;
862            objects.retain(|entry| {
863                entry.object.object_type != ObjectType::Blob
864                    || wanted.contains(&entry.oid)
865                    || allowed_blobs.contains(&entry.oid)
866            });
867        }
868        None => {}
869    }
870    if objects.is_empty() {
871        return Ok(None);
872    }
873    // Mirror fetch-pack's unpack-limit: small transfers are exploded into
874    // loose objects instead of landing as a pack (upstream `get_pack` picks
875    // unpack-objects when the header count is below fetch/transfer.unpackLimit).
876    if let Some(limit) = unpack_limit
877        && objects.len() < limit
878    {
879        for entry in &objects {
880            destination.loose().write_object((*entry.object).clone())?;
881        }
882        return Ok(None);
883    }
884    let inputs = pack_inputs(&objects);
885    let pack_dir = destination.objects_dir.join("pack");
886    fs::create_dir_all(&pack_dir)?;
887    let temp_pack_path = unique_temp_path(&pack_dir);
888    let result = (|| -> Result<PackInstallResult> {
889        let mut file = fs::OpenOptions::new()
890            .write(true)
891            .create_new(true)
892            .open(&temp_pack_path)?;
893        let summary = PackFile::write_packed_with_known_ids_to_writer(
894            &inputs,
895            format,
896            &PackWriteOptions::new(),
897            &mut file,
898        )?;
899        file.flush()?;
900        file.sync_all()?;
901        drop(file);
902        trace_packfile_path(&temp_pack_path)?;
903        destination.install_pack_file_from_temp(
904            &temp_pack_path,
905            summary.checksum,
906            &summary.index,
907            summary.entries.iter().map(|entry| entry.oid).collect(),
908            options,
909        )
910    })();
911    if result.is_err() {
912        let _ = fs::remove_file(&temp_pack_path);
913    }
914    result.map(Some)
915}
916
917fn trace_packfile_path(pack_path: &Path) -> Result<()> {
918    let Some(path) = env::var_os("GIT_TRACE_PACKFILE").filter(|value| !value.is_empty()) else {
919        return Ok(());
920    };
921    fs::copy(pack_path, path)?;
922    Ok(())
923}
924
925fn collect_tree_filter_depths<R>(
926    reader: &R,
927    format: ObjectFormat,
928    objects: &[ReachablePackObject],
929) -> Result<HashMap<ObjectId, u32>>
930where
931    R: ObjectReader,
932{
933    let available: HashSet<ObjectId> = objects.iter().map(|entry| entry.oid).collect();
934    let mut depths = HashMap::new();
935    let mut stack = Vec::new();
936    for entry in objects {
937        if entry.object.object_type != ObjectType::Commit {
938            continue;
939        }
940        let commit = Commit::parse(format, &entry.object.body)?;
941        if available.contains(&commit.tree) {
942            stack.push((commit.tree, 0u32));
943        }
944    }
945    while let Some((tree_oid, depth)) = stack.pop() {
946        if depths
947            .get(&tree_oid)
948            .is_some_and(|old_depth| *old_depth <= depth)
949        {
950            continue;
951        }
952        depths.insert(tree_oid, depth);
953        let tree = reader.read_object(&tree_oid)?;
954        if tree.object_type != ObjectType::Tree {
955            continue;
956        }
957        let child_depth = depth.saturating_add(1);
958        for entry in TreeEntries::new(format, &tree.body) {
959            let entry = entry?;
960            if tree_entry_object_type(entry.mode) == ObjectType::Tree
961                && available.contains(&entry.oid)
962            {
963                stack.push((entry.oid, child_depth));
964            }
965        }
966    }
967    Ok(depths)
968}
969
970fn collect_sparse_filter_blobs<R>(
971    reader: &R,
972    format: ObjectFormat,
973    objects: &[ReachablePackObject],
974    paths: &[String],
975) -> Result<HashSet<ObjectId>>
976where
977    R: ObjectReader,
978{
979    let wanted_paths: HashSet<&str> = paths.iter().map(String::as_str).collect();
980    let mut allowed = HashSet::new();
981    let mut seen_trees = HashSet::new();
982    for entry in objects {
983        if entry.object.object_type != ObjectType::Commit {
984            continue;
985        }
986        let commit = Commit::parse(format, &entry.object.body)?;
987        collect_sparse_tree_blobs(
988            reader,
989            format,
990            &commit.tree,
991            "",
992            &wanted_paths,
993            &mut seen_trees,
994            &mut allowed,
995        )?;
996    }
997    Ok(allowed)
998}
999
1000fn collect_sparse_tree_blobs<R>(
1001    reader: &R,
1002    format: ObjectFormat,
1003    tree_oid: &ObjectId,
1004    prefix: &str,
1005    wanted_paths: &HashSet<&str>,
1006    seen_trees: &mut HashSet<ObjectId>,
1007    allowed: &mut HashSet<ObjectId>,
1008) -> Result<()>
1009where
1010    R: ObjectReader,
1011{
1012    if !seen_trees.insert(*tree_oid) {
1013        return Ok(());
1014    }
1015    let tree = reader.read_object(tree_oid)?;
1016    if tree.object_type != ObjectType::Tree {
1017        return Ok(());
1018    }
1019    for entry in TreeEntries::new(format, &tree.body) {
1020        let entry = entry?;
1021        let name = String::from_utf8_lossy(entry.name);
1022        let path = if prefix.is_empty() {
1023            name.into_owned()
1024        } else {
1025            format!("{prefix}/{name}")
1026        };
1027        if tree_entry_object_type(entry.mode) == ObjectType::Tree {
1028            collect_sparse_tree_blobs(
1029                reader,
1030                format,
1031                &entry.oid,
1032                &path,
1033                wanted_paths,
1034                seen_trees,
1035                allowed,
1036            )?;
1037        } else if wanted_paths.contains(path.as_str()) {
1038            allowed.insert(entry.oid);
1039        }
1040    }
1041    Ok(())
1042}
1043
1044/// Assemble a pack stream that reuses an existing pack's object data verbatim
1045/// (upstream pack-objects' "pack reuse" fast path, full-pack case) and appends
1046/// `appended` as freshly encoded undeltified entries.
1047///
1048/// The reused pack's entry bytes are copied as-is between our own header and
1049/// trailer: a full-pack copy preserves every relative distance, so internal
1050/// `OFS_DELTA` bases stay valid. The header object count covers both the
1051/// reused and appended entries, and the trailing pack checksum is recomputed
1052/// over the assembled stream.
1053pub fn assemble_pack_with_verbatim_reuse(
1054    format: ObjectFormat,
1055    reused_pack_bytes: &[u8],
1056    appended: &[PackInput<'_>],
1057) -> Result<(Vec<u8>, u32)> {
1058    assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
1059}
1060
1061/// Like [`assemble_pack_with_verbatim_reuse`], but concatenates multiple whole
1062/// packs before appending fresh entries.
1063pub fn assemble_pack_with_verbatim_reuses(
1064    format: ObjectFormat,
1065    reused_packs: &[&[u8]],
1066    appended: &[PackInput<'_>],
1067) -> Result<(Vec<u8>, u32)> {
1068    let hash_len = format.raw_len();
1069    let mut reused_count = 0u32;
1070    let mut capacity = 12 + hash_len + 64 * appended.len();
1071    for reused_pack_bytes in reused_packs {
1072        if reused_pack_bytes.len() < 12 + hash_len {
1073            return Err(GitError::InvalidFormat("reused pack too short".into()));
1074        }
1075        if &reused_pack_bytes[..4] != b"PACK" {
1076            return Err(GitError::InvalidFormat(
1077                "reused pack has no signature".into(),
1078            ));
1079        }
1080        let version = u32::from_be_bytes([
1081            reused_pack_bytes[4],
1082            reused_pack_bytes[5],
1083            reused_pack_bytes[6],
1084            reused_pack_bytes[7],
1085        ]);
1086        if version != 2 {
1087            return Err(GitError::Unsupported(format!(
1088                "reused pack version {version}"
1089            )));
1090        }
1091        let count = u32::from_be_bytes([
1092            reused_pack_bytes[8],
1093            reused_pack_bytes[9],
1094            reused_pack_bytes[10],
1095            reused_pack_bytes[11],
1096        ]);
1097        reused_count = reused_count
1098            .checked_add(count)
1099            .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
1100        capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
1101    }
1102    let total = reused_count
1103        .checked_add(appended.len() as u32)
1104        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
1105
1106    let mut out = Vec::with_capacity(capacity);
1107    out.extend_from_slice(b"PACK");
1108    out.extend_from_slice(&2u32.to_be_bytes());
1109    out.extend_from_slice(&total.to_be_bytes());
1110    for reused_pack_bytes in reused_packs {
1111        out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
1112    }
1113    for input in appended {
1114        write_undeltified_pack_entry(&mut out, input.object)?;
1115    }
1116    let checksum = sley_core::digest_bytes(format, &out)?;
1117    out.extend_from_slice(checksum.as_bytes());
1118    Ok((out, reused_count))
1119}
1120
1121/// Assemble a pack stream by copying already-encoded pack entries verbatim and
1122/// appending freshly encoded undeltified entries.
1123pub fn assemble_pack_with_verbatim_entries(
1124    format: ObjectFormat,
1125    reused_entries: &[&[u8]],
1126    appended: &[PackInput<'_>],
1127) -> Result<(Vec<u8>, u32)> {
1128    let reused_count = u32::try_from(reused_entries.len())
1129        .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
1130    let total = reused_count
1131        .checked_add(appended.len() as u32)
1132        .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
1133
1134    let mut capacity = 12 + format.raw_len() + 64 * appended.len();
1135    for entry in reused_entries {
1136        capacity = capacity.saturating_add(entry.len());
1137    }
1138    let mut out = Vec::with_capacity(capacity);
1139    out.extend_from_slice(b"PACK");
1140    out.extend_from_slice(&2u32.to_be_bytes());
1141    out.extend_from_slice(&total.to_be_bytes());
1142    for entry in reused_entries {
1143        out.extend_from_slice(entry);
1144    }
1145    for input in appended {
1146        write_undeltified_pack_entry(&mut out, input.object)?;
1147    }
1148    let checksum = sley_core::digest_bytes(format, &out)?;
1149    out.extend_from_slice(checksum.as_bytes());
1150    Ok((out, reused_count))
1151}
1152
1153/// Append one undeltified pack entry (type/size varint header + zlib body).
1154fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
1155    let type_bits: u8 = match object.object_type {
1156        ObjectType::Commit => 1,
1157        ObjectType::Tree => 2,
1158        ObjectType::Blob => 3,
1159        ObjectType::Tag => 4,
1160    };
1161    let mut size = object.body.len() as u64;
1162    let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
1163    size >>= 4;
1164    while size > 0 {
1165        out.push(byte | 0x80);
1166        byte = (size & 0x7f) as u8;
1167        size >>= 7;
1168    }
1169    out.push(byte);
1170    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1171    encoder.write_all(&object.body)?;
1172    out.extend_from_slice(&encoder.finish()?);
1173    Ok(())
1174}
1175
1176/// Outcome of consolidating every object in a repository into a single pack.
1177///
1178/// This is the engine for `git gc` / `git repack`: [`repack_all_objects`]
1179/// produces the bytes for one new delta-compressed pack plus its index, and
1180/// reports which on-disk artifacts the caller could now remove. No deletions
1181/// are performed by the engine itself; the CLI decides reachability policy and
1182/// performs any pruning (see [`install_repack_result`]).
1183#[derive(Debug, Clone, PartialEq, Eq)]
1184pub struct RepackResult {
1185    /// Bytes of the freshly written `.pack` file.
1186    pub pack: Vec<u8>,
1187    /// Bytes of the matching `.idx` file for [`RepackResult::pack`].
1188    pub idx: Vec<u8>,
1189    /// Number of distinct objects contained in the new pack.
1190    pub object_count: usize,
1191    /// Absolute paths of pre-existing `*.pack` files now superseded by the new
1192    /// pack (every object they hold is present in [`RepackResult::pack`]).
1193    pub obsolete_packs: Vec<PathBuf>,
1194    /// Loose object ids that are now also present in the new pack and therefore
1195    /// redundant on disk.
1196    pub packed_loose: Vec<ObjectId>,
1197    /// Pack stems (`pack-<checksum>`) that policy says must survive pruning
1198    /// even if the new pack contains all of their objects.
1199    retained_pack_stems: Vec<String>,
1200    pack_checksum: ObjectId,
1201    index_entries: Vec<PackIndexEntry>,
1202}
1203
/// Policy switches for [`repack_reachable_objects_with_options`].
///
/// The default value enables none of the switches and keeps no explicit packs.
#[derive(Clone, Debug, Default)]
pub struct RepackOptions {
    /// `git repack --local`: do not borrow objects from alternates.
    pub local: bool,
    /// Also repack objects that already live in `.keep` / `--keep-pack` packs.
    pub pack_kept_objects: bool,
    /// Pack stems (`pack-<checksum>`) named explicitly via `--keep-pack=<name>`.
    pub keep_pack_stems: HashSet<String>,
}
1213
1214/// Gather every object in `git_dir` (loose objects and every existing pack) and
1215/// write them into a single new delta-compressed pack.
1216///
1217/// Returns the new pack/index bytes, the count of packed objects, the list of
1218/// pre-existing pack files that the new pack supersedes, and the loose object
1219/// ids that are now packed. Nothing is deleted: the caller (CLI) decides
1220/// reachability policy and performs any pruning, optionally via
1221/// [`install_repack_result`].
1222///
1223/// Returns `Ok(None)` when the repository contains no objects at all.
1224/// `git repack -a`'s gathering rule: pack the reachability closure of `roots`
1225/// (ref tips, `HEAD`, reflog entries, indexed objects) instead of everything
1226/// on disk. Borrowed objects (alternates) reachable from the roots are packed
1227/// into the new local pack like upstream `pack-objects --all` without
1228/// `--local`; previously-packed objects that are no longer reachable are NOT
1229/// carried forward (that is how `repack -a -d` drops them). Missing objects
1230/// are tolerated (stale reflog entries may reference pruned history).
1231///
1232/// Returns `Ok(None)` when no roots resolve to any object.
1233pub fn repack_reachable_objects(
1234    git_dir: &Path,
1235    format: ObjectFormat,
1236    roots: &[ObjectId],
1237) -> Result<Option<RepackResult>> {
1238    repack_reachable_objects_with_options(git_dir, format, roots, &RepackOptions::default())
1239}
1240
1241pub fn repack_reachable_objects_with_options(
1242    git_dir: &Path,
1243    format: ObjectFormat,
1244    roots: &[ObjectId],
1245    options: &RepackOptions,
1246) -> Result<Option<RepackResult>> {
1247    let objects_dir = repository_objects_dir(git_dir);
1248    let database = if options.local {
1249        FileObjectDatabase::without_alternates(objects_dir.clone(), format)
1250    } else {
1251        FileObjectDatabase::new(objects_dir.clone(), format)
1252    };
1253    let retained_pack_stems = repack_retained_pack_stems(
1254        &objects_dir.join("pack"),
1255        &options.keep_pack_stems,
1256        !options.pack_kept_objects,
1257    )?;
1258    let excluded_oids = if options.pack_kept_objects {
1259        HashSet::new()
1260    } else {
1261        pack_oids_for_stems(&objects_dir.join("pack"), format, &retained_pack_stems)?
1262    };
1263
1264    let mut seen: HashSet<ObjectId> = HashSet::new();
1265    let mut objects: Vec<ReachablePackObject> = Vec::new();
1266    let mut pending: Vec<ObjectId> = roots.to_vec();
1267    while let Some(oid) = pending.pop() {
1268        if !seen.insert(oid) {
1269            continue;
1270        }
1271        let object = match database.read_object(&oid) {
1272            Ok(object) => object,
1273            Err(GitError::NotFound(_)) => continue,
1274            Err(err) => return Err(err),
1275        };
1276        match object.object_type {
1277            ObjectType::Commit => {
1278                let commit = Commit::parse_ref(format, &object.body)?;
1279                pending.extend(grafted_parents(&database, &oid, commit.parents));
1280                pending.push(commit.tree);
1281            }
1282            ObjectType::Tree => {
1283                for entry in TreeEntries::new(format, &object.body) {
1284                    let entry = entry?;
1285                    if !entry.is_gitlink() {
1286                        pending.push(entry.oid);
1287                    }
1288                }
1289            }
1290            ObjectType::Tag => {
1291                let tag = Tag::parse_ref(format, &object.body)?;
1292                pending.push(tag.object);
1293            }
1294            ObjectType::Blob => {}
1295        }
1296        if !excluded_oids.contains(&oid) {
1297            objects.push(ReachablePackObject { oid, object });
1298        }
1299    }
1300
1301    // Non-local repacks borrow packed objects from alternates as complete pack
1302    // sources, while still leaving loose-only alternate objects alone. This
1303    // matches `pack-objects --all` without `--local`: packed alternate objects
1304    // are copied into the local consolidated pack, but a loose object in an
1305    // alternate ODB is not duplicated just because a local tree points at it.
1306    if !options.local {
1307        for (alternate, oid) in alternate_packed_object_ids(&objects_dir, format)? {
1308            if excluded_oids.contains(&oid) || !seen.insert(oid) {
1309                continue;
1310            }
1311            let alternate_db = FileObjectDatabase::without_alternates(alternate, format);
1312            match alternate_db.read_object(&oid) {
1313                Ok(object) => objects.push(ReachablePackObject { oid, object }),
1314                Err(GitError::NotFound(_)) => {}
1315                Err(err) => return Err(err),
1316            }
1317        }
1318    }
1319
1320    if objects.is_empty() {
1321        return Ok(None);
1322    }
1323
1324    let inputs = pack_inputs(&objects);
1325    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1326    let object_count = written.entries.len();
1327
1328    // Every pre-existing local pack is superseded under `-a` (their reachable
1329    // objects are in the new pack; their unreachable ones are being dropped).
1330    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
1331    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
1332        .into_iter()
1333        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
1334        .collect();
1335
1336    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1337    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1338        .into_iter()
1339        .filter(|oid| packed_oid_set.contains(oid))
1340        .collect();
1341    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1342
1343    let pack_checksum = written.checksum;
1344    let index_entries = written.entries.clone();
1345    Ok(Some(RepackResult {
1346        pack: written.pack,
1347        idx: written.index,
1348        object_count,
1349        obsolete_packs,
1350        packed_loose,
1351        retained_pack_stems,
1352        pack_checksum,
1353        index_entries,
1354    }))
1355}
1356
1357fn repack_retained_pack_stems(
1358    pack_dir: &Path,
1359    explicit: &HashSet<String>,
1360    keep_dot_keep: bool,
1361) -> Result<Vec<String>> {
1362    let mut stems = explicit.clone();
1363    if keep_dot_keep {
1364        for pack_path in existing_pack_files(pack_dir)? {
1365            if pack_path.with_extension("keep").exists()
1366                && let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
1367            {
1368                stems.insert(stem.to_string());
1369            }
1370        }
1371    }
1372    let mut stems = stems.into_iter().collect::<Vec<_>>();
1373    stems.sort();
1374    Ok(stems)
1375}
1376
1377fn pack_oids_for_stems(
1378    pack_dir: &Path,
1379    format: ObjectFormat,
1380    stems: &[String],
1381) -> Result<HashSet<ObjectId>> {
1382    let wanted: HashSet<&str> = stems.iter().map(String::as_str).collect();
1383    if wanted.is_empty() {
1384        return Ok(HashSet::new());
1385    }
1386    let mut oids = HashSet::new();
1387    for pack_path in existing_pack_files(pack_dir)? {
1388        let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1389            continue;
1390        };
1391        if !wanted.contains(stem) {
1392            continue;
1393        }
1394        let index_path = pack_path.with_extension("idx");
1395        if !index_path.exists() {
1396            continue;
1397        }
1398        let index = PackIndex::parse(&fs::read(index_path)?, format)?;
1399        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
1400    }
1401    Ok(oids)
1402}
1403
1404fn alternate_packed_object_ids(
1405    objects_dir: &Path,
1406    format: ObjectFormat,
1407) -> Result<Vec<(PathBuf, ObjectId)>> {
1408    let mut oids = Vec::new();
1409    for alternate in alternate_object_dirs(objects_dir) {
1410        let mut alternate_oids = HashSet::new();
1411        collect_packed_object_ids(&alternate.join("pack"), format, &mut alternate_oids)?;
1412        oids.extend(
1413            alternate_oids
1414                .into_iter()
1415                .map(|oid| (alternate.clone(), oid)),
1416        );
1417    }
1418    oids.sort_by(|left, right| {
1419        left.0
1420            .cmp(&right.0)
1421            .then(left.1.as_bytes().cmp(right.1.as_bytes()))
1422    });
1423    Ok(oids)
1424}
1425
1426pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1427    let objects_dir = repository_objects_dir(git_dir);
1428    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1429
1430    // Enumerate every object id reachable on disk: loose objects, every pack
1431    // index, and any multi-pack-index. `object_ids_in_objects_dir` already
1432    // unions all of these and de-duplicates them.
1433    let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
1434    if all_oids.is_empty() {
1435        return Ok(None);
1436    }
1437
1438    // Read each object's canonical encoding so the new pack stores byte-for-byte
1439    // identical payloads. Loose objects take precedence over packed copies in
1440    // `FileObjectDatabase::read_object`, but both decode to the same bytes.
1441    let mut objects = Vec::with_capacity(all_oids.len());
1442    for oid in &all_oids {
1443        objects.push(ReachablePackObject {
1444            oid: *oid,
1445            object: database.read_object(oid)?,
1446        });
1447    }
1448
1449    let inputs = pack_inputs(&objects);
1450    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1451    let object_count = written.entries.len();
1452
1453    // The new pack contains every object on disk, so every pre-existing pack is
1454    // fully superseded. We still record the exact pack paths (not the index
1455    // paths) so the caller can delete the right files. The pack we are about to
1456    // write is excluded by name in case its checksum collides with an existing
1457    // pack (identical contents).
1458    let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
1459    let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
1460        .into_iter()
1461        .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
1462        .collect();
1463
1464    // Loose object ids that the new pack now also holds (which is all of them,
1465    // since they were gathered into it).
1466    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1467    let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1468        .into_iter()
1469        .filter(|oid| packed_oid_set.contains(oid))
1470        .collect();
1471    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1472
1473    Ok(Some(RepackResult {
1474        pack: written.pack,
1475        idx: written.index,
1476        object_count,
1477        obsolete_packs,
1478        packed_loose,
1479        retained_pack_stems: Vec::new(),
1480        pack_checksum: written.checksum,
1481        index_entries: written.entries,
1482    }))
1483}
1484
1485/// Gather only loose objects in `git_dir` and write them into a new pack.
1486///
1487/// This is the engine for plain `git repack -d` (without `-a`): existing packs
1488/// remain in place, and pruning removes only the loose copies that the new pack
1489/// now serves.
1490pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1491    let objects_dir = repository_objects_dir(git_dir);
1492    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1493    let loose_oids = loose_object_ids(&objects_dir, format)?;
1494    if loose_oids.is_empty() {
1495        return Ok(None);
1496    }
1497
1498    let mut objects = Vec::with_capacity(loose_oids.len());
1499    for oid in &loose_oids {
1500        objects.push(ReachablePackObject {
1501            oid: *oid,
1502            object: database.read_object(oid)?,
1503        });
1504    }
1505
1506    let inputs = pack_inputs(&objects);
1507    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1508    let object_count = written.entries.len();
1509    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1510    let mut packed_loose: Vec<ObjectId> = loose_oids
1511        .into_iter()
1512        .filter(|oid| packed_oid_set.contains(oid))
1513        .collect();
1514    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1515
1516    let pack_checksum = written.checksum;
1517    let index_entries = written.entries.clone();
1518    Ok(Some(RepackResult {
1519        pack: written.pack,
1520        idx: written.index,
1521        object_count,
1522        obsolete_packs: Vec::new(),
1523        packed_loose,
1524        retained_pack_stems: Vec::new(),
1525        pack_checksum,
1526        index_entries,
1527    }))
1528}
1529
1530/// A local, non-kept, non-cruft pack considered for a geometric rollup,
1531/// paired with the object count that orders it in the progression.
1532#[derive(Debug, Clone)]
1533struct GeometryPack {
1534    /// Absolute path to the `.pack` file.
1535    pack_path: PathBuf,
1536    /// Object ids the pack holds (from its `.idx`).
1537    oids: Vec<ObjectId>,
1538    /// `num_objects` weight used to order the progression.
1539    weight: u64,
1540    /// True when this pack is a promisor pack (`.promisor` sidecar).
1541    is_promisor: bool,
1542}
1543
1544/// The outcome of a geometric rollup: the new pack (if one was written) plus
1545/// the rolled-up packs whose objects it now serves.
1546#[derive(Debug, Clone)]
1547pub struct GeometricRepackResult {
1548    /// `Some` when a new pack was written; `None` when nothing needed packing.
1549    pub result: Option<RepackResult>,
1550    /// Pack `.pack` paths below the split that may now be removed under `-d`.
1551    pub rolled_up_packs: Vec<PathBuf>,
1552}
1553
1554/// Collect the local non-cruft, non-kept packs eligible for geometric rollup,
1555/// keyed by promisor-ness, ordered ascending by object count.
1556fn collect_geometry_packs(
1557    objects_dir: &Path,
1558    format: ObjectFormat,
1559    kept_pack_stems: &HashSet<String>,
1560) -> Result<Vec<GeometryPack>> {
1561    let pack_dir = objects_dir.join("pack");
1562    let mut packs = Vec::new();
1563    for pack_path in existing_pack_files(&pack_dir)? {
1564        // Cruft packs (`.mtimes` sidecar) and kept packs are excluded from the
1565        // progression, matching `pack_geometry_init` in repack-geometry.c.
1566        if pack_path.with_extension("mtimes").exists() {
1567            continue;
1568        }
1569        if pack_path.with_extension("keep").exists() {
1570            continue;
1571        }
1572        let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1573            continue;
1574        };
1575        if kept_pack_stems.contains(stem) {
1576            continue;
1577        }
1578        let index_path = pack_path.with_extension("idx");
1579        if !index_path.exists() {
1580            continue;
1581        }
1582        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1583        let oids: Vec<ObjectId> = index.entries.iter().map(|entry| entry.oid).collect();
1584        let weight = oids.len() as u64;
1585        packs.push(GeometryPack {
1586            is_promisor: pack_path.with_extension("promisor").exists(),
1587            pack_path,
1588            oids,
1589            weight,
1590        });
1591    }
1592    // Ascending by weight; pack_path breaks ties deterministically.
1593    packs.sort_by(|a, b| a.weight.cmp(&b.weight).then(a.pack_path.cmp(&b.pack_path)));
1594    Ok(packs)
1595}
1596
1597/// Port of `compute_pack_geometry_split` (repack-geometry.c): given packs in
1598/// ascending weight order, return the split index — packs `[0..split)` roll up
1599/// into one new pack, packs `[split..)` are left alone.
1600fn compute_geometry_split(packs: &[GeometryPack], split_factor: u64) -> usize {
1601    let pack_nr = packs.len();
1602    if pack_nr == 0 {
1603        return 0;
1604    }
1605    // Count packs (descending size) that already form a geometric progression.
1606    let mut i = pack_nr - 1;
1607    while i > 0 {
1608        let ours = packs[i].weight;
1609        let prev = packs[i - 1].weight;
1610        if ours < split_factor.saturating_mul(prev) {
1611            break;
1612        }
1613        i -= 1;
1614    }
1615    let mut split = i;
1616    if split != 0 {
1617        // The top of the last-compared pair can't be in the progression.
1618        split += 1;
1619    }
1620
1621    // Roll up everything below `split`; pulling those into a new pack may break
1622    // the progression in the heavy half, so absorb heavy-half packs until it
1623    // holds again.
1624    let mut total_size: u64 = packs[..split].iter().map(|p| p.weight).sum();
1625    for pack in &packs[split..] {
1626        if pack.weight < split_factor.saturating_mul(total_size) {
1627            split += 1;
1628            total_size = total_size.saturating_add(pack.weight);
1629        } else {
1630            break;
1631        }
1632    }
1633    split
1634}
1635
1636/// `git repack --geometric=<factor>`: roll up the smallest packs (plus loose
1637/// unpacked objects) so the surviving packs form a geometric progression by
1638/// object count. Objects in the rolled-up packs and loose objects are gathered
1639/// into one new pack; packs at/above the split are left in place. The new pack
1640/// excludes objects already served by a left-alone pack.
1641///
1642/// Returns the new pack plus the rolled-up pack paths the caller may delete
1643/// under `-d`. Returns an all-`None`/empty result when nothing needs packing
1644/// ("Nothing new to pack").
1645pub fn repack_geometric(
1646    git_dir: &Path,
1647    format: ObjectFormat,
1648    split_factor: u64,
1649    kept_pack_stems: &HashSet<String>,
1650) -> Result<GeometricRepackResult> {
1651    let objects_dir = repository_objects_dir(git_dir);
1652    let database = FileObjectDatabase::new(objects_dir.clone(), format);
1653
1654    // Promisor packs follow their own progression; the non-promisor packs are
1655    // the common case the test-suite exercises. Build the rollup from the
1656    // non-promisor packs plus loose objects.
1657    let all_packs = collect_geometry_packs(&objects_dir, format, kept_pack_stems)?;
1658    let packs: Vec<GeometryPack> = all_packs
1659        .into_iter()
1660        .filter(|pack| !pack.is_promisor)
1661        .collect();
1662
1663    let split = compute_geometry_split(&packs, split_factor);
1664
1665    let loose_oids = loose_object_ids(&objects_dir, format)?;
1666
1667    // The objects that end up in the new pack: every object in a rolled-up pack,
1668    // plus every loose object — but NOT objects already served by a pack left in
1669    // place (those above the split). This mirrors the `^pack` exclusion markers
1670    // that repack.c feeds to `pack-objects --stdin-packs`.
1671    let mut excluded_oids: HashSet<ObjectId> = HashSet::new();
1672    for pack in &packs[split..] {
1673        excluded_oids.extend(pack.oids.iter().copied());
1674    }
1675
1676    let mut included: Vec<ObjectId> = Vec::new();
1677    let mut seen: HashSet<ObjectId> = HashSet::new();
1678    for pack in &packs[..split] {
1679        for oid in &pack.oids {
1680            if excluded_oids.contains(oid) {
1681                continue;
1682            }
1683            if seen.insert(*oid) {
1684                included.push(*oid);
1685            }
1686        }
1687    }
1688    for oid in &loose_oids {
1689        if excluded_oids.contains(oid) {
1690            continue;
1691        }
1692        if seen.insert(*oid) {
1693            included.push(*oid);
1694        }
1695    }
1696
1697    // "Nothing new to pack": no packs roll up and no loose objects need packing.
1698    if included.is_empty() {
1699        return Ok(GeometricRepackResult {
1700            result: None,
1701            rolled_up_packs: Vec::new(),
1702        });
1703    }
1704
1705    included.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1706    let mut objects = Vec::with_capacity(included.len());
1707    for oid in &included {
1708        objects.push(ReachablePackObject {
1709            oid: *oid,
1710            object: database.read_object(oid)?,
1711        });
1712    }
1713
1714    let inputs = pack_inputs(&objects);
1715    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1716    let object_count = written.entries.len();
1717
1718    let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1719    let mut packed_loose: Vec<ObjectId> = loose_oids
1720        .into_iter()
1721        .filter(|oid| packed_oid_set.contains(oid))
1722        .collect();
1723    packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1724
1725    let rolled_up_packs: Vec<PathBuf> = packs[..split]
1726        .iter()
1727        .map(|pack| pack.pack_path.clone())
1728        .collect();
1729
1730    let pack_checksum = written.checksum;
1731    let index_entries = written.entries.clone();
1732    Ok(GeometricRepackResult {
1733        result: Some(RepackResult {
1734            pack: written.pack,
1735            idx: written.index,
1736            object_count,
1737            obsolete_packs: rolled_up_packs.clone(),
1738            packed_loose,
1739            retained_pack_stems: Vec::new(),
1740            pack_checksum,
1741            index_entries,
1742        }),
1743        rolled_up_packs,
1744    })
1745}
1746
1747/// Write the consolidated pack from a [`RepackResult`] into
1748/// `objects/pack/` and, when `prune` is set, remove the now-redundant
1749/// pre-existing packs and packed loose objects.
1750///
1751/// Pruning is opt-in and deliberately conservative: an object or pack is only
1752/// removed after verifying it is actually present in the freshly written pack
1753/// on disk. Concretely:
1754///
1755/// * a loose object is removed only if its id appears in the new pack;
1756/// * a pre-existing pack is removed only if it is not the pack we just wrote
1757///   *and* every object listed in its `.idx` is present in the new pack (its
1758///   `.idx` and known sidecars are removed alongside it);
1759/// * a stale `multi-pack-index` is removed only if every pack it references is
1760///   being removed, so no reader is ever left pointing at a deleted pack.
1761pub fn install_repack_result(
1762    git_dir: &Path,
1763    format: ObjectFormat,
1764    result: &RepackResult,
1765    prune: bool,
1766) -> Result<()> {
1767    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
1768}
1769
1770/// [`install_repack_result`] that additionally writes a `pack-<checksum>.bitmap`
1771/// reachability bitmap alongside the new pack when `bitmap_tips` is `Some`.
1772/// `bitmap_tips` carries the repository's ref tips (peeled to commits): they
1773/// receive selection preference, mirroring upstream's `NEEDS_BITMAP` flagging of
1774/// ref tips in `git repack -b` / `pack-objects --write-bitmap-index`.
1775pub fn install_repack_result_with_bitmap(
1776    git_dir: &Path,
1777    format: ObjectFormat,
1778    result: &RepackResult,
1779    prune: bool,
1780    bitmap_tips: Option<&HashSet<ObjectId>>,
1781) -> Result<()> {
1782    let objects_dir = repository_objects_dir(git_dir);
1783    let pack_dir = objects_dir.join("pack");
1784    fs::create_dir_all(&pack_dir)?;
1785
1786    // Validate the public bytes against the private provenance that
1787    // `repack_all_objects` captured from `PackFile::write_packed`. This avoids
1788    // inflating and resolving the freshly-written pack a second time while still
1789    // catching caller mutations before anything is written or pruned.
1790    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1791    let parsed_index = PackIndex::parse(&result.idx, format)?;
1792    if parsed_index.pack_checksum != result.pack_checksum {
1793        return Err(GitError::InvalidFormat(
1794            "repack index checksum does not match the new pack".into(),
1795        ));
1796    }
1797    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1798        return Err(GitError::InvalidFormat(
1799            "repack index does not match the new pack contents".into(),
1800        ));
1801    }
1802    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1803    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1804    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1805    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1806    // git writes a `.rev` alongside every repacked pack (`pack.writeReverseIndex`
1807    // defaults to true). Write it before the `.idx` so the index never becomes
1808    // visible ahead of its companions, mirroring upstream's finalize order.
1809    let reverse_index = sley_pack::PackReverseIndex::write(
1810        format,
1811        &sley_pack::pack_order_index_positions(&parsed_index.entries),
1812        &result.pack_checksum,
1813    )?;
1814    write_pack_component(&new_pack_path, &result.pack)?;
1815    write_pack_component(&new_rev_path, &reverse_index)?;
1816    write_pack_component(&new_index_path, &result.idx)?;
1817
1818    if let Some(tips) = bitmap_tips {
1819        // Build before pruning: the closure walk reads objects through the
1820        // pre-existing packs/loose store (the new pack holds the same bytes).
1821        let database = FileObjectDatabase::new(objects_dir.clone(), format);
1822        if let Some(bitmap) = build_pack_bitmap(
1823            &database,
1824            format,
1825            &result.index_entries,
1826            &result.pack_checksum,
1827            tips,
1828        )? {
1829            // Unlike the pack/idx/rev (content-addressed by the pack
1830            // checksum), the bitmap depends on selection inputs (e.g.
1831            // pack.preferBitmapTips), so an existing file must be replaced —
1832            // write_pack_component's exists-skip would keep a stale selection.
1833            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1834            remove_file_if_exists(&bitmap_path)?;
1835            write_pack_component(&bitmap_path, &bitmap)?;
1836        }
1837    }
1838
1839    if !prune {
1840        return Ok(());
1841    }
1842
1843    // Prune based on the objects the new pack's *index* can resolve (what reads use
1844    // once the old packs are gone), not just what the pack contains — so a stale
1845    // pack is never removed for an object the new index cannot serve.
1846    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1847
1848    prune_obsolete_pack_paths(
1849        &objects_dir,
1850        format,
1851        &result.obsolete_packs,
1852        &new_pack_path,
1853        &result.retained_pack_stems,
1854    )?;
1855    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1856    Ok(())
1857}
1858
1859/// Install a [`repack_geometric`] result: write the new pack, then under `prune`
1860/// remove EXACTLY the rolled-up packs (those below the geometric split) plus the
1861/// loose objects now packed. Unlike [`install_repack_result`], packs left in
1862/// place above the split are never removed even though some of their objects may
1863/// also live in the new pack.
1864pub fn install_geometric_repack_result(
1865    git_dir: &Path,
1866    format: ObjectFormat,
1867    geometric: &GeometricRepackResult,
1868    prune: bool,
1869    bitmap_tips: Option<&HashSet<ObjectId>>,
1870) -> Result<()> {
1871    let Some(result) = geometric.result.as_ref() else {
1872        return Ok(());
1873    };
1874    let objects_dir = repository_objects_dir(git_dir);
1875    let pack_dir = objects_dir.join("pack");
1876    fs::create_dir_all(&pack_dir)?;
1877
1878    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1879    let parsed_index = PackIndex::parse(&result.idx, format)?;
1880    if parsed_index.pack_checksum != result.pack_checksum {
1881        return Err(GitError::InvalidFormat(
1882            "repack index checksum does not match the new pack".into(),
1883        ));
1884    }
1885    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1886        return Err(GitError::InvalidFormat(
1887            "repack index does not match the new pack contents".into(),
1888        ));
1889    }
1890    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1891    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1892    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1893    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1894    let reverse_index = sley_pack::PackReverseIndex::write(
1895        format,
1896        &sley_pack::pack_order_index_positions(&parsed_index.entries),
1897        &result.pack_checksum,
1898    )?;
1899    write_pack_component(&new_pack_path, &result.pack)?;
1900    write_pack_component(&new_rev_path, &reverse_index)?;
1901    write_pack_component(&new_index_path, &result.idx)?;
1902
1903    if let Some(tips) = bitmap_tips {
1904        let database = FileObjectDatabase::new(objects_dir.clone(), format);
1905        if let Some(bitmap) = build_pack_bitmap(
1906            &database,
1907            format,
1908            &result.index_entries,
1909            &result.pack_checksum,
1910            tips,
1911        )? {
1912            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1913            remove_file_if_exists(&bitmap_path)?;
1914            write_pack_component(&bitmap_path, &bitmap)?;
1915        }
1916    }
1917
1918    if !prune {
1919        return Ok(());
1920    }
1921
1922    // Remove exactly the rolled-up packs (below the split). Never touch packs
1923    // left in place above the split.
1924    for pack_path in &geometric.rolled_up_packs {
1925        if *pack_path == new_pack_path {
1926            continue;
1927        }
1928        if pack_path.with_extension("keep").exists() {
1929            continue;
1930        }
1931        remove_file_if_exists(pack_path)?;
1932        remove_file_if_exists(&pack_path.with_extension("idx"))?;
1933        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
1934            remove_file_if_exists(&pack_path.with_extension(ext))?;
1935        }
1936    }
1937
1938    // Drop loose copies now served by the new pack.
1939    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1940    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1941
1942    // A multi-pack-index that references any removed pack is now stale.
1943    let removed_stems: HashSet<String> = geometric
1944        .rolled_up_packs
1945        .iter()
1946        .filter_map(|p| p.file_stem().map(|s| s.to_string_lossy().into_owned()))
1947        .collect();
1948    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1949    Ok(())
1950}
1951
1952fn validate_pack_checksum(
1953    pack: &[u8],
1954    format: ObjectFormat,
1955    expected: &ObjectId,
1956    context: &str,
1957) -> Result<()> {
1958    if expected.format() != format {
1959        return Err(GitError::InvalidObjectId(format!(
1960            "{context} checksum format does not match object format"
1961        )));
1962    }
1963    let hash_len = format.raw_len();
1964    if pack.len() < 12 + hash_len {
1965        return Err(GitError::InvalidFormat(format!(
1966            "{context} pack file too short"
1967        )));
1968    }
1969    if &pack[..4] != b"PACK" {
1970        return Err(GitError::InvalidFormat(format!(
1971            "{context} pack file missing PACK signature"
1972        )));
1973    }
1974    let trailer_offset = pack.len() - hash_len;
1975    let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
1976    let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
1977    if &actual != expected || trailer != *expected {
1978        return Err(GitError::InvalidFormat(format!(
1979            "{context} pack checksum does not match generated pack"
1980        )));
1981    }
1982    Ok(())
1983}
1984
/// The UNIX-seconds mtime of a path, or `0` when unavailable.
///
/// "Unavailable" covers a missing/unreadable path, a platform without
/// modification times, and an mtime that predates the UNIX epoch. An mtime
/// too large for `u32` saturates at `u32::MAX` instead of wrapping around, so
/// a far-future timestamp is never mistaken for an ancient one.
fn path_mtime_secs(path: &Path) -> u32 {
    fs::metadata(path)
        .and_then(|metadata| metadata.modified())
        .ok()
        .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok())
        // Saturate rather than truncate: `as u32` would silently wrap.
        .map_or(0, |dur| u32::try_from(dur.as_secs()).unwrap_or(u32::MAX))
}
1994
/// The bytes of one cruft `.mtimes` pack plus its sidecars and checksum, ready
/// to install under `objects/pack/`.
///
/// Built by `build_cruft_pack`; `install_cruft_repack_result` writes each byte
/// buffer to `pack-<checksum>.<ext>` in the pack directory.
#[derive(Debug, Clone)]
pub struct CruftPack {
    /// Contents of the `.pack` file.
    pub pack: Vec<u8>,
    /// Contents of the `.idx` file for `pack`.
    pub idx: Vec<u8>,
    /// Contents of the `.rev` (reverse index) sidecar.
    pub rev: Vec<u8>,
    /// Contents of the `.mtimes` sidecar: one timestamp per object, in
    /// lexicographic (index/fanout) order.
    pub mtimes: Vec<u8>,
    /// Pack checksum; it names the installed files (`pack-<hex>.*`).
    pub checksum: ObjectId,
    /// Object ids the cruft pack holds (its surviving unreachable set).
    pub oids: Vec<ObjectId>,
}
2007
/// Outcome of `git repack --cruft`: the reachable pack (if any) plus the cruft
/// `.mtimes` pack of surviving unreachable objects.
///
/// Produced by `repack_cruft` / `repack_cruft_with_options` and consumed by
/// `install_cruft_repack_result`.
#[derive(Debug, Clone)]
pub struct CruftRepackResult {
    /// The all-into-one reachable pack, or `None` when nothing is reachable.
    pub reachable: Option<RepackResult>,
    /// The cruft pack of unreachable objects, or `None` when there are none.
    pub cruft: Option<CruftPack>,
    /// Pre-existing non-cruft, non-kept pack `.pack` paths superseded by the
    /// reachable pack (removed under `-d`).
    pub obsolete_packs: Vec<PathBuf>,
    /// Pre-existing cruft `.pack` paths whose objects are now in the new cruft
    /// pack (removed under `-d`).
    pub obsolete_cruft_packs: Vec<PathBuf>,
    /// Stems of packs that must be left in place; the installer skips any
    /// obsolete pack whose file stem appears here. Private so callers cannot
    /// alter the retention decision taken during the repack.
    retained_pack_stems: Vec<String>,
}
2024
2025/// Gather every object id on disk together with the best (max) mtime of any
2026/// copy: a packed object contributes its pack's mtime (or its own recorded
2027/// mtime inside a cruft pack), a loose object contributes its file mtime.
2028pub fn object_mtimes_on_disk_pub(
2029    objects_dir: &Path,
2030    format: ObjectFormat,
2031) -> Result<HashMap<ObjectId, u32>> {
2032    object_mtimes_on_disk(objects_dir, format)
2033}
2034
2035fn object_mtimes_on_disk(
2036    objects_dir: &Path,
2037    format: ObjectFormat,
2038) -> Result<HashMap<ObjectId, u32>> {
2039    let mut mtimes: HashMap<ObjectId, u32> = HashMap::new();
2040    let mut record = |oid: ObjectId, mtime: u32| {
2041        mtimes
2042            .entry(oid)
2043            .and_modify(|existing| {
2044                if mtime > *existing {
2045                    *existing = mtime;
2046                }
2047            })
2048            .or_insert(mtime);
2049    };
2050
2051    let pack_dir = objects_dir.join("pack");
2052    if let Ok(entries) = fs::read_dir(&pack_dir) {
2053        let mut idx_paths: Vec<PathBuf> = Vec::new();
2054        for entry in entries {
2055            let path = entry?.path();
2056            if path.extension().and_then(|ext| ext.to_str()) == Some("idx") {
2057                idx_paths.push(path);
2058            }
2059        }
2060        idx_paths.sort();
2061        for idx_path in idx_paths {
2062            let pack_path = idx_path.with_extension("pack");
2063            if !pack_path.exists() {
2064                continue;
2065            }
2066            let index = PackIndex::parse(&fs::read(&idx_path)?, format)?;
2067            let mtimes_path = idx_path.with_extension("mtimes");
2068            let pack_object_mtimes: Option<Vec<u32>> =
2069                fs::read(&mtimes_path).ok().and_then(|bytes| {
2070                    sley_pack::PackMtimes::parse(&bytes, format, index.entries.len())
2071                        .ok()
2072                        .map(|parsed| parsed.mtimes)
2073                });
2074            let pack_mtime = path_mtime_secs(&pack_path);
2075            for (pos, entry) in index.entries.iter().enumerate() {
2076                let mtime = pack_object_mtimes
2077                    .as_ref()
2078                    .and_then(|table| table.get(pos).copied())
2079                    .unwrap_or(pack_mtime);
2080                record(entry.oid, mtime);
2081            }
2082        }
2083    }
2084
2085    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
2086    for oid in loose_object_ids(objects_dir, format)? {
2087        let path = store.object_path(&oid)?;
2088        record(oid, path_mtime_secs(&path));
2089    }
2090    Ok(mtimes)
2091}
2092
2093/// Public wrapper over [`build_cruft_pack`] for the `--expire-to` limbo pack.
2094pub fn build_cruft_pack_pub(
2095    database: &FileObjectDatabase,
2096    format: ObjectFormat,
2097    survivors: &HashMap<ObjectId, u32>,
2098) -> Result<Option<CruftPack>> {
2099    build_cruft_pack(database, format, survivors)
2100}
2101
2102/// Build the cruft `.mtimes` pack from the surviving unreachable objects and
2103/// their timestamps.
2104fn build_cruft_pack(
2105    database: &FileObjectDatabase,
2106    format: ObjectFormat,
2107    survivors: &HashMap<ObjectId, u32>,
2108) -> Result<Option<CruftPack>> {
2109    if survivors.is_empty() {
2110        return Ok(None);
2111    }
2112    let mut ordered: Vec<(ObjectId, u32)> = survivors.iter().map(|(o, m)| (*o, *m)).collect();
2113    ordered.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
2114
2115    let mut oids: Vec<ObjectId> = Vec::with_capacity(ordered.len());
2116    let mut objects: Vec<Arc<EncodedObject>> = Vec::with_capacity(ordered.len());
2117    let mut mtime_by_oid: HashMap<ObjectId, u32> = HashMap::with_capacity(ordered.len());
2118    for (oid, mtime) in ordered {
2119        match database.read_object(&oid) {
2120            Ok(object) => {
2121                oids.push(oid);
2122                objects.push(object);
2123                mtime_by_oid.insert(oid, mtime);
2124            }
2125            Err(GitError::NotFound(_)) => {}
2126            Err(err) => return Err(err),
2127        }
2128    }
2129    if oids.is_empty() {
2130        return Ok(None);
2131    }
2132
2133    let inputs: Vec<PackInput<'_>> = oids
2134        .iter()
2135        .zip(&objects)
2136        .map(|(oid, object)| PackInput {
2137            oid,
2138            object: object.as_ref(),
2139        })
2140        .collect();
2141    let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
2142
2143    // `.mtimes` table is in lexicographic (index/fanout) order.
2144    let mut sorted_entries: Vec<&sley_pack::PackIndexEntry> = written.entries.iter().collect();
2145    sorted_entries.sort_by(|a, b| a.oid.as_bytes().cmp(b.oid.as_bytes()));
2146    let mtimes_table: Vec<u32> = sorted_entries
2147        .iter()
2148        .map(|entry| mtime_by_oid.get(&entry.oid).copied().unwrap_or(0))
2149        .collect();
2150    let positions = sley_pack::pack_order_index_positions(&written.entries);
2151    let rev = sley_pack::PackReverseIndex::write(format, &positions, &written.checksum)?;
2152    let mtimes = sley_pack::PackMtimes::write(format, &mtimes_table, &written.checksum)?;
2153
2154    let mut cruft_oids: Vec<ObjectId> = sorted_entries.iter().map(|e| e.oid).collect();
2155    cruft_oids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
2156    Ok(Some(CruftPack {
2157        pack: written.pack,
2158        idx: written.index,
2159        rev,
2160        mtimes,
2161        checksum: written.checksum,
2162        oids: cruft_oids,
2163    }))
2164}
2165
2166/// `git repack --cruft [--cruft-expiration=<t>] [-d]`: pack the reachable
2167/// closure of `roots` into one new pack, then collect every unreachable object
2168/// into a `.mtimes`-stamped cruft pack (honouring `cruft_expiration`). The
2169/// caller installs the result and, under `-d`, removes the superseded non-cruft
2170/// and old cruft packs.
2171///
2172/// Mirrors builtin/repack.c's PACK_CRUFT path + repack-cruft.c `write_cruft_pack`
2173/// without the per-pack stdin protocol: unreachable objects are everything on
2174/// disk minus the reachable set.
2175pub fn repack_cruft(
2176    git_dir: &Path,
2177    format: ObjectFormat,
2178    roots: &[ObjectId],
2179    cruft_expiration: Option<u32>,
2180) -> Result<CruftRepackResult> {
2181    repack_cruft_with_options(
2182        git_dir,
2183        format,
2184        roots,
2185        cruft_expiration,
2186        &RepackOptions::default(),
2187    )
2188}
2189
2190pub fn repack_cruft_with_options(
2191    git_dir: &Path,
2192    format: ObjectFormat,
2193    roots: &[ObjectId],
2194    cruft_expiration: Option<u32>,
2195    options: &RepackOptions,
2196) -> Result<CruftRepackResult> {
2197    let objects_dir = repository_objects_dir(git_dir);
2198    let database = FileObjectDatabase::new(objects_dir.clone(), format);
2199    let pack_dir = objects_dir.join("pack");
2200    let retained_pack_stems = repack_retained_pack_stems(
2201        &pack_dir,
2202        &options.keep_pack_stems,
2203        !options.pack_kept_objects,
2204    )?;
2205    let excluded_oids = if options.pack_kept_objects {
2206        HashSet::new()
2207    } else {
2208        pack_oids_for_stems(&pack_dir, format, &retained_pack_stems)?
2209    };
2210
2211    // Reachable closure → the new "reachable" pack.
2212    let mut reachable_ids = collect_reachable_object_ids(&database, format, roots.iter().copied())?;
2213    reachable_ids.retain(|oid| !excluded_oids.contains(oid));
2214    let reachable_result = if reachable_ids.is_empty() {
2215        None
2216    } else {
2217        let mut ids: Vec<ObjectId> = reachable_ids.iter().copied().collect();
2218        ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
2219        let mut objects = Vec::with_capacity(ids.len());
2220        for oid in &ids {
2221            match database.read_object(oid) {
2222                Ok(object) => objects.push(ReachablePackObject { oid: *oid, object }),
2223                Err(GitError::NotFound(_)) => {}
2224                Err(err) => return Err(err),
2225            }
2226        }
2227        if objects.is_empty() {
2228            None
2229        } else {
2230            let inputs = pack_inputs(&objects);
2231            let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
2232            let packed_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
2233            let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2234                .into_iter()
2235                .filter(|oid| packed_set.contains(oid))
2236                .collect();
2237            packed_loose.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
2238            Some(RepackResult {
2239                pack: written.pack,
2240                idx: written.index,
2241                object_count: written.entries.len(),
2242                obsolete_packs: Vec::new(),
2243                packed_loose,
2244                retained_pack_stems: Vec::new(),
2245                pack_checksum: written.checksum,
2246                index_entries: written.entries,
2247            })
2248        }
2249    };
2250
2251    // Unreachable objects = everything on disk minus the reachable set, stamped
2252    // with their best mtime.
2253    let mut survivors: HashMap<ObjectId, u32> = object_mtimes_on_disk(&objects_dir, format)?
2254        .into_iter()
2255        .filter(|(oid, _)| !reachable_ids.contains(oid) && !excluded_oids.contains(oid))
2256        .collect();
2257
2258    // Expiration: rescue older objects reachable from a recent one, drop the rest.
2259    if let Some(expiration) = cruft_expiration {
2260        rescue_and_expire_cruft_objects(&database, format, &mut survivors, expiration)?;
2261    }
2262
2263    let cruft = build_cruft_pack(&database, format, &survivors)?;
2264
2265    // The packs the reachable+cruft packs supersede: every pre-existing
2266    // non-kept pack. Cruft packs are tracked separately.
2267    let mut obsolete_packs = Vec::new();
2268    let mut obsolete_cruft_packs = Vec::new();
2269    for pack_path in existing_pack_files(&pack_dir)? {
2270        if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
2271            && retained_pack_stems.iter().any(|retained| retained == stem)
2272        {
2273            continue;
2274        }
2275        if pack_path.with_extension("keep").exists() {
2276            continue;
2277        }
2278        if pack_path.with_extension("mtimes").exists() {
2279            obsolete_cruft_packs.push(pack_path);
2280        } else {
2281            obsolete_packs.push(pack_path);
2282        }
2283    }
2284
2285    Ok(CruftRepackResult {
2286        reachable: reachable_result,
2287        cruft,
2288        obsolete_packs,
2289        obsolete_cruft_packs,
2290        retained_pack_stems,
2291    })
2292}
2293
2294/// Apply `--cruft-expiration` over the survivor map in place: starting from the
2295/// recent candidates (mtime strictly newer than `expiration`), walk reachability
2296/// and rescue every dependency at the cutoff mtime; drop older candidates that
2297/// no recent object reaches. Mirrors the pack-objects cruft expiry traversal.
2298fn rescue_and_expire_cruft_objects(
2299    database: &FileObjectDatabase,
2300    format: ObjectFormat,
2301    survivors: &mut HashMap<ObjectId, u32>,
2302    expiration: u32,
2303) -> Result<()> {
2304    let recent: Vec<ObjectId> = survivors
2305        .iter()
2306        .filter(|(_, mtime)| **mtime > expiration)
2307        .map(|(oid, _)| *oid)
2308        .collect();
2309
2310    let mut keep: HashSet<ObjectId> = HashSet::new();
2311    let mut pending: Vec<ObjectId> = recent.clone();
2312    while let Some(oid) = pending.pop() {
2313        if !keep.insert(oid) {
2314            continue;
2315        }
2316        let Ok(object) = database.read_object(&oid) else {
2317            continue;
2318        };
2319        match object.object_type {
2320            ObjectType::Commit => {
2321                if let Ok(commit) = Commit::parse_ref(format, &object.body) {
2322                    pending.extend(commit.parents.iter().copied());
2323                    pending.push(commit.tree);
2324                }
2325            }
2326            ObjectType::Tree => {
2327                for entry in TreeEntries::new(format, &object.body).flatten() {
2328                    if !entry.is_gitlink() {
2329                        pending.push(entry.oid);
2330                    }
2331                }
2332            }
2333            ObjectType::Tag => {
2334                if let Ok(tag) = Tag::parse_ref(format, &object.body) {
2335                    pending.push(tag.object);
2336                }
2337            }
2338            ObjectType::Blob => {}
2339        }
2340    }
2341
2342    // Drop any survivor that is neither recent nor rescued; rescued-but-older
2343    // objects keep their recorded mtime (already >= 0), recent ones unchanged.
2344    survivors.retain(|oid, mtime| *mtime > expiration || keep.contains(oid));
2345    Ok(())
2346}
2347
2348/// Install a [`repack_cruft`] result: write the reachable pack and the cruft
2349/// `.mtimes` pack, then under `prune` remove the superseded non-cruft packs, old
2350/// cruft packs, and the loose objects now served.
2351pub fn install_cruft_repack_result(
2352    git_dir: &Path,
2353    format: ObjectFormat,
2354    result: &CruftRepackResult,
2355    prune: bool,
2356) -> Result<()> {
2357    let objects_dir = repository_objects_dir(git_dir);
2358    let pack_dir = objects_dir.join("pack");
2359    fs::create_dir_all(&pack_dir)?;
2360
2361    // Names of packs we are about to remove (so we never delete the new ones).
2362    let new_reachable_name = result
2363        .reachable
2364        .as_ref()
2365        .map(|r| format!("pack-{}.pack", r.pack_checksum.to_hex()));
2366    let new_cruft_name = result
2367        .cruft
2368        .as_ref()
2369        .map(|c| format!("pack-{}.pack", c.checksum.to_hex()));
2370
2371    // Write the reachable pack (idx + rev + pack), content-addressed.
2372    if let Some(reachable) = result.reachable.as_ref() {
2373        let parsed_index = PackIndex::parse(&reachable.idx, format)?;
2374        let pack_name = format!("pack-{}", reachable.pack_checksum.to_hex());
2375        let reverse_index = sley_pack::PackReverseIndex::write(
2376            format,
2377            &sley_pack::pack_order_index_positions(&parsed_index.entries),
2378            &reachable.pack_checksum,
2379        )?;
2380        write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &reachable.pack)?;
2381        write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &reverse_index)?;
2382        write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &reachable.idx)?;
2383    }
2384
2385    // Write the cruft pack (pack + rev + mtimes + idx).
2386    if let Some(cruft) = result.cruft.as_ref() {
2387        let pack_name = format!("pack-{}", cruft.checksum.to_hex());
2388        write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &cruft.pack)?;
2389        write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &cruft.rev)?;
2390        write_pack_component(&pack_dir.join(format!("{pack_name}.mtimes")), &cruft.mtimes)?;
2391        write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &cruft.idx)?;
2392    }
2393
2394    if !prune {
2395        return Ok(());
2396    }
2397
2398    // Objects now served by the new packs.
2399    let mut present: HashSet<ObjectId> = HashSet::new();
2400    if let Some(reachable) = result.reachable.as_ref() {
2401        present.extend(reachable.index_entries.iter().map(|e| e.oid));
2402    }
2403    if let Some(cruft) = result.cruft.as_ref() {
2404        present.extend(cruft.oids.iter().copied());
2405    }
2406
2407    // Remove superseded non-cruft + old cruft packs (skip the new ones).
2408    let mut removed_stems: HashSet<String> = HashSet::new();
2409    for pack_path in result
2410        .obsolete_packs
2411        .iter()
2412        .chain(result.obsolete_cruft_packs.iter())
2413    {
2414        let file_name = pack_path.file_name().and_then(|n| n.to_str());
2415        if file_name == new_reachable_name.as_deref() || file_name == new_cruft_name.as_deref() {
2416            continue;
2417        }
2418        if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
2419            && result
2420                .retained_pack_stems
2421                .iter()
2422                .any(|retained| retained == stem)
2423        {
2424            continue;
2425        }
2426        if pack_path.with_extension("keep").exists() {
2427            continue;
2428        }
2429        if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) {
2430            removed_stems.insert(stem.to_string());
2431        }
2432        remove_file_if_exists(pack_path)?;
2433        remove_file_if_exists(&pack_path.with_extension("idx"))?;
2434        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
2435            remove_file_if_exists(&pack_path.with_extension(ext))?;
2436        }
2437    }
2438
2439    // Drop loose objects now in a new pack.
2440    let loose_now_packed: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2441        .into_iter()
2442        .filter(|oid| present.contains(oid))
2443        .collect();
2444    prune_loose_objects(&objects_dir, format, loose_now_packed.iter(), &present)?;
2445
2446    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2447    Ok(())
2448}
2449
2450fn pack_index_entries_match_writer(
2451    parsed: &[PackIndexEntry],
2452    writer_entries: &[PackIndexEntry],
2453) -> bool {
2454    if parsed.len() != writer_entries.len() {
2455        return false;
2456    }
2457    let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
2458    writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2459    parsed.iter().zip(writer_entries).all(|(left, right)| {
2460        left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
2461    })
2462}
2463
2464/// List loose objects under `git_dir` that are *not* reachable from `roots`,
2465/// optionally deleting them.
2466///
2467/// Reachability is computed with [`collect_reachable_object_ids`] over the
2468/// repository's object database, so trees, parents, and tag targets are all
2469/// followed. When `delete` is `false` the returned ids are merely reported;
2470/// when `true` each unreachable loose object file is removed (packed copies are
2471/// never touched). Deletion is therefore opt-in.
2472pub fn prune_unreachable_loose<I>(
2473    git_dir: &Path,
2474    format: ObjectFormat,
2475    roots: I,
2476    delete: bool,
2477) -> Result<Vec<ObjectId>>
2478where
2479    I: IntoIterator<Item = ObjectId>,
2480{
2481    let objects_dir = repository_objects_dir(git_dir);
2482    let database = FileObjectDatabase::new(objects_dir.clone(), format);
2483    let reachable = collect_reachable_object_ids(&database, format, roots)?;
2484
2485    let store = LooseObjectStore::new(objects_dir.clone(), format);
2486    let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2487        .into_iter()
2488        .filter(|oid| !reachable.contains(oid))
2489        .collect();
2490    pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2491
2492    if delete {
2493        for oid in &pruned {
2494            let path = store.object_path(oid)?;
2495            match fs::remove_file(&path) {
2496                Ok(()) => {}
2497                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
2498                Err(err) => return Err(GitError::Io(err.to_string())),
2499            }
2500        }
2501    }
2502    Ok(pruned)
2503}
2504
2505/// Loose object ids under `objects_dir`, sorted by hex, with packed objects
2506/// excluded.
2507fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
2508    let oids = loose_object_id_set(objects_dir, format)?;
2509    let mut oids = oids.into_iter().collect::<Vec<_>>();
2510    oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2511    Ok(oids)
2512}
2513
2514fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
2515    let mut oids = HashSet::new();
2516    collect_loose_object_ids(objects_dir, format, &mut oids)?;
2517    Ok(oids)
2518}
2519
2520/// Absolute paths of every `*.pack` file directly inside `pack_dir`, sorted for
2521/// deterministic output.
2522fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
2523    if !pack_dir.exists() {
2524        return Ok(Vec::new());
2525    }
2526    let mut packs = Vec::new();
2527    for entry in fs::read_dir(pack_dir)? {
2528        let path = entry?.path();
2529        if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
2530            packs.push(path);
2531        }
2532    }
2533    packs.sort();
2534    Ok(packs)
2535}
2536
2537/// Remove pre-existing packs whose every object is contained in `present`,
2538/// skipping `keep` (the pack just written), `.keep` packs, and `.promisor` packs.
2539/// A stale multi-pack-index that references any removed pack is removed too.
2540fn prune_obsolete_pack_paths(
2541    objects_dir: &Path,
2542    format: ObjectFormat,
2543    packs: &[PathBuf],
2544    keep: &Path,
2545    retained_pack_stems: &[String],
2546) -> Result<()> {
2547    prune_pack_paths_matching(
2548        objects_dir,
2549        format,
2550        packs.iter(),
2551        keep,
2552        retained_pack_stems,
2553        |_| Ok(true),
2554    )
2555}
2556
2557fn prune_pack_paths_matching<'a>(
2558    objects_dir: &Path,
2559    format: ObjectFormat,
2560    packs: impl IntoIterator<Item = &'a PathBuf>,
2561    keep: &Path,
2562    retained_pack_stems: &[String],
2563    mut should_prune: impl FnMut(&Path) -> Result<bool>,
2564) -> Result<()> {
2565    let pack_dir = objects_dir.join("pack");
2566    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
2567    let retained_pack_stems: HashSet<&str> =
2568        retained_pack_stems.iter().map(String::as_str).collect();
2569    let mut removed_stems: HashSet<String> = HashSet::new();
2570
2571    for pack_path in packs {
2572        if pack_path == keep {
2573            continue;
2574        }
2575        let Some(stem) = pack_path.file_stem() else {
2576            continue;
2577        };
2578        if Some(stem) == keep_stem.as_deref() {
2579            continue;
2580        }
2581        if let Some(stem) = stem.to_str()
2582            && retained_pack_stems.contains(stem)
2583        {
2584            continue;
2585        }
2586        if pack_path.with_extension("keep").exists()
2587            || pack_path.with_extension("promisor").exists()
2588        {
2589            continue;
2590        }
2591        if !should_prune(pack_path)? {
2592            continue;
2593        }
2594        remove_file_if_exists(pack_path)?;
2595        remove_file_if_exists(&pack_path.with_extension("idx"))?;
2596        for ext in ["rev", "mtimes", "bitmap"] {
2597            remove_file_if_exists(&pack_path.with_extension(ext))?;
2598        }
2599        removed_stems.insert(stem.to_string_lossy().into_owned());
2600    }
2601
2602    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2603    Ok(())
2604}
2605
2606/// Remove a `multi-pack-index` if it names *any* pack that was removed.
2607///
2608/// A MIDX that still references a deleted pack makes reads fail (the lookup
2609/// resolves to a pack that is gone) before any fallback. Removing the whole MIDX
2610/// when even one of its packs is pruned forces readers back to the individual pack
2611/// indexes, which are correct; `multi-pack-index write` can rebuild it later.
2612fn prune_stale_multi_pack_index(
2613    pack_dir: &Path,
2614    format: ObjectFormat,
2615    removed_stems: &HashSet<String>,
2616) -> Result<()> {
2617    if removed_stems.is_empty() {
2618        return Ok(());
2619    }
2620    let midx_path = pack_dir.join("multi-pack-index");
2621    if !midx_path.exists() {
2622        return Ok(());
2623    }
2624    let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
2625    let references_removed_pack = midx.pack_names.iter().any(|name| {
2626        let stem = name.strip_suffix(".idx").unwrap_or(name);
2627        removed_stems.contains(stem)
2628    });
2629    if references_removed_pack {
2630        remove_file_if_exists(&midx_path)?;
2631    }
2632    Ok(())
2633}
2634
2635/// Remove each loose object in `candidates` whose id is in `present`, leaving
2636/// any object not actually packed untouched.
2637fn prune_loose_objects<'a, I>(
2638    objects_dir: &Path,
2639    format: ObjectFormat,
2640    candidates: I,
2641    present: &HashSet<ObjectId>,
2642) -> Result<()>
2643where
2644    I: IntoIterator<Item = &'a ObjectId>,
2645{
2646    let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
2647    for oid in candidates {
2648        if !present.contains(oid) {
2649            continue;
2650        }
2651        remove_file_if_exists(&store.object_path(oid)?)?;
2652    }
2653    Ok(())
2654}
2655
/// How a deltified pack entry names its base object, as decoded by
/// `pack_entry_delta_base`.
enum PackDeltaBase {
    /// Base given by position: the offset produced by
    /// `parse_ofs_delta_base_offset` (the entry's own offset minus the encoded
    /// distance), decoded for entry kind 6.
    Offset(u64),
    /// Base given by object id, read by `parse_ref_delta_base_oid` for entry
    /// kind 7.
    Ref(ObjectId),
}
2660
/// Result of `scan_pack_index_offsets` for one target entry.
struct PackIndexOffsetInfo {
    /// Smallest indexed offset above the target, the trailer offset when no
    /// entry follows it, or the target offset itself when the index lists the
    /// target offset more than once.
    end_offset: u64,
    /// Id of the index entry found at the requested ofs-delta base offset;
    /// `None` when no base offset was requested.
    delta_base_oid: Option<ObjectId>,
}
2665
2666fn scan_pack_index_offsets(
2667    index: &PackIndex,
2668    target_offset: u64,
2669    trailer_offset: u64,
2670    delta_base_offset: Option<u64>,
2671) -> Result<PackIndexOffsetInfo> {
2672    let mut target_count = 0usize;
2673    let mut next_offset = None;
2674    let mut delta_base_oid = None;
2675
2676    for entry in &index.entries {
2677        if entry.offset == target_offset {
2678            target_count += 1;
2679        } else if entry.offset > target_offset {
2680            match next_offset {
2681                Some(current) if current <= entry.offset => {}
2682                _ => next_offset = Some(entry.offset),
2683            }
2684        }
2685        if Some(entry.offset) == delta_base_offset {
2686            delta_base_oid = Some(entry.oid);
2687        }
2688    }
2689
2690    if target_count == 0 {
2691        return Err(GitError::InvalidFormat(format!(
2692            "pack index offset {target_offset} not found"
2693        )));
2694    }
2695    if let Some(offset) = delta_base_offset
2696        && delta_base_oid.is_none()
2697    {
2698        return Err(GitError::InvalidFormat(format!(
2699            "ofs-delta base offset {offset} not found"
2700        )));
2701    }
2702
2703    Ok(PackIndexOffsetInfo {
2704        // Preserve the old sorted-vector behavior for malformed indexes with
2705        // duplicate offsets: the next sorted entry has the same offset.
2706        end_offset: if target_count > 1 {
2707            target_offset
2708        } else {
2709            next_offset.unwrap_or(trailer_offset)
2710        },
2711        delta_base_oid,
2712    })
2713}
2714
2715fn pack_entry_delta_base(
2716    format: ObjectFormat,
2717    pack: &[u8],
2718    entry_offset: u64,
2719) -> Result<Option<PackDeltaBase>> {
2720    let mut cursor = usize::try_from(entry_offset)
2721        .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
2722    let first = pack_next_byte(pack, &mut cursor)?;
2723    let kind = (first >> 4) & 0x07;
2724    let mut byte = first;
2725    while byte & 0x80 != 0 {
2726        byte = pack_next_byte(pack, &mut cursor)?;
2727    }
2728    match kind {
2729        6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
2730            pack,
2731            &mut cursor,
2732            entry_offset,
2733        )?))),
2734        7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
2735            format,
2736            pack,
2737            &mut cursor,
2738        )?))),
2739        _ => Ok(None),
2740    }
2741}
2742
2743fn parse_ref_delta_base_oid(
2744    format: ObjectFormat,
2745    pack: &[u8],
2746    cursor: &mut usize,
2747) -> Result<ObjectId> {
2748    let raw_len = format.raw_len();
2749    if *cursor + raw_len > pack.len() {
2750        return Err(GitError::InvalidFormat(
2751            "truncated ref-delta base object id".into(),
2752        ));
2753    }
2754    let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
2755    *cursor += raw_len;
2756    Ok(oid)
2757}
2758
2759fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
2760    let mut byte = pack_next_byte(pack, cursor)?;
2761    let mut relative = u64::from(byte & 0x7f);
2762    while byte & 0x80 != 0 {
2763        byte = pack_next_byte(pack, cursor)?;
2764        relative = relative
2765            .checked_add(1)
2766            .and_then(|value| value.checked_shl(7))
2767            .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2768            .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2769    }
2770    entry_offset
2771        .checked_sub(relative)
2772        .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2773}
2774
2775fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
2776    let Some(byte) = pack.get(*cursor).copied() else {
2777        return Err(GitError::InvalidFormat("truncated pack entry".into()));
2778    };
2779    *cursor += 1;
2780    Ok(byte)
2781}
2782
2783fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
2784    Ok(ObjectId::null(format))
2785}
2786
2787/// Remove `path` if it exists, treating a missing file as success.
2788fn remove_file_if_exists(path: &Path) -> Result<()> {
2789    match fs::remove_file(path) {
2790        Ok(()) => Ok(()),
2791        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
2792        Err(err) => Err(GitError::Io(err.to_string())),
2793    }
2794}
2795
2796fn walk_reachable_objects<R, I, F>(
2797    reader: &R,
2798    format: ObjectFormat,
2799    starts: I,
2800    excluded: &HashSet<ObjectId>,
2801    visit: F,
2802) -> Result<HashSet<ObjectId>>
2803where
2804    R: ObjectReader,
2805    I: IntoIterator<Item = ObjectId>,
2806    F: FnMut(&ObjectId, &Arc<EncodedObject>),
2807{
2808    walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
2809}
2810
2811/// [`walk_reachable_objects`] with an additional `cut` set: commits in `cut`
2812/// are visited (their trees and blobs too) but their parents are not followed,
2813/// mirroring a shallow client's view of its own history during negotiation.
2814fn walk_reachable_objects_with_cut<R, I, F>(
2815    reader: &R,
2816    format: ObjectFormat,
2817    starts: I,
2818    excluded: &HashSet<ObjectId>,
2819    cut: &HashSet<ObjectId>,
2820    mut visit: F,
2821) -> Result<HashSet<ObjectId>>
2822where
2823    R: ObjectReader,
2824    I: IntoIterator<Item = ObjectId>,
2825    F: FnMut(&ObjectId, &Arc<EncodedObject>),
2826{
2827    let mut seen = HashSet::new();
2828    let mut pending = Vec::new();
2829    for start in starts {
2830        pending.push(start);
2831        while let Some(oid) = pending.pop() {
2832            if excluded.contains(&oid) {
2833                continue;
2834            }
2835            if !seen.insert(oid) {
2836                continue;
2837            }
2838            let object = reader.read_object(&oid).map_err(|err| {
2839                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
2840            })?;
2841            match object.object_type {
2842                ObjectType::Commit => {
2843                    let (tree, parents) = {
2844                        let commit = Commit::parse_ref(format, &object.body)?;
2845                        (commit.tree, commit.parents)
2846                    };
2847                    visit(&oid, &object);
2848                    if !cut.contains(&oid) {
2849                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
2850                            pending.push(parent);
2851                        }
2852                    }
2853                    pending.push(tree);
2854                }
2855                ObjectType::Tree => {
2856                    let mut child_oids = Vec::new();
2857                    for entry in TreeEntries::new(format, &object.body) {
2858                        let entry = entry?;
2859                        if entry.is_gitlink() {
2860                            continue;
2861                        }
2862                        child_oids.push(entry.oid);
2863                    }
2864                    visit(&oid, &object);
2865                    pending.extend(child_oids.into_iter().rev());
2866                }
2867                ObjectType::Tag => {
2868                    let target = {
2869                        let tag = Tag::parse_ref(format, &object.body)?;
2870                        tag.object
2871                    };
2872                    visit(&oid, &object);
2873                    pending.push(target);
2874                }
2875                ObjectType::Blob => visit(&oid, &object),
2876            }
2877        }
2878    }
2879    Ok(seen)
2880}
2881
2882// ===== reachability bitmaps (.bitmap write + consult) =====
2883
/// Tests bit `position` of a `u64`-word bitset laid out the way git's bitmaps
/// are: bit `i` lives in word `i / 64` at bit `i % 64`, least significant
/// bit first. Positions beyond the last word read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let index = (position / 64) as usize;
    let shift = position % 64;
    words
        .get(index)
        .is_some_and(|word| (*word >> shift) & 1 == 1)
}
2890
/// Sets bit `position` (word `position / 64`, bit `position % 64`) in
/// `words`. A position beyond the last word is ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    let index = (position / 64) as usize;
    if let Some(word) = words.get_mut(index) {
        *word |= 1u64 << (position % 64);
    }
}
2897
/// ORs `other` into `acc` word by word. Only the words both slices have in
/// common are touched; any surplus on either side is left alone.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    let shared = acc.len().min(other.len());
    for index in 0..shared {
        acc[index] |= other[index];
    }
}
2903
/// Lists the positions of all set bits in `words`, in ascending order — the
/// inverse of calling [`bitset_set`] once per position.
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    words
        .iter()
        .enumerate()
        .flat_map(|(word_index, &word)| {
            // Each step clears the lowest set bit, so the successive values
            // expose the set bits of `word` from low to high.
            std::iter::successors((word != 0).then_some(word), |&rest| {
                let rest = rest & (rest - 1);
                (rest != 0).then_some(rest)
            })
            .map(move |rest| word_index as u32 * 64 + rest.trailing_zeros())
        })
        .collect()
}
2917
/// Extracts the epoch-seconds timestamp from a commit identity line of the
/// form `Name <email> <timestamp> <tz>`.
///
/// The timestamp is the second-to-last space-separated field. Anything that
/// cannot be read as an `i64` — a missing field, invalid UTF-8, non-numeric
/// text — yields 0, matching git's tolerance for bogus dates during bitmap
/// commit selection.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Splitting from the right: field 0 is the timezone, field 1 the timestamp.
    let Some(raw) = identity.rsplit(|byte| *byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
2930
/// Upstream `next_commit_index` (pack-bitmap-write.c): how far ahead of
/// position `idx` in the date-descending commit list the next bitmap
/// candidate window reaches.
///
/// * `idx <= 100`: 0 — every commit in this region is selected.
/// * `100 < idx <= 20000`: `idx - 100`, capped at 100.
/// * beyond 20000: `idx - 20000`, kept within `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
2949
2950/// Builds a serialised `.bitmap` for the pack described by `index_entries` /
2951/// `pack_checksum`, mirroring upstream pack-bitmap-write.c:
2952///
2953/// * commit selection walks the pack's commits in committer-date-descending
2954///   order through [`bitmap_next_commit_index`]'s spacing schedule, preferring
2955///   `preferred_tips` (ref tips — upstream's `NEEDS_BITMAP`) and merge commits
2956///   inside each window;
2957/// * each selected commit stores its full reachability closure (commits, trees,
2958///   blobs) as pack-order bit positions (no XOR compression — `xor_offset` 0 is
2959///   valid on disk and what readers see after resolution anyway).
2960///
2961/// Returns `Ok(None)` — mirroring upstream's warn-and-skip — when the pack
2962/// lacks full closure (a reachable object is missing from it).
2963pub fn build_pack_bitmap(
2964    db: &FileObjectDatabase,
2965    format: ObjectFormat,
2966    index_entries: &[PackIndexEntry],
2967    pack_checksum: &ObjectId,
2968    preferred_tips: &HashSet<ObjectId>,
2969) -> Result<Option<Vec<u8>>> {
2970    // `index_entries` carries no ordering guarantee (writer provenance is in
2971    // pack-write order); bit numbering follows pack (offset) order.
2972    let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
2973    by_offset.sort_by_key(|&slot| index_entries[slot].offset);
2974    let bit_order: Vec<ObjectId> = by_offset
2975        .into_iter()
2976        .map(|slot| index_entries[slot].oid)
2977        .collect();
2978    build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
2979}
2980
2981/// [`build_pack_bitmap`]'s multi-pack sibling: builds the serialised
2982/// `multi-pack-index-<checksum>.bitmap` for `midx_entries`, with bits in
2983/// pseudo-pack order (preferred pack first, then pack id, then offset — the
2984/// same order [`MultiPackIndex::write_with_reverse_index`] records in `RIDX`)
2985/// and the midx checksum in the BITM checksum field.
2986pub fn build_midx_bitmap(
2987    db: &FileObjectDatabase,
2988    format: ObjectFormat,
2989    midx_entries: &[sley_pack::MultiPackIndexEntry],
2990    midx_checksum: &ObjectId,
2991    preferred_pack: u32,
2992    preferred_tips: &HashSet<ObjectId>,
2993) -> Result<Option<Vec<u8>>> {
2994    let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
2995    pseudo.sort_by_key(|&slot| {
2996        let entry = &midx_entries[slot];
2997        (
2998            entry.pack_int_id != preferred_pack,
2999            entry.pack_int_id,
3000            entry.offset,
3001        )
3002    });
3003    let bit_order: Vec<ObjectId> = pseudo
3004        .into_iter()
3005        .map(|slot| midx_entries[slot].oid)
3006        .collect();
3007    build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
3008}
3009
/// Upstream `bitmap_builder_init`'s `num_maximal` counter (pack-bitmap-write.c):
/// walk the first-parent ancestry of the selected commits, children before
/// parents, propagating per-commit "which selected commits reach me" masks.
/// A commit counts as maximal when it is selected, or when distinct selected
/// lineages converge on it (its mask gains bits its last contributing child
/// did not carry). Only the count is needed (for the trace2 data event), so no
/// reverse-edge bookkeeping is kept.
///
/// Bit `i` of every mask stands for `selected[i]`. Returns the number of
/// commits flagged maximal.
///
/// # Errors
/// Propagates failures from reading or parsing any commit on the walked
/// first-parent chains.
fn bitmap_num_maximal_commits(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    selected: &[ObjectId],
) -> Result<usize> {
    // First-parent subgraph reachable from the selected commits.
    // Maps each walked commit to its first grafted parent (`None` for a root
    // or a commit whose parents are grafted away).
    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
    let mut stack: Vec<ObjectId> = selected.to_vec();
    while let Some(oid) = stack.pop() {
        if first_parent.contains_key(&oid) {
            continue;
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
        first_parent.insert(oid, parent);
        if let Some(parent) = parent {
            stack.push(parent);
        }
    }
    // Children-before-parents order (Kahn over the single first-parent edge).
    // `pending_children[p]` counts the walked commits whose first parent is `p`.
    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
    for parent in first_parent.values().flatten() {
        *pending_children.entry(*parent).or_default() += 1;
    }
    let word_count = selected.len().div_ceil(64);
    // Per-commit propagation state: the selected commits known to reach it,
    // and whether it currently counts as maximal.
    struct MaximalEnt {
        mask: Vec<u64>,
        maximal: bool,
    }
    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
    // Seed: every selected commit carries its own bit and starts out maximal.
    for (bit, oid) in selected.iter().enumerate() {
        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
            mask: vec![0u64; word_count],
            maximal: true,
        });
        ent.mask[bit / 64] |= 1u64 << (bit % 64);
        ent.maximal = true;
    }
    // Start from the commits no other walked commit names as first parent.
    let mut queue: Vec<ObjectId> = first_parent
        .keys()
        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
        .copied()
        .collect();
    let mut num_maximal = 0usize;
    while let Some(oid) = queue.pop() {
        // The entry is removed as it is consumed: a commit is only dequeued
        // after all of its children have been processed.
        if let Some(ent) = ents.remove(&oid) {
            if ent.maximal {
                num_maximal += 1;
            }
            if let Some(Some(parent)) = first_parent.get(&oid) {
                match ents.entry(*parent) {
                    std::collections::hash_map::Entry::Vacant(vacant) => {
                        // Fresh parent mask: c_not_p, !p_not_c -> not maximal.
                        vacant.insert(MaximalEnt {
                            mask: ent.mask.clone(),
                            maximal: false,
                        });
                    }
                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
                        let parent_ent = occupied.get_mut();
                        // Does the child contribute any bit the parent lacks?
                        let c_not_p = ent
                            .mask
                            .iter()
                            .zip(&parent_ent.mask)
                            .any(|(child, parent)| child & !parent != 0);
                        if c_not_p {
                            // Evaluated before the merge below: did the parent
                            // already hold bits this child does not carry?
                            let p_not_c = parent_ent
                                .mask
                                .iter()
                                .zip(&ent.mask)
                                .any(|(parent, child)| parent & !child != 0);
                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
                                *parent |= child;
                            }
                            parent_ent.maximal = p_not_c;
                        }
                    }
                }
            }
        }
        // Release the parent once its last pending child has been handled.
        if let Some(Some(parent)) = first_parent.get(&oid)
            && let Some(remaining) = pending_children.get_mut(parent)
        {
            *remaining -= 1;
            if *remaining == 0 {
                queue.push(*parent);
            }
        }
    }
    Ok(num_maximal)
}
3109
/// Shared write half: `bit_order` lists every covered object's oid in bit
/// order (pack order for a single pack, pseudo-pack order for a midx);
/// `checksum` fills the BITM checksum field (pack checksum / midx checksum).
///
/// Returns `Ok(Some(bytes))` with the serialised bitmap, or `Ok(None)` when
/// `bit_order` is empty, holds more than `u32::MAX` objects, or lacks full
/// closure (a reachable commit, tree or blob is not listed in `bit_order`).
///
/// # Errors
/// Propagates object read/parse failures and `PackBitmapWriter` errors.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    // This guard also keeps every `as u32` position cast below lossless.
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // The on-disk entry position space is the oid-sorted lookup order (.idx /
    // midx OIDL); derive each bit-order slot's rank there.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    // Reverse lookup: oid -> bit position.
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    // Object types in bit order; commits also collect (date, parent count).
    let mut object_types = Vec::with_capacity(object_count);
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Type via the header fast path: blobs (the bulk of most packs) never
        // need their bodies inflated here.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Selection: date-descending, then the spacing schedule.
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Few commits: give every one of them a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default pick is the far end of the window `i..=i + next`.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // The first preferred tip in the window wins outright.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    // Otherwise the last merge commit seen so far is kept.
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // Trace2 selection counters (upstream bitmap_builder_init): emitted before
    // the closure walk, like upstream emits them before building the ewah
    // bitmaps. Computing num_maximal_commits needs its own first-parent walk,
    // so it only runs when the trace2 event target is active.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    // Reachability closures, oldest-first so newer walks stop at memoised
    // older selected commits.
    let word_count = object_count.div_ceil(64);
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        // Only commit ids are ever pushed here: the start commit and parents.
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                // Mirrors upstream's "Packfile doesn't have full closure".
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            // Already covered by this closure.
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            // A previously built closure covers this commit and everything
            // below it: merge it and stop descending.
            if let Some(stored) = memo.get(&oid) {
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    // Emit the entries in selection order (newest first).
    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
3261
3262/// Marks `tree` and everything below it (sub-trees, blobs) in `acc`, skipping
3263/// already-set bits (their closure is already covered). Returns `false` when an
3264/// object is missing from the pack (no full closure), after warning.
3265fn bitmap_mark_tree(
3266    db: &impl ObjectReader,
3267    format: ObjectFormat,
3268    tree: &ObjectId,
3269    oid_to_pack: &HashMap<ObjectId, u32>,
3270    acc: &mut [u64],
3271) -> Result<bool> {
3272    let Some(&pack_pos) = oid_to_pack.get(tree) else {
3273        eprintln!(
3274            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
3275        );
3276        return Ok(false);
3277    };
3278    if bitset_get(acc, pack_pos) {
3279        return Ok(true);
3280    }
3281    bitset_set(acc, pack_pos);
3282    let object = db.read_object(tree)?;
3283    for entry in TreeEntries::new(format, &object.body) {
3284        let entry = entry?;
3285        if entry.is_gitlink() {
3286            continue;
3287        }
3288        if entry.is_tree() {
3289            if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
3290                return Ok(false);
3291            }
3292        } else {
3293            let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
3294                eprintln!(
3295                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
3296                    entry.oid
3297                );
3298                return Ok(false);
3299            };
3300            bitset_set(acc, blob_pos);
3301        }
3302    }
3303    Ok(true)
3304}
3305
3306/// A pack's `.bitmap` loaded for consultation: oid <-> pack-position mappings,
3307/// resolved (XOR-expanded) per-commit reachability bitsets, and the four object
3308/// type bitmaps. Bit numbering follows pack order throughout.
3309pub struct LoadedPackBitmap {
3310    object_count: u32,
3311    oid_to_pack: HashMap<ObjectId, u32>,
3312    pack_to_oid: Vec<ObjectId>,
3313    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
3314    commits: Vec<u64>,
3315    trees: Vec<u64>,
3316    blobs: Vec<u64>,
3317    tags: Vec<u64>,
3318}
3319
3320impl LoadedPackBitmap {
3321    pub fn object_count(&self) -> u32 {
3322        self.object_count
3323    }
3324
3325    /// Pack-order position of `oid`, when the object is in the bitmapped pack.
3326    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
3327        self.oid_to_pack.get(oid).copied()
3328    }
3329
3330    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
3331        self.pack_to_oid.get(position as usize)
3332    }
3333
3334    /// The resolved reachability bitset stored for `oid`, when it was one of
3335    /// the writer's selected commits.
3336    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
3337        self.commit_words.get(oid)
3338    }
3339
3340    /// Oids of every commit with a stored bitmap entry (unordered).
3341    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
3342        self.commit_words.keys()
3343    }
3344
3345    /// The type bitmap for `object_type` (bit per pack position).
3346    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
3347        match object_type {
3348            ObjectType::Commit => &self.commits,
3349            ObjectType::Tree => &self.trees,
3350            ObjectType::Blob => &self.blobs,
3351            ObjectType::Tag => &self.tags,
3352        }
3353    }
3354
3355    fn word_count(&self) -> usize {
3356        (self.object_count as usize).div_ceil(64)
3357    }
3358}
3359
3360/// Loads the single-pack `.bitmap` of `objects_dir/pack`, if a valid one
3361/// exists. Scans `pack-*.bitmap` files (sorted, first valid wins, like
3362/// upstream's "first bitmap" behaviour), requires the sibling `.idx`, and
3363/// verifies the recorded pack checksum. Any unreadable/corrupt bitmap yields
3364/// `Ok(None)` — consumers fall back to a regular object walk, mirroring
3365/// upstream's warn-and-ignore on bitmap load failure.
3366pub fn load_pack_bitmap(
3367    objects_dir: &Path,
3368    format: ObjectFormat,
3369) -> Result<Option<LoadedPackBitmap>> {
3370    let pack_dir = objects_dir.join("pack");
3371    if !pack_dir.exists() {
3372        return Ok(None);
3373    }
3374    // A multi-pack bitmap wins over single-pack bitmaps, like upstream's
3375    // open_bitmap trying the midx first.
3376    if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
3377        return Ok(Some(bitmap));
3378    }
3379    let mut bitmap_paths = Vec::new();
3380    for entry in fs::read_dir(&pack_dir)? {
3381        let path = entry?.path();
3382        if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
3383            && path
3384                .file_name()
3385                .and_then(|name| name.to_str())
3386                .is_some_and(|name| name.starts_with("pack-"))
3387        {
3388            bitmap_paths.push(path);
3389        }
3390    }
3391    bitmap_paths.sort();
3392    for bitmap_path in bitmap_paths {
3393        match load_pack_bitmap_file(&bitmap_path, format) {
3394            Ok(Some(bitmap)) => return Ok(Some(bitmap)),
3395            Ok(None) | Err(_) => continue,
3396        }
3397    }
3398    Ok(None)
3399}
3400
/// Loads `multi-pack-index-<checksum>.bitmap` when the pack directory has a
/// multi-pack-index, a bit-order permutation for it (the midx's own `RIDX`
/// chunk, or the separate `multi-pack-index-<checksum>.rev` file), and a
/// matching bitmap file. Returns `Ok(None)` — never an error — on any missing
/// or unusable piece, so callers fall through to single-pack bitmaps.
///
/// A wrongly sized `RIDX` chunk additionally prints an error and a warning to
/// stderr before giving up. Which source supplied the permutation is recorded
/// as a trace2 data event.
fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
    let midx_path = pack_dir.join("multi-pack-index");
    if !midx_path.exists() {
        return Ok(None);
    }
    let Ok(midx_bytes) = fs::read(&midx_path) else {
        return Ok(None);
    };
    // Probe the raw bytes first so a mis-sized RIDX chunk is reported even
    // before (and independently of) the full parse below.
    if midx_has_bad_ridx_chunk(&midx_bytes, format) {
        eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
        eprintln!("warning: multi-pack bitmap is missing required reverse index");
        return Ok(None);
    }
    let midx = match MultiPackIndex::parse(&midx_bytes, format) {
        Ok(midx) => midx,
        // The parser's own RIDX size complaint gets the same two-line report;
        // every other parse failure is silent.
        Err(GitError::InvalidFormat(message))
            if message == "multi-pack-index reverse-index chunk is the wrong size" =>
        {
            eprintln!("error: {message}");
            eprintln!("warning: multi-pack bitmap is missing required reverse index");
            return Ok(None);
        }
        Err(_) => return Ok(None),
    };
    // The bitmap file is named after the midx checksum.
    let bitmap_path = pack_dir.join(format!(
        "multi-pack-index-{}.bitmap",
        midx.checksum.to_hex()
    ));
    if !bitmap_path.exists() {
        return Ok(None);
    }
    let object_count = midx.objects.len();
    // Upstream `load_midx_revindex`: prefer the midx's own RIDX chunk unless
    // GIT_TEST_MIDX_READ_RIDX=0 disables it, else fall back to the separate
    // `multi-pack-index-<checksum>.rev` file; a trace2 data event records
    // which source supplied the permutation.
    // The variable counts as "off" only for `0` or (case-insensitively)
    // `false`; unset or unreadable means "on".
    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
        .unwrap_or(true);
    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
        (Some(chunk), true) => {
            sley_core::trace2::data("load_midx_revindex", "source", "midx");
            chunk.clone()
        }
        // Either there is no RIDX chunk or reading it was disabled.
        _ => {
            let rev_path =
                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
            let Ok(rev_bytes) = fs::read(&rev_path) else {
                // Without the RIDX permutation the bit numbering is unknown.
                return Ok(None);
            };
            let Ok(parsed_rev) =
                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
            else {
                return Ok(None);
            };
            sley_core::trace2::data("load_midx_revindex", "source", "rev");
            parsed_rev.positions
        }
    };
    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
        return Ok(None);
    };
    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
        Ok(parsed) => parsed,
        Err(_) => return Ok(None),
    };
    // The bitmap must have been written for exactly this midx.
    if parsed.pack_checksum != midx.checksum {
        return Ok(None);
    }

    // midx.objects is in lookup (oid-sorted) order; RIDX maps bit positions
    // to lookup positions.
    let mut pack_to_oid = Vec::with_capacity(object_count);
    for &midx_pos in &reverse_index {
        // A permutation entry pointing outside the object table makes the
        // whole bitmap unusable.
        let Some(entry) = midx.objects.get(midx_pos as usize) else {
            return Ok(None);
        };
        pack_to_oid.push(entry.oid);
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }
    // Bitmap entries name their commit by lookup-order position, hence the
    // closure over `midx.objects`. Assembly failures also degrade to `None`.
    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
        midx.objects.get(position).map(|entry| entry.oid)
    }) {
        Ok(loaded) => Ok(Some(loaded)),
        Err(_) => Ok(None),
    }
}
3496
3497fn midx_has_bad_ridx_chunk(bytes: &[u8], format: ObjectFormat) -> bool {
3498    let hash_len = format.raw_len();
3499    if bytes.len() < 12 + 12 + hash_len || &bytes[..4] != b"MIDX" {
3500        return false;
3501    }
3502    let chunk_count = bytes[6] as usize;
3503    let table_len = match (chunk_count + 1).checked_mul(12) {
3504        Some(table_len) => table_len,
3505        None => return false,
3506    };
3507    let table_end = match 12usize.checked_add(table_len) {
3508        Some(table_end) if table_end <= bytes.len().saturating_sub(hash_len) => table_end,
3509        _ => return false,
3510    };
3511    let mut entries = Vec::with_capacity(chunk_count + 1);
3512    let mut cursor = 12usize;
3513    while cursor < table_end {
3514        let id = [
3515            bytes[cursor],
3516            bytes[cursor + 1],
3517            bytes[cursor + 2],
3518            bytes[cursor + 3],
3519        ];
3520        let mut raw_offset = [0u8; 8];
3521        raw_offset.copy_from_slice(&bytes[cursor + 4..cursor + 12]);
3522        entries.push((id, u64::from_be_bytes(raw_offset) as usize));
3523        cursor += 12;
3524    }
3525    let mut oidf = None;
3526    let mut ridx = None;
3527    for pair in entries.windows(2) {
3528        let start = pair[0].1;
3529        let end = pair[1].1;
3530        if end < start || end > bytes.len().saturating_sub(hash_len) {
3531            return false;
3532        }
3533        match &pair[0].0 {
3534            b"OIDF" => oidf = Some((start, end)),
3535            b"RIDX" => ridx = Some((start, end)),
3536            _ => {}
3537        }
3538    }
3539    let Some((oidf_start, oidf_end)) = oidf else {
3540        return false;
3541    };
3542    let Some((ridx_start, ridx_end)) = ridx else {
3543        return false;
3544    };
3545    if oidf_end.saturating_sub(oidf_start) != 256 * 4 {
3546        return false;
3547    }
3548    let object_count_start = oidf_end - 4;
3549    let object_count = u32::from_be_bytes([
3550        bytes[object_count_start],
3551        bytes[object_count_start + 1],
3552        bytes[object_count_start + 2],
3553        bytes[object_count_start + 3],
3554    ]) as usize;
3555    ridx_end.saturating_sub(ridx_start) != object_count.saturating_mul(4)
3556}
3557
3558fn load_pack_bitmap_file(
3559    bitmap_path: &Path,
3560    format: ObjectFormat,
3561) -> Result<Option<LoadedPackBitmap>> {
3562    let index_path = bitmap_path.with_extension("idx");
3563    if !index_path.exists() {
3564        return Ok(None);
3565    }
3566    let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
3567    let object_count = index.entries.len();
3568    let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
3569    if parsed.pack_checksum != index.pack_checksum {
3570        return Ok(None);
3571    }
3572
3573    let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
3574    pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
3575    let mut pack_to_oid = Vec::with_capacity(object_count);
3576    for index_pos in &pack_order {
3577        pack_to_oid.push(index.entries[*index_pos as usize].oid);
3578    }
3579    let mut oid_to_pack = HashMap::with_capacity(object_count);
3580    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
3581        oid_to_pack.insert(*oid, pack_pos as u32);
3582    }
3583
3584    assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
3585        index.entries.get(position).map(|entry| entry.oid)
3586    })
3587    .map(Some)
3588}
3589
3590/// Shared tail of the bitmap loaders: expands the type bitmaps, resolves the
3591/// per-commit entries (XOR offsets reference earlier entries in file order),
3592/// and maps each entry's lookup-order position back to a commit oid via
3593/// `lookup_oid`.
3594fn assemble_loaded_bitmap(
3595    parsed: PackBitmapIndex,
3596    object_count: usize,
3597    pack_to_oid: Vec<ObjectId>,
3598    oid_to_pack: HashMap<ObjectId, u32>,
3599    lookup_oid: impl Fn(usize) -> Option<ObjectId>,
3600) -> Result<LoadedPackBitmap> {
3601    let word_count = object_count.div_ceil(64);
3602    let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
3603        let mut words = bitmap.to_words()?;
3604        words.resize(word_count, 0);
3605        Ok(words)
3606    };
3607
3608    let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
3609    let mut commit_words = HashMap::with_capacity(parsed.entries.len());
3610    for (entry_index, entry) in parsed.entries.iter().enumerate() {
3611        let mut words = expand(&entry.bitmap)?;
3612        if entry.xor_offset > 0 {
3613            let base_index = entry_index - entry.xor_offset as usize;
3614            let base = &resolved[base_index];
3615            for (dst, src) in words.iter_mut().zip(base.iter()) {
3616                *dst ^= *src;
3617            }
3618        }
3619        let words = Arc::new(words);
3620        resolved.push(Arc::clone(&words));
3621        let commit_oid = lookup_oid(entry.object_position as usize)
3622            .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
3623        commit_words.insert(commit_oid, words);
3624    }
3625
3626    Ok(LoadedPackBitmap {
3627        object_count: object_count as u32,
3628        oid_to_pack,
3629        pack_to_oid,
3630        commit_words,
3631        commits: expand(&parsed.type_bitmaps.commits)?,
3632        trees: expand(&parsed.type_bitmaps.trees)?,
3633        blobs: expand(&parsed.type_bitmaps.blobs)?,
3634        tags: expand(&parsed.type_bitmaps.tags)?,
3635    })
3636}
3637
3638/// Result of a bitmap-assisted reachability walk: pack-position bits for
3639/// in-pack objects plus the "extended" objects encountered outside the
3640/// bitmapped pack (in first-seen order, like upstream's extended index).
3641pub struct BitmapWalkResult {
3642    pub words: Vec<u64>,
3643    pub extended: Vec<(ObjectId, ObjectType)>,
3644}
3645
3646impl BitmapWalkResult {
3647    /// Removes everything reachable in `haves` from this result.
3648    pub fn subtract(&mut self, haves: &BitmapWalkResult) {
3649        for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
3650            *dst &= !*src;
3651        }
3652        let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
3653        self.extended.retain(|(oid, _)| !have_ext.contains(oid));
3654    }
3655}
3656
3657/// Computes the set of objects reachable from `roots` using stored bitmaps
3658/// where available and a fill-in object walk where not — the consult half of
3659/// the bitmap engine (upstream `find_objects` + `fill_in_bitmap`).
3660///
3661/// Roots may be any object type; tag chains are peeled with every tag object
3662/// itself included, like the pending-object handling in
3663/// `prepare_bitmap_walk`. When `include_objects` is false only commits are
3664/// walked (tree contents of fill-in commits are not marked) — callers that
3665/// only count/enumerate commits mask with the commit type bitmap, so the
3666/// extra non-commit bits OR-ed in from stored (closed) bitmaps are harmless.
3667pub fn bitmap_reachable(
3668    bitmap: &LoadedPackBitmap,
3669    db: &impl ObjectReader,
3670    format: ObjectFormat,
3671    roots: &[ObjectId],
3672    include_objects: bool,
3673) -> Result<BitmapWalkResult> {
3674    let mut walk = BitmapFillWalk {
3675        bitmap,
3676        words: vec![0u64; bitmap.word_count()],
3677        extended: Vec::new(),
3678        extended_seen: HashSet::new(),
3679    };
3680    let mut commit_stack: Vec<ObjectId> = Vec::new();
3681
3682    for root in roots {
3683        let mut oid = *root;
3684        // Peel tag chains, marking each tag object on the way.
3685        loop {
3686            let object = db.read_object(&oid)?;
3687            match object.object_type {
3688                ObjectType::Tag => {
3689                    walk.mark(&oid, ObjectType::Tag);
3690                    let tag = Tag::parse_ref(format, &object.body)?;
3691                    oid = tag.object;
3692                }
3693                ObjectType::Commit => {
3694                    commit_stack.push(oid);
3695                    break;
3696                }
3697                ObjectType::Tree => {
3698                    walk.mark_tree_closure(db, format, &oid)?;
3699                    break;
3700                }
3701                ObjectType::Blob => {
3702                    walk.mark(&oid, ObjectType::Blob);
3703                    break;
3704                }
3705            }
3706        }
3707    }
3708
3709    while let Some(oid) = commit_stack.pop() {
3710        if let Some(position) = bitmap.pack_position(&oid) {
3711            if bitset_get(&walk.words, position) {
3712                continue;
3713            }
3714            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
3715                bitset_or(&mut walk.words, stored);
3716                continue;
3717            }
3718            bitset_set(&mut walk.words, position);
3719        } else {
3720            if walk.extended_seen.contains(&oid) {
3721                continue;
3722            }
3723            walk.extended_seen.insert(oid);
3724            walk.extended.push((oid, ObjectType::Commit));
3725        }
3726        let object = db.read_object(&oid)?;
3727        let commit = Commit::parse_ref(format, &object.body)?;
3728        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
3729        if include_objects {
3730            walk.mark_tree_closure(db, format, &commit.tree)?;
3731        }
3732    }
3733
3734    Ok(BitmapWalkResult {
3735        words: walk.words,
3736        extended: walk.extended,
3737    })
3738}
3739
3740struct BitmapFillWalk<'a> {
3741    bitmap: &'a LoadedPackBitmap,
3742    words: Vec<u64>,
3743    extended: Vec<(ObjectId, ObjectType)>,
3744    extended_seen: HashSet<ObjectId>,
3745}
3746
3747impl BitmapFillWalk<'_> {
3748    /// Marks one object; returns false when it was already marked.
3749    fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
3750        if let Some(position) = self.bitmap.pack_position(oid) {
3751            if bitset_get(&self.words, position) {
3752                return false;
3753            }
3754            bitset_set(&mut self.words, position);
3755            true
3756        } else {
3757            if !self.extended_seen.insert(*oid) {
3758                return false;
3759            }
3760            self.extended.push((*oid, object_type));
3761            true
3762        }
3763    }
3764
3765    /// Marks `tree` and everything below it, skipping subtrees already marked
3766    /// (a set in-pack bit means its closure is covered: either it came from a
3767    /// stored — closed — bitmap, or this walk already expanded it).
3768    fn mark_tree_closure(
3769        &mut self,
3770        db: &impl ObjectReader,
3771        format: ObjectFormat,
3772        tree: &ObjectId,
3773    ) -> Result<()> {
3774        if !self.mark(tree, ObjectType::Tree) {
3775            return Ok(());
3776        }
3777        let object = db.read_object(tree)?;
3778        for entry in TreeEntries::new(format, &object.body) {
3779            let entry = entry?;
3780            if entry.is_gitlink() {
3781                continue;
3782            }
3783            if entry.is_tree() {
3784                self.mark_tree_closure(db, format, &entry.oid)?;
3785            } else {
3786                self.mark(&entry.oid, ObjectType::Blob);
3787            }
3788        }
3789        Ok(())
3790    }
3791}
3792
/// Object store that keeps every object in memory, in a map from object id to
/// the shared, encoded object.
#[derive(Debug)]
pub struct ObjectDatabase {
    // Hash format used to compute object ids on write and during validation.
    format: ObjectFormat,
    // Behind a `Mutex` so `write_object` can take `&self` (matching the
    // `ObjectWriter` trait) and a single handle can interleave reads and writes
    // without a `&mut` borrow — the same shared-by-`&` shape the file-backed
    // database uses for its caches. Removes the need for callers to wrap this in
    // a `RefCell`/`&mut` just to write (see sley-fetch's former `RefCell` dance).
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    // Set through `with_promisor`; defaults to `false`.
    // NOTE(review): presumably marks the store as backed by a promisor remote
    // (partial clone) — confirm against the readers of this flag.
    promisor: bool,
}
3804
3805impl ObjectDatabase {
3806    pub fn new(format: ObjectFormat) -> Self {
3807        Self {
3808            format,
3809            objects: Mutex::new(HashMap::new()),
3810            promisor: false,
3811        }
3812    }
3813
3814    pub fn with_promisor(mut self, promisor: bool) -> Self {
3815        self.promisor = promisor;
3816        self
3817    }
3818
3819    pub fn contains(&self, oid: &ObjectId) -> bool {
3820        self.objects
3821            .lock()
3822            .map(|objects| objects.contains_key(oid))
3823            .unwrap_or(false)
3824    }
3825
3826    pub fn validate(&self, oid: &ObjectId) -> Result<()> {
3827        let object = self.read_object(oid)?;
3828        let actual = object.object_id(self.format)?;
3829        if &actual == oid {
3830            Ok(())
3831        } else {
3832            Err(GitError::InvalidObject(format!(
3833                "object id mismatch: expected {oid}, got {actual}"
3834            )))
3835        }
3836    }
3837}
3838
3839impl ObjectReader for ObjectDatabase {
3840    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3841        self.objects
3842            .lock()
3843            .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
3844            .get(oid)
3845            .map(Arc::clone)
3846            .or_else(|| implied_empty_tree_object(self.format, oid))
3847            .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
3848    }
3849}
3850
3851impl ObjectWriter for ObjectDatabase {
3852    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3853        let oid = object.object_id(self.format)?;
3854        self.objects
3855            .lock()
3856            .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
3857            .entry(oid)
3858            .or_insert_with(|| Arc::new(object));
3859        Ok(oid)
3860    }
3861}
3862
/// One alternate location, identified by a filesystem path.
// NOTE(review): presumably an additional object directory consulted besides
// the primary one (git's `objects/info/alternates`) — confirm against users.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
3867
/// Settings describing how missing objects are treated in a partial clone.
// NOTE(review): field semantics below are inferred from the names only —
// confirm against the code that consumes this policy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured (presumably).
    pub promisor_remote: Option<String>,
    /// Presumably: whether an object promised by the remote may be absent
    /// locally without that being an error.
    pub allow_missing_promised_objects: bool,
}
3873
/// Raw pack-file bytes keyed by pack path, shared across cloned handles. Loaded
/// once so individual objects can be decoded at their offsets (see
/// [`sley_pack::read_object_at`]) without re-reading the whole file per read.
///
/// Each entry is an `Arc<PackData>`, so a pack's bytes can be handed out by
/// cloning the `Arc` rather than copying the data.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
3878
/// Backing bytes of a pack file: either memory-mapped (under the `mmap` feature)
/// or read into the heap. Both deref to `&[u8]`, so the decode path is identical.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file; only exists when the `mmap` feature is enabled.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// Whole file contents held in a `Vec`.
    Heap(Vec<u8>),
}

/// Lets callers treat either variant as a plain byte slice.
impl std::ops::Deref for PackData {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        match self {
            // Relies on the mapped file itself coercing to `&[u8]`.
            #[cfg(feature = "mmap")]
            Self::Mapped(mapped) => mapped,
            Self::Heap(bytes) => bytes,
        }
    }
}
3899
3900/// Load a pack file's bytes: memory-mapped when the `mmap` feature is on (falling
3901/// back to a heap read if the map fails), otherwise read into the heap.
3902#[cfg(feature = "mmap")]
3903fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3904    match sley_mmap::MappedFile::open_pack(pack_path) {
3905        Ok(mapped) => Ok(PackData::Mapped(mapped)),
3906        Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
3907    }
3908}
3909
3910#[cfg(not(feature = "mmap"))]
3911fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3912    Ok(PackData::Heap(fs::read(pack_path)?))
3913}
3914
3915#[cfg(feature = "mmap")]
3916fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3917    match sley_mmap::MappedFile::open_pack(index_path) {
3918        Ok(mapped) => Ok(Arc::new(mapped)),
3919        Err(_) => Ok(Arc::new(fs::read(index_path)?)),
3920    }
3921}
3922
3923#[cfg(not(feature = "mmap"))]
3924fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3925    Ok(Arc::new(fs::read(index_path)?))
3926}
3927
3928#[cfg(feature = "mmap")]
3929fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3930    match sley_mmap::MappedFile::open_multi_pack_index(midx_path) {
3931        Ok(mapped) => Ok(Arc::new(mapped)),
3932        Err(_) => Ok(Arc::new(fs::read(midx_path)?)),
3933    }
3934}
3935
3936#[cfg(not(feature = "mmap"))]
3937fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3938    Ok(Arc::new(fs::read(midx_path)?))
3939}
3940
/// Memory-capped LRU of recently decoded objects, shared across cloned handles,
/// so hot delta bases and repeated reads during a walk aren't re-decoded. The
/// cache is bounded by an approximate byte budget (not a fixed object count) so
/// it neither thrashes on bulk reads of small objects nor blows up on a few
/// large ones. See [`LruCache`] for the eviction policy.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;

/// Per-pack caches of objects decoded from a pack, keyed by pack path and then by
/// the in-pack byte offset of each object's entry. Shared across cloned handles.
/// This is the delta-base cache: resolving a delta chain by offset reuses already
/// decoded bases instead of re-inflating the whole chain on every read.
///
/// The outer mutex guards only the path -> cache map; every per-pack cache sits
/// behind its own `Arc<Mutex<..>>`.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
3953
/// One pack's offset-keyed header memo (see [`PackHeaderTypeCaches`]): maps an
/// in-pack offset to the object type at the end of that entry's delta chain,
/// paired with a `u64`.
// NOTE(review): the `u64` stored next to the type is presumably the object's
// size as reported in the header — confirm against the code filling the memo.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Per-pack memo of `in-pack offset -> end-of-chain object type` for the
/// `cat-file --batch-check` header fast path. Resolving a packed delta's *type*
/// walks the delta chain to its base; without this memo every header read
/// re-walks (and re-inflates) the whole chain, so reading every object in a
/// deeply-deltified pack is super-linear (sley#26). The type only depends on the
/// chain base, so memoizing `offset -> type` lets each chain be walked at most
/// once across a batch. Keyed by pack path so an offset key is never applied to
/// the wrong pack's bytes; shared across cloned handles.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
3966
/// Default approximate byte budget (96 MiB) for the decoded-object LRU. Sized to
/// comfortably hold the working set of a history walk (commits/trees/blobs and
/// their delta bases) without growing without bound on large repositories.
/// Overridable via the `SLEY_OBJECT_CACHE_BYTES` environment variable; there is
/// currently no git-config hook threaded into the object database, so this
/// constant is the default.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;

/// Default approximate byte budget (96 MiB) for each per-pack delta-base cache —
/// the budget applies per pack, not in total. Holds the decoded bases of the
/// delta chains being walked so neighboring reads stay warm. Overridable via
/// `SLEY_DELTA_BASE_CACHE_BYTES`.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
3978
3979/// Approximate heap cost of caching one [`EncodedObject`]: its body plus a fixed
3980/// allowance for the key, enum/`Vec` headers, and per-entry map overhead. Used
3981/// only to drive eviction, so an estimate is fine.
3982fn cached_object_cost(object: &EncodedObject) -> usize {
3983    object.body.len().saturating_add(64)
3984}
3985
/// Resolves an approximate byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as a `usize`; `default` is returned when
/// the variable is unset, not valid Unicode, or does not parse. A value of `0`
/// is passed through as-is, which disables the cache.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
3994
3995/// Approximate byte budget for the decoded-object LRU (see
3996/// [`DEFAULT_OBJECT_CACHE_BYTES`], `SLEY_OBJECT_CACHE_BYTES`).
3997///
3998/// Resolved once per process: the environment does not change under us, and a new
3999/// `FileObjectDatabase` is built often enough (e.g. once per revision resolved)
4000/// that re-reading the variable each time showed up as per-object overhead.
4001fn object_cache_budget() -> usize {
4002    static BUDGET: OnceLock<usize> = OnceLock::new();
4003    *BUDGET.get_or_init(|| {
4004        cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
4005    })
4006}
4007
4008/// Approximate byte budget for each per-pack delta-base cache (see
4009/// [`DEFAULT_DELTA_BASE_CACHE_BYTES`], `SLEY_DELTA_BASE_CACHE_BYTES`). Resolved
4010/// once per process for the same reason as [`object_cache_budget`].
4011fn delta_base_cache_budget() -> usize {
4012    static BUDGET: OnceLock<usize> = OnceLock::new();
4013    *BUDGET.get_or_init(|| {
4014        cache_budget_from_env(
4015            "SLEY_DELTA_BASE_CACHE_BYTES",
4016            DEFAULT_DELTA_BASE_CACHE_BYTES,
4017        )
4018    })
4019}
4020
/// Whether every object should be re-hashed on read and compared with the id
/// that was asked for.
///
/// Off by default, matching git: reads trust the pack index -> offset mapping
/// and the loose object's on-disk name, and ids are verified where git
/// verifies them — when a pack is received (the index build re-hashes every
/// object) and on demand via [`FileObjectDatabase`]'s `validate`/fsck.
/// Re-hashing on *every* read dominated bulk-read cost (a scalar pure-Rust
/// SHA-1 over each object's full body), so the paranoid check is opt-in:
/// set `SLEY_VERIFY_READS` to anything that, after trimming, is neither empty
/// nor `0`. An unset (or non-Unicode) variable leaves it off.
///
/// The environment is consulted on the first call only; the answer is cached
/// for the rest of the process.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|raw| {
                let flag = raw.trim();
                !(flag.is_empty() || flag == "0")
            })
            .unwrap_or(false)
    })
}
4038
/// A memory-capped LRU map from a key `K` to a decoded [`EncodedObject`].
///
/// Eviction is by approximate byte budget (gix-style), not object count, so the
/// cache adapts to object size. On access an entry is moved to most-recently-used;
/// on insert, least-recently-used entries are dropped until the budget holds. A
/// budget of `0` makes the cache inert. Generic over the key so it backs both the
/// oid-keyed decoded-object cache and the offset-keyed delta-base cache.
///
/// Recency order is a doubly linked list threaded through the map entries by
/// key (each entry names its neighbours), so no separate queue is kept.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    // Approximate byte budget the cache may occupy.
    budget: usize,
    // Running total of `cached_object_cost` over the stored entries.
    used: usize,
    // Entries by key; each also carries its recency-list links.
    map: HashMap<K, LruEntry<K>>,
    // Opposite end of the recency list from `tail` — presumably the
    // least-recently-used entry (confirm against the eviction code).
    head: Option<K>,
    // Most-recently-used entry; `None` when the cache is empty.
    tail: Option<K>,
}
4054
4055#[derive(Debug)]
4056struct LruEntry<K> {
4057    object: Arc<EncodedObject>,
4058    prev: Option<K>,
4059    next: Option<K>,
4060}
4061
4062impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
4063    fn new(budget: usize) -> Self {
4064        Self {
4065            budget,
4066            used: 0,
4067            map: HashMap::new(),
4068            head: None,
4069            tail: None,
4070        }
4071    }
4072
4073    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
4074        let object = Arc::clone(&self.map.get(key)?.object);
4075        self.touch(key);
4076        Some(object)
4077    }
4078
4079    /// Move `key` to the most-recently-used end in O(1).
4080    fn touch(&mut self, key: &K) {
4081        if self.tail.as_ref() == Some(key) {
4082            return;
4083        }
4084        if self.map.contains_key(key) {
4085            self.detach(key);
4086            self.attach_back(key.clone());
4087        }
4088    }
4089
4090    /// Drop `key` from both the map and the recency queue, releasing its budget.
4091    fn remove(&mut self, key: &K) {
4092        if let Some(entry) = self.map.get(key) {
4093            self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
4094        }
4095        self.detach(key);
4096        self.map.remove(key);
4097    }
4098
4099    fn detach(&mut self, key: &K) {
4100        let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
4101            let prev = entry.prev.take();
4102            let next = entry.next.take();
4103            (prev, next)
4104        }) else {
4105            return;
4106        };
4107
4108        match &prev {
4109            Some(prev_key) => {
4110                if let Some(prev_entry) = self.map.get_mut(prev_key) {
4111                    prev_entry.next = next.clone();
4112                }
4113            }
4114            None => self.head = next.clone(),
4115        }
4116        match &next {
4117            Some(next_key) => {
4118                if let Some(next_entry) = self.map.get_mut(next_key) {
4119                    next_entry.prev = prev.clone();
4120                }
4121            }
4122            None => self.tail = prev.clone(),
4123        }
4124    }
4125
4126    fn attach_back(&mut self, key: K) {
4127        let previous_tail = self.tail.replace(key.clone());
4128        match previous_tail {
4129            Some(tail_key) => {
4130                if let Some(tail_entry) = self.map.get_mut(&tail_key) {
4131                    tail_entry.next = Some(key.clone());
4132                }
4133                if let Some(entry) = self.map.get_mut(&key) {
4134                    entry.prev = Some(tail_key);
4135                    entry.next = None;
4136                }
4137            }
4138            None => {
4139                self.head = Some(key.clone());
4140                if let Some(entry) = self.map.get_mut(&key) {
4141                    entry.prev = None;
4142                    entry.next = None;
4143                }
4144            }
4145        }
4146    }
4147
4148    fn clear(&mut self) {
4149        self.map.clear();
4150        self.head = None;
4151        self.tail = None;
4152        self.used = 0;
4153    }
4154
4155    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
4156        if self.budget == 0 {
4157            return;
4158        }
4159        let cost = cached_object_cost(&object);
4160        // A single object larger than the whole budget is not worth caching; it
4161        // would immediately evict everything including itself. Drop any stale
4162        // smaller entry stored under the same key so accounting stays exact.
4163        if cost > self.budget {
4164            self.remove(&key);
4165            return;
4166        }
4167        if let Some(entry) = self.map.get_mut(&key) {
4168            let previous = std::mem::replace(&mut entry.object, object);
4169            // Replacing an existing entry: adjust accounting and refresh recency.
4170            self.used = self
4171                .used
4172                .saturating_sub(cached_object_cost(&previous))
4173                .saturating_add(cost);
4174            self.touch(&key);
4175        } else {
4176            self.used = self.used.saturating_add(cost);
4177            self.map.insert(
4178                key.clone(),
4179                LruEntry {
4180                    object,
4181                    prev: None,
4182                    next: None,
4183                },
4184            );
4185            self.attach_back(key);
4186        }
4187        while self.used > self.budget {
4188            let Some(evicted) = self.head.clone() else {
4189                break;
4190            };
4191            self.remove(&evicted);
4192        }
4193    }
4194}
4195
/// Decoded-object cache keyed by object id (loose + packed reads share it).
/// This is [`LruCache`] instantiated with [`ObjectId`] keys.
type LruObjectCache = LruCache<ObjectId>;
/// Delta-base cache keyed by in-pack byte offset, scoped to one pack.
/// This is [`LruCache`] instantiated with `u64` keys.
type LruOffsetCache = LruCache<u64>;
4200
4201/// Bridges the offset-keyed [`LruOffsetCache`] to [`sley_pack::PackDeltaCache`]
4202/// so the pack decoder can reuse decoded delta bases. Holds the shared cache
4203/// behind its mutex; a poisoned lock simply behaves as a cache miss/no-op, so a
4204/// decode still completes correctly (just without reuse).
4205struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
4206
4207impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
4208    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
4209        self.0.lock().ok()?.get(&offset)
4210    }
4211
4212    fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
4213        if let Ok(mut cache) = self.0.lock() {
4214            cache.put(offset, object);
4215        }
4216    }
4217}
4218
4219/// Bridges a per-pack `offset -> ObjectType` memo into the header fast path so
4220/// the ofs-delta chain walk is performed at most once per chain across a batch
4221/// of `read_object_header` calls (sley#26).
4222struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
4223
4224impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
4225    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
4226        self.0.lock().ok()?.get(&pack_offset).copied()
4227    }
4228
4229    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
4230        if let Ok(mut cache) = self.0.lock() {
4231            cache.insert(pack_offset, header);
4232        }
4233    }
4234}
4235
/// Parsed pack indexes keyed by `.idx` path, shared across cloned handles
/// through the `Arc`. This remains for MIDX and path-only fallback lookups;
/// normal pack-directory scans use [`PackRegistrySnapshot`] so the lookup hot
/// path can walk already-parsed pack records directly.
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;

/// Parsed multi-pack-index files keyed by path, shared across cloned handles.
/// Caching the MIDX parse lets object lookups in repositories with a MIDX
/// avoid reparsing the same fanout/object tables for every read.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;

/// Raw multi-pack-index OID lookup tables keyed by path, shared across cloned
/// handles. These avoid hashing and materializing every MIDX object when a
/// command only needs point lookups.
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
4251
4252/// One registered `.idx`/`.pack` pair from a pack directory. The index is parsed
4253/// when the registry snapshot is built; pack bytes and per-pack decode/header
4254/// caches hang directly off this record so repeated object lookups do not bounce
4255/// through path-keyed maps.
4256#[derive(Debug)]
4257struct RegisteredPack {
4258    idx: PathBuf,
4259    pack: PathBuf,
4260    index: Mutex<Option<Arc<PackIndexViewData>>>,
4261    data: Mutex<Option<Arc<PackData>>>,
4262    delta_cache: Arc<Mutex<LruOffsetCache>>,
4263    header_type_cache: PackHeaderTypeCache,
4264}
4265
4266impl RegisteredPack {
4267    fn new(idx: PathBuf, pack: PathBuf) -> Self {
4268        Self {
4269            idx,
4270            pack,
4271            index: Mutex::new(None),
4272            data: Mutex::new(None),
4273            delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
4274            header_type_cache: Arc::new(Mutex::new(HashMap::new())),
4275        }
4276    }
4277
4278    fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
4279        if let Ok(cache) = self.index.lock()
4280            && let Some(index) = cache.as_ref()
4281        {
4282            return Ok(Arc::clone(index));
4283        }
4284        let index_bytes = load_pack_index_data(&self.idx)?;
4285        let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
4286            index_bytes,
4287            format,
4288        )?);
4289        if let Ok(mut cache) = self.index.lock() {
4290            *cache = Some(Arc::clone(&index));
4291        }
4292        Ok(index)
4293    }
4294
4295    fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
4296        if let Ok(cache) = self.data.lock()
4297            && let Some(bytes) = cache.as_ref()
4298        {
4299            return Ok(Arc::clone(bytes));
4300        }
4301        if let Ok(cache) = pack_bytes.lock()
4302            && let Some(bytes) = cache.get(&self.pack)
4303        {
4304            let bytes = Arc::clone(bytes);
4305            if let Ok(mut local_cache) = self.data.lock() {
4306                *local_cache = Some(Arc::clone(&bytes));
4307            }
4308            return Ok(bytes);
4309        }
4310        let bytes = Arc::new(load_pack_data(&self.pack)?);
4311        if let Ok(mut local_cache) = self.data.lock() {
4312            *local_cache = Some(Arc::clone(&bytes));
4313        }
4314        if let Ok(mut cache) = pack_bytes.lock() {
4315            cache.insert(self.pack.clone(), Arc::clone(&bytes));
4316        }
4317        Ok(bytes)
4318    }
4319}
4320
/// Comparable summary of a pack directory's state, stored in each
/// [`PackRegistrySnapshot`]. Two fingerprints are equal when all three fields
/// match (derived `PartialEq`).
///
/// NOTE(review): presumably used to detect that the directory changed since a
/// snapshot was taken; the code that builds and compares it is outside this
/// section — confirm there.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    // Presumably the pack directory's modification time, `None` when it could
    // not be read — TODO confirm against the builder.
    modified: Option<std::time::SystemTime>,
    // Presumably the number of `.idx` files seen — TODO confirm.
    idx_count: usize,
    // Presumably the number of `.pack` files seen — TODO confirm.
    pack_count: usize,
}
4327
4328/// Snapshot of a pack directory's lookup state, shared across cloned handles.
4329/// New packs are still found: a lookup that misses every cached pack re-scans the
4330/// directory once before concluding the object is absent (see
4331/// [`FileObjectDatabase::find_pack_containing`]).
4332#[derive(Debug)]
4333struct PackRegistrySnapshot {
4334    fingerprint: PackDirFingerprint,
4335    packs: Vec<Arc<RegisteredPack>>,
4336    recent_pack: Mutex<Option<usize>>,
4337}
4338
4339impl PackRegistrySnapshot {
4340    fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
4341        Self {
4342            fingerprint,
4343            packs,
4344            recent_pack: Mutex::new(None),
4345        }
4346    }
4347
4348    fn cached_hint(&self) -> Option<usize> {
4349        self.recent_pack
4350            .lock()
4351            .ok()
4352            .and_then(|hint| *hint)
4353            .filter(|pack_index| *pack_index < self.packs.len())
4354    }
4355
4356    fn remember_hint(&self, pack_index: usize) {
4357        if let Ok(mut hint) = self.recent_pack.lock() {
4358            *hint = Some(pack_index);
4359        }
4360    }
4361}
4362
/// Cached pack-registry snapshot for this object directory, shared across cloned
/// handles. A `FileObjectDatabase` owns exactly one object directory, so this is
/// an `Option` instead of another path-keyed map; `None` means no snapshot is
/// currently cached.
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
4367
4368#[derive(Debug, Clone)]
4369struct PackLookup {
4370    pack: PathBuf,
4371    registered: Option<Arc<RegisteredPack>>,
4372    offset: u64,
4373}
4374
4375impl PackLookup {
4376    fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
4377        Self {
4378            pack: pack.pack.clone(),
4379            registered: Some(pack),
4380            offset,
4381        }
4382    }
4383
4384    fn from_path(pack: PathBuf, offset: u64) -> Self {
4385        Self {
4386            pack,
4387            registered: None,
4388            offset,
4389        }
4390    }
4391
4392    fn pack_path(&self) -> &Path {
4393        &self.pack
4394    }
4395
4396    fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
4397        match &self.registered {
4398            Some(pack) => pack.bytes(&database.pack_bytes),
4399            None => database.cached_pack_bytes(&self.pack),
4400        }
4401    }
4402
4403    fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
4404        match &self.registered {
4405            Some(pack) => database.cached_pack_index(&pack.idx),
4406            None => database.cached_pack_index(&self.pack.with_extension("idx")),
4407        }
4408    }
4409
4410    fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
4411        match &self.registered {
4412            Some(pack) => Some(Arc::clone(&pack.delta_cache)),
4413            None => database.pack_delta_cache(&self.pack),
4414        }
4415    }
4416
4417    fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
4418        match &self.registered {
4419            Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
4420            None => database.pack_header_type_cache(&self.pack),
4421        }
4422    }
4423}
4424
/// Object database backed by an on-disk object directory: loose objects,
/// packs under `objects_dir/pack`, and alternate object directories.
///
/// The handle is `Clone`; the cache fields are `Arc`-wrapped, so clones share
/// them rather than copying their contents.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    // Loose-object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    // The object directory this database reads from.
    objects_dir: PathBuf,
    // Alternate object directories; empty for handles built with
    // `without_alternates`.
    alternates: Vec<PathBuf>,
    // Object-id format (hash algorithm) the database was opened with.
    format: ObjectFormat,
    // Loaded pack data keyed by `.pack` path.
    pack_bytes: PackBytesCache,
    // Parsed pack indexes keyed by `.idx` path.
    pack_indexes: PackIndexCache,
    // Parsed multi-pack-index files keyed by path.
    multi_pack_indexes: MultiPackIndexCache,
    // Raw multi-pack-index OID lookup tables keyed by path.
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    // Cached snapshot of the pack directory, if one has been built.
    pack_registry: PackRegistryCache,
    // Decoded-object LRU cache, sized by `object_cache_budget()`.
    decoded: DecodedObjectCache,
    // NOTE(review): presumably the per-pack delta-base caches used for packs
    // looked up by path only (no registry record) — confirm.
    pack_deltas: PackDeltaCaches,
    // NOTE(review): presumably the per-pack header memos for path-only
    // lookups — confirm.
    pack_header_types: PackHeaderTypeCaches,
    // NOTE(review): presumably the lazily computed set of promised object
    // ids — confirm against `is_promised_object`.
    promisor_objects: Arc<OnceLock<HashSet<ObjectId>>>,
    /// Whether the owning repository actually has a promisor remote configured
    /// (`extensions.partialclone` is set, or some `remote.<name>.promisor` is
    /// true). Mirrors git's `is_promisor_object`, which only treats objects in
    /// `.promisor` packs as "promised" when `repo_has_promisor_remote()` holds:
    /// a stray `.promisor` sidecar in a non-partial repo must NOT excuse missing
    /// objects from fsck. Defaults to `false`; the fsck driver opts in after
    /// reading the repo config.
    promisor_remote_present: bool,
    /// Graft points (`$GIT_DIR/shallow`), loaded lazily on the first
    /// [`ObjectReader::is_shallow_graft`] query. `$GIT_DIR` is taken to be
    /// the parent of `objects_dir`, matching the standard layout.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
4453
4454#[derive(Debug)]
4455pub struct ObjectPresenceChecker {
4456    db: FileObjectDatabase,
4457    pack_dir: PathBuf,
4458    midx: Option<Arc<MultiPackIndexOidLookup>>,
4459    registry: Option<Arc<PackRegistrySnapshot>>,
4460    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
4461    recent_pack: Option<usize>,
4462    prepared_packs: bool,
4463    prepared_registry: bool,
4464}
4465
4466impl ObjectPresenceChecker {
4467    fn new(db: FileObjectDatabase) -> Self {
4468        let pack_dir = db.objects_dir.join("pack");
4469        Self {
4470            db,
4471            pack_dir,
4472            midx: None,
4473            registry: None,
4474            registry_indexes: Vec::new(),
4475            recent_pack: None,
4476            prepared_packs: false,
4477            prepared_registry: false,
4478        }
4479    }
4480
4481    pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
4482        if oid.format() != self.db.format {
4483            return Err(GitError::InvalidObjectId(format!(
4484                "object {oid} uses {}, store uses {}",
4485                oid.format().name(),
4486                self.db.format.name()
4487            )));
4488        }
4489        if self.db.loose.exists(oid)? {
4490            return Ok(true);
4491        }
4492        if self.find_packed(oid, false)? {
4493            return Ok(true);
4494        }
4495        if self.find_packed(oid, true)? {
4496            return Ok(true);
4497        }
4498        for alternate in &self.db.alternates {
4499            if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
4500                return Ok(true);
4501            }
4502        }
4503        // Preserve the regular contains() reprepare-on-miss behavior for loose
4504        // objects that appeared after the fanout cache was populated.
4505        self.db.loose.invalidate_cache();
4506        self.db.loose.exists(oid)
4507    }
4508
4509    fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
4510        self.prepare_packs(force_rescan)?;
4511        if let Some(midx) = &self.midx
4512            && midx.contains(oid)
4513        {
4514            return Ok(true);
4515        }
4516        self.prepare_registry(force_rescan)?;
4517        self.find_in_registry(oid)
4518    }
4519
4520    fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
4521        if self.prepared_packs && !force_rescan {
4522            return Ok(());
4523        }
4524        let midx_path = self.pack_dir.join("multi-pack-index");
4525        self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
4526        self.prepared_packs = true;
4527        Ok(())
4528    }
4529
4530    fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
4531        if self.prepared_registry && !force_rescan {
4532            return Ok(());
4533        }
4534        let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
4535        let registry_changed = match self.registry.as_ref() {
4536            Some(cached) => !Arc::ptr_eq(cached, &registry),
4537            None => true,
4538        };
4539        if registry_changed {
4540            self.registry_indexes = vec![None; registry.packs.len()];
4541            self.recent_pack = None;
4542            self.registry = Some(registry);
4543        }
4544        self.prepared_registry = true;
4545        Ok(())
4546    }
4547
4548    fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
4549        let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
4550            return Ok(false);
4551        };
4552        if let Some(pack_index) = self
4553            .recent_pack
4554            .filter(|pack_index| *pack_index < registry.packs.len())
4555        {
4556            let index = self.registry_index(&registry, pack_index)?;
4557            if index.find(oid).is_some() {
4558                return Ok(true);
4559            }
4560        }
4561        for pack_index in 0..registry.packs.len() {
4562            if Some(pack_index) == self.recent_pack {
4563                continue;
4564            }
4565            let index = self.registry_index(&registry, pack_index)?;
4566            if index.find(oid).is_some() {
4567                self.recent_pack = Some(pack_index);
4568                return Ok(true);
4569            }
4570        }
4571        Ok(false)
4572    }
4573
4574    fn registry_index(
4575        &mut self,
4576        registry: &PackRegistrySnapshot,
4577        pack_index: usize,
4578    ) -> Result<Arc<PackIndexViewData>> {
4579        if self.registry_indexes.len() != registry.packs.len() {
4580            self.registry_indexes = vec![None; registry.packs.len()];
4581            self.recent_pack = None;
4582        }
4583        if let Some(index) = self
4584            .registry_indexes
4585            .get(pack_index)
4586            .and_then(|index| index.as_ref())
4587        {
4588            return Ok(Arc::clone(index));
4589        }
4590        let index = registry.packs[pack_index].index(self.db.format)?;
4591        if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
4592            *slot = Some(Arc::clone(&index));
4593        }
4594        Ok(index)
4595    }
4596}
4597
4598/// Parse `$GIT_DIR/shallow`: one hex object id per line. A missing file is an
4599/// empty set (the repository is not shallow); unparsable lines are ignored so
4600/// a torn write never poisons walks.
4601fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
4602    let Ok(contents) = std::fs::read_to_string(shallow_file) else {
4603        return HashSet::new();
4604    };
4605    contents
4606        .lines()
4607        .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
4608        .collect()
4609}
4610
4611pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
4612    env::var_os("GIT_OBJECT_DIRECTORY")
4613        .map(PathBuf::from)
4614        .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
4615}
4616
/// Resolve the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is returned unchanged.
/// 2. Otherwise, if `git_dir/commondir` can be read, its trimmed contents name
///    the common directory; a relative path is taken relative to `git_dir`.
///    The result is canonicalized when possible and returned un-canonicalized
///    when canonicalization fails (for example because it does not exist).
/// 3. Otherwise `git_dir` itself is the common directory.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return overridden.into();
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(pointer.trim());
    let common = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    match fs::canonicalize(&common) {
        Ok(resolved) => resolved,
        Err(_) => common,
    }
}
4634
4635pub fn repository_object_ids(
4636    git_dir: impl AsRef<Path>,
4637    format: ObjectFormat,
4638) -> Result<Vec<ObjectId>> {
4639    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
4640}
4641
4642pub fn object_ids_in_objects_dir(
4643    objects_dir: impl AsRef<Path>,
4644    format: ObjectFormat,
4645) -> Result<Vec<ObjectId>> {
4646    let objects_dir = objects_dir.as_ref();
4647    let mut oids = HashSet::new();
4648    collect_loose_object_ids(objects_dir, format, &mut oids)?;
4649    collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
4650    let mut oids = oids.into_iter().collect::<Vec<_>>();
4651    oids.sort_by_key(ObjectId::to_hex);
4652    Ok(oids)
4653}
4654
4655fn collect_loose_object_ids(
4656    objects_dir: &Path,
4657    format: ObjectFormat,
4658    oids: &mut HashSet<ObjectId>,
4659) -> Result<()> {
4660    if !objects_dir.exists() {
4661        return Ok(());
4662    }
4663    let hex_len = format.hex_len();
4664    for entry in fs::read_dir(objects_dir)? {
4665        let entry = entry?;
4666        if !entry.file_type()?.is_dir() {
4667            continue;
4668        }
4669        let name = entry.file_name();
4670        let Some(fanout) = name.to_str() else {
4671            continue;
4672        };
4673        if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4674            continue;
4675        }
4676        for object_entry in fs::read_dir(entry.path())? {
4677            let object_entry = object_entry?;
4678            if !object_entry.file_type()?.is_file() {
4679                continue;
4680            }
4681            let name = object_entry.file_name();
4682            let Some(suffix) = name.to_str() else {
4683                continue;
4684            };
4685            if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4686                continue;
4687            }
4688            oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
4689        }
4690    }
4691    Ok(())
4692}
4693
4694fn collect_loose_fanout_object_ids(
4695    objects_dir: &Path,
4696    format: ObjectFormat,
4697    fanout: u8,
4698    oids: &mut HashSet<ObjectId>,
4699) -> Result<()> {
4700    let fanout_hex = format!("{fanout:02x}");
4701    let fanout_dir = objects_dir.join(&fanout_hex);
4702    let entries = match fs::read_dir(&fanout_dir) {
4703        Ok(entries) => entries,
4704        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
4705        Err(err) => return Err(GitError::Io(err.to_string())),
4706    };
4707    let hex_len = format.hex_len();
4708    for object_entry in entries {
4709        let object_entry = object_entry?;
4710        let name = object_entry.file_name();
4711        let Some(suffix) = name.to_str() else {
4712            continue;
4713        };
4714        if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4715            continue;
4716        }
4717        oids.insert(ObjectId::from_hex(
4718            format,
4719            &format!("{fanout_hex}{suffix}"),
4720        )?);
4721    }
4722    Ok(())
4723}
4724
/// Cached knowledge about which loose objects exist.
///
/// NOTE(review): presumably filled one fan-out directory at a time (see
/// `collect_loose_fanout_object_ids`), with `loaded_fanouts` recording which
/// first bytes have already been scanned — the code that maintains this is
/// outside this section; confirm there.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    // Fan-out bytes recorded as loaded.
    loaded_fanouts: HashSet<u8>,
    // Object ids recorded as present.
    objects: HashSet<ObjectId>,
}
4730
4731/// Every object id resolvable through a pack (any `.idx` or the
4732/// multi-pack-index) under `objects_dir/pack`. Used by `--unpacked`
4733/// filtering: an object is "unpacked" when absent from this set, regardless
4734/// of a loose copy also existing.
4735pub fn packed_object_ids(
4736    objects_dir: impl AsRef<Path>,
4737    format: ObjectFormat,
4738) -> Result<HashSet<ObjectId>> {
4739    let mut oids = HashSet::new();
4740    collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
4741    Ok(oids)
4742}
4743
4744fn collect_packed_object_ids(
4745    pack_dir: &Path,
4746    format: ObjectFormat,
4747    oids: &mut HashSet<ObjectId>,
4748) -> Result<()> {
4749    if !pack_dir.exists() {
4750        return Ok(());
4751    }
4752    let mut midx_pack_names = HashSet::new();
4753    let midx_path = pack_dir.join("multi-pack-index");
4754    if midx_path.exists() {
4755        let midx = MultiPackIndex::parse_without_checksum(&fs::read(&midx_path)?, format)?;
4756        midx_pack_names.extend(midx.pack_names.iter().cloned());
4757        oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
4758    }
4759    for entry in fs::read_dir(pack_dir)? {
4760        let path = entry?.path();
4761        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
4762            continue;
4763        }
4764        if !path.with_extension("pack").exists() {
4765            continue;
4766        }
4767        let index = match PackIndex::parse(&fs::read(&path)?, format) {
4768            Ok(index) => index,
4769            Err(_err)
4770                if path
4771                    .file_name()
4772                    .and_then(|name| name.to_str())
4773                    .is_some_and(|name| midx_pack_names.contains(name)) =>
4774            {
4775                eprintln!(
4776                    "error: packfile {} index unavailable",
4777                    path.with_extension("pack").display()
4778                );
4779                continue;
4780            }
4781            Err(err) => return Err(err),
4782        };
4783        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
4784    }
4785    Ok(())
4786}
4787
4788impl FileObjectDatabase {
    /// The object-id format (hash algorithm) this database was opened with.
    /// Returned by value; it is the `format` passed to the constructor.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
4793
4794    /// The repository object directory this database reads from.
4795    pub fn objects_dir(&self) -> &Path {
4796        &self.objects_dir
4797    }
4798
4799    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4800        let objects_dir = objects_dir.into();
4801        Self {
4802            loose: LooseObjectStore::new(objects_dir.clone(), format),
4803            alternates: alternate_object_dirs(&objects_dir),
4804            objects_dir,
4805            format,
4806            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4807            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4808            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4809            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4810            pack_registry: Arc::new(Mutex::new(None)),
4811            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4812            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4813            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4814            promisor_objects: Arc::new(OnceLock::new()),
4815            promisor_remote_present: false,
4816            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4817        }
4818    }
4819
4820    fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4821        let objects_dir = objects_dir.into();
4822        Self {
4823            loose: LooseObjectStore::new(objects_dir.clone(), format),
4824            alternates: Vec::new(),
4825            objects_dir,
4826            format,
4827            pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4828            pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4829            multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4830            multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4831            pack_registry: Arc::new(Mutex::new(None)),
4832            decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4833            pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4834            pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4835            promisor_objects: Arc::new(OnceLock::new()),
4836            promisor_remote_present: false,
4837            shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4838        }
4839    }
4840
4841    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
4842        Self::new(repository_objects_dir(git_dir), format)
4843    }
4844
4845    /// Declare whether the owning repository has a promisor remote configured.
4846    /// Only when this holds does [`ObjectReader::is_promised_object`] treat
4847    /// objects in `.promisor` packs (and their transitive references) as
4848    /// promised — matching git's `is_promisor_object`, which is gated on
4849    /// `repo_has_promisor_remote()`. Callers that know the repo config (e.g. the
4850    /// fsck driver) opt in; readers built without config keep the safe default
4851    /// of `false`, so a stray `.promisor` sidecar never silently excuses a
4852    /// genuinely missing object.
4853    pub fn with_promisor_remote_present(mut self, present: bool) -> Self {
4854        self.promisor_remote_present = present;
4855        self
4856    }
4857
4858    /// Drop cached pack registries, indexes, and decoded objects so the next read
4859    /// sees packs/objects installed after this handle was created (e.g. after
4860    /// `fetch` or `install_pack`). Long-lived [`Repository`] sessions call this
4861    /// via the owning repository's `refresh_objects` hook.
4862    pub fn refresh_read_cache(&self) {
4863        if let Ok(mut cache) = self.pack_registry.lock() {
4864            *cache = None;
4865        }
4866        if let Ok(mut cache) = self.pack_indexes.lock() {
4867            cache.clear();
4868        }
4869        if let Ok(mut cache) = self.multi_pack_indexes.lock() {
4870            cache.clear();
4871        }
4872        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
4873            cache.clear();
4874        }
4875        if let Ok(mut cache) = self.pack_bytes.lock() {
4876            cache.clear();
4877        }
4878        if let Ok(mut cache) = self.pack_deltas.lock() {
4879            cache.clear();
4880        }
4881        if let Ok(mut cache) = self.pack_header_types.lock() {
4882            cache.clear();
4883        }
4884        if let Ok(mut cache) = self.decoded.lock() {
4885            cache.clear();
4886        }
4887        self.loose.invalidate_cache();
4888    }
4889
4890    pub fn loose(&self) -> &LooseObjectStore {
4891        &self.loose
4892    }
4893
4894    pub fn presence_checker(&self) -> ObjectPresenceChecker {
4895        ObjectPresenceChecker::new(self.clone())
4896    }
4897
4898    pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
4899        self.install_pack_with_options(pack, RawPackInstallOptions::default())
4900    }
4901
4902    pub fn write_blob_as_pack(
4903        &self,
4904        oid: ObjectId,
4905        object: &EncodedObject,
4906        compression_level: u32,
4907    ) -> Result<ObjectId> {
4908        if object.object_type != ObjectType::Blob {
4909            return Err(GitError::InvalidObject(
4910                "write_blob_as_pack requires a blob object".into(),
4911            ));
4912        }
4913        if oid.format() != self.format {
4914            return Err(GitError::InvalidObjectId(format!(
4915                "object {oid} uses {}, store uses {}",
4916                oid.format().name(),
4917                self.format.name()
4918            )));
4919        }
4920        if self.contains(&oid)? {
4921            return Ok(oid);
4922        }
4923        let input = [PackInput { oid: &oid, object }];
4924        let options = PackWriteOptions::new()
4925            .with_window(0)
4926            .with_depth(0)
4927            .with_reorder(false)
4928            .with_compression_level(compression_level);
4929        let pack =
4930            PackFile::write_packed_with_known_ids_and_options(&input, self.format, &options)?;
4931        self.install_pack(&pack)?;
4932        Ok(oid)
4933    }
4934
4935    pub fn write_blobs_as_pack(
4936        &self,
4937        objects: &[(ObjectId, EncodedObject)],
4938        compression_level: u32,
4939    ) -> Result<()> {
4940        let mut seen = HashSet::with_capacity(objects.len());
4941        let mut inputs = Vec::new();
4942        for (oid, object) in objects {
4943            if object.object_type != ObjectType::Blob {
4944                return Err(GitError::InvalidObject(
4945                    "write_blobs_as_pack requires blob objects".into(),
4946                ));
4947            }
4948            if oid.format() != self.format {
4949                return Err(GitError::InvalidObjectId(format!(
4950                    "object {oid} uses {}, store uses {}",
4951                    oid.format().name(),
4952                    self.format.name()
4953                )));
4954            }
4955            if seen.insert(*oid) && !self.contains(oid)? {
4956                inputs.push(PackInput { oid, object });
4957            }
4958        }
4959        if inputs.is_empty() {
4960            return Ok(());
4961        }
4962        let options = PackWriteOptions::new()
4963            .with_window(0)
4964            .with_depth(0)
4965            .with_reorder(false)
4966            .with_compression_level(compression_level);
4967        let pack =
4968            PackFile::write_packed_with_known_ids_and_options(&inputs, self.format, &options)?;
4969        self.install_pack(&pack)?;
4970        Ok(())
4971    }
4972
4973    pub fn install_pack_with_options(
4974        &self,
4975        pack: &PackWrite,
4976        options: RawPackInstallOptions,
4977    ) -> Result<PackInstallResult> {
4978        if pack.checksum.format() != self.format {
4979            return Err(GitError::InvalidObjectId(format!(
4980                "pack checksum uses {}, store uses {}",
4981                pack.checksum.format().name(),
4982                self.format.name()
4983            )));
4984        }
4985        for entry in &pack.entries {
4986            if entry.oid.format() != self.format {
4987                return Err(GitError::InvalidObjectId(format!(
4988                    "pack entry {} uses {}, store uses {}",
4989                    entry.oid,
4990                    entry.oid.format().name(),
4991                    self.format.name()
4992                )));
4993            }
4994        }
4995        let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
4996        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4997        if canonical_index.pack_checksum != pack.checksum
4998            || parsed_index.pack_checksum != pack.checksum
4999        {
5000            return Err(GitError::InvalidFormat(
5001                "pack and index checksums do not match pack write".into(),
5002            ));
5003        }
5004        if pack.index != canonical_index.index {
5005            return Err(GitError::InvalidFormat(
5006                "pack index does not match pack contents".into(),
5007            ));
5008        }
5009
5010        let pack_dir = self.objects_dir.join("pack");
5011        fs::create_dir_all(&pack_dir)?;
5012        let pack_name = format!("pack-{}", pack.checksum.to_hex());
5013        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5014        let index_path = pack_dir.join(format!("{pack_name}.idx"));
5015        if !pack_path.exists() || !index_path.exists() {
5016            write_pack_component(&pack_path, &pack.pack)?;
5017            write_pack_component(&index_path, &pack.index)?;
5018        }
5019        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
5020        Ok(PackInstallResult {
5021            pack_name,
5022            pack_path,
5023            index_path,
5024            promisor_path,
5025            object_ids: canonical_index
5026                .entries
5027                .iter()
5028                .map(|entry| entry.oid)
5029                .collect(),
5030        })
5031    }
5032
5033    /// Install a pack that was produced in this process by [`PackFile::write_packed`].
5034    ///
5035    /// Unlike [`Self::install_raw_pack_with_options`], this does not re-inflate
5036    /// every pack entry to rebuild the index. It validates the generated pack
5037    /// trailer and generated index against the writer's object ids, CRCs, and
5038    /// offsets, then writes those bytes directly. Use the raw installer for
5039    /// arbitrary pack bytes received from an untrusted transport.
5040    pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
5041        self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
5042    }
5043
5044    pub fn install_written_pack_with_options(
5045        &self,
5046        pack: &PackWrite,
5047        options: RawPackInstallOptions,
5048    ) -> Result<PackInstallResult> {
5049        validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
5050        let parsed_index = PackIndex::parse(&pack.index, self.format)?;
5051        if parsed_index.pack_checksum != pack.checksum {
5052            return Err(GitError::InvalidFormat(
5053                "pack write index checksum does not match pack".into(),
5054            ));
5055        }
5056        if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
5057            return Err(GitError::InvalidFormat(
5058                "pack write index does not match generated entries".into(),
5059            ));
5060        }
5061        self.install_generated_pack_unchecked(pack, options)
5062    }
5063
5064    fn install_generated_pack_unchecked(
5065        &self,
5066        pack: &PackWrite,
5067        options: RawPackInstallOptions,
5068    ) -> Result<PackInstallResult> {
5069        let pack_dir = self.objects_dir.join("pack");
5070        fs::create_dir_all(&pack_dir)?;
5071        let pack_name = format!("pack-{}", pack.checksum.to_hex());
5072        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5073        let index_path = pack_dir.join(format!("{pack_name}.idx"));
5074        if !pack_path.exists() || !index_path.exists() {
5075            write_pack_component(&pack_path, &pack.pack)?;
5076            write_pack_component(&index_path, &pack.index)?;
5077        }
5078        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
5079        Ok(PackInstallResult {
5080            pack_name,
5081            pack_path,
5082            index_path,
5083            promisor_path,
5084            object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
5085        })
5086    }
5087
5088    fn install_pack_file_from_temp(
5089        &self,
5090        temp_pack_path: &Path,
5091        pack_checksum: ObjectId,
5092        index: &[u8],
5093        object_ids: Vec<ObjectId>,
5094        options: RawPackInstallOptions,
5095    ) -> Result<PackInstallResult> {
5096        let pack_dir = self.objects_dir.join("pack");
5097        fs::create_dir_all(&pack_dir)?;
5098        let pack_name = format!("pack-{}", pack_checksum.to_hex());
5099        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5100        let index_path = pack_dir.join(format!("{pack_name}.idx"));
5101        match fs::rename(temp_pack_path, &pack_path) {
5102            Ok(()) => {}
5103            Err(_) if pack_path.exists() => {
5104                let _ = fs::remove_file(temp_pack_path);
5105            }
5106            Err(err) => return Err(GitError::Io(err.to_string())),
5107        }
5108        write_pack_component(&index_path, index)?;
5109        let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
5110        Ok(PackInstallResult {
5111            pack_name,
5112            pack_path,
5113            index_path,
5114            promisor_path,
5115            object_ids,
5116        })
5117    }
5118
5119    pub fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<PackInstallResult>
5120    where
5121        R: Read,
5122    {
5123        self.install_raw_pack_from_reader_with_options(reader, RawPackInstallOptions::default())
5124    }
5125
5126    pub fn begin_raw_pack_install(
5127        &self,
5128        expected_pack_id: ObjectId,
5129        expected_pack_size: u64,
5130    ) -> Result<RawPackStreamingInstall> {
5131        self.begin_raw_pack_install_with_options(
5132            expected_pack_id,
5133            expected_pack_size,
5134            RawPackInstallOptions::default(),
5135        )
5136    }
5137
5138    pub fn begin_raw_pack_install_with_options(
5139        &self,
5140        expected_pack_id: ObjectId,
5141        expected_pack_size: u64,
5142        options: RawPackInstallOptions,
5143    ) -> Result<RawPackStreamingInstall> {
5144        if expected_pack_id.format() != self.format {
5145            return Err(GitError::InvalidObjectId(format!(
5146                "pack checksum uses {}, store uses {}",
5147                expected_pack_id.format().name(),
5148                self.format.name()
5149            )));
5150        }
5151        let pack_dir = self.objects_dir.join("pack");
5152        fs::create_dir_all(&pack_dir)?;
5153        let pack_name = format!("pack-{}", expected_pack_id.to_hex());
5154        let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5155        let index_path = pack_dir.join(format!("{pack_name}.idx"));
5156        let temp_pack_path = unique_temp_path(&pack_dir);
5157        let file = fs::OpenOptions::new()
5158            .write(true)
5159            .create_new(true)
5160            .open(&temp_pack_path)?;
5161        Ok(RawPackStreamingInstall {
5162            format: self.format,
5163            expected_pack_id,
5164            expected_pack_size,
5165            options,
5166            pack_dir,
5167            pack_name,
5168            pack_path,
5169            index_path,
5170            temp_pack_path,
5171            file: Some(file),
5172            written: 0,
5173            finished: false,
5174        })
5175    }
5176
5177    pub fn install_raw_pack_from_reader_with_options<R>(
5178        &self,
5179        reader: &mut R,
5180        options: RawPackInstallOptions,
5181    ) -> Result<PackInstallResult>
5182    where
5183        R: Read,
5184    {
5185        let pack_dir = self.objects_dir.join("pack");
5186        fs::create_dir_all(&pack_dir)?;
5187        let temp_pack_path = unique_temp_path(&pack_dir);
5188        let result = (|| -> Result<PackInstallResult> {
5189            // Stage directly in objects/pack so validation, indexing, and the
5190            // eventual checksum-named rename use one streamed write.
5191            let mut file = fs::OpenOptions::new()
5192                .write(true)
5193                .create_new(true)
5194                .open(&temp_pack_path)?;
5195            let built = {
5196                let mut tee = PackInstallTeeReader {
5197                    reader,
5198                    writer: &mut file,
5199                };
5200                PackIndex::write_v2_for_pack_reader_to_trailer(&mut tee, self.format)?
5201            };
5202            file.flush()?;
5203            file.sync_all()?;
5204            drop(file);
5205
5206            self.install_pack_file_from_temp(
5207                &temp_pack_path,
5208                built.pack_checksum,
5209                &built.index,
5210                built.entries.iter().map(|entry| entry.oid).collect(),
5211                options,
5212            )
5213        })();
5214        if result.is_err() {
5215            let _ = fs::remove_file(&temp_pack_path);
5216        }
5217        result
5218    }
5219
5220    pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
5221        if self.loose.exists(oid)? {
5222            return Ok(true);
5223        }
5224        if self.find_pack_containing(oid)?.is_some() {
5225            return Ok(true);
5226        }
5227        for alternate in &self.alternates {
5228            if Self::without_alternates(alternate, self.format).contains(oid)? {
5229                return Ok(true);
5230            }
5231        }
5232        // Reprepare-on-miss: a cached negative loose verdict may predate a
5233        // sibling write. Drop it and exact-probe once before reporting absence.
5234        self.loose.invalidate_cache();
5235        self.loose.exists(oid)
5236    }
5237
5238    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
5239        let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
5240            .into_iter()
5241            .collect::<HashSet<_>>();
5242        for alternate in &self.alternates {
5243            oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
5244        }
5245        let mut oids = oids.into_iter().collect::<Vec<_>>();
5246        oids.sort_by_key(ObjectId::to_hex);
5247        Ok(oids)
5248    }
5249
5250    pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
5251        if let Some(disk_size) = self.loose.disk_size(oid)? {
5252            return Ok(Some(ObjectStorageInfo {
5253                disk_size,
5254                deltabase: zero_oid(self.format)?,
5255            }));
5256        }
5257        if let Some(info) = self.packed_object_storage_info(oid)? {
5258            return Ok(Some(info));
5259        }
5260        for alternate in &self.alternates {
5261            if let Some(info) =
5262                Self::without_alternates(alternate, self.format).object_storage_info(oid)?
5263            {
5264                return Ok(Some(info));
5265            }
5266        }
5267        // Reprepare-on-miss: drop any stale negative loose cache and exact-probe
5268        // once before reporting absence (see `read_object`).
5269        self.loose.invalidate_cache();
5270        if let Some(disk_size) = self.loose.disk_size(oid)? {
5271            return Ok(Some(ObjectStorageInfo {
5272                disk_size,
5273                deltabase: zero_oid(self.format)?,
5274            }));
5275        }
5276        Ok(None)
5277    }
5278
5279    pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
5280        let mut matches = self.object_ids_with_prefix(prefix)?;
5281        Ok(match matches.len() {
5282            0 => ObjectPrefixResolution::Missing,
5283            1 => ObjectPrefixResolution::Unique(matches.remove(0)),
5284            _ => ObjectPrefixResolution::Ambiguous(matches),
5285        })
5286    }
5287
5288    pub fn object_ids_with_prefix(&self, prefix: &str) -> Result<Vec<ObjectId>> {
5289        validate_object_id_prefix(self.format, prefix)?;
5290        let mut matches = Vec::new();
5291        for oid in self.object_ids()? {
5292            if object_id_matches_prefix(&oid, prefix) {
5293                matches.push(oid);
5294            }
5295        }
5296        Ok(matches)
5297    }
5298
5299    /// The object type and content size of `oid` without decoding its full body —
5300    /// git's `cat-file --batch-check` fast path. Tries the decoded-object cache,
5301    /// then loose storage (inflating only the framing header), then packs (reading
5302    /// the entry header and, for deltas, only the delta's leading varints), then
5303    /// alternates. Returns `Ok(None)` if the object is not present.
5304    ///
5305    /// Unlike [`ObjectReader::read_object`], this never materializes the body, so it
5306    /// stays cheap on huge blobs and deep delta chains. It does not populate the
5307    /// decoded-object cache (nothing is decoded).
5308    pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
5309        if implied_empty_tree_object(self.format, oid).is_some() {
5310            return Ok(Some((ObjectType::Tree, 0)));
5311        }
5312        if let Ok(mut cache) = self.decoded.lock()
5313            && let Some(object) = cache.get(oid)
5314        {
5315            return Ok(Some((object.object_type, object.body.len() as u64)));
5316        }
5317        if let Some(header) = self.loose.read_header(oid)? {
5318            return Ok(Some(header));
5319        }
5320        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
5321            let bytes = pack_lookup.pack_bytes(self)?;
5322            // Per-pack offset->type memo so the ofs-delta chain walk that resolves
5323            // a packed object's type runs at most once per chain across the batch,
5324            // instead of re-walking (and re-inflating each link's leading varints)
5325            // on every header read — the sley#26 super-linear cat-file --batch-check.
5326            let type_cache = pack_lookup.header_type_cache(self);
5327            let resolve_ref_base = |base: &ObjectId| {
5328                self.read_object_header(base)
5329                    .map(|header| header.map(|(t, _)| t))
5330            };
5331            let header = match &type_cache {
5332                Some(cache) => {
5333                    let mut adapter = PackHeaderTypeCacheAdapter(cache);
5334                    sley_pack::read_object_header_at_with_cache(
5335                        &bytes,
5336                        pack_lookup.offset,
5337                        self.format,
5338                        resolve_ref_base,
5339                        &mut adapter,
5340                    )?
5341                }
5342                None => sley_pack::read_object_header_at(
5343                    &bytes,
5344                    pack_lookup.offset,
5345                    self.format,
5346                    resolve_ref_base,
5347                )?,
5348            };
5349            return Ok(Some(header));
5350        }
5351        for alternate in &self.alternates {
5352            if let Some(header) =
5353                Self::without_alternates(alternate, self.format).read_object_header(oid)?
5354            {
5355                return Ok(Some(header));
5356            }
5357        }
5358        // Reprepare-on-miss: discard any stale negative loose cache and retry an
5359        // exact path probe once before reporting absence (see `read_object`).
5360        self.loose.invalidate_cache();
5361        if let Some(header) = self.loose.read_header(oid)? {
5362            return Ok(Some(header));
5363        }
5364        Ok(None)
5365    }
5366
5367    fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
5368        // Memory-capped decoded-object cache first (delta-base reuse for ref-delta
5369        // bases that resolve back through the store + repeated whole-object reads).
5370        if let Ok(mut cache) = self.decoded.lock()
5371            && let Some(object) = cache.get(oid)
5372        {
5373            return Ok(Some(object));
5374        }
5375        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
5376            return Ok(None);
5377        };
5378        self.read_packed_object_at_lookup(oid, &pack_lookup)
5379            .map(Some)
5380    }
5381
5382    fn read_packed_object_at_lookup(
5383        &self,
5384        oid: &ObjectId,
5385        pack_lookup: &PackLookup,
5386    ) -> Result<Arc<EncodedObject>> {
5387        if let Ok(mut cache) = self.decoded.lock()
5388            && let Some(object) = cache.get(oid)
5389        {
5390            return Ok(object);
5391        }
5392        let bytes = pack_lookup.pack_bytes(self)?;
5393        // Per-pack delta-base cache (keyed by in-pack offset). Resolving an
5394        // ofs-delta chain reuses already-decoded bases instead of re-inflating the
5395        // whole chain on every read. Scoped to this pack's path so an offset key is
5396        // never applied to the wrong pack's bytes.
5397        let delta_cache = pack_lookup.delta_cache(self);
5398        let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
5399        // Decode only this object at its offset (plus its delta-base chain). A
5400        // ref-delta base resolves through the full store (loose / other packs) and
5401        // reuses the decoded-object cache. No cache lock is held across the decode,
5402        // so the recursive resolver re-entry (which may re-enter read_object) is
5403        // safe.
5404        let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
5405        let object = match &delta_adapter {
5406            Some(adapter) => sley_pack::read_object_at_with_cache_arc(
5407                &bytes,
5408                pack_lookup.offset,
5409                self.format,
5410                resolve_ref_base,
5411                adapter,
5412            )?,
5413            None => sley_pack::read_object_at_arc(
5414                &bytes,
5415                pack_lookup.offset,
5416                self.format,
5417                resolve_ref_base,
5418            )?,
5419        };
5420        // Trust the index → offset mapping rather than re-hashing every decoded
5421        // object on read (see `verify_reads_enabled`); this re-hash dominated
5422        // bulk-read cost. Opt back in with `SLEY_VERIFY_READS` for a paranoid check.
5423        if verify_reads_enabled() {
5424            let actual = object.object_id(self.format)?;
5425            if actual != *oid {
5426                return Err(GitError::InvalidObject(format!(
5427                    "pack object id mismatch: index says {oid}, decoded {actual}"
5428                )));
5429            }
5430        }
5431        if let Ok(mut cache) = self.decoded.lock() {
5432            cache.put(*oid, Arc::clone(&object));
5433        }
5434        Ok(object)
5435    }
5436
5437    /// The per-pack delta-base cache for `pack_path`, creating it on first use.
5438    /// Returns `None` only if the shared map's lock is poisoned, in which case the
5439    /// caller falls back to an uncached decode (correctness preserved).
5440    fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
5441        let mut caches = self.pack_deltas.lock().ok()?;
5442        let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
5443            Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
5444        });
5445        Some(Arc::clone(cache))
5446    }
5447
5448    /// The per-pack header-type memo for `pack_path`, creating it on first use.
5449    /// Returns `None` only if the shared map's lock is poisoned, in which case the
5450    /// caller falls back to an unmemoized header walk (correctness preserved).
5451    fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
5452        let mut caches = self.pack_header_types.lock().ok()?;
5453        let cache = caches
5454            .entry(pack_path.to_path_buf())
5455            .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
5456        Some(Arc::clone(cache))
5457    }
5458
5459    /// Backing bytes of the pack at `pack_path`, loaded at most once per database
5460    /// handle (cached, shared across clones). Memory-mapped under the `mmap` feature,
5461    /// otherwise read into the heap. On a poisoned lock it falls back to loading
5462    /// without caching, preserving correctness.
5463    fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
5464        if let Ok(cache) = self.pack_bytes.lock()
5465            && let Some(bytes) = cache.get(pack_path)
5466        {
5467            return Ok(Arc::clone(bytes));
5468        }
5469        let bytes = Arc::new(load_pack_data(pack_path)?);
5470        if let Ok(mut cache) = self.pack_bytes.lock() {
5471            cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
5472        }
5473        Ok(bytes)
5474    }
5475
5476    /// Parsed index for the `.idx` at `index_path`, parsed at most once per
5477    /// database handle. On a poisoned lock it falls back to parsing without
5478    /// caching, preserving correctness.
5479    fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
5480        if let Ok(cache) = self.pack_indexes.lock()
5481            && let Some(index) = cache.get(index_path)
5482        {
5483            return Ok(Arc::clone(index));
5484        }
5485        let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
5486        if let Ok(mut cache) = self.pack_indexes.lock() {
5487            cache.insert(index_path.to_path_buf(), Arc::clone(&index));
5488        }
5489        Ok(index)
5490    }
5491
5492    fn cached_multi_pack_index_oid_lookup(
5493        &self,
5494        midx_path: &Path,
5495    ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
5496        if !midx_path.exists() {
5497            return Ok(None);
5498        }
5499        if let Ok(cache) = self.multi_pack_oid_lookups.lock()
5500            && let Some(midx) = cache.get(midx_path)
5501        {
5502            return Ok(Some(Arc::clone(midx)));
5503        }
5504        let bytes = load_multi_pack_index_lookup_data(midx_path)?;
5505        let midx = match MultiPackIndexOidLookup::parse(bytes, self.format) {
5506            Ok(midx) => Arc::new(midx),
5507            Err(GitError::InvalidFormat(message))
5508                if message.starts_with("multi-pack-index hash id ") =>
5509            {
5510                let actual = message
5511                    .strip_prefix("multi-pack-index hash id ")
5512                    .and_then(|rest| rest.split_whitespace().next())
5513                    .unwrap_or("0");
5514                let expected = match self.format {
5515                    ObjectFormat::Sha1 => 1,
5516                    ObjectFormat::Sha256 => 2,
5517                };
5518                eprintln!(
5519                    "error: multi-pack-index hash version {actual} does not match version {expected}"
5520                );
5521                return Ok(None);
5522            }
5523            Err(err) => return Err(err),
5524        };
5525        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
5526            cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
5527        }
5528        Ok(Some(midx))
5529    }
5530
5531    /// Registry snapshot for this database's pack directory. With `force_rescan`,
5532    /// the directory is re-read; when the fingerprint and pack set match the
5533    /// cached snapshot, the same `Arc` is returned so miss handling can tell that
5534    /// no new packs appeared.
5535    fn cached_pack_registry(
5536        &self,
5537        pack_dir: &Path,
5538        force_rescan: bool,
5539    ) -> Result<Arc<PackRegistrySnapshot>> {
5540        if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
5541            return Ok(registry);
5542        }
5543        let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
5544        if let Ok(mut cache) = self.pack_registry.lock() {
5545            match cache.as_ref() {
5546                Some(existing)
5547                    if existing.fingerprint == scanned.fingerprint
5548                        && same_registered_pack_set(&existing.packs, &scanned.packs) =>
5549                {
5550                    return Ok(Arc::clone(existing));
5551                }
5552                _ => {
5553                    *cache = Some(Arc::clone(&scanned));
5554                }
5555            }
5556        }
5557        Ok(scanned)
5558    }
5559
5560    fn find_in_pack_registry(
5561        &self,
5562        registry: Arc<PackRegistrySnapshot>,
5563        oid: &ObjectId,
5564    ) -> Result<Option<PackLookup>> {
5565        let hinted_pack_index = registry.cached_hint();
5566        if let Some(pack_index) = hinted_pack_index {
5567            let pack = &registry.packs[pack_index];
5568            match pack.index(self.format) {
5569                Ok(index) => {
5570                    if let Some(entry) = index.find(oid) {
5571                        return Ok(Some(PackLookup::from_registered(
5572                            Arc::clone(pack),
5573                            entry.offset,
5574                        )));
5575                    }
5576                }
5577                Err(_) => {
5578                    eprintln!("error: packfile {} index unavailable", pack.pack.display());
5579                }
5580            }
5581        }
5582        for (pack_index, pack) in registry.packs.iter().enumerate() {
5583            if Some(pack_index) == hinted_pack_index {
5584                continue;
5585            }
5586            let index = match pack.index(self.format) {
5587                Ok(index) => index,
5588                Err(_) => {
5589                    eprintln!("error: packfile {} index unavailable", pack.pack.display());
5590                    continue;
5591                }
5592            };
5593            if let Some(entry) = index.find(oid) {
5594                registry.remember_hint(pack_index);
5595                return Ok(Some(PackLookup::from_registered(
5596                    Arc::clone(pack),
5597                    entry.offset,
5598                )));
5599            }
5600        }
5601        Ok(None)
5602    }
5603
5604    /// Read `oid` from any pack *other than* the one named by `exclude`, used as
5605    /// a corruption fallback: a redundant packed copy survives one pack's
5606    /// damage. Scans the on-disk `.idx` files directly (bypassing the registry
5607    /// cache, whose first hit is the excluded pack) and decodes from the first
5608    /// other pack that both indexes the object and parses cleanly.
5609    fn read_packed_object_from_other_packs(
5610        &self,
5611        oid: &ObjectId,
5612        exclude: &PackLookup,
5613    ) -> Result<Option<Arc<EncodedObject>>> {
5614        let pack_dir = self.objects_dir.join("pack");
5615        let Ok(entries) = fs::read_dir(&pack_dir) else {
5616            return Ok(None);
5617        };
5618        let excluded_pack = exclude.pack_path().to_path_buf();
5619        for entry in entries {
5620            let idx_path = entry?.path();
5621            if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
5622                continue;
5623            }
5624            let pack_path = idx_path.with_extension("pack");
5625            if pack_path == excluded_pack {
5626                continue;
5627            }
5628            let Ok(idx_bytes) = fs::read(&idx_path) else {
5629                continue;
5630            };
5631            let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
5632                continue;
5633            };
5634            let Some(entry) = index.find(oid) else {
5635                continue;
5636            };
5637            let candidate = PackLookup::from_path(pack_path, entry.offset);
5638            if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
5639                return Ok(Some(object));
5640            }
5641        }
5642        Ok(None)
5643    }
5644
5645    fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
5646        if oid.format() != self.format {
5647            return Err(GitError::InvalidObjectId(format!(
5648                "object {oid} uses {}, store uses {}",
5649                oid.format().name(),
5650                self.format.name()
5651            )));
5652        }
5653        let pack_dir = self.objects_dir.join("pack");
5654        // Hot path: a previously cached pack registry or multi-pack-index already
5655        // names every pack, and locating `oid` in them is pure in-memory index
5656        // work. Try that first so a warm handle does not parse indexes or hash
5657        // pack paths on every lookup.
5658        if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
5659            && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
5660        {
5661            return Ok(Some(pack_paths));
5662        }
5663        if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
5664            && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
5665        {
5666            return Ok(Some(pack_paths));
5667        }
5668
5669        if !pack_dir.exists() {
5670            return Ok(None);
5671        }
5672        if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
5673            return Ok(Some(pack_paths));
5674        }
5675        // Search the cached registry first. On a complete miss, re-scan the
5676        // directory once (picking up any pack added since the registry was
5677        // cached) and search again, so newly written packs are still found.
5678        let registry = self.cached_pack_registry(&pack_dir, false)?;
5679        if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(&registry), oid)? {
5680            return Ok(Some(pack_paths));
5681        }
5682        let refreshed = self.cached_pack_registry(&pack_dir, true)?;
5683        if Arc::ptr_eq(&registry, &refreshed) {
5684            // The re-scan produced the same registry, so nothing new appeared.
5685            return Ok(None);
5686        }
5687        self.find_in_pack_registry(refreshed, oid)
5688    }
5689
5690    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
5691        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
5692            return Ok(None);
5693        };
5694        let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
5695        let trailer_offset = pack_len
5696            .checked_sub(self.format.raw_len() as u64)
5697            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
5698        let index = pack_lookup.pack_index(self)?;
5699        let pack = pack_lookup.pack_bytes(self)?;
5700        let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
5701        let delta_base_offset = match &delta_base {
5702            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
5703            Some(PackDeltaBase::Ref(_)) | None => None,
5704        };
5705        let offset_info = scan_pack_index_offsets(
5706            &index,
5707            pack_lookup.offset,
5708            trailer_offset,
5709            delta_base_offset,
5710        )?;
5711        let disk_size = offset_info
5712            .end_offset
5713            .checked_sub(pack_lookup.offset)
5714            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
5715        let deltabase = match delta_base {
5716            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
5717                // scan_pack_index_offsets returns Err when delta_base_offset is
5718                // Some but no matching entry is found, so this is unreachable for
5719                // valid packs; propagate as an error rather than panic to keep a
5720                // malformed pack from taking down the process if that invariant
5721                // ever drifts.
5722                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
5723            })?,
5724            Some(PackDeltaBase::Ref(oid)) => oid,
5725            None => zero_oid(self.format)?,
5726        };
5727        Ok(Some(ObjectStorageInfo {
5728            disk_size,
5729            deltabase,
5730        }))
5731    }
5732
5733    fn find_midx_pack_containing(
5734        &self,
5735        pack_dir: &Path,
5736        oid: &ObjectId,
5737    ) -> Result<Option<PackLookup>> {
5738        let midx_path = pack_dir.join("multi-pack-index");
5739        let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
5740            return Ok(None);
5741        };
5742        self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
5743    }
5744
5745    fn midx_oid_lookup_pack_paths(
5746        &self,
5747        pack_dir: &Path,
5748        midx: &MultiPackIndexOidLookup,
5749        oid: &ObjectId,
5750    ) -> Result<Option<PackLookup>> {
5751        let Some(entry) = midx.find(oid)? else {
5752            return Ok(None);
5753        };
5754        let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
5755            return Err(GitError::InvalidFormat(
5756                "multi-pack-index object points past pack table".into(),
5757            ));
5758        };
5759        let pack_file_name = pack_name
5760            .strip_suffix(".idx")
5761            .map(|stem| format!("{stem}.pack"))
5762            .unwrap_or_else(|| pack_name.to_string());
5763        let pack = pack_dir.join(pack_file_name);
5764        Ok(Some(PackLookup::from_path(pack, entry.offset)))
5765    }
5766
5767    fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
5768        let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
5769        let cache = self.multi_pack_oid_lookups.lock().ok()?;
5770        cache.get(&midx_path).map(Arc::clone)
5771    }
5772
5773    /// The pack registry for `pack_dir` *only if already scanned and cached* —
5774    /// never touches the filesystem. Used by the lookup hot path to skip
5775    /// per-object pack-dir metadata checks once a handle is warm. A cold cache
5776    /// returns `None`, so the caller falls back to the scanning path. A complete
5777    /// miss still forces one rescan, preserving the new-pack discovery semantics.
5778    fn cached_loaded_pack_registry(
5779        &self,
5780        _pack_dir: &Path,
5781    ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
5782        let cache = match self.pack_registry.lock() {
5783            Ok(cache) => cache,
5784            Err(_) => return Ok(None),
5785        };
5786        Ok(cache.as_ref().map(Arc::clone))
5787    }
5788}
5789
5790fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
5791    if prefix.len() < 4 || prefix.len() > format.hex_len() {
5792        return Err(GitError::InvalidObjectId(format!(
5793            "expected 4 to {} hex digits for {}, got {}",
5794            format.hex_len(),
5795            format.name(),
5796            prefix.len()
5797        )));
5798    }
5799    if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
5800        return Err(GitError::InvalidObjectId(format!(
5801            "non-hex object id prefix {prefix}"
5802        )));
5803    }
5804    Ok(())
5805}
5806
5807fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
5808    oid.to_hex()
5809        .as_bytes()
5810        .iter()
5811        .zip(prefix.as_bytes())
5812        .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
5813}
5814
5815fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
5816    match fs::metadata(pack_dir) {
5817        Ok(metadata) => Ok(metadata.modified().ok()),
5818        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
5819        Err(err) => Err(GitError::Io(err.to_string())),
5820    }
5821}
5822
5823/// Scan `pack_dir` for `.idx` files that have a matching `.pack` sibling and
5824/// parse each index into a registered pack. An `.idx` without its `.pack` is
5825/// skipped (an orphan index cannot serve objects), matching the prior per-read
5826/// behavior.
5827fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
5828    let modified = pack_dir_modified(pack_dir)?;
5829    let entries = match fs::read_dir(pack_dir) {
5830        Ok(entries) => entries,
5831        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
5832            return Ok(PackRegistrySnapshot::new(
5833                PackDirFingerprint {
5834                    modified,
5835                    idx_count: 0,
5836                    pack_count: 0,
5837                },
5838                Vec::new(),
5839            ));
5840        }
5841        Err(err) => return Err(GitError::Io(err.to_string())),
5842    };
5843
5844    let mut idx_paths = Vec::new();
5845    let mut idx_count = 0;
5846    let mut pack_count = 0;
5847    for entry in entries {
5848        let entry = entry?;
5849        let path = entry.path();
5850        match path.extension().and_then(|ext| ext.to_str()) {
5851            Some("idx") => {
5852                idx_count += 1;
5853                idx_paths.push(path);
5854            }
5855            Some("pack") => {
5856                pack_count += 1;
5857            }
5858            _ => {}
5859        }
5860    }
5861
5862    let mut packs = Vec::new();
5863    for idx in idx_paths {
5864        let pack = idx.with_extension("pack");
5865        let Ok(metadata) = fs::metadata(&pack) else {
5866            continue;
5867        };
5868        let modified = pack_sort_modified(&metadata);
5869        packs.push((
5870            modified,
5871            metadata.len(),
5872            Arc::new(RegisteredPack::new(idx, pack)),
5873        ));
5874    }
5875    // Git keeps a most-recently-used pack order; seed ours with newer/larger
5876    // packs before falling back to the path. In repositories with many packs,
5877    // this avoids parsing a long run of unrelated `.idx` files before the first
5878    // lookup establishes the recent-pack hint.
5879    packs.sort_by(|left, right| {
5880        right
5881            .0
5882            .cmp(&left.0)
5883            .then_with(|| right.1.cmp(&left.1))
5884            .then_with(|| left.2.idx.cmp(&right.2.idx))
5885    });
5886    let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
5887    Ok(PackRegistrySnapshot::new(
5888        PackDirFingerprint {
5889            modified,
5890            idx_count,
5891            pack_count,
5892        },
5893        packs,
5894    ))
5895}
5896
/// Sort key for a pack's modification time: `(seconds, nanoseconds)` since the
/// Unix epoch. Falls back to `(0, 0)` when the modification time is
/// unavailable or lies before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(mtime) = metadata.modified() else {
        return (0, 0);
    };
    match mtime.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
5909
5910/// Whether two pack registries reference the same pack/index paths (order is
5911/// already normalized by [`scan_pack_registry`]).
5912fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
5913    left.len() == right.len()
5914        && left
5915            .iter()
5916            .zip(right.iter())
5917            .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
5918}
5919
/// Collect the alternate object directories configured for `objects_dir`.
///
/// Sources, in order:
/// 1. The `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    [`env::split_paths`] so the platform's path-list separator is honoured
///    and non-UTF-8 entries are preserved instead of being lossily
///    re-encoded. Empty entries are dropped.
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines and
///    lines starting with `#` are ignored, a trailing `\r` is stripped,
///    non-UTF-8 lines are skipped, and relative paths are resolved against
///    `objects_dir`.
///
/// A missing or unreadable alternates file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            // Tolerate CRLF line endings.
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
5950
5951impl ObjectReader for FileObjectDatabase {
5952    fn is_promised_object(&self, oid: &ObjectId) -> bool {
5953        // Gate on a configured promisor remote, exactly like git's
5954        // `is_promisor_object` (which short-circuits when
5955        // `repo_has_promisor_remote()` is false). Without this, a `.promisor`
5956        // sidecar left in an ordinary repository would wrongly excuse missing
5957        // objects from fsck connectivity checks.
5958        self.promisor_remote_present && self.promisor_objects().contains(oid)
5959    }
5960
5961    fn has_shallow_grafts(&self) -> bool {
5962        !self
5963            .shallow_grafts
5964            .get_or_init(|| {
5965                let shallow_file = self
5966                    .objects_dir
5967                    .parent()
5968                    .map(|git_dir| git_dir.join("shallow"));
5969                match shallow_file {
5970                    Some(path) => read_shallow_grafts(&path, self.format),
5971                    None => HashSet::new(),
5972                }
5973            })
5974            .is_empty()
5975    }
5976
5977    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
5978        self.shallow_grafts
5979            .get_or_init(|| {
5980                let shallow_file = self
5981                    .objects_dir
5982                    .parent()
5983                    .map(|git_dir| git_dir.join("shallow"));
5984                match shallow_file {
5985                    Some(path) => read_shallow_grafts(&path, self.format),
5986                    None => HashSet::new(),
5987                }
5988            })
5989            .contains(oid)
5990    }
5991
5992    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
5993        if let Some(object) = implied_empty_tree_object(self.format, oid) {
5994            return Ok(object);
5995        }
5996        // A corrupt loose copy must not shadow a good packed copy: git's
5997        // `oid_object_info_extended` consults every source, so a repacked object
5998        // whose loose file was later corrupted still reads fine from the pack. If
5999        // a packed copy exists, prefer it WITHOUT touching the corrupt loose file
6000        // (which would otherwise emit a spurious `inflate:` diagnostic on each
6001        // probe). Only when no pack copy exists do we read (and, if corrupt,
6002        // surface the error from) the loose file.
6003        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
6004            match self.read_packed_object_at_lookup(oid, &pack_lookup) {
6005                Ok(object) => return Ok(object),
6006                Err(GitError::NotFound(_)) => {}
6007                // A corrupt packed copy must not be fatal when another good copy
6008                // exists: git's `oid_object_info_extended` keeps consulting the
6009                // remaining sources (loose, other packs, alternates) when a pack
6010                // read fails. Fall through to the loose/other-pack probes and
6011                // only surface the packed error if every source comes up empty.
6012                Err(packed_err) => {
6013                    if let Ok(object) = self.loose.read_object(oid) {
6014                        return Ok(object);
6015                    }
6016                    // Try any *other* pack that also holds the object (a
6017                    // redundant copy survives one pack's corruption).
6018                    if let Some(object) =
6019                        self.read_packed_object_from_other_packs(oid, &pack_lookup)?
6020                    {
6021                        return Ok(object);
6022                    }
6023                    for alternate in &self.alternates {
6024                        if let Ok(object) =
6025                            Self::without_alternates(alternate, self.format).read_object(oid)
6026                        {
6027                            return Ok(object);
6028                        }
6029                    }
6030                    return Err(packed_err);
6031                }
6032            }
6033        }
6034        let loose_err = match self.loose.read_object(oid) {
6035            Ok(object) => return Ok(object),
6036            Err(GitError::NotFound(_)) => None,
6037            Err(err) => Some(err),
6038        };
6039        if let Some(object) = self.read_packed_object(oid)? {
6040            return Ok(object);
6041        }
6042        for alternate in &self.alternates {
6043            match Self::without_alternates(alternate, self.format).read_object(oid) {
6044                Ok(object) => return Ok(object),
6045                Err(GitError::NotFound(_)) => {}
6046                Err(err) => return Err(err),
6047            }
6048        }
6049        // Hard miss against every store. If an earlier enumeration built a loose
6050        // cache, an object written loose afterward by a sibling handle could have
6051        // been skipped above. Mirror git's `oid_object_info_extended`
6052        // reprepare-on-miss: drop stale cache state and retry an exact loose path
6053        // probe once before declaring the object missing.
6054        self.loose.invalidate_cache();
6055        match self.loose.read_object(oid) {
6056            Ok(object) => return Ok(object),
6057            Err(GitError::NotFound(_)) => {}
6058            Err(err) => return Err(err),
6059        }
6060        // No good copy in any store. If the local loose copy was corrupt (not
6061        // merely absent), surface that error — it is more specific than a plain
6062        // "not found".
6063        if let Some(err) = loose_err {
6064            return Err(err);
6065        }
6066        Err(GitError::object_not_found_in(
6067            *oid,
6068            MissingObjectContext::Read,
6069        ))
6070    }
6071}
6072
6073impl FileObjectDatabase {
6074    fn promisor_objects(&self) -> &HashSet<ObjectId> {
6075        self.promisor_objects.get_or_init(|| {
6076            let mut promised =
6077                promisor_pack_object_ids(&self.objects_dir, self.format).unwrap_or_default();
6078            let mut pending = promised.iter().copied().collect::<Vec<_>>();
6079            while let Some(oid) = pending.pop() {
6080                let Ok(object) = self.read_object(&oid) else {
6081                    continue;
6082                };
6083                for link in promisor_object_links(self.format, &object) {
6084                    if promised.insert(link) {
6085                        pending.push(link);
6086                    }
6087                }
6088            }
6089            promised
6090        })
6091    }
6092}
6093
6094fn promisor_pack_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
6095    let pack_dir = objects_dir.join("pack");
6096    let mut oids = HashSet::new();
6097    if !pack_dir.exists() {
6098        return Ok(oids);
6099    }
6100    for entry in fs::read_dir(pack_dir)? {
6101        let path = entry?.path();
6102        if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
6103            continue;
6104        }
6105        if !path.with_extension("pack").exists() || !path.with_extension("promisor").exists() {
6106            continue;
6107        }
6108        let index = PackIndex::parse(&fs::read(path)?, format)?;
6109        oids.extend(index.entries.into_iter().map(|entry| entry.oid));
6110    }
6111    Ok(oids)
6112}
6113
6114fn promisor_object_links(format: ObjectFormat, object: &EncodedObject) -> Vec<ObjectId> {
6115    match object.object_type {
6116        ObjectType::Commit => Commit::parse_ref(format, &object.body)
6117            .map(|commit| {
6118                let mut links = Vec::with_capacity(commit.parents.len() + 1);
6119                links.push(commit.tree);
6120                links.extend(commit.parents);
6121                links
6122            })
6123            .unwrap_or_default(),
6124        ObjectType::Tree => TreeEntries::new(format, &object.body)
6125            .filter_map(|entry| entry.ok().map(|entry| entry.oid))
6126            .collect(),
6127        ObjectType::Tag => Tag::parse_ref(format, &object.body)
6128            .map(|tag| vec![tag.object])
6129            .unwrap_or_default(),
6130        ObjectType::Blob => Vec::new(),
6131    }
6132}
6133
6134impl ObjectWriter for FileObjectDatabase {
6135    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
6136        // Mirror git's freshen semantics (`write_object_file`:
6137        // `freshen_packed_object || freshen_loose_object`): an object already
6138        // present anywhere in the database — loose, packed, or through an
6139        // alternate — is not written again, so e.g. `git add` after
6140        // `git repack -ad` does not resurrect a loose copy of a packed object.
6141        let oid = object.object_id(self.format)?;
6142        if self.contains(&oid)? {
6143            return Ok(oid);
6144        }
6145        self.loose.write_object(object)
6146    }
6147}
6148
6149fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
6150    if path.exists() {
6151        return Ok(());
6152    }
6153    let parent = path
6154        .parent()
6155        .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
6156    fs::create_dir_all(parent)?;
6157    let temp_path = unique_temp_path(parent);
6158    let write_result = (|| -> Result<()> {
6159        {
6160            let mut file = fs::OpenOptions::new()
6161                .write(true)
6162                .create_new(true)
6163                .open(&temp_path)?;
6164            file.write_all(bytes)?;
6165            file.sync_all()?;
6166        }
6167        match fs::rename(&temp_path, path) {
6168            Ok(()) => Ok(()),
6169            Err(_) if path.exists() => {
6170                let _ = fs::remove_file(&temp_path);
6171                Ok(())
6172            }
6173            Err(err) => Err(GitError::Io(err.to_string())),
6174        }
6175    })();
6176    if write_result.is_err() {
6177        let _ = fs::remove_file(&temp_path);
6178    }
6179    write_result
6180}
6181
6182fn write_promisor_pack_sidecar(
6183    pack_dir: &Path,
6184    pack_name: &str,
6185    promisor: bool,
6186) -> Result<Option<PathBuf>> {
6187    if !promisor {
6188        return Ok(None);
6189    }
6190    let path = pack_dir.join(format!("{pack_name}.promisor"));
6191    write_pack_component(&path, b"")?;
6192    Ok(Some(path))
6193}
6194
/// Maximum number of bytes git will inflate when reading a loose object's
/// `"<type> <size>\0"` header (git's `MAX_HEADER_LEN` in object-file.c). The NUL
/// terminator must land within this window, so a header of 32 or more non-NUL
/// bytes is rejected as too long.
///
/// The value is also interpolated into the diagnostic built by
/// `loose_header_too_long`.
const MAX_LOOSE_HEADER_LEN: usize = 32;
6200
6201/// git's exact `error:`-level diagnostic for a loose object whose header overflows
6202/// `MAX_LOOSE_HEADER_LEN` (object-file.c: `error(_("header for %s too long, exceeds
6203/// %d bytes"), ...)`). Shared by the header-only and full-read paths so both surface
6204/// byte-identical text.
6205fn loose_header_too_long(oid: &ObjectId) -> GitError {
6206    GitError::InvalidObject(format!(
6207        "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
6208    ))
6209}
6210
6211/// git's `error:`-level diagnostic when the loose framing header cannot be inflated at
6212/// all (object-file.c `loose_object_info`, the `ULHR_BAD` arm: `error(_("unable to
6213/// unpack %s header"), ...)`).
6214fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
6215    GitError::InvalidObject(format!("unable to unpack {oid} header"))
6216}
6217
/// Classify an inflate failure from the two-byte zlib stream header, returning
/// the text git-zlib.c would print via `error("inflate: %s (%s)", ...)`.
///
/// The checks run in the same order as zlib's own `inflate()` HEAD-state
/// validation: the FCHECK checksum over CMF+FLG, the compression method, the
/// window size, and finally the FDICT preset-dictionary bit (zlib reports
/// `Z_NEED_DICT` with a NULL `msg`, which git renders as "(no message)").
///
/// Returns `None` when fewer than two bytes are available or the header is
/// valid: failures past the stream header are not classified because flate2
/// does not surface zlib's per-case `msg` strings, so no diagnostic is
/// fabricated for them.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let fcheck_ok = u16::from_be_bytes([cmf, flg]) % 31 == 0;
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let wants_dictionary = flg & 0x20 != 0;

    if !fcheck_ok {
        Some("inflate: data stream error (incorrect header check)")
    } else if method != 8 {
        Some("inflate: data stream error (unknown compression method)")
    } else if window_bits > 7 {
        Some("inflate: data stream error (invalid window size)")
    } else if wants_dictionary {
        Some("inflate: needs dictionary (no message)")
    } else {
        None
    }
}
6241
6242/// Print the `error: inflate: ...` line git's zlib wrapper emits the moment
6243/// `inflate()` fails, when the failure is classifiable from the stream header.
6244fn emit_inflate_diagnostic(input: &[u8]) {
6245    if let Some(diagnostic) = inflate_header_diagnostic(input) {
6246        eprintln!("error: {diagnostic}");
6247    }
6248}
6249
/// Integrity verdict for a single loose object file, as classified by
/// [`LooseObjectStore::verify_object`].
///
/// The three variants are mutually exclusive outcomes: fully valid, readable
/// but stored under the wrong oid, or unreadable.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// Inflated, parsed, and re-hashed to its path-derived oid.
    Ok,
    /// Readable and well-formed, but its content hashes to a different oid
    /// (a loose file stored under the wrong path). `actual` carries the oid
    /// the content hashes to.
    HashMismatch { actual: ObjectId },
    /// Unreadable: corrupt zlib stream, truncated content, or unparseable header.
    /// The `error:`-level diagnostics were already printed to stderr.
    Corrupt,
}
6263
/// Handle onto the loose-object part of an object directory.
///
/// Cloning is cheap and clones share one presence cache, because the cache
/// sits behind an `Arc`.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Root of the object directory; a loose object lives at
    /// `<objects_dir>/<first two hex digits>/<remaining hex digits>`.
    objects_dir: PathBuf,
    /// Hash format every oid handed to this store must use.
    format: ObjectFormat,
    /// Lazily-populated set of loose object ids present on disk, mirroring git's
    /// `loose_objects_cache` (object-file.c). A lookup scans the queried
    /// `objects/XX/` fanout once; afterward misses in that fanout are in-memory
    /// checks instead of failed exact-path opens. Shared across
    /// `FileObjectDatabase` clones via `Arc` so a write through one handle is
    /// visible to reads through another; cleared by `refresh_read_cache` so
    /// objects installed out-of-band (fetch, repack) become visible. Writes
    /// extend the set in place rather than invalidating it.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
6278
6279impl LooseObjectStore {
6280    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
6281        Self {
6282            objects_dir: objects_dir.into(),
6283            format,
6284            loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
6285        }
6286    }
6287
6288    /// Whether `oid` is present according to the loose-object cache, populating
6289    /// the cache on first use. Returns `None` when the lock cannot be trusted or
6290    /// the scan fails; callers should fall back to an exact filesystem probe in
6291    /// that case so a cache-building problem cannot change read semantics.
6292    fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
6293        let mut guard = self.loose_cache.lock().ok()?;
6294        let fanout = oid.as_bytes()[0];
6295        if !guard.loaded_fanouts.contains(&fanout) {
6296            collect_loose_fanout_object_ids(
6297                &self.objects_dir,
6298                self.format,
6299                fanout,
6300                &mut guard.objects,
6301            )
6302            .ok()?;
6303            guard.loaded_fanouts.insert(fanout);
6304        }
6305        Some(guard.objects.contains(oid))
6306    }
6307
6308    /// Populate the loose-object cache and return the sorted ids. This mirrors
6309    /// git's `odb_loose_cache` lazy fill and is reserved for operations that
6310    /// really need loose-object enumeration.
6311    fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
6312        if let Ok(mut guard) = self.loose_cache.lock() {
6313            guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
6314            guard.loaded_fanouts = (0..=u8::MAX).collect();
6315            let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
6316            ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
6317            return Ok(ids);
6318        }
6319        loose_object_ids(&self.objects_dir, self.format)
6320    }
6321
6322    /// Record `oid` as present in loose storage so subsequent reads find it
6323    /// without a rescan. A no-op when the cache has not been populated yet (the
6324    /// eventual lazy scan will pick the object up) or the lock is poisoned.
6325    fn note_loose_write(&self, oid: ObjectId) {
6326        if let Ok(mut guard) = self.loose_cache.lock() {
6327            guard.objects.insert(oid);
6328        }
6329    }
6330
6331    /// Drop the in-memory loose set so the next access rescans the fanout. Called
6332    /// by `FileObjectDatabase::refresh_read_cache` after out-of-band installs.
6333    pub(crate) fn invalidate_cache(&self) {
6334        if let Ok(mut guard) = self.loose_cache.lock() {
6335            *guard = LoosePresenceCache::default();
6336        }
6337    }
6338
6339    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
6340        Self::new(repository_objects_dir(git_dir), format)
6341    }
6342
6343    fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
6344        if oid.format() != self.format {
6345            return Err(GitError::InvalidObjectId(format!(
6346                "object {oid} uses {}, store uses {}",
6347                oid.format().name(),
6348                self.format.name()
6349            )));
6350        }
6351        Ok(())
6352    }
6353
6354    pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
6355        self.validate_oid_format(oid)?;
6356        let hex = oid.to_hex();
6357        Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
6358    }
6359
6360    pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
6361        self.validate_oid_format(oid)?;
6362        if self.cached_loose_presence(oid) == Some(false) {
6363            return Ok(false);
6364        }
6365        let path = self.object_path(oid)?;
6366        Ok(path.exists())
6367    }
6368
6369    pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
6370        self.validate_oid_format(oid)?;
6371        if self.cached_loose_presence(oid) == Some(false) {
6372            return Ok(None);
6373        }
6374        let path = self.object_path(oid)?;
6375        match fs::metadata(path) {
6376            Ok(metadata) => Ok(Some(metadata.len())),
6377            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
6378            Err(err) => Err(GitError::Io(err.to_string())),
6379        }
6380    }
6381
6382    /// The object type and content size of `oid` from loose storage, inflating only
6383    /// the framing header (`"<type> <size>\0"`) and not the body. Output-limited
6384    /// reads keep miniz from inflating past the header even for large objects.
6385    /// Returns `Ok(None)` when the loose object is absent.
6386    pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
6387        self.validate_oid_format(oid)?;
6388        if self.cached_loose_presence(oid) == Some(false) {
6389            return Ok(None);
6390        }
6391        let path = self.object_path(oid)?;
6392        let compressed = match fs::read(&path) {
6393            Ok(compressed) => compressed,
6394            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
6395            Err(err) => return Err(GitError::Io(err.to_string())),
6396        };
6397        match inflate_loose_header(&compressed)? {
6398            LooseHeader::Ok(header) => {
6399                let header = std::str::from_utf8(&header)
6400                    .map_err(|err| GitError::InvalidObject(err.to_string()))?;
6401                let (kind, size) = header
6402                    .split_once(' ')
6403                    .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
6404                let object_type = kind.parse::<ObjectType>()?;
6405                let size = size
6406                    .parse::<u64>()
6407                    .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
6408                Ok(Some((object_type, size)))
6409            }
6410            LooseHeader::Bad => {
6411                // git's ULHR_BAD: the zlib wrapper's `error: inflate: ...` line, then
6412                // "unable to unpack <oid> header".
6413                emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
6414                Err(loose_unpack_header_failed(oid))
6415            }
6416            LooseHeader::TooLong => {
6417                // git inflates only the first `MAX_LOOSE_HEADER_LEN` bytes
6418                // (object-file.c `unpack_loose_header`) and reports ULHR_TOO_LONG when
6419                // no NUL terminator lands within them — whether the stream simply ends
6420                // early or overflows the window. Both collapse to the same diagnostic.
6421                Err(loose_header_too_long(oid))
6422            }
6423        }
6424    }
6425
    /// Loose object ids in this store, sorted by hex.
    ///
    /// Thin public wrapper over `loose_object_ids_cached`, which rescans loose
    /// storage on each call and refreshes the presence cache as a side effect.
    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
        self.loose_object_ids_cached()
    }
6430
6431    /// fsck's loose-object integrity probe, mirroring C git's `read_loose_object`
6432    /// (object-file.c) as called from `fsck_loose` (builtin/fsck.c): inflate and
6433    /// parse the file at `oid`'s loose path, then re-hash its content against the
6434    /// path-derived oid. `display_path` appears verbatim in the `error:`-level
6435    /// diagnostics — the path-form messages of `read_loose_object` ("unable to
6436    /// unpack header of <path>"), unlike the oid-form messages of the normal read
6437    /// path. Returns `Ok(None)` when no loose file exists for `oid`.
6438    pub fn verify_object(
6439        &self,
6440        oid: &ObjectId,
6441        display_path: &str,
6442    ) -> Result<Option<LooseObjectIntegrity>> {
6443        let path = self.object_path(oid)?;
6444        let compressed = match fs::read(&path) {
6445            Ok(compressed) => compressed,
6446            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
6447            Err(err) => return Err(GitError::Io(err.to_string())),
6448        };
6449        let mut decoder = ZlibDecoder::new(compressed.as_slice());
6450        let mut framed = Vec::new();
6451        if decoder.read_to_end(&mut framed).is_err() {
6452            emit_inflate_diagnostic(&compressed);
6453            // git inflates the header first (`unpack_loose_header`), then the body
6454            // (`unpack_loose_rest`). If the header inflated (its NUL is visible in
6455            // the partial output) but the body broke, that is a *content*
6456            // corruption: git's `unpack_loose_rest` prints `corrupt loose object
6457            // '<oid>'` (status != Z_STREAM_END), then `read_loose_object` adds
6458            // `unable to unpack contents of <path>`. If inflation died before the
6459            // header materialized, only the header message fires.
6460            if framed_loose_header_terminated(&framed) {
6461                eprintln!("error: corrupt loose object '{oid}'");
6462                eprintln!("error: unable to unpack contents of {display_path}");
6463            } else {
6464                eprintln!("error: unable to unpack header of {display_path}");
6465            }
6466            return Ok(Some(LooseObjectIntegrity::Corrupt));
6467        }
6468        if !framed_loose_header_terminated(&framed) {
6469            // ULHR_TOO_LONG collapses into the same path-form message here: C's
6470            // `read_loose_object` treats every non-OK `unpack_loose_header` alike.
6471            eprintln!("error: unable to unpack header of {display_path}");
6472            return Ok(Some(LooseObjectIntegrity::Corrupt));
6473        }
6474        // git's `unpack_loose_rest`/`check_stream_oid` reject trailing bytes after
6475        // the zlib stream: a fully-inflated object whose compressed input was not
6476        // entirely consumed is `garbage at end of loose object '<oid>'`, then
6477        // `object corrupt or missing: <path>` from `fsck_loose`. (read_to_end
6478        // stops at Z_STREAM_END and silently ignores the trailing bytes, so we
6479        // compare consumed input against the file size ourselves.)
6480        if (decoder.total_in() as usize) < compressed.len() {
6481            // git's `unpack_loose_rest` prints `garbage at end of loose object`
6482            // then returns NULL, so `read_loose_object` also prints `unable to
6483            // unpack contents of <path>`.
6484            eprintln!("error: garbage at end of loose object '{oid}'");
6485            eprintln!("error: unable to unpack contents of {display_path}");
6486            return Ok(Some(LooseObjectIntegrity::Corrupt));
6487        }
6488        // A truncated object can inflate to a clean stream end yet yield fewer
6489        // body bytes than the header's declared size. git's `unpack_loose_rest`
6490        // inflates exactly `size` bytes and, finding the stream ends short,
6491        // prints `corrupt loose object '<oid>'`; `read_loose_object` then adds
6492        // `unable to unpack contents of <path>`. Detect the short body here so it
6493        // is not misreported as a header-parse failure.
6494        if let Some(declared) = loose_header_declared_size(&framed) {
6495            let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
6496            let body_len = framed.len() - (nul + 1).min(framed.len());
6497            if body_len < declared {
6498                eprintln!("error: corrupt loose object '{oid}'");
6499                eprintln!("error: unable to unpack contents of {display_path}");
6500                return Ok(Some(LooseObjectIntegrity::Corrupt));
6501            }
6502        }
6503        let Ok(object) = parse_framed_object(&framed) else {
6504            // Distinguish git's two header-parse failures: a structurally valid
6505            // `"<word> <size>\0"` header whose *type word* is not a known object
6506            // type yields `unable to parse type from header '<header>'`, while a
6507            // genuinely malformed header yields `unable to parse header`.
6508            if let Some(header) = loose_header_with_unknown_type(&framed) {
6509                eprintln!("error: unable to parse type from header '{header}' of {display_path}");
6510            } else {
6511                eprintln!("error: unable to parse header of {display_path}");
6512            }
6513            return Ok(Some(LooseObjectIntegrity::Corrupt));
6514        };
6515        let actual = object.object_id(self.format)?;
6516        if &actual != oid {
6517            return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
6518        }
6519        Ok(Some(LooseObjectIntegrity::Ok))
6520    }
6521}
6522
6523/// Whether the inflated framing bytes contain the header's NUL terminator within
6524/// git's `MAX_HEADER_LEN` window (object-file.c `unpack_loose_header`'s success
6525/// condition).
6526fn framed_loose_header_terminated(framed: &[u8]) -> bool {
6527    framed
6528        .iter()
6529        .take(MAX_LOOSE_HEADER_LEN)
6530        .any(|byte| *byte == 0)
6531}
6532
6533/// If the framing has a structurally valid `"<word> <size>\0"` header whose body
6534/// length matches `<size>` but whose `<word>` is not a known object type, return
6535/// the header string (the bytes before the NUL). Mirrors git's
6536/// `parse_loose_header` reporting `unable to parse type from header '<header>'`.
6537fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
6538    let nul = framed.iter().position(|&b| b == 0)?;
6539    let header = std::str::from_utf8(&framed[..nul]).ok()?;
6540    let (kind, size) = header.split_once(' ')?;
6541    let size: usize = size.parse().ok()?;
6542    // Body length must match the declared size (otherwise it is a different
6543    // corruption, handled by the generic path).
6544    if framed.len() - (nul + 1) != size {
6545        return None;
6546    }
6547    // A known type word would have parsed successfully upstream; only return
6548    // when the word is genuinely unknown.
6549    if kind.parse::<ObjectType>().is_ok() {
6550        return None;
6551    }
6552    Some(header.to_string())
6553}
6554
/// The size declared in a loose object's `"<type> <size>\0"` header, if the
/// header is structurally a `<word> <decimal-size>` pair. Used to detect a body
/// inflated short of its declared length (a truncated object).
///
/// Only *canonical* decimal sizes are accepted: ASCII digits only, with no
/// leading zero unless the size is exactly `0`. git's `parse_loose_header`
/// rejects anything else as a malformed header, whereas Rust's integer
/// `FromStr` would accept a leading `+` and leading zeros. Returning `None`
/// for those keeps a malformed header from being diagnosed as a short body.
///
/// Returns `None` when there is no NUL, the header is not UTF-8, there is no
/// space separator, or the size is non-canonical or does not fit in `usize`.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let nul = framed.iter().position(|&b| b == 0)?;
    let header = std::str::from_utf8(&framed[..nul]).ok()?;
    let (_kind, size) = header.split_once(' ')?;
    let digits_only = size.bytes().all(|b| b.is_ascii_digit());
    let no_leading_zero = size.len() == 1 || !size.starts_with('0');
    if !(digits_only && no_leading_zero) {
        return None;
    }
    // An empty size passes the checks above and is rejected by `parse`.
    size.parse::<usize>().ok()
}
6564
/// Outcome of inflating a loose object's header, mirroring git's
/// `unpack_loose_header` result codes (object-file.c `enum
/// unpack_loose_header_result`). Produced by `inflate_loose_header`.
enum LooseHeader {
    /// ULHR_OK: a NUL-terminated header was found within the window. Carries the
    /// header bytes up to (not including) the NUL.
    Ok(Vec<u8>),
    /// ULHR_BAD: the zlib stream would not inflate (status != Z_OK/Z_STREAM_END).
    Bad,
    /// ULHR_TOO_LONG: inflation succeeded but no NUL appeared in the bytes it
    /// produced, whether the header window filled up or the stream yielded
    /// fewer bytes than the window without a terminator.
    TooLong,
}
6579
6580/// Inflate a loose object's *header* exactly as git's `unpack_loose_header` does
6581/// (object-file.c): a single bounded inflate into a `MAX_LOOSE_HEADER_LEN`-byte
6582/// output buffer, then look for the header-terminating NUL in what came out.
6583///
6584/// The byte budget is load-bearing for corruption parity: git inflates only up to
6585/// `MAX_HEADER_LEN` (32) bytes of *output* before stopping, so a `cat-file -s`/`-t`
6586/// header read detects a zlib data error only when it lands within those first 32
6587/// inflated bytes (the header plus the start of the body for a small object) — and
6588/// silently returns the header for corruption buried deeper in the body, which the
6589/// full-object read path catches instead. A byte-by-byte loop that stopped at the
6590/// NUL would never inflate into the corrupt region and miss the bit-error case
6591/// (t1060 "getting type of a corrupt blob fails"); feeding too much output budget
6592/// would over-detect relative to git. So this matches git's exact window.
6593fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
6594    let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
6595    let mut decompress = Decompress::new(true);
6596    // git feeds the whole mapped file as `avail_in` and inflates once into a
6597    // 32-byte `avail_out`; zlib stops at the output limit (Z_OK with avail_out==0)
6598    // or at the stream's end, propagating Z_DATA_ERROR for a corrupt stream.
6599    let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
6600    let produced = decompress.total_out() as usize;
6601    match status {
6602        Ok(_) => {
6603            let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
6604            match window.iter().position(|&byte| byte == 0) {
6605                Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
6606                // No NUL within the window: either the stream ended early or the
6607                // header overflows `MAX_LOOSE_HEADER_LEN`. git collapses both into
6608                // ULHR_TOO_LONG (object-file.c `unpack_loose_header`).
6609                None => Ok(LooseHeader::TooLong),
6610            }
6611        }
6612        // Any zlib error before a NUL materializes is git's ULHR_BAD.
6613        Err(_) => Ok(LooseHeader::Bad),
6614    }
6615}
6616
6617impl ObjectReader for LooseObjectStore {
6618    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
6619        self.validate_oid_format(oid)?;
6620        // Skip the `open()` (and its ENOENT) when an already-built loose cache
6621        // knows the id is absent. Without a cache, use an exact path probe; a
6622        // full fanout scan is far more expensive for one-shot packed-object reads.
6623        if self.cached_loose_presence(oid) == Some(false) {
6624            return Err(GitError::object_not_found_in(
6625                *oid,
6626                MissingObjectContext::Read,
6627            ));
6628        }
6629        let path = self.object_path(oid)?;
6630        let compressed = match fs::read(&path) {
6631            Ok(compressed) => compressed,
6632            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
6633                return Err(GitError::object_not_found_in(
6634                    *oid,
6635                    MissingObjectContext::Read,
6636                ));
6637            }
6638            Err(err) => return Err(GitError::Io(err.to_string())),
6639        };
6640        let mut decoder = ZlibDecoder::new(compressed.as_slice());
6641        let mut framed = Vec::new();
6642        if decoder.read_to_end(&mut framed).is_err() {
6643            emit_inflate_diagnostic(&compressed);
6644            // A stream that dies before the framing header materializes is git's
6645            // ULHR_BAD ("unable to unpack <oid> header"); with the header intact,
6646            // the body is what broke (`unpack_loose_rest`'s "corrupt loose
6647            // object").
6648            if !framed_loose_header_terminated(&framed) {
6649                return Err(loose_unpack_header_failed(oid));
6650            }
6651            return Err(GitError::InvalidObject(format!(
6652                "corrupt loose object '{oid}'"
6653            )));
6654        }
6655        // git only inflates the first `MAX_LOOSE_HEADER_LEN` bytes looking for the
6656        // header's NUL terminator before parsing the type; an over-long header is
6657        // rejected here (with git's diagnostic) rather than failing later as an
6658        // "unknown object type". Mirror that so `cat-file -p` matches upstream.
6659        if framed
6660            .iter()
6661            .take(MAX_LOOSE_HEADER_LEN)
6662            .all(|byte| *byte != 0)
6663        {
6664            return Err(loose_header_too_long(oid));
6665        }
6666        let object = parse_framed_object(&framed)?;
6667        // Trust the loose object's on-disk name rather than re-hashing its full body
6668        // on every read (see `verify_reads_enabled`); use `validate`/fsck or
6669        // `SLEY_VERIFY_READS` for an explicit integrity check.
6670        if verify_reads_enabled() {
6671            let actual = object.object_id(self.format)?;
6672            if &actual != oid {
6673                return Err(GitError::InvalidObject(format!(
6674                    "loose object {} hashes to {actual}",
6675                    path.display()
6676                )));
6677            }
6678        }
6679        Ok(Arc::new(object))
6680    }
6681}
6682
6683impl ObjectWriter for LooseObjectStore {
6684    fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
6685        let oid = object.object_id(self.format)?;
6686        let path = self.object_path(&oid)?;
6687        if path.exists() {
6688            self.note_loose_write(oid);
6689            return Ok(oid);
6690        }
6691        let parent = path
6692            .parent()
6693            .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
6694        fs::create_dir_all(parent)?;
6695        let temp_path = unique_temp_path(parent);
6696        let write_result = (|| -> Result<()> {
6697            let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6698            encoder.write_all(&object.framed_bytes())?;
6699            let compressed = encoder.finish()?;
6700            {
6701                let mut file = fs::OpenOptions::new()
6702                    .write(true)
6703                    .create_new(true)
6704                    .open(&temp_path)?;
6705                file.write_all(&compressed)?;
6706                // No fsync: git's default `core.fsync=none` fsyncs nothing on the
6707                // loose-object write path (object-file.c writes the temp file and
6708                // renames it without syncing unless `core.fsync` names
6709                // `loose-object`/`objects`/`all`, which it does not by default).
6710                // A per-object sync_all() here made `git add` of N files cost N
6711                // fsyncs — the dominant term in sley#27's 10x `add -u` slowdown —
6712                // for durability git itself does not provide by default. The
6713                // create_new temp + atomic rename below still guarantees the
6714                // object never appears half-written under its final name.
6715            }
6716            match fs::rename(&temp_path, &path) {
6717                Ok(()) => Ok(()),
6718                Err(_) if path.exists() => {
6719                    let _ = fs::remove_file(&temp_path);
6720                    Ok(())
6721                }
6722                Err(err) => Err(GitError::Io(err.to_string())),
6723            }
6724        })();
6725        if write_result.is_err() {
6726            let _ = fs::remove_file(&temp_path);
6727        }
6728        write_result?;
6729        self.note_loose_write(oid);
6730        Ok(oid)
6731    }
6732}
6733
6734fn unique_temp_path(parent: &Path) -> PathBuf {
6735    let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
6736    parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
6737}
6738
6739#[cfg(test)]
6740mod tests {
6741    use super::*;
6742    use sley_core::BString;
6743    use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
6744    use sley_pack::{PackFile, PackWriteOptions};
6745
6746    fn blob_of(byte: u8, len: usize) -> EncodedObject {
6747        EncodedObject::new(ObjectType::Blob, vec![byte; len])
6748    }
6749
6750    fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
6751        Arc::new(blob_of(byte, len))
6752    }
6753
6754    fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
6755        reader
6756            .read_object(oid)
6757            .expect("test operation should succeed")
6758            .as_ref()
6759            .clone()
6760    }
6761
6762    #[test]
6763    fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
6764        // Budget holds two ~1 KiB objects but not three.
6765        let one = cached_object_cost(&blob_of(0, 1000));
6766        let mut cache = LruCache::<u32>::new(one * 2 + 8);
6767        cache.put(1, cached_blob_of(b'a', 1000));
6768        cache.put(2, cached_blob_of(b'b', 1000));
6769        // Touch key 1 so key 2 becomes least-recently-used.
6770        assert!(cache.get(&1).is_some());
6771        cache.put(3, cached_blob_of(b'c', 1000));
6772        // Key 2 (LRU) is evicted; 1 and 3 remain.
6773        assert!(cache.get(&1).is_some());
6774        assert!(cache.get(&2).is_none());
6775        assert!(cache.get(&3).is_some());
6776    }
6777
6778    #[test]
6779    fn lru_cache_zero_budget_is_inert() {
6780        let mut cache = LruCache::<u32>::new(0);
6781        cache.put(1, cached_blob_of(b'a', 16));
6782        assert!(cache.get(&1).is_none());
6783    }
6784
6785    #[test]
6786    fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
6787        let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
6788        cache.put(1, cached_blob_of(b'a', 50));
6789        assert!(cache.get(&1).is_some());
6790        // An object that cannot fit is not cached, and it evicts the prior entry
6791        // stored under the same key (so we never serve a stale value for it).
6792        cache.put(1, cached_blob_of(b'b', 10_000));
6793        assert!(cache.get(&1).is_none());
6794        // A subsequent fitting insert under another key still works and accounting
6795        // is not corrupted by the oversized insert.
6796        cache.put(2, cached_blob_of(b'c', 50));
6797        assert!(cache.get(&2).is_some());
6798    }
6799
6800    #[test]
6801    fn lru_cache_replacing_entry_updates_byte_accounting() {
6802        // Budget holds two 500-byte objects (plus headroom) but not a 500 + a
6803        // ~1900-byte object.
6804        let small = cached_object_cost(&blob_of(0, 500));
6805        let mut cache = LruCache::<u32>::new(small * 2 + 200);
6806        cache.put(1, cached_blob_of(b'a', 500));
6807        cache.put(2, cached_blob_of(b'b', 500));
6808        assert!(cache.get(&1).is_some());
6809        assert!(cache.get(&2).is_some());
6810        // Replace key 2 (now MRU after the gets above re-ordered 1 then 2) with a
6811        // bigger value that still fits the budget alone but makes the running total
6812        // exceed it; the LRU (key 1) is evicted while the replaced key 2 stays.
6813        // This exercises the replace-path accounting.
6814        cache.put(2, cached_blob_of(b'b', 1000));
6815        assert!(cache.get(&2).is_some());
6816        assert!(cache.get(&1).is_none());
6817    }
6818
6819    #[test]
6820    fn write_and_validate_blob() {
6821        let db = ObjectDatabase::new(ObjectFormat::Sha1);
6822        let oid = db
6823            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
6824            .expect("test operation should succeed");
6825        assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
6826        db.validate(&oid).expect("test operation should succeed");
6827    }
6828
6829    #[test]
6830    fn loose_store_writes_and_reads_object() {
6831        let root = std::env::temp_dir().join(format!(
6832            "sley-loose-store-{}-{}",
6833            std::process::id(),
6834            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
6835        ));
6836        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
6837        let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
6838        let oid = store
6839            .write_object(object.clone())
6840            .expect("test operation should succeed");
6841        assert_eq!(read_object_for_assert(&store, &oid), object);
6842        assert!(
6843            store
6844                .object_path(&oid)
6845                .expect("test operation should succeed")
6846                .exists()
6847        );
6848        fs::remove_dir_all(root).expect("test operation should succeed");
6849    }
6850
6851    #[test]
6852    fn read_header_detects_corruption_within_gits_header_window() {
6853        // git's `unpack_loose_header` inflates only the first MAX_HEADER_LEN (32)
6854        // bytes of output; a zlib data error inside that window makes `cat-file
6855        // -s`/`-t` fail (ULHR_BAD → "unable to unpack header"). A byte-by-byte
6856        // header read that stopped at the NUL would never inflate into the corrupt
6857        // region and would silently return a bogus size — the t1060 "getting type
6858        // of a corrupt blob fails" bug. Corrupt a byte inside the inflate stream of
6859        // a tiny object so the damage lands within the first 32 inflated bytes.
6860        let root = temp_root("sley-loose-header-corrupt");
6861        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
6862        let object = EncodedObject::new(ObjectType::Blob, b"content\n".to_vec());
6863        let oid = store
6864            .write_object(object)
6865            .expect("test operation should succeed");
6866        let path = store
6867            .object_path(&oid)
6868            .expect("test operation should succeed");
6869        let mut bytes = fs::read(&path).expect("test operation should succeed");
6870        // Offset 10 is inside the deflate stream (past the 2-byte zlib header) and,
6871        // for an 8-byte blob, decodes into the first 32 output bytes. Zero it to
6872        // break inflation, mirroring t1060's `corrupt_byte HEAD:content.t 10`.
6873        bytes[10] = 0;
6874        fs::write(&path, &bytes).expect("test operation should succeed");
6875        store.invalidate_cache();
6876        let err = store
6877            .read_header(&oid)
6878            .expect_err("corrupt loose header must fail like git's ULHR_BAD");
6879        let msg = err.to_string();
6880        assert!(
6881            msg.contains("unable to unpack") && msg.contains(&oid.to_hex()),
6882            "expected git's ULHR_BAD message, got: {msg}"
6883        );
6884        fs::remove_dir_all(root).expect("test operation should succeed");
6885    }
6886
6887    #[test]
6888    fn read_header_ignores_corruption_past_gits_header_window() {
6889        // Mirror git: corruption deeper than the 32-byte header window is NOT
6890        // detected by a header-only read (`cat-file -s` still returns the size);
6891        // the full-object read path catches it instead. Over-detecting here would
6892        // diverge from upstream on large objects with a clean header.
6893        let root = temp_root("sley-loose-header-deep-corrupt");
6894        let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
6895        // Incompressible body so the deflate stream is long and a deep byte is well
6896        // past the 32 inflated header-window bytes.
6897        let body: Vec<u8> = (0..4096u32)
6898            .map(|i| (i.wrapping_mul(2654435761)) as u8)
6899            .collect();
6900        let object = EncodedObject::new(ObjectType::Blob, body.clone());
6901        let oid = store
6902            .write_object(object)
6903            .expect("test operation should succeed");
6904        let path = store
6905            .object_path(&oid)
6906            .expect("test operation should succeed");
6907        let mut bytes = fs::read(&path).expect("test operation should succeed");
6908        let deep = bytes.len() / 2;
6909        bytes[deep] ^= 0xff;
6910        fs::write(&path, &bytes).expect("test operation should succeed");
6911        store.invalidate_cache();
6912        let header = store
6913            .read_header(&oid)
6914            .expect("header-only read must still succeed for deep body corruption");
6915        assert_eq!(header, Some((ObjectType::Blob, body.len() as u64)));
6916        fs::remove_dir_all(root).expect("test operation should succeed");
6917    }
6918
6919    #[test]
6920    fn file_database_reads_object_from_pack_index() {
6921        let root = temp_root("sley-file-odb-pack");
6922        let git_dir = root.join(".git");
6923        let pack_dir = git_dir.join("objects").join("pack");
6924        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6925        let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
6926        let oid = object
6927            .object_id(ObjectFormat::Sha1)
6928            .expect("test operation should succeed");
6929        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6930            .expect("test operation should succeed");
6931        let pack_name = written.checksum.to_hex();
6932        fs::write(
6933            pack_dir.join(format!("pack-{pack_name}.pack")),
6934            written.pack,
6935        )
6936        .expect("test operation should succeed");
6937        fs::write(
6938            pack_dir.join(format!("pack-{pack_name}.idx")),
6939            written.index,
6940        )
6941        .expect("test operation should succeed");
6942
6943        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6944        assert!(db.contains(&oid).expect("test operation should succeed"));
6945        assert_eq!(read_object_for_assert(&db, &oid), object);
6946        fs::remove_dir_all(root).expect("test operation should succeed");
6947    }
6948
6949    #[test]
6950    fn file_database_loose_cache_observes_same_process_write_after_miss() {
6951        let root = temp_root("sley-file-odb-loose-cache-write");
6952        let git_dir = root.join(".git");
6953        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6954        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6955
6956        let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
6957        let oid = object
6958            .object_id(ObjectFormat::Sha1)
6959            .expect("test operation should succeed");
6960
6961        assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
6962        db.loose()
6963            .write_object(object.clone())
6964            .expect("test operation should succeed");
6965
6966        assert_eq!(read_object_for_assert(&db, &oid), object);
6967        fs::remove_dir_all(root).expect("test operation should succeed");
6968    }
6969
6970    #[test]
6971    fn object_presence_checker_observes_same_process_loose_write_after_miss() {
6972        let root = temp_root("sley-presence-checker-loose-cache-write");
6973        let git_dir = root.join(".git");
6974        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6975        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6976        let mut checker = db.presence_checker();
6977
6978        let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
6979        let oid = object
6980            .object_id(ObjectFormat::Sha1)
6981            .expect("test operation should succeed");
6982
6983        assert!(
6984            !checker
6985                .contains(&oid)
6986                .expect("test operation should succeed")
6987        );
6988        db.loose()
6989            .write_object(object)
6990            .expect("test operation should succeed");
6991
6992        assert!(
6993            checker
6994                .contains(&oid)
6995                .expect("test operation should succeed")
6996        );
6997        fs::remove_dir_all(root).expect("test operation should succeed");
6998    }
6999
7000    #[test]
7001    fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
7002        let root = temp_root("sley-read-object-header");
7003        let git_dir = root.join(".git");
7004        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7005        let format = ObjectFormat::Sha1;
7006        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7007
7008        // Loose object: the header read inflates only the framing, not the body.
7009        let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
7010        let loose_oid = db
7011            .write_object(loose.clone())
7012            .expect("test operation should succeed");
7013
7014        // Packed objects, including an ofs-delta whose *result* size lives in the
7015        // delta stream (not the pack entry header) and whose type is inherited from
7016        // its base at the end of the chain.
7017        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
7018        let mut child_body = vec![b'a'; 4096];
7019        child_body.extend_from_slice(b" plus a deltified tail\n");
7020        let child = EncodedObject::new(ObjectType::Blob, child_body);
7021        let commitish =
7022            EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
7023        let base_oid = base
7024            .object_id(format)
7025            .expect("test operation should succeed");
7026        let child_oid = child
7027            .object_id(format)
7028            .expect("test operation should succeed");
7029        let commit_oid = commitish
7030            .object_id(format)
7031            .expect("test operation should succeed");
7032        let options = PackWriteOptions::new()
7033            .with_prefer_ofs_delta(true)
7034            .with_reorder(false);
7035        let pack = PackFile::write_packed_with_options(
7036            &[base.clone(), child.clone(), commitish.clone()],
7037            format,
7038            &options,
7039        )
7040        .expect("test operation should succeed");
7041        db.install_pack(&pack)
7042            .expect("test operation should succeed");
7043
7044        // The header read agrees with a full decode for every object and storage
7045        // class, without ever materializing the body.
7046        for (oid, want_type, want_len) in [
7047            (&loose_oid, ObjectType::Blob, loose.body.len()),
7048            (&base_oid, ObjectType::Blob, base.body.len()),
7049            (&child_oid, ObjectType::Blob, child.body.len()),
7050            (&commit_oid, ObjectType::Commit, commitish.body.len()),
7051        ] {
7052            assert_eq!(
7053                db.read_object_header(oid)
7054                    .expect("test operation should succeed"),
7055                Some((want_type, want_len as u64)),
7056                "header for {oid}"
7057            );
7058            let full = db.read_object(oid).expect("test operation should succeed");
7059            assert_eq!(
7060                db.read_object_header(oid)
7061                    .expect("test operation should succeed"),
7062                Some((full.object_type, full.body.len() as u64))
7063            );
7064        }
7065
7066        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
7067            .expect("test operation should succeed");
7068        assert_eq!(
7069            db.read_object_header(&missing)
7070                .expect("test operation should succeed"),
7071            None
7072        );
7073        fs::remove_dir_all(root).expect("test operation should succeed");
7074    }
7075
7076    #[test]
7077    fn object_storage_info_reports_loose_packed_and_delta_metadata() {
7078        let root = temp_root("sley-object-storage-info");
7079        let git_dir = root.join(".git");
7080        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7081        let format = ObjectFormat::Sha1;
7082        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7083
7084        let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
7085        let loose_oid = db
7086            .write_object(loose)
7087            .expect("test operation should succeed");
7088        let loose_size = fs::metadata(
7089            db.loose()
7090                .object_path(&loose_oid)
7091                .expect("test operation should succeed"),
7092        )
7093        .expect("test operation should succeed")
7094        .len();
7095        let loose_info = db
7096            .object_storage_info(&loose_oid)
7097            .expect("test operation should succeed")
7098            .expect("test operation should succeed");
7099        assert_eq!(loose_info.disk_size, loose_size);
7100        assert_eq!(
7101            loose_info.deltabase,
7102            zero_oid(format).expect("test operation should succeed")
7103        );
7104
7105        let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
7106        let mut child_body = vec![b'a'; 4096];
7107        child_body.extend_from_slice(b" changed tail\n");
7108        let child = EncodedObject::new(ObjectType::Blob, child_body);
7109        let base_oid = base
7110            .object_id(format)
7111            .expect("test operation should succeed");
7112        let child_oid = child
7113            .object_id(format)
7114            .expect("test operation should succeed");
7115        let options = PackWriteOptions::new()
7116            .with_prefer_ofs_delta(true)
7117            .with_reorder(false);
7118        let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
7119            .expect("test operation should succeed");
7120        db.install_pack(&pack)
7121            .expect("test operation should succeed");
7122
7123        let base_info = db
7124            .object_storage_info(&base_oid)
7125            .expect("test operation should succeed")
7126            .expect("test operation should succeed");
7127        assert!(base_info.disk_size > 0);
7128        assert_eq!(
7129            base_info.deltabase,
7130            zero_oid(format).expect("test operation should succeed")
7131        );
7132
7133        let child_info = db
7134            .object_storage_info(&child_oid)
7135            .expect("test operation should succeed")
7136            .expect("test operation should succeed");
7137        assert!(child_info.disk_size > 0);
7138        assert_eq!(child_info.deltabase, base_oid);
7139
7140        let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
7141            .expect("test operation should succeed");
7142        assert_eq!(
7143            db.object_storage_info(&missing)
7144                .expect("test operation should succeed"),
7145            None
7146        );
7147        fs::remove_dir_all(root).expect("test operation should succeed");
7148    }
7149
7150    #[test]
7151    fn file_database_resolves_unique_loose_object_prefix() {
7152        let root = temp_root("sley-file-odb-prefix-loose");
7153        let git_dir = root.join(".git");
7154        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7155        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7156        let object = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
7157        let oid = db
7158            .write_object(object)
7159            .expect("test operation should succeed");
7160        let prefix = &oid.to_hex()[..8];
7161
7162        assert_eq!(
7163            db.resolve_prefix(prefix)
7164                .expect("test operation should succeed"),
7165            ObjectPrefixResolution::Unique(oid)
7166        );
7167        assert!(
7168            db.object_ids()
7169                .expect("test operation should succeed")
7170                .contains(&oid)
7171        );
7172        fs::remove_dir_all(root).expect("test operation should succeed");
7173    }
7174
7175    #[test]
7176    fn file_database_resolves_unique_packed_object_prefix() {
7177        let root = temp_root("sley-file-odb-prefix-packed");
7178        let git_dir = root.join(".git");
7179        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7180        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7181        let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
7182        let oid = object
7183            .object_id(ObjectFormat::Sha1)
7184            .expect("test operation should succeed");
7185        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7186            .expect("test operation should succeed");
7187        db.install_pack(&pack)
7188            .expect("test operation should succeed");
7189        let prefix = &oid.to_hex()[..8];
7190
7191        assert_eq!(
7192            db.resolve_prefix(prefix)
7193                .expect("test operation should succeed"),
7194            ObjectPrefixResolution::Unique(oid)
7195        );
7196        fs::remove_dir_all(root).expect("test operation should succeed");
7197    }
7198
7199    #[test]
7200    fn file_database_reports_ambiguous_object_prefix() {
7201        let root = temp_root("sley-file-odb-prefix-ambiguous");
7202        let git_dir = root.join(".git");
7203        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7204        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7205        let mut seen = HashMap::new();
7206        let (prefix, first, second) = (0..10_000)
7207            .find_map(|idx| {
7208                let object =
7209                    EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
7210                let oid = db
7211                    .write_object(object)
7212                    .expect("test operation should succeed");
7213                let prefix = oid.to_hex()[..4].to_string();
7214                seen.insert(prefix.clone(), oid)
7215                    .map(|first| (prefix, first, oid))
7216            })
7217            .expect("test should find a 4-hex collision");
7218
7219        let ObjectPrefixResolution::Ambiguous(mut matches) = db
7220            .resolve_prefix(&prefix)
7221            .expect("test operation should succeed")
7222        else {
7223            panic!("expected ambiguous prefix {prefix}");
7224        };
7225        matches.sort_by_key(ObjectId::to_hex);
7226        let mut expected = vec![first, second];
7227        expected.sort_by_key(ObjectId::to_hex);
7228        assert_eq!(matches, expected);
7229        fs::remove_dir_all(root).expect("test operation should succeed");
7230    }
7231
7232    #[test]
7233    fn file_database_rejects_too_short_object_prefix() {
7234        let root = temp_root("sley-file-odb-prefix-short");
7235        let git_dir = root.join(".git");
7236        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7237        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7238
7239        assert!(matches!(
7240            db.resolve_prefix("abc"),
7241            Err(GitError::InvalidObjectId(_))
7242        ));
7243        fs::remove_dir_all(root).expect("test operation should succeed");
7244    }
7245
7246    #[test]
7247    fn file_database_reads_sha256_object_from_pack_index() {
7248        let root = temp_root("sley-file-odb-pack-sha256");
7249        let git_dir = root.join(".git");
7250        let pack_dir = git_dir.join("objects").join("pack");
7251        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
7252        let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
7253        let oid = object
7254            .object_id(ObjectFormat::Sha256)
7255            .expect("test operation should succeed");
7256        let written =
7257            PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7258                .expect("test operation should succeed");
7259        let pack_name = written.checksum.to_hex();
7260        fs::write(
7261            pack_dir.join(format!("pack-{pack_name}.pack")),
7262            written.pack,
7263        )
7264        .expect("test operation should succeed");
7265        fs::write(
7266            pack_dir.join(format!("pack-{pack_name}.idx")),
7267            written.index,
7268        )
7269        .expect("test operation should succeed");
7270
7271        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
7272        assert!(db.contains(&oid).expect("test operation should succeed"));
7273        assert_eq!(read_object_for_assert(&db, &oid), object);
7274        fs::remove_dir_all(root).expect("test operation should succeed");
7275    }
7276
7277    #[test]
7278    fn file_database_installs_sha256_pack_without_loose_objects() {
7279        let root = temp_root("sley-file-odb-install-pack");
7280        let git_dir = root.join(".git");
7281        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7282        let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
7283        let oid = object
7284            .object_id(ObjectFormat::Sha256)
7285            .expect("test operation should succeed");
7286        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7287            .expect("test operation should succeed");
7288        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
7289
7290        let result = db
7291            .install_pack(&pack)
7292            .expect("test operation should succeed");
7293
7294        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
7295        assert_eq!(result.object_ids, vec![oid]);
7296        assert!(result.pack_path.exists());
7297        assert!(result.index_path.exists());
7298        assert_eq!(result.promisor_path, None);
7299        assert!(
7300            !db.loose()
7301                .object_path(&oid)
7302                .expect("test operation should succeed")
7303                .exists()
7304        );
7305        assert!(db.contains(&oid).expect("test operation should succeed"));
7306        assert_eq!(read_object_for_assert(&db, &oid), object);
7307        fs::remove_dir_all(root).expect("test operation should succeed");
7308    }
7309
7310    #[test]
7311    fn file_database_installs_raw_sha256_pack_without_loose_objects() {
7312        let root = temp_root("sley-file-odb-install-raw-pack");
7313        let git_dir = root.join(".git");
7314        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7315        let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
7316        let oid = object
7317            .object_id(ObjectFormat::Sha256)
7318            .expect("test operation should succeed");
7319        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7320            .expect("test operation should succeed");
7321        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
7322        let mut reader = pack.pack.as_slice();
7323
7324        let result = db
7325            .install_raw_pack_from_reader(&mut reader)
7326            .expect("test operation should succeed");
7327
7328        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
7329        assert_eq!(result.object_ids, vec![oid]);
7330        assert!(result.pack_path.exists());
7331        assert!(result.index_path.exists());
7332        assert_eq!(result.promisor_path, None);
7333        assert!(
7334            !db.loose()
7335                .object_path(&oid)
7336                .expect("test operation should succeed")
7337                .exists()
7338        );
7339        assert!(db.contains(&oid).expect("test operation should succeed"));
7340        assert_eq!(read_object_for_assert(&db, &oid), object);
7341        fs::remove_dir_all(root).expect("test operation should succeed");
7342    }
7343
7344    #[test]
7345    fn file_database_streams_raw_pack_install_to_packfile() {
7346        use std::io::Write as _;
7347
7348        let root = temp_root("sley-file-odb-stream-raw-pack");
7349        let git_dir = root.join(".git");
7350        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7351        let object = EncodedObject::new(ObjectType::Blob, b"streamed raw pack\n".to_vec());
7352        let oid = object
7353            .object_id(ObjectFormat::Sha1)
7354            .expect("test operation should succeed");
7355        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
7356            .expect("test operation should succeed");
7357        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7358
7359        let mut install = db
7360            .begin_raw_pack_install(pack.checksum, pack.pack.len() as u64)
7361            .expect("test operation should succeed");
7362        for chunk in pack.pack.chunks(5) {
7363            install
7364                .write_all(chunk)
7365                .expect("test operation should succeed");
7366        }
7367        let result = install.finish().expect("test operation should succeed");
7368
7369        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
7370        assert_eq!(result.object_ids, vec![oid]);
7371        assert_eq!(
7372            fs::read(&result.pack_path).expect("test operation should succeed"),
7373            pack.pack
7374        );
7375        assert!(result.index_path.exists());
7376        assert!(db.contains(&oid).expect("test operation should succeed"));
7377        assert_eq!(read_object_for_assert(&db, &oid), object);
7378
7379        let bad_id = ObjectId::from_raw(ObjectFormat::Sha1, &[0x42; 20])
7380            .expect("test operation should succeed");
7381        let mut bad_install = db
7382            .begin_raw_pack_install(bad_id, pack.pack.len() as u64)
7383            .expect("test operation should succeed");
7384        bad_install
7385            .write_all(&pack.pack)
7386            .expect("test operation should succeed");
7387        assert!(
7388            bad_install.finish().is_err(),
7389            "checksum mismatch should reject the streamed pack"
7390        );
7391        assert!(
7392            !git_dir
7393                .join("objects")
7394                .join("pack")
7395                .join(format!("pack-{}.pack", bad_id.to_hex()))
7396                .exists()
7397        );
7398
7399        fs::remove_dir_all(root).expect("test operation should succeed");
7400    }
7401
7402    #[test]
7403    fn file_database_installs_unknown_length_raw_pack_from_reader() {
7404        let root = temp_root("sley-file-odb-install-raw-pack-reader");
7405        let git_dir = root.join(".git");
7406        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7407        let object = EncodedObject::new(ObjectType::Blob, b"reader streamed raw pack\n".to_vec());
7408        let oid = object
7409            .object_id(ObjectFormat::Sha1)
7410            .expect("test operation should succeed");
7411        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
7412            .expect("test operation should succeed");
7413        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7414        let mut reader = pack.pack.as_slice();
7415
7416        let result = db
7417            .install_raw_pack_from_reader(&mut reader)
7418            .expect("test operation should succeed");
7419
7420        assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
7421        assert_eq!(result.object_ids, vec![oid]);
7422        assert_eq!(
7423            fs::read(&result.pack_path).expect("test operation should succeed"),
7424            pack.pack
7425        );
7426        assert!(result.index_path.exists());
7427        assert!(db.contains(&oid).expect("test operation should succeed"));
7428        assert_eq!(read_object_for_assert(&db, &oid), object);
7429        fs::remove_dir_all(root).expect("test operation should succeed");
7430    }
7431
7432    #[test]
7433    fn file_database_rejects_unknown_length_raw_pack_with_trailing_bytes() {
7434        let root = temp_root("sley-file-odb-install-raw-pack-reader-trailing");
7435        let git_dir = root.join(".git");
7436        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7437        let object = EncodedObject::new(ObjectType::Blob, b"trailing streamed raw pack\n".to_vec());
7438        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
7439            .expect("test operation should succeed");
7440        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7441        let mut bytes = pack.pack;
7442        bytes.extend_from_slice(b"not part of the pack");
7443        let mut reader = bytes.as_slice();
7444
7445        let err = db
7446            .install_raw_pack_from_reader(&mut reader)
7447            .expect_err("trailing bytes should be rejected");
7448
7449        assert!(err.to_string().contains("trailing bytes after checksum"));
7450        let pack_dir = git_dir.join("objects").join("pack");
7451        let pack_entries = fs::read_dir(&pack_dir)
7452            .map(|entries| entries.count())
7453            .unwrap_or_default();
7454        assert_eq!(pack_entries, 0);
7455        fs::remove_dir_all(root).expect("test operation should succeed");
7456    }
7457
7458    #[test]
7459    fn file_database_rejects_noncanonical_pack_index() {
7460        let root = temp_root("sley-file-odb-install-bad-index");
7461        let git_dir = root.join(".git");
7462        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7463        let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
7464        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
7465            .expect("test operation should succeed");
7466        let mut entries = pack.entries.clone();
7467        entries[0].crc32 ^= 1;
7468        let mut bad_pack = pack.clone();
7469        bad_pack.index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack.checksum)
7470            .expect("test operation should succeed");
7471        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7472
7473        assert!(db.install_pack(&bad_pack).is_err());
7474
7475        fs::remove_dir_all(root).expect("test operation should succeed");
7476    }
7477
7478    #[test]
7479    fn file_database_installs_raw_promisor_pack_with_sidecar() {
7480        let root = temp_root("sley-file-odb-install-raw-promisor-pack");
7481        let git_dir = root.join(".git");
7482        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7483        let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
7484        let oid = object
7485            .object_id(ObjectFormat::Sha1)
7486            .expect("test operation should succeed");
7487        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
7488            .expect("test operation should succeed");
7489        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7490        let mut reader = pack.pack.as_slice();
7491
7492        let result = db
7493            .install_raw_pack_from_reader_with_options(
7494                &mut reader,
7495                RawPackInstallOptions { promisor: true },
7496            )
7497            .expect("test operation should succeed");
7498
7499        let promisor_path = result.promisor_path.expect("promisor sidecar");
7500        assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
7501        assert_eq!(
7502            promisor_path.extension().and_then(|ext| ext.to_str()),
7503            Some("promisor")
7504        );
7505        assert!(promisor_path.exists());
7506        assert_eq!(
7507            fs::read(&promisor_path).expect("test operation should succeed"),
7508            b""
7509        );
7510        assert!(result.pack_path.exists());
7511        assert!(result.index_path.exists());
7512        assert!(
7513            !db.loose()
7514                .object_path(&oid)
7515                .expect("test operation should succeed")
7516                .exists()
7517        );
7518        assert_eq!(read_object_for_assert(&db, &oid), object);
7519        fs::remove_dir_all(root).expect("test operation should succeed");
7520    }
7521
7522    #[test]
7523    fn repository_objects_dir_uses_linked_worktree_common_dir() {
7524        let root = temp_root("sley-odb-common-dir");
7525        let common = root.join(".git");
7526        let admin = common.join("worktrees").join("linked");
7527        fs::create_dir_all(&admin).expect("test operation should succeed");
7528        fs::write(admin.join("commondir"), "../..\n").expect("test operation should succeed");
7529
7530        let common = fs::canonicalize(common).expect("test operation should succeed");
7531        assert_eq!(repository_common_dir(&admin), common);
7532        assert_eq!(repository_objects_dir(&admin), common.join("objects"));
7533
7534        fs::remove_dir_all(root).expect("test operation should succeed");
7535    }
7536
7537    #[test]
7538    fn reachable_object_helpers_walk_graph_and_install_pack() {
7539        let root = temp_root("sley-reachable-pack");
7540        let source_git_dir = root.join("source.git");
7541        let destination_git_dir = root.join("destination.git");
7542        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
7543        fs::create_dir_all(destination_git_dir.join("objects"))
7544            .expect("test operation should succeed");
7545        let format = ObjectFormat::Sha1;
7546        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
7547        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
7548
7549        let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
7550        let blob_oid = source
7551            .write_object(blob.clone())
7552            .expect("test operation should succeed");
7553        let tree = EncodedObject::new(
7554            ObjectType::Tree,
7555            Tree {
7556                entries: vec![TreeEntry {
7557                    mode: 0o100644,
7558                    name: BString::from(b"payload.txt"),
7559                    oid: blob_oid,
7560                }],
7561            }
7562            .write(),
7563        );
7564        let tree_oid = source
7565            .write_object(tree.clone())
7566            .expect("test operation should succeed");
7567        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7568        let commit = EncodedObject::new(
7569            ObjectType::Commit,
7570            Commit {
7571                tree: tree_oid,
7572                parents: Vec::new(),
7573                author: identity.clone(),
7574                committer: identity,
7575                encoding: None,
7576                message: b"initial\n".to_vec(),
7577            }
7578            .write(),
7579        );
7580        let commit_oid = source
7581            .write_object(commit.clone())
7582            .expect("test operation should succeed");
7583
7584        let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
7585            .expect("test operation should succeed");
7586        assert!(reachable.contains(&commit_oid));
7587        assert!(reachable.contains(&tree_oid));
7588        assert!(reachable.contains(&blob_oid));
7589
7590        let install =
7591            install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
7592                .expect("test operation should succeed")
7593                .expect("reachable pack should be written");
7594        assert_eq!(install.object_ids.len(), 3);
7595        for (oid, object) in [
7596            (&commit_oid, &commit),
7597            (&tree_oid, &tree),
7598            (&blob_oid, &blob),
7599        ] {
7600            assert!(
7601                !destination
7602                    .loose()
7603                    .object_path(oid)
7604                    .expect("test operation should succeed")
7605                    .exists()
7606            );
7607            assert!(
7608                destination
7609                    .contains(oid)
7610                    .expect("test operation should succeed")
7611            );
7612            assert_eq!(read_object_for_assert(&destination, oid), *object);
7613        }
7614        fs::remove_dir_all(root).expect("test operation should succeed");
7615    }
7616
7617    #[test]
7618    fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
7619        let root = temp_root("sley-reachable-exclusions");
7620        let git_dir = root.join("repo.git");
7621        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7622        let format = ObjectFormat::Sha1;
7623        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7624
7625        let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
7626        let blob_oid = db
7627            .write_object(blob)
7628            .expect("test operation should succeed");
7629        let tree = EncodedObject::new(
7630            ObjectType::Tree,
7631            Tree {
7632                entries: vec![TreeEntry {
7633                    mode: 0o100644,
7634                    name: BString::from(b"payload.txt"),
7635                    oid: blob_oid,
7636                }],
7637            }
7638            .write(),
7639        );
7640        let tree_oid = db
7641            .write_object(tree)
7642            .expect("test operation should succeed");
7643        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7644        let commit = EncodedObject::new(
7645            ObjectType::Commit,
7646            Commit {
7647                tree: tree_oid,
7648                parents: Vec::new(),
7649                author: identity.clone(),
7650                committer: identity,
7651                encoding: None,
7652                message: b"initial\n".to_vec(),
7653            }
7654            .write(),
7655        );
7656        let commit_oid = db
7657            .write_object(commit)
7658            .expect("test operation should succeed");
7659        let excluded = HashSet::from([tree_oid]);
7660
7661        let objects = collect_reachable_objects(&db, format, [commit_oid, commit_oid], &excluded)
7662            .expect("test operation should succeed");
7663
7664        assert_eq!(objects.len(), 1);
7665        assert_eq!(
7666            objects[0]
7667                .object_id(format)
7668                .expect("test operation should succeed"),
7669            commit_oid
7670        );
7671        fs::remove_dir_all(root).expect("test operation should succeed");
7672    }
7673
7674    #[test]
7675    fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
7676        let root = temp_root("sley-build-reachable-pack");
7677        let git_dir = root.join("repo.git");
7678        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7679        let format = ObjectFormat::Sha1;
7680        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7681
7682        let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
7683        let oid = db
7684            .write_object(object.clone())
7685            .expect("test operation should succeed");
7686        let pack = build_reachable_pack(&db, format, std::iter::once(oid), &HashSet::new())
7687            .expect("test operation should succeed")
7688            .expect("reachable pack should be built");
7689        assert!(pack.pack.starts_with(b"PACK"));
7690        assert_eq!(pack.entries.len(), 1);
7691        assert_eq!(pack.entries[0].oid, oid);
7692
7693        let pack_path = root.join("reachable.pack");
7694        let pack_file = build_reachable_pack_file(
7695            &db,
7696            format,
7697            std::iter::once(oid),
7698            &HashSet::new(),
7699            &pack_path,
7700        )
7701        .expect("test operation should succeed")
7702        .expect("reachable pack file should be built");
7703        assert_eq!(pack_file.checksum, pack.checksum);
7704        assert_eq!(pack_file.pack_size, pack.pack.len() as u64);
7705        assert_eq!(pack_file.object_count, 1);
7706        assert_eq!(
7707            fs::read(&pack_file.pack_path).expect("test operation should succeed"),
7708            pack.pack
7709        );
7710
7711        let mut streamed_pack = Vec::new();
7712        let streamed = write_reachable_pack_to_writer(
7713            &db,
7714            format,
7715            std::iter::once(oid),
7716            &HashSet::new(),
7717            &mut streamed_pack,
7718        )
7719        .expect("test operation should succeed")
7720        .expect("reachable pack should be streamed");
7721        assert_eq!(streamed.checksum, pack.checksum);
7722        assert_eq!(streamed.pack_size, pack.pack.len() as u64);
7723        assert_eq!(streamed.object_count, 1);
7724        assert_eq!(streamed_pack, pack.pack);
7725
7726        let mut sink = std::io::sink();
7727        let dry_run = write_reachable_pack_to_writer(
7728            &db,
7729            format,
7730            std::iter::once(oid),
7731            &HashSet::new(),
7732            &mut sink,
7733        )
7734        .expect("test operation should succeed")
7735        .expect("reachable pack should stream to sink");
7736        assert_eq!(dry_run.checksum, pack.checksum);
7737        assert_eq!(dry_run.pack_size, pack.pack.len() as u64);
7738        assert_eq!(dry_run.object_count, 1);
7739
7740        let excluded = HashSet::from([oid]);
7741        assert!(
7742            build_reachable_pack(
7743                &db,
7744                format,
7745                pack.entries.into_iter().map(|entry| entry.oid),
7746                &excluded
7747            )
7748            .expect("test operation should succeed")
7749            .is_none()
7750        );
7751        fs::remove_dir_all(root).expect("test operation should succeed");
7752    }
7753
7754    #[test]
7755    fn index_raw_pack_returns_validated_pack_metadata() {
7756        let root = temp_root("sley-index-raw-pack");
7757        let git_dir = root.join("repo.git");
7758        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7759        let format = ObjectFormat::Sha1;
7760        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7761        let graph = write_commit_graph(&mut db, b"pack indexed\n");
7762        let commit_oid = graph[0].0;
7763        let expected = graph
7764            .iter()
7765            .map(|(oid, object)| (*oid, (object.object_type, object.body.len() as u64)))
7766            .collect::<HashMap<_, _>>();
7767        let pack = build_reachable_pack(&db, format, std::iter::once(commit_oid), &HashSet::new())
7768            .expect("test operation should succeed")
7769            .expect("reachable pack should be built");
7770
7771        let indexed = index_raw_pack(&pack.pack, format).expect("test operation should succeed");
7772        let mut cursor = std::io::Cursor::new(pack.pack.clone());
7773        let streamed = index_raw_pack_from_reader(&mut cursor, format)
7774            .expect("streamed pack indexing should match in-memory indexing");
7775        assert_eq!(streamed, indexed);
7776        let pack_path = root.join("reachable.pack");
7777        fs::write(&pack_path, &pack.pack).expect("test operation should succeed");
7778        let file_indexed = index_raw_pack_file(&pack_path, format)
7779            .expect("file-backed pack indexing should match in-memory indexing");
7780        assert_eq!(file_indexed, indexed);
7781
7782        assert_eq!(indexed.pack_id, pack.checksum);
7783        assert_eq!(indexed.index, pack.index);
7784        assert_eq!(indexed.objects.len(), 3);
7785        for object in indexed.objects {
7786            let (expected_type, expected_size) = expected
7787                .get(&object.oid)
7788                .copied()
7789                .expect("indexed object should be reachable");
7790            assert_eq!(object.object_type, expected_type);
7791            assert_eq!(object.size, expected_size);
7792            assert!(object.offset > 0);
7793        }
7794        fs::remove_dir_all(root).expect("test operation should succeed");
7795    }
7796
7797    #[test]
7798    fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
7799        let root = temp_root("sley-reachable-tags");
7800        let git_dir = root.join("repo.git");
7801        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7802        let format = ObjectFormat::Sha1;
7803        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7804
7805        let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
7806        let blob_oid = db
7807            .write_object(blob)
7808            .expect("test operation should succeed");
7809        let tag = EncodedObject::new(
7810            ObjectType::Tag,
7811            Tag {
7812                object: blob_oid,
7813                object_type: ObjectType::Blob,
7814                name: b"v1".to_vec(),
7815                tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
7816                message: b"tag message\n".to_vec(),
7817                raw_body: None,
7818            }
7819            .write(),
7820        );
7821        let tag_oid = db.write_object(tag).expect("test operation should succeed");
7822
7823        let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
7824            .expect("test operation should succeed");
7825        assert!(reachable.contains(&tag_oid));
7826        assert!(reachable.contains(&blob_oid));
7827
7828        let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
7829            .expect("test operation should succeed");
7830        let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
7831            .expect_err("missing traversal root should error");
7832        let kind = err.not_found_kind().expect("typed not found");
7833        assert_eq!(kind.object_id(), Some(missing));
7834        assert_eq!(
7835            kind.missing_object_context(),
7836            Some(MissingObjectContext::Traversal)
7837        );
7838        fs::remove_dir_all(root).expect("test operation should succeed");
7839    }
7840
7841    #[test]
7842    fn install_reachable_pack_empty_starts_create_no_pack() {
7843        let root = temp_root("sley-reachable-empty");
7844        let source_git_dir = root.join("source.git");
7845        let destination_git_dir = root.join("destination.git");
7846        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
7847        fs::create_dir_all(destination_git_dir.join("objects"))
7848            .expect("test operation should succeed");
7849        let format = ObjectFormat::Sha1;
7850        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
7851        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
7852
7853        let result = install_reachable_pack(&source, &destination, format, Vec::<ObjectId>::new())
7854            .expect("test operation should succeed");
7855
7856        assert!(result.is_none());
7857        assert!(!destination_git_dir.join("objects").join("pack").exists());
7858        fs::remove_dir_all(root).expect("test operation should succeed");
7859    }
7860
7861    #[test]
7862    fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
7863        let root = temp_root("sley-reachable-install-excluding");
7864        let source_git_dir = root.join("source.git");
7865        let destination_git_dir = root.join("destination.git");
7866        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
7867        fs::create_dir_all(destination_git_dir.join("objects"))
7868            .expect("test operation should succeed");
7869        let format = ObjectFormat::Sha1;
7870        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
7871        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
7872        let object = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
7873        let oid = source
7874            .write_object(object)
7875            .expect("test operation should succeed");
7876        let excluded = HashSet::from([oid]);
7877
7878        let result = install_reachable_pack_excluding(
7879            &source,
7880            &destination,
7881            format,
7882            std::iter::once(oid),
7883            &excluded,
7884        )
7885        .expect("test operation should succeed");
7886
7887        assert!(result.is_none());
7888        assert!(!destination_git_dir.join("objects").join("pack").exists());
7889        fs::remove_dir_all(root).expect("test operation should succeed");
7890    }
7891
7892    #[test]
7893    fn install_reachable_pack_supports_sha256() {
7894        let root = temp_root("sley-reachable-pack-sha256");
7895        let source_git_dir = root.join("source.git");
7896        let destination_git_dir = root.join("destination.git");
7897        fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
7898        fs::create_dir_all(destination_git_dir.join("objects"))
7899            .expect("test operation should succeed");
7900        let format = ObjectFormat::Sha256;
7901        let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
7902        let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
7903        let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
7904        let oid = source
7905            .write_object(object.clone())
7906            .expect("test operation should succeed");
7907
7908        let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
7909            .expect("test operation should succeed")
7910            .expect("sha256 reachable pack should be built");
7911        assert!(pack.pack.starts_with(b"PACK"));
7912        assert_eq!(pack.entries[0].oid, oid);
7913
7914        let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
7915            .expect("test operation should succeed")
7916            .expect("sha256 reachable pack should be written");
7917
7918        assert_eq!(result.object_ids, vec![oid]);
7919        assert!(
7920            !destination
7921                .loose()
7922                .object_path(&oid)
7923                .expect("test operation should succeed")
7924                .exists()
7925        );
7926        assert_eq!(read_object_for_assert(&destination, &oid), object);
7927        fs::remove_dir_all(root).expect("test operation should succeed");
7928    }
7929
7930    #[test]
7931    fn install_helpers_accept_custom_raw_pack_installer() {
7932        #[derive(Default)]
7933        struct RecordingInstaller {
7934            packs: std::cell::RefCell<Vec<Vec<u8>>>,
7935            installed: std::cell::RefCell<Vec<ObjectId>>,
7936        }
7937
7938        impl RawPackInstaller for RecordingInstaller {
7939            fn install_raw_pack_from_reader<R>(
7940                &self,
7941                reader: &mut R,
7942            ) -> Result<RawPackInstallResult>
7943            where
7944                R: Read,
7945            {
7946                let mut pack_bytes = Vec::new();
7947                reader.read_to_end(&mut pack_bytes)?;
7948                self.packs.borrow_mut().push(pack_bytes.to_vec());
7949                let object_ids = self.installed.borrow().clone();
7950                Ok(RawPackInstallResult { object_ids })
7951            }
7952        }
7953
7954        let format = ObjectFormat::Sha1;
7955        let source = ObjectDatabase::new(format);
7956        let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
7957        let oid = source
7958            .write_object(object)
7959            .expect("test operation should succeed");
7960        let installer = RecordingInstaller::default();
7961        installer.installed.borrow_mut().push(oid);
7962
7963        let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
7964            .expect("test operation should succeed")
7965            .expect("custom installer should receive pack");
7966
7967        assert_eq!(result.object_ids, installer.installed.into_inner());
7968        let packs = installer.packs.into_inner();
7969        assert_eq!(packs.len(), 1);
7970        assert!(packs[0].starts_with(b"PACK"));
7971    }
7972
7973    #[test]
7974    fn file_database_reads_object_from_multi_pack_index() {
7975        let root = temp_root("sley-file-odb-midx");
7976        let git_dir = root.join(".git");
7977        let pack_dir = git_dir.join("objects").join("pack");
7978        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
7979        let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
7980        let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
7981        let first_oid = first
7982            .object_id(ObjectFormat::Sha1)
7983            .expect("test operation should succeed");
7984        let second_oid = second
7985            .object_id(ObjectFormat::Sha1)
7986            .expect("test operation should succeed");
7987        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
7988            .expect("test operation should succeed");
7989        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
7990            .expect("test operation should succeed");
7991        let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
7992        let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
7993        fs::write(
7994            pack_dir.join(first_pack_name.replace(".idx", ".pack")),
7995            first_pack.pack,
7996        )
7997        .expect("test operation should succeed");
7998        fs::write(
7999            pack_dir.join(second_pack_name.replace(".idx", ".pack")),
8000            second_pack.pack,
8001        )
8002        .expect("test operation should succeed");
8003        let midx = MultiPackIndex::write(
8004            ObjectFormat::Sha1,
8005            2,
8006            &[first_pack_name, second_pack_name],
8007            &[
8008                sley_pack::MultiPackIndexEntry {
8009                    oid: first_oid,
8010                    pack_int_id: 0,
8011                    offset: first_pack.entries[0].offset,
8012                    force_large_offset: false,
8013                },
8014                sley_pack::MultiPackIndexEntry {
8015                    oid: second_oid,
8016                    pack_int_id: 1,
8017                    offset: second_pack.entries[0].offset,
8018                    force_large_offset: false,
8019                },
8020            ],
8021        )
8022        .expect("test operation should succeed");
8023        fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");
8024
8025        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8026        assert!(
8027            db.contains(&second_oid)
8028                .expect("test operation should succeed")
8029        );
8030        assert_eq!(
8031            db.resolve_prefix(&second_oid.to_hex()[..8])
8032                .expect("test operation should succeed"),
8033            ObjectPrefixResolution::Unique(second_oid)
8034        );
8035        assert_eq!(read_object_for_assert(&db, &second_oid), second);
8036        assert_eq!(read_object_for_assert(&db, &first_oid), first);
8037        fs::remove_dir_all(root).expect("test operation should succeed");
8038    }
8039
8040    #[test]
8041    fn file_database_finds_pack_added_after_registry_was_cached() {
8042        // Regression guard for the cached pack-directory registry: a pack written
8043        // after the registry was first cached (via a prior read) must still be
8044        // discovered by the same handle, because a miss triggers a re-scan.
8045        let root = temp_root("sley-file-odb-pack-added-late");
8046        let git_dir = root.join(".git");
8047        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8048        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8049
8050        // First pack + object; reading it populates the registry cache.
8051        let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
8052        let first_oid = first
8053            .object_id(ObjectFormat::Sha1)
8054            .expect("test operation should succeed");
8055        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
8056            .expect("test operation should succeed");
8057        db.install_pack(&first_pack)
8058            .expect("test operation should succeed");
8059        assert_eq!(read_object_for_assert(&db, &first_oid), first);
8060
8061        // A second object that the cached registry does not yet know about.
8062        let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
8063        let second_oid = second
8064            .object_id(ObjectFormat::Sha1)
8065            .expect("test operation should succeed");
8066        // It is genuinely absent right now.
8067        assert!(matches!(
8068            db.read_object(&second_oid),
8069            Err(GitError::NotFound(_))
8070        ));
8071
8072        // Install its pack through the same handle; the next read must find it via
8073        // a re-scan, not be masked by the stale registry.
8074        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
8075            .expect("test operation should succeed");
8076        db.install_pack(&second_pack)
8077            .expect("test operation should succeed");
8078        assert!(
8079            db.contains(&second_oid)
8080                .expect("test operation should succeed")
8081        );
8082        assert_eq!(read_object_for_assert(&db, &second_oid), second);
8083        // The original object still resolves too.
8084        assert_eq!(read_object_for_assert(&db, &first_oid), first);
8085
8086        fs::remove_dir_all(root).expect("test operation should succeed");
8087    }
8088
8089    #[test]
8090    fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
8091        let root = temp_root("sley-presence-checker-pack-added-late");
8092        let git_dir = root.join(".git");
8093        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8094        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8095
8096        let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
8097        let first_oid = first
8098            .object_id(ObjectFormat::Sha1)
8099            .expect("test operation should succeed");
8100        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
8101            .expect("test operation should succeed");
8102        db.install_pack(&first_pack)
8103            .expect("test operation should succeed");
8104
8105        let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
8106        let second_oid = second
8107            .object_id(ObjectFormat::Sha1)
8108            .expect("test operation should succeed");
8109        let mut checker = db.presence_checker();
8110        assert!(
8111            checker
8112                .contains(&first_oid)
8113                .expect("test operation should succeed")
8114        );
8115        assert!(
8116            !checker
8117                .contains(&second_oid)
8118                .expect("test operation should succeed")
8119        );
8120
8121        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
8122            .expect("test operation should succeed");
8123        db.install_pack(&second_pack)
8124            .expect("test operation should succeed");
8125
8126        assert!(
8127            checker
8128                .contains(&second_oid)
8129                .expect("test operation should succeed")
8130        );
8131        fs::remove_dir_all(root).expect("test operation should succeed");
8132    }
8133
8134    #[test]
8135    fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
8136        let root = temp_root("sley-file-odb-pack-registry-refresh");
8137        let git_dir = root.join(".git");
8138        let pack_dir = git_dir.join("objects").join("pack");
8139        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8140        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8141
8142        let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
8143        let first_oid = first
8144            .object_id(ObjectFormat::Sha1)
8145            .expect("test operation should succeed");
8146        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
8147            .expect("test operation should succeed");
8148        db.install_pack(&first_pack)
8149            .expect("test operation should succeed");
8150
8151        let first_registry = db
8152            .cached_pack_registry(&pack_dir, false)
8153            .expect("test operation should succeed");
8154        assert_eq!(first_registry.fingerprint.idx_count, 1);
8155        assert_eq!(first_registry.fingerprint.pack_count, 1);
8156        assert_eq!(first_registry.packs.len(), 1);
8157        assert!(
8158            first_registry.packs[0]
8159                .index
8160                .lock()
8161                .expect("test operation should succeed")
8162                .is_none()
8163        );
8164        assert!(
8165            first_registry.packs[0]
8166                .data
8167                .lock()
8168                .expect("test operation should succeed")
8169                .is_none()
8170        );
8171
8172        // Existence checks use the parsed index directly and do not load pack
8173        // bytes; a full read fills the registry-owned pack data handle.
8174        assert!(
8175            db.contains(&first_oid)
8176                .expect("test operation should succeed")
8177        );
8178        assert!(
8179            first_registry.packs[0]
8180                .index
8181                .lock()
8182                .expect("test operation should succeed")
8183                .is_some()
8184        );
8185        assert!(
8186            first_registry.packs[0]
8187                .data
8188                .lock()
8189                .expect("test operation should succeed")
8190                .is_none()
8191        );
8192        assert_eq!(read_object_for_assert(&db, &first_oid), first);
8193        assert!(
8194            first_registry.packs[0]
8195                .data
8196                .lock()
8197                .expect("test operation should succeed")
8198                .is_some()
8199        );
8200
8201        let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
8202        let second_oid = second
8203            .object_id(ObjectFormat::Sha1)
8204            .expect("test operation should succeed");
8205        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
8206            .expect("test operation should succeed");
8207        db.install_pack(&second_pack)
8208            .expect("test operation should succeed");
8209
8210        let refreshed = db
8211            .cached_pack_registry(&pack_dir, true)
8212            .expect("test operation should succeed");
8213        assert!(!Arc::ptr_eq(&first_registry, &refreshed));
8214        assert_eq!(refreshed.fingerprint.idx_count, 2);
8215        assert_eq!(refreshed.fingerprint.pack_count, 2);
8216        assert_eq!(refreshed.packs.len(), 2);
8217        assert_eq!(read_object_for_assert(&db, &second_oid), second);
8218
8219        fs::remove_dir_all(root).expect("test operation should succeed");
8220    }
8221
8222    #[test]
8223    fn file_database_pack_search_hint_rebuilds_after_pack_added() {
8224        // Regression guard for the recent-pack search hint: it is tied to the
8225        // cached pack registry, so a miss followed by a changed registry must not
8226        // hide newly-added packs.
8227        let root = temp_root("sley-file-odb-pack-lookup-added-late");
8228        let git_dir = root.join(".git");
8229        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8230        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8231
8232        let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
8233        let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
8234        let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
8235        let first_oid = first
8236            .object_id(ObjectFormat::Sha1)
8237            .expect("test operation should succeed");
8238        let second_oid = second
8239            .object_id(ObjectFormat::Sha1)
8240            .expect("test operation should succeed");
8241        let third_oid = third
8242            .object_id(ObjectFormat::Sha1)
8243            .expect("test operation should succeed");
8244
8245        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
8246            .expect("test operation should succeed");
8247        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
8248            .expect("test operation should succeed");
8249        db.install_pack(&first_pack)
8250            .expect("test operation should succeed");
8251        db.install_pack(&second_pack)
8252            .expect("test operation should succeed");
8253
8254        // With two packs, these reads establish a cached registry and pack hint.
8255        assert_eq!(read_object_for_assert(&db, &first_oid), first);
8256        assert_eq!(read_object_for_assert(&db, &second_oid), second);
8257        assert!(matches!(
8258            db.read_object(&third_oid),
8259            Err(GitError::NotFound(_))
8260        ));
8261
8262        let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
8263            .expect("test operation should succeed");
8264        db.install_pack(&third_pack)
8265            .expect("test operation should succeed");
8266
8267        assert_eq!(read_object_for_assert(&db, &third_oid), third);
8268        assert_eq!(read_object_for_assert(&db, &first_oid), first);
8269
8270        fs::remove_dir_all(root).expect("test operation should succeed");
8271    }
8272
8273    #[test]
8274    fn file_database_prefers_loose_object_over_packed_object() {
8275        let root = temp_root("sley-file-odb-prefer-loose");
8276        let git_dir = root.join(".git");
8277        let pack_dir = git_dir.join("objects").join("pack");
8278        fs::create_dir_all(&pack_dir).expect("test operation should succeed");
8279        let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
8280        let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
8281            .expect("test operation should succeed");
8282        let pack_name = written.checksum.to_hex();
8283        fs::write(
8284            pack_dir.join(format!("pack-{pack_name}.pack")),
8285            written.pack,
8286        )
8287        .expect("test operation should succeed");
8288        fs::write(
8289            pack_dir.join(format!("pack-{pack_name}.idx")),
8290            written.index,
8291        )
8292        .expect("test operation should succeed");
8293
8294        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8295        let oid = db
8296            .write_object(object.clone())
8297            .expect("test operation should succeed");
8298        assert_eq!(read_object_for_assert(&db, &oid), object);
8299        fs::remove_dir_all(root).expect("test operation should succeed");
8300    }
8301
8302    #[test]
8303    fn bundle_prerequisite_verification_reads_existing_objects() {
8304        let db = ObjectDatabase::new(ObjectFormat::Sha1);
8305        let oid = db
8306            .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
8307            .expect("test operation should succeed");
8308        let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
8309        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
8310            .expect("test operation should succeed");
8311
8312        verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
8313    }
8314
8315    #[test]
8316    fn bundle_prerequisite_verification_reports_missing_objects() {
8317        let db = ObjectDatabase::new(ObjectFormat::Sha1);
8318        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
8319            .expect("test operation should succeed");
8320        let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
8321        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
8322            .expect("test operation should succeed");
8323
8324        assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
8325    }
8326
8327    #[test]
8328    fn unbundle_objects_writes_pack_entries_and_returns_refs() {
8329        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
8330        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
8331        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
8332        let oid = object
8333            .object_id(ObjectFormat::Sha1)
8334            .expect("test operation should succeed");
8335        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
8336            .expect("test operation should succeed");
8337        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
8338            .into_bytes()
8339            .into_iter()
8340            .chain(pack.pack)
8341            .collect::<Vec<_>>();
8342        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
8343            .expect("test operation should succeed");
8344
8345        let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
8346            .expect("test operation should succeed");
8347        assert_eq!(result.written_objects, vec![oid]);
8348        assert_eq!(result.references, bundle.references);
8349        assert_eq!(read_object_for_assert(&writer, &oid), object);
8350    }
8351
8352    #[test]
8353    fn install_bundle_pack_writes_pack_and_returns_refs() {
8354        let root = temp_root("sley-install-bundle-pack");
8355        let git_dir = root.join(".git");
8356        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8357        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
8358        let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8359        let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
8360        let oid = object
8361            .object_id(ObjectFormat::Sha1)
8362            .expect("test operation should succeed");
8363        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
8364            .expect("test operation should succeed");
8365        let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
8366            .into_bytes()
8367            .into_iter()
8368            .chain(pack.pack)
8369            .collect::<Vec<_>>();
8370        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
8371            .expect("test operation should succeed");
8372
8373        let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
8374            .expect("test operation should succeed");
8375
8376        assert_eq!(result.written_objects, vec![oid]);
8377        assert_eq!(result.references, bundle.references);
8378        assert!(
8379            database
8380                .contains(&oid)
8381                .expect("test operation should succeed")
8382        );
8383        assert_eq!(read_object_for_assert(&database, &oid), object);
8384        assert!(
8385            !database
8386                .loose()
8387                .object_path(&oid)
8388                .expect("test operation should succeed")
8389                .exists()
8390        );
8391        fs::remove_dir_all(root).expect("test operation should succeed");
8392    }
8393
8394    #[test]
8395    fn unpack_packfile_objects_writes_sha256_pack_entries() {
8396        let writer = ObjectDatabase::new(ObjectFormat::Sha256);
8397        let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
8398        let oid = object
8399            .object_id(ObjectFormat::Sha256)
8400            .expect("test operation should succeed");
8401        let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
8402            .expect("test operation should succeed");
8403
8404        let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
8405            .expect("test operation should succeed");
8406
8407        assert_eq!(result.written_objects, vec![oid]);
8408        assert_eq!(read_object_for_assert(&writer, &oid), object);
8409    }
8410
8411    #[test]
8412    fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
8413        let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
8414        let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
8415        let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
8416            .expect("test operation should succeed");
8417        let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
8418        let oid = object
8419            .object_id(ObjectFormat::Sha1)
8420            .expect("test operation should succeed");
8421        let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
8422            .expect("test operation should succeed");
8423        let bundle_bytes =
8424            format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
8425                .into_bytes()
8426                .into_iter()
8427                .chain(pack.pack)
8428                .collect::<Vec<_>>();
8429        let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
8430            .expect("test operation should succeed");
8431
8432        assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
8433        assert!(!writer.contains(&oid));
8434    }
8435
8436    /// Build a commit -> tree -> blob graph in `db`, returning the three object
8437    /// ids and their canonical encodings as `(oid, object)` pairs.
8438    fn write_commit_graph(
8439        db: &mut FileObjectDatabase,
8440        payload: &[u8],
8441    ) -> Vec<(ObjectId, EncodedObject)> {
8442        let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
8443        let blob_oid = db
8444            .write_object(blob.clone())
8445            .expect("test operation should succeed");
8446        let tree = EncodedObject::new(
8447            ObjectType::Tree,
8448            Tree {
8449                entries: vec![TreeEntry {
8450                    mode: 0o100644,
8451                    name: BString::from(b"payload.txt"),
8452                    oid: blob_oid,
8453                }],
8454            }
8455            .write(),
8456        );
8457        let tree_oid = db
8458            .write_object(tree.clone())
8459            .expect("test operation should succeed");
8460        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
8461        let commit = EncodedObject::new(
8462            ObjectType::Commit,
8463            Commit {
8464                tree: tree_oid,
8465                parents: Vec::new(),
8466                author: identity.clone(),
8467                committer: identity,
8468                encoding: None,
8469                message: b"initial\n".to_vec(),
8470            }
8471            .write(),
8472        );
8473        let commit_oid = db
8474            .write_object(commit.clone())
8475            .expect("test operation should succeed");
8476        vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
8477    }
8478
8479    fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
8480        let root = temp_root("sley-repack-all");
8481        let git_dir = root.join(".git");
8482        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8483        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8484
8485        // A pre-existing pack holds one blob; the rest of the graph is loose.
8486        let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
8487        let packed_oid = packed_blob
8488            .object_id(format)
8489            .expect("test operation should succeed");
8490        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
8491            .expect("test operation should succeed");
8492        let existing = db
8493            .install_pack(&existing_pack)
8494            .expect("test operation should succeed");
8495
8496        let graph = write_commit_graph(&mut db, b"repack payload\n");
8497
8498        let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
8499        expected.insert(packed_oid, packed_blob.clone());
8500
8501        let result = repack_all_objects(&git_dir, format)
8502            .expect("test operation should succeed")
8503            .expect("repository has objects");
8504
8505        // The new pack round-trips and contains every original object byte-for-byte.
8506        assert_eq!(result.object_count, expected.len());
8507        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
8508        assert_eq!(parsed.entries.len(), expected.len());
8509        for entry in &parsed.entries {
8510            let want = expected
8511                .get(&entry.entry.oid)
8512                .expect("packed object was in the repository");
8513            assert_eq!(&entry.object, want);
8514            assert_eq!(
8515                entry
8516                    .object
8517                    .object_id(format)
8518                    .expect("test operation should succeed"),
8519                entry.entry.oid
8520            );
8521        }
8522        // The generated index parses and agrees with the pack checksum.
8523        let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
8524        assert_eq!(idx.pack_checksum, parsed.checksum);
8525        assert_eq!(idx.entries.len(), expected.len());
8526
8527        // The pre-existing pack is reported obsolete (by its .pack path).
8528        assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
8529        // Every loose object id is reported as now packed.
8530        let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
8531        want_loose.sort_by_key(ObjectId::to_hex);
8532        assert_eq!(result.packed_loose, want_loose);
8533        assert!(!result.packed_loose.contains(&packed_oid));
8534
8535        fs::remove_dir_all(root).expect("test operation should succeed");
8536    }
8537
8538    #[test]
8539    fn repack_all_objects_consolidates_loose_and_pack_sha1() {
8540        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
8541    }
8542
8543    #[test]
8544    fn repack_all_objects_consolidates_loose_and_pack_sha256() {
8545        repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
8546    }
8547
8548    #[test]
8549    fn repack_all_objects_returns_none_for_empty_repository() {
8550        let root = temp_root("sley-repack-empty");
8551        let git_dir = root.join(".git");
8552        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8553
8554        assert!(
8555            repack_all_objects(&git_dir, ObjectFormat::Sha1)
8556                .expect("test operation should succeed")
8557                .is_none()
8558        );
8559
8560        fs::remove_dir_all(root).expect("test operation should succeed");
8561    }
8562
8563    #[test]
8564    fn install_repack_result_writes_pack_without_pruning_by_default() {
8565        let root = temp_root("sley-repack-install-nodelete");
8566        let git_dir = root.join(".git");
8567        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8568        let format = ObjectFormat::Sha1;
8569        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8570        let graph = write_commit_graph(&mut db, b"install no prune\n");
8571
8572        let result = repack_all_objects(&git_dir, format)
8573            .expect("test operation should succeed")
8574            .expect("test operation should succeed");
8575        install_repack_result(&git_dir, format, &result, false)
8576            .expect("test operation should succeed");
8577
8578        // New pack is on disk and readable.
8579        let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
8580        let pack_dir = git_dir.join("objects").join("pack");
8581        let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
8582        let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
8583        assert!(pack_path.exists());
8584        assert!(idx_path.exists());
8585        // Loose objects survive because prune was not requested.
8586        for (oid, object) in &graph {
8587            assert!(
8588                db.loose()
8589                    .object_path(oid)
8590                    .expect("test operation should succeed")
8591                    .exists()
8592            );
8593            assert_eq!(read_object_for_assert(&db, oid), *object);
8594        }
8595
8596        fs::remove_dir_all(root).expect("test operation should succeed");
8597    }
8598
8599    #[test]
8600    fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
8601        let root = temp_root("sley-repack-install-prune");
8602        let git_dir = root.join(".git");
8603        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8604        let format = ObjectFormat::Sha1;
8605        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8606
8607        let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
8608        let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
8609            .expect("test operation should succeed");
8610        let existing = db
8611            .install_pack(&existing_pack)
8612            .expect("test operation should succeed");
8613        let graph = write_commit_graph(&mut db, b"prune payload\n");
8614
8615        let result = repack_all_objects(&git_dir, format)
8616            .expect("test operation should succeed")
8617            .expect("test operation should succeed");
8618        let new_pack_checksum = PackFile::parse(&result.pack, format)
8619            .expect("test operation should succeed")
8620            .checksum;
8621        install_repack_result(&git_dir, format, &result, true)
8622            .expect("test operation should succeed");
8623
8624        // Obsolete pack and its index are gone.
8625        assert!(!existing.pack_path.exists());
8626        assert!(!existing.index_path.exists());
8627        // Packed loose objects are gone from disk.
8628        for (oid, _) in &graph {
8629            assert!(
8630                !db.loose()
8631                    .object_path(oid)
8632                    .expect("test operation should succeed")
8633                    .exists()
8634            );
8635        }
8636        // The new consolidated pack remains and still serves every object.
8637        let pack_dir = git_dir.join("objects").join("pack");
8638        assert!(
8639            pack_dir
8640                .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
8641                .exists()
8642        );
8643        let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
8644        for (oid, object) in &graph {
8645            assert!(
8646                reopened
8647                    .contains(oid)
8648                    .expect("test operation should succeed")
8649            );
8650            assert_eq!(read_object_for_assert(&reopened, oid), *object);
8651        }
8652        let packed_oid = packed_blob
8653            .object_id(format)
8654            .expect("test operation should succeed");
8655        assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
8656
8657        fs::remove_dir_all(root).expect("test operation should succeed");
8658    }
8659
8660    #[test]
8661    fn install_repack_result_preserves_keep_and_promisor_packs() {
8662        let root = temp_root("sley-repack-install-keep-promisor");
8663        let git_dir = root.join(".git");
8664        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8665        let format = ObjectFormat::Sha1;
8666        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8667
8668        let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
8669        let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
8670            .expect("test operation should succeed");
8671        let keep_install = db
8672            .install_pack(&keep_pack)
8673            .expect("test operation should succeed");
8674        let keep_sidecar = keep_install.pack_path.with_extension("keep");
8675        fs::write(&keep_sidecar, b"").expect("test operation should succeed");
8676
8677        let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
8678        let promisor_pack =
8679            PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
8680                .expect("test operation should succeed");
8681        let promisor_install = db
8682            .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
8683            .expect("test operation should succeed");
8684        let promisor_sidecar = promisor_install
8685            .promisor_path
8686            .clone()
8687            .expect("promisor sidecar");
8688
8689        let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
8690        let result = repack_all_objects(&git_dir, format)
8691            .expect("test operation should succeed")
8692            .expect("test operation should succeed");
8693        assert!(result.obsolete_packs.contains(&keep_install.pack_path));
8694        assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
8695
8696        install_repack_result(&git_dir, format, &result, true)
8697            .expect("test operation should succeed");
8698
8699        for path in [
8700            &keep_install.pack_path,
8701            &keep_install.index_path,
8702            &keep_sidecar,
8703            &promisor_install.pack_path,
8704            &promisor_install.index_path,
8705            &promisor_sidecar,
8706        ] {
8707            assert!(path.exists(), "{} should be preserved", path.display());
8708        }
8709        for (oid, _) in &graph {
8710            assert!(
8711                !db.loose()
8712                    .object_path(oid)
8713                    .expect("test operation should succeed")
8714                    .exists()
8715            );
8716        }
8717
8718        fs::remove_dir_all(root).expect("test operation should succeed");
8719    }
8720
8721    #[test]
8722    fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
8723        // Safety: a loose object whose id is not in the new pack must survive
8724        // pruning even if the caller lists it in `packed_loose`.
8725        let root = temp_root("sley-repack-install-safety");
8726        let git_dir = root.join(".git");
8727        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8728        let format = ObjectFormat::Sha1;
8729        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8730        let graph = write_commit_graph(&mut db, b"safety packed\n");
8731
8732        let mut result = repack_all_objects(&git_dir, format)
8733            .expect("test operation should succeed")
8734            .expect("test operation should succeed");
8735
8736        // A loose object that is NOT in the new pack, but mislabeled as packed.
8737        let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
8738        let stray_oid = db
8739            .write_object(stray.clone())
8740            .expect("test operation should succeed");
8741        assert!(!result.packed_loose.contains(&stray_oid));
8742        result.packed_loose.push(stray_oid);
8743
8744        install_repack_result(&git_dir, format, &result, true)
8745            .expect("test operation should succeed");
8746
8747        // The stray loose object is untouched because it is not in the new pack.
8748        assert!(
8749            db.loose()
8750                .object_path(&stray_oid)
8751                .expect("test operation should succeed")
8752                .exists()
8753        );
8754        assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
8755        // Genuinely packed loose objects were still removed.
8756        for (oid, _) in &graph {
8757            assert!(
8758                !db.loose()
8759                    .object_path(oid)
8760                    .expect("test operation should succeed")
8761                    .exists()
8762            );
8763        }
8764
8765        fs::remove_dir_all(root).expect("test operation should succeed");
8766    }
8767
8768    #[test]
8769    fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
8770        let root = temp_root("sley-prune-unreachable");
8771        let git_dir = root.join(".git");
8772        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8773        let format = ObjectFormat::Sha1;
8774        let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8775        let graph = write_commit_graph(&mut db, b"reachable payload\n");
8776        let commit_oid = graph[0].0.clone();
8777
8778        // A dangling loose blob not referenced by the commit graph.
8779        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
8780        let dangling_oid = db
8781            .write_object(dangling)
8782            .expect("test operation should succeed");
8783
8784        // Report-only pass leaves everything on disk.
8785        let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
8786            .expect("test operation should succeed");
8787        assert_eq!(reported, vec![dangling_oid]);
8788        assert!(
8789            db.loose()
8790                .object_path(&dangling_oid)
8791                .expect("test operation should succeed")
8792                .exists()
8793        );
8794
8795        // Deleting pass removes only the unreachable object.
8796        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
8797            .expect("test operation should succeed");
8798        assert_eq!(deleted, vec![dangling_oid]);
8799        assert!(
8800            !db.loose()
8801                .object_path(&dangling_oid)
8802                .expect("test operation should succeed")
8803                .exists()
8804        );
8805        for (oid, object) in &graph {
8806            assert!(
8807                db.loose()
8808                    .object_path(oid)
8809                    .expect("test operation should succeed")
8810                    .exists()
8811            );
8812            assert_eq!(read_object_for_assert(&db, oid), *object);
8813        }
8814
8815        fs::remove_dir_all(root).expect("test operation should succeed");
8816    }
8817
8818    #[test]
8819    fn prune_unreachable_loose_ignores_gitlink_targets() {
8820        let root = temp_root("sley-prune-gitlink");
8821        let git_dir = root.join(".git");
8822        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8823        let format = ObjectFormat::Sha1;
8824        let db = FileObjectDatabase::from_git_dir(&git_dir, format);
8825
8826        let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
8827            .expect("test operation should succeed");
8828        let tree = EncodedObject::new(
8829            ObjectType::Tree,
8830            Tree {
8831                entries: vec![TreeEntry {
8832                    mode: 0o160000,
8833                    name: BString::from(b"submodule"),
8834                    oid: submodule_oid,
8835                }],
8836            }
8837            .write(),
8838        );
8839        let tree_oid = db
8840            .write_object(tree)
8841            .expect("test operation should succeed");
8842        let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
8843        let commit = EncodedObject::new(
8844            ObjectType::Commit,
8845            Commit {
8846                tree: tree_oid,
8847                parents: Vec::new(),
8848                author: identity.clone(),
8849                committer: identity,
8850                encoding: None,
8851                message: b"gitlink\n".to_vec(),
8852            }
8853            .write(),
8854        );
8855        let commit_oid = db
8856            .write_object(commit)
8857            .expect("test operation should succeed");
8858        let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
8859        let dangling_oid = db
8860            .write_object(dangling)
8861            .expect("test operation should succeed");
8862
8863        let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
8864            .expect("test operation should succeed");
8865
8866        assert_eq!(deleted, vec![dangling_oid]);
8867        assert!(
8868            !db.loose()
8869                .object_path(&dangling_oid)
8870                .expect("test operation should succeed")
8871                .exists()
8872        );
8873
8874        fs::remove_dir_all(root).expect("test operation should succeed");
8875    }
8876
8877    fn temp_root(prefix: &str) -> PathBuf {
8878        std::env::temp_dir().join(format!(
8879            "{prefix}-{}-{}",
8880            std::process::id(),
8881            TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
8882        ))
8883    }
8884}