1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{
12 Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object,
13 tree_entry_object_type,
14};
15use sley_pack::{
16 MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
17 PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput,
18 PackStreamIndexBuild, PackWrite, PackWriteOptions, PackWriteSummary,
19};
20use std::collections::{HashMap, HashSet};
21use std::io::{Read, Write};
22use std::path::{Path, PathBuf};
23use std::sync::atomic::{AtomicU64, Ordering};
24use std::sync::{Arc, Mutex, OnceLock};
25use std::{env, fs};
26
/// Process-wide atomic counter, initialised to zero.
///
/// NOTE(review): presumably feeds unique temporary-file names (going by its
/// name); the code that increments it is outside this view — confirm.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
28
/// Read access to an object store.
pub trait ObjectReader {
    /// Looks up the object identified by `oid` and returns it as a shared
    /// [`EncodedObject`].
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Reports whether `oid` is a shallow graft point.
    ///
    /// [`grafted_parents`] returns an empty parent list for such ids. The
    /// default implementation always answers `false`.
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }

    /// Reports whether this reader knows about any shallow grafts at all.
    ///
    /// The default implementation always answers `false`.
    fn has_shallow_grafts(&self) -> bool {
        false
    }

    /// Reports whether `oid` is a "promised" object.
    ///
    /// NOTE(review): presumably an object a promisor remote is expected to
    /// supply on demand — confirm against the implementors. The default
    /// implementation always answers `false`.
    fn is_promised_object(&self, _oid: &ObjectId) -> bool {
        false
    }
}
57
58fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
59 (*oid == ObjectId::empty_tree(format))
60 .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
61}
62
63fn with_missing_object_context(
64 err: GitError,
65 oid: ObjectId,
66 context: MissingObjectContext,
67) -> GitError {
68 let kind = err
69 .not_found_kind()
70 .and_then(sley_core::NotFoundKind::missing_object_kind);
71 match kind {
72 Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
73 None => err,
74 }
75}
76
77pub fn grafted_parents<R: ObjectReader + ?Sized>(
81 reader: &R,
82 oid: &ObjectId,
83 parents: Vec<ObjectId>,
84) -> Vec<ObjectId> {
85 if reader.is_shallow_graft(oid) {
86 Vec::new()
87 } else {
88 parents
89 }
90}
91
/// Write access to an object store.
pub trait ObjectWriter {
    /// Stores `object` and returns the object id reported by the store.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
100
/// Outcome of importing the pack carried by a bundle.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids of the objects that were written or installed from the bundle's pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the references advertised by the bundle.
    pub references: Vec<BundleReference>,
}
106
/// Outcome of writing every entry of a parsed pack through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids returned by the writer, in pack entry order.
    pub written_objects: Vec<ObjectId>,
}
111
/// Describes a pack that has been installed on disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Name of the installed pack.
    pub pack_name: String,
    /// Location of the pack file.
    pub pack_path: PathBuf,
    /// Location of the pack index file.
    pub index_path: PathBuf,
    /// Location of the promisor sidecar, if one was produced.
    // NOTE(review): presumably `None` when the install was not a promisor
    // install — confirm against `write_promisor_pack_sidecar`.
    pub promisor_path: Option<PathBuf>,
    /// Ids of the objects listed in the pack index.
    pub object_ids: Vec<ObjectId>,
}
120
/// In-progress installation of a raw pack that is streamed in through
/// [`Write`].
///
/// Bytes are accepted up to `expected_pack_size`; `finish` validates length
/// and checksum and moves the pack into place. Dropping the value without a
/// successful `finish` deletes the temporary pack file.
#[derive(Debug)]
pub struct RawPackStreamingInstall {
    /// Object format handed to the index builder in `finish`.
    format: ObjectFormat,
    /// Pack checksum the finished stream must match.
    expected_pack_id: ObjectId,
    /// Exact number of bytes the stream must contain.
    expected_pack_size: u64,
    /// Install options; `promisor` is forwarded to the promisor sidecar writer.
    options: RawPackInstallOptions,
    /// Directory passed to the promisor sidecar writer.
    pack_dir: PathBuf,
    /// Pack name reported in the install result.
    pack_name: String,
    /// Final location of the pack file.
    pack_path: PathBuf,
    /// Final location of the pack index.
    index_path: PathBuf,
    /// Temporary file that is indexed and then renamed to `pack_path`.
    temp_pack_path: PathBuf,
    /// Open handle receiving the streamed bytes; taken (left `None`) by `finish`.
    // NOTE(review): presumably opened on `temp_pack_path`; the constructor is
    // not visible here — confirm.
    file: Option<fs::File>,
    /// Number of bytes accepted so far.
    written: u64,
    /// Set once `finish` has succeeded, which disables the cleanup on drop.
    finished: bool,
}
136
/// Outcome of installing a raw pack through a [`RawPackInstaller`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Ids of the objects the installer reported for the pack.
    pub object_ids: Vec<ObjectId>,
}
141
/// Result of indexing a raw pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackIndexResult {
    /// Checksum of the pack as reported by the index builder.
    pub pack_id: ObjectId,
    /// Serialized pack index bytes.
    pub index: Vec<u8>,
    /// One record per indexed object.
    pub objects: Vec<RawPackIndexedObject>,
}
148
/// A single object found while indexing a raw pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackIndexedObject {
    /// Object id.
    pub oid: ObjectId,
    /// Object type.
    pub object_type: ObjectType,
    /// Object size; `index_raw_pack` fills this with the object body length.
    pub size: u64,
    /// Offset recorded for the object by the pack index builder.
    pub offset: u64,
}
156
/// Reader adapter that copies everything read from `reader` into `writer`.
struct PackInstallTeeReader<'a, R, W> {
    /// Source the bytes are read from.
    reader: &'a mut R,
    /// Sink that receives a copy of every byte read.
    writer: &'a mut W,
}
161
162impl<R, W> Read for PackInstallTeeReader<'_, R, W>
163where
164 R: Read,
165 W: Write,
166{
167 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
168 let len = self.reader.read(buf)?;
169 if len > 0 {
170 self.writer.write_all(&buf[..len])?;
171 }
172 Ok(len)
173 }
174}
175
/// Describes a pack of reachable objects that was written to a file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReachablePackFile {
    /// Path the pack was written to.
    pub pack_path: PathBuf,
    /// Pack size as reported by the pack writer.
    pub pack_size: u64,
    /// Pack checksum as reported by the pack writer.
    pub checksum: ObjectId,
    /// Number of entries the pack writer reported.
    pub object_count: usize,
    /// Delta count as reported by the pack writer.
    pub delta_count: u32,
}
184
/// Describes a pack of reachable objects that was streamed to a writer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReachablePackWriteSummary {
    /// Pack index bytes produced alongside the pack.
    pub index: Vec<u8>,
    /// Pack checksum as reported by the pack writer.
    pub checksum: ObjectId,
    /// Number of entries the pack writer reported.
    pub object_count: usize,
    /// Delta count as reported by the pack writer.
    pub delta_count: u32,
    /// Pack size as reported by the pack writer.
    pub pack_size: u64,
}
193
/// Options controlling how a raw pack is installed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// Forwarded to the promisor sidecar writer during installation.
    // NOTE(review): presumably `true` marks the pack as a promisor pack —
    // confirm against `write_promisor_pack_sidecar`.
    pub promisor: bool,
}
198
/// Destination that can ingest a raw pack stream.
pub trait RawPackInstaller {
    /// Consumes the pack bytes supplied by `reader` and reports the ids of
    /// the objects that were installed.
    fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<RawPackInstallResult>
    where
        R: Read;
}
204
/// Outcome of resolving an object-id prefix.
// NOTE(review): the code producing these values is outside this view; the
// variant descriptions below follow the variant names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// No object matched.
    Missing,
    /// Exactly one object matched; carries its id.
    Unique(ObjectId),
    /// More than one object matched; carries the candidate ids.
    Ambiguous(Vec<ObjectId>),
}
211
/// Storage details of a single object.
// NOTE(review): the producer of this struct is outside this view; field
// meanings are inferred from their names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Presumably the number of bytes the object occupies on disk.
    pub disk_size: u64,
    /// Presumably the id of the delta base the object is stored against.
    pub deltabase: ObjectId,
}
217
218impl RawPackInstaller for FileObjectDatabase {
219 fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<RawPackInstallResult>
220 where
221 R: Read,
222 {
223 let result = FileObjectDatabase::install_raw_pack_from_reader(self, reader)?;
224 Ok(RawPackInstallResult {
225 object_ids: result.object_ids,
226 })
227 }
228}
229
230impl RawPackInstaller for ObjectDatabase {
231 fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<RawPackInstallResult>
232 where
233 R: Read,
234 {
235 let mut pack_bytes = Vec::new();
236 reader.read_to_end(&mut pack_bytes)?;
237 let result = unpack_packfile_objects(&pack_bytes, self.format, self)?;
238 Ok(RawPackInstallResult {
239 object_ids: result.written_objects,
240 })
241 }
242}
243
244impl RawPackStreamingInstall {
245 pub fn bytes_written(&self) -> u64 {
246 self.written
247 }
248
249 pub fn pack_path(&self) -> &Path {
250 &self.pack_path
251 }
252
253 pub fn index_path(&self) -> &Path {
254 &self.index_path
255 }
256
257 pub fn finish(mut self) -> Result<PackInstallResult> {
258 let result = (|| -> Result<PackInstallResult> {
259 let mut file = self.file.take().ok_or_else(|| {
260 GitError::InvalidFormat("raw pack stream already finished".into())
261 })?;
262 file.flush()?;
263 file.sync_all()?;
264 drop(file);
265
266 if self.written != self.expected_pack_size {
267 return Err(GitError::InvalidFormat(format!(
268 "raw pack stream length mismatch: expected {}, got {}",
269 self.expected_pack_size, self.written
270 )));
271 }
272
273 let built = PackIndex::write_v2_for_pack_path(&self.temp_pack_path, self.format)?;
274 if built.pack_checksum != self.expected_pack_id {
275 return Err(GitError::InvalidFormat(format!(
276 "raw pack stream checksum mismatch: expected {}, got {}",
277 self.expected_pack_id, built.pack_checksum
278 )));
279 }
280
281 match fs::rename(&self.temp_pack_path, &self.pack_path) {
282 Ok(()) => {}
283 Err(_) if self.pack_path.exists() => {
284 let _ = fs::remove_file(&self.temp_pack_path);
285 }
286 Err(err) => return Err(GitError::Io(err.to_string())),
287 }
288 write_pack_component(&self.index_path, &built.index)?;
289 let promisor_path = write_promisor_pack_sidecar(
290 &self.pack_dir,
291 &self.pack_name,
292 self.options.promisor,
293 )?;
294 Ok(PackInstallResult {
295 pack_name: self.pack_name.clone(),
296 pack_path: self.pack_path.clone(),
297 index_path: self.index_path.clone(),
298 promisor_path,
299 object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
300 })
301 })();
302
303 if result.is_ok() {
304 self.finished = true;
305 } else {
306 let _ = fs::remove_file(&self.temp_pack_path);
307 }
308 result
309 }
310}
311
312impl Write for RawPackStreamingInstall {
313 fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
314 let next_written = self.written.checked_add(buf.len() as u64).ok_or_else(|| {
315 std::io::Error::new(std::io::ErrorKind::InvalidData, "pack size overflow")
316 })?;
317 if next_written > self.expected_pack_size {
318 return Err(std::io::Error::new(
319 std::io::ErrorKind::InvalidData,
320 format!(
321 "raw pack stream exceeds expected size {}; got at least {}",
322 self.expected_pack_size, next_written
323 ),
324 ));
325 }
326 let file = self.file.as_mut().ok_or_else(|| {
327 std::io::Error::new(
328 std::io::ErrorKind::BrokenPipe,
329 "raw pack stream already finished",
330 )
331 })?;
332 let written = file.write(buf)?;
333 self.written = self.written.checked_add(written as u64).ok_or_else(|| {
334 std::io::Error::new(std::io::ErrorKind::InvalidData, "pack size overflow")
335 })?;
336 Ok(written)
337 }
338
339 fn flush(&mut self) -> std::io::Result<()> {
340 match self.file.as_mut() {
341 Some(file) => file.flush(),
342 None => Ok(()),
343 }
344 }
345}
346
347impl Drop for RawPackStreamingInstall {
348 fn drop(&mut self) {
349 if !self.finished {
350 let _ = self.file.take();
351 let _ = fs::remove_file(&self.temp_pack_path);
352 }
353 }
354}
355
356pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
357 let mut missing = Vec::new();
358 for prerequisite in &bundle.prerequisites {
359 match reader.read_object(&prerequisite.oid) {
360 Ok(object) => {
361 let actual = object.object_id(bundle.format)?;
362 if actual != prerequisite.oid {
363 return Err(GitError::InvalidObject(format!(
364 "bundle prerequisite {} hashes to {actual}",
365 prerequisite.oid
366 )));
367 }
368 }
369 Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
370 Err(err) => return Err(err),
371 }
372 }
373 if missing.is_empty() {
374 return Ok(());
375 }
376 Err(GitError::object_not_found_in(
377 missing[0],
378 MissingObjectContext::PackInstall,
379 ))
380}
381
382pub fn unbundle_objects<R, W>(
383 bundle: &Bundle,
384 prerequisite_reader: &R,
385 writer: &mut W,
386) -> Result<BundleUnbundleResult>
387where
388 R: ObjectReader,
389 W: ObjectWriter,
390{
391 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
392 let pack = PackFile::parse_bundle(bundle)?;
393 let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
394 Ok(BundleUnbundleResult {
395 written_objects,
396 references: bundle.references.clone(),
397 })
398}
399
400pub fn install_bundle_pack<R>(
401 bundle: &Bundle,
402 prerequisite_reader: &R,
403 destination: &impl RawPackInstaller,
404) -> Result<BundleUnbundleResult>
405where
406 R: ObjectReader,
407{
408 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
409 let mut reader = bundle.pack.as_slice();
410 let install = destination.install_raw_pack_from_reader(&mut reader)?;
411 Ok(BundleUnbundleResult {
412 written_objects: install.object_ids,
413 references: bundle.references.clone(),
414 })
415}
416
417pub fn unpack_packfile_objects<W>(
418 pack_bytes: &[u8],
419 format: ObjectFormat,
420 writer: &W,
421) -> Result<PackUnpackResult>
422where
423 W: ObjectWriter,
424{
425 let pack = PackFile::parse(pack_bytes, format)?;
426 write_pack_objects(pack, writer, "pack")
427}
428
429pub fn index_raw_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<RawPackIndexResult> {
430 let pack = PackFile::parse(pack_bytes, format)?;
431 let built = PackIndex::write_v2_for_pack(pack_bytes, format)?;
432 if built.pack_checksum != pack.checksum {
433 return Err(GitError::InvalidFormat(
434 "pack index checksum does not match parsed pack checksum".to_string(),
435 ));
436 }
437
438 let offsets = built
439 .entries
440 .iter()
441 .map(|entry| (entry.oid, entry.offset))
442 .collect::<HashMap<_, _>>();
443 let mut objects = Vec::with_capacity(pack.entries.len());
444 for object in pack.entries {
445 let offset = offsets.get(&object.entry.oid).copied().ok_or_else(|| {
446 GitError::InvalidFormat(format!(
447 "pack index is missing object {}",
448 object.entry.oid.to_hex()
449 ))
450 })?;
451 objects.push(RawPackIndexedObject {
452 oid: object.entry.oid,
453 object_type: object.object.object_type,
454 size: object.object.body.len() as u64,
455 offset,
456 });
457 }
458
459 Ok(RawPackIndexResult {
460 pack_id: built.pack_checksum,
461 index: built.index,
462 objects,
463 })
464}
465
466pub fn index_raw_pack_from_reader<R>(
467 reader: &mut R,
468 format: ObjectFormat,
469) -> Result<RawPackIndexResult>
470where
471 R: Read,
472{
473 Ok(stream_index_build_to_raw_result(
474 PackIndex::write_v2_for_pack_reader_to_trailer(reader, format)?,
475 ))
476}
477
478pub fn index_raw_pack_from_reader_with_len<R>(
479 reader: &mut R,
480 format: ObjectFormat,
481 pack_len: u64,
482) -> Result<RawPackIndexResult>
483where
484 R: Read,
485{
486 Ok(stream_index_build_to_raw_result(
487 PackIndex::write_v2_for_pack_reader_with_len(reader, format, pack_len)?,
488 ))
489}
490
491pub fn index_raw_pack_file(
492 path: impl AsRef<Path>,
493 format: ObjectFormat,
494) -> Result<RawPackIndexResult> {
495 Ok(stream_index_build_to_raw_result(
496 PackIndex::write_v2_for_pack_path(path, format)?,
497 ))
498}
499
500fn stream_index_build_to_raw_result(built: PackStreamIndexBuild) -> RawPackIndexResult {
501 let objects = built
502 .objects
503 .into_iter()
504 .map(|object| RawPackIndexedObject {
505 oid: object.oid,
506 object_type: object.object_type,
507 size: object.size,
508 offset: object.offset,
509 })
510 .collect::<Vec<_>>();
511 RawPackIndexResult {
512 pack_id: built.pack_checksum,
513 index: built.index,
514 objects,
515 }
516}
517
518fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
519where
520 W: ObjectWriter,
521{
522 let mut written_objects = Vec::with_capacity(pack.entries.len());
523 for entry in pack.entries {
524 let expected = entry.entry.oid;
525 let actual = writer.write_object(entry.object)?;
526 if actual != expected {
527 return Err(GitError::InvalidObject(format!(
528 "{source} object id mismatch: expected {expected}, wrote {actual}"
529 )));
530 }
531 written_objects.push(actual);
532 }
533 Ok(PackUnpackResult { written_objects })
534}
535
536pub fn collect_reachable_object_ids<R, I>(
537 reader: &R,
538 format: ObjectFormat,
539 starts: I,
540) -> Result<HashSet<ObjectId>>
541where
542 R: ObjectReader,
543 I: IntoIterator<Item = ObjectId>,
544{
545 walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
546}
547
548pub fn collect_reachable_object_ids_with_cut<R, I>(
553 reader: &R,
554 format: ObjectFormat,
555 starts: I,
556 cut: &HashSet<ObjectId>,
557) -> Result<HashSet<ObjectId>>
558where
559 R: ObjectReader,
560 I: IntoIterator<Item = ObjectId>,
561{
562 walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
563}
564
565pub fn collect_reachable_object_ids_excluding<R, I>(
569 reader: &R,
570 format: ObjectFormat,
571 starts: I,
572 excluded: &HashSet<ObjectId>,
573) -> Result<HashSet<ObjectId>>
574where
575 R: ObjectReader,
576 I: IntoIterator<Item = ObjectId>,
577{
578 walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
579}
580
581pub fn collect_reachable_objects<R, I>(
582 reader: &R,
583 format: ObjectFormat,
584 starts: I,
585 excluded: &HashSet<ObjectId>,
586) -> Result<Vec<Arc<EncodedObject>>>
587where
588 R: ObjectReader,
589 I: IntoIterator<Item = ObjectId>,
590{
591 let mut objects = Vec::new();
592 walk_reachable_objects(reader, format, starts, excluded, |_, object| {
593 objects.push(Arc::clone(object));
594 })?;
595 Ok(objects)
596}
597
/// An object visited by the reachability walk, paired with its id so the
/// pack writer can be given both.
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id under which the walk visited the object.
    oid: ObjectId,
    /// Shared handle to the object itself.
    object: Arc<EncodedObject>,
}
603
604fn collect_reachable_pack_objects<R, I>(
605 reader: &R,
606 format: ObjectFormat,
607 starts: I,
608 excluded: &HashSet<ObjectId>,
609) -> Result<Vec<ReachablePackObject>>
610where
611 R: ObjectReader,
612 I: IntoIterator<Item = ObjectId>,
613{
614 let mut objects = Vec::new();
615 walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
616 objects.push(ReachablePackObject {
617 oid: *oid,
618 object: Arc::clone(object),
619 });
620 })?;
621 Ok(objects)
622}
623
624fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
625 objects
626 .iter()
627 .map(|entry| PackInput {
628 oid: &entry.oid,
629 object: &entry.object,
630 })
631 .collect()
632}
633
634pub fn install_reachable_pack<I>(
635 source: &impl ObjectReader,
636 destination: &impl RawPackInstaller,
637 format: ObjectFormat,
638 starts: I,
639) -> Result<Option<RawPackInstallResult>>
640where
641 I: IntoIterator<Item = ObjectId>,
642{
643 install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
644}
645
646pub fn install_reachable_pack_excluding<I>(
647 source: &impl ObjectReader,
648 destination: &impl RawPackInstaller,
649 format: ObjectFormat,
650 starts: I,
651 excluded: &HashSet<ObjectId>,
652) -> Result<Option<RawPackInstallResult>>
653where
654 I: IntoIterator<Item = ObjectId>,
655{
656 let pack = match build_reachable_pack(source, format, starts, excluded)? {
657 Some(pack) => pack,
658 None => return Ok(None),
659 };
660 let mut reader = pack.pack.as_slice();
661 destination
662 .install_raw_pack_from_reader(&mut reader)
663 .map(Some)
664}
665
666pub fn build_reachable_pack<R, I>(
667 reader: &R,
668 format: ObjectFormat,
669 starts: I,
670 excluded: &HashSet<ObjectId>,
671) -> Result<Option<PackWrite>>
672where
673 R: ObjectReader,
674 I: IntoIterator<Item = ObjectId>,
675{
676 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
677 if objects.is_empty() {
678 return Ok(None);
679 }
680 let inputs = pack_inputs(&objects);
685 PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
686}
687
688pub fn build_reachable_pack_file<R, I>(
689 reader: &R,
690 format: ObjectFormat,
691 starts: I,
692 excluded: &HashSet<ObjectId>,
693 pack_path: impl AsRef<Path>,
694) -> Result<Option<ReachablePackFile>>
695where
696 R: ObjectReader,
697 I: IntoIterator<Item = ObjectId>,
698{
699 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
700 if objects.is_empty() {
701 return Ok(None);
702 }
703 let inputs = pack_inputs(&objects);
704 let pack_path = pack_path.as_ref();
705 if let Some(parent) = pack_path.parent() {
706 fs::create_dir_all(parent)?;
707 }
708 let mut file = fs::OpenOptions::new()
709 .write(true)
710 .create(true)
711 .truncate(true)
712 .open(pack_path)?;
713 let summary = PackFile::write_packed_with_known_ids_to_writer(
714 &inputs,
715 format,
716 &PackWriteOptions::new(),
717 &mut file,
718 )?;
719 file.sync_all()?;
720 Ok(Some(reachable_pack_file_result(pack_path, summary)))
721}
722
723pub fn write_reachable_pack_to_writer<R, I, W>(
724 reader: &R,
725 format: ObjectFormat,
726 starts: I,
727 excluded: &HashSet<ObjectId>,
728 writer: &mut W,
729) -> Result<Option<ReachablePackWriteSummary>>
730where
731 R: ObjectReader,
732 I: IntoIterator<Item = ObjectId>,
733 W: Write,
734{
735 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
736 if objects.is_empty() {
737 return Ok(None);
738 }
739 let inputs = pack_inputs(&objects);
740 let summary = PackFile::write_packed_with_known_ids_to_writer(
741 &inputs,
742 format,
743 &PackWriteOptions::new(),
744 writer,
745 )?;
746 Ok(Some(reachable_pack_write_summary(summary)))
747}
748
749fn reachable_pack_file_result(path: &Path, summary: PackWriteSummary) -> ReachablePackFile {
750 ReachablePackFile {
751 pack_path: path.to_path_buf(),
752 pack_size: summary.pack_size,
753 checksum: summary.checksum,
754 object_count: summary.entries.len(),
755 delta_count: summary.delta_count,
756 }
757}
758
759fn reachable_pack_write_summary(summary: PackWriteSummary) -> ReachablePackWriteSummary {
760 ReachablePackWriteSummary {
761 index: summary.index,
762 checksum: summary.checksum,
763 object_count: summary.entries.len(),
764 delta_count: summary.delta_count,
765 pack_size: summary.pack_size,
766 }
767}
768
769pub fn build_and_install_reachable_pack<R, I>(
770 source: &R,
771 destination: &FileObjectDatabase,
772 format: ObjectFormat,
773 starts: I,
774 excluded: &HashSet<ObjectId>,
775 options: RawPackInstallOptions,
776) -> Result<Option<PackInstallResult>>
777where
778 R: ObjectReader,
779 I: IntoIterator<Item = ObjectId>,
780{
781 build_and_install_reachable_pack_filtered(
782 source,
783 destination,
784 format,
785 starts,
786 excluded,
787 options,
788 None,
789 None,
790 )
791}
792
/// Object filter applied by [`build_and_install_reachable_pack_filtered`].
///
/// Under every filter, objects named directly in the start set are kept.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// Drop all blobs.
    BlobNone,
    /// Keep only blobs whose body is strictly smaller than the given size.
    BlobLimit(u64),
    /// Drop all blobs and keep only trees whose computed depth is strictly
    /// less than the given value (a commit's root tree has depth 0).
    TreeDepth(u32),
    /// Keep only blobs that sit at one of the listed paths (compared as
    /// `/`-joined strings built from the tree entry names).
    SparsePathSet(Vec<String>),
}
810
811#[allow(clippy::too_many_arguments)]
815pub fn build_and_install_reachable_pack_filtered<R, I>(
816 source: &R,
817 destination: &FileObjectDatabase,
818 format: ObjectFormat,
819 starts: I,
820 excluded: &HashSet<ObjectId>,
821 options: RawPackInstallOptions,
822 filter: Option<PackObjectFilter>,
823 unpack_limit: Option<usize>,
824) -> Result<Option<PackInstallResult>>
825where
826 R: ObjectReader,
827 I: IntoIterator<Item = ObjectId>,
828{
829 let starts: Vec<ObjectId> = starts.into_iter().collect();
830 let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
831 let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
832 match filter {
833 Some(PackObjectFilter::BlobNone) => {
834 objects.retain(|entry| {
835 entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
836 });
837 }
838 Some(PackObjectFilter::BlobLimit(limit)) => {
839 objects.retain(|entry| {
840 entry.object.object_type != ObjectType::Blob
841 || wanted.contains(&entry.oid)
842 || (entry.object.body.len() as u64) < limit
843 });
844 }
845 Some(PackObjectFilter::TreeDepth(depth)) => {
846 let tree_depths = collect_tree_filter_depths(source, format, &objects)?;
847 objects.retain(|entry| {
848 if wanted.contains(&entry.oid) {
849 return true;
850 }
851 match entry.object.object_type {
852 ObjectType::Blob => false,
853 ObjectType::Tree => tree_depths
854 .get(&entry.oid)
855 .is_some_and(|tree_depth| *tree_depth < depth),
856 _ => true,
857 }
858 });
859 }
860 Some(PackObjectFilter::SparsePathSet(paths)) => {
861 let allowed_blobs = collect_sparse_filter_blobs(source, format, &objects, &paths)?;
862 objects.retain(|entry| {
863 entry.object.object_type != ObjectType::Blob
864 || wanted.contains(&entry.oid)
865 || allowed_blobs.contains(&entry.oid)
866 });
867 }
868 None => {}
869 }
870 if objects.is_empty() {
871 return Ok(None);
872 }
873 if let Some(limit) = unpack_limit
877 && objects.len() < limit
878 {
879 for entry in &objects {
880 destination.loose().write_object((*entry.object).clone())?;
881 }
882 return Ok(None);
883 }
884 let inputs = pack_inputs(&objects);
885 let pack_dir = destination.objects_dir.join("pack");
886 fs::create_dir_all(&pack_dir)?;
887 let temp_pack_path = unique_temp_path(&pack_dir);
888 let result = (|| -> Result<PackInstallResult> {
889 let mut file = fs::OpenOptions::new()
890 .write(true)
891 .create_new(true)
892 .open(&temp_pack_path)?;
893 let summary = PackFile::write_packed_with_known_ids_to_writer(
894 &inputs,
895 format,
896 &PackWriteOptions::new(),
897 &mut file,
898 )?;
899 file.flush()?;
900 file.sync_all()?;
901 drop(file);
902 trace_packfile_path(&temp_pack_path)?;
903 destination.install_pack_file_from_temp(
904 &temp_pack_path,
905 summary.checksum,
906 &summary.index,
907 summary.entries.iter().map(|entry| entry.oid).collect(),
908 options,
909 )
910 })();
911 if result.is_err() {
912 let _ = fs::remove_file(&temp_pack_path);
913 }
914 result.map(Some)
915}
916
917fn trace_packfile_path(pack_path: &Path) -> Result<()> {
918 let Some(path) = env::var_os("GIT_TRACE_PACKFILE").filter(|value| !value.is_empty()) else {
919 return Ok(());
920 };
921 fs::copy(pack_path, path)?;
922 Ok(())
923}
924
925fn collect_tree_filter_depths<R>(
926 reader: &R,
927 format: ObjectFormat,
928 objects: &[ReachablePackObject],
929) -> Result<HashMap<ObjectId, u32>>
930where
931 R: ObjectReader,
932{
933 let available: HashSet<ObjectId> = objects.iter().map(|entry| entry.oid).collect();
934 let mut depths = HashMap::new();
935 let mut stack = Vec::new();
936 for entry in objects {
937 if entry.object.object_type != ObjectType::Commit {
938 continue;
939 }
940 let commit = Commit::parse(format, &entry.object.body)?;
941 if available.contains(&commit.tree) {
942 stack.push((commit.tree, 0u32));
943 }
944 }
945 while let Some((tree_oid, depth)) = stack.pop() {
946 if depths
947 .get(&tree_oid)
948 .is_some_and(|old_depth| *old_depth <= depth)
949 {
950 continue;
951 }
952 depths.insert(tree_oid, depth);
953 let tree = reader.read_object(&tree_oid)?;
954 if tree.object_type != ObjectType::Tree {
955 continue;
956 }
957 let child_depth = depth.saturating_add(1);
958 for entry in TreeEntries::new(format, &tree.body) {
959 let entry = entry?;
960 if tree_entry_object_type(entry.mode) == ObjectType::Tree
961 && available.contains(&entry.oid)
962 {
963 stack.push((entry.oid, child_depth));
964 }
965 }
966 }
967 Ok(depths)
968}
969
970fn collect_sparse_filter_blobs<R>(
971 reader: &R,
972 format: ObjectFormat,
973 objects: &[ReachablePackObject],
974 paths: &[String],
975) -> Result<HashSet<ObjectId>>
976where
977 R: ObjectReader,
978{
979 let wanted_paths: HashSet<&str> = paths.iter().map(String::as_str).collect();
980 let mut allowed = HashSet::new();
981 let mut seen_trees = HashSet::new();
982 for entry in objects {
983 if entry.object.object_type != ObjectType::Commit {
984 continue;
985 }
986 let commit = Commit::parse(format, &entry.object.body)?;
987 collect_sparse_tree_blobs(
988 reader,
989 format,
990 &commit.tree,
991 "",
992 &wanted_paths,
993 &mut seen_trees,
994 &mut allowed,
995 )?;
996 }
997 Ok(allowed)
998}
999
1000fn collect_sparse_tree_blobs<R>(
1001 reader: &R,
1002 format: ObjectFormat,
1003 tree_oid: &ObjectId,
1004 prefix: &str,
1005 wanted_paths: &HashSet<&str>,
1006 seen_trees: &mut HashSet<ObjectId>,
1007 allowed: &mut HashSet<ObjectId>,
1008) -> Result<()>
1009where
1010 R: ObjectReader,
1011{
1012 if !seen_trees.insert(*tree_oid) {
1013 return Ok(());
1014 }
1015 let tree = reader.read_object(tree_oid)?;
1016 if tree.object_type != ObjectType::Tree {
1017 return Ok(());
1018 }
1019 for entry in TreeEntries::new(format, &tree.body) {
1020 let entry = entry?;
1021 let name = String::from_utf8_lossy(entry.name);
1022 let path = if prefix.is_empty() {
1023 name.into_owned()
1024 } else {
1025 format!("{prefix}/{name}")
1026 };
1027 if tree_entry_object_type(entry.mode) == ObjectType::Tree {
1028 collect_sparse_tree_blobs(
1029 reader,
1030 format,
1031 &entry.oid,
1032 &path,
1033 wanted_paths,
1034 seen_trees,
1035 allowed,
1036 )?;
1037 } else if wanted_paths.contains(path.as_str()) {
1038 allowed.insert(entry.oid);
1039 }
1040 }
1041 Ok(())
1042}
1043
1044pub fn assemble_pack_with_verbatim_reuse(
1054 format: ObjectFormat,
1055 reused_pack_bytes: &[u8],
1056 appended: &[PackInput<'_>],
1057) -> Result<(Vec<u8>, u32)> {
1058 assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
1059}
1060
1061pub fn assemble_pack_with_verbatim_reuses(
1064 format: ObjectFormat,
1065 reused_packs: &[&[u8]],
1066 appended: &[PackInput<'_>],
1067) -> Result<(Vec<u8>, u32)> {
1068 let hash_len = format.raw_len();
1069 let mut reused_count = 0u32;
1070 let mut capacity = 12 + hash_len + 64 * appended.len();
1071 for reused_pack_bytes in reused_packs {
1072 if reused_pack_bytes.len() < 12 + hash_len {
1073 return Err(GitError::InvalidFormat("reused pack too short".into()));
1074 }
1075 if &reused_pack_bytes[..4] != b"PACK" {
1076 return Err(GitError::InvalidFormat(
1077 "reused pack has no signature".into(),
1078 ));
1079 }
1080 let version = u32::from_be_bytes([
1081 reused_pack_bytes[4],
1082 reused_pack_bytes[5],
1083 reused_pack_bytes[6],
1084 reused_pack_bytes[7],
1085 ]);
1086 if version != 2 {
1087 return Err(GitError::Unsupported(format!(
1088 "reused pack version {version}"
1089 )));
1090 }
1091 let count = u32::from_be_bytes([
1092 reused_pack_bytes[8],
1093 reused_pack_bytes[9],
1094 reused_pack_bytes[10],
1095 reused_pack_bytes[11],
1096 ]);
1097 reused_count = reused_count
1098 .checked_add(count)
1099 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
1100 capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
1101 }
1102 let total = reused_count
1103 .checked_add(appended.len() as u32)
1104 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
1105
1106 let mut out = Vec::with_capacity(capacity);
1107 out.extend_from_slice(b"PACK");
1108 out.extend_from_slice(&2u32.to_be_bytes());
1109 out.extend_from_slice(&total.to_be_bytes());
1110 for reused_pack_bytes in reused_packs {
1111 out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
1112 }
1113 for input in appended {
1114 write_undeltified_pack_entry(&mut out, input.object)?;
1115 }
1116 let checksum = sley_core::digest_bytes(format, &out)?;
1117 out.extend_from_slice(checksum.as_bytes());
1118 Ok((out, reused_count))
1119}
1120
1121pub fn assemble_pack_with_verbatim_entries(
1124 format: ObjectFormat,
1125 reused_entries: &[&[u8]],
1126 appended: &[PackInput<'_>],
1127) -> Result<(Vec<u8>, u32)> {
1128 let reused_count = u32::try_from(reused_entries.len())
1129 .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
1130 let total = reused_count
1131 .checked_add(appended.len() as u32)
1132 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
1133
1134 let mut capacity = 12 + format.raw_len() + 64 * appended.len();
1135 for entry in reused_entries {
1136 capacity = capacity.saturating_add(entry.len());
1137 }
1138 let mut out = Vec::with_capacity(capacity);
1139 out.extend_from_slice(b"PACK");
1140 out.extend_from_slice(&2u32.to_be_bytes());
1141 out.extend_from_slice(&total.to_be_bytes());
1142 for entry in reused_entries {
1143 out.extend_from_slice(entry);
1144 }
1145 for input in appended {
1146 write_undeltified_pack_entry(&mut out, input.object)?;
1147 }
1148 let checksum = sley_core::digest_bytes(format, &out)?;
1149 out.extend_from_slice(checksum.as_bytes());
1150 Ok((out, reused_count))
1151}
1152
1153fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
1155 let type_bits: u8 = match object.object_type {
1156 ObjectType::Commit => 1,
1157 ObjectType::Tree => 2,
1158 ObjectType::Blob => 3,
1159 ObjectType::Tag => 4,
1160 };
1161 let mut size = object.body.len() as u64;
1162 let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
1163 size >>= 4;
1164 while size > 0 {
1165 out.push(byte | 0x80);
1166 byte = (size & 0x7f) as u8;
1167 size >>= 7;
1168 }
1169 out.push(byte);
1170 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
1171 encoder.write_all(&object.body)?;
1172 out.extend_from_slice(&encoder.finish()?);
1173 Ok(())
1174}
1175
/// Result of repacking the reachable objects of a repository.
// NOTE(review): the code that fills this struct lies beyond the visible part
// of the file; field descriptions follow the field names and types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the new pack.
    pub pack: Vec<u8>,
    /// Bytes of the new pack's index.
    pub idx: Vec<u8>,
    /// Number of objects in the new pack.
    pub object_count: usize,
    /// Presumably the existing pack files made redundant by the new pack.
    pub obsolete_packs: Vec<PathBuf>,
    /// Presumably the ids of loose objects that are now contained in the pack.
    pub packed_loose: Vec<ObjectId>,
    /// Presumably the stems of packs that were retained rather than repacked.
    retained_pack_stems: Vec<String>,
    /// Checksum of the new pack.
    pack_checksum: ObjectId,
    /// Index entries of the new pack.
    index_entries: Vec<PackIndexEntry>,
}
1203
/// Options for [`repack_reachable_objects_with_options`].
#[derive(Debug, Clone, Default)]
pub struct RepackOptions {
    /// When `true`, the object database is opened without alternates and
    /// objects packed in alternates are not added to the repack.
    pub local: bool,
    /// When `true`, no objects are excluded on account of retained packs;
    /// when `false`, objects contained in retained packs are left out.
    pub pack_kept_objects: bool,
    /// Pack stems handed to the retained-pack computation.
    // NOTE(review): presumably packs that must be kept as they are — confirm
    // against `repack_retained_pack_stems`.
    pub keep_pack_stems: HashSet<String>,
}
1213
1214pub fn repack_reachable_objects(
1234 git_dir: &Path,
1235 format: ObjectFormat,
1236 roots: &[ObjectId],
1237) -> Result<Option<RepackResult>> {
1238 repack_reachable_objects_with_options(git_dir, format, roots, &RepackOptions::default())
1239}
1240
1241pub fn repack_reachable_objects_with_options(
1242 git_dir: &Path,
1243 format: ObjectFormat,
1244 roots: &[ObjectId],
1245 options: &RepackOptions,
1246) -> Result<Option<RepackResult>> {
1247 let objects_dir = repository_objects_dir(git_dir);
1248 let database = if options.local {
1249 FileObjectDatabase::without_alternates(objects_dir.clone(), format)
1250 } else {
1251 FileObjectDatabase::new(objects_dir.clone(), format)
1252 };
1253 let retained_pack_stems = repack_retained_pack_stems(
1254 &objects_dir.join("pack"),
1255 &options.keep_pack_stems,
1256 !options.pack_kept_objects,
1257 )?;
1258 let excluded_oids = if options.pack_kept_objects {
1259 HashSet::new()
1260 } else {
1261 pack_oids_for_stems(&objects_dir.join("pack"), format, &retained_pack_stems)?
1262 };
1263
1264 let mut seen: HashSet<ObjectId> = HashSet::new();
1265 let mut objects: Vec<ReachablePackObject> = Vec::new();
1266 let mut pending: Vec<ObjectId> = roots.to_vec();
1267 while let Some(oid) = pending.pop() {
1268 if !seen.insert(oid) {
1269 continue;
1270 }
1271 let object = match database.read_object(&oid) {
1272 Ok(object) => object,
1273 Err(GitError::NotFound(_)) => continue,
1274 Err(err) => return Err(err),
1275 };
1276 match object.object_type {
1277 ObjectType::Commit => {
1278 let commit = Commit::parse_ref(format, &object.body)?;
1279 pending.extend(grafted_parents(&database, &oid, commit.parents));
1280 pending.push(commit.tree);
1281 }
1282 ObjectType::Tree => {
1283 for entry in TreeEntries::new(format, &object.body) {
1284 let entry = entry?;
1285 if !entry.is_gitlink() {
1286 pending.push(entry.oid);
1287 }
1288 }
1289 }
1290 ObjectType::Tag => {
1291 let tag = Tag::parse_ref(format, &object.body)?;
1292 pending.push(tag.object);
1293 }
1294 ObjectType::Blob => {}
1295 }
1296 if !excluded_oids.contains(&oid) {
1297 objects.push(ReachablePackObject { oid, object });
1298 }
1299 }
1300
1301 if !options.local {
1307 for (alternate, oid) in alternate_packed_object_ids(&objects_dir, format)? {
1308 if excluded_oids.contains(&oid) || !seen.insert(oid) {
1309 continue;
1310 }
1311 let alternate_db = FileObjectDatabase::without_alternates(alternate, format);
1312 match alternate_db.read_object(&oid) {
1313 Ok(object) => objects.push(ReachablePackObject { oid, object }),
1314 Err(GitError::NotFound(_)) => {}
1315 Err(err) => return Err(err),
1316 }
1317 }
1318 }
1319
1320 if objects.is_empty() {
1321 return Ok(None);
1322 }
1323
1324 let inputs = pack_inputs(&objects);
1325 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1326 let object_count = written.entries.len();
1327
1328 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
1331 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
1332 .into_iter()
1333 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
1334 .collect();
1335
1336 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1337 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1338 .into_iter()
1339 .filter(|oid| packed_oid_set.contains(oid))
1340 .collect();
1341 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1342
1343 let pack_checksum = written.checksum;
1344 let index_entries = written.entries.clone();
1345 Ok(Some(RepackResult {
1346 pack: written.pack,
1347 idx: written.index,
1348 object_count,
1349 obsolete_packs,
1350 packed_loose,
1351 retained_pack_stems,
1352 pack_checksum,
1353 index_entries,
1354 }))
1355}
1356
1357fn repack_retained_pack_stems(
1358 pack_dir: &Path,
1359 explicit: &HashSet<String>,
1360 keep_dot_keep: bool,
1361) -> Result<Vec<String>> {
1362 let mut stems = explicit.clone();
1363 if keep_dot_keep {
1364 for pack_path in existing_pack_files(pack_dir)? {
1365 if pack_path.with_extension("keep").exists()
1366 && let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
1367 {
1368 stems.insert(stem.to_string());
1369 }
1370 }
1371 }
1372 let mut stems = stems.into_iter().collect::<Vec<_>>();
1373 stems.sort();
1374 Ok(stems)
1375}
1376
1377fn pack_oids_for_stems(
1378 pack_dir: &Path,
1379 format: ObjectFormat,
1380 stems: &[String],
1381) -> Result<HashSet<ObjectId>> {
1382 let wanted: HashSet<&str> = stems.iter().map(String::as_str).collect();
1383 if wanted.is_empty() {
1384 return Ok(HashSet::new());
1385 }
1386 let mut oids = HashSet::new();
1387 for pack_path in existing_pack_files(pack_dir)? {
1388 let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1389 continue;
1390 };
1391 if !wanted.contains(stem) {
1392 continue;
1393 }
1394 let index_path = pack_path.with_extension("idx");
1395 if !index_path.exists() {
1396 continue;
1397 }
1398 let index = PackIndex::parse(&fs::read(index_path)?, format)?;
1399 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
1400 }
1401 Ok(oids)
1402}
1403
1404fn alternate_packed_object_ids(
1405 objects_dir: &Path,
1406 format: ObjectFormat,
1407) -> Result<Vec<(PathBuf, ObjectId)>> {
1408 let mut oids = Vec::new();
1409 for alternate in alternate_object_dirs(objects_dir) {
1410 let mut alternate_oids = HashSet::new();
1411 collect_packed_object_ids(&alternate.join("pack"), format, &mut alternate_oids)?;
1412 oids.extend(
1413 alternate_oids
1414 .into_iter()
1415 .map(|oid| (alternate.clone(), oid)),
1416 );
1417 }
1418 oids.sort_by(|left, right| {
1419 left.0
1420 .cmp(&right.0)
1421 .then(left.1.as_bytes().cmp(right.1.as_bytes()))
1422 });
1423 Ok(oids)
1424}
1425
1426pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1427 let objects_dir = repository_objects_dir(git_dir);
1428 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1429
1430 let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
1434 if all_oids.is_empty() {
1435 return Ok(None);
1436 }
1437
1438 let mut objects = Vec::with_capacity(all_oids.len());
1442 for oid in &all_oids {
1443 objects.push(ReachablePackObject {
1444 oid: *oid,
1445 object: database.read_object(oid)?,
1446 });
1447 }
1448
1449 let inputs = pack_inputs(&objects);
1450 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1451 let object_count = written.entries.len();
1452
1453 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
1459 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
1460 .into_iter()
1461 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
1462 .collect();
1463
1464 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1467 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1468 .into_iter()
1469 .filter(|oid| packed_oid_set.contains(oid))
1470 .collect();
1471 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1472
1473 Ok(Some(RepackResult {
1474 pack: written.pack,
1475 idx: written.index,
1476 object_count,
1477 obsolete_packs,
1478 packed_loose,
1479 retained_pack_stems: Vec::new(),
1480 pack_checksum: written.checksum,
1481 index_entries: written.entries,
1482 }))
1483}
1484
1485pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1491 let objects_dir = repository_objects_dir(git_dir);
1492 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1493 let loose_oids = loose_object_ids(&objects_dir, format)?;
1494 if loose_oids.is_empty() {
1495 return Ok(None);
1496 }
1497
1498 let mut objects = Vec::with_capacity(loose_oids.len());
1499 for oid in &loose_oids {
1500 objects.push(ReachablePackObject {
1501 oid: *oid,
1502 object: database.read_object(oid)?,
1503 });
1504 }
1505
1506 let inputs = pack_inputs(&objects);
1507 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1508 let object_count = written.entries.len();
1509 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1510 let mut packed_loose: Vec<ObjectId> = loose_oids
1511 .into_iter()
1512 .filter(|oid| packed_oid_set.contains(oid))
1513 .collect();
1514 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1515
1516 let pack_checksum = written.checksum;
1517 let index_entries = written.entries.clone();
1518 Ok(Some(RepackResult {
1519 pack: written.pack,
1520 idx: written.index,
1521 object_count,
1522 obsolete_packs: Vec::new(),
1523 packed_loose,
1524 retained_pack_stems: Vec::new(),
1525 pack_checksum,
1526 index_entries,
1527 }))
1528}
1529
1530#[derive(Debug, Clone)]
1533struct GeometryPack {
1534 pack_path: PathBuf,
1536 oids: Vec<ObjectId>,
1538 weight: u64,
1540 is_promisor: bool,
1542}
1543
1544#[derive(Debug, Clone)]
1547pub struct GeometricRepackResult {
1548 pub result: Option<RepackResult>,
1550 pub rolled_up_packs: Vec<PathBuf>,
1552}
1553
1554fn collect_geometry_packs(
1557 objects_dir: &Path,
1558 format: ObjectFormat,
1559 kept_pack_stems: &HashSet<String>,
1560) -> Result<Vec<GeometryPack>> {
1561 let pack_dir = objects_dir.join("pack");
1562 let mut packs = Vec::new();
1563 for pack_path in existing_pack_files(&pack_dir)? {
1564 if pack_path.with_extension("mtimes").exists() {
1567 continue;
1568 }
1569 if pack_path.with_extension("keep").exists() {
1570 continue;
1571 }
1572 let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1573 continue;
1574 };
1575 if kept_pack_stems.contains(stem) {
1576 continue;
1577 }
1578 let index_path = pack_path.with_extension("idx");
1579 if !index_path.exists() {
1580 continue;
1581 }
1582 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1583 let oids: Vec<ObjectId> = index.entries.iter().map(|entry| entry.oid).collect();
1584 let weight = oids.len() as u64;
1585 packs.push(GeometryPack {
1586 is_promisor: pack_path.with_extension("promisor").exists(),
1587 pack_path,
1588 oids,
1589 weight,
1590 });
1591 }
1592 packs.sort_by(|a, b| a.weight.cmp(&b.weight).then(a.pack_path.cmp(&b.pack_path)));
1594 Ok(packs)
1595}
1596
1597fn compute_geometry_split(packs: &[GeometryPack], split_factor: u64) -> usize {
1601 let pack_nr = packs.len();
1602 if pack_nr == 0 {
1603 return 0;
1604 }
1605 let mut i = pack_nr - 1;
1607 while i > 0 {
1608 let ours = packs[i].weight;
1609 let prev = packs[i - 1].weight;
1610 if ours < split_factor.saturating_mul(prev) {
1611 break;
1612 }
1613 i -= 1;
1614 }
1615 let mut split = i;
1616 if split != 0 {
1617 split += 1;
1619 }
1620
1621 let mut total_size: u64 = packs[..split].iter().map(|p| p.weight).sum();
1625 for pack in &packs[split..] {
1626 if pack.weight < split_factor.saturating_mul(total_size) {
1627 split += 1;
1628 total_size = total_size.saturating_add(pack.weight);
1629 } else {
1630 break;
1631 }
1632 }
1633 split
1634}
1635
1636pub fn repack_geometric(
1646 git_dir: &Path,
1647 format: ObjectFormat,
1648 split_factor: u64,
1649 kept_pack_stems: &HashSet<String>,
1650) -> Result<GeometricRepackResult> {
1651 let objects_dir = repository_objects_dir(git_dir);
1652 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1653
1654 let all_packs = collect_geometry_packs(&objects_dir, format, kept_pack_stems)?;
1658 let packs: Vec<GeometryPack> = all_packs
1659 .into_iter()
1660 .filter(|pack| !pack.is_promisor)
1661 .collect();
1662
1663 let split = compute_geometry_split(&packs, split_factor);
1664
1665 let loose_oids = loose_object_ids(&objects_dir, format)?;
1666
1667 let mut excluded_oids: HashSet<ObjectId> = HashSet::new();
1672 for pack in &packs[split..] {
1673 excluded_oids.extend(pack.oids.iter().copied());
1674 }
1675
1676 let mut included: Vec<ObjectId> = Vec::new();
1677 let mut seen: HashSet<ObjectId> = HashSet::new();
1678 for pack in &packs[..split] {
1679 for oid in &pack.oids {
1680 if excluded_oids.contains(oid) {
1681 continue;
1682 }
1683 if seen.insert(*oid) {
1684 included.push(*oid);
1685 }
1686 }
1687 }
1688 for oid in &loose_oids {
1689 if excluded_oids.contains(oid) {
1690 continue;
1691 }
1692 if seen.insert(*oid) {
1693 included.push(*oid);
1694 }
1695 }
1696
1697 if included.is_empty() {
1699 return Ok(GeometricRepackResult {
1700 result: None,
1701 rolled_up_packs: Vec::new(),
1702 });
1703 }
1704
1705 included.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1706 let mut objects = Vec::with_capacity(included.len());
1707 for oid in &included {
1708 objects.push(ReachablePackObject {
1709 oid: *oid,
1710 object: database.read_object(oid)?,
1711 });
1712 }
1713
1714 let inputs = pack_inputs(&objects);
1715 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1716 let object_count = written.entries.len();
1717
1718 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1719 let mut packed_loose: Vec<ObjectId> = loose_oids
1720 .into_iter()
1721 .filter(|oid| packed_oid_set.contains(oid))
1722 .collect();
1723 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1724
1725 let rolled_up_packs: Vec<PathBuf> = packs[..split]
1726 .iter()
1727 .map(|pack| pack.pack_path.clone())
1728 .collect();
1729
1730 let pack_checksum = written.checksum;
1731 let index_entries = written.entries.clone();
1732 Ok(GeometricRepackResult {
1733 result: Some(RepackResult {
1734 pack: written.pack,
1735 idx: written.index,
1736 object_count,
1737 obsolete_packs: rolled_up_packs.clone(),
1738 packed_loose,
1739 retained_pack_stems: Vec::new(),
1740 pack_checksum,
1741 index_entries,
1742 }),
1743 rolled_up_packs,
1744 })
1745}
1746
1747pub fn install_repack_result(
1762 git_dir: &Path,
1763 format: ObjectFormat,
1764 result: &RepackResult,
1765 prune: bool,
1766) -> Result<()> {
1767 install_repack_result_with_bitmap(git_dir, format, result, prune, None)
1768}
1769
1770pub fn install_repack_result_with_bitmap(
1776 git_dir: &Path,
1777 format: ObjectFormat,
1778 result: &RepackResult,
1779 prune: bool,
1780 bitmap_tips: Option<&HashSet<ObjectId>>,
1781) -> Result<()> {
1782 let objects_dir = repository_objects_dir(git_dir);
1783 let pack_dir = objects_dir.join("pack");
1784 fs::create_dir_all(&pack_dir)?;
1785
1786 validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1791 let parsed_index = PackIndex::parse(&result.idx, format)?;
1792 if parsed_index.pack_checksum != result.pack_checksum {
1793 return Err(GitError::InvalidFormat(
1794 "repack index checksum does not match the new pack".into(),
1795 ));
1796 }
1797 if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1798 return Err(GitError::InvalidFormat(
1799 "repack index does not match the new pack contents".into(),
1800 ));
1801 }
1802 let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1803 let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1804 let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1805 let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1806 let reverse_index = sley_pack::PackReverseIndex::write(
1810 format,
1811 &sley_pack::pack_order_index_positions(&parsed_index.entries),
1812 &result.pack_checksum,
1813 )?;
1814 write_pack_component(&new_pack_path, &result.pack)?;
1815 write_pack_component(&new_rev_path, &reverse_index)?;
1816 write_pack_component(&new_index_path, &result.idx)?;
1817
1818 if let Some(tips) = bitmap_tips {
1819 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1822 if let Some(bitmap) = build_pack_bitmap(
1823 &database,
1824 format,
1825 &result.index_entries,
1826 &result.pack_checksum,
1827 tips,
1828 )? {
1829 let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1834 remove_file_if_exists(&bitmap_path)?;
1835 write_pack_component(&bitmap_path, &bitmap)?;
1836 }
1837 }
1838
1839 if !prune {
1840 return Ok(());
1841 }
1842
1843 let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1847
1848 prune_obsolete_pack_paths(
1849 &objects_dir,
1850 format,
1851 &result.obsolete_packs,
1852 &new_pack_path,
1853 &result.retained_pack_stems,
1854 )?;
1855 prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1856 Ok(())
1857}
1858
1859pub fn install_geometric_repack_result(
1865 git_dir: &Path,
1866 format: ObjectFormat,
1867 geometric: &GeometricRepackResult,
1868 prune: bool,
1869 bitmap_tips: Option<&HashSet<ObjectId>>,
1870) -> Result<()> {
1871 let Some(result) = geometric.result.as_ref() else {
1872 return Ok(());
1873 };
1874 let objects_dir = repository_objects_dir(git_dir);
1875 let pack_dir = objects_dir.join("pack");
1876 fs::create_dir_all(&pack_dir)?;
1877
1878 validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1879 let parsed_index = PackIndex::parse(&result.idx, format)?;
1880 if parsed_index.pack_checksum != result.pack_checksum {
1881 return Err(GitError::InvalidFormat(
1882 "repack index checksum does not match the new pack".into(),
1883 ));
1884 }
1885 if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1886 return Err(GitError::InvalidFormat(
1887 "repack index does not match the new pack contents".into(),
1888 ));
1889 }
1890 let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1891 let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1892 let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1893 let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1894 let reverse_index = sley_pack::PackReverseIndex::write(
1895 format,
1896 &sley_pack::pack_order_index_positions(&parsed_index.entries),
1897 &result.pack_checksum,
1898 )?;
1899 write_pack_component(&new_pack_path, &result.pack)?;
1900 write_pack_component(&new_rev_path, &reverse_index)?;
1901 write_pack_component(&new_index_path, &result.idx)?;
1902
1903 if let Some(tips) = bitmap_tips {
1904 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1905 if let Some(bitmap) = build_pack_bitmap(
1906 &database,
1907 format,
1908 &result.index_entries,
1909 &result.pack_checksum,
1910 tips,
1911 )? {
1912 let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1913 remove_file_if_exists(&bitmap_path)?;
1914 write_pack_component(&bitmap_path, &bitmap)?;
1915 }
1916 }
1917
1918 if !prune {
1919 return Ok(());
1920 }
1921
1922 for pack_path in &geometric.rolled_up_packs {
1925 if *pack_path == new_pack_path {
1926 continue;
1927 }
1928 if pack_path.with_extension("keep").exists() {
1929 continue;
1930 }
1931 remove_file_if_exists(pack_path)?;
1932 remove_file_if_exists(&pack_path.with_extension("idx"))?;
1933 for ext in ["rev", "mtimes", "bitmap", "promisor"] {
1934 remove_file_if_exists(&pack_path.with_extension(ext))?;
1935 }
1936 }
1937
1938 let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1940 prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1941
1942 let removed_stems: HashSet<String> = geometric
1944 .rolled_up_packs
1945 .iter()
1946 .filter_map(|p| p.file_stem().map(|s| s.to_string_lossy().into_owned()))
1947 .collect();
1948 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1949 Ok(())
1950}
1951
1952fn validate_pack_checksum(
1953 pack: &[u8],
1954 format: ObjectFormat,
1955 expected: &ObjectId,
1956 context: &str,
1957) -> Result<()> {
1958 if expected.format() != format {
1959 return Err(GitError::InvalidObjectId(format!(
1960 "{context} checksum format does not match object format"
1961 )));
1962 }
1963 let hash_len = format.raw_len();
1964 if pack.len() < 12 + hash_len {
1965 return Err(GitError::InvalidFormat(format!(
1966 "{context} pack file too short"
1967 )));
1968 }
1969 if &pack[..4] != b"PACK" {
1970 return Err(GitError::InvalidFormat(format!(
1971 "{context} pack file missing PACK signature"
1972 )));
1973 }
1974 let trailer_offset = pack.len() - hash_len;
1975 let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
1976 let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
1977 if &actual != expected || trailer != *expected {
1978 return Err(GitError::InvalidFormat(format!(
1979 "{context} pack checksum does not match generated pack"
1980 )));
1981 }
1982 Ok(())
1983}
1984
/// Returns the modification time of `path` in whole seconds since the Unix
/// epoch.
///
/// Returns `0` when the metadata or modification time cannot be read, or when
/// the modification time lies before the epoch. Timestamps too large for a
/// `u32` saturate at `u32::MAX` instead of wrapping around, so a far-future
/// mtime is never mistaken for a very old one.
fn path_mtime_secs(path: &Path) -> u32 {
    fs::metadata(path)
        .and_then(|metadata| metadata.modified())
        .ok()
        .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok())
        .map(|dur| dur.as_secs().min(u64::from(u32::MAX)) as u32)
        .unwrap_or(0)
}
1994
1995#[derive(Debug, Clone)]
1998pub struct CruftPack {
1999 pub pack: Vec<u8>,
2000 pub idx: Vec<u8>,
2001 pub rev: Vec<u8>,
2002 pub mtimes: Vec<u8>,
2003 pub checksum: ObjectId,
2004 pub oids: Vec<ObjectId>,
2006}
2007
2008#[derive(Debug, Clone)]
2011pub struct CruftRepackResult {
2012 pub reachable: Option<RepackResult>,
2014 pub cruft: Option<CruftPack>,
2016 pub obsolete_packs: Vec<PathBuf>,
2019 pub obsolete_cruft_packs: Vec<PathBuf>,
2022 retained_pack_stems: Vec<String>,
2023}
2024
2025pub fn object_mtimes_on_disk_pub(
2029 objects_dir: &Path,
2030 format: ObjectFormat,
2031) -> Result<HashMap<ObjectId, u32>> {
2032 object_mtimes_on_disk(objects_dir, format)
2033}
2034
2035fn object_mtimes_on_disk(
2036 objects_dir: &Path,
2037 format: ObjectFormat,
2038) -> Result<HashMap<ObjectId, u32>> {
2039 let mut mtimes: HashMap<ObjectId, u32> = HashMap::new();
2040 let mut record = |oid: ObjectId, mtime: u32| {
2041 mtimes
2042 .entry(oid)
2043 .and_modify(|existing| {
2044 if mtime > *existing {
2045 *existing = mtime;
2046 }
2047 })
2048 .or_insert(mtime);
2049 };
2050
2051 let pack_dir = objects_dir.join("pack");
2052 if let Ok(entries) = fs::read_dir(&pack_dir) {
2053 let mut idx_paths: Vec<PathBuf> = Vec::new();
2054 for entry in entries {
2055 let path = entry?.path();
2056 if path.extension().and_then(|ext| ext.to_str()) == Some("idx") {
2057 idx_paths.push(path);
2058 }
2059 }
2060 idx_paths.sort();
2061 for idx_path in idx_paths {
2062 let pack_path = idx_path.with_extension("pack");
2063 if !pack_path.exists() {
2064 continue;
2065 }
2066 let index = PackIndex::parse(&fs::read(&idx_path)?, format)?;
2067 let mtimes_path = idx_path.with_extension("mtimes");
2068 let pack_object_mtimes: Option<Vec<u32>> =
2069 fs::read(&mtimes_path).ok().and_then(|bytes| {
2070 sley_pack::PackMtimes::parse(&bytes, format, index.entries.len())
2071 .ok()
2072 .map(|parsed| parsed.mtimes)
2073 });
2074 let pack_mtime = path_mtime_secs(&pack_path);
2075 for (pos, entry) in index.entries.iter().enumerate() {
2076 let mtime = pack_object_mtimes
2077 .as_ref()
2078 .and_then(|table| table.get(pos).copied())
2079 .unwrap_or(pack_mtime);
2080 record(entry.oid, mtime);
2081 }
2082 }
2083 }
2084
2085 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
2086 for oid in loose_object_ids(objects_dir, format)? {
2087 let path = store.object_path(&oid)?;
2088 record(oid, path_mtime_secs(&path));
2089 }
2090 Ok(mtimes)
2091}
2092
2093pub fn build_cruft_pack_pub(
2095 database: &FileObjectDatabase,
2096 format: ObjectFormat,
2097 survivors: &HashMap<ObjectId, u32>,
2098) -> Result<Option<CruftPack>> {
2099 build_cruft_pack(database, format, survivors)
2100}
2101
2102fn build_cruft_pack(
2105 database: &FileObjectDatabase,
2106 format: ObjectFormat,
2107 survivors: &HashMap<ObjectId, u32>,
2108) -> Result<Option<CruftPack>> {
2109 if survivors.is_empty() {
2110 return Ok(None);
2111 }
2112 let mut ordered: Vec<(ObjectId, u32)> = survivors.iter().map(|(o, m)| (*o, *m)).collect();
2113 ordered.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
2114
2115 let mut oids: Vec<ObjectId> = Vec::with_capacity(ordered.len());
2116 let mut objects: Vec<Arc<EncodedObject>> = Vec::with_capacity(ordered.len());
2117 let mut mtime_by_oid: HashMap<ObjectId, u32> = HashMap::with_capacity(ordered.len());
2118 for (oid, mtime) in ordered {
2119 match database.read_object(&oid) {
2120 Ok(object) => {
2121 oids.push(oid);
2122 objects.push(object);
2123 mtime_by_oid.insert(oid, mtime);
2124 }
2125 Err(GitError::NotFound(_)) => {}
2126 Err(err) => return Err(err),
2127 }
2128 }
2129 if oids.is_empty() {
2130 return Ok(None);
2131 }
2132
2133 let inputs: Vec<PackInput<'_>> = oids
2134 .iter()
2135 .zip(&objects)
2136 .map(|(oid, object)| PackInput {
2137 oid,
2138 object: object.as_ref(),
2139 })
2140 .collect();
2141 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
2142
2143 let mut sorted_entries: Vec<&sley_pack::PackIndexEntry> = written.entries.iter().collect();
2145 sorted_entries.sort_by(|a, b| a.oid.as_bytes().cmp(b.oid.as_bytes()));
2146 let mtimes_table: Vec<u32> = sorted_entries
2147 .iter()
2148 .map(|entry| mtime_by_oid.get(&entry.oid).copied().unwrap_or(0))
2149 .collect();
2150 let positions = sley_pack::pack_order_index_positions(&written.entries);
2151 let rev = sley_pack::PackReverseIndex::write(format, &positions, &written.checksum)?;
2152 let mtimes = sley_pack::PackMtimes::write(format, &mtimes_table, &written.checksum)?;
2153
2154 let mut cruft_oids: Vec<ObjectId> = sorted_entries.iter().map(|e| e.oid).collect();
2155 cruft_oids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
2156 Ok(Some(CruftPack {
2157 pack: written.pack,
2158 idx: written.index,
2159 rev,
2160 mtimes,
2161 checksum: written.checksum,
2162 oids: cruft_oids,
2163 }))
2164}
2165
2166pub fn repack_cruft(
2176 git_dir: &Path,
2177 format: ObjectFormat,
2178 roots: &[ObjectId],
2179 cruft_expiration: Option<u32>,
2180) -> Result<CruftRepackResult> {
2181 repack_cruft_with_options(
2182 git_dir,
2183 format,
2184 roots,
2185 cruft_expiration,
2186 &RepackOptions::default(),
2187 )
2188}
2189
2190pub fn repack_cruft_with_options(
2191 git_dir: &Path,
2192 format: ObjectFormat,
2193 roots: &[ObjectId],
2194 cruft_expiration: Option<u32>,
2195 options: &RepackOptions,
2196) -> Result<CruftRepackResult> {
2197 let objects_dir = repository_objects_dir(git_dir);
2198 let database = FileObjectDatabase::new(objects_dir.clone(), format);
2199 let pack_dir = objects_dir.join("pack");
2200 let retained_pack_stems = repack_retained_pack_stems(
2201 &pack_dir,
2202 &options.keep_pack_stems,
2203 !options.pack_kept_objects,
2204 )?;
2205 let excluded_oids = if options.pack_kept_objects {
2206 HashSet::new()
2207 } else {
2208 pack_oids_for_stems(&pack_dir, format, &retained_pack_stems)?
2209 };
2210
2211 let mut reachable_ids = collect_reachable_object_ids(&database, format, roots.iter().copied())?;
2213 reachable_ids.retain(|oid| !excluded_oids.contains(oid));
2214 let reachable_result = if reachable_ids.is_empty() {
2215 None
2216 } else {
2217 let mut ids: Vec<ObjectId> = reachable_ids.iter().copied().collect();
2218 ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
2219 let mut objects = Vec::with_capacity(ids.len());
2220 for oid in &ids {
2221 match database.read_object(oid) {
2222 Ok(object) => objects.push(ReachablePackObject { oid: *oid, object }),
2223 Err(GitError::NotFound(_)) => {}
2224 Err(err) => return Err(err),
2225 }
2226 }
2227 if objects.is_empty() {
2228 None
2229 } else {
2230 let inputs = pack_inputs(&objects);
2231 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
2232 let packed_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
2233 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2234 .into_iter()
2235 .filter(|oid| packed_set.contains(oid))
2236 .collect();
2237 packed_loose.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
2238 Some(RepackResult {
2239 pack: written.pack,
2240 idx: written.index,
2241 object_count: written.entries.len(),
2242 obsolete_packs: Vec::new(),
2243 packed_loose,
2244 retained_pack_stems: Vec::new(),
2245 pack_checksum: written.checksum,
2246 index_entries: written.entries,
2247 })
2248 }
2249 };
2250
2251 let mut survivors: HashMap<ObjectId, u32> = object_mtimes_on_disk(&objects_dir, format)?
2254 .into_iter()
2255 .filter(|(oid, _)| !reachable_ids.contains(oid) && !excluded_oids.contains(oid))
2256 .collect();
2257
2258 if let Some(expiration) = cruft_expiration {
2260 rescue_and_expire_cruft_objects(&database, format, &mut survivors, expiration)?;
2261 }
2262
2263 let cruft = build_cruft_pack(&database, format, &survivors)?;
2264
2265 let mut obsolete_packs = Vec::new();
2268 let mut obsolete_cruft_packs = Vec::new();
2269 for pack_path in existing_pack_files(&pack_dir)? {
2270 if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
2271 && retained_pack_stems.iter().any(|retained| retained == stem)
2272 {
2273 continue;
2274 }
2275 if pack_path.with_extension("keep").exists() {
2276 continue;
2277 }
2278 if pack_path.with_extension("mtimes").exists() {
2279 obsolete_cruft_packs.push(pack_path);
2280 } else {
2281 obsolete_packs.push(pack_path);
2282 }
2283 }
2284
2285 Ok(CruftRepackResult {
2286 reachable: reachable_result,
2287 cruft,
2288 obsolete_packs,
2289 obsolete_cruft_packs,
2290 retained_pack_stems,
2291 })
2292}
2293
2294fn rescue_and_expire_cruft_objects(
2299 database: &FileObjectDatabase,
2300 format: ObjectFormat,
2301 survivors: &mut HashMap<ObjectId, u32>,
2302 expiration: u32,
2303) -> Result<()> {
2304 let recent: Vec<ObjectId> = survivors
2305 .iter()
2306 .filter(|(_, mtime)| **mtime > expiration)
2307 .map(|(oid, _)| *oid)
2308 .collect();
2309
2310 let mut keep: HashSet<ObjectId> = HashSet::new();
2311 let mut pending: Vec<ObjectId> = recent.clone();
2312 while let Some(oid) = pending.pop() {
2313 if !keep.insert(oid) {
2314 continue;
2315 }
2316 let Ok(object) = database.read_object(&oid) else {
2317 continue;
2318 };
2319 match object.object_type {
2320 ObjectType::Commit => {
2321 if let Ok(commit) = Commit::parse_ref(format, &object.body) {
2322 pending.extend(commit.parents.iter().copied());
2323 pending.push(commit.tree);
2324 }
2325 }
2326 ObjectType::Tree => {
2327 for entry in TreeEntries::new(format, &object.body).flatten() {
2328 if !entry.is_gitlink() {
2329 pending.push(entry.oid);
2330 }
2331 }
2332 }
2333 ObjectType::Tag => {
2334 if let Ok(tag) = Tag::parse_ref(format, &object.body) {
2335 pending.push(tag.object);
2336 }
2337 }
2338 ObjectType::Blob => {}
2339 }
2340 }
2341
2342 survivors.retain(|oid, mtime| *mtime > expiration || keep.contains(oid));
2345 Ok(())
2346}
2347
2348pub fn install_cruft_repack_result(
2352 git_dir: &Path,
2353 format: ObjectFormat,
2354 result: &CruftRepackResult,
2355 prune: bool,
2356) -> Result<()> {
2357 let objects_dir = repository_objects_dir(git_dir);
2358 let pack_dir = objects_dir.join("pack");
2359 fs::create_dir_all(&pack_dir)?;
2360
2361 let new_reachable_name = result
2363 .reachable
2364 .as_ref()
2365 .map(|r| format!("pack-{}.pack", r.pack_checksum.to_hex()));
2366 let new_cruft_name = result
2367 .cruft
2368 .as_ref()
2369 .map(|c| format!("pack-{}.pack", c.checksum.to_hex()));
2370
2371 if let Some(reachable) = result.reachable.as_ref() {
2373 let parsed_index = PackIndex::parse(&reachable.idx, format)?;
2374 let pack_name = format!("pack-{}", reachable.pack_checksum.to_hex());
2375 let reverse_index = sley_pack::PackReverseIndex::write(
2376 format,
2377 &sley_pack::pack_order_index_positions(&parsed_index.entries),
2378 &reachable.pack_checksum,
2379 )?;
2380 write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &reachable.pack)?;
2381 write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &reverse_index)?;
2382 write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &reachable.idx)?;
2383 }
2384
2385 if let Some(cruft) = result.cruft.as_ref() {
2387 let pack_name = format!("pack-{}", cruft.checksum.to_hex());
2388 write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &cruft.pack)?;
2389 write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &cruft.rev)?;
2390 write_pack_component(&pack_dir.join(format!("{pack_name}.mtimes")), &cruft.mtimes)?;
2391 write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &cruft.idx)?;
2392 }
2393
2394 if !prune {
2395 return Ok(());
2396 }
2397
2398 let mut present: HashSet<ObjectId> = HashSet::new();
2400 if let Some(reachable) = result.reachable.as_ref() {
2401 present.extend(reachable.index_entries.iter().map(|e| e.oid));
2402 }
2403 if let Some(cruft) = result.cruft.as_ref() {
2404 present.extend(cruft.oids.iter().copied());
2405 }
2406
2407 let mut removed_stems: HashSet<String> = HashSet::new();
2409 for pack_path in result
2410 .obsolete_packs
2411 .iter()
2412 .chain(result.obsolete_cruft_packs.iter())
2413 {
2414 let file_name = pack_path.file_name().and_then(|n| n.to_str());
2415 if file_name == new_reachable_name.as_deref() || file_name == new_cruft_name.as_deref() {
2416 continue;
2417 }
2418 if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
2419 && result
2420 .retained_pack_stems
2421 .iter()
2422 .any(|retained| retained == stem)
2423 {
2424 continue;
2425 }
2426 if pack_path.with_extension("keep").exists() {
2427 continue;
2428 }
2429 if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) {
2430 removed_stems.insert(stem.to_string());
2431 }
2432 remove_file_if_exists(pack_path)?;
2433 remove_file_if_exists(&pack_path.with_extension("idx"))?;
2434 for ext in ["rev", "mtimes", "bitmap", "promisor"] {
2435 remove_file_if_exists(&pack_path.with_extension(ext))?;
2436 }
2437 }
2438
2439 let loose_now_packed: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2441 .into_iter()
2442 .filter(|oid| present.contains(oid))
2443 .collect();
2444 prune_loose_objects(&objects_dir, format, loose_now_packed.iter(), &present)?;
2445
2446 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2447 Ok(())
2448}
2449
2450fn pack_index_entries_match_writer(
2451 parsed: &[PackIndexEntry],
2452 writer_entries: &[PackIndexEntry],
2453) -> bool {
2454 if parsed.len() != writer_entries.len() {
2455 return false;
2456 }
2457 let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
2458 writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2459 parsed.iter().zip(writer_entries).all(|(left, right)| {
2460 left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
2461 })
2462}
2463
2464pub fn prune_unreachable_loose<I>(
2473 git_dir: &Path,
2474 format: ObjectFormat,
2475 roots: I,
2476 delete: bool,
2477) -> Result<Vec<ObjectId>>
2478where
2479 I: IntoIterator<Item = ObjectId>,
2480{
2481 let objects_dir = repository_objects_dir(git_dir);
2482 let database = FileObjectDatabase::new(objects_dir.clone(), format);
2483 let reachable = collect_reachable_object_ids(&database, format, roots)?;
2484
2485 let store = LooseObjectStore::new(objects_dir.clone(), format);
2486 let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2487 .into_iter()
2488 .filter(|oid| !reachable.contains(oid))
2489 .collect();
2490 pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2491
2492 if delete {
2493 for oid in &pruned {
2494 let path = store.object_path(oid)?;
2495 match fs::remove_file(&path) {
2496 Ok(()) => {}
2497 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
2498 Err(err) => return Err(GitError::Io(err.to_string())),
2499 }
2500 }
2501 }
2502 Ok(pruned)
2503}
2504
2505fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
2508 let oids = loose_object_id_set(objects_dir, format)?;
2509 let mut oids = oids.into_iter().collect::<Vec<_>>();
2510 oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2511 Ok(oids)
2512}
2513
2514fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
2515 let mut oids = HashSet::new();
2516 collect_loose_object_ids(objects_dir, format, &mut oids)?;
2517 Ok(oids)
2518}
2519
2520fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
2523 if !pack_dir.exists() {
2524 return Ok(Vec::new());
2525 }
2526 let mut packs = Vec::new();
2527 for entry in fs::read_dir(pack_dir)? {
2528 let path = entry?.path();
2529 if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
2530 packs.push(path);
2531 }
2532 }
2533 packs.sort();
2534 Ok(packs)
2535}
2536
2537fn prune_obsolete_pack_paths(
2541 objects_dir: &Path,
2542 format: ObjectFormat,
2543 packs: &[PathBuf],
2544 keep: &Path,
2545 retained_pack_stems: &[String],
2546) -> Result<()> {
2547 prune_pack_paths_matching(
2548 objects_dir,
2549 format,
2550 packs.iter(),
2551 keep,
2552 retained_pack_stems,
2553 |_| Ok(true),
2554 )
2555}
2556
2557fn prune_pack_paths_matching<'a>(
2558 objects_dir: &Path,
2559 format: ObjectFormat,
2560 packs: impl IntoIterator<Item = &'a PathBuf>,
2561 keep: &Path,
2562 retained_pack_stems: &[String],
2563 mut should_prune: impl FnMut(&Path) -> Result<bool>,
2564) -> Result<()> {
2565 let pack_dir = objects_dir.join("pack");
2566 let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
2567 let retained_pack_stems: HashSet<&str> =
2568 retained_pack_stems.iter().map(String::as_str).collect();
2569 let mut removed_stems: HashSet<String> = HashSet::new();
2570
2571 for pack_path in packs {
2572 if pack_path == keep {
2573 continue;
2574 }
2575 let Some(stem) = pack_path.file_stem() else {
2576 continue;
2577 };
2578 if Some(stem) == keep_stem.as_deref() {
2579 continue;
2580 }
2581 if let Some(stem) = stem.to_str()
2582 && retained_pack_stems.contains(stem)
2583 {
2584 continue;
2585 }
2586 if pack_path.with_extension("keep").exists()
2587 || pack_path.with_extension("promisor").exists()
2588 {
2589 continue;
2590 }
2591 if !should_prune(pack_path)? {
2592 continue;
2593 }
2594 remove_file_if_exists(pack_path)?;
2595 remove_file_if_exists(&pack_path.with_extension("idx"))?;
2596 for ext in ["rev", "mtimes", "bitmap"] {
2597 remove_file_if_exists(&pack_path.with_extension(ext))?;
2598 }
2599 removed_stems.insert(stem.to_string_lossy().into_owned());
2600 }
2601
2602 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2603 Ok(())
2604}
2605
2606fn prune_stale_multi_pack_index(
2613 pack_dir: &Path,
2614 format: ObjectFormat,
2615 removed_stems: &HashSet<String>,
2616) -> Result<()> {
2617 if removed_stems.is_empty() {
2618 return Ok(());
2619 }
2620 let midx_path = pack_dir.join("multi-pack-index");
2621 if !midx_path.exists() {
2622 return Ok(());
2623 }
2624 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
2625 let references_removed_pack = midx.pack_names.iter().any(|name| {
2626 let stem = name.strip_suffix(".idx").unwrap_or(name);
2627 removed_stems.contains(stem)
2628 });
2629 if references_removed_pack {
2630 remove_file_if_exists(&midx_path)?;
2631 }
2632 Ok(())
2633}
2634
2635fn prune_loose_objects<'a, I>(
2638 objects_dir: &Path,
2639 format: ObjectFormat,
2640 candidates: I,
2641 present: &HashSet<ObjectId>,
2642) -> Result<()>
2643where
2644 I: IntoIterator<Item = &'a ObjectId>,
2645{
2646 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
2647 for oid in candidates {
2648 if !present.contains(oid) {
2649 continue;
2650 }
2651 remove_file_if_exists(&store.object_path(oid)?)?;
2652 }
2653 Ok(())
2654}
2655
/// How a delta entry inside a pack names its base object.
enum PackDeltaBase {
    /// Base identified by its absolute byte offset in the same pack (entry type 6).
    Offset(u64),
    /// Base identified by its object id (entry type 7).
    Ref(ObjectId),
}
2660
/// Result of scanning a pack index for one entry offset (see `scan_pack_index_offsets`).
struct PackIndexOffsetInfo {
    /// Offset at which the target entry's data ends: the next larger indexed offset, or the
    /// trailer offset when no later entry exists.
    end_offset: u64,
    /// Object id of the entry located at the requested delta-base offset, if one was requested.
    delta_base_oid: Option<ObjectId>,
}
2665
2666fn scan_pack_index_offsets(
2667 index: &PackIndex,
2668 target_offset: u64,
2669 trailer_offset: u64,
2670 delta_base_offset: Option<u64>,
2671) -> Result<PackIndexOffsetInfo> {
2672 let mut target_count = 0usize;
2673 let mut next_offset = None;
2674 let mut delta_base_oid = None;
2675
2676 for entry in &index.entries {
2677 if entry.offset == target_offset {
2678 target_count += 1;
2679 } else if entry.offset > target_offset {
2680 match next_offset {
2681 Some(current) if current <= entry.offset => {}
2682 _ => next_offset = Some(entry.offset),
2683 }
2684 }
2685 if Some(entry.offset) == delta_base_offset {
2686 delta_base_oid = Some(entry.oid);
2687 }
2688 }
2689
2690 if target_count == 0 {
2691 return Err(GitError::InvalidFormat(format!(
2692 "pack index offset {target_offset} not found"
2693 )));
2694 }
2695 if let Some(offset) = delta_base_offset
2696 && delta_base_oid.is_none()
2697 {
2698 return Err(GitError::InvalidFormat(format!(
2699 "ofs-delta base offset {offset} not found"
2700 )));
2701 }
2702
2703 Ok(PackIndexOffsetInfo {
2704 end_offset: if target_count > 1 {
2707 target_offset
2708 } else {
2709 next_offset.unwrap_or(trailer_offset)
2710 },
2711 delta_base_oid,
2712 })
2713}
2714
2715fn pack_entry_delta_base(
2716 format: ObjectFormat,
2717 pack: &[u8],
2718 entry_offset: u64,
2719) -> Result<Option<PackDeltaBase>> {
2720 let mut cursor = usize::try_from(entry_offset)
2721 .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
2722 let first = pack_next_byte(pack, &mut cursor)?;
2723 let kind = (first >> 4) & 0x07;
2724 let mut byte = first;
2725 while byte & 0x80 != 0 {
2726 byte = pack_next_byte(pack, &mut cursor)?;
2727 }
2728 match kind {
2729 6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
2730 pack,
2731 &mut cursor,
2732 entry_offset,
2733 )?))),
2734 7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
2735 format,
2736 pack,
2737 &mut cursor,
2738 )?))),
2739 _ => Ok(None),
2740 }
2741}
2742
2743fn parse_ref_delta_base_oid(
2744 format: ObjectFormat,
2745 pack: &[u8],
2746 cursor: &mut usize,
2747) -> Result<ObjectId> {
2748 let raw_len = format.raw_len();
2749 if *cursor + raw_len > pack.len() {
2750 return Err(GitError::InvalidFormat(
2751 "truncated ref-delta base object id".into(),
2752 ));
2753 }
2754 let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
2755 *cursor += raw_len;
2756 Ok(oid)
2757}
2758
2759fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
2760 let mut byte = pack_next_byte(pack, cursor)?;
2761 let mut relative = u64::from(byte & 0x7f);
2762 while byte & 0x80 != 0 {
2763 byte = pack_next_byte(pack, cursor)?;
2764 relative = relative
2765 .checked_add(1)
2766 .and_then(|value| value.checked_shl(7))
2767 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2768 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2769 }
2770 entry_offset
2771 .checked_sub(relative)
2772 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2773}
2774
2775fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
2776 let Some(byte) = pack.get(*cursor).copied() else {
2777 return Err(GitError::InvalidFormat("truncated pack entry".into()));
2778 };
2779 *cursor += 1;
2780 Ok(byte)
2781}
2782
/// Returns the null object id for `format`, wrapped in `Ok`.
///
/// This never fails in the code shown here; the `Result` return type is kept for callers.
fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
    Ok(ObjectId::null(format))
}
2786
2787fn remove_file_if_exists(path: &Path) -> Result<()> {
2789 match fs::remove_file(path) {
2790 Ok(()) => Ok(()),
2791 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
2792 Err(err) => Err(GitError::Io(err.to_string())),
2793 }
2794}
2795
2796fn walk_reachable_objects<R, I, F>(
2797 reader: &R,
2798 format: ObjectFormat,
2799 starts: I,
2800 excluded: &HashSet<ObjectId>,
2801 visit: F,
2802) -> Result<HashSet<ObjectId>>
2803where
2804 R: ObjectReader,
2805 I: IntoIterator<Item = ObjectId>,
2806 F: FnMut(&ObjectId, &Arc<EncodedObject>),
2807{
2808 walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
2809}
2810
2811fn walk_reachable_objects_with_cut<R, I, F>(
2815 reader: &R,
2816 format: ObjectFormat,
2817 starts: I,
2818 excluded: &HashSet<ObjectId>,
2819 cut: &HashSet<ObjectId>,
2820 mut visit: F,
2821) -> Result<HashSet<ObjectId>>
2822where
2823 R: ObjectReader,
2824 I: IntoIterator<Item = ObjectId>,
2825 F: FnMut(&ObjectId, &Arc<EncodedObject>),
2826{
2827 let mut seen = HashSet::new();
2828 let mut pending = Vec::new();
2829 for start in starts {
2830 pending.push(start);
2831 while let Some(oid) = pending.pop() {
2832 if excluded.contains(&oid) {
2833 continue;
2834 }
2835 if !seen.insert(oid) {
2836 continue;
2837 }
2838 let object = reader.read_object(&oid).map_err(|err| {
2839 with_missing_object_context(err, oid, MissingObjectContext::Traversal)
2840 })?;
2841 match object.object_type {
2842 ObjectType::Commit => {
2843 let (tree, parents) = {
2844 let commit = Commit::parse_ref(format, &object.body)?;
2845 (commit.tree, commit.parents)
2846 };
2847 visit(&oid, &object);
2848 if !cut.contains(&oid) {
2849 for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
2850 pending.push(parent);
2851 }
2852 }
2853 pending.push(tree);
2854 }
2855 ObjectType::Tree => {
2856 let mut child_oids = Vec::new();
2857 for entry in TreeEntries::new(format, &object.body) {
2858 let entry = entry?;
2859 if entry.is_gitlink() {
2860 continue;
2861 }
2862 child_oids.push(entry.oid);
2863 }
2864 visit(&oid, &object);
2865 pending.extend(child_oids.into_iter().rev());
2866 }
2867 ObjectType::Tag => {
2868 let target = {
2869 let tag = Tag::parse_ref(format, &object.body)?;
2870 tag.object
2871 };
2872 visit(&oid, &object);
2873 pending.push(target);
2874 }
2875 ObjectType::Blob => visit(&oid, &object),
2876 }
2877 }
2878 }
2879 Ok(seen)
2880}
2881
/// Tests bit `position` in a bitset stored as 64-bit words (bit 0 is the least significant bit
/// of word 0). Positions beyond the slice read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    words
        .get((position / 64) as usize)
        .is_some_and(|word| word & mask != 0)
}
2890
/// Sets bit `position` in a bitset stored as 64-bit words. Positions beyond the slice are
/// silently ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(slot) = words.get_mut((position / 64) as usize) {
        *slot |= 1u64 << (position % 64);
    }
}
2897
/// ORs `other` into `acc` word by word. Only the overlapping prefix of the two slices is
/// touched.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other.iter())
        .for_each(|(target, bits)| *target |= *bits);
}
2903
/// Lists the positions of all set bits in ascending order.
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    let mut found = Vec::new();
    for (index, &word) in words.iter().enumerate() {
        let mut bits = word;
        while bits != 0 {
            let lowest = bits.trailing_zeros();
            found.push(index as u32 * 64 + lowest);
            // Clear the bit that was just reported.
            bits ^= 1u64 << lowest;
        }
    }
    found
}
2917
/// Extracts the timestamp from an identity line of the form `<name and email> <seconds> <tz>`.
///
/// The line is split from the right on spaces: the last field is taken as the timezone and the
/// one before it as the timestamp. Returns 0 when the timestamp field is missing, is not valid
/// UTF-8, or does not parse as an `i64`.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    let mut parts = identity.rsplitn(3, |&byte| byte == b' ');
    let (Some(_tz), Some(raw)) = (parts.next(), parts.next()) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
2930
/// Returns how many commits to skip after position `idx` before the next bitmap candidate.
///
/// - Up to and including `MUST_REGION` the result is 0.
/// - Up to and including `MIN_REGION` the result is the distance past `MUST_REGION`, capped at
///   `MIN_COMMITS`.
/// - Beyond that it is the distance past `MIN_REGION`, clamped to
///   `MIN_COMMITS..=MAX_COMMITS`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
2949
2950pub fn build_pack_bitmap(
2964 db: &FileObjectDatabase,
2965 format: ObjectFormat,
2966 index_entries: &[PackIndexEntry],
2967 pack_checksum: &ObjectId,
2968 preferred_tips: &HashSet<ObjectId>,
2969) -> Result<Option<Vec<u8>>> {
2970 let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
2973 by_offset.sort_by_key(|&slot| index_entries[slot].offset);
2974 let bit_order: Vec<ObjectId> = by_offset
2975 .into_iter()
2976 .map(|slot| index_entries[slot].oid)
2977 .collect();
2978 build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
2979}
2980
2981pub fn build_midx_bitmap(
2987 db: &FileObjectDatabase,
2988 format: ObjectFormat,
2989 midx_entries: &[sley_pack::MultiPackIndexEntry],
2990 midx_checksum: &ObjectId,
2991 preferred_pack: u32,
2992 preferred_tips: &HashSet<ObjectId>,
2993) -> Result<Option<Vec<u8>>> {
2994 let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
2995 pseudo.sort_by_key(|&slot| {
2996 let entry = &midx_entries[slot];
2997 (
2998 entry.pack_int_id != preferred_pack,
2999 entry.pack_int_id,
3000 entry.offset,
3001 )
3002 });
3003 let bit_order: Vec<ObjectId> = pseudo
3004 .into_iter()
3005 .map(|slot| midx_entries[slot].oid)
3006 .collect();
3007 build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
3008}
3009
/// Counts the "maximal" commits among `selected`, for trace output.
///
/// The function follows first-parent links (after grafts) from every selected commit, gives
/// each selected commit a one-bit mask, and then pushes masks from children to their first
/// parent once all of a parent's children have been processed. A commit is counted when its
/// entry is still flagged `maximal` at the time it is processed.
///
/// NOTE(review): this appears to mirror the maximal-commit computation of Git's bitmap writer;
/// confirm against the reference implementation before changing the propagation rules.
///
/// # Errors
///
/// Propagates object read and commit parse failures.
fn bitmap_num_maximal_commits(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    selected: &[ObjectId],
) -> Result<usize> {
    // First (grafted) parent of every commit on a first-parent chain from a selected commit.
    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
    let mut stack: Vec<ObjectId> = selected.to_vec();
    while let Some(oid) = stack.pop() {
        if first_parent.contains_key(&oid) {
            continue;
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
        first_parent.insert(oid, parent);
        if let Some(parent) = parent {
            stack.push(parent);
        }
    }
    // Number of not-yet-processed children per parent within the first-parent graph.
    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
    for parent in first_parent.values().flatten() {
        *pending_children.entry(*parent).or_default() += 1;
    }
    let word_count = selected.len().div_ceil(64);
    // Per-commit state: which selected commits reach it, and whether it is still maximal.
    struct MaximalEnt {
        mask: Vec<u64>,
        maximal: bool,
    }
    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
    for (bit, oid) in selected.iter().enumerate() {
        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
            mask: vec![0u64; word_count],
            maximal: true,
        });
        ent.mask[bit / 64] |= 1u64 << (bit % 64);
        ent.maximal = true;
    }
    // Start with commits that have no children in the first-parent graph.
    let mut queue: Vec<ObjectId> = first_parent
        .keys()
        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
        .copied()
        .collect();
    let mut num_maximal = 0usize;
    while let Some(oid) = queue.pop() {
        if let Some(ent) = ents.remove(&oid) {
            if ent.maximal {
                num_maximal += 1;
            }
            if let Some(Some(parent)) = first_parent.get(&oid) {
                match ents.entry(*parent) {
                    std::collections::hash_map::Entry::Vacant(vacant) => {
                        // The parent had no entry yet: it inherits the child's mask and is not
                        // maximal itself.
                        vacant.insert(MaximalEnt {
                            mask: ent.mask.clone(),
                            maximal: false,
                        });
                    }
                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
                        let parent_ent = occupied.get_mut();
                        // Does the child carry bits the parent does not have yet?
                        let c_not_p = ent
                            .mask
                            .iter()
                            .zip(&parent_ent.mask)
                            .any(|(child, parent)| child & !parent != 0);
                        if c_not_p {
                            // The parent stays maximal only if it has bits the child lacks.
                            let p_not_c = parent_ent
                                .mask
                                .iter()
                                .zip(&ent.mask)
                                .any(|(parent, child)| parent & !child != 0);
                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
                                *parent |= child;
                            }
                            parent_ent.maximal = p_not_c;
                        }
                    }
                }
            }
        }
        // Release the parent once its last pending child has been processed.
        if let Some(Some(parent)) = first_parent.get(&oid)
            && let Some(remaining) = pending_children.get_mut(parent)
        {
            *remaining -= 1;
            if *remaining == 0 {
                queue.push(*parent);
            }
        }
    }
    Ok(num_maximal)
}
3109
/// Builds a serialized reachability bitmap for the objects listed in `bit_order`.
///
/// `bit_order[i]` is the object assigned to bit `i`. The function:
/// 1. derives each object's position in oid-sorted order;
/// 2. records every object's type and collects the commits with their committer timestamp and
///    grafted parent count;
/// 3. selects commits to bitmap (all of them when there are fewer than 100, otherwise spaced by
///    `bitmap_next_commit_index`, favouring `preferred_tips` and merge commits in each window);
/// 4. computes, per selected commit, the set of bit positions reachable from it;
/// 5. feeds the result to `PackBitmapWriter`.
///
/// Returns `Ok(None)` when `bit_order` is empty, has more than `u32::MAX` entries, or when a
/// reachable object is not part of `bit_order` (a warning is printed to stderr in that case).
///
/// # Errors
///
/// Propagates object read/parse failures and writer errors.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // `index_position[bit]` is the rank of that object when all ids are sorted by raw bytes.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    let mut object_types = Vec::with_capacity(object_count);
    // A commit together with the data needed for selection and for the writer.
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Use the header when it is available; otherwise fall back to reading the object.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Newest commits first (stable for equal dates).
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default to the commit at the end of the window.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // A preferred tip wins immediately; otherwise the last merge seen wins.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // Extra statistics are only computed when the trace environment variable is present.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    let word_count = object_count.div_ceil(64);
    // Finished bitmaps, keyed by selected commit; reused when a later walk reaches that commit.
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    // Walk in reverse selection order, i.e. oldest selected commit first.
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            if let Some(stored) = memo.get(&oid) {
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
3261
3262fn bitmap_mark_tree(
3266 db: &impl ObjectReader,
3267 format: ObjectFormat,
3268 tree: &ObjectId,
3269 oid_to_pack: &HashMap<ObjectId, u32>,
3270 acc: &mut [u64],
3271) -> Result<bool> {
3272 let Some(&pack_pos) = oid_to_pack.get(tree) else {
3273 eprintln!(
3274 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
3275 );
3276 return Ok(false);
3277 };
3278 if bitset_get(acc, pack_pos) {
3279 return Ok(true);
3280 }
3281 bitset_set(acc, pack_pos);
3282 let object = db.read_object(tree)?;
3283 for entry in TreeEntries::new(format, &object.body) {
3284 let entry = entry?;
3285 if entry.is_gitlink() {
3286 continue;
3287 }
3288 if entry.is_tree() {
3289 if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
3290 return Ok(false);
3291 }
3292 } else {
3293 let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
3294 eprintln!(
3295 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
3296 entry.oid
3297 );
3298 return Ok(false);
3299 };
3300 bitset_set(acc, blob_pos);
3301 }
3302 }
3303 Ok(true)
3304}
3305
/// An on-disk reachability bitmap expanded into plain 64-bit word vectors.
pub struct LoadedPackBitmap {
    /// Number of objects covered by the bitmap.
    object_count: u32,
    /// Object id -> bit position.
    oid_to_pack: HashMap<ObjectId, u32>,
    /// Bit position -> object id (the inverse of `oid_to_pack`).
    pack_to_oid: Vec<ObjectId>,
    /// Per-commit word vectors, keyed by commit id.
    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
    /// Word vector returned for `ObjectType::Commit`; presumably one bit per commit object.
    commits: Vec<u64>,
    /// Word vector returned for `ObjectType::Tree`.
    trees: Vec<u64>,
    /// Word vector returned for `ObjectType::Blob`.
    blobs: Vec<u64>,
    /// Word vector returned for `ObjectType::Tag`.
    tags: Vec<u64>,
}
3319
impl LoadedPackBitmap {
    /// Number of objects covered by the bitmap.
    pub fn object_count(&self) -> u32 {
        self.object_count
    }

    /// Bit position of `oid`, or `None` when the object is not covered.
    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
        self.oid_to_pack.get(oid).copied()
    }

    /// Object id stored at bit `position`, or `None` when the position is out of range.
    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
        self.pack_to_oid.get(position as usize)
    }

    /// Stored word vector for the commit `oid`, if that commit has a bitmap.
    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
        self.commit_words.get(oid)
    }

    /// Iterates over the ids of all commits that have a stored bitmap, in no particular order.
    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
        self.commit_words.keys()
    }

    /// Word vector of the per-type bitmap for `object_type`.
    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
        match object_type {
            ObjectType::Commit => &self.commits,
            ObjectType::Tree => &self.trees,
            ObjectType::Blob => &self.blobs,
            ObjectType::Tag => &self.tags,
        }
    }

    /// Number of 64-bit words needed to hold one bit per covered object.
    fn word_count(&self) -> usize {
        (self.object_count as usize).div_ceil(64)
    }
}
3359
3360pub fn load_pack_bitmap(
3367 objects_dir: &Path,
3368 format: ObjectFormat,
3369) -> Result<Option<LoadedPackBitmap>> {
3370 let pack_dir = objects_dir.join("pack");
3371 if !pack_dir.exists() {
3372 return Ok(None);
3373 }
3374 if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
3377 return Ok(Some(bitmap));
3378 }
3379 let mut bitmap_paths = Vec::new();
3380 for entry in fs::read_dir(&pack_dir)? {
3381 let path = entry?.path();
3382 if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
3383 && path
3384 .file_name()
3385 .and_then(|name| name.to_str())
3386 .is_some_and(|name| name.starts_with("pack-"))
3387 {
3388 bitmap_paths.push(path);
3389 }
3390 }
3391 bitmap_paths.sort();
3392 for bitmap_path in bitmap_paths {
3393 match load_pack_bitmap_file(&bitmap_path, format) {
3394 Ok(Some(bitmap)) => return Ok(Some(bitmap)),
3395 Ok(None) | Err(_) => continue,
3396 }
3397 }
3398 Ok(None)
3399}
3400
/// Loads the bitmap that belongs to `<pack_dir>/multi-pack-index`, if there is a usable one.
///
/// Every failure mode on this path is reported as `Ok(None)`: a missing or unreadable
/// multi-pack-index, a parse failure, a missing `.bitmap`, a missing or invalid reverse index,
/// a checksum mismatch, or a failure while assembling the bitmap. Two diagnostics are printed to
/// stderr when the reverse-index chunk has the wrong size.
///
/// The bit order comes from the reverse-index chunk embedded in the multi-pack-index, unless
/// that chunk is absent or `GIT_TEST_MIDX_READ_RIDX` is set to `0` / `false`, in which case the
/// separate `multi-pack-index-<checksum>.rev` file is used.
fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
    let midx_path = pack_dir.join("multi-pack-index");
    if !midx_path.exists() {
        return Ok(None);
    }
    let Ok(midx_bytes) = fs::read(&midx_path) else {
        return Ok(None);
    };
    // Check the raw bytes first so the size problem is reported before a full parse is tried.
    if midx_has_bad_ridx_chunk(&midx_bytes, format) {
        eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
        eprintln!("warning: multi-pack bitmap is missing required reverse index");
        return Ok(None);
    }
    let midx = match MultiPackIndex::parse(&midx_bytes, format) {
        Ok(midx) => midx,
        // The parser can report the same condition; emit the same two diagnostics.
        Err(GitError::InvalidFormat(message))
            if message == "multi-pack-index reverse-index chunk is the wrong size" =>
        {
            eprintln!("error: {message}");
            eprintln!("warning: multi-pack bitmap is missing required reverse index");
            return Ok(None);
        }
        Err(_) => return Ok(None),
    };
    let bitmap_path = pack_dir.join(format!(
        "multi-pack-index-{}.bitmap",
        midx.checksum.to_hex()
    ));
    if !bitmap_path.exists() {
        return Ok(None);
    }
    let object_count = midx.objects.len();
    // Defaults to true when the variable is unset or not valid Unicode.
    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
        .unwrap_or(true);
    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
        (Some(chunk), true) => {
            sley_core::trace2::data("load_midx_revindex", "source", "midx");
            chunk.clone()
        }
        _ => {
            // Fall back to the standalone `.rev` file next to the multi-pack-index.
            let rev_path =
                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
            let Ok(rev_bytes) = fs::read(&rev_path) else {
                return Ok(None);
            };
            let Ok(parsed_rev) =
                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
            else {
                return Ok(None);
            };
            sley_core::trace2::data("load_midx_revindex", "source", "rev");
            parsed_rev.positions
        }
    };
    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
        return Ok(None);
    };
    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
        Ok(parsed) => parsed,
        Err(_) => return Ok(None),
    };
    // The bitmap must have been written for exactly this multi-pack-index.
    if parsed.pack_checksum != midx.checksum {
        return Ok(None);
    }

    // Map bit positions to object ids through the reverse index.
    let mut pack_to_oid = Vec::with_capacity(object_count);
    for &midx_pos in &reverse_index {
        let Some(entry) = midx.objects.get(midx_pos as usize) else {
            return Ok(None);
        };
        pack_to_oid.push(entry.oid);
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }
    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
        midx.objects.get(position).map(|entry| entry.oid)
    }) {
        Ok(loaded) => Ok(Some(loaded)),
        Err(_) => Ok(None),
    }
}
3496
3497fn midx_has_bad_ridx_chunk(bytes: &[u8], format: ObjectFormat) -> bool {
3498 let hash_len = format.raw_len();
3499 if bytes.len() < 12 + 12 + hash_len || &bytes[..4] != b"MIDX" {
3500 return false;
3501 }
3502 let chunk_count = bytes[6] as usize;
3503 let table_len = match (chunk_count + 1).checked_mul(12) {
3504 Some(table_len) => table_len,
3505 None => return false,
3506 };
3507 let table_end = match 12usize.checked_add(table_len) {
3508 Some(table_end) if table_end <= bytes.len().saturating_sub(hash_len) => table_end,
3509 _ => return false,
3510 };
3511 let mut entries = Vec::with_capacity(chunk_count + 1);
3512 let mut cursor = 12usize;
3513 while cursor < table_end {
3514 let id = [
3515 bytes[cursor],
3516 bytes[cursor + 1],
3517 bytes[cursor + 2],
3518 bytes[cursor + 3],
3519 ];
3520 let mut raw_offset = [0u8; 8];
3521 raw_offset.copy_from_slice(&bytes[cursor + 4..cursor + 12]);
3522 entries.push((id, u64::from_be_bytes(raw_offset) as usize));
3523 cursor += 12;
3524 }
3525 let mut oidf = None;
3526 let mut ridx = None;
3527 for pair in entries.windows(2) {
3528 let start = pair[0].1;
3529 let end = pair[1].1;
3530 if end < start || end > bytes.len().saturating_sub(hash_len) {
3531 return false;
3532 }
3533 match &pair[0].0 {
3534 b"OIDF" => oidf = Some((start, end)),
3535 b"RIDX" => ridx = Some((start, end)),
3536 _ => {}
3537 }
3538 }
3539 let Some((oidf_start, oidf_end)) = oidf else {
3540 return false;
3541 };
3542 let Some((ridx_start, ridx_end)) = ridx else {
3543 return false;
3544 };
3545 if oidf_end.saturating_sub(oidf_start) != 256 * 4 {
3546 return false;
3547 }
3548 let object_count_start = oidf_end - 4;
3549 let object_count = u32::from_be_bytes([
3550 bytes[object_count_start],
3551 bytes[object_count_start + 1],
3552 bytes[object_count_start + 2],
3553 bytes[object_count_start + 3],
3554 ]) as usize;
3555 ridx_end.saturating_sub(ridx_start) != object_count.saturating_mul(4)
3556}
3557
3558fn load_pack_bitmap_file(
3559 bitmap_path: &Path,
3560 format: ObjectFormat,
3561) -> Result<Option<LoadedPackBitmap>> {
3562 let index_path = bitmap_path.with_extension("idx");
3563 if !index_path.exists() {
3564 return Ok(None);
3565 }
3566 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
3567 let object_count = index.entries.len();
3568 let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
3569 if parsed.pack_checksum != index.pack_checksum {
3570 return Ok(None);
3571 }
3572
3573 let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
3574 pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
3575 let mut pack_to_oid = Vec::with_capacity(object_count);
3576 for index_pos in &pack_order {
3577 pack_to_oid.push(index.entries[*index_pos as usize].oid);
3578 }
3579 let mut oid_to_pack = HashMap::with_capacity(object_count);
3580 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
3581 oid_to_pack.insert(*oid, pack_pos as u32);
3582 }
3583
3584 assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
3585 index.entries.get(position).map(|entry| entry.oid)
3586 })
3587 .map(Some)
3588}
3589
3590fn assemble_loaded_bitmap(
3595 parsed: PackBitmapIndex,
3596 object_count: usize,
3597 pack_to_oid: Vec<ObjectId>,
3598 oid_to_pack: HashMap<ObjectId, u32>,
3599 lookup_oid: impl Fn(usize) -> Option<ObjectId>,
3600) -> Result<LoadedPackBitmap> {
3601 let word_count = object_count.div_ceil(64);
3602 let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
3603 let mut words = bitmap.to_words()?;
3604 words.resize(word_count, 0);
3605 Ok(words)
3606 };
3607
3608 let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
3609 let mut commit_words = HashMap::with_capacity(parsed.entries.len());
3610 for (entry_index, entry) in parsed.entries.iter().enumerate() {
3611 let mut words = expand(&entry.bitmap)?;
3612 if entry.xor_offset > 0 {
3613 let base_index = entry_index - entry.xor_offset as usize;
3614 let base = &resolved[base_index];
3615 for (dst, src) in words.iter_mut().zip(base.iter()) {
3616 *dst ^= *src;
3617 }
3618 }
3619 let words = Arc::new(words);
3620 resolved.push(Arc::clone(&words));
3621 let commit_oid = lookup_oid(entry.object_position as usize)
3622 .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
3623 commit_words.insert(commit_oid, words);
3624 }
3625
3626 Ok(LoadedPackBitmap {
3627 object_count: object_count as u32,
3628 oid_to_pack,
3629 pack_to_oid,
3630 commit_words,
3631 commits: expand(&parsed.type_bitmaps.commits)?,
3632 trees: expand(&parsed.type_bitmaps.trees)?,
3633 blobs: expand(&parsed.type_bitmaps.blobs)?,
3634 tags: expand(&parsed.type_bitmaps.tags)?,
3635 })
3636}
3637
/// Outcome of a bitmap-assisted reachability walk.
pub struct BitmapWalkResult {
    /// Bitset over pack positions of the bitmapped pack; a set bit marks the
    /// object at that position as reached.
    pub words: Vec<u64>,
    /// Reached objects that have no position in the bitmapped pack, with the
    /// type they were reached as.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
3645
3646impl BitmapWalkResult {
3647 pub fn subtract(&mut self, haves: &BitmapWalkResult) {
3649 for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
3650 *dst &= !*src;
3651 }
3652 let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
3653 self.extended.retain(|(oid, _)| !have_ext.contains(oid));
3654 }
3655}
3656
3657pub fn bitmap_reachable(
3668 bitmap: &LoadedPackBitmap,
3669 db: &impl ObjectReader,
3670 format: ObjectFormat,
3671 roots: &[ObjectId],
3672 include_objects: bool,
3673) -> Result<BitmapWalkResult> {
3674 let mut walk = BitmapFillWalk {
3675 bitmap,
3676 words: vec![0u64; bitmap.word_count()],
3677 extended: Vec::new(),
3678 extended_seen: HashSet::new(),
3679 };
3680 let mut commit_stack: Vec<ObjectId> = Vec::new();
3681
3682 for root in roots {
3683 let mut oid = *root;
3684 loop {
3686 let object = db.read_object(&oid)?;
3687 match object.object_type {
3688 ObjectType::Tag => {
3689 walk.mark(&oid, ObjectType::Tag);
3690 let tag = Tag::parse_ref(format, &object.body)?;
3691 oid = tag.object;
3692 }
3693 ObjectType::Commit => {
3694 commit_stack.push(oid);
3695 break;
3696 }
3697 ObjectType::Tree => {
3698 walk.mark_tree_closure(db, format, &oid)?;
3699 break;
3700 }
3701 ObjectType::Blob => {
3702 walk.mark(&oid, ObjectType::Blob);
3703 break;
3704 }
3705 }
3706 }
3707 }
3708
3709 while let Some(oid) = commit_stack.pop() {
3710 if let Some(position) = bitmap.pack_position(&oid) {
3711 if bitset_get(&walk.words, position) {
3712 continue;
3713 }
3714 if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
3715 bitset_or(&mut walk.words, stored);
3716 continue;
3717 }
3718 bitset_set(&mut walk.words, position);
3719 } else {
3720 if walk.extended_seen.contains(&oid) {
3721 continue;
3722 }
3723 walk.extended_seen.insert(oid);
3724 walk.extended.push((oid, ObjectType::Commit));
3725 }
3726 let object = db.read_object(&oid)?;
3727 let commit = Commit::parse_ref(format, &object.body)?;
3728 commit_stack.extend(grafted_parents(db, &oid, commit.parents));
3729 if include_objects {
3730 walk.mark_tree_closure(db, format, &commit.tree)?;
3731 }
3732 }
3733
3734 Ok(BitmapWalkResult {
3735 words: walk.words,
3736 extended: walk.extended,
3737 })
3738}
3739
/// Mutable state of one bitmap reachability walk.
struct BitmapFillWalk<'a> {
    /// Bitmap that supplies pack positions for object ids.
    bitmap: &'a LoadedPackBitmap,
    /// Bitset of reached pack positions.
    words: Vec<u64>,
    /// Reached objects that have no pack position, in discovery order.
    extended: Vec<(ObjectId, ObjectType)>,
    /// Ids already recorded in `extended`, used to avoid duplicates.
    extended_seen: HashSet<ObjectId>,
}
3746
3747impl BitmapFillWalk<'_> {
3748 fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
3750 if let Some(position) = self.bitmap.pack_position(oid) {
3751 if bitset_get(&self.words, position) {
3752 return false;
3753 }
3754 bitset_set(&mut self.words, position);
3755 true
3756 } else {
3757 if !self.extended_seen.insert(*oid) {
3758 return false;
3759 }
3760 self.extended.push((*oid, object_type));
3761 true
3762 }
3763 }
3764
3765 fn mark_tree_closure(
3769 &mut self,
3770 db: &impl ObjectReader,
3771 format: ObjectFormat,
3772 tree: &ObjectId,
3773 ) -> Result<()> {
3774 if !self.mark(tree, ObjectType::Tree) {
3775 return Ok(());
3776 }
3777 let object = db.read_object(tree)?;
3778 for entry in TreeEntries::new(format, &object.body) {
3779 let entry = entry?;
3780 if entry.is_gitlink() {
3781 continue;
3782 }
3783 if entry.is_tree() {
3784 self.mark_tree_closure(db, format, &entry.oid)?;
3785 } else {
3786 self.mark(&entry.oid, ObjectType::Blob);
3787 }
3788 }
3789 Ok(())
3790 }
3791}
3792
/// In-memory object store keyed by object id.
#[derive(Debug)]
pub struct ObjectDatabase {
    /// Hash format used to compute and validate object ids.
    format: ObjectFormat,
    /// Stored objects, guarded by a mutex so reads and writes can go through
    /// `&self`.
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    /// Flag set through `with_promisor`.
    // NOTE(review): no reader of this flag is visible in this part of the
    // file — confirm how it is consumed.
    promisor: bool,
}
3804
3805impl ObjectDatabase {
3806 pub fn new(format: ObjectFormat) -> Self {
3807 Self {
3808 format,
3809 objects: Mutex::new(HashMap::new()),
3810 promisor: false,
3811 }
3812 }
3813
3814 pub fn with_promisor(mut self, promisor: bool) -> Self {
3815 self.promisor = promisor;
3816 self
3817 }
3818
3819 pub fn contains(&self, oid: &ObjectId) -> bool {
3820 self.objects
3821 .lock()
3822 .map(|objects| objects.contains_key(oid))
3823 .unwrap_or(false)
3824 }
3825
3826 pub fn validate(&self, oid: &ObjectId) -> Result<()> {
3827 let object = self.read_object(oid)?;
3828 let actual = object.object_id(self.format)?;
3829 if &actual == oid {
3830 Ok(())
3831 } else {
3832 Err(GitError::InvalidObject(format!(
3833 "object id mismatch: expected {oid}, got {actual}"
3834 )))
3835 }
3836 }
3837}
3838
3839impl ObjectReader for ObjectDatabase {
3840 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3841 self.objects
3842 .lock()
3843 .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
3844 .get(oid)
3845 .map(Arc::clone)
3846 .or_else(|| implied_empty_tree_object(self.format, oid))
3847 .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
3848 }
3849}
3850
3851impl ObjectWriter for ObjectDatabase {
3852 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3853 let oid = object.object_id(self.format)?;
3854 self.objects
3855 .lock()
3856 .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
3857 .entry(oid)
3858 .or_insert_with(|| Arc::new(object));
3859 Ok(oid)
3860 }
3861}
3862
/// A single alternate, identified by its path.
// NOTE(review): presumably the path of an alternate object directory —
// confirm against the code that constructs this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
3867
/// Settings describing how a partial clone treats promised objects.
// NOTE(review): field semantics below are inferred from the names only; the
// consumers are outside this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether promised objects may be absent locally.
    pub allow_missing_promised_objects: bool,
}
3873
/// Shared cache of loaded pack contents, keyed by pack file path.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
3878
/// Contents of a pack file, readable as a byte slice through `Deref`.
#[derive(Debug)]
enum PackData {
    /// File mapped through `sley_mmap` (only with the `mmap` feature).
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read fully into memory.
    Heap(Vec<u8>),
}
3887
3888impl std::ops::Deref for PackData {
3889 type Target = [u8];
3890
3891 fn deref(&self) -> &[u8] {
3892 match self {
3893 #[cfg(feature = "mmap")]
3894 Self::Mapped(mapped) => mapped,
3895 Self::Heap(bytes) => bytes,
3896 }
3897 }
3898}
3899
/// Loads the pack at `pack_path`, preferring a memory mapping.
///
/// Any mapping failure falls back to reading the whole file into memory;
/// only the fallback read's error is reported.
#[cfg(feature = "mmap")]
fn load_pack_data(pack_path: &Path) -> Result<PackData> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(pack_path) {
        return Ok(PackData::Mapped(mapped));
    }
    let bytes = fs::read(pack_path)?;
    Ok(PackData::Heap(bytes))
}
3909
3910#[cfg(not(feature = "mmap"))]
3911fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3912 Ok(PackData::Heap(fs::read(pack_path)?))
3913}
3914
/// Loads the bytes of the pack index at `index_path`, preferring a memory
/// mapping and falling back to a full read when mapping fails.
// NOTE(review): the index file is mapped through `MappedFile::open_pack`;
// confirm that constructor is intended for `.idx` files as well.
#[cfg(feature = "mmap")]
fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(index_path) {
        return Ok(Arc::new(mapped));
    }
    let bytes = fs::read(index_path)?;
    Ok(Arc::new(bytes))
}
3922
3923#[cfg(not(feature = "mmap"))]
3924fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3925 Ok(Arc::new(fs::read(index_path)?))
3926}
3927
/// Loads the bytes of the multi-pack-index at `midx_path`, preferring a
/// memory mapping and falling back to a full read when mapping fails.
#[cfg(feature = "mmap")]
fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_multi_pack_index(midx_path) {
        return Ok(Arc::new(mapped));
    }
    let bytes = fs::read(midx_path)?;
    Ok(Arc::new(bytes))
}
3935
3936#[cfg(not(feature = "mmap"))]
3937fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3938 Ok(Arc::new(fs::read(midx_path)?))
3939}
3940
/// Shared, lock-guarded LRU cache of objects keyed by object id.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;
3947
/// Shared map from a pack file path to that pack's offset-keyed LRU cache.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;
3953
/// Shared map from a pack offset to the `(ObjectType, u64)` header recorded
/// for the entry at that offset.
// NOTE(review): the meaning of the `u64` in the value (presumably a size) is
// not visible in this part of the file — confirm.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;
3964
/// Shared map from a pack file path to that pack's header-type cache.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
3966
/// Default byte budget (96 MiB) of the object cache, used when
/// `SLEY_OBJECT_CACHE_BYTES` is unset or unparsable.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;
3973
/// Default byte budget (96 MiB) of each per-pack delta cache, used when
/// `SLEY_DELTA_BASE_CACHE_BYTES` is unset or unparsable.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
3978
3979fn cached_object_cost(object: &EncodedObject) -> usize {
3983 object.body.len().saturating_add(64)
3984}
3985
/// Reads a byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as `usize`. `default` is returned when the
/// variable is unset, is not valid Unicode, or does not parse.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
3994
3995fn object_cache_budget() -> usize {
4002 static BUDGET: OnceLock<usize> = OnceLock::new();
4003 *BUDGET.get_or_init(|| {
4004 cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
4005 })
4006}
4007
4008fn delta_base_cache_budget() -> usize {
4012 static BUDGET: OnceLock<usize> = OnceLock::new();
4013 *BUDGET.get_or_init(|| {
4014 cache_budget_from_env(
4015 "SLEY_DELTA_BASE_CACHE_BYTES",
4016 DEFAULT_DELTA_BASE_CACHE_BYTES,
4017 )
4018 })
4019}
4020
/// Reports whether `SLEY_VERIFY_READS` was set to an enabling value.
///
/// The variable is read once per process. It counts as enabled when it is set
/// and its trimmed value is neither empty nor `0`.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS").is_ok_and(|value| !matches!(value.trim(), "" | "0"))
    })
}
4038
/// Byte-budgeted LRU cache of encoded objects.
///
/// Recency order is a doubly linked list threaded through the map entries by
/// key: `head` is evicted first, `tail` is the most recently used entry.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    /// Maximum total cost the cache may hold; `0` disables caching.
    budget: usize,
    /// Sum of `cached_object_cost` over all cached objects.
    used: usize,
    /// Cached objects together with their list links.
    map: HashMap<K, LruEntry<K>>,
    /// Least recently used key, or `None` when the cache is empty.
    head: Option<K>,
    /// Most recently used key, or `None` when the cache is empty.
    tail: Option<K>,
}
4054
/// One cached object plus its links in the recency list.
#[derive(Debug)]
struct LruEntry<K> {
    /// The cached object.
    object: Arc<EncodedObject>,
    /// Key of the neighbour closer to the head (less recently used).
    prev: Option<K>,
    /// Key of the neighbour closer to the tail (more recently used).
    next: Option<K>,
}
4061
impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
    /// Creates an empty cache that may hold up to `budget` cost units.
    fn new(budget: usize) -> Self {
        Self {
            budget,
            used: 0,
            map: HashMap::new(),
            head: None,
            tail: None,
        }
    }

    /// Returns the object cached under `key`, marking it most recently used.
    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
        let object = Arc::clone(&self.map.get(key)?.object);
        self.touch(key);
        Some(object)
    }

    /// Moves `key` to the tail of the recency list if it is cached.
    fn touch(&mut self, key: &K) {
        // Already the most recently used entry: nothing to relink.
        if self.tail.as_ref() == Some(key) {
            return;
        }
        if self.map.contains_key(key) {
            self.detach(key);
            self.attach_back(key.clone());
        }
    }

    /// Drops `key` from the cache and gives its cost back to the budget.
    fn remove(&mut self, key: &K) {
        if let Some(entry) = self.map.get(key) {
            self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
        }
        // Unlink while the entry is still in the map, then drop it.
        self.detach(key);
        self.map.remove(key);
    }

    /// Unlinks `key` from the recency list, leaving its entry in the map
    /// with both links cleared. Does nothing when `key` is not cached.
    fn detach(&mut self, key: &K) {
        let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
            let prev = entry.prev.take();
            let next = entry.next.take();
            (prev, next)
        }) else {
            return;
        };

        // Point the predecessor (or the head) past the detached entry.
        match &prev {
            Some(prev_key) => {
                if let Some(prev_entry) = self.map.get_mut(prev_key) {
                    prev_entry.next = next.clone();
                }
            }
            None => self.head = next.clone(),
        }
        // Point the successor (or the tail) back past the detached entry.
        match &next {
            Some(next_key) => {
                if let Some(next_entry) = self.map.get_mut(next_key) {
                    next_entry.prev = prev.clone();
                }
            }
            None => self.tail = prev.clone(),
        }
    }

    /// Links `key`, whose entry is already in the map and currently unlinked,
    /// as the new tail of the recency list.
    fn attach_back(&mut self, key: K) {
        let previous_tail = self.tail.replace(key.clone());
        match previous_tail {
            Some(tail_key) => {
                if let Some(tail_entry) = self.map.get_mut(&tail_key) {
                    tail_entry.next = Some(key.clone());
                }
                if let Some(entry) = self.map.get_mut(&key) {
                    entry.prev = Some(tail_key);
                    entry.next = None;
                }
            }
            None => {
                // The list was empty: the entry becomes head and tail at once.
                self.head = Some(key.clone());
                if let Some(entry) = self.map.get_mut(&key) {
                    entry.prev = None;
                    entry.next = None;
                }
            }
        }
    }

    /// Drops every entry and resets the used cost to zero.
    fn clear(&mut self) {
        self.map.clear();
        self.head = None;
        self.tail = None;
        self.used = 0;
    }

    /// Caches `object` under `key` as the most recently used entry, evicting
    /// from the head until the total cost fits the budget again.
    ///
    /// Nothing is cached when the budget is zero. An object whose cost alone
    /// exceeds the budget is not cached, and any entry already stored under
    /// `key` is removed.
    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
        if self.budget == 0 {
            return;
        }
        let cost = cached_object_cost(&object);
        if cost > self.budget {
            // Too large to ever fit: make sure no older value stays cached
            // under this key.
            self.remove(&key);
            return;
        }
        if let Some(entry) = self.map.get_mut(&key) {
            let previous = std::mem::replace(&mut entry.object, object);
            // Swap the replaced object's cost for the new one.
            self.used = self
                .used
                .saturating_sub(cached_object_cost(&previous))
                .saturating_add(cost);
            self.touch(&key);
        } else {
            self.used = self.used.saturating_add(cost);
            self.map.insert(
                key.clone(),
                LruEntry {
                    object,
                    prev: None,
                    next: None,
                },
            );
            self.attach_back(key);
        }
        // Evict least recently used entries until the budget is respected.
        while self.used > self.budget {
            let Some(evicted) = self.head.clone() else {
                break;
            };
            self.remove(&evicted);
        }
    }
}
4195
/// LRU cache keyed by object id.
type LruObjectCache = LruCache<ObjectId>;
/// LRU cache keyed by a `u64` pack offset.
type LruOffsetCache = LruCache<u64>;
4200
/// Borrowed view of a pack's offset cache that implements
/// `sley_pack::PackDeltaCache`.
struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
4206
4207impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
4208 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
4209 self.0.lock().ok()?.get(&offset)
4210 }
4211
4212 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
4213 if let Ok(mut cache) = self.0.lock() {
4214 cache.put(offset, object);
4215 }
4216 }
4217}
4218
/// Borrowed view of a pack's header-type cache that implements
/// `sley_pack::HeaderTypeCache`.
struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
4223
4224impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
4225 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
4226 self.0.lock().ok()?.get(&pack_offset).copied()
4227 }
4228
4229 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
4230 if let Ok(mut cache) = self.0.lock() {
4231 cache.insert(pack_offset, header);
4232 }
4233 }
4234}
4235
/// Shared cache of parsed pack indexes, keyed by path.
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;
4241
/// Shared cache of parsed multi-pack-indexes, keyed by path.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
4246
/// Shared cache of multi-pack-index object-id lookup structures, keyed by
/// path.
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
4251
/// One pack known to the registry, with lazily loaded index and contents and
/// its own per-pack caches.
#[derive(Debug)]
struct RegisteredPack {
    /// Path of the pack's index file.
    idx: PathBuf,
    /// Path of the pack file.
    pack: PathBuf,
    /// Parsed index view, filled on first use by `index`.
    index: Mutex<Option<Arc<PackIndexViewData>>>,
    /// Pack contents, filled on first use by `bytes`.
    data: Mutex<Option<Arc<PackData>>>,
    /// Offset-keyed object cache for this pack.
    delta_cache: Arc<Mutex<LruOffsetCache>>,
    /// Header-type cache for this pack.
    header_type_cache: PackHeaderTypeCache,
}
4265
4266impl RegisteredPack {
4267 fn new(idx: PathBuf, pack: PathBuf) -> Self {
4268 Self {
4269 idx,
4270 pack,
4271 index: Mutex::new(None),
4272 data: Mutex::new(None),
4273 delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
4274 header_type_cache: Arc::new(Mutex::new(HashMap::new())),
4275 }
4276 }
4277
4278 fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
4279 if let Ok(cache) = self.index.lock()
4280 && let Some(index) = cache.as_ref()
4281 {
4282 return Ok(Arc::clone(index));
4283 }
4284 let index_bytes = load_pack_index_data(&self.idx)?;
4285 let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
4286 index_bytes,
4287 format,
4288 )?);
4289 if let Ok(mut cache) = self.index.lock() {
4290 *cache = Some(Arc::clone(&index));
4291 }
4292 Ok(index)
4293 }
4294
4295 fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
4296 if let Ok(cache) = self.data.lock()
4297 && let Some(bytes) = cache.as_ref()
4298 {
4299 return Ok(Arc::clone(bytes));
4300 }
4301 if let Ok(cache) = pack_bytes.lock()
4302 && let Some(bytes) = cache.get(&self.pack)
4303 {
4304 let bytes = Arc::clone(bytes);
4305 if let Ok(mut local_cache) = self.data.lock() {
4306 *local_cache = Some(Arc::clone(&bytes));
4307 }
4308 return Ok(bytes);
4309 }
4310 let bytes = Arc::new(load_pack_data(&self.pack)?);
4311 if let Ok(mut local_cache) = self.data.lock() {
4312 *local_cache = Some(Arc::clone(&bytes));
4313 }
4314 if let Ok(mut cache) = pack_bytes.lock() {
4315 cache.insert(self.pack.clone(), Arc::clone(&bytes));
4316 }
4317 Ok(bytes)
4318 }
4319}
4320
/// Cheap summary of a pack directory; two equal fingerprints are treated as
/// the same directory state.
// NOTE(review): how the fingerprint is computed and compared is outside this
// part of the file; field meanings below follow the names.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    /// Modification time, when available.
    modified: Option<std::time::SystemTime>,
    /// Number of `.idx` files counted.
    idx_count: usize,
    /// Number of `.pack` files counted.
    pack_count: usize,
}
4327
/// Set of registered packs captured for one pack directory fingerprint.
#[derive(Debug)]
struct PackRegistrySnapshot {
    /// Fingerprint the snapshot was built for.
    fingerprint: PackDirFingerprint,
    /// Packs in the snapshot; hints index into this list.
    packs: Vec<Arc<RegisteredPack>>,
    /// Index into `packs` remembered by `remember_hint`, if any.
    recent_pack: Mutex<Option<usize>>,
}
4338
4339impl PackRegistrySnapshot {
4340 fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
4341 Self {
4342 fingerprint,
4343 packs,
4344 recent_pack: Mutex::new(None),
4345 }
4346 }
4347
4348 fn cached_hint(&self) -> Option<usize> {
4349 self.recent_pack
4350 .lock()
4351 .ok()
4352 .and_then(|hint| *hint)
4353 .filter(|pack_index| *pack_index < self.packs.len())
4354 }
4355
4356 fn remember_hint(&self, pack_index: usize) {
4357 if let Ok(mut hint) = self.recent_pack.lock() {
4358 *hint = Some(pack_index);
4359 }
4360 }
4361}
4362
/// Shared slot holding the current pack registry snapshot, if one was built.
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
4367
/// Location of an object inside a pack.
#[derive(Debug, Clone)]
struct PackLookup {
    /// Path of the pack file.
    pack: PathBuf,
    /// Registry entry for the pack, when the lookup came from the registry.
    registered: Option<Arc<RegisteredPack>>,
    /// Offset of the object within the pack.
    offset: u64,
}
4374
4375impl PackLookup {
4376 fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
4377 Self {
4378 pack: pack.pack.clone(),
4379 registered: Some(pack),
4380 offset,
4381 }
4382 }
4383
4384 fn from_path(pack: PathBuf, offset: u64) -> Self {
4385 Self {
4386 pack,
4387 registered: None,
4388 offset,
4389 }
4390 }
4391
4392 fn pack_path(&self) -> &Path {
4393 &self.pack
4394 }
4395
4396 fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
4397 match &self.registered {
4398 Some(pack) => pack.bytes(&database.pack_bytes),
4399 None => database.cached_pack_bytes(&self.pack),
4400 }
4401 }
4402
4403 fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
4404 match &self.registered {
4405 Some(pack) => database.cached_pack_index(&pack.idx),
4406 None => database.cached_pack_index(&self.pack.with_extension("idx")),
4407 }
4408 }
4409
4410 fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
4411 match &self.registered {
4412 Some(pack) => Some(Arc::clone(&pack.delta_cache)),
4413 None => database.pack_delta_cache(&self.pack),
4414 }
4415 }
4416
4417 fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
4418 match &self.registered {
4419 Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
4420 None => database.pack_header_type_cache(&self.pack),
4421 }
4422 }
4423}
4424
/// Object database backed by an on-disk objects directory.
///
/// The cache fields are `Arc`-shared, so clones of a database share them.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    /// Loose object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    /// Root of the objects directory.
    objects_dir: PathBuf,
    /// Alternate object directories consulted in addition to `objects_dir`.
    alternates: Vec<PathBuf>,
    /// Hash format of the objects in this store.
    format: ObjectFormat,
    /// Loaded pack contents, keyed by pack path.
    pack_bytes: PackBytesCache,
    /// Parsed pack indexes, keyed by path.
    pack_indexes: PackIndexCache,
    /// Parsed multi-pack-indexes, keyed by path.
    multi_pack_indexes: MultiPackIndexCache,
    /// Multi-pack-index object-id lookup structures, keyed by path.
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    /// Current pack registry snapshot, if one was built.
    pack_registry: PackRegistryCache,
    /// LRU cache of objects keyed by object id.
    decoded: DecodedObjectCache,
    /// Per-pack offset caches for packs that are not in the registry.
    pack_deltas: PackDeltaCaches,
    /// Per-pack header-type caches for packs that are not in the registry.
    pack_header_types: PackHeaderTypeCaches,
    /// Set of promisor object ids, initialized at most once.
    // NOTE(review): the initializer is outside this part of the file.
    promisor_objects: Arc<OnceLock<HashSet<ObjectId>>>,
    /// Flag set through `with_promisor_remote_present`.
    promisor_remote_present: bool,
    /// Set of shallow graft commit ids, initialized at most once.
    // NOTE(review): presumably filled from the repository's shallow file via
    // `read_shallow_grafts` — confirm at the initialization site.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
4453
/// Stateful helper answering repeated "is this object present?" queries
/// against one `FileObjectDatabase`.
#[derive(Debug)]
pub struct ObjectPresenceChecker {
    /// Database the queries run against.
    db: FileObjectDatabase,
    /// The database's `pack` subdirectory.
    pack_dir: PathBuf,
    /// Multi-pack-index lookup, once prepared and if one exists.
    midx: Option<Arc<MultiPackIndexOidLookup>>,
    /// Pack registry snapshot, once prepared.
    registry: Option<Arc<PackRegistrySnapshot>>,
    /// Index views already fetched, parallel to the snapshot's pack list.
    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
    /// Position of the pack that satisfied the most recent registry hit.
    recent_pack: Option<usize>,
    /// Whether the multi-pack-index lookup has been prepared.
    prepared_packs: bool,
    /// Whether the registry snapshot has been prepared.
    prepared_registry: bool,
}
4465
4466impl ObjectPresenceChecker {
4467 fn new(db: FileObjectDatabase) -> Self {
4468 let pack_dir = db.objects_dir.join("pack");
4469 Self {
4470 db,
4471 pack_dir,
4472 midx: None,
4473 registry: None,
4474 registry_indexes: Vec::new(),
4475 recent_pack: None,
4476 prepared_packs: false,
4477 prepared_registry: false,
4478 }
4479 }
4480
4481 pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
4482 if oid.format() != self.db.format {
4483 return Err(GitError::InvalidObjectId(format!(
4484 "object {oid} uses {}, store uses {}",
4485 oid.format().name(),
4486 self.db.format.name()
4487 )));
4488 }
4489 if self.db.loose.exists(oid)? {
4490 return Ok(true);
4491 }
4492 if self.find_packed(oid, false)? {
4493 return Ok(true);
4494 }
4495 if self.find_packed(oid, true)? {
4496 return Ok(true);
4497 }
4498 for alternate in &self.db.alternates {
4499 if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
4500 return Ok(true);
4501 }
4502 }
4503 self.db.loose.invalidate_cache();
4506 self.db.loose.exists(oid)
4507 }
4508
4509 fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
4510 self.prepare_packs(force_rescan)?;
4511 if let Some(midx) = &self.midx
4512 && midx.contains(oid)
4513 {
4514 return Ok(true);
4515 }
4516 self.prepare_registry(force_rescan)?;
4517 self.find_in_registry(oid)
4518 }
4519
4520 fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
4521 if self.prepared_packs && !force_rescan {
4522 return Ok(());
4523 }
4524 let midx_path = self.pack_dir.join("multi-pack-index");
4525 self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
4526 self.prepared_packs = true;
4527 Ok(())
4528 }
4529
4530 fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
4531 if self.prepared_registry && !force_rescan {
4532 return Ok(());
4533 }
4534 let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
4535 let registry_changed = match self.registry.as_ref() {
4536 Some(cached) => !Arc::ptr_eq(cached, ®istry),
4537 None => true,
4538 };
4539 if registry_changed {
4540 self.registry_indexes = vec![None; registry.packs.len()];
4541 self.recent_pack = None;
4542 self.registry = Some(registry);
4543 }
4544 self.prepared_registry = true;
4545 Ok(())
4546 }
4547
4548 fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
4549 let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
4550 return Ok(false);
4551 };
4552 if let Some(pack_index) = self
4553 .recent_pack
4554 .filter(|pack_index| *pack_index < registry.packs.len())
4555 {
4556 let index = self.registry_index(®istry, pack_index)?;
4557 if index.find(oid).is_some() {
4558 return Ok(true);
4559 }
4560 }
4561 for pack_index in 0..registry.packs.len() {
4562 if Some(pack_index) == self.recent_pack {
4563 continue;
4564 }
4565 let index = self.registry_index(®istry, pack_index)?;
4566 if index.find(oid).is_some() {
4567 self.recent_pack = Some(pack_index);
4568 return Ok(true);
4569 }
4570 }
4571 Ok(false)
4572 }
4573
4574 fn registry_index(
4575 &mut self,
4576 registry: &PackRegistrySnapshot,
4577 pack_index: usize,
4578 ) -> Result<Arc<PackIndexViewData>> {
4579 if self.registry_indexes.len() != registry.packs.len() {
4580 self.registry_indexes = vec![None; registry.packs.len()];
4581 self.recent_pack = None;
4582 }
4583 if let Some(index) = self
4584 .registry_indexes
4585 .get(pack_index)
4586 .and_then(|index| index.as_ref())
4587 {
4588 return Ok(Arc::clone(index));
4589 }
4590 let index = registry.packs[pack_index].index(self.db.format)?;
4591 if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
4592 *slot = Some(Arc::clone(&index));
4593 }
4594 Ok(index)
4595 }
4596}
4597
4598fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
4602 let Ok(contents) = std::fs::read_to_string(shallow_file) else {
4603 return HashSet::new();
4604 };
4605 contents
4606 .lines()
4607 .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
4608 .collect()
4609}
4610
4611pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
4612 env::var_os("GIT_OBJECT_DIRECTORY")
4613 .map(PathBuf::from)
4614 .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
4615}
4616
/// Resolves the common directory for the repository at `git_dir`.
///
/// `GIT_COMMON_DIR` wins when set. Otherwise, when `git_dir/commondir` is
/// readable, its trimmed contents name the common directory (relative paths
/// are resolved against `git_dir`), canonicalized when possible. With neither
/// present, `git_dir` itself is returned.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(pointer.trim());
    let common = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    // Fall back to the uncanonicalized path when canonicalization fails.
    fs::canonicalize(&common).unwrap_or(common)
}
4634
/// Lists every object id stored in the objects directory of the repository
/// at `git_dir`.
///
/// Resolves the directory with `repository_objects_dir` and delegates to
/// `object_ids_in_objects_dir`, whose ordering and errors apply.
pub fn repository_object_ids(
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
) -> Result<Vec<ObjectId>> {
    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
}
4641
4642pub fn object_ids_in_objects_dir(
4643 objects_dir: impl AsRef<Path>,
4644 format: ObjectFormat,
4645) -> Result<Vec<ObjectId>> {
4646 let objects_dir = objects_dir.as_ref();
4647 let mut oids = HashSet::new();
4648 collect_loose_object_ids(objects_dir, format, &mut oids)?;
4649 collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
4650 let mut oids = oids.into_iter().collect::<Vec<_>>();
4651 oids.sort_by_key(ObjectId::to_hex);
4652 Ok(oids)
4653}
4654
4655fn collect_loose_object_ids(
4656 objects_dir: &Path,
4657 format: ObjectFormat,
4658 oids: &mut HashSet<ObjectId>,
4659) -> Result<()> {
4660 if !objects_dir.exists() {
4661 return Ok(());
4662 }
4663 let hex_len = format.hex_len();
4664 for entry in fs::read_dir(objects_dir)? {
4665 let entry = entry?;
4666 if !entry.file_type()?.is_dir() {
4667 continue;
4668 }
4669 let name = entry.file_name();
4670 let Some(fanout) = name.to_str() else {
4671 continue;
4672 };
4673 if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4674 continue;
4675 }
4676 for object_entry in fs::read_dir(entry.path())? {
4677 let object_entry = object_entry?;
4678 if !object_entry.file_type()?.is_file() {
4679 continue;
4680 }
4681 let name = object_entry.file_name();
4682 let Some(suffix) = name.to_str() else {
4683 continue;
4684 };
4685 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4686 continue;
4687 }
4688 oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
4689 }
4690 }
4691 Ok(())
4692}
4693
4694fn collect_loose_fanout_object_ids(
4695 objects_dir: &Path,
4696 format: ObjectFormat,
4697 fanout: u8,
4698 oids: &mut HashSet<ObjectId>,
4699) -> Result<()> {
4700 let fanout_hex = format!("{fanout:02x}");
4701 let fanout_dir = objects_dir.join(&fanout_hex);
4702 let entries = match fs::read_dir(&fanout_dir) {
4703 Ok(entries) => entries,
4704 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
4705 Err(err) => return Err(GitError::Io(err.to_string())),
4706 };
4707 let hex_len = format.hex_len();
4708 for object_entry in entries {
4709 let object_entry = object_entry?;
4710 let name = object_entry.file_name();
4711 let Some(suffix) = name.to_str() else {
4712 continue;
4713 };
4714 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4715 continue;
4716 }
4717 oids.insert(ObjectId::from_hex(
4718 format,
4719 &format!("{fanout_hex}{suffix}"),
4720 )?);
4721 }
4722 Ok(())
4723}
4724
/// Loose object ids collected so far, tracked per fanout byte.
// NOTE(review): the code that fills and queries this cache is outside this
// part of the file; field meanings follow the names.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    /// Fanout bytes whose directories have already been loaded.
    loaded_fanouts: HashSet<u8>,
    /// Object ids found in the loaded fanout directories.
    objects: HashSet<ObjectId>,
}
4730
4731pub fn packed_object_ids(
4736 objects_dir: impl AsRef<Path>,
4737 format: ObjectFormat,
4738) -> Result<HashSet<ObjectId>> {
4739 let mut oids = HashSet::new();
4740 collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
4741 Ok(oids)
4742}
4743
4744fn collect_packed_object_ids(
4745 pack_dir: &Path,
4746 format: ObjectFormat,
4747 oids: &mut HashSet<ObjectId>,
4748) -> Result<()> {
4749 if !pack_dir.exists() {
4750 return Ok(());
4751 }
4752 let mut midx_pack_names = HashSet::new();
4753 let midx_path = pack_dir.join("multi-pack-index");
4754 if midx_path.exists() {
4755 let midx = MultiPackIndex::parse_without_checksum(&fs::read(&midx_path)?, format)?;
4756 midx_pack_names.extend(midx.pack_names.iter().cloned());
4757 oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
4758 }
4759 for entry in fs::read_dir(pack_dir)? {
4760 let path = entry?.path();
4761 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
4762 continue;
4763 }
4764 if !path.with_extension("pack").exists() {
4765 continue;
4766 }
4767 let index = match PackIndex::parse(&fs::read(&path)?, format) {
4768 Ok(index) => index,
4769 Err(_err)
4770 if path
4771 .file_name()
4772 .and_then(|name| name.to_str())
4773 .is_some_and(|name| midx_pack_names.contains(name)) =>
4774 {
4775 eprintln!(
4776 "error: packfile {} index unavailable",
4777 path.with_extension("pack").display()
4778 );
4779 continue;
4780 }
4781 Err(err) => return Err(err),
4782 };
4783 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
4784 }
4785 Ok(())
4786}
4787
4788impl FileObjectDatabase {
    /// Hash format of the objects in this database.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
4793
    /// Root of the objects directory this database reads from.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
4798
4799 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4800 let objects_dir = objects_dir.into();
4801 Self {
4802 loose: LooseObjectStore::new(objects_dir.clone(), format),
4803 alternates: alternate_object_dirs(&objects_dir),
4804 objects_dir,
4805 format,
4806 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4807 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4808 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4809 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4810 pack_registry: Arc::new(Mutex::new(None)),
4811 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4812 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4813 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4814 promisor_objects: Arc::new(OnceLock::new()),
4815 promisor_remote_present: false,
4816 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4817 }
4818 }
4819
4820 fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4821 let objects_dir = objects_dir.into();
4822 Self {
4823 loose: LooseObjectStore::new(objects_dir.clone(), format),
4824 alternates: Vec::new(),
4825 objects_dir,
4826 format,
4827 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4828 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4829 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4830 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4831 pack_registry: Arc::new(Mutex::new(None)),
4832 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4833 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4834 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4835 promisor_objects: Arc::new(OnceLock::new()),
4836 promisor_remote_present: false,
4837 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4838 }
4839 }
4840
/// Opens the object database of the repository at `git_dir`.
///
/// The objects directory is resolved with `repository_objects_dir` and the
/// database is then built exactly as by [`Self::new`].
pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
    Self::new(repository_objects_dir(git_dir), format)
}
4844
/// Builder-style setter for the `promisor_remote_present` flag.
///
/// `is_promised_object` only reports an object as promised while this flag
/// is `true`.
pub fn with_promisor_remote_present(mut self, present: bool) -> Self {
    self.promisor_remote_present = present;
    self
}
4857
4858 pub fn refresh_read_cache(&self) {
4863 if let Ok(mut cache) = self.pack_registry.lock() {
4864 *cache = None;
4865 }
4866 if let Ok(mut cache) = self.pack_indexes.lock() {
4867 cache.clear();
4868 }
4869 if let Ok(mut cache) = self.multi_pack_indexes.lock() {
4870 cache.clear();
4871 }
4872 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
4873 cache.clear();
4874 }
4875 if let Ok(mut cache) = self.pack_bytes.lock() {
4876 cache.clear();
4877 }
4878 if let Ok(mut cache) = self.pack_deltas.lock() {
4879 cache.clear();
4880 }
4881 if let Ok(mut cache) = self.pack_header_types.lock() {
4882 cache.clear();
4883 }
4884 if let Ok(mut cache) = self.decoded.lock() {
4885 cache.clear();
4886 }
4887 self.loose.invalidate_cache();
4888 }
4889
/// Returns the loose object store backing this database.
pub fn loose(&self) -> &LooseObjectStore {
    &self.loose
}
4893
/// Creates an [`ObjectPresenceChecker`] built from a clone of this database
/// handle.
pub fn presence_checker(&self) -> ObjectPresenceChecker {
    ObjectPresenceChecker::new(self.clone())
}
4897
/// Installs `pack` using the default [`RawPackInstallOptions`].
///
/// Validation and file placement are performed by
/// [`Self::install_pack_with_options`].
pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
    self.install_pack_with_options(pack, RawPackInstallOptions::default())
}
4901
4902 pub fn write_blob_as_pack(
4903 &self,
4904 oid: ObjectId,
4905 object: &EncodedObject,
4906 compression_level: u32,
4907 ) -> Result<ObjectId> {
4908 if object.object_type != ObjectType::Blob {
4909 return Err(GitError::InvalidObject(
4910 "write_blob_as_pack requires a blob object".into(),
4911 ));
4912 }
4913 if oid.format() != self.format {
4914 return Err(GitError::InvalidObjectId(format!(
4915 "object {oid} uses {}, store uses {}",
4916 oid.format().name(),
4917 self.format.name()
4918 )));
4919 }
4920 if self.contains(&oid)? {
4921 return Ok(oid);
4922 }
4923 let input = [PackInput { oid: &oid, object }];
4924 let options = PackWriteOptions::new()
4925 .with_window(0)
4926 .with_depth(0)
4927 .with_reorder(false)
4928 .with_compression_level(compression_level);
4929 let pack =
4930 PackFile::write_packed_with_known_ids_and_options(&input, self.format, &options)?;
4931 self.install_pack(&pack)?;
4932 Ok(oid)
4933 }
4934
4935 pub fn write_blobs_as_pack(
4936 &self,
4937 objects: &[(ObjectId, EncodedObject)],
4938 compression_level: u32,
4939 ) -> Result<()> {
4940 let mut seen = HashSet::with_capacity(objects.len());
4941 let mut inputs = Vec::new();
4942 for (oid, object) in objects {
4943 if object.object_type != ObjectType::Blob {
4944 return Err(GitError::InvalidObject(
4945 "write_blobs_as_pack requires blob objects".into(),
4946 ));
4947 }
4948 if oid.format() != self.format {
4949 return Err(GitError::InvalidObjectId(format!(
4950 "object {oid} uses {}, store uses {}",
4951 oid.format().name(),
4952 self.format.name()
4953 )));
4954 }
4955 if seen.insert(*oid) && !self.contains(oid)? {
4956 inputs.push(PackInput { oid, object });
4957 }
4958 }
4959 if inputs.is_empty() {
4960 return Ok(());
4961 }
4962 let options = PackWriteOptions::new()
4963 .with_window(0)
4964 .with_depth(0)
4965 .with_reorder(false)
4966 .with_compression_level(compression_level);
4967 let pack =
4968 PackFile::write_packed_with_known_ids_and_options(&inputs, self.format, &options)?;
4969 self.install_pack(&pack)?;
4970 Ok(())
4971 }
4972
4973 pub fn install_pack_with_options(
4974 &self,
4975 pack: &PackWrite,
4976 options: RawPackInstallOptions,
4977 ) -> Result<PackInstallResult> {
4978 if pack.checksum.format() != self.format {
4979 return Err(GitError::InvalidObjectId(format!(
4980 "pack checksum uses {}, store uses {}",
4981 pack.checksum.format().name(),
4982 self.format.name()
4983 )));
4984 }
4985 for entry in &pack.entries {
4986 if entry.oid.format() != self.format {
4987 return Err(GitError::InvalidObjectId(format!(
4988 "pack entry {} uses {}, store uses {}",
4989 entry.oid,
4990 entry.oid.format().name(),
4991 self.format.name()
4992 )));
4993 }
4994 }
4995 let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
4996 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4997 if canonical_index.pack_checksum != pack.checksum
4998 || parsed_index.pack_checksum != pack.checksum
4999 {
5000 return Err(GitError::InvalidFormat(
5001 "pack and index checksums do not match pack write".into(),
5002 ));
5003 }
5004 if pack.index != canonical_index.index {
5005 return Err(GitError::InvalidFormat(
5006 "pack index does not match pack contents".into(),
5007 ));
5008 }
5009
5010 let pack_dir = self.objects_dir.join("pack");
5011 fs::create_dir_all(&pack_dir)?;
5012 let pack_name = format!("pack-{}", pack.checksum.to_hex());
5013 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5014 let index_path = pack_dir.join(format!("{pack_name}.idx"));
5015 if !pack_path.exists() || !index_path.exists() {
5016 write_pack_component(&pack_path, &pack.pack)?;
5017 write_pack_component(&index_path, &pack.index)?;
5018 }
5019 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
5020 Ok(PackInstallResult {
5021 pack_name,
5022 pack_path,
5023 index_path,
5024 promisor_path,
5025 object_ids: canonical_index
5026 .entries
5027 .iter()
5028 .map(|entry| entry.oid)
5029 .collect(),
5030 })
5031 }
5032
/// Installs a pack produced by this process's own pack writer, using the
/// default [`RawPackInstallOptions`].
///
/// See [`Self::install_written_pack_with_options`] for the checks performed.
pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
    self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
}
5043
5044 pub fn install_written_pack_with_options(
5045 &self,
5046 pack: &PackWrite,
5047 options: RawPackInstallOptions,
5048 ) -> Result<PackInstallResult> {
5049 validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
5050 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
5051 if parsed_index.pack_checksum != pack.checksum {
5052 return Err(GitError::InvalidFormat(
5053 "pack write index checksum does not match pack".into(),
5054 ));
5055 }
5056 if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
5057 return Err(GitError::InvalidFormat(
5058 "pack write index does not match generated entries".into(),
5059 ));
5060 }
5061 self.install_generated_pack_unchecked(pack, options)
5062 }
5063
5064 fn install_generated_pack_unchecked(
5065 &self,
5066 pack: &PackWrite,
5067 options: RawPackInstallOptions,
5068 ) -> Result<PackInstallResult> {
5069 let pack_dir = self.objects_dir.join("pack");
5070 fs::create_dir_all(&pack_dir)?;
5071 let pack_name = format!("pack-{}", pack.checksum.to_hex());
5072 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5073 let index_path = pack_dir.join(format!("{pack_name}.idx"));
5074 if !pack_path.exists() || !index_path.exists() {
5075 write_pack_component(&pack_path, &pack.pack)?;
5076 write_pack_component(&index_path, &pack.index)?;
5077 }
5078 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
5079 Ok(PackInstallResult {
5080 pack_name,
5081 pack_path,
5082 index_path,
5083 promisor_path,
5084 object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
5085 })
5086 }
5087
5088 fn install_pack_file_from_temp(
5089 &self,
5090 temp_pack_path: &Path,
5091 pack_checksum: ObjectId,
5092 index: &[u8],
5093 object_ids: Vec<ObjectId>,
5094 options: RawPackInstallOptions,
5095 ) -> Result<PackInstallResult> {
5096 let pack_dir = self.objects_dir.join("pack");
5097 fs::create_dir_all(&pack_dir)?;
5098 let pack_name = format!("pack-{}", pack_checksum.to_hex());
5099 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5100 let index_path = pack_dir.join(format!("{pack_name}.idx"));
5101 match fs::rename(temp_pack_path, &pack_path) {
5102 Ok(()) => {}
5103 Err(_) if pack_path.exists() => {
5104 let _ = fs::remove_file(temp_pack_path);
5105 }
5106 Err(err) => return Err(GitError::Io(err.to_string())),
5107 }
5108 write_pack_component(&index_path, index)?;
5109 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
5110 Ok(PackInstallResult {
5111 pack_name,
5112 pack_path,
5113 index_path,
5114 promisor_path,
5115 object_ids,
5116 })
5117 }
5118
/// Streams a raw pack from `reader` into the store using the default
/// [`RawPackInstallOptions`].
///
/// See [`Self::install_raw_pack_from_reader_with_options`] for details.
pub fn install_raw_pack_from_reader<R>(&self, reader: &mut R) -> Result<PackInstallResult>
where
    R: Read,
{
    self.install_raw_pack_from_reader_with_options(reader, RawPackInstallOptions::default())
}
5125
/// Starts a streaming raw-pack installation with the default
/// [`RawPackInstallOptions`].
///
/// See [`Self::begin_raw_pack_install_with_options`] for details.
pub fn begin_raw_pack_install(
    &self,
    expected_pack_id: ObjectId,
    expected_pack_size: u64,
) -> Result<RawPackStreamingInstall> {
    self.begin_raw_pack_install_with_options(
        expected_pack_id,
        expected_pack_size,
        RawPackInstallOptions::default(),
    )
}
5137
5138 pub fn begin_raw_pack_install_with_options(
5139 &self,
5140 expected_pack_id: ObjectId,
5141 expected_pack_size: u64,
5142 options: RawPackInstallOptions,
5143 ) -> Result<RawPackStreamingInstall> {
5144 if expected_pack_id.format() != self.format {
5145 return Err(GitError::InvalidObjectId(format!(
5146 "pack checksum uses {}, store uses {}",
5147 expected_pack_id.format().name(),
5148 self.format.name()
5149 )));
5150 }
5151 let pack_dir = self.objects_dir.join("pack");
5152 fs::create_dir_all(&pack_dir)?;
5153 let pack_name = format!("pack-{}", expected_pack_id.to_hex());
5154 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
5155 let index_path = pack_dir.join(format!("{pack_name}.idx"));
5156 let temp_pack_path = unique_temp_path(&pack_dir);
5157 let file = fs::OpenOptions::new()
5158 .write(true)
5159 .create_new(true)
5160 .open(&temp_pack_path)?;
5161 Ok(RawPackStreamingInstall {
5162 format: self.format,
5163 expected_pack_id,
5164 expected_pack_size,
5165 options,
5166 pack_dir,
5167 pack_name,
5168 pack_path,
5169 index_path,
5170 temp_pack_path,
5171 file: Some(file),
5172 written: 0,
5173 finished: false,
5174 })
5175 }
5176
5177 pub fn install_raw_pack_from_reader_with_options<R>(
5178 &self,
5179 reader: &mut R,
5180 options: RawPackInstallOptions,
5181 ) -> Result<PackInstallResult>
5182 where
5183 R: Read,
5184 {
5185 let pack_dir = self.objects_dir.join("pack");
5186 fs::create_dir_all(&pack_dir)?;
5187 let temp_pack_path = unique_temp_path(&pack_dir);
5188 let result = (|| -> Result<PackInstallResult> {
5189 let mut file = fs::OpenOptions::new()
5192 .write(true)
5193 .create_new(true)
5194 .open(&temp_pack_path)?;
5195 let built = {
5196 let mut tee = PackInstallTeeReader {
5197 reader,
5198 writer: &mut file,
5199 };
5200 PackIndex::write_v2_for_pack_reader_to_trailer(&mut tee, self.format)?
5201 };
5202 file.flush()?;
5203 file.sync_all()?;
5204 drop(file);
5205
5206 self.install_pack_file_from_temp(
5207 &temp_pack_path,
5208 built.pack_checksum,
5209 &built.index,
5210 built.entries.iter().map(|entry| entry.oid).collect(),
5211 options,
5212 )
5213 })();
5214 if result.is_err() {
5215 let _ = fs::remove_file(&temp_pack_path);
5216 }
5217 result
5218 }
5219
5220 pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
5221 if self.loose.exists(oid)? {
5222 return Ok(true);
5223 }
5224 if self.find_pack_containing(oid)?.is_some() {
5225 return Ok(true);
5226 }
5227 for alternate in &self.alternates {
5228 if Self::without_alternates(alternate, self.format).contains(oid)? {
5229 return Ok(true);
5230 }
5231 }
5232 self.loose.invalidate_cache();
5235 self.loose.exists(oid)
5236 }
5237
5238 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
5239 let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
5240 .into_iter()
5241 .collect::<HashSet<_>>();
5242 for alternate in &self.alternates {
5243 oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
5244 }
5245 let mut oids = oids.into_iter().collect::<Vec<_>>();
5246 oids.sort_by_key(ObjectId::to_hex);
5247 Ok(oids)
5248 }
5249
5250 pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
5251 if let Some(disk_size) = self.loose.disk_size(oid)? {
5252 return Ok(Some(ObjectStorageInfo {
5253 disk_size,
5254 deltabase: zero_oid(self.format)?,
5255 }));
5256 }
5257 if let Some(info) = self.packed_object_storage_info(oid)? {
5258 return Ok(Some(info));
5259 }
5260 for alternate in &self.alternates {
5261 if let Some(info) =
5262 Self::without_alternates(alternate, self.format).object_storage_info(oid)?
5263 {
5264 return Ok(Some(info));
5265 }
5266 }
5267 self.loose.invalidate_cache();
5270 if let Some(disk_size) = self.loose.disk_size(oid)? {
5271 return Ok(Some(ObjectStorageInfo {
5272 disk_size,
5273 deltabase: zero_oid(self.format)?,
5274 }));
5275 }
5276 Ok(None)
5277 }
5278
5279 pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
5280 let mut matches = self.object_ids_with_prefix(prefix)?;
5281 Ok(match matches.len() {
5282 0 => ObjectPrefixResolution::Missing,
5283 1 => ObjectPrefixResolution::Unique(matches.remove(0)),
5284 _ => ObjectPrefixResolution::Ambiguous(matches),
5285 })
5286 }
5287
5288 pub fn object_ids_with_prefix(&self, prefix: &str) -> Result<Vec<ObjectId>> {
5289 validate_object_id_prefix(self.format, prefix)?;
5290 let mut matches = Vec::new();
5291 for oid in self.object_ids()? {
5292 if object_id_matches_prefix(&oid, prefix) {
5293 matches.push(oid);
5294 }
5295 }
5296 Ok(matches)
5297 }
5298
5299 pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
5309 if implied_empty_tree_object(self.format, oid).is_some() {
5310 return Ok(Some((ObjectType::Tree, 0)));
5311 }
5312 if let Ok(mut cache) = self.decoded.lock()
5313 && let Some(object) = cache.get(oid)
5314 {
5315 return Ok(Some((object.object_type, object.body.len() as u64)));
5316 }
5317 if let Some(header) = self.loose.read_header(oid)? {
5318 return Ok(Some(header));
5319 }
5320 if let Some(pack_lookup) = self.find_pack_containing(oid)? {
5321 let bytes = pack_lookup.pack_bytes(self)?;
5322 let type_cache = pack_lookup.header_type_cache(self);
5327 let resolve_ref_base = |base: &ObjectId| {
5328 self.read_object_header(base)
5329 .map(|header| header.map(|(t, _)| t))
5330 };
5331 let header = match &type_cache {
5332 Some(cache) => {
5333 let mut adapter = PackHeaderTypeCacheAdapter(cache);
5334 sley_pack::read_object_header_at_with_cache(
5335 &bytes,
5336 pack_lookup.offset,
5337 self.format,
5338 resolve_ref_base,
5339 &mut adapter,
5340 )?
5341 }
5342 None => sley_pack::read_object_header_at(
5343 &bytes,
5344 pack_lookup.offset,
5345 self.format,
5346 resolve_ref_base,
5347 )?,
5348 };
5349 return Ok(Some(header));
5350 }
5351 for alternate in &self.alternates {
5352 if let Some(header) =
5353 Self::without_alternates(alternate, self.format).read_object_header(oid)?
5354 {
5355 return Ok(Some(header));
5356 }
5357 }
5358 self.loose.invalidate_cache();
5361 if let Some(header) = self.loose.read_header(oid)? {
5362 return Ok(Some(header));
5363 }
5364 Ok(None)
5365 }
5366
5367 fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
5368 if let Ok(mut cache) = self.decoded.lock()
5371 && let Some(object) = cache.get(oid)
5372 {
5373 return Ok(Some(object));
5374 }
5375 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
5376 return Ok(None);
5377 };
5378 self.read_packed_object_at_lookup(oid, &pack_lookup)
5379 .map(Some)
5380 }
5381
5382 fn read_packed_object_at_lookup(
5383 &self,
5384 oid: &ObjectId,
5385 pack_lookup: &PackLookup,
5386 ) -> Result<Arc<EncodedObject>> {
5387 if let Ok(mut cache) = self.decoded.lock()
5388 && let Some(object) = cache.get(oid)
5389 {
5390 return Ok(object);
5391 }
5392 let bytes = pack_lookup.pack_bytes(self)?;
5393 let delta_cache = pack_lookup.delta_cache(self);
5398 let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
5399 let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
5405 let object = match &delta_adapter {
5406 Some(adapter) => sley_pack::read_object_at_with_cache_arc(
5407 &bytes,
5408 pack_lookup.offset,
5409 self.format,
5410 resolve_ref_base,
5411 adapter,
5412 )?,
5413 None => sley_pack::read_object_at_arc(
5414 &bytes,
5415 pack_lookup.offset,
5416 self.format,
5417 resolve_ref_base,
5418 )?,
5419 };
5420 if verify_reads_enabled() {
5424 let actual = object.object_id(self.format)?;
5425 if actual != *oid {
5426 return Err(GitError::InvalidObject(format!(
5427 "pack object id mismatch: index says {oid}, decoded {actual}"
5428 )));
5429 }
5430 }
5431 if let Ok(mut cache) = self.decoded.lock() {
5432 cache.put(*oid, Arc::clone(&object));
5433 }
5434 Ok(object)
5435 }
5436
5437 fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
5441 let mut caches = self.pack_deltas.lock().ok()?;
5442 let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
5443 Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
5444 });
5445 Some(Arc::clone(cache))
5446 }
5447
5448 fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
5452 let mut caches = self.pack_header_types.lock().ok()?;
5453 let cache = caches
5454 .entry(pack_path.to_path_buf())
5455 .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
5456 Some(Arc::clone(cache))
5457 }
5458
5459 fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
5464 if let Ok(cache) = self.pack_bytes.lock()
5465 && let Some(bytes) = cache.get(pack_path)
5466 {
5467 return Ok(Arc::clone(bytes));
5468 }
5469 let bytes = Arc::new(load_pack_data(pack_path)?);
5470 if let Ok(mut cache) = self.pack_bytes.lock() {
5471 cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
5472 }
5473 Ok(bytes)
5474 }
5475
5476 fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
5480 if let Ok(cache) = self.pack_indexes.lock()
5481 && let Some(index) = cache.get(index_path)
5482 {
5483 return Ok(Arc::clone(index));
5484 }
5485 let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
5486 if let Ok(mut cache) = self.pack_indexes.lock() {
5487 cache.insert(index_path.to_path_buf(), Arc::clone(&index));
5488 }
5489 Ok(index)
5490 }
5491
5492 fn cached_multi_pack_index_oid_lookup(
5493 &self,
5494 midx_path: &Path,
5495 ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
5496 if !midx_path.exists() {
5497 return Ok(None);
5498 }
5499 if let Ok(cache) = self.multi_pack_oid_lookups.lock()
5500 && let Some(midx) = cache.get(midx_path)
5501 {
5502 return Ok(Some(Arc::clone(midx)));
5503 }
5504 let bytes = load_multi_pack_index_lookup_data(midx_path)?;
5505 let midx = match MultiPackIndexOidLookup::parse(bytes, self.format) {
5506 Ok(midx) => Arc::new(midx),
5507 Err(GitError::InvalidFormat(message))
5508 if message.starts_with("multi-pack-index hash id ") =>
5509 {
5510 let actual = message
5511 .strip_prefix("multi-pack-index hash id ")
5512 .and_then(|rest| rest.split_whitespace().next())
5513 .unwrap_or("0");
5514 let expected = match self.format {
5515 ObjectFormat::Sha1 => 1,
5516 ObjectFormat::Sha256 => 2,
5517 };
5518 eprintln!(
5519 "error: multi-pack-index hash version {actual} does not match version {expected}"
5520 );
5521 return Ok(None);
5522 }
5523 Err(err) => return Err(err),
5524 };
5525 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
5526 cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
5527 }
5528 Ok(Some(midx))
5529 }
5530
5531 fn cached_pack_registry(
5536 &self,
5537 pack_dir: &Path,
5538 force_rescan: bool,
5539 ) -> Result<Arc<PackRegistrySnapshot>> {
5540 if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
5541 return Ok(registry);
5542 }
5543 let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
5544 if let Ok(mut cache) = self.pack_registry.lock() {
5545 match cache.as_ref() {
5546 Some(existing)
5547 if existing.fingerprint == scanned.fingerprint
5548 && same_registered_pack_set(&existing.packs, &scanned.packs) =>
5549 {
5550 return Ok(Arc::clone(existing));
5551 }
5552 _ => {
5553 *cache = Some(Arc::clone(&scanned));
5554 }
5555 }
5556 }
5557 Ok(scanned)
5558 }
5559
5560 fn find_in_pack_registry(
5561 &self,
5562 registry: Arc<PackRegistrySnapshot>,
5563 oid: &ObjectId,
5564 ) -> Result<Option<PackLookup>> {
5565 let hinted_pack_index = registry.cached_hint();
5566 if let Some(pack_index) = hinted_pack_index {
5567 let pack = ®istry.packs[pack_index];
5568 match pack.index(self.format) {
5569 Ok(index) => {
5570 if let Some(entry) = index.find(oid) {
5571 return Ok(Some(PackLookup::from_registered(
5572 Arc::clone(pack),
5573 entry.offset,
5574 )));
5575 }
5576 }
5577 Err(_) => {
5578 eprintln!("error: packfile {} index unavailable", pack.pack.display());
5579 }
5580 }
5581 }
5582 for (pack_index, pack) in registry.packs.iter().enumerate() {
5583 if Some(pack_index) == hinted_pack_index {
5584 continue;
5585 }
5586 let index = match pack.index(self.format) {
5587 Ok(index) => index,
5588 Err(_) => {
5589 eprintln!("error: packfile {} index unavailable", pack.pack.display());
5590 continue;
5591 }
5592 };
5593 if let Some(entry) = index.find(oid) {
5594 registry.remember_hint(pack_index);
5595 return Ok(Some(PackLookup::from_registered(
5596 Arc::clone(pack),
5597 entry.offset,
5598 )));
5599 }
5600 }
5601 Ok(None)
5602 }
5603
5604 fn read_packed_object_from_other_packs(
5610 &self,
5611 oid: &ObjectId,
5612 exclude: &PackLookup,
5613 ) -> Result<Option<Arc<EncodedObject>>> {
5614 let pack_dir = self.objects_dir.join("pack");
5615 let Ok(entries) = fs::read_dir(&pack_dir) else {
5616 return Ok(None);
5617 };
5618 let excluded_pack = exclude.pack_path().to_path_buf();
5619 for entry in entries {
5620 let idx_path = entry?.path();
5621 if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
5622 continue;
5623 }
5624 let pack_path = idx_path.with_extension("pack");
5625 if pack_path == excluded_pack {
5626 continue;
5627 }
5628 let Ok(idx_bytes) = fs::read(&idx_path) else {
5629 continue;
5630 };
5631 let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
5632 continue;
5633 };
5634 let Some(entry) = index.find(oid) else {
5635 continue;
5636 };
5637 let candidate = PackLookup::from_path(pack_path, entry.offset);
5638 if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
5639 return Ok(Some(object));
5640 }
5641 }
5642 Ok(None)
5643 }
5644
5645 fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
5646 if oid.format() != self.format {
5647 return Err(GitError::InvalidObjectId(format!(
5648 "object {oid} uses {}, store uses {}",
5649 oid.format().name(),
5650 self.format.name()
5651 )));
5652 }
5653 let pack_dir = self.objects_dir.join("pack");
5654 if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
5659 && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
5660 {
5661 return Ok(Some(pack_paths));
5662 }
5663 if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
5664 && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
5665 {
5666 return Ok(Some(pack_paths));
5667 }
5668
5669 if !pack_dir.exists() {
5670 return Ok(None);
5671 }
5672 if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
5673 return Ok(Some(pack_paths));
5674 }
5675 let registry = self.cached_pack_registry(&pack_dir, false)?;
5679 if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(®istry), oid)? {
5680 return Ok(Some(pack_paths));
5681 }
5682 let refreshed = self.cached_pack_registry(&pack_dir, true)?;
5683 if Arc::ptr_eq(®istry, &refreshed) {
5684 return Ok(None);
5686 }
5687 self.find_in_pack_registry(refreshed, oid)
5688 }
5689
5690 fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
5691 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
5692 return Ok(None);
5693 };
5694 let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
5695 let trailer_offset = pack_len
5696 .checked_sub(self.format.raw_len() as u64)
5697 .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
5698 let index = pack_lookup.pack_index(self)?;
5699 let pack = pack_lookup.pack_bytes(self)?;
5700 let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
5701 let delta_base_offset = match &delta_base {
5702 Some(PackDeltaBase::Offset(offset)) => Some(*offset),
5703 Some(PackDeltaBase::Ref(_)) | None => None,
5704 };
5705 let offset_info = scan_pack_index_offsets(
5706 &index,
5707 pack_lookup.offset,
5708 trailer_offset,
5709 delta_base_offset,
5710 )?;
5711 let disk_size = offset_info
5712 .end_offset
5713 .checked_sub(pack_lookup.offset)
5714 .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
5715 let deltabase = match delta_base {
5716 Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
5717 GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
5723 })?,
5724 Some(PackDeltaBase::Ref(oid)) => oid,
5725 None => zero_oid(self.format)?,
5726 };
5727 Ok(Some(ObjectStorageInfo {
5728 disk_size,
5729 deltabase,
5730 }))
5731 }
5732
5733 fn find_midx_pack_containing(
5734 &self,
5735 pack_dir: &Path,
5736 oid: &ObjectId,
5737 ) -> Result<Option<PackLookup>> {
5738 let midx_path = pack_dir.join("multi-pack-index");
5739 let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
5740 return Ok(None);
5741 };
5742 self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
5743 }
5744
5745 fn midx_oid_lookup_pack_paths(
5746 &self,
5747 pack_dir: &Path,
5748 midx: &MultiPackIndexOidLookup,
5749 oid: &ObjectId,
5750 ) -> Result<Option<PackLookup>> {
5751 let Some(entry) = midx.find(oid)? else {
5752 return Ok(None);
5753 };
5754 let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
5755 return Err(GitError::InvalidFormat(
5756 "multi-pack-index object points past pack table".into(),
5757 ));
5758 };
5759 let pack_file_name = pack_name
5760 .strip_suffix(".idx")
5761 .map(|stem| format!("{stem}.pack"))
5762 .unwrap_or_else(|| pack_name.to_string());
5763 let pack = pack_dir.join(pack_file_name);
5764 Ok(Some(PackLookup::from_path(pack, entry.offset)))
5765 }
5766
5767 fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
5768 let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
5769 let cache = self.multi_pack_oid_lookups.lock().ok()?;
5770 cache.get(&midx_path).map(Arc::clone)
5771 }
5772
5773 fn cached_loaded_pack_registry(
5779 &self,
5780 _pack_dir: &Path,
5781 ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
5782 let cache = match self.pack_registry.lock() {
5783 Ok(cache) => cache,
5784 Err(_) => return Ok(None),
5785 };
5786 Ok(cache.as_ref().map(Arc::clone))
5787 }
5788}
5789
5790fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
5791 if prefix.len() < 4 || prefix.len() > format.hex_len() {
5792 return Err(GitError::InvalidObjectId(format!(
5793 "expected 4 to {} hex digits for {}, got {}",
5794 format.hex_len(),
5795 format.name(),
5796 prefix.len()
5797 )));
5798 }
5799 if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
5800 return Err(GitError::InvalidObjectId(format!(
5801 "non-hex object id prefix {prefix}"
5802 )));
5803 }
5804 Ok(())
5805}
5806
5807fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
5808 oid.to_hex()
5809 .as_bytes()
5810 .iter()
5811 .zip(prefix.as_bytes())
5812 .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
5813}
5814
5815fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
5816 match fs::metadata(pack_dir) {
5817 Ok(metadata) => Ok(metadata.modified().ok()),
5818 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
5819 Err(err) => Err(GitError::Io(err.to_string())),
5820 }
5821}
5822
5823fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
5828 let modified = pack_dir_modified(pack_dir)?;
5829 let entries = match fs::read_dir(pack_dir) {
5830 Ok(entries) => entries,
5831 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
5832 return Ok(PackRegistrySnapshot::new(
5833 PackDirFingerprint {
5834 modified,
5835 idx_count: 0,
5836 pack_count: 0,
5837 },
5838 Vec::new(),
5839 ));
5840 }
5841 Err(err) => return Err(GitError::Io(err.to_string())),
5842 };
5843
5844 let mut idx_paths = Vec::new();
5845 let mut idx_count = 0;
5846 let mut pack_count = 0;
5847 for entry in entries {
5848 let entry = entry?;
5849 let path = entry.path();
5850 match path.extension().and_then(|ext| ext.to_str()) {
5851 Some("idx") => {
5852 idx_count += 1;
5853 idx_paths.push(path);
5854 }
5855 Some("pack") => {
5856 pack_count += 1;
5857 }
5858 _ => {}
5859 }
5860 }
5861
5862 let mut packs = Vec::new();
5863 for idx in idx_paths {
5864 let pack = idx.with_extension("pack");
5865 let Ok(metadata) = fs::metadata(&pack) else {
5866 continue;
5867 };
5868 let modified = pack_sort_modified(&metadata);
5869 packs.push((
5870 modified,
5871 metadata.len(),
5872 Arc::new(RegisteredPack::new(idx, pack)),
5873 ));
5874 }
5875 packs.sort_by(|left, right| {
5880 right
5881 .0
5882 .cmp(&left.0)
5883 .then_with(|| right.1.cmp(&left.1))
5884 .then_with(|| left.2.idx.cmp(&right.2.idx))
5885 });
5886 let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
5887 Ok(PackRegistrySnapshot::new(
5888 PackDirFingerprint {
5889 modified,
5890 idx_count,
5891 pack_count,
5892 },
5893 packs,
5894 ))
5895}
5896
/// Converts a file's modification time into a `(seconds, nanoseconds)` pair
/// measured from the Unix epoch, for use as a sort key.
///
/// Returns `(0, 0)` when the modification time is unavailable or lies
/// before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(modified) = metadata.modified() else {
        return (0, 0);
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
5909
5910fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
5913 left.len() == right.len()
5914 && left
5915 .iter()
5916 .zip(right.iter())
5917 .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
5918}
5919
/// Collects the alternate object directories configured for `objects_dir`.
///
/// Two sources are consulted, in this order:
///
/// 1. The `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split
///    with the platform's path-list separator. Empty entries are dropped.
///    Paths are kept as OS strings, so non-UTF-8 directories survive intact.
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines, lines
///    starting with `#` and lines that are not valid UTF-8 are skipped; a
///    trailing carriage return is stripped. Relative paths are resolved
///    against `objects_dir`.
///
/// A missing or unreadable alternates file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates: Vec<PathBuf> = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES")
        .map(|value| {
            env::split_paths(&value)
                .filter(|path| !path.as_os_str().is_empty())
                .collect()
        })
        .unwrap_or_default();

    let Ok(contents) = fs::read(objects_dir.join("info").join("alternates")) else {
        return alternates;
    };
    let listed = contents.split(|byte| *byte == b'\n').filter_map(|raw| {
        let line = raw.strip_suffix(b"\r").unwrap_or(raw);
        if line.is_empty() || line.starts_with(b"#") {
            return None;
        }
        let entry = std::str::from_utf8(line).ok()?;
        // `Path::join` returns an absolute `entry` unchanged and resolves a
        // relative one against `objects_dir`.
        Some(objects_dir.join(entry))
    });
    alternates.extend(listed);
    alternates
}
5950
5951impl ObjectReader for FileObjectDatabase {
/// Reports whether `oid` is a promised object.
///
/// Always `false` unless the promisor-remote flag is set; only then is the
/// promisor object set consulted.
fn is_promised_object(&self, oid: &ObjectId) -> bool {
    self.promisor_remote_present && self.promisor_objects().contains(oid)
}
5960
5961 fn has_shallow_grafts(&self) -> bool {
5962 !self
5963 .shallow_grafts
5964 .get_or_init(|| {
5965 let shallow_file = self
5966 .objects_dir
5967 .parent()
5968 .map(|git_dir| git_dir.join("shallow"));
5969 match shallow_file {
5970 Some(path) => read_shallow_grafts(&path, self.format),
5971 None => HashSet::new(),
5972 }
5973 })
5974 .is_empty()
5975 }
5976
5977 fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
5978 self.shallow_grafts
5979 .get_or_init(|| {
5980 let shallow_file = self
5981 .objects_dir
5982 .parent()
5983 .map(|git_dir| git_dir.join("shallow"));
5984 match shallow_file {
5985 Some(path) => read_shallow_grafts(&path, self.format),
5986 None => HashSet::new(),
5987 }
5988 })
5989 .contains(oid)
5990 }
5991
5992 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
5993 if let Some(object) = implied_empty_tree_object(self.format, oid) {
5994 return Ok(object);
5995 }
5996 if let Some(pack_lookup) = self.find_pack_containing(oid)? {
6004 match self.read_packed_object_at_lookup(oid, &pack_lookup) {
6005 Ok(object) => return Ok(object),
6006 Err(GitError::NotFound(_)) => {}
6007 Err(packed_err) => {
6013 if let Ok(object) = self.loose.read_object(oid) {
6014 return Ok(object);
6015 }
6016 if let Some(object) =
6019 self.read_packed_object_from_other_packs(oid, &pack_lookup)?
6020 {
6021 return Ok(object);
6022 }
6023 for alternate in &self.alternates {
6024 if let Ok(object) =
6025 Self::without_alternates(alternate, self.format).read_object(oid)
6026 {
6027 return Ok(object);
6028 }
6029 }
6030 return Err(packed_err);
6031 }
6032 }
6033 }
6034 let loose_err = match self.loose.read_object(oid) {
6035 Ok(object) => return Ok(object),
6036 Err(GitError::NotFound(_)) => None,
6037 Err(err) => Some(err),
6038 };
6039 if let Some(object) = self.read_packed_object(oid)? {
6040 return Ok(object);
6041 }
6042 for alternate in &self.alternates {
6043 match Self::without_alternates(alternate, self.format).read_object(oid) {
6044 Ok(object) => return Ok(object),
6045 Err(GitError::NotFound(_)) => {}
6046 Err(err) => return Err(err),
6047 }
6048 }
6049 self.loose.invalidate_cache();
6055 match self.loose.read_object(oid) {
6056 Ok(object) => return Ok(object),
6057 Err(GitError::NotFound(_)) => {}
6058 Err(err) => return Err(err),
6059 }
6060 if let Some(err) = loose_err {
6064 return Err(err);
6065 }
6066 Err(GitError::object_not_found_in(
6067 *oid,
6068 MissingObjectContext::Read,
6069 ))
6070 }
6071}
6072
6073impl FileObjectDatabase {
6074 fn promisor_objects(&self) -> &HashSet<ObjectId> {
6075 self.promisor_objects.get_or_init(|| {
6076 let mut promised =
6077 promisor_pack_object_ids(&self.objects_dir, self.format).unwrap_or_default();
6078 let mut pending = promised.iter().copied().collect::<Vec<_>>();
6079 while let Some(oid) = pending.pop() {
6080 let Ok(object) = self.read_object(&oid) else {
6081 continue;
6082 };
6083 for link in promisor_object_links(self.format, &object) {
6084 if promised.insert(link) {
6085 pending.push(link);
6086 }
6087 }
6088 }
6089 promised
6090 })
6091 }
6092}
6093
6094fn promisor_pack_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
6095 let pack_dir = objects_dir.join("pack");
6096 let mut oids = HashSet::new();
6097 if !pack_dir.exists() {
6098 return Ok(oids);
6099 }
6100 for entry in fs::read_dir(pack_dir)? {
6101 let path = entry?.path();
6102 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
6103 continue;
6104 }
6105 if !path.with_extension("pack").exists() || !path.with_extension("promisor").exists() {
6106 continue;
6107 }
6108 let index = PackIndex::parse(&fs::read(path)?, format)?;
6109 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
6110 }
6111 Ok(oids)
6112}
6113
6114fn promisor_object_links(format: ObjectFormat, object: &EncodedObject) -> Vec<ObjectId> {
6115 match object.object_type {
6116 ObjectType::Commit => Commit::parse_ref(format, &object.body)
6117 .map(|commit| {
6118 let mut links = Vec::with_capacity(commit.parents.len() + 1);
6119 links.push(commit.tree);
6120 links.extend(commit.parents);
6121 links
6122 })
6123 .unwrap_or_default(),
6124 ObjectType::Tree => TreeEntries::new(format, &object.body)
6125 .filter_map(|entry| entry.ok().map(|entry| entry.oid))
6126 .collect(),
6127 ObjectType::Tag => Tag::parse_ref(format, &object.body)
6128 .map(|tag| vec![tag.object])
6129 .unwrap_or_default(),
6130 ObjectType::Blob => Vec::new(),
6131 }
6132}
6133
6134impl ObjectWriter for FileObjectDatabase {
6135 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
6136 let oid = object.object_id(self.format)?;
6142 if self.contains(&oid)? {
6143 return Ok(oid);
6144 }
6145 self.loose.write_object(object)
6146 }
6147}
6148
6149fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
6150 if path.exists() {
6151 return Ok(());
6152 }
6153 let parent = path
6154 .parent()
6155 .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
6156 fs::create_dir_all(parent)?;
6157 let temp_path = unique_temp_path(parent);
6158 let write_result = (|| -> Result<()> {
6159 {
6160 let mut file = fs::OpenOptions::new()
6161 .write(true)
6162 .create_new(true)
6163 .open(&temp_path)?;
6164 file.write_all(bytes)?;
6165 file.sync_all()?;
6166 }
6167 match fs::rename(&temp_path, path) {
6168 Ok(()) => Ok(()),
6169 Err(_) if path.exists() => {
6170 let _ = fs::remove_file(&temp_path);
6171 Ok(())
6172 }
6173 Err(err) => Err(GitError::Io(err.to_string())),
6174 }
6175 })();
6176 if write_result.is_err() {
6177 let _ = fs::remove_file(&temp_path);
6178 }
6179 write_result
6180}
6181
6182fn write_promisor_pack_sidecar(
6183 pack_dir: &Path,
6184 pack_name: &str,
6185 promisor: bool,
6186) -> Result<Option<PathBuf>> {
6187 if !promisor {
6188 return Ok(None);
6189 }
6190 let path = pack_dir.join(format!("{pack_name}.promisor"));
6191 write_pack_component(&path, b"")?;
6192 Ok(Some(path))
6193}
6194
/// Size, in bytes, of the inflated window at the start of a loose object that
/// is searched for the NUL byte terminating the `<type> <size>` header.
const MAX_LOOSE_HEADER_LEN: usize = 32;
6200
6201fn loose_header_too_long(oid: &ObjectId) -> GitError {
6206 GitError::InvalidObject(format!(
6207 "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
6208 ))
6209}
6210
6211fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
6215 GitError::InvalidObject(format!("unable to unpack {oid} header"))
6216}
6217
/// Inspects the two-byte zlib stream header at the start of `input` and
/// returns the diagnostic text for the first problem found, if any.
///
/// Checks run in this order: header checksum (the big-endian 16-bit header
/// must be a multiple of 31), compression method (must be 8), window size
/// (upper nibble of the first byte must not exceed 7), and the
/// preset-dictionary flag. Inputs shorter than two bytes produce `None`.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [first, second, ..] => (*first, *second),
        _ => return None,
    };
    let check_word = u16::from_be_bytes([cmf, flg]);
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let wants_dictionary = flg & 0x20 != 0;

    if check_word % 31 != 0 {
        Some("inflate: data stream error (incorrect header check)")
    } else if method != 8 {
        Some("inflate: data stream error (unknown compression method)")
    } else if window_bits > 7 {
        Some("inflate: data stream error (invalid window size)")
    } else if wants_dictionary {
        Some("inflate: needs dictionary (no message)")
    } else {
        None
    }
}
6241
6242fn emit_inflate_diagnostic(input: &[u8]) {
6245 if let Some(diagnostic) = inflate_header_diagnostic(input) {
6246 eprintln!("error: {diagnostic}");
6247 }
6248}
6249
/// Outcome of `LooseObjectStore::verify_object` for a loose object file that
/// exists on disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// The file inflated and parsed cleanly and hashes to the expected id.
    Ok,
    /// The file parsed, but its content hashes to `actual` instead of the id
    /// it was looked up under.
    HashMismatch { actual: ObjectId },
    /// The file could not be inflated or parsed, carried trailing garbage, or
    /// was shorter than its header declares.
    Corrupt,
}
6263
/// Reader/writer for loose objects stored under an objects directory.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Directory containing the two-hex-digit fan-out subdirectories.
    objects_dir: PathBuf,
    /// Hash format every object id handled by this store must use.
    format: ObjectFormat,
    /// Cache of which loose objects exist; it sits behind an `Arc`, so clones
    /// of the store share it.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
6278
6279impl LooseObjectStore {
6280 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
6281 Self {
6282 objects_dir: objects_dir.into(),
6283 format,
6284 loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
6285 }
6286 }
6287
6288 fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
6293 let mut guard = self.loose_cache.lock().ok()?;
6294 let fanout = oid.as_bytes()[0];
6295 if !guard.loaded_fanouts.contains(&fanout) {
6296 collect_loose_fanout_object_ids(
6297 &self.objects_dir,
6298 self.format,
6299 fanout,
6300 &mut guard.objects,
6301 )
6302 .ok()?;
6303 guard.loaded_fanouts.insert(fanout);
6304 }
6305 Some(guard.objects.contains(oid))
6306 }
6307
6308 fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
6312 if let Ok(mut guard) = self.loose_cache.lock() {
6313 guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
6314 guard.loaded_fanouts = (0..=u8::MAX).collect();
6315 let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
6316 ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
6317 return Ok(ids);
6318 }
6319 loose_object_ids(&self.objects_dir, self.format)
6320 }
6321
6322 fn note_loose_write(&self, oid: ObjectId) {
6326 if let Ok(mut guard) = self.loose_cache.lock() {
6327 guard.objects.insert(oid);
6328 }
6329 }
6330
6331 pub(crate) fn invalidate_cache(&self) {
6334 if let Ok(mut guard) = self.loose_cache.lock() {
6335 *guard = LoosePresenceCache::default();
6336 }
6337 }
6338
6339 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
6340 Self::new(repository_objects_dir(git_dir), format)
6341 }
6342
6343 fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
6344 if oid.format() != self.format {
6345 return Err(GitError::InvalidObjectId(format!(
6346 "object {oid} uses {}, store uses {}",
6347 oid.format().name(),
6348 self.format.name()
6349 )));
6350 }
6351 Ok(())
6352 }
6353
6354 pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
6355 self.validate_oid_format(oid)?;
6356 let hex = oid.to_hex();
6357 Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
6358 }
6359
6360 pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
6361 self.validate_oid_format(oid)?;
6362 if self.cached_loose_presence(oid) == Some(false) {
6363 return Ok(false);
6364 }
6365 let path = self.object_path(oid)?;
6366 Ok(path.exists())
6367 }
6368
6369 pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
6370 self.validate_oid_format(oid)?;
6371 if self.cached_loose_presence(oid) == Some(false) {
6372 return Ok(None);
6373 }
6374 let path = self.object_path(oid)?;
6375 match fs::metadata(path) {
6376 Ok(metadata) => Ok(Some(metadata.len())),
6377 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
6378 Err(err) => Err(GitError::Io(err.to_string())),
6379 }
6380 }
6381
6382 pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
6387 self.validate_oid_format(oid)?;
6388 if self.cached_loose_presence(oid) == Some(false) {
6389 return Ok(None);
6390 }
6391 let path = self.object_path(oid)?;
6392 let compressed = match fs::read(&path) {
6393 Ok(compressed) => compressed,
6394 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
6395 Err(err) => return Err(GitError::Io(err.to_string())),
6396 };
6397 match inflate_loose_header(&compressed)? {
6398 LooseHeader::Ok(header) => {
6399 let header = std::str::from_utf8(&header)
6400 .map_err(|err| GitError::InvalidObject(err.to_string()))?;
6401 let (kind, size) = header
6402 .split_once(' ')
6403 .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
6404 let object_type = kind.parse::<ObjectType>()?;
6405 let size = size
6406 .parse::<u64>()
6407 .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
6408 Ok(Some((object_type, size)))
6409 }
6410 LooseHeader::Bad => {
6411 emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
6414 Err(loose_unpack_header_failed(oid))
6415 }
6416 LooseHeader::TooLong => {
6417 Err(loose_header_too_long(oid))
6422 }
6423 }
6424 }
6425
6426 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
6428 self.loose_object_ids_cached()
6429 }
6430
6431 pub fn verify_object(
6439 &self,
6440 oid: &ObjectId,
6441 display_path: &str,
6442 ) -> Result<Option<LooseObjectIntegrity>> {
6443 let path = self.object_path(oid)?;
6444 let compressed = match fs::read(&path) {
6445 Ok(compressed) => compressed,
6446 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
6447 Err(err) => return Err(GitError::Io(err.to_string())),
6448 };
6449 let mut decoder = ZlibDecoder::new(compressed.as_slice());
6450 let mut framed = Vec::new();
6451 if decoder.read_to_end(&mut framed).is_err() {
6452 emit_inflate_diagnostic(&compressed);
6453 if framed_loose_header_terminated(&framed) {
6461 eprintln!("error: corrupt loose object '{oid}'");
6462 eprintln!("error: unable to unpack contents of {display_path}");
6463 } else {
6464 eprintln!("error: unable to unpack header of {display_path}");
6465 }
6466 return Ok(Some(LooseObjectIntegrity::Corrupt));
6467 }
6468 if !framed_loose_header_terminated(&framed) {
6469 eprintln!("error: unable to unpack header of {display_path}");
6472 return Ok(Some(LooseObjectIntegrity::Corrupt));
6473 }
6474 if (decoder.total_in() as usize) < compressed.len() {
6481 eprintln!("error: garbage at end of loose object '{oid}'");
6485 eprintln!("error: unable to unpack contents of {display_path}");
6486 return Ok(Some(LooseObjectIntegrity::Corrupt));
6487 }
6488 if let Some(declared) = loose_header_declared_size(&framed) {
6495 let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
6496 let body_len = framed.len() - (nul + 1).min(framed.len());
6497 if body_len < declared {
6498 eprintln!("error: corrupt loose object '{oid}'");
6499 eprintln!("error: unable to unpack contents of {display_path}");
6500 return Ok(Some(LooseObjectIntegrity::Corrupt));
6501 }
6502 }
6503 let Ok(object) = parse_framed_object(&framed) else {
6504 if let Some(header) = loose_header_with_unknown_type(&framed) {
6509 eprintln!("error: unable to parse type from header '{header}' of {display_path}");
6510 } else {
6511 eprintln!("error: unable to parse header of {display_path}");
6512 }
6513 return Ok(Some(LooseObjectIntegrity::Corrupt));
6514 };
6515 let actual = object.object_id(self.format)?;
6516 if &actual != oid {
6517 return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
6518 }
6519 Ok(Some(LooseObjectIntegrity::Ok))
6520 }
6521}
6522
6523fn framed_loose_header_terminated(framed: &[u8]) -> bool {
6527 framed
6528 .iter()
6529 .take(MAX_LOOSE_HEADER_LEN)
6530 .any(|byte| *byte == 0)
6531}
6532
6533fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
6538 let nul = framed.iter().position(|&b| b == 0)?;
6539 let header = std::str::from_utf8(&framed[..nul]).ok()?;
6540 let (kind, size) = header.split_once(' ')?;
6541 let size: usize = size.parse().ok()?;
6542 if framed.len() - (nul + 1) != size {
6545 return None;
6546 }
6547 if kind.parse::<ObjectType>().is_ok() {
6550 return None;
6551 }
6552 Some(header.to_string())
6553}
6554
/// Extracts the body size declared in the `<type> <size>` header at the start
/// of `framed`.
///
/// Returns `None` when there is no NUL terminator, the header is not valid
/// UTF-8, it contains no space, or the text after the first space is not a
/// decimal `usize`.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let header_len = framed.iter().position(|&byte| byte == 0)?;
    let (header_bytes, _) = framed.split_at(header_len);
    let header = std::str::from_utf8(header_bytes).ok()?;
    header
        .split_once(' ')
        .and_then(|(_, digits)| digits.parse::<usize>().ok())
}
6564
/// Result of inflating only the header window of a loose object, as produced
/// by `inflate_loose_header`.
enum LooseHeader {
    /// The header bytes up to, but not including, the terminating NUL.
    Ok(Vec<u8>),
    /// The decompressor reported an error.
    Bad,
    /// No NUL byte appeared within the inflated header window.
    TooLong,
}
6579
6580fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
6594 let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
6595 let mut decompress = Decompress::new(true);
6596 let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
6600 let produced = decompress.total_out() as usize;
6601 match status {
6602 Ok(_) => {
6603 let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
6604 match window.iter().position(|&byte| byte == 0) {
6605 Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
6606 None => Ok(LooseHeader::TooLong),
6610 }
6611 }
6612 Err(_) => Ok(LooseHeader::Bad),
6614 }
6615}
6616
6617impl ObjectReader for LooseObjectStore {
6618 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
6619 self.validate_oid_format(oid)?;
6620 if self.cached_loose_presence(oid) == Some(false) {
6624 return Err(GitError::object_not_found_in(
6625 *oid,
6626 MissingObjectContext::Read,
6627 ));
6628 }
6629 let path = self.object_path(oid)?;
6630 let compressed = match fs::read(&path) {
6631 Ok(compressed) => compressed,
6632 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
6633 return Err(GitError::object_not_found_in(
6634 *oid,
6635 MissingObjectContext::Read,
6636 ));
6637 }
6638 Err(err) => return Err(GitError::Io(err.to_string())),
6639 };
6640 let mut decoder = ZlibDecoder::new(compressed.as_slice());
6641 let mut framed = Vec::new();
6642 if decoder.read_to_end(&mut framed).is_err() {
6643 emit_inflate_diagnostic(&compressed);
6644 if !framed_loose_header_terminated(&framed) {
6649 return Err(loose_unpack_header_failed(oid));
6650 }
6651 return Err(GitError::InvalidObject(format!(
6652 "corrupt loose object '{oid}'"
6653 )));
6654 }
6655 if framed
6660 .iter()
6661 .take(MAX_LOOSE_HEADER_LEN)
6662 .all(|byte| *byte != 0)
6663 {
6664 return Err(loose_header_too_long(oid));
6665 }
6666 let object = parse_framed_object(&framed)?;
6667 if verify_reads_enabled() {
6671 let actual = object.object_id(self.format)?;
6672 if &actual != oid {
6673 return Err(GitError::InvalidObject(format!(
6674 "loose object {} hashes to {actual}",
6675 path.display()
6676 )));
6677 }
6678 }
6679 Ok(Arc::new(object))
6680 }
6681}
6682
6683impl ObjectWriter for LooseObjectStore {
6684 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
6685 let oid = object.object_id(self.format)?;
6686 let path = self.object_path(&oid)?;
6687 if path.exists() {
6688 self.note_loose_write(oid);
6689 return Ok(oid);
6690 }
6691 let parent = path
6692 .parent()
6693 .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
6694 fs::create_dir_all(parent)?;
6695 let temp_path = unique_temp_path(parent);
6696 let write_result = (|| -> Result<()> {
6697 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6698 encoder.write_all(&object.framed_bytes())?;
6699 let compressed = encoder.finish()?;
6700 {
6701 let mut file = fs::OpenOptions::new()
6702 .write(true)
6703 .create_new(true)
6704 .open(&temp_path)?;
6705 file.write_all(&compressed)?;
6706 }
6716 match fs::rename(&temp_path, &path) {
6717 Ok(()) => Ok(()),
6718 Err(_) if path.exists() => {
6719 let _ = fs::remove_file(&temp_path);
6720 Ok(())
6721 }
6722 Err(err) => Err(GitError::Io(err.to_string())),
6723 }
6724 })();
6725 if write_result.is_err() {
6726 let _ = fs::remove_file(&temp_path);
6727 }
6728 write_result?;
6729 self.note_loose_write(oid);
6730 Ok(oid)
6731 }
6732}
6733
6734fn unique_temp_path(parent: &Path) -> PathBuf {
6735 let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
6736 parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
6737}
6738
6739#[cfg(test)]
6740mod tests {
6741 use super::*;
6742 use sley_core::BString;
6743 use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
6744 use sley_pack::{PackFile, PackWriteOptions};
6745
6746 fn blob_of(byte: u8, len: usize) -> EncodedObject {
6747 EncodedObject::new(ObjectType::Blob, vec![byte; len])
6748 }
6749
6750 fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
6751 Arc::new(blob_of(byte, len))
6752 }
6753
6754 fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
6755 reader
6756 .read_object(oid)
6757 .expect("test operation should succeed")
6758 .as_ref()
6759 .clone()
6760 }
6761
6762 #[test]
6763 fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
6764 let one = cached_object_cost(&blob_of(0, 1000));
6766 let mut cache = LruCache::<u32>::new(one * 2 + 8);
6767 cache.put(1, cached_blob_of(b'a', 1000));
6768 cache.put(2, cached_blob_of(b'b', 1000));
6769 assert!(cache.get(&1).is_some());
6771 cache.put(3, cached_blob_of(b'c', 1000));
6772 assert!(cache.get(&1).is_some());
6774 assert!(cache.get(&2).is_none());
6775 assert!(cache.get(&3).is_some());
6776 }
6777
6778 #[test]
6779 fn lru_cache_zero_budget_is_inert() {
6780 let mut cache = LruCache::<u32>::new(0);
6781 cache.put(1, cached_blob_of(b'a', 16));
6782 assert!(cache.get(&1).is_none());
6783 }
6784
6785 #[test]
6786 fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
6787 let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
6788 cache.put(1, cached_blob_of(b'a', 50));
6789 assert!(cache.get(&1).is_some());
6790 cache.put(1, cached_blob_of(b'b', 10_000));
6793 assert!(cache.get(&1).is_none());
6794 cache.put(2, cached_blob_of(b'c', 50));
6797 assert!(cache.get(&2).is_some());
6798 }
6799
6800 #[test]
6801 fn lru_cache_replacing_entry_updates_byte_accounting() {
6802 let small = cached_object_cost(&blob_of(0, 500));
6805 let mut cache = LruCache::<u32>::new(small * 2 + 200);
6806 cache.put(1, cached_blob_of(b'a', 500));
6807 cache.put(2, cached_blob_of(b'b', 500));
6808 assert!(cache.get(&1).is_some());
6809 assert!(cache.get(&2).is_some());
6810 cache.put(2, cached_blob_of(b'b', 1000));
6815 assert!(cache.get(&2).is_some());
6816 assert!(cache.get(&1).is_none());
6817 }
6818
6819 #[test]
6820 fn write_and_validate_blob() {
6821 let db = ObjectDatabase::new(ObjectFormat::Sha1);
6822 let oid = db
6823 .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
6824 .expect("test operation should succeed");
6825 assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
6826 db.validate(&oid).expect("test operation should succeed");
6827 }
6828
6829 #[test]
6830 fn loose_store_writes_and_reads_object() {
6831 let root = std::env::temp_dir().join(format!(
6832 "sley-loose-store-{}-{}",
6833 std::process::id(),
6834 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
6835 ));
6836 let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
6837 let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
6838 let oid = store
6839 .write_object(object.clone())
6840 .expect("test operation should succeed");
6841 assert_eq!(read_object_for_assert(&store, &oid), object);
6842 assert!(
6843 store
6844 .object_path(&oid)
6845 .expect("test operation should succeed")
6846 .exists()
6847 );
6848 fs::remove_dir_all(root).expect("test operation should succeed");
6849 }
6850
6851 #[test]
6852 fn read_header_detects_corruption_within_gits_header_window() {
6853 let root = temp_root("sley-loose-header-corrupt");
6861 let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
6862 let object = EncodedObject::new(ObjectType::Blob, b"content\n".to_vec());
6863 let oid = store
6864 .write_object(object)
6865 .expect("test operation should succeed");
6866 let path = store
6867 .object_path(&oid)
6868 .expect("test operation should succeed");
6869 let mut bytes = fs::read(&path).expect("test operation should succeed");
6870 bytes[10] = 0;
6874 fs::write(&path, &bytes).expect("test operation should succeed");
6875 store.invalidate_cache();
6876 let err = store
6877 .read_header(&oid)
6878 .expect_err("corrupt loose header must fail like git's ULHR_BAD");
6879 let msg = err.to_string();
6880 assert!(
6881 msg.contains("unable to unpack") && msg.contains(&oid.to_hex()),
6882 "expected git's ULHR_BAD message, got: {msg}"
6883 );
6884 fs::remove_dir_all(root).expect("test operation should succeed");
6885 }
6886
6887 #[test]
6888 fn read_header_ignores_corruption_past_gits_header_window() {
6889 let root = temp_root("sley-loose-header-deep-corrupt");
6894 let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
6895 let body: Vec<u8> = (0..4096u32)
6898 .map(|i| (i.wrapping_mul(2654435761)) as u8)
6899 .collect();
6900 let object = EncodedObject::new(ObjectType::Blob, body.clone());
6901 let oid = store
6902 .write_object(object)
6903 .expect("test operation should succeed");
6904 let path = store
6905 .object_path(&oid)
6906 .expect("test operation should succeed");
6907 let mut bytes = fs::read(&path).expect("test operation should succeed");
6908 let deep = bytes.len() / 2;
6909 bytes[deep] ^= 0xff;
6910 fs::write(&path, &bytes).expect("test operation should succeed");
6911 store.invalidate_cache();
6912 let header = store
6913 .read_header(&oid)
6914 .expect("header-only read must still succeed for deep body corruption");
6915 assert_eq!(header, Some((ObjectType::Blob, body.len() as u64)));
6916 fs::remove_dir_all(root).expect("test operation should succeed");
6917 }
6918
6919 #[test]
6920 fn file_database_reads_object_from_pack_index() {
6921 let root = temp_root("sley-file-odb-pack");
6922 let git_dir = root.join(".git");
6923 let pack_dir = git_dir.join("objects").join("pack");
6924 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6925 let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
6926 let oid = object
6927 .object_id(ObjectFormat::Sha1)
6928 .expect("test operation should succeed");
6929 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6930 .expect("test operation should succeed");
6931 let pack_name = written.checksum.to_hex();
6932 fs::write(
6933 pack_dir.join(format!("pack-{pack_name}.pack")),
6934 written.pack,
6935 )
6936 .expect("test operation should succeed");
6937 fs::write(
6938 pack_dir.join(format!("pack-{pack_name}.idx")),
6939 written.index,
6940 )
6941 .expect("test operation should succeed");
6942
6943 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6944 assert!(db.contains(&oid).expect("test operation should succeed"));
6945 assert_eq!(read_object_for_assert(&db, &oid), object);
6946 fs::remove_dir_all(root).expect("test operation should succeed");
6947 }
6948
6949 #[test]
6950 fn file_database_loose_cache_observes_same_process_write_after_miss() {
6951 let root = temp_root("sley-file-odb-loose-cache-write");
6952 let git_dir = root.join(".git");
6953 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6954 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6955
6956 let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
6957 let oid = object
6958 .object_id(ObjectFormat::Sha1)
6959 .expect("test operation should succeed");
6960
6961 assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
6962 db.loose()
6963 .write_object(object.clone())
6964 .expect("test operation should succeed");
6965
6966 assert_eq!(read_object_for_assert(&db, &oid), object);
6967 fs::remove_dir_all(root).expect("test operation should succeed");
6968 }
6969
6970 #[test]
6971 fn object_presence_checker_observes_same_process_loose_write_after_miss() {
6972 let root = temp_root("sley-presence-checker-loose-cache-write");
6973 let git_dir = root.join(".git");
6974 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6975 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6976 let mut checker = db.presence_checker();
6977
6978 let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
6979 let oid = object
6980 .object_id(ObjectFormat::Sha1)
6981 .expect("test operation should succeed");
6982
6983 assert!(
6984 !checker
6985 .contains(&oid)
6986 .expect("test operation should succeed")
6987 );
6988 db.loose()
6989 .write_object(object)
6990 .expect("test operation should succeed");
6991
6992 assert!(
6993 checker
6994 .contains(&oid)
6995 .expect("test operation should succeed")
6996 );
6997 fs::remove_dir_all(root).expect("test operation should succeed");
6998 }
6999
7000 #[test]
7001 fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
7002 let root = temp_root("sley-read-object-header");
7003 let git_dir = root.join(".git");
7004 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7005 let format = ObjectFormat::Sha1;
7006 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7007
7008 let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
7010 let loose_oid = db
7011 .write_object(loose.clone())
7012 .expect("test operation should succeed");
7013
7014 let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
7018 let mut child_body = vec![b'a'; 4096];
7019 child_body.extend_from_slice(b" plus a deltified tail\n");
7020 let child = EncodedObject::new(ObjectType::Blob, child_body);
7021 let commitish =
7022 EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
7023 let base_oid = base
7024 .object_id(format)
7025 .expect("test operation should succeed");
7026 let child_oid = child
7027 .object_id(format)
7028 .expect("test operation should succeed");
7029 let commit_oid = commitish
7030 .object_id(format)
7031 .expect("test operation should succeed");
7032 let options = PackWriteOptions::new()
7033 .with_prefer_ofs_delta(true)
7034 .with_reorder(false);
7035 let pack = PackFile::write_packed_with_options(
7036 &[base.clone(), child.clone(), commitish.clone()],
7037 format,
7038 &options,
7039 )
7040 .expect("test operation should succeed");
7041 db.install_pack(&pack)
7042 .expect("test operation should succeed");
7043
7044 for (oid, want_type, want_len) in [
7047 (&loose_oid, ObjectType::Blob, loose.body.len()),
7048 (&base_oid, ObjectType::Blob, base.body.len()),
7049 (&child_oid, ObjectType::Blob, child.body.len()),
7050 (&commit_oid, ObjectType::Commit, commitish.body.len()),
7051 ] {
7052 assert_eq!(
7053 db.read_object_header(oid)
7054 .expect("test operation should succeed"),
7055 Some((want_type, want_len as u64)),
7056 "header for {oid}"
7057 );
7058 let full = db.read_object(oid).expect("test operation should succeed");
7059 assert_eq!(
7060 db.read_object_header(oid)
7061 .expect("test operation should succeed"),
7062 Some((full.object_type, full.body.len() as u64))
7063 );
7064 }
7065
7066 let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
7067 .expect("test operation should succeed");
7068 assert_eq!(
7069 db.read_object_header(&missing)
7070 .expect("test operation should succeed"),
7071 None
7072 );
7073 fs::remove_dir_all(root).expect("test operation should succeed");
7074 }
7075
#[test]
fn object_storage_info_reports_loose_packed_and_delta_metadata() {
    // Scratch repository with an empty `objects` directory.
    let root = temp_root("sley-object-storage-info");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    // Objects that are not stored as deltas are expected to report this id as their base.
    let no_delta_base = zero_oid(format).expect("test operation should succeed");

    // Loose object: the reported disk size must equal the length of the loose file.
    let loose_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"loose storage object\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let loose_file_len = fs::metadata(
        db.loose()
            .object_path(&loose_oid)
            .expect("test operation should succeed"),
    )
    .map(|meta| meta.len())
    .expect("test operation should succeed");
    let loose_info = db
        .object_storage_info(&loose_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert_eq!(loose_info.disk_size, loose_file_len);
    assert_eq!(loose_info.deltabase, no_delta_base);

    // Packed pair: the child shares a 4096-byte prefix with the base; the pack is
    // written with offset deltas preferred and reordering disabled.
    let base_body = vec![b'a'; 4096];
    let mut child_body = base_body.clone();
    child_body.extend_from_slice(b" changed tail\n");
    let base = EncodedObject::new(ObjectType::Blob, base_body);
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    // The base is stored whole.
    let base_info = db
        .object_storage_info(&base_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(base_info.disk_size > 0);
    assert_eq!(base_info.deltabase, no_delta_base);

    // The child is stored as a delta against the base.
    let child_info = db
        .object_storage_info(&child_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(child_info.disk_size > 0);
    assert_eq!(child_info.deltabase, base_oid);

    // An id that was never written yields no storage info.
    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_info = db
        .object_storage_info(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_info, None);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7149
#[test]
fn file_database_resolves_unique_loose_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-loose");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Store a single loose blob and abbreviate its id to eight hex digits.
    let oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"prefix loose\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let hex = oid.to_hex();
    let prefix = &hex[..8];

    // The abbreviation resolves to exactly that object.
    let resolution = db
        .resolve_prefix(prefix)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    // The object is also part of the full id listing.
    let all_ids = db.object_ids().expect("test operation should succeed");
    assert!(all_ids.contains(&oid));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7174
#[test]
fn file_database_resolves_unique_packed_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-packed");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // The blob only ever reaches the database through an installed pack.
    let packed_blob = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
    let oid = packed_blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&packed_blob))
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    // An eight-digit abbreviation resolves to the packed object.
    let hex = oid.to_hex();
    let resolution = db
        .resolve_prefix(&hex[..8])
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7198
#[test]
fn file_database_reports_ambiguous_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-ambiguous");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Keep writing numbered blobs until two of them share their first four hex
    // digits; `seen` remembers which id first claimed each prefix.
    let mut seen = HashMap::new();
    let mut collision = None;
    for idx in 0..10_000 {
        let blob = EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
        let oid = db
            .write_object(blob)
            .expect("test operation should succeed");
        let prefix = oid.to_hex()[..4].to_string();
        if let Some(first) = seen.insert(prefix.clone(), oid) {
            collision = Some((prefix, first, oid));
            break;
        }
    }
    let (prefix, first, second) = collision.expect("test should find a 4-hex collision");

    let ObjectPrefixResolution::Ambiguous(mut matches) = db
        .resolve_prefix(&prefix)
        .expect("test operation should succeed")
    else {
        panic!("expected ambiguous prefix {prefix}");
    };

    // Compare in a canonical order so the database's listing order is irrelevant.
    let mut expected = vec![first, second];
    expected.sort_by_key(|oid| oid.to_hex());
    matches.sort_by_key(|oid| oid.to_hex());
    assert_eq!(matches, expected);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7231
#[test]
fn file_database_rejects_too_short_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-short");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // A three-digit abbreviation must be refused as an invalid object id.
    let outcome = db.resolve_prefix("abc");
    assert!(matches!(outcome, Err(GitError::InvalidObjectId(_))));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7245
#[test]
fn file_database_reads_sha256_object_from_pack_index() {
    let root = temp_root("sley-file-odb-pack-sha256");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    // Build a one-object SHA-256 pack in memory.
    let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let written =
        PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
            .expect("test operation should succeed");

    // Drop the pack and its index into the pack directory by hand, named after
    // the pack checksum, without going through the database's install path.
    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    // A database opened afterwards must find and decode the object.
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7276
#[test]
fn file_database_installs_sha256_pack_without_loose_objects() {
    let root = temp_root("sley-file-odb-install-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);

    let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
        .expect("test operation should succeed");

    let result = db
        .install_pack(&pack)
        .expect("test operation should succeed");

    // The install is named after the pack checksum and reports the packed id.
    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    // Pack and index exist on disk; no promisor sidecar was produced.
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);
    // Nothing was written as a loose object.
    assert!(
        !db.loose()
            .object_path(&oid)
            .expect("test operation should succeed")
            .exists()
    );
    // The object is still reachable through the database.
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7309
#[test]
fn file_database_installs_raw_sha256_pack_without_loose_objects() {
    let root = temp_root("sley-file-odb-install-raw-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);

    let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // Only the raw pack bytes are handed over, through a reader.
    let mut pack_stream = pack.pack.as_slice();
    let result = db
        .install_raw_pack_from_reader(&mut pack_stream)
        .expect("test operation should succeed");

    // Same expectations as for a pre-indexed pack install.
    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);
    // No loose copy of the object was created.
    assert!(
        !db.loose()
            .object_path(&oid)
            .expect("test operation should succeed")
            .exists()
    );
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7343
#[test]
fn file_database_streams_raw_pack_install_to_packfile() {
    use std::io::Write as _;

    let root = temp_root("sley-file-odb-stream-raw-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"streamed raw pack\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack_len = pack.pack.len() as u64;

    // Happy path: feed the pack in five-byte chunks, then finish the install.
    let mut install = db
        .begin_raw_pack_install(pack.checksum, pack_len)
        .expect("test operation should succeed");
    pack.pack.chunks(5).for_each(|chunk| {
        install
            .write_all(chunk)
            .expect("test operation should succeed");
    });
    let result = install.finish().expect("test operation should succeed");

    assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
    assert_eq!(result.object_ids, vec![oid]);
    // The installed pack file is byte-for-byte what was streamed in.
    let installed_bytes = fs::read(&result.pack_path).expect("test operation should succeed");
    assert_eq!(installed_bytes, pack.pack);
    assert!(result.index_path.exists());
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);

    // Failure path: announce a checksum the streamed bytes do not have.
    let bad_id = ObjectId::from_raw(ObjectFormat::Sha1, &[0x42; 20])
        .expect("test operation should succeed");
    let mut bad_install = db
        .begin_raw_pack_install(bad_id, pack_len)
        .expect("test operation should succeed");
    bad_install
        .write_all(&pack.pack)
        .expect("test operation should succeed");
    assert!(
        bad_install.finish().is_err(),
        "checksum mismatch should reject the streamed pack"
    );
    // No pack file named after the bogus checksum may be left behind.
    let rejected_pack_path = git_dir
        .join("objects")
        .join("pack")
        .join(format!("pack-{}.pack", bad_id.to_hex()));
    assert!(!rejected_pack_path.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7401
#[test]
fn file_database_installs_unknown_length_raw_pack_from_reader() {
    let root = temp_root("sley-file-odb-install-raw-pack-reader");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"reader streamed raw pack\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // The installer receives only a reader; no length or checksum is announced.
    let mut pack_stream = pack.pack.as_slice();
    let result = db
        .install_raw_pack_from_reader(&mut pack_stream)
        .expect("test operation should succeed");

    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    // The stored pack matches the input bytes exactly.
    let installed_bytes = fs::read(&result.pack_path).expect("test operation should succeed");
    assert_eq!(installed_bytes, pack.pack);
    assert!(result.index_path.exists());
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7431
#[test]
fn file_database_rejects_unknown_length_raw_pack_with_trailing_bytes() {
    let root = temp_root("sley-file-odb-install-raw-pack-reader-trailing");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // A valid pack followed by bytes that do not belong to it.
    let object = EncodedObject::new(ObjectType::Blob, b"trailing streamed raw pack\n".to_vec());
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let mut polluted = pack.pack;
    polluted.extend_from_slice(b"not part of the pack");
    let mut pack_stream = polluted.as_slice();

    let err = db
        .install_raw_pack_from_reader(&mut pack_stream)
        .expect_err("trailing bytes should be rejected");
    assert!(err.to_string().contains("trailing bytes after checksum"));

    // The pack directory must be empty or absent after the rejection; an
    // unreadable directory counts as zero entries.
    let pack_dir = git_dir.join("objects").join("pack");
    let pack_entries = match fs::read_dir(&pack_dir) {
        Ok(entries) => entries.count(),
        Err(_) => 0,
    };
    assert_eq!(pack_entries, 0);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7457
#[test]
fn file_database_rejects_noncanonical_pack_index() {
    let root = temp_root("sley-file-odb-install-bad-index");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Rebuild the index from entries whose first CRC32 has one bit flipped, and
    // pair that index with the otherwise untouched pack.
    let mut tampered_entries = pack.entries.clone();
    tampered_entries[0].crc32 ^= 1;
    let tampered_index = PackIndex::write_v2(ObjectFormat::Sha1, &tampered_entries, &pack.checksum)
        .expect("test operation should succeed");
    let mut bad_pack = pack.clone();
    bad_pack.index = tampered_index;

    // Installing the mismatched pair must fail.
    let outcome = db.install_pack(&bad_pack);
    assert!(outcome.is_err());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7477
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
    let root = temp_root("sley-file-odb-install-raw-promisor-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Install the raw pack with the promisor option switched on.
    let options = RawPackInstallOptions { promisor: true };
    let mut pack_stream = pack.pack.as_slice();
    let result = db
        .install_raw_pack_from_reader_with_options(&mut pack_stream, options)
        .expect("test operation should succeed");

    // The sidecar shares the pack's file stem, carries the `promisor`
    // extension, exists on disk and is empty.
    let promisor_path = result.promisor_path.expect("promisor sidecar");
    assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
    let sidecar_extension = promisor_path.extension().and_then(|ext| ext.to_str());
    assert_eq!(sidecar_extension, Some("promisor"));
    assert!(promisor_path.exists());
    assert_eq!(
        fs::read(&promisor_path).expect("test operation should succeed"),
        b""
    );

    // The pack and index are installed and no loose object was written.
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert!(
        !db.loose()
            .object_path(&oid)
            .expect("test operation should succeed")
            .exists()
    );
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7521
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    let root = temp_root("sley-odb-common-dir");
    let common = root.join(".git");

    // Lay out a linked-worktree admin directory whose `commondir` file points
    // two levels up, i.e. back at the `.git` directory created above.
    let admin = common.join("worktrees").join("linked");
    fs::create_dir_all(&admin).expect("test operation should succeed");
    fs::write(admin.join("commondir"), "../..\n").expect("test operation should succeed");

    // Both helpers are expected to answer in terms of the canonical common dir.
    let canonical_common = fs::canonicalize(common).expect("test operation should succeed");
    assert_eq!(repository_common_dir(&admin), canonical_common);
    assert_eq!(
        repository_objects_dir(&admin),
        canonical_common.join("objects")
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7536
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    // Two independent repositories: objects are written to `source` and later
    // transferred into `destination` as a pack.
    let root = temp_root("sley-reachable-pack");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    // Build the graph commit -> tree -> blob in the source repository.
    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let payload_entry = TreeEntry {
        mode: 0o100644,
        name: BString::from(b"payload.txt"),
        oid: blob_oid,
    };
    let tree_body = Tree {
        entries: vec![payload_entry],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_oid = source
        .write_object(tree.clone())
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_oid = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    // Walking from the commit must visit all three objects.
    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
        .expect("test operation should succeed");
    for oid in [&commit_oid, &tree_oid, &blob_oid] {
        assert!(reachable.contains(oid));
    }

    // Installing the reachable set into the destination yields a three-object pack.
    let install =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(install.object_ids.len(), 3);

    // Each object is readable from the destination but was not written loose.
    let expectations = [
        (&commit_oid, &commit),
        (&tree_oid, &tree),
        (&blob_oid, &blob),
    ];
    for (oid, object) in expectations {
        let loose_path = destination
            .loose()
            .object_path(oid)
            .expect("test operation should succeed");
        assert!(!loose_path.exists());
        assert!(
            destination
                .contains(oid)
                .expect("test operation should succeed")
        );
        assert_eq!(read_object_for_assert(&destination, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7616
#[test]
fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
    let root = temp_root("sley-reachable-exclusions");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Graph under test: commit -> tree -> blob.
    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"excluded payload\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let payload_entry = TreeEntry {
        mode: 0o100644,
        name: BString::from(b"payload.txt"),
        oid: blob_oid,
    };
    let tree_body = Tree {
        entries: vec![payload_entry],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    // Exclude the tree and list the commit twice as a starting point.
    let excluded = HashSet::from([tree_oid]);
    let starts = [commit_oid, commit_oid];
    let objects = collect_reachable_objects(&db, format, starts, &excluded)
        .expect("test operation should succeed");

    // Only the commit is returned, and only once.
    assert_eq!(objects.len(), 1);
    let sole_oid = objects[0]
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(sole_oid, commit_oid);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7673
#[test]
fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
    let root = temp_root("sley-build-reachable-pack");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
    let oid = db
        .write_object(object.clone())
        .expect("test operation should succeed");

    // In-memory build with nothing excluded: the reference result for the
    // file-backed and streamed variants below.
    let pack = build_reachable_pack(&db, format, std::iter::once(oid), &HashSet::new())
        .expect("test operation should succeed")
        .expect("reachable pack should be built");
    assert!(pack.pack.starts_with(b"PACK"));
    assert_eq!(pack.entries.len(), 1);
    assert_eq!(pack.entries[0].oid, oid);
    let expected_size = pack.pack.len() as u64;

    // File-backed build: same checksum, size and bytes as the in-memory pack.
    let pack_path = root.join("reachable.pack");
    let pack_file = build_reachable_pack_file(
        &db,
        format,
        std::iter::once(oid),
        &HashSet::new(),
        &pack_path,
    )
    .expect("test operation should succeed")
    .expect("reachable pack file should be built");
    assert_eq!(pack_file.checksum, pack.checksum);
    assert_eq!(pack_file.pack_size, expected_size);
    assert_eq!(pack_file.object_count, 1);
    let file_bytes = fs::read(&pack_file.pack_path).expect("test operation should succeed");
    assert_eq!(file_bytes, pack.pack);

    // Streaming into a buffer reproduces the same pack.
    let mut streamed_pack = Vec::new();
    let streamed = write_reachable_pack_to_writer(
        &db,
        format,
        std::iter::once(oid),
        &HashSet::new(),
        &mut streamed_pack,
    )
    .expect("test operation should succeed")
    .expect("reachable pack should be streamed");
    assert_eq!(streamed.checksum, pack.checksum);
    assert_eq!(streamed.pack_size, expected_size);
    assert_eq!(streamed.object_count, 1);
    assert_eq!(streamed_pack, pack.pack);

    // Streaming into a sink discards the bytes but reports the same summary.
    let mut sink = std::io::sink();
    let dry_run = write_reachable_pack_to_writer(
        &db,
        format,
        std::iter::once(oid),
        &HashSet::new(),
        &mut sink,
    )
    .expect("test operation should succeed")
    .expect("reachable pack should stream to sink");
    assert_eq!(dry_run.checksum, pack.checksum);
    assert_eq!(dry_run.pack_size, expected_size);
    assert_eq!(dry_run.object_count, 1);

    // With the only object excluded there is nothing to pack.
    let excluded = HashSet::from([oid]);
    let starts = pack.entries.into_iter().map(|entry| entry.oid);
    let fully_excluded = build_reachable_pack(&db, format, starts, &excluded)
        .expect("test operation should succeed");
    assert!(fully_excluded.is_none());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7753
#[test]
fn index_raw_pack_returns_validated_pack_metadata() {
    let root = temp_root("sley-index-raw-pack");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Populate the database through the shared helper; its first entry is used
    // as the traversal root.
    let graph = write_commit_graph(&mut db, b"pack indexed\n");
    let commit_oid = graph[0].0;
    // Expected (type, body length) for every object the helper wrote.
    let expected: HashMap<_, _> = graph
        .iter()
        .map(|(oid, object)| (*oid, (object.object_type, object.body.len() as u64)))
        .collect();
    let pack = build_reachable_pack(&db, format, std::iter::once(commit_oid), &HashSet::new())
        .expect("test operation should succeed")
        .expect("reachable pack should be built");

    // The in-memory, reader-based and file-based entry points must all agree.
    let indexed = index_raw_pack(&pack.pack, format).expect("test operation should succeed");

    let mut cursor = std::io::Cursor::new(pack.pack.clone());
    let streamed = index_raw_pack_from_reader(&mut cursor, format)
        .expect("streamed pack indexing should match in-memory indexing");
    assert_eq!(streamed, indexed);

    let pack_path = root.join("reachable.pack");
    fs::write(&pack_path, &pack.pack).expect("test operation should succeed");
    let file_indexed = index_raw_pack_file(&pack_path, format)
        .expect("file-backed pack indexing should match in-memory indexing");
    assert_eq!(file_indexed, indexed);

    // The index metadata matches what the pack writer produced.
    assert_eq!(indexed.pack_id, pack.checksum);
    assert_eq!(indexed.index, pack.index);
    assert_eq!(indexed.objects.len(), 3);
    for object in indexed.objects {
        let &(expected_type, expected_size) = expected
            .get(&object.oid)
            .expect("indexed object should be reachable");
        assert_eq!(object.object_type, expected_type);
        assert_eq!(object.size, expected_size);
        assert!(object.offset > 0);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7796
#[test]
fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
    let root = temp_root("sley-reachable-tags");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // An annotated tag that points directly at a blob.
    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"tagged payload\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let tag_body = Tag {
        object: blob_oid,
        object_type: ObjectType::Blob,
        name: b"v1".to_vec(),
        tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
        message: b"tag message\n".to_vec(),
        raw_body: None,
    }
    .write();
    let tag_oid = db
        .write_object(EncodedObject::new(ObjectType::Tag, tag_body))
        .expect("test operation should succeed");

    // Starting at the tag reaches both the tag and its target.
    let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
        .expect("test operation should succeed");
    assert!(reachable.contains(&tag_oid));
    assert!(reachable.contains(&blob_oid));

    // Starting at an id that was never written produces a typed not-found
    // error that names the id and the traversal context.
    let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        .expect("test operation should succeed");
    let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
        .expect_err("missing traversal root should error");
    let kind = err.not_found_kind().expect("typed not found");
    assert_eq!(kind.object_id(), Some(missing));
    let context = kind.missing_object_context();
    assert_eq!(context, Some(MissingObjectContext::Traversal));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7840
#[test]
fn install_reachable_pack_empty_starts_create_no_pack() {
    let root = temp_root("sley-reachable-empty");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    // No starting points at all.
    let no_starts = Vec::<ObjectId>::new();
    let result = install_reachable_pack(&source, &destination, format, no_starts)
        .expect("test operation should succeed");

    // Nothing is reported and the destination gains no pack directory.
    assert!(result.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7860
#[test]
fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
    let root = temp_root("sley-reachable-install-excluding");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    // The single starting object is also the single excluded object.
    let oid = source
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"excluded install\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let excluded = HashSet::from([oid]);
    let starts = std::iter::once(oid);

    let result = install_reachable_pack_excluding(&source, &destination, format, starts, &excluded)
        .expect("test operation should succeed");

    // Nothing is installed and no pack directory appears in the destination.
    assert!(result.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7891
#[test]
fn install_reachable_pack_supports_sha256() {
    let root = temp_root("sley-reachable-pack-sha256");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let format = ObjectFormat::Sha256;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
    let oid = source
        .write_object(object.clone())
        .expect("test operation should succeed");

    // Building the pack in memory works for SHA-256 ids.
    let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
        .expect("test operation should succeed")
        .expect("sha256 reachable pack should be built");
    assert!(pack.pack.starts_with(b"PACK"));
    assert_eq!(pack.entries[0].oid, oid);

    // So does installing it into another repository.
    let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
        .expect("test operation should succeed")
        .expect("sha256 reachable pack should be written");
    assert_eq!(result.object_ids, vec![oid]);

    // The destination serves the object without having a loose copy of it.
    let loose_path = destination
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    assert_eq!(read_object_for_assert(&destination, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
7929
#[test]
fn install_helpers_accept_custom_raw_pack_installer() {
    /// Test double for `RawPackInstaller`: records every pack stream it is
    /// given and reports a preconfigured list of object ids as installed.
    #[derive(Default)]
    struct RecordingInstaller {
        // Raw bytes of each pack received, in call order.
        packs: std::cell::RefCell<Vec<Vec<u8>>>,
        // Object ids returned from every install call.
        installed: std::cell::RefCell<Vec<ObjectId>>,
    }

    impl RawPackInstaller for RecordingInstaller {
        fn install_raw_pack_from_reader<R>(
            &self,
            reader: &mut R,
        ) -> Result<RawPackInstallResult>
        where
            R: Read,
        {
            let mut pack_bytes = Vec::new();
            reader.read_to_end(&mut pack_bytes)?;
            // The buffer is not used again, so move it into the log instead of
            // copying it.
            self.packs.borrow_mut().push(pack_bytes);
            let object_ids = self.installed.borrow().clone();
            Ok(RawPackInstallResult { object_ids })
        }
    }

    // Source database holding a single blob.
    let format = ObjectFormat::Sha1;
    let source = ObjectDatabase::new(format);
    let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
    let oid = source
        .write_object(object)
        .expect("test operation should succeed");

    // The installer is primed to report that blob's id.
    let installer = RecordingInstaller::default();
    installer.installed.borrow_mut().push(oid);

    let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
        .expect("test operation should succeed")
        .expect("custom installer should receive pack");

    // The helper hands back the installer's result, and the installer saw
    // exactly one stream that starts with the pack signature.
    assert_eq!(result.object_ids, installer.installed.into_inner());
    let packs = installer.packs.into_inner();
    assert_eq!(packs.len(), 1);
    assert!(packs[0].starts_with(b"PACK"));
}
7972
// Writes two single-object packs plus a `multi-pack-index` file (no per-pack
// `.idx` files are written here) and checks that presence checks, prefix
// resolution and reads all succeed through the file database.
#[test]
fn file_database_reads_object_from_multi_pack_index() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-midx");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
    let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");

    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");

    // Pack data goes on disk as `pack-<checksum>.pack`; the index names
    // handed to the multi-pack index use the matching `.idx` spelling.
    let first_hex = first_pack.checksum.to_hex();
    let second_hex = second_pack.checksum.to_hex();
    fs::write(
        pack_dir.join(format!("pack-{first_hex}.pack")),
        first_pack.pack,
    )
    .expect("test operation should succeed");
    fs::write(
        pack_dir.join(format!("pack-{second_hex}.pack")),
        second_pack.pack,
    )
    .expect("test operation should succeed");

    let pack_names = [
        format!("pack-{first_hex}.idx"),
        format!("pack-{second_hex}.idx"),
    ];
    let midx_entries = [
        sley_pack::MultiPackIndexEntry {
            oid: first_oid,
            pack_int_id: 0,
            offset: first_pack.entries[0].offset,
            force_large_offset: false,
        },
        sley_pack::MultiPackIndexEntry {
            oid: second_oid,
            pack_int_id: 1,
            offset: second_pack.entries[0].offset,
            force_large_offset: false,
        },
    ];
    let midx = MultiPackIndex::write(format, 2, &pack_names, &midx_entries)
        .expect("test operation should succeed");
    fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let second_is_present = db
        .contains(&second_oid)
        .expect("test operation should succeed");
    assert!(second_is_present);

    let resolved = db
        .resolve_prefix(&second_oid.to_hex()[..8])
        .expect("test operation should succeed");
    assert_eq!(resolved, ObjectPrefixResolution::Unique(second_oid));

    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
8039
// Installs a second pack after the database has already served a read and a
// failed lookup, then checks the same handle can see the new object while the
// earlier one stays readable.
#[test]
fn file_database_finds_pack_added_after_registry_was_cached() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // First pack: installed and read straight away.
    let early = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
    let early_oid = early
        .object_id(format)
        .expect("test operation should succeed");
    let early_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&early))
        .expect("test operation should succeed");
    db.install_pack(&early_pack)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &early_oid), early);

    // Second object: not installed yet, so the lookup must miss.
    let late = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
    let late_oid = late
        .object_id(format)
        .expect("test operation should succeed");
    let miss = db.read_object(&late_oid);
    assert!(matches!(miss, Err(GitError::NotFound(_))));

    // Once its pack is installed the same handle must find it.
    let late_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&late))
        .expect("test operation should succeed");
    db.install_pack(&late_pack)
        .expect("test operation should succeed");
    let late_is_present = db
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(late_is_present);
    assert_eq!(read_object_for_assert(&db, &late_oid), late);
    assert_eq!(read_object_for_assert(&db, &early_oid), early);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8088
// A presence checker obtained before a second pack is installed must still
// report that pack's object as present afterwards.
#[test]
fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-presence-checker-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let early = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
    let early_oid = early
        .object_id(format)
        .expect("test operation should succeed");
    let early_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&early))
        .expect("test operation should succeed");
    db.install_pack(&early_pack)
        .expect("test operation should succeed");

    let late = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
    let late_oid = late
        .object_id(format)
        .expect("test operation should succeed");

    // The checker is created while only the first pack exists.
    let mut checker = db.presence_checker();
    let sees_early = checker
        .contains(&early_oid)
        .expect("test operation should succeed");
    assert!(sees_early);
    let sees_late_before_install = checker
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(!sees_late_before_install);

    let late_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&late))
        .expect("test operation should succeed");
    db.install_pack(&late_pack)
        .expect("test operation should succeed");

    // The same checker instance is queried again after the install.
    let sees_late_after_install = checker
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(sees_late_after_install);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
8133
// Inspects the cached pack registry directly: a freshly built registry has
// neither the index nor the pack data loaded, `contains` loads only the index,
// a read loads the data, and installing a second pack yields a new registry
// with updated counts.
#[test]
fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-pack-registry-refresh");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");

    let first_registry = db
        .cached_pack_registry(&pack_dir, false)
        .expect("test operation should succeed");
    assert_eq!(first_registry.fingerprint.idx_count, 1);
    assert_eq!(first_registry.fingerprint.pack_count, 1);
    assert_eq!(first_registry.packs.len(), 1);

    // Probes for the two slots of the only registered pack.
    let index_loaded = || {
        first_registry.packs[0]
            .index
            .lock()
            .expect("test operation should succeed")
            .is_some()
    };
    let data_loaded = || {
        first_registry.packs[0]
            .data
            .lock()
            .expect("test operation should succeed")
            .is_some()
    };

    // Nothing has been loaded yet.
    assert!(!index_loaded());
    assert!(!data_loaded());

    // A presence check fills in the index but leaves the data slot empty.
    let first_is_present = db
        .contains(&first_oid)
        .expect("test operation should succeed");
    assert!(first_is_present);
    assert!(index_loaded());
    assert!(!data_loaded());

    // An actual read fills in the data slot as well.
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    assert!(data_loaded());

    let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    // Requesting the registry again (second argument `true`) must return a
    // different allocation that reflects both packs.
    let refreshed = db
        .cached_pack_registry(&pack_dir, true)
        .expect("test operation should succeed");
    assert!(!Arc::ptr_eq(&first_registry, &refreshed));
    assert_eq!(refreshed.fingerprint.idx_count, 2);
    assert_eq!(refreshed.fingerprint.pack_count, 2);
    assert_eq!(refreshed.packs.len(), 2);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8221
// With two packs installed and read from, a lookup for a third object must
// miss; after its pack is installed the same handle must read it, and an
// object from the first pack must remain readable.
#[test]
fn file_database_pack_search_hint_rebuilds_after_pack_added() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-pack-lookup-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
    let third_oid = third
        .object_id(format)
        .expect("test operation should succeed");

    // Two packs exist up front.
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    let miss = db.read_object(&third_oid);
    assert!(matches!(miss, Err(GitError::NotFound(_))));

    // The third pack arrives after the lookups above.
    let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
        .expect("test operation should succeed");
    db.install_pack(&third_pack)
        .expect("test operation should succeed");

    assert_eq!(read_object_for_assert(&db, &third_oid), third);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8272
// Stores the same blob both in a hand-written pack/idx pair and as a
// database-written object, then checks the read returns the expected content.
#[test]
fn file_database_prefers_loose_object_over_packed_object() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-prefer-loose");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let packed = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Drop the pack and its index into place by hand, named after the checksum.
    let checksum_hex = packed.checksum.to_hex();
    let pack_file = pack_dir.join(format!("pack-{checksum_hex}.pack"));
    let index_file = pack_dir.join(format!("pack-{checksum_hex}.idx"));
    fs::write(pack_file, packed.pack).expect("test operation should succeed");
    fs::write(index_file, packed.index).expect("test operation should succeed");

    // Now write the same object through the database as well.
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let oid = db
        .write_object(blob.clone())
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &oid), blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
8301
// A bundle whose only prerequisite line names an object already in the
// database must pass `verify_bundle_prerequisites`.
#[test]
fn bundle_prerequisite_verification_reads_existing_objects() {
    let format = ObjectFormat::Sha1;
    let db = ObjectDatabase::new(format);
    let base = EncodedObject::new(ObjectType::Blob, b"base\n".to_vec());
    let oid = db
        .write_object(base)
        .expect("test operation should succeed");

    // Header-only bundle: one `-<oid>` prerequisite line and no pack payload.
    let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
}
8314
// A bundle whose prerequisite names an object the database never received
// must be rejected by `verify_bundle_prerequisites`.
#[test]
fn bundle_prerequisite_verification_reports_missing_objects() {
    let format = ObjectFormat::Sha1;
    let db = ObjectDatabase::new(format);

    // Compute an id for content that is never written to `db`.
    let absent_oid = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");
    let bundle_bytes = format!("# v2 git bundle\n-{absent_oid} missing\n\n").into_bytes();
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let verdict = verify_bundle_prerequisites(&bundle, &db);
    assert!(verdict.is_err());
}
8326
// Unbundles a bundle with one reference and a one-object pack, then checks
// the written ids, the returned references and the stored object content.
#[test]
fn unbundle_objects_writes_pack_entries_and_returns_refs() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Bundle layout used here: text header, blank line, then the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
        .expect("test operation should succeed");
    assert_eq!(outcome.written_objects, vec![oid]);
    assert_eq!(outcome.references, bundle.references);
    assert_eq!(read_object_for_assert(&writer, &oid), blob);
}
8351
// Installs a bundle's pack into a file database and checks the reported ids
// and references, that the object is present and readable, and that no loose
// copy was created.
#[test]
fn install_bundle_pack_writes_pack_and_returns_refs() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-install-bundle-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let prerequisite_reader = ObjectDatabase::new(format);
    let database = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
    let oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Bundle layout used here: text header, blank line, then the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = install_bundle_pack(&bundle, &prerequisite_reader, &database)
        .expect("test operation should succeed");

    assert_eq!(outcome.written_objects, vec![oid]);
    assert_eq!(outcome.references, bundle.references);
    let is_present = database
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(is_present);
    assert_eq!(read_object_for_assert(&database, &oid), blob);
    let has_loose_copy = database
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!has_loose_copy);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
8393
// Unpacks a one-object SHA-256 pack into an in-memory database and checks the
// reported id and the stored content.
#[test]
fn unpack_packfile_objects_writes_sha256_pack_entries() {
    let format = ObjectFormat::Sha256;
    let writer = ObjectDatabase::new(format);

    let blob = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
    let oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");

    let outcome = unpack_packfile_objects(&pack.pack, format, &writer)
        .expect("test operation should succeed");

    assert_eq!(outcome.written_objects, vec![oid]);
    assert_eq!(read_object_for_assert(&writer, &oid), blob);
}
8410
// A bundle that lists an unavailable prerequisite must fail to unbundle, and
// the object carried in its pack must not end up in the writer.
#[test]
fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    // Id of content that is never stored in `prerequisite_reader`.
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Header with one prerequisite line and one reference, followed by the pack.
    let mut bundle_bytes =
        format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let attempt = unbundle_objects(&bundle, &prerequisite_reader, &mut writer);
    assert!(attempt.is_err());
    assert!(!writer.contains(&oid));
}
8435
8436 fn write_commit_graph(
8439 db: &mut FileObjectDatabase,
8440 payload: &[u8],
8441 ) -> Vec<(ObjectId, EncodedObject)> {
8442 let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
8443 let blob_oid = db
8444 .write_object(blob.clone())
8445 .expect("test operation should succeed");
8446 let tree = EncodedObject::new(
8447 ObjectType::Tree,
8448 Tree {
8449 entries: vec![TreeEntry {
8450 mode: 0o100644,
8451 name: BString::from(b"payload.txt"),
8452 oid: blob_oid,
8453 }],
8454 }
8455 .write(),
8456 );
8457 let tree_oid = db
8458 .write_object(tree.clone())
8459 .expect("test operation should succeed");
8460 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
8461 let commit = EncodedObject::new(
8462 ObjectType::Commit,
8463 Commit {
8464 tree: tree_oid,
8465 parents: Vec::new(),
8466 author: identity.clone(),
8467 committer: identity,
8468 encoding: None,
8469 message: b"initial\n".to_vec(),
8470 }
8471 .write(),
8472 );
8473 let commit_oid = db
8474 .write_object(commit.clone())
8475 .expect("test operation should succeed");
8476 vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
8477 }
8478
8479 fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
8480 let root = temp_root("sley-repack-all");
8481 let git_dir = root.join(".git");
8482 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8483 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
8484
8485 let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
8487 let packed_oid = packed_blob
8488 .object_id(format)
8489 .expect("test operation should succeed");
8490 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
8491 .expect("test operation should succeed");
8492 let existing = db
8493 .install_pack(&existing_pack)
8494 .expect("test operation should succeed");
8495
8496 let graph = write_commit_graph(&mut db, b"repack payload\n");
8497
8498 let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
8499 expected.insert(packed_oid, packed_blob.clone());
8500
8501 let result = repack_all_objects(&git_dir, format)
8502 .expect("test operation should succeed")
8503 .expect("repository has objects");
8504
8505 assert_eq!(result.object_count, expected.len());
8507 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
8508 assert_eq!(parsed.entries.len(), expected.len());
8509 for entry in &parsed.entries {
8510 let want = expected
8511 .get(&entry.entry.oid)
8512 .expect("packed object was in the repository");
8513 assert_eq!(&entry.object, want);
8514 assert_eq!(
8515 entry
8516 .object
8517 .object_id(format)
8518 .expect("test operation should succeed"),
8519 entry.entry.oid
8520 );
8521 }
8522 let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
8524 assert_eq!(idx.pack_checksum, parsed.checksum);
8525 assert_eq!(idx.entries.len(), expected.len());
8526
8527 assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
8529 let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
8531 want_loose.sort_by_key(ObjectId::to_hex);
8532 assert_eq!(result.packed_loose, want_loose);
8533 assert!(!result.packed_loose.contains(&packed_oid));
8534
8535 fs::remove_dir_all(root).expect("test operation should succeed");
8536 }
8537
// Runs the shared repack consolidation scenario with SHA-1 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha1() {
    let format = ObjectFormat::Sha1;
    repack_all_objects_consolidates_loose_and_pack(format);
}
8542
// Runs the shared repack consolidation scenario with SHA-256 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha256() {
    let format = ObjectFormat::Sha256;
    repack_all_objects_consolidates_loose_and_pack(format);
}
8547
// With only an empty `objects` directory on disk, `repack_all_objects` must
// succeed and return `None`.
#[test]
fn repack_all_objects_returns_none_for_empty_repository() {
    let root = temp_root("sley-repack-empty");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let outcome =
        repack_all_objects(&git_dir, ObjectFormat::Sha1).expect("test operation should succeed");
    assert!(outcome.is_none());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8562
// Installing a repack result with the final flag set to `false` must write
// the new pack and index files while leaving every loose object in place and
// readable.
#[test]
fn install_repack_result_writes_pack_without_pruning_by_default() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-nodelete");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"install no prune\n");

    let repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    install_repack_result(&git_dir, format, &repacked, false)
        .expect("test operation should succeed");

    // The installed files are named after the new pack's checksum.
    let parsed = PackFile::parse(&repacked.pack, format).expect("test operation should succeed");
    let checksum_hex = parsed.checksum.to_hex();
    let pack_dir = git_dir.join("objects").join("pack");
    assert!(pack_dir.join(format!("pack-{checksum_hex}.pack")).exists());
    assert!(pack_dir.join(format!("pack-{checksum_hex}.idx")).exists());

    // No pruning was requested, so each loose file must still be there.
    for (oid, object) in &graph {
        let still_loose = db
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(still_loose);
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8598
// Installing a repack result with the final flag set to `true` must remove
// the superseded pack/index pair and the loose copies, leave the new pack on
// disk, and keep every object readable from a freshly opened database.
#[test]
fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-prune");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Starting state: one installed pack plus a loose commit/tree/blob graph.
    let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
    let old_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
        .expect("test operation should succeed");
    let old_install = db
        .install_pack(&old_pack)
        .expect("test operation should succeed");
    let graph = write_commit_graph(&mut db, b"prune payload\n");

    let repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    let new_pack_checksum = PackFile::parse(&repacked.pack, format)
        .expect("test operation should succeed")
        .checksum;
    install_repack_result(&git_dir, format, &repacked, true)
        .expect("test operation should succeed");

    // The superseded pack and its index are gone.
    assert!(!old_install.pack_path.exists());
    assert!(!old_install.index_path.exists());

    // So are the loose files of everything that was repacked.
    for (oid, _) in &graph {
        let still_loose = db
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!still_loose);
    }

    // The consolidated pack is on disk under its checksum-derived name.
    let new_pack_path = git_dir
        .join("objects")
        .join("pack")
        .join(format!("pack-{}.pack", new_pack_checksum.to_hex()));
    assert!(new_pack_path.exists());

    // A freshly opened database still serves every object.
    let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
    for (oid, object) in &graph {
        let is_present = reopened
            .contains(oid)
            .expect("test operation should succeed");
        assert!(is_present);
        assert_eq!(read_object_for_assert(&reopened, oid), *object);
    }
    let packed_oid = packed_blob
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8659
// Packs that have a `.keep` file or were installed with `promisor: true` are
// still listed in `obsolete_packs`, yet a pruning install must leave their
// pack, index and sidecar files on disk; loose objects are still removed.
#[test]
fn install_repack_result_preserves_keep_and_promisor_packs() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-keep-promisor");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // First pack: an empty `.keep` file is created next to it.
    let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
    let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
        .expect("test operation should succeed");
    let keep_install = db
        .install_pack(&keep_pack)
        .expect("test operation should succeed");
    let keep_sidecar = keep_install.pack_path.with_extension("keep");
    fs::write(&keep_sidecar, b"").expect("test operation should succeed");

    // Second pack: installed with the promisor option, which yields a sidecar path.
    let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
    let promisor_pack =
        PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
            .expect("test operation should succeed");
    let promisor_options = RawPackInstallOptions { promisor: true };
    let promisor_install = db
        .install_pack_with_options(&promisor_pack, promisor_options)
        .expect("test operation should succeed");
    let promisor_sidecar = promisor_install
        .promisor_path
        .clone()
        .expect("promisor sidecar");

    let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
    let repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    // Both packs are reported as obsolete by the repack itself...
    assert!(repacked.obsolete_packs.contains(&keep_install.pack_path));
    assert!(repacked.obsolete_packs.contains(&promisor_install.pack_path));

    install_repack_result(&git_dir, format, &repacked, true)
        .expect("test operation should succeed");

    // ...but the pruning install must leave all of their files alone.
    let must_survive = [
        &keep_install.pack_path,
        &keep_install.index_path,
        &keep_sidecar,
        &promisor_install.pack_path,
        &promisor_install.index_path,
        &promisor_sidecar,
    ];
    for path in must_survive {
        assert!(path.exists(), "{} should be preserved", path.display());
    }

    for (oid, _) in &graph {
        let still_loose = db
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!still_loose);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8720
// Safety check for pruning: an id that was appended to `packed_loose` but
// whose object is not in the new pack must keep its loose file, while the
// objects that really were packed lose theirs.
#[test]
fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-safety");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"safety packed\n");

    let mut repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");

    // Written after the repack ran, so the new pack cannot contain it; its id
    // is then added to the prune list by hand.
    let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
    let stray_oid = db
        .write_object(stray.clone())
        .expect("test operation should succeed");
    assert!(!repacked.packed_loose.contains(&stray_oid));
    repacked.packed_loose.push(stray_oid);

    install_repack_result(&git_dir, format, &repacked, true)
        .expect("test operation should succeed");

    // The stray object's loose file is still there and still readable.
    let stray_still_loose = db
        .loose()
        .object_path(&stray_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(stray_still_loose);
    assert_eq!(read_object_for_assert(&db, &stray_oid), stray);

    // The objects that were actually packed have been pruned.
    for (oid, _) in &graph {
        let still_loose = db
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!still_loose);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8767
// Exercises `prune_unreachable_loose` with the commit from
// `write_commit_graph` as the only root and one extra dangling blob:
//   * with the final flag `false` the dangling id is returned and its loose
//     file is still on disk;
//   * with the final flag `true` the same id is returned and the file is gone;
//   * the commit, tree and blob of the graph stay loose and readable.
#[test]
fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
    let root = temp_root("sley-prune-unreachable");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"reachable payload\n");
    // `ObjectId` is `Copy` (it is reused by value twice below), so a plain
    // copy replaces the former `.clone()`.
    let commit_oid = graph[0].0;

    let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    // First pass with the flag off: the id is returned, the file stays.
    let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
        .expect("test operation should succeed");
    assert_eq!(reported, vec![dangling_oid]);
    assert!(
        db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Second pass with the flag on: same id returned, file removed.
    let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");
    assert_eq!(deleted, vec![dangling_oid]);
    assert!(
        !db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Everything reachable from the commit is untouched.
    for (oid, object) in &graph {
        assert!(
            db.loose()
                .object_path(oid)
                .expect("test operation should succeed")
                .exists()
        );
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8817
/// Pruning from a commit whose tree contains a gitlink entry must succeed even
/// though the gitlink's target object is never written to this database, and
/// must delete only the unrelated dangling blob.
#[test]
fn prune_unreachable_loose_ignores_gitlink_targets() {
    let format = ObjectFormat::Sha1;
    let scratch = temp_root("sley-prune-gitlink");
    let git_dir = scratch.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // The gitlink target id is only parsed from hex; no object with this id is stored.
    let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let gitlink_entry = TreeEntry {
        mode: 0o160000,
        name: BString::from(b"submodule"),
        oid: submodule_oid,
    };
    let tree_payload = Tree {
        entries: vec![gitlink_entry],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_payload))
        .expect("test operation should succeed");

    // Root commit pointing at that tree; author and committer share one identity line.
    let signature = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_payload = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: signature.clone(),
        committer: signature,
        encoding: None,
        message: b"gitlink\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_payload))
        .expect("test operation should succeed");

    // A blob nothing refers to: the only object the prune is expected to remove.
    let orphan_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"dangling with gitlink\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let pruned = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");

    assert_eq!(pruned, vec![orphan_oid]);
    let orphan_on_disk = db
        .loose()
        .object_path(&orphan_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!orphan_on_disk);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
8876
8877 fn temp_root(prefix: &str) -> PathBuf {
8878 std::env::temp_dir().join(format!(
8879 "{prefix}-{}-{}",
8880 std::process::id(),
8881 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
8882 ))
8883 }
8884}