1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{
12 Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object,
13 tree_entry_object_type,
14};
15use sley_pack::{
16 MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
17 PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
18 PackWriteOptions,
19};
20use std::collections::{HashMap, HashSet};
21use std::io::{Read, Write};
22use std::path::{Path, PathBuf};
23use std::sync::atomic::{AtomicU64, Ordering};
24use std::sync::{Arc, Mutex, OnceLock};
25use std::{env, fs};
26
/// Process-wide atomic counter, starting at zero.
///
/// NOTE(review): presumably used to derive unique temporary-file names; its
/// users are outside this section of the file — confirm.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
28
/// Read access to an object store, keyed by object id.
pub trait ObjectReader {
    /// Returns the encoded object stored under `oid`, or an error when it
    /// cannot be read.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Reports whether `oid` is a shallow graft point.
    ///
    /// [`grafted_parents`] treats such a commit as having no parents. The
    /// default implementation reports `false` for every id.
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }

    /// Defaults to `false`.
    ///
    /// NOTE(review): presumably `true` when the store has at least one shallow
    /// graft — confirm against implementors.
    fn has_shallow_grafts(&self) -> bool {
        false
    }

    /// Defaults to `false`.
    ///
    /// NOTE(review): presumably reports objects promised by a promisor remote
    /// — confirm against implementors.
    fn is_promised_object(&self, _oid: &ObjectId) -> bool {
        false
    }
}
57
58fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
59 (*oid == ObjectId::empty_tree(format))
60 .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
61}
62
63fn with_missing_object_context(
64 err: GitError,
65 oid: ObjectId,
66 context: MissingObjectContext,
67) -> GitError {
68 let kind = err
69 .not_found_kind()
70 .and_then(sley_core::NotFoundKind::missing_object_kind);
71 match kind {
72 Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
73 None => err,
74 }
75}
76
77pub fn grafted_parents<R: ObjectReader + ?Sized>(
81 reader: &R,
82 oid: &ObjectId,
83 parents: Vec<ObjectId>,
84) -> Vec<ObjectId> {
85 if reader.is_shallow_graft(oid) {
86 Vec::new()
87 } else {
88 parents
89 }
90}
91
/// Write access to an object store.
pub trait ObjectWriter {
    /// Stores `object` and returns the id it was stored under.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
100
/// Outcome of importing a bundle's pack into an object store.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids of the objects reported as written/installed from the bundle's pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the references advertised by the bundle.
    pub references: Vec<BundleReference>,
}
106
/// Outcome of unpacking a pack object-by-object through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids returned by the writer, in pack entry order.
    pub written_objects: Vec<ObjectId>,
}
111
/// Description of a pack installed into an objects directory.
///
/// NOTE(review): the code that fills this in is outside this section; the
/// field notes below are inferred from the field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Presumably the pack's base name (e.g. `pack-<checksum>`).
    pub pack_name: String,
    /// Presumably the location of the installed `.pack` file.
    pub pack_path: PathBuf,
    /// Presumably the location of the installed `.idx` file.
    pub index_path: PathBuf,
    /// Presumably the `.promisor` marker path, when one was written.
    pub promisor_path: Option<PathBuf>,
    /// Ids of the objects contained in the installed pack.
    pub object_ids: Vec<ObjectId>,
}
120
/// Outcome reported by a [`RawPackInstaller`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Ids of the objects made available by installing the pack.
    pub object_ids: Vec<ObjectId>,
}
125
/// Options accepted when installing a generated pack.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// Defaults to `false`.
    ///
    /// NOTE(review): presumably marks the installed pack as a promisor pack —
    /// the consumer of this flag is outside this section; confirm.
    pub promisor: bool,
}
130
/// A destination that can ingest a complete pack given as raw bytes.
pub trait RawPackInstaller {
    /// Installs the pack in `pack_bytes` and reports the ids it provided.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
134
/// Result of resolving an object-id prefix.
///
/// NOTE(review): the resolver itself is outside this section; variant notes
/// are inferred from the variant names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// No object matched.
    Missing,
    /// Exactly one object matched.
    Unique(ObjectId),
    /// Several objects matched; presumably carries all candidates.
    Ambiguous(Vec<ObjectId>),
}
141
/// Storage details for a single object.
///
/// NOTE(review): producers of this type are outside this section; field notes
/// are inferred from the field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Presumably the number of bytes the object occupies on disk.
    pub disk_size: u64,
    /// Presumably the id of the delta base (how "no base" is encoded is not
    /// visible here).
    pub deltabase: ObjectId,
}
147
148impl RawPackInstaller for FileObjectDatabase {
149 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
150 let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
151 Ok(RawPackInstallResult {
152 object_ids: result.object_ids,
153 })
154 }
155}
156
157impl RawPackInstaller for ObjectDatabase {
158 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
159 let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
160 Ok(RawPackInstallResult {
161 object_ids: result.written_objects,
162 })
163 }
164}
165
166pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
167 let mut missing = Vec::new();
168 for prerequisite in &bundle.prerequisites {
169 match reader.read_object(&prerequisite.oid) {
170 Ok(object) => {
171 let actual = object.object_id(bundle.format)?;
172 if actual != prerequisite.oid {
173 return Err(GitError::InvalidObject(format!(
174 "bundle prerequisite {} hashes to {actual}",
175 prerequisite.oid
176 )));
177 }
178 }
179 Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
180 Err(err) => return Err(err),
181 }
182 }
183 if missing.is_empty() {
184 return Ok(());
185 }
186 Err(GitError::object_not_found_in(
187 missing[0],
188 MissingObjectContext::PackInstall,
189 ))
190}
191
192pub fn unbundle_objects<R, W>(
193 bundle: &Bundle,
194 prerequisite_reader: &R,
195 writer: &mut W,
196) -> Result<BundleUnbundleResult>
197where
198 R: ObjectReader,
199 W: ObjectWriter,
200{
201 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
202 let pack = PackFile::parse_bundle(bundle)?;
203 let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
204 Ok(BundleUnbundleResult {
205 written_objects,
206 references: bundle.references.clone(),
207 })
208}
209
210pub fn install_bundle_pack<R>(
211 bundle: &Bundle,
212 prerequisite_reader: &R,
213 destination: &impl RawPackInstaller,
214) -> Result<BundleUnbundleResult>
215where
216 R: ObjectReader,
217{
218 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
219 let install = destination.install_raw_pack(&bundle.pack)?;
220 Ok(BundleUnbundleResult {
221 written_objects: install.object_ids,
222 references: bundle.references.clone(),
223 })
224}
225
226pub fn unpack_packfile_objects<W>(
227 pack_bytes: &[u8],
228 format: ObjectFormat,
229 writer: &W,
230) -> Result<PackUnpackResult>
231where
232 W: ObjectWriter,
233{
234 let pack = PackFile::parse(pack_bytes, format)?;
235 write_pack_objects(pack, writer, "pack")
236}
237
238fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
239where
240 W: ObjectWriter,
241{
242 let mut written_objects = Vec::with_capacity(pack.entries.len());
243 for entry in pack.entries {
244 let expected = entry.entry.oid;
245 let actual = writer.write_object(entry.object)?;
246 if actual != expected {
247 return Err(GitError::InvalidObject(format!(
248 "{source} object id mismatch: expected {expected}, wrote {actual}"
249 )));
250 }
251 written_objects.push(actual);
252 }
253 Ok(PackUnpackResult { written_objects })
254}
255
256pub fn collect_reachable_object_ids<R, I>(
257 reader: &R,
258 format: ObjectFormat,
259 starts: I,
260) -> Result<HashSet<ObjectId>>
261where
262 R: ObjectReader,
263 I: IntoIterator<Item = ObjectId>,
264{
265 walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
266}
267
268pub fn collect_reachable_object_ids_with_cut<R, I>(
273 reader: &R,
274 format: ObjectFormat,
275 starts: I,
276 cut: &HashSet<ObjectId>,
277) -> Result<HashSet<ObjectId>>
278where
279 R: ObjectReader,
280 I: IntoIterator<Item = ObjectId>,
281{
282 walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
283}
284
285pub fn collect_reachable_object_ids_excluding<R, I>(
289 reader: &R,
290 format: ObjectFormat,
291 starts: I,
292 excluded: &HashSet<ObjectId>,
293) -> Result<HashSet<ObjectId>>
294where
295 R: ObjectReader,
296 I: IntoIterator<Item = ObjectId>,
297{
298 walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
299}
300
301pub fn collect_reachable_objects<R, I>(
302 reader: &R,
303 format: ObjectFormat,
304 starts: I,
305 excluded: &HashSet<ObjectId>,
306) -> Result<Vec<Arc<EncodedObject>>>
307where
308 R: ObjectReader,
309 I: IntoIterator<Item = ObjectId>,
310{
311 let mut objects = Vec::new();
312 walk_reachable_objects(reader, format, starts, excluded, |_, object| {
313 objects.push(Arc::clone(object));
314 })?;
315 Ok(objects)
316}
317
/// An object selected for packing, paired with its already-known id so the
/// pack writer does not have to hash it again.
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id of `object`.
    oid: ObjectId,
    /// The encoded object, shared with whoever loaded it.
    object: Arc<EncodedObject>,
}
323
324fn collect_reachable_pack_objects<R, I>(
325 reader: &R,
326 format: ObjectFormat,
327 starts: I,
328 excluded: &HashSet<ObjectId>,
329) -> Result<Vec<ReachablePackObject>>
330where
331 R: ObjectReader,
332 I: IntoIterator<Item = ObjectId>,
333{
334 let mut objects = Vec::new();
335 walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
336 objects.push(ReachablePackObject {
337 oid: *oid,
338 object: Arc::clone(object),
339 });
340 })?;
341 Ok(objects)
342}
343
344fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
345 objects
346 .iter()
347 .map(|entry| PackInput {
348 oid: &entry.oid,
349 object: &entry.object,
350 })
351 .collect()
352}
353
354pub fn install_reachable_pack<I>(
355 source: &impl ObjectReader,
356 destination: &impl RawPackInstaller,
357 format: ObjectFormat,
358 starts: I,
359) -> Result<Option<RawPackInstallResult>>
360where
361 I: IntoIterator<Item = ObjectId>,
362{
363 install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
364}
365
366pub fn install_reachable_pack_excluding<I>(
367 source: &impl ObjectReader,
368 destination: &impl RawPackInstaller,
369 format: ObjectFormat,
370 starts: I,
371 excluded: &HashSet<ObjectId>,
372) -> Result<Option<RawPackInstallResult>>
373where
374 I: IntoIterator<Item = ObjectId>,
375{
376 let pack = match build_reachable_pack(source, format, starts, excluded)? {
377 Some(pack) => pack,
378 None => return Ok(None),
379 };
380 destination.install_raw_pack(&pack.pack).map(Some)
381}
382
383pub fn build_reachable_pack<R, I>(
384 reader: &R,
385 format: ObjectFormat,
386 starts: I,
387 excluded: &HashSet<ObjectId>,
388) -> Result<Option<PackWrite>>
389where
390 R: ObjectReader,
391 I: IntoIterator<Item = ObjectId>,
392{
393 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
394 if objects.is_empty() {
395 return Ok(None);
396 }
397 let inputs = pack_inputs(&objects);
402 PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
403}
404
405pub fn build_and_install_reachable_pack<R, I>(
406 source: &R,
407 destination: &FileObjectDatabase,
408 format: ObjectFormat,
409 starts: I,
410 excluded: &HashSet<ObjectId>,
411 options: RawPackInstallOptions,
412) -> Result<Option<PackInstallResult>>
413where
414 R: ObjectReader,
415 I: IntoIterator<Item = ObjectId>,
416{
417 build_and_install_reachable_pack_filtered(
418 source,
419 destination,
420 format,
421 starts,
422 excluded,
423 options,
424 None,
425 None,
426 )
427}
428
/// Object filter applied by [`build_and_install_reachable_pack_filtered`]
/// before the pack is written.
///
/// Objects named explicitly in the start set are always kept, whatever the
/// filter says.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// Drop every blob.
    BlobNone,
    /// Keep only blobs whose body is strictly smaller than this many bytes.
    BlobLimit(u64),
    /// Drop every blob, and keep only trees whose computed depth (root tree of
    /// a packed commit = 0) is strictly less than this value.
    TreeDepth(u32),
    /// Keep only blobs that appear at one of these exact slash-separated paths
    /// in a packed commit's tree.
    SparsePathSet(Vec<String>),
}
446
447#[allow(clippy::too_many_arguments)]
451pub fn build_and_install_reachable_pack_filtered<R, I>(
452 source: &R,
453 destination: &FileObjectDatabase,
454 format: ObjectFormat,
455 starts: I,
456 excluded: &HashSet<ObjectId>,
457 options: RawPackInstallOptions,
458 filter: Option<PackObjectFilter>,
459 unpack_limit: Option<usize>,
460) -> Result<Option<PackInstallResult>>
461where
462 R: ObjectReader,
463 I: IntoIterator<Item = ObjectId>,
464{
465 let starts: Vec<ObjectId> = starts.into_iter().collect();
466 let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
467 let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
468 match filter {
469 Some(PackObjectFilter::BlobNone) => {
470 objects.retain(|entry| {
471 entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
472 });
473 }
474 Some(PackObjectFilter::BlobLimit(limit)) => {
475 objects.retain(|entry| {
476 entry.object.object_type != ObjectType::Blob
477 || wanted.contains(&entry.oid)
478 || (entry.object.body.len() as u64) < limit
479 });
480 }
481 Some(PackObjectFilter::TreeDepth(depth)) => {
482 let tree_depths = collect_tree_filter_depths(source, format, &objects)?;
483 objects.retain(|entry| {
484 if wanted.contains(&entry.oid) {
485 return true;
486 }
487 match entry.object.object_type {
488 ObjectType::Blob => false,
489 ObjectType::Tree => tree_depths
490 .get(&entry.oid)
491 .is_some_and(|tree_depth| *tree_depth < depth),
492 _ => true,
493 }
494 });
495 }
496 Some(PackObjectFilter::SparsePathSet(paths)) => {
497 let allowed_blobs = collect_sparse_filter_blobs(source, format, &objects, &paths)?;
498 objects.retain(|entry| {
499 entry.object.object_type != ObjectType::Blob
500 || wanted.contains(&entry.oid)
501 || allowed_blobs.contains(&entry.oid)
502 });
503 }
504 None => {}
505 }
506 if objects.is_empty() {
507 return Ok(None);
508 }
509 if let Some(limit) = unpack_limit
513 && objects.len() < limit
514 {
515 for entry in &objects {
516 destination.loose().write_object((*entry.object).clone())?;
517 }
518 return Ok(None);
519 }
520 let inputs = pack_inputs(&objects);
521 let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
522 trace_packfile(&pack.pack)?;
523 destination
524 .install_generated_pack_unchecked(&pack, options)
525 .map(Some)
526}
527
528fn trace_packfile(pack: &[u8]) -> Result<()> {
529 let Some(path) = env::var_os("GIT_TRACE_PACKFILE").filter(|value| !value.is_empty()) else {
530 return Ok(());
531 };
532 fs::write(path, pack)?;
533 Ok(())
534}
535
536fn collect_tree_filter_depths<R>(
537 reader: &R,
538 format: ObjectFormat,
539 objects: &[ReachablePackObject],
540) -> Result<HashMap<ObjectId, u32>>
541where
542 R: ObjectReader,
543{
544 let available: HashSet<ObjectId> = objects.iter().map(|entry| entry.oid).collect();
545 let mut depths = HashMap::new();
546 let mut stack = Vec::new();
547 for entry in objects {
548 if entry.object.object_type != ObjectType::Commit {
549 continue;
550 }
551 let commit = Commit::parse(format, &entry.object.body)?;
552 if available.contains(&commit.tree) {
553 stack.push((commit.tree, 0u32));
554 }
555 }
556 while let Some((tree_oid, depth)) = stack.pop() {
557 if depths
558 .get(&tree_oid)
559 .is_some_and(|old_depth| *old_depth <= depth)
560 {
561 continue;
562 }
563 depths.insert(tree_oid, depth);
564 let tree = reader.read_object(&tree_oid)?;
565 if tree.object_type != ObjectType::Tree {
566 continue;
567 }
568 let child_depth = depth.saturating_add(1);
569 for entry in TreeEntries::new(format, &tree.body) {
570 let entry = entry?;
571 if tree_entry_object_type(entry.mode) == ObjectType::Tree
572 && available.contains(&entry.oid)
573 {
574 stack.push((entry.oid, child_depth));
575 }
576 }
577 }
578 Ok(depths)
579}
580
581fn collect_sparse_filter_blobs<R>(
582 reader: &R,
583 format: ObjectFormat,
584 objects: &[ReachablePackObject],
585 paths: &[String],
586) -> Result<HashSet<ObjectId>>
587where
588 R: ObjectReader,
589{
590 let wanted_paths: HashSet<&str> = paths.iter().map(String::as_str).collect();
591 let mut allowed = HashSet::new();
592 let mut seen_trees = HashSet::new();
593 for entry in objects {
594 if entry.object.object_type != ObjectType::Commit {
595 continue;
596 }
597 let commit = Commit::parse(format, &entry.object.body)?;
598 collect_sparse_tree_blobs(
599 reader,
600 format,
601 &commit.tree,
602 "",
603 &wanted_paths,
604 &mut seen_trees,
605 &mut allowed,
606 )?;
607 }
608 Ok(allowed)
609}
610
611fn collect_sparse_tree_blobs<R>(
612 reader: &R,
613 format: ObjectFormat,
614 tree_oid: &ObjectId,
615 prefix: &str,
616 wanted_paths: &HashSet<&str>,
617 seen_trees: &mut HashSet<ObjectId>,
618 allowed: &mut HashSet<ObjectId>,
619) -> Result<()>
620where
621 R: ObjectReader,
622{
623 if !seen_trees.insert(*tree_oid) {
624 return Ok(());
625 }
626 let tree = reader.read_object(tree_oid)?;
627 if tree.object_type != ObjectType::Tree {
628 return Ok(());
629 }
630 for entry in TreeEntries::new(format, &tree.body) {
631 let entry = entry?;
632 let name = String::from_utf8_lossy(entry.name);
633 let path = if prefix.is_empty() {
634 name.into_owned()
635 } else {
636 format!("{prefix}/{name}")
637 };
638 if tree_entry_object_type(entry.mode) == ObjectType::Tree {
639 collect_sparse_tree_blobs(
640 reader,
641 format,
642 &entry.oid,
643 &path,
644 wanted_paths,
645 seen_trees,
646 allowed,
647 )?;
648 } else if wanted_paths.contains(path.as_str()) {
649 allowed.insert(entry.oid);
650 }
651 }
652 Ok(())
653}
654
655pub fn assemble_pack_with_verbatim_reuse(
665 format: ObjectFormat,
666 reused_pack_bytes: &[u8],
667 appended: &[PackInput<'_>],
668) -> Result<(Vec<u8>, u32)> {
669 assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
670}
671
672pub fn assemble_pack_with_verbatim_reuses(
675 format: ObjectFormat,
676 reused_packs: &[&[u8]],
677 appended: &[PackInput<'_>],
678) -> Result<(Vec<u8>, u32)> {
679 let hash_len = format.raw_len();
680 let mut reused_count = 0u32;
681 let mut capacity = 12 + hash_len + 64 * appended.len();
682 for reused_pack_bytes in reused_packs {
683 if reused_pack_bytes.len() < 12 + hash_len {
684 return Err(GitError::InvalidFormat("reused pack too short".into()));
685 }
686 if &reused_pack_bytes[..4] != b"PACK" {
687 return Err(GitError::InvalidFormat(
688 "reused pack has no signature".into(),
689 ));
690 }
691 let version = u32::from_be_bytes([
692 reused_pack_bytes[4],
693 reused_pack_bytes[5],
694 reused_pack_bytes[6],
695 reused_pack_bytes[7],
696 ]);
697 if version != 2 {
698 return Err(GitError::Unsupported(format!(
699 "reused pack version {version}"
700 )));
701 }
702 let count = u32::from_be_bytes([
703 reused_pack_bytes[8],
704 reused_pack_bytes[9],
705 reused_pack_bytes[10],
706 reused_pack_bytes[11],
707 ]);
708 reused_count = reused_count
709 .checked_add(count)
710 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
711 capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
712 }
713 let total = reused_count
714 .checked_add(appended.len() as u32)
715 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
716
717 let mut out = Vec::with_capacity(capacity);
718 out.extend_from_slice(b"PACK");
719 out.extend_from_slice(&2u32.to_be_bytes());
720 out.extend_from_slice(&total.to_be_bytes());
721 for reused_pack_bytes in reused_packs {
722 out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
723 }
724 for input in appended {
725 write_undeltified_pack_entry(&mut out, input.object)?;
726 }
727 let checksum = sley_core::digest_bytes(format, &out)?;
728 out.extend_from_slice(checksum.as_bytes());
729 Ok((out, reused_count))
730}
731
732pub fn assemble_pack_with_verbatim_entries(
735 format: ObjectFormat,
736 reused_entries: &[&[u8]],
737 appended: &[PackInput<'_>],
738) -> Result<(Vec<u8>, u32)> {
739 let reused_count = u32::try_from(reused_entries.len())
740 .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
741 let total = reused_count
742 .checked_add(appended.len() as u32)
743 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
744
745 let mut capacity = 12 + format.raw_len() + 64 * appended.len();
746 for entry in reused_entries {
747 capacity = capacity.saturating_add(entry.len());
748 }
749 let mut out = Vec::with_capacity(capacity);
750 out.extend_from_slice(b"PACK");
751 out.extend_from_slice(&2u32.to_be_bytes());
752 out.extend_from_slice(&total.to_be_bytes());
753 for entry in reused_entries {
754 out.extend_from_slice(entry);
755 }
756 for input in appended {
757 write_undeltified_pack_entry(&mut out, input.object)?;
758 }
759 let checksum = sley_core::digest_bytes(format, &out)?;
760 out.extend_from_slice(checksum.as_bytes());
761 Ok((out, reused_count))
762}
763
764fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
766 let type_bits: u8 = match object.object_type {
767 ObjectType::Commit => 1,
768 ObjectType::Tree => 2,
769 ObjectType::Blob => 3,
770 ObjectType::Tag => 4,
771 };
772 let mut size = object.body.len() as u64;
773 let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
774 size >>= 4;
775 while size > 0 {
776 out.push(byte | 0x80);
777 byte = (size & 0x7f) as u8;
778 size >>= 7;
779 }
780 out.push(byte);
781 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
782 encoder.write_all(&object.body)?;
783 out.extend_from_slice(&encoder.finish()?);
784 Ok(())
785}
786
/// In-memory outcome of a repack: the new pack plus the bookkeeping a caller
/// needs to install it and clean up afterwards.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the newly written pack.
    pub pack: Vec<u8>,
    /// Bytes of the index written for `pack`.
    pub idx: Vec<u8>,
    /// Number of entries in the new pack.
    pub object_count: usize,
    /// Existing `.pack` files other than the new pack's own file name; empty
    /// for a loose-only repack.
    pub obsolete_packs: Vec<PathBuf>,
    /// Loose object ids that are now also in the new pack, sorted by raw bytes.
    pub packed_loose: Vec<ObjectId>,
    /// Sorted stems of packs the repack was asked to retain (may be empty).
    retained_pack_stems: Vec<String>,
    /// Checksum of the new pack (also used to form its file name).
    pack_checksum: ObjectId,
    /// Index entries of the new pack as reported by the pack writer.
    index_entries: Vec<PackIndexEntry>,
}
814
/// Knobs for [`repack_reachable_objects_with_options`]. All default to
/// off/empty.
#[derive(Debug, Clone, Default)]
pub struct RepackOptions {
    /// When set, objects are read without consulting alternates and packed
    /// objects from alternates are not pulled into the new pack.
    pub local: bool,
    /// When unset, packs with a `.keep` file are retained and the objects of
    /// all retained packs are left out of the new pack. When set, nothing is
    /// left out on that basis.
    pub pack_kept_objects: bool,
    /// Stems (file names without extension) of packs to retain explicitly.
    pub keep_pack_stems: HashSet<String>,
}
824
825pub fn repack_reachable_objects(
845 git_dir: &Path,
846 format: ObjectFormat,
847 roots: &[ObjectId],
848) -> Result<Option<RepackResult>> {
849 repack_reachable_objects_with_options(git_dir, format, roots, &RepackOptions::default())
850}
851
852pub fn repack_reachable_objects_with_options(
853 git_dir: &Path,
854 format: ObjectFormat,
855 roots: &[ObjectId],
856 options: &RepackOptions,
857) -> Result<Option<RepackResult>> {
858 let objects_dir = repository_objects_dir(git_dir);
859 let database = if options.local {
860 FileObjectDatabase::without_alternates(objects_dir.clone(), format)
861 } else {
862 FileObjectDatabase::new(objects_dir.clone(), format)
863 };
864 let retained_pack_stems = repack_retained_pack_stems(
865 &objects_dir.join("pack"),
866 &options.keep_pack_stems,
867 !options.pack_kept_objects,
868 )?;
869 let excluded_oids = if options.pack_kept_objects {
870 HashSet::new()
871 } else {
872 pack_oids_for_stems(&objects_dir.join("pack"), format, &retained_pack_stems)?
873 };
874
875 let mut seen: HashSet<ObjectId> = HashSet::new();
876 let mut objects: Vec<ReachablePackObject> = Vec::new();
877 let mut pending: Vec<ObjectId> = roots.to_vec();
878 while let Some(oid) = pending.pop() {
879 if !seen.insert(oid) {
880 continue;
881 }
882 let object = match database.read_object(&oid) {
883 Ok(object) => object,
884 Err(GitError::NotFound(_)) => continue,
885 Err(err) => return Err(err),
886 };
887 match object.object_type {
888 ObjectType::Commit => {
889 let commit = Commit::parse_ref(format, &object.body)?;
890 pending.extend(grafted_parents(&database, &oid, commit.parents));
891 pending.push(commit.tree);
892 }
893 ObjectType::Tree => {
894 for entry in TreeEntries::new(format, &object.body) {
895 let entry = entry?;
896 if !entry.is_gitlink() {
897 pending.push(entry.oid);
898 }
899 }
900 }
901 ObjectType::Tag => {
902 let tag = Tag::parse_ref(format, &object.body)?;
903 pending.push(tag.object);
904 }
905 ObjectType::Blob => {}
906 }
907 if !excluded_oids.contains(&oid) {
908 objects.push(ReachablePackObject { oid, object });
909 }
910 }
911
912 if !options.local {
918 for (alternate, oid) in alternate_packed_object_ids(&objects_dir, format)? {
919 if excluded_oids.contains(&oid) || !seen.insert(oid) {
920 continue;
921 }
922 let alternate_db = FileObjectDatabase::without_alternates(alternate, format);
923 match alternate_db.read_object(&oid) {
924 Ok(object) => objects.push(ReachablePackObject { oid, object }),
925 Err(GitError::NotFound(_)) => {}
926 Err(err) => return Err(err),
927 }
928 }
929 }
930
931 if objects.is_empty() {
932 return Ok(None);
933 }
934
935 let inputs = pack_inputs(&objects);
936 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
937 let object_count = written.entries.len();
938
939 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
942 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
943 .into_iter()
944 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
945 .collect();
946
947 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
948 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
949 .into_iter()
950 .filter(|oid| packed_oid_set.contains(oid))
951 .collect();
952 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
953
954 let pack_checksum = written.checksum;
955 let index_entries = written.entries.clone();
956 Ok(Some(RepackResult {
957 pack: written.pack,
958 idx: written.index,
959 object_count,
960 obsolete_packs,
961 packed_loose,
962 retained_pack_stems,
963 pack_checksum,
964 index_entries,
965 }))
966}
967
968fn repack_retained_pack_stems(
969 pack_dir: &Path,
970 explicit: &HashSet<String>,
971 keep_dot_keep: bool,
972) -> Result<Vec<String>> {
973 let mut stems = explicit.clone();
974 if keep_dot_keep {
975 for pack_path in existing_pack_files(pack_dir)? {
976 if pack_path.with_extension("keep").exists()
977 && let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
978 {
979 stems.insert(stem.to_string());
980 }
981 }
982 }
983 let mut stems = stems.into_iter().collect::<Vec<_>>();
984 stems.sort();
985 Ok(stems)
986}
987
988fn pack_oids_for_stems(
989 pack_dir: &Path,
990 format: ObjectFormat,
991 stems: &[String],
992) -> Result<HashSet<ObjectId>> {
993 let wanted: HashSet<&str> = stems.iter().map(String::as_str).collect();
994 if wanted.is_empty() {
995 return Ok(HashSet::new());
996 }
997 let mut oids = HashSet::new();
998 for pack_path in existing_pack_files(pack_dir)? {
999 let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1000 continue;
1001 };
1002 if !wanted.contains(stem) {
1003 continue;
1004 }
1005 let index_path = pack_path.with_extension("idx");
1006 if !index_path.exists() {
1007 continue;
1008 }
1009 let index = PackIndex::parse(&fs::read(index_path)?, format)?;
1010 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
1011 }
1012 Ok(oids)
1013}
1014
1015fn alternate_packed_object_ids(
1016 objects_dir: &Path,
1017 format: ObjectFormat,
1018) -> Result<Vec<(PathBuf, ObjectId)>> {
1019 let mut oids = Vec::new();
1020 for alternate in alternate_object_dirs(objects_dir) {
1021 let mut alternate_oids = HashSet::new();
1022 collect_packed_object_ids(&alternate.join("pack"), format, &mut alternate_oids)?;
1023 oids.extend(
1024 alternate_oids
1025 .into_iter()
1026 .map(|oid| (alternate.clone(), oid)),
1027 );
1028 }
1029 oids.sort_by(|left, right| {
1030 left.0
1031 .cmp(&right.0)
1032 .then(left.1.as_bytes().cmp(right.1.as_bytes()))
1033 });
1034 Ok(oids)
1035}
1036
1037pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1038 let objects_dir = repository_objects_dir(git_dir);
1039 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1040
1041 let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
1045 if all_oids.is_empty() {
1046 return Ok(None);
1047 }
1048
1049 let mut objects = Vec::with_capacity(all_oids.len());
1053 for oid in &all_oids {
1054 objects.push(ReachablePackObject {
1055 oid: *oid,
1056 object: database.read_object(oid)?,
1057 });
1058 }
1059
1060 let inputs = pack_inputs(&objects);
1061 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1062 let object_count = written.entries.len();
1063
1064 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
1070 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
1071 .into_iter()
1072 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
1073 .collect();
1074
1075 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1078 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1079 .into_iter()
1080 .filter(|oid| packed_oid_set.contains(oid))
1081 .collect();
1082 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1083
1084 Ok(Some(RepackResult {
1085 pack: written.pack,
1086 idx: written.index,
1087 object_count,
1088 obsolete_packs,
1089 packed_loose,
1090 retained_pack_stems: Vec::new(),
1091 pack_checksum: written.checksum,
1092 index_entries: written.entries,
1093 }))
1094}
1095
1096pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
1102 let objects_dir = repository_objects_dir(git_dir);
1103 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1104 let loose_oids = loose_object_ids(&objects_dir, format)?;
1105 if loose_oids.is_empty() {
1106 return Ok(None);
1107 }
1108
1109 let mut objects = Vec::with_capacity(loose_oids.len());
1110 for oid in &loose_oids {
1111 objects.push(ReachablePackObject {
1112 oid: *oid,
1113 object: database.read_object(oid)?,
1114 });
1115 }
1116
1117 let inputs = pack_inputs(&objects);
1118 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1119 let object_count = written.entries.len();
1120 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1121 let mut packed_loose: Vec<ObjectId> = loose_oids
1122 .into_iter()
1123 .filter(|oid| packed_oid_set.contains(oid))
1124 .collect();
1125 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1126
1127 let pack_checksum = written.checksum;
1128 let index_entries = written.entries.clone();
1129 Ok(Some(RepackResult {
1130 pack: written.pack,
1131 idx: written.index,
1132 object_count,
1133 obsolete_packs: Vec::new(),
1134 packed_loose,
1135 retained_pack_stems: Vec::new(),
1136 pack_checksum,
1137 index_entries,
1138 }))
1139}
1140
/// One existing pack considered by the geometric repack.
#[derive(Debug, Clone)]
struct GeometryPack {
    /// Path of the `.pack` file.
    pack_path: PathBuf,
    /// Object ids listed in the pack's index.
    oids: Vec<ObjectId>,
    /// Size measure used for the geometric progression: the number of objects.
    weight: u64,
    /// Whether a sibling `.promisor` file exists.
    is_promisor: bool,
}
1154
/// Outcome of a geometric repack.
#[derive(Debug, Clone)]
pub struct GeometricRepackResult {
    /// The new pack, or `None` when there was nothing to roll up.
    pub result: Option<RepackResult>,
    /// Packs that were rolled into the new pack; empty when `result` is `None`.
    ///
    /// NOTE(review): presumably safe to delete once the new pack is installed
    /// — the consumer is outside this section; confirm.
    pub rolled_up_packs: Vec<PathBuf>,
}
1164
1165fn collect_geometry_packs(
1168 objects_dir: &Path,
1169 format: ObjectFormat,
1170 kept_pack_stems: &HashSet<String>,
1171) -> Result<Vec<GeometryPack>> {
1172 let pack_dir = objects_dir.join("pack");
1173 let mut packs = Vec::new();
1174 for pack_path in existing_pack_files(&pack_dir)? {
1175 if pack_path.with_extension("mtimes").exists() {
1178 continue;
1179 }
1180 if pack_path.with_extension("keep").exists() {
1181 continue;
1182 }
1183 let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
1184 continue;
1185 };
1186 if kept_pack_stems.contains(stem) {
1187 continue;
1188 }
1189 let index_path = pack_path.with_extension("idx");
1190 if !index_path.exists() {
1191 continue;
1192 }
1193 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1194 let oids: Vec<ObjectId> = index.entries.iter().map(|entry| entry.oid).collect();
1195 let weight = oids.len() as u64;
1196 packs.push(GeometryPack {
1197 is_promisor: pack_path.with_extension("promisor").exists(),
1198 pack_path,
1199 oids,
1200 weight,
1201 });
1202 }
1203 packs.sort_by(|a, b| a.weight.cmp(&b.weight).then(a.pack_path.cmp(&b.pack_path)));
1205 Ok(packs)
1206}
1207
1208fn compute_geometry_split(packs: &[GeometryPack], split_factor: u64) -> usize {
1212 let pack_nr = packs.len();
1213 if pack_nr == 0 {
1214 return 0;
1215 }
1216 let mut i = pack_nr - 1;
1218 while i > 0 {
1219 let ours = packs[i].weight;
1220 let prev = packs[i - 1].weight;
1221 if ours < split_factor.saturating_mul(prev) {
1222 break;
1223 }
1224 i -= 1;
1225 }
1226 let mut split = i;
1227 if split != 0 {
1228 split += 1;
1230 }
1231
1232 let mut total_size: u64 = packs[..split].iter().map(|p| p.weight).sum();
1236 for pack in &packs[split..] {
1237 if pack.weight < split_factor.saturating_mul(total_size) {
1238 split += 1;
1239 total_size = total_size.saturating_add(pack.weight);
1240 } else {
1241 break;
1242 }
1243 }
1244 split
1245}
1246
1247pub fn repack_geometric(
1257 git_dir: &Path,
1258 format: ObjectFormat,
1259 split_factor: u64,
1260 kept_pack_stems: &HashSet<String>,
1261) -> Result<GeometricRepackResult> {
1262 let objects_dir = repository_objects_dir(git_dir);
1263 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1264
1265 let all_packs = collect_geometry_packs(&objects_dir, format, kept_pack_stems)?;
1269 let packs: Vec<GeometryPack> = all_packs
1270 .into_iter()
1271 .filter(|pack| !pack.is_promisor)
1272 .collect();
1273
1274 let split = compute_geometry_split(&packs, split_factor);
1275
1276 let loose_oids = loose_object_ids(&objects_dir, format)?;
1277
1278 let mut excluded_oids: HashSet<ObjectId> = HashSet::new();
1283 for pack in &packs[split..] {
1284 excluded_oids.extend(pack.oids.iter().copied());
1285 }
1286
1287 let mut included: Vec<ObjectId> = Vec::new();
1288 let mut seen: HashSet<ObjectId> = HashSet::new();
1289 for pack in &packs[..split] {
1290 for oid in &pack.oids {
1291 if excluded_oids.contains(oid) {
1292 continue;
1293 }
1294 if seen.insert(*oid) {
1295 included.push(*oid);
1296 }
1297 }
1298 }
1299 for oid in &loose_oids {
1300 if excluded_oids.contains(oid) {
1301 continue;
1302 }
1303 if seen.insert(*oid) {
1304 included.push(*oid);
1305 }
1306 }
1307
1308 if included.is_empty() {
1310 return Ok(GeometricRepackResult {
1311 result: None,
1312 rolled_up_packs: Vec::new(),
1313 });
1314 }
1315
1316 included.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1317 let mut objects = Vec::with_capacity(included.len());
1318 for oid in &included {
1319 objects.push(ReachablePackObject {
1320 oid: *oid,
1321 object: database.read_object(oid)?,
1322 });
1323 }
1324
1325 let inputs = pack_inputs(&objects);
1326 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1327 let object_count = written.entries.len();
1328
1329 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1330 let mut packed_loose: Vec<ObjectId> = loose_oids
1331 .into_iter()
1332 .filter(|oid| packed_oid_set.contains(oid))
1333 .collect();
1334 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1335
1336 let rolled_up_packs: Vec<PathBuf> = packs[..split]
1337 .iter()
1338 .map(|pack| pack.pack_path.clone())
1339 .collect();
1340
1341 let pack_checksum = written.checksum;
1342 let index_entries = written.entries.clone();
1343 Ok(GeometricRepackResult {
1344 result: Some(RepackResult {
1345 pack: written.pack,
1346 idx: written.index,
1347 object_count,
1348 obsolete_packs: rolled_up_packs.clone(),
1349 packed_loose,
1350 retained_pack_stems: Vec::new(),
1351 pack_checksum,
1352 index_entries,
1353 }),
1354 rolled_up_packs,
1355 })
1356}
1357
/// Installs a planned repack without writing a bitmap.
///
/// Thin wrapper over [`install_repack_result_with_bitmap`]: every argument is forwarded
/// unchanged and `None` is passed for the bitmap tips.
pub fn install_repack_result(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
) -> Result<()> {
    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
}
1380
/// Writes a planned repack into `<objects>/pack` and, when `prune` is set, removes what it
/// superseded.
///
/// Steps, in order:
/// 1. check that the pack bytes, the index and the writer's entry list agree on
///    `result.pack_checksum` and on the per-object entries;
/// 2. write `pack-<checksum>.pack`, then `.rev`, then `.idx`;
/// 3. if `bitmap_tips` is given and `build_pack_bitmap` yields a bitmap, replace
///    `pack-<checksum>.bitmap`;
/// 4. if `prune` is true, prune `result.obsolete_packs` (passing the new pack path and
///    `result.retained_pack_stems` as exclusions) and the loose objects listed in
///    `result.packed_loose`.
///
/// # Errors
/// Returns `GitError::InvalidFormat` when the index disagrees with the pack, and propagates
/// errors from checksum validation, index parsing and the helpers called here.
pub fn install_repack_result_with_bitmap(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // Refuse to install anything whose pack, index and entry list are not self-consistent.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }
    // All companion files share the `pack-<checksum>` stem.
    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    // The index is written last, after the pack and the reverse index.
    // NOTE(review): presumably so readers never see an index without its pack — confirm.
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        // `build_pack_bitmap` may decline to produce a bitmap; nothing is written then.
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            // Drop any bitmap left under the same name before writing the fresh one.
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    // Only objects that the installed index actually lists may lose their loose copy.
    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();

    prune_obsolete_pack_paths(
        &objects_dir,
        format,
        &result.obsolete_packs,
        &new_pack_path,
        &result.retained_pack_stems,
    )?;
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
    Ok(())
}
1469
1470pub fn install_geometric_repack_result(
1476 git_dir: &Path,
1477 format: ObjectFormat,
1478 geometric: &GeometricRepackResult,
1479 prune: bool,
1480 bitmap_tips: Option<&HashSet<ObjectId>>,
1481) -> Result<()> {
1482 let Some(result) = geometric.result.as_ref() else {
1483 return Ok(());
1484 };
1485 let objects_dir = repository_objects_dir(git_dir);
1486 let pack_dir = objects_dir.join("pack");
1487 fs::create_dir_all(&pack_dir)?;
1488
1489 validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
1490 let parsed_index = PackIndex::parse(&result.idx, format)?;
1491 if parsed_index.pack_checksum != result.pack_checksum {
1492 return Err(GitError::InvalidFormat(
1493 "repack index checksum does not match the new pack".into(),
1494 ));
1495 }
1496 if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
1497 return Err(GitError::InvalidFormat(
1498 "repack index does not match the new pack contents".into(),
1499 ));
1500 }
1501 let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
1502 let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
1503 let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
1504 let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
1505 let reverse_index = sley_pack::PackReverseIndex::write(
1506 format,
1507 &sley_pack::pack_order_index_positions(&parsed_index.entries),
1508 &result.pack_checksum,
1509 )?;
1510 write_pack_component(&new_pack_path, &result.pack)?;
1511 write_pack_component(&new_rev_path, &reverse_index)?;
1512 write_pack_component(&new_index_path, &result.idx)?;
1513
1514 if let Some(tips) = bitmap_tips {
1515 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1516 if let Some(bitmap) = build_pack_bitmap(
1517 &database,
1518 format,
1519 &result.index_entries,
1520 &result.pack_checksum,
1521 tips,
1522 )? {
1523 let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
1524 remove_file_if_exists(&bitmap_path)?;
1525 write_pack_component(&bitmap_path, &bitmap)?;
1526 }
1527 }
1528
1529 if !prune {
1530 return Ok(());
1531 }
1532
1533 for pack_path in &geometric.rolled_up_packs {
1536 if *pack_path == new_pack_path {
1537 continue;
1538 }
1539 if pack_path.with_extension("keep").exists() {
1540 continue;
1541 }
1542 remove_file_if_exists(pack_path)?;
1543 remove_file_if_exists(&pack_path.with_extension("idx"))?;
1544 for ext in ["rev", "mtimes", "bitmap", "promisor"] {
1545 remove_file_if_exists(&pack_path.with_extension(ext))?;
1546 }
1547 }
1548
1549 let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
1551 prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
1552
1553 let removed_stems: HashSet<String> = geometric
1555 .rolled_up_packs
1556 .iter()
1557 .filter_map(|p| p.file_stem().map(|s| s.to_string_lossy().into_owned()))
1558 .collect();
1559 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1560 Ok(())
1561}
1562
1563fn validate_pack_checksum(
1564 pack: &[u8],
1565 format: ObjectFormat,
1566 expected: &ObjectId,
1567 context: &str,
1568) -> Result<()> {
1569 if expected.format() != format {
1570 return Err(GitError::InvalidObjectId(format!(
1571 "{context} checksum format does not match object format"
1572 )));
1573 }
1574 let hash_len = format.raw_len();
1575 if pack.len() < 12 + hash_len {
1576 return Err(GitError::InvalidFormat(format!(
1577 "{context} pack file too short"
1578 )));
1579 }
1580 if &pack[..4] != b"PACK" {
1581 return Err(GitError::InvalidFormat(format!(
1582 "{context} pack file missing PACK signature"
1583 )));
1584 }
1585 let trailer_offset = pack.len() - hash_len;
1586 let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
1587 let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
1588 if &actual != expected || trailer != *expected {
1589 return Err(GitError::InvalidFormat(format!(
1590 "{context} pack checksum does not match generated pack"
1591 )));
1592 }
1593 Ok(())
1594}
1595
/// Returns the modification time of `path` in whole seconds since the Unix epoch.
///
/// Best effort: a missing file, an unavailable mtime or a pre-epoch timestamp all yield
/// `0`. Timestamps that do not fit in `u32` saturate at `u32::MAX` instead of wrapping
/// around to a small value, which would make a very new file look very old.
fn path_mtime_secs(path: &Path) -> u32 {
    fs::metadata(path)
        .and_then(|metadata| metadata.modified())
        .ok()
        .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok())
        .map_or(0, |dur| u32::try_from(dur.as_secs()).unwrap_or(u32::MAX))
}
1605
/// An in-memory cruft pack together with its companion files, as produced by
/// `build_cruft_pack` and written out by `install_cruft_repack_result`.
#[derive(Debug, Clone)]
pub struct CruftPack {
    /// Bytes of the `.pack` file.
    pub pack: Vec<u8>,
    /// Bytes of the `.idx` file.
    pub idx: Vec<u8>,
    /// Bytes of the `.rev` (reverse index) file.
    pub rev: Vec<u8>,
    /// Bytes of the `.mtimes` file holding one mtime per object.
    pub mtimes: Vec<u8>,
    /// Checksum of the pack; also used to build the `pack-<checksum>` file stem.
    pub checksum: ObjectId,
    /// Ids of the objects stored in the pack, sorted by their raw bytes.
    pub oids: Vec<ObjectId>,
}
1618
/// Plan produced by `repack_cruft_with_options`, consumed by `install_cruft_repack_result`.
#[derive(Debug, Clone)]
pub struct CruftRepackResult {
    /// New pack holding the reachable objects, or `None` when there were none to pack.
    pub reachable: Option<RepackResult>,
    /// New cruft pack holding the surviving unreachable objects, or `None` when there are none.
    pub cruft: Option<CruftPack>,
    /// Existing packs without a `.mtimes` companion that the new packs supersede.
    pub obsolete_packs: Vec<PathBuf>,
    /// Existing packs with a `.mtimes` companion (old cruft packs) that are superseded.
    pub obsolete_cruft_packs: Vec<PathBuf>,
    /// Stems of packs that must stay in place even if they are listed as obsolete.
    retained_pack_stems: Vec<String>,
}
1635
/// Public entry point for `object_mtimes_on_disk`; forwards both arguments unchanged and
/// returns its result as is.
pub fn object_mtimes_on_disk_pub(
    objects_dir: &Path,
    format: ObjectFormat,
) -> Result<HashMap<ObjectId, u32>> {
    object_mtimes_on_disk(objects_dir, format)
}
1645
1646fn object_mtimes_on_disk(
1647 objects_dir: &Path,
1648 format: ObjectFormat,
1649) -> Result<HashMap<ObjectId, u32>> {
1650 let mut mtimes: HashMap<ObjectId, u32> = HashMap::new();
1651 let mut record = |oid: ObjectId, mtime: u32| {
1652 mtimes
1653 .entry(oid)
1654 .and_modify(|existing| {
1655 if mtime > *existing {
1656 *existing = mtime;
1657 }
1658 })
1659 .or_insert(mtime);
1660 };
1661
1662 let pack_dir = objects_dir.join("pack");
1663 if let Ok(entries) = fs::read_dir(&pack_dir) {
1664 let mut idx_paths: Vec<PathBuf> = Vec::new();
1665 for entry in entries {
1666 let path = entry?.path();
1667 if path.extension().and_then(|ext| ext.to_str()) == Some("idx") {
1668 idx_paths.push(path);
1669 }
1670 }
1671 idx_paths.sort();
1672 for idx_path in idx_paths {
1673 let pack_path = idx_path.with_extension("pack");
1674 if !pack_path.exists() {
1675 continue;
1676 }
1677 let index = PackIndex::parse(&fs::read(&idx_path)?, format)?;
1678 let mtimes_path = idx_path.with_extension("mtimes");
1679 let pack_object_mtimes: Option<Vec<u32>> =
1680 fs::read(&mtimes_path).ok().and_then(|bytes| {
1681 sley_pack::PackMtimes::parse(&bytes, format, index.entries.len())
1682 .ok()
1683 .map(|parsed| parsed.mtimes)
1684 });
1685 let pack_mtime = path_mtime_secs(&pack_path);
1686 for (pos, entry) in index.entries.iter().enumerate() {
1687 let mtime = pack_object_mtimes
1688 .as_ref()
1689 .and_then(|table| table.get(pos).copied())
1690 .unwrap_or(pack_mtime);
1691 record(entry.oid, mtime);
1692 }
1693 }
1694 }
1695
1696 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1697 for oid in loose_object_ids(objects_dir, format)? {
1698 let path = store.object_path(&oid)?;
1699 record(oid, path_mtime_secs(&path));
1700 }
1701 Ok(mtimes)
1702}
1703
/// Public entry point for `build_cruft_pack`; forwards all arguments unchanged and returns
/// its result as is.
pub fn build_cruft_pack_pub(
    database: &FileObjectDatabase,
    format: ObjectFormat,
    survivors: &HashMap<ObjectId, u32>,
) -> Result<Option<CruftPack>> {
    build_cruft_pack(database, format, survivors)
}
1712
1713fn build_cruft_pack(
1716 database: &FileObjectDatabase,
1717 format: ObjectFormat,
1718 survivors: &HashMap<ObjectId, u32>,
1719) -> Result<Option<CruftPack>> {
1720 if survivors.is_empty() {
1721 return Ok(None);
1722 }
1723 let mut ordered: Vec<(ObjectId, u32)> = survivors.iter().map(|(o, m)| (*o, *m)).collect();
1724 ordered.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
1725
1726 let mut oids: Vec<ObjectId> = Vec::with_capacity(ordered.len());
1727 let mut objects: Vec<Arc<EncodedObject>> = Vec::with_capacity(ordered.len());
1728 let mut mtime_by_oid: HashMap<ObjectId, u32> = HashMap::with_capacity(ordered.len());
1729 for (oid, mtime) in ordered {
1730 match database.read_object(&oid) {
1731 Ok(object) => {
1732 oids.push(oid);
1733 objects.push(object);
1734 mtime_by_oid.insert(oid, mtime);
1735 }
1736 Err(GitError::NotFound(_)) => {}
1737 Err(err) => return Err(err),
1738 }
1739 }
1740 if oids.is_empty() {
1741 return Ok(None);
1742 }
1743
1744 let inputs: Vec<PackInput<'_>> = oids
1745 .iter()
1746 .zip(&objects)
1747 .map(|(oid, object)| PackInput {
1748 oid,
1749 object: object.as_ref(),
1750 })
1751 .collect();
1752 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1753
1754 let mut sorted_entries: Vec<&sley_pack::PackIndexEntry> = written.entries.iter().collect();
1756 sorted_entries.sort_by(|a, b| a.oid.as_bytes().cmp(b.oid.as_bytes()));
1757 let mtimes_table: Vec<u32> = sorted_entries
1758 .iter()
1759 .map(|entry| mtime_by_oid.get(&entry.oid).copied().unwrap_or(0))
1760 .collect();
1761 let positions = sley_pack::pack_order_index_positions(&written.entries);
1762 let rev = sley_pack::PackReverseIndex::write(format, &positions, &written.checksum)?;
1763 let mtimes = sley_pack::PackMtimes::write(format, &mtimes_table, &written.checksum)?;
1764
1765 let mut cruft_oids: Vec<ObjectId> = sorted_entries.iter().map(|e| e.oid).collect();
1766 cruft_oids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1767 Ok(Some(CruftPack {
1768 pack: written.pack,
1769 idx: written.index,
1770 rev,
1771 mtimes,
1772 checksum: written.checksum,
1773 oids: cruft_oids,
1774 }))
1775}
1776
/// Plans a cruft repack with default options.
///
/// Forwards `git_dir`, `format`, `roots` and `cruft_expiration` to
/// [`repack_cruft_with_options`] together with `RepackOptions::default()`.
pub fn repack_cruft(
    git_dir: &Path,
    format: ObjectFormat,
    roots: &[ObjectId],
    cruft_expiration: Option<u32>,
) -> Result<CruftRepackResult> {
    repack_cruft_with_options(
        git_dir,
        format,
        roots,
        cruft_expiration,
        &RepackOptions::default(),
    )
}
1800
1801pub fn repack_cruft_with_options(
1802 git_dir: &Path,
1803 format: ObjectFormat,
1804 roots: &[ObjectId],
1805 cruft_expiration: Option<u32>,
1806 options: &RepackOptions,
1807) -> Result<CruftRepackResult> {
1808 let objects_dir = repository_objects_dir(git_dir);
1809 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1810 let pack_dir = objects_dir.join("pack");
1811 let retained_pack_stems = repack_retained_pack_stems(
1812 &pack_dir,
1813 &options.keep_pack_stems,
1814 !options.pack_kept_objects,
1815 )?;
1816 let excluded_oids = if options.pack_kept_objects {
1817 HashSet::new()
1818 } else {
1819 pack_oids_for_stems(&pack_dir, format, &retained_pack_stems)?
1820 };
1821
1822 let mut reachable_ids = collect_reachable_object_ids(&database, format, roots.iter().copied())?;
1824 reachable_ids.retain(|oid| !excluded_oids.contains(oid));
1825 let reachable_result = if reachable_ids.is_empty() {
1826 None
1827 } else {
1828 let mut ids: Vec<ObjectId> = reachable_ids.iter().copied().collect();
1829 ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1830 let mut objects = Vec::with_capacity(ids.len());
1831 for oid in &ids {
1832 match database.read_object(oid) {
1833 Ok(object) => objects.push(ReachablePackObject { oid: *oid, object }),
1834 Err(GitError::NotFound(_)) => {}
1835 Err(err) => return Err(err),
1836 }
1837 }
1838 if objects.is_empty() {
1839 None
1840 } else {
1841 let inputs = pack_inputs(&objects);
1842 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1843 let packed_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1844 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1845 .into_iter()
1846 .filter(|oid| packed_set.contains(oid))
1847 .collect();
1848 packed_loose.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1849 Some(RepackResult {
1850 pack: written.pack,
1851 idx: written.index,
1852 object_count: written.entries.len(),
1853 obsolete_packs: Vec::new(),
1854 packed_loose,
1855 retained_pack_stems: Vec::new(),
1856 pack_checksum: written.checksum,
1857 index_entries: written.entries,
1858 })
1859 }
1860 };
1861
1862 let mut survivors: HashMap<ObjectId, u32> = object_mtimes_on_disk(&objects_dir, format)?
1865 .into_iter()
1866 .filter(|(oid, _)| !reachable_ids.contains(oid) && !excluded_oids.contains(oid))
1867 .collect();
1868
1869 if let Some(expiration) = cruft_expiration {
1871 rescue_and_expire_cruft_objects(&database, format, &mut survivors, expiration)?;
1872 }
1873
1874 let cruft = build_cruft_pack(&database, format, &survivors)?;
1875
1876 let mut obsolete_packs = Vec::new();
1879 let mut obsolete_cruft_packs = Vec::new();
1880 for pack_path in existing_pack_files(&pack_dir)? {
1881 if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
1882 && retained_pack_stems.iter().any(|retained| retained == stem)
1883 {
1884 continue;
1885 }
1886 if pack_path.with_extension("keep").exists() {
1887 continue;
1888 }
1889 if pack_path.with_extension("mtimes").exists() {
1890 obsolete_cruft_packs.push(pack_path);
1891 } else {
1892 obsolete_packs.push(pack_path);
1893 }
1894 }
1895
1896 Ok(CruftRepackResult {
1897 reachable: reachable_result,
1898 cruft,
1899 obsolete_packs,
1900 obsolete_cruft_packs,
1901 retained_pack_stems,
1902 })
1903}
1904
1905fn rescue_and_expire_cruft_objects(
1910 database: &FileObjectDatabase,
1911 format: ObjectFormat,
1912 survivors: &mut HashMap<ObjectId, u32>,
1913 expiration: u32,
1914) -> Result<()> {
1915 let recent: Vec<ObjectId> = survivors
1916 .iter()
1917 .filter(|(_, mtime)| **mtime > expiration)
1918 .map(|(oid, _)| *oid)
1919 .collect();
1920
1921 let mut keep: HashSet<ObjectId> = HashSet::new();
1922 let mut pending: Vec<ObjectId> = recent.clone();
1923 while let Some(oid) = pending.pop() {
1924 if !keep.insert(oid) {
1925 continue;
1926 }
1927 let Ok(object) = database.read_object(&oid) else {
1928 continue;
1929 };
1930 match object.object_type {
1931 ObjectType::Commit => {
1932 if let Ok(commit) = Commit::parse_ref(format, &object.body) {
1933 pending.extend(commit.parents.iter().copied());
1934 pending.push(commit.tree);
1935 }
1936 }
1937 ObjectType::Tree => {
1938 for entry in TreeEntries::new(format, &object.body).flatten() {
1939 if !entry.is_gitlink() {
1940 pending.push(entry.oid);
1941 }
1942 }
1943 }
1944 ObjectType::Tag => {
1945 if let Ok(tag) = Tag::parse_ref(format, &object.body) {
1946 pending.push(tag.object);
1947 }
1948 }
1949 ObjectType::Blob => {}
1950 }
1951 }
1952
1953 survivors.retain(|oid, mtime| *mtime > expiration || keep.contains(oid));
1956 Ok(())
1957}
1958
1959pub fn install_cruft_repack_result(
1963 git_dir: &Path,
1964 format: ObjectFormat,
1965 result: &CruftRepackResult,
1966 prune: bool,
1967) -> Result<()> {
1968 let objects_dir = repository_objects_dir(git_dir);
1969 let pack_dir = objects_dir.join("pack");
1970 fs::create_dir_all(&pack_dir)?;
1971
1972 let new_reachable_name = result
1974 .reachable
1975 .as_ref()
1976 .map(|r| format!("pack-{}.pack", r.pack_checksum.to_hex()));
1977 let new_cruft_name = result
1978 .cruft
1979 .as_ref()
1980 .map(|c| format!("pack-{}.pack", c.checksum.to_hex()));
1981
1982 if let Some(reachable) = result.reachable.as_ref() {
1984 let parsed_index = PackIndex::parse(&reachable.idx, format)?;
1985 let pack_name = format!("pack-{}", reachable.pack_checksum.to_hex());
1986 let reverse_index = sley_pack::PackReverseIndex::write(
1987 format,
1988 &sley_pack::pack_order_index_positions(&parsed_index.entries),
1989 &reachable.pack_checksum,
1990 )?;
1991 write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &reachable.pack)?;
1992 write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &reverse_index)?;
1993 write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &reachable.idx)?;
1994 }
1995
1996 if let Some(cruft) = result.cruft.as_ref() {
1998 let pack_name = format!("pack-{}", cruft.checksum.to_hex());
1999 write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &cruft.pack)?;
2000 write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &cruft.rev)?;
2001 write_pack_component(&pack_dir.join(format!("{pack_name}.mtimes")), &cruft.mtimes)?;
2002 write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &cruft.idx)?;
2003 }
2004
2005 if !prune {
2006 return Ok(());
2007 }
2008
2009 let mut present: HashSet<ObjectId> = HashSet::new();
2011 if let Some(reachable) = result.reachable.as_ref() {
2012 present.extend(reachable.index_entries.iter().map(|e| e.oid));
2013 }
2014 if let Some(cruft) = result.cruft.as_ref() {
2015 present.extend(cruft.oids.iter().copied());
2016 }
2017
2018 let mut removed_stems: HashSet<String> = HashSet::new();
2020 for pack_path in result
2021 .obsolete_packs
2022 .iter()
2023 .chain(result.obsolete_cruft_packs.iter())
2024 {
2025 let file_name = pack_path.file_name().and_then(|n| n.to_str());
2026 if file_name == new_reachable_name.as_deref() || file_name == new_cruft_name.as_deref() {
2027 continue;
2028 }
2029 if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str())
2030 && result
2031 .retained_pack_stems
2032 .iter()
2033 .any(|retained| retained == stem)
2034 {
2035 continue;
2036 }
2037 if pack_path.with_extension("keep").exists() {
2038 continue;
2039 }
2040 if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) {
2041 removed_stems.insert(stem.to_string());
2042 }
2043 remove_file_if_exists(pack_path)?;
2044 remove_file_if_exists(&pack_path.with_extension("idx"))?;
2045 for ext in ["rev", "mtimes", "bitmap", "promisor"] {
2046 remove_file_if_exists(&pack_path.with_extension(ext))?;
2047 }
2048 }
2049
2050 let loose_now_packed: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2052 .into_iter()
2053 .filter(|oid| present.contains(oid))
2054 .collect();
2055 prune_loose_objects(&objects_dir, format, loose_now_packed.iter(), &present)?;
2056
2057 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2058 Ok(())
2059}
2060
2061fn pack_index_entries_match_writer(
2062 parsed: &[PackIndexEntry],
2063 writer_entries: &[PackIndexEntry],
2064) -> bool {
2065 if parsed.len() != writer_entries.len() {
2066 return false;
2067 }
2068 let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
2069 writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2070 parsed.iter().zip(writer_entries).all(|(left, right)| {
2071 left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
2072 })
2073}
2074
2075pub fn prune_unreachable_loose<I>(
2084 git_dir: &Path,
2085 format: ObjectFormat,
2086 roots: I,
2087 delete: bool,
2088) -> Result<Vec<ObjectId>>
2089where
2090 I: IntoIterator<Item = ObjectId>,
2091{
2092 let objects_dir = repository_objects_dir(git_dir);
2093 let database = FileObjectDatabase::new(objects_dir.clone(), format);
2094 let reachable = collect_reachable_object_ids(&database, format, roots)?;
2095
2096 let store = LooseObjectStore::new(objects_dir.clone(), format);
2097 let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
2098 .into_iter()
2099 .filter(|oid| !reachable.contains(oid))
2100 .collect();
2101 pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2102
2103 if delete {
2104 for oid in &pruned {
2105 let path = store.object_path(oid)?;
2106 match fs::remove_file(&path) {
2107 Ok(()) => {}
2108 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
2109 Err(err) => return Err(GitError::Io(err.to_string())),
2110 }
2111 }
2112 }
2113 Ok(pruned)
2114}
2115
2116fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
2119 let oids = loose_object_id_set(objects_dir, format)?;
2120 let mut oids = oids.into_iter().collect::<Vec<_>>();
2121 oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
2122 Ok(oids)
2123}
2124
/// Collects the ids of all loose objects under `objects_dir` into a set.
///
/// Delegates the directory scan to `collect_loose_object_ids` and propagates its errors.
fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
    let mut oids = HashSet::new();
    collect_loose_object_ids(objects_dir, format, &mut oids)?;
    Ok(oids)
}
2130
2131fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
2134 if !pack_dir.exists() {
2135 return Ok(Vec::new());
2136 }
2137 let mut packs = Vec::new();
2138 for entry in fs::read_dir(pack_dir)? {
2139 let path = entry?.path();
2140 if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
2141 packs.push(path);
2142 }
2143 }
2144 packs.sort();
2145 Ok(packs)
2146}
2147
/// Prunes every pack in `packs` that `prune_pack_paths_matching` does not protect.
///
/// Thin wrapper that forwards all arguments and supplies a predicate that always answers
/// `Ok(true)`, so no additional per-pack filtering is applied.
fn prune_obsolete_pack_paths(
    objects_dir: &Path,
    format: ObjectFormat,
    packs: &[PathBuf],
    keep: &Path,
    retained_pack_stems: &[String],
) -> Result<()> {
    prune_pack_paths_matching(objects_dir, format, packs.iter(), keep, retained_pack_stems, |_| Ok(true))
}
2160
2161fn prune_pack_paths_matching<'a>(
2162 objects_dir: &Path,
2163 format: ObjectFormat,
2164 packs: impl IntoIterator<Item = &'a PathBuf>,
2165 keep: &Path,
2166 retained_pack_stems: &[String],
2167 mut should_prune: impl FnMut(&Path) -> Result<bool>,
2168) -> Result<()> {
2169 let pack_dir = objects_dir.join("pack");
2170 let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
2171 let retained_pack_stems: HashSet<&str> =
2172 retained_pack_stems.iter().map(String::as_str).collect();
2173 let mut removed_stems: HashSet<String> = HashSet::new();
2174
2175 for pack_path in packs {
2176 if pack_path == keep {
2177 continue;
2178 }
2179 let Some(stem) = pack_path.file_stem() else {
2180 continue;
2181 };
2182 if Some(stem) == keep_stem.as_deref() {
2183 continue;
2184 }
2185 if let Some(stem) = stem.to_str()
2186 && retained_pack_stems.contains(stem)
2187 {
2188 continue;
2189 }
2190 if pack_path.with_extension("keep").exists()
2191 || pack_path.with_extension("promisor").exists()
2192 {
2193 continue;
2194 }
2195 if !should_prune(pack_path)? {
2196 continue;
2197 }
2198 remove_file_if_exists(pack_path)?;
2199 remove_file_if_exists(&pack_path.with_extension("idx"))?;
2200 for ext in ["rev", "mtimes", "bitmap"] {
2201 remove_file_if_exists(&pack_path.with_extension(ext))?;
2202 }
2203 removed_stems.insert(stem.to_string_lossy().into_owned());
2204 }
2205
2206 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
2207 Ok(())
2208}
2209
2210fn prune_stale_multi_pack_index(
2217 pack_dir: &Path,
2218 format: ObjectFormat,
2219 removed_stems: &HashSet<String>,
2220) -> Result<()> {
2221 if removed_stems.is_empty() {
2222 return Ok(());
2223 }
2224 let midx_path = pack_dir.join("multi-pack-index");
2225 if !midx_path.exists() {
2226 return Ok(());
2227 }
2228 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
2229 let references_removed_pack = midx.pack_names.iter().any(|name| {
2230 let stem = name.strip_suffix(".idx").unwrap_or(name);
2231 removed_stems.contains(stem)
2232 });
2233 if references_removed_pack {
2234 remove_file_if_exists(&midx_path)?;
2235 }
2236 Ok(())
2237}
2238
2239fn prune_loose_objects<'a, I>(
2242 objects_dir: &Path,
2243 format: ObjectFormat,
2244 candidates: I,
2245 present: &HashSet<ObjectId>,
2246) -> Result<()>
2247where
2248 I: IntoIterator<Item = &'a ObjectId>,
2249{
2250 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
2251 for oid in candidates {
2252 if !present.contains(oid) {
2253 continue;
2254 }
2255 remove_file_if_exists(&store.object_path(oid)?)?;
2256 }
2257 Ok(())
2258}
2259
/// How a delta entry in a pack names its base object, as decoded by
/// `pack_entry_delta_base`.
enum PackDeltaBase {
    /// Entry type 6: the base's offset in the pack, i.e. the entry's own offset minus the
    /// encoded relative distance.
    Offset(u64),
    /// Entry type 7: the base's object id, stored verbatim in the entry.
    Ref(ObjectId),
}
2264
/// Result of `scan_pack_index_offsets` for a single pack entry.
struct PackIndexOffsetInfo {
    /// Offset at which the entry ends: the smallest index offset greater than the entry's
    /// own, or the trailer offset if there is none. Equals the entry's own offset when
    /// several index entries share it.
    end_offset: u64,
    /// Id of the index entry located at the requested delta base offset, if one was requested.
    delta_base_oid: Option<ObjectId>,
}
2269
2270fn scan_pack_index_offsets(
2271 index: &PackIndex,
2272 target_offset: u64,
2273 trailer_offset: u64,
2274 delta_base_offset: Option<u64>,
2275) -> Result<PackIndexOffsetInfo> {
2276 let mut target_count = 0usize;
2277 let mut next_offset = None;
2278 let mut delta_base_oid = None;
2279
2280 for entry in &index.entries {
2281 if entry.offset == target_offset {
2282 target_count += 1;
2283 } else if entry.offset > target_offset {
2284 match next_offset {
2285 Some(current) if current <= entry.offset => {}
2286 _ => next_offset = Some(entry.offset),
2287 }
2288 }
2289 if Some(entry.offset) == delta_base_offset {
2290 delta_base_oid = Some(entry.oid);
2291 }
2292 }
2293
2294 if target_count == 0 {
2295 return Err(GitError::InvalidFormat(format!(
2296 "pack index offset {target_offset} not found"
2297 )));
2298 }
2299 if let Some(offset) = delta_base_offset
2300 && delta_base_oid.is_none()
2301 {
2302 return Err(GitError::InvalidFormat(format!(
2303 "ofs-delta base offset {offset} not found"
2304 )));
2305 }
2306
2307 Ok(PackIndexOffsetInfo {
2308 end_offset: if target_count > 1 {
2311 target_offset
2312 } else {
2313 next_offset.unwrap_or(trailer_offset)
2314 },
2315 delta_base_oid,
2316 })
2317}
2318
2319fn pack_entry_delta_base(
2320 format: ObjectFormat,
2321 pack: &[u8],
2322 entry_offset: u64,
2323) -> Result<Option<PackDeltaBase>> {
2324 let mut cursor = usize::try_from(entry_offset)
2325 .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
2326 let first = pack_next_byte(pack, &mut cursor)?;
2327 let kind = (first >> 4) & 0x07;
2328 let mut byte = first;
2329 while byte & 0x80 != 0 {
2330 byte = pack_next_byte(pack, &mut cursor)?;
2331 }
2332 match kind {
2333 6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
2334 pack,
2335 &mut cursor,
2336 entry_offset,
2337 )?))),
2338 7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
2339 format,
2340 pack,
2341 &mut cursor,
2342 )?))),
2343 _ => Ok(None),
2344 }
2345}
2346
2347fn parse_ref_delta_base_oid(
2348 format: ObjectFormat,
2349 pack: &[u8],
2350 cursor: &mut usize,
2351) -> Result<ObjectId> {
2352 let raw_len = format.raw_len();
2353 if *cursor + raw_len > pack.len() {
2354 return Err(GitError::InvalidFormat(
2355 "truncated ref-delta base object id".into(),
2356 ));
2357 }
2358 let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
2359 *cursor += raw_len;
2360 Ok(oid)
2361}
2362
2363fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
2364 let mut byte = pack_next_byte(pack, cursor)?;
2365 let mut relative = u64::from(byte & 0x7f);
2366 while byte & 0x80 != 0 {
2367 byte = pack_next_byte(pack, cursor)?;
2368 relative = relative
2369 .checked_add(1)
2370 .and_then(|value| value.checked_shl(7))
2371 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2372 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2373 }
2374 entry_offset
2375 .checked_sub(relative)
2376 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2377}
2378
2379fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
2380 let Some(byte) = pack.get(*cursor).copied() else {
2381 return Err(GitError::InvalidFormat("truncated pack entry".into()));
2382 };
2383 *cursor += 1;
2384 Ok(byte)
2385}
2386
/// Returns the null object id for `format`, i.e. `ObjectId::null(format)`.
///
/// The body always returns `Ok`; the return type is nevertheless a `Result`.
fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
    Ok(ObjectId::null(format))
}
2390
2391fn remove_file_if_exists(path: &Path) -> Result<()> {
2393 match fs::remove_file(path) {
2394 Ok(()) => Ok(()),
2395 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
2396 Err(err) => Err(GitError::Io(err.to_string())),
2397 }
2398}
2399
2400fn walk_reachable_objects<R, I, F>(
2401 reader: &R,
2402 format: ObjectFormat,
2403 starts: I,
2404 excluded: &HashSet<ObjectId>,
2405 visit: F,
2406) -> Result<HashSet<ObjectId>>
2407where
2408 R: ObjectReader,
2409 I: IntoIterator<Item = ObjectId>,
2410 F: FnMut(&ObjectId, &Arc<EncodedObject>),
2411{
2412 walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
2413}
2414
/// Depth-first walk of the object graph from `starts`, returning every object
/// id that was reached.
///
/// * Ids in `excluded` are neither read, visited, followed, nor recorded.
/// * Commits in `cut` are still visited and their tree is followed, but their
///   parents are not enqueued.
/// * Gitlink tree entries are skipped.
/// * `visit` is invoked once per object, after its body has parsed and before
///   its children are pushed.
///
/// # Errors
///
/// A failed read is passed through `with_missing_object_context` with the
/// `Traversal` context; parse errors from commits, trees, and tags propagate
/// unchanged.
fn walk_reachable_objects_with_cut<R, I, F>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
    cut: &HashSet<ObjectId>,
    mut visit: F,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
    F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
    let mut seen = HashSet::new();
    let mut pending = Vec::new();
    for start in starts {
        // Each start is fully drained before the next one is considered.
        pending.push(start);
        while let Some(oid) = pending.pop() {
            if excluded.contains(&oid) {
                continue;
            }
            if !seen.insert(oid) {
                continue;
            }
            let object = reader.read_object(&oid).map_err(|err| {
                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
            })?;
            match object.object_type {
                ObjectType::Commit => {
                    let (tree, parents) = {
                        let commit = Commit::parse_ref(format, &object.body)?;
                        (commit.tree, commit.parents)
                    };
                    visit(&oid, &object);
                    if !cut.contains(&oid) {
                        // Pushed in reverse so the first parent is popped before the others.
                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
                            pending.push(parent);
                        }
                    }
                    // Pushed last, so the commit's tree is processed before any parent.
                    pending.push(tree);
                }
                ObjectType::Tree => {
                    let mut child_oids = Vec::new();
                    for entry in TreeEntries::new(format, &object.body) {
                        let entry = entry?;
                        if entry.is_gitlink() {
                            continue;
                        }
                        child_oids.push(entry.oid);
                    }
                    // The tree is only visited once every entry has parsed.
                    visit(&oid, &object);
                    // Reversed so entries are popped in their stored order.
                    pending.extend(child_oids.into_iter().rev());
                }
                ObjectType::Tag => {
                    let target = {
                        let tag = Tag::parse_ref(format, &object.body)?;
                        tag.object
                    };
                    visit(&oid, &object);
                    pending.push(target);
                }
                ObjectType::Blob => visit(&oid, &object),
            }
        }
    }
    Ok(seen)
}
2485
/// Reports whether bit `position` is set in `words` (64 bits per word, least
/// significant bit first). Positions past the end of `words` read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    match words.get((position / 64) as usize) {
        Some(word) => *word & mask != 0,
        None => false,
    }
}
2494
/// Sets bit `position` in `words` (64 bits per word, least significant bit
/// first). Positions past the end of `words` are silently ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
2501
/// ORs `other` into `acc` word by word.
///
/// Only the words both slices have in common are touched; any extra words in
/// either slice are left alone.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other.iter())
        .for_each(|(target, source)| *target |= *source);
}
2507
/// Lists the positions of all set bits in `words`, in ascending order.
///
/// Bit `b` of word `w` is reported as position `w * 64 + b`.
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    words
        .iter()
        .enumerate()
        .flat_map(|(word_index, &word)| {
            // Yield the word with its lowest set bit cleared, repeatedly, until
            // nothing is left; each yielded value's trailing-zero count is a bit.
            let first = (word != 0).then_some(word);
            std::iter::successors(first, |&rest| {
                let cleared = rest & (rest - 1);
                (cleared != 0).then_some(cleared)
            })
            .map(move |rest| word_index as u32 * 64 + rest.trailing_zeros())
        })
        .collect()
}
2521
/// Extracts the timestamp from an identity line.
///
/// The line is split on spaces from the right; the last field is discarded and
/// the one before it is parsed as a decimal `i64`. Returns `0` when that field
/// is missing, is not UTF-8, or does not parse.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    let Some(raw) = identity.rsplitn(3, |byte| *byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
2534
/// Returns how many commits to skip ahead from index `idx` when choosing the
/// next commit to bitmap.
///
/// * `idx <= 100`: `0`.
/// * `100 < idx <= 20000`: `idx - 100`, capped at `100`.
/// * `idx > 20000`: `idx - 20000`, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    if idx <= MUST_REGION {
        0
    } else if idx <= MIN_REGION {
        (idx - MUST_REGION).min(MIN_COMMITS)
    } else {
        (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS)
    }
}
2553
2554pub fn build_pack_bitmap(
2568 db: &FileObjectDatabase,
2569 format: ObjectFormat,
2570 index_entries: &[PackIndexEntry],
2571 pack_checksum: &ObjectId,
2572 preferred_tips: &HashSet<ObjectId>,
2573) -> Result<Option<Vec<u8>>> {
2574 let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
2577 by_offset.sort_by_key(|&slot| index_entries[slot].offset);
2578 let bit_order: Vec<ObjectId> = by_offset
2579 .into_iter()
2580 .map(|slot| index_entries[slot].oid)
2581 .collect();
2582 build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
2583}
2584
2585pub fn build_midx_bitmap(
2591 db: &FileObjectDatabase,
2592 format: ObjectFormat,
2593 midx_entries: &[sley_pack::MultiPackIndexEntry],
2594 midx_checksum: &ObjectId,
2595 preferred_pack: u32,
2596 preferred_tips: &HashSet<ObjectId>,
2597) -> Result<Option<Vec<u8>>> {
2598 let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
2599 pseudo.sort_by_key(|&slot| {
2600 let entry = &midx_entries[slot];
2601 (
2602 entry.pack_int_id != preferred_pack,
2603 entry.pack_int_id,
2604 entry.offset,
2605 )
2606 });
2607 let bit_order: Vec<ObjectId> = pseudo
2608 .into_iter()
2609 .map(|slot| midx_entries[slot].oid)
2610 .collect();
2611 build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
2612}
2613
/// Counts the "maximal" commits among `selected`, following first-parent
/// history only.
///
/// Every selected commit starts with a mask that has its own bit set. Masks are
/// pushed along first-parent edges from children to parents; a commit counts as
/// maximal when, at the moment it is processed, its mask is still flagged as
/// not subsumed by a child's mask.
///
/// # Errors
///
/// Propagates read and commit-parse errors for any commit on the walked
/// first-parent chains.
fn bitmap_num_maximal_commits(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    selected: &[ObjectId],
) -> Result<usize> {
    // Record the (graft-adjusted) first parent of every commit reachable from
    // `selected` along first-parent links.
    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
    let mut stack: Vec<ObjectId> = selected.to_vec();
    while let Some(oid) = stack.pop() {
        if first_parent.contains_key(&oid) {
            continue;
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
        first_parent.insert(oid, parent);
        if let Some(parent) = parent {
            stack.push(parent);
        }
    }
    // Number of first-parent children each commit still has to hear from.
    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
    for parent in first_parent.values().flatten() {
        *pending_children.entry(*parent).or_default() += 1;
    }
    let word_count = selected.len().div_ceil(64);
    // Per-commit set of selected commits (one bit each) plus the maximal flag.
    struct MaximalEnt {
        mask: Vec<u64>,
        maximal: bool,
    }
    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
    for (bit, oid) in selected.iter().enumerate() {
        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
            mask: vec![0u64; word_count],
            maximal: true,
        });
        ent.mask[bit / 64] |= 1u64 << (bit % 64);
        ent.maximal = true;
    }
    // Start from commits that have no first-parent children in the walked set.
    let mut queue: Vec<ObjectId> = first_parent
        .keys()
        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
        .copied()
        .collect();
    let mut num_maximal = 0usize;
    while let Some(oid) = queue.pop() {
        if let Some(ent) = ents.remove(&oid) {
            if ent.maximal {
                num_maximal += 1;
            }
            if let Some(Some(parent)) = first_parent.get(&oid) {
                match ents.entry(*parent) {
                    std::collections::hash_map::Entry::Vacant(vacant) => {
                        // The parent carries nothing of its own: it inherits the
                        // child's mask and is not maximal.
                        vacant.insert(MaximalEnt {
                            mask: ent.mask.clone(),
                            maximal: false,
                        });
                    }
                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
                        let parent_ent = occupied.get_mut();
                        // Does the child contribute any bit the parent lacks?
                        let c_not_p = ent
                            .mask
                            .iter()
                            .zip(&parent_ent.mask)
                            .any(|(child, parent)| child & !parent != 0);
                        if c_not_p {
                            // Evaluated before merging: does the parent hold a
                            // bit the child lacks?
                            let p_not_c = parent_ent
                                .mask
                                .iter()
                                .zip(&ent.mask)
                                .any(|(parent, child)| parent & !child != 0);
                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
                                *parent |= child;
                            }
                            parent_ent.maximal = p_not_c;
                        }
                    }
                }
            }
        }
        // A parent is queued only once all of its first-parent children are done.
        if let Some(Some(parent)) = first_parent.get(&oid)
            && let Some(remaining) = pending_children.get_mut(parent)
        {
            *remaining -= 1;
            if *remaining == 0 {
                queue.push(*parent);
            }
        }
    }
    Ok(num_maximal)
}
2713
/// Serializes a reachability bitmap whose bit `n` stands for `bit_order[n]`.
///
/// Steps: derive each object's position in oid-sorted order, record every
/// object's type, collect the commits, select a subset of them, compute each
/// selected commit's full reachable set as a bitset, and hand everything to
/// `PackBitmapWriter`.
///
/// Returns `Ok(None)` when `bit_order` is empty or longer than `u32::MAX`, or
/// when a selected commit reaches an object that is not in `bit_order` (a
/// warning is printed to stderr in that case).
///
/// # Errors
///
/// Propagates object read/parse errors and errors from `PackBitmapWriter`.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // `index_position[pack_pos]` is the object's rank when all ids are sorted
    // by their raw bytes.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    let mut object_types = Vec::with_capacity(object_count);
    // What the selection and writer steps need to know about each commit.
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Try the header first; fall back to a full read when it yields nothing.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Newest committer date first.
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Small history: every commit gets a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default pick is the commit at the end of the window `i..=i + next`.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // The first preferred tip in the window wins outright.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    // Otherwise the last commit with two or more parents is kept.
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // Statistics are only computed when GIT_TRACE2_EVENT is set.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    let word_count = object_count.div_ceil(64);
    // Finished bitsets, reused whenever a later walk reaches an already
    // bitmapped commit.
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    // Reverse of selection order, i.e. oldest selected commit first.
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            if let Some(stored) = memo.get(&oid) {
                // A memoized bitset already covers this commit's whole closure.
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    // Entries are added in selection order.
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
2865
2866fn bitmap_mark_tree(
2870 db: &impl ObjectReader,
2871 format: ObjectFormat,
2872 tree: &ObjectId,
2873 oid_to_pack: &HashMap<ObjectId, u32>,
2874 acc: &mut [u64],
2875) -> Result<bool> {
2876 let Some(&pack_pos) = oid_to_pack.get(tree) else {
2877 eprintln!(
2878 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
2879 );
2880 return Ok(false);
2881 };
2882 if bitset_get(acc, pack_pos) {
2883 return Ok(true);
2884 }
2885 bitset_set(acc, pack_pos);
2886 let object = db.read_object(tree)?;
2887 for entry in TreeEntries::new(format, &object.body) {
2888 let entry = entry?;
2889 if entry.is_gitlink() {
2890 continue;
2891 }
2892 if entry.is_tree() {
2893 if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
2894 return Ok(false);
2895 }
2896 } else {
2897 let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
2898 eprintln!(
2899 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
2900 entry.oid
2901 );
2902 return Ok(false);
2903 };
2904 bitset_set(acc, blob_pos);
2905 }
2906 }
2907 Ok(true)
2908}
2909
/// A reachability bitmap loaded from disk and expanded into plain `u64` words.
///
/// Bit positions are indices into `pack_to_oid`.
pub struct LoadedPackBitmap {
    /// Number of objects the bitmap covers; determines the word count.
    object_count: u32,
    /// Object id -> bit position.
    oid_to_pack: HashMap<ObjectId, u32>,
    /// Bit position -> object id.
    pack_to_oid: Vec<ObjectId>,
    /// Expanded bitmap for each commit that has a stored entry.
    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
    /// Type bitmap: positions holding commits.
    commits: Vec<u64>,
    /// Type bitmap: positions holding trees.
    trees: Vec<u64>,
    /// Type bitmap: positions holding blobs.
    blobs: Vec<u64>,
    /// Type bitmap: positions holding tags.
    tags: Vec<u64>,
}
2923
2924impl LoadedPackBitmap {
2925 pub fn object_count(&self) -> u32 {
2926 self.object_count
2927 }
2928
2929 pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
2931 self.oid_to_pack.get(oid).copied()
2932 }
2933
2934 pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
2935 self.pack_to_oid.get(position as usize)
2936 }
2937
2938 pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
2941 self.commit_words.get(oid)
2942 }
2943
2944 pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
2946 self.commit_words.keys()
2947 }
2948
2949 pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
2951 match object_type {
2952 ObjectType::Commit => &self.commits,
2953 ObjectType::Tree => &self.trees,
2954 ObjectType::Blob => &self.blobs,
2955 ObjectType::Tag => &self.tags,
2956 }
2957 }
2958
2959 fn word_count(&self) -> usize {
2960 (self.object_count as usize).div_ceil(64)
2961 }
2962}
2963
2964pub fn load_pack_bitmap(
2971 objects_dir: &Path,
2972 format: ObjectFormat,
2973) -> Result<Option<LoadedPackBitmap>> {
2974 let pack_dir = objects_dir.join("pack");
2975 if !pack_dir.exists() {
2976 return Ok(None);
2977 }
2978 if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
2981 return Ok(Some(bitmap));
2982 }
2983 let mut bitmap_paths = Vec::new();
2984 for entry in fs::read_dir(&pack_dir)? {
2985 let path = entry?.path();
2986 if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
2987 && path
2988 .file_name()
2989 .and_then(|name| name.to_str())
2990 .is_some_and(|name| name.starts_with("pack-"))
2991 {
2992 bitmap_paths.push(path);
2993 }
2994 }
2995 bitmap_paths.sort();
2996 for bitmap_path in bitmap_paths {
2997 match load_pack_bitmap_file(&bitmap_path, format) {
2998 Ok(Some(bitmap)) => return Ok(Some(bitmap)),
2999 Ok(None) | Err(_) => continue,
3000 }
3001 }
3002 Ok(None)
3003}
3004
/// Tries to load the bitmap that accompanies `<pack_dir>/multi-pack-index`.
///
/// Loading is best effort: every failure after the initial existence check
/// (unreadable or unparsable files, missing bitmap or reverse index, checksum
/// mismatch, out-of-range positions) yields `Ok(None)` rather than an error. A
/// wrongly sized reverse-index chunk additionally prints an error and a warning
/// to stderr.
///
/// The reverse index comes from the chunk embedded in the multi-pack-index
/// unless that chunk is absent or `GIT_TEST_MIDX_READ_RIDX` is set to `0` or
/// `false` (case-insensitive); then `multi-pack-index-<checksum>.rev` is read.
fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
    let midx_path = pack_dir.join("multi-pack-index");
    if !midx_path.exists() {
        return Ok(None);
    }
    let Ok(midx_bytes) = fs::read(&midx_path) else {
        return Ok(None);
    };
    // Checked on the raw bytes before the full parse is attempted.
    if midx_has_bad_ridx_chunk(&midx_bytes, format) {
        eprintln!("error: multi-pack-index reverse-index chunk is the wrong size");
        eprintln!("warning: multi-pack bitmap is missing required reverse index");
        return Ok(None);
    }
    let midx = match MultiPackIndex::parse(&midx_bytes, format) {
        Ok(midx) => midx,
        // The parser may report the same condition itself; emit the same diagnostics.
        Err(GitError::InvalidFormat(message))
            if message == "multi-pack-index reverse-index chunk is the wrong size" =>
        {
            eprintln!("error: {message}");
            eprintln!("warning: multi-pack bitmap is missing required reverse index");
            return Ok(None);
        }
        Err(_) => return Ok(None),
    };
    let bitmap_path = pack_dir.join(format!(
        "multi-pack-index-{}.bitmap",
        midx.checksum.to_hex()
    ));
    if !bitmap_path.exists() {
        return Ok(None);
    }
    let object_count = midx.objects.len();
    // Defaults to true when the variable is unset or cannot be read.
    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
        .unwrap_or(true);
    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
        (Some(chunk), true) => {
            sley_core::trace2::data("load_midx_revindex", "source", "midx");
            chunk.clone()
        }
        _ => {
            let rev_path =
                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
            let Ok(rev_bytes) = fs::read(&rev_path) else {
                return Ok(None);
            };
            let Ok(parsed_rev) =
                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
            else {
                return Ok(None);
            };
            sley_core::trace2::data("load_midx_revindex", "source", "rev");
            parsed_rev.positions
        }
    };
    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
        return Ok(None);
    };
    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
        Ok(parsed) => parsed,
        Err(_) => return Ok(None),
    };
    // The bitmap must have been written for exactly this multi-pack-index.
    if parsed.pack_checksum != midx.checksum {
        return Ok(None);
    }

    // `reverse_index[bit]` is an index into `midx.objects`.
    let mut pack_to_oid = Vec::with_capacity(object_count);
    for &midx_pos in &reverse_index {
        let Some(entry) = midx.objects.get(midx_pos as usize) else {
            return Ok(None);
        };
        pack_to_oid.push(entry.oid);
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }
    // Bitmap entries name their commit by index into `midx.objects`.
    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
        midx.objects.get(position).map(|entry| entry.oid)
    }) {
        Ok(loaded) => Ok(Some(loaded)),
        Err(_) => Ok(None),
    }
}
3100
/// Reports whether the raw multi-pack-index `bytes` contain a `RIDX` chunk
/// whose length is not four bytes per object.
///
/// The chunk table is read directly from `bytes`. Returns `true` only when both
/// an `OIDF` chunk of exactly 256 four-byte entries and a `RIDX` chunk are
/// found and the `RIDX` length differs from `4 * object_count`, where
/// `object_count` is the last `OIDF` entry. Anything that cannot be interpreted
/// (too short, wrong signature, inconsistent chunk table, missing chunk) yields
/// `false`.
fn midx_has_bad_ridx_chunk(bytes: &[u8], format: ObjectFormat) -> bool {
    let hash_len = format.raw_len();
    // Need the 12-byte header, at least one 12-byte table row, and the trailing hash.
    if bytes.len() < 12 + 12 + hash_len || &bytes[..4] != b"MIDX" {
        return false;
    }
    // Byte 6 of the header is used as the number of chunks.
    let chunk_count = bytes[6] as usize;
    // One extra table row acts as the terminator.
    let table_len = match (chunk_count + 1).checked_mul(12) {
        Some(table_len) => table_len,
        None => return false,
    };
    let table_end = match 12usize.checked_add(table_len) {
        Some(table_end) if table_end <= bytes.len().saturating_sub(hash_len) => table_end,
        _ => return false,
    };
    // Each row is a 4-byte chunk id followed by an 8-byte big-endian offset.
    let mut entries = Vec::with_capacity(chunk_count + 1);
    let mut cursor = 12usize;
    while cursor < table_end {
        let id = [
            bytes[cursor],
            bytes[cursor + 1],
            bytes[cursor + 2],
            bytes[cursor + 3],
        ];
        let mut raw_offset = [0u8; 8];
        raw_offset.copy_from_slice(&bytes[cursor + 4..cursor + 12]);
        entries.push((id, u64::from_be_bytes(raw_offset) as usize));
        cursor += 12;
    }
    let mut oidf = None;
    let mut ridx = None;
    // A chunk extends from its own offset to the offset in the next row.
    for pair in entries.windows(2) {
        let start = pair[0].1;
        let end = pair[1].1;
        if end < start || end > bytes.len().saturating_sub(hash_len) {
            return false;
        }
        match &pair[0].0 {
            b"OIDF" => oidf = Some((start, end)),
            b"RIDX" => ridx = Some((start, end)),
            _ => {}
        }
    }
    let Some((oidf_start, oidf_end)) = oidf else {
        return false;
    };
    let Some((ridx_start, ridx_end)) = ridx else {
        return false;
    };
    if oidf_end.saturating_sub(oidf_start) != 256 * 4 {
        return false;
    }
    // The last four bytes of the OIDF chunk are taken as the object count.
    let object_count_start = oidf_end - 4;
    let object_count = u32::from_be_bytes([
        bytes[object_count_start],
        bytes[object_count_start + 1],
        bytes[object_count_start + 2],
        bytes[object_count_start + 3],
    ]) as usize;
    ridx_end.saturating_sub(ridx_start) != object_count.saturating_mul(4)
}
3161
3162fn load_pack_bitmap_file(
3163 bitmap_path: &Path,
3164 format: ObjectFormat,
3165) -> Result<Option<LoadedPackBitmap>> {
3166 let index_path = bitmap_path.with_extension("idx");
3167 if !index_path.exists() {
3168 return Ok(None);
3169 }
3170 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
3171 let object_count = index.entries.len();
3172 let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
3173 if parsed.pack_checksum != index.pack_checksum {
3174 return Ok(None);
3175 }
3176
3177 let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
3178 pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
3179 let mut pack_to_oid = Vec::with_capacity(object_count);
3180 for index_pos in &pack_order {
3181 pack_to_oid.push(index.entries[*index_pos as usize].oid);
3182 }
3183 let mut oid_to_pack = HashMap::with_capacity(object_count);
3184 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
3185 oid_to_pack.insert(*oid, pack_pos as u32);
3186 }
3187
3188 assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
3189 index.entries.get(position).map(|entry| entry.oid)
3190 })
3191 .map(Some)
3192}
3193
3194fn assemble_loaded_bitmap(
3199 parsed: PackBitmapIndex,
3200 object_count: usize,
3201 pack_to_oid: Vec<ObjectId>,
3202 oid_to_pack: HashMap<ObjectId, u32>,
3203 lookup_oid: impl Fn(usize) -> Option<ObjectId>,
3204) -> Result<LoadedPackBitmap> {
3205 let word_count = object_count.div_ceil(64);
3206 let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
3207 let mut words = bitmap.to_words()?;
3208 words.resize(word_count, 0);
3209 Ok(words)
3210 };
3211
3212 let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
3213 let mut commit_words = HashMap::with_capacity(parsed.entries.len());
3214 for (entry_index, entry) in parsed.entries.iter().enumerate() {
3215 let mut words = expand(&entry.bitmap)?;
3216 if entry.xor_offset > 0 {
3217 let base_index = entry_index - entry.xor_offset as usize;
3218 let base = &resolved[base_index];
3219 for (dst, src) in words.iter_mut().zip(base.iter()) {
3220 *dst ^= *src;
3221 }
3222 }
3223 let words = Arc::new(words);
3224 resolved.push(Arc::clone(&words));
3225 let commit_oid = lookup_oid(entry.object_position as usize)
3226 .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
3227 commit_words.insert(commit_oid, words);
3228 }
3229
3230 Ok(LoadedPackBitmap {
3231 object_count: object_count as u32,
3232 oid_to_pack,
3233 pack_to_oid,
3234 commit_words,
3235 commits: expand(&parsed.type_bitmaps.commits)?,
3236 trees: expand(&parsed.type_bitmaps.trees)?,
3237 blobs: expand(&parsed.type_bitmaps.blobs)?,
3238 tags: expand(&parsed.type_bitmaps.tags)?,
3239 })
3240}
3241
/// Outcome of a bitmap-assisted reachability walk.
pub struct BitmapWalkResult {
    /// One bit per bitmap position; a set bit means that object was reached.
    pub words: Vec<u64>,
    /// Reached objects that have no position in the bitmap, with their types.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
3249
3250impl BitmapWalkResult {
3251 pub fn subtract(&mut self, haves: &BitmapWalkResult) {
3253 for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
3254 *dst &= !*src;
3255 }
3256 let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
3257 self.extended.retain(|(oid, _)| !have_ext.contains(oid));
3258 }
3259}
3260
/// Computes what is reachable from `roots`, using stored commit bitmaps where
/// available and walking the object graph otherwise.
///
/// * Tag roots are marked and peeled until a non-tag object is reached.
/// * Commit roots are queued for the commit walk.
/// * Tree roots have their whole closure marked; blob roots are marked directly.
///
/// During the commit walk, a commit with a stored bitmap contributes that
/// bitmap and is not walked further. Other commits are marked, their
/// (graft-adjusted) parents are queued, and — only when `include_objects` is
/// true — their tree closure is marked. Objects without a bitmap position are
/// collected in `extended`.
///
/// # Errors
///
/// Propagates read and parse errors for any object touched.
pub fn bitmap_reachable(
    bitmap: &LoadedPackBitmap,
    db: &impl ObjectReader,
    format: ObjectFormat,
    roots: &[ObjectId],
    include_objects: bool,
) -> Result<BitmapWalkResult> {
    let mut walk = BitmapFillWalk {
        bitmap,
        words: vec![0u64; bitmap.word_count()],
        extended: Vec::new(),
        extended_seen: HashSet::new(),
    };
    let mut commit_stack: Vec<ObjectId> = Vec::new();

    for root in roots {
        let mut oid = *root;
        // Peel tags until something else is found.
        loop {
            let object = db.read_object(&oid)?;
            match object.object_type {
                ObjectType::Tag => {
                    walk.mark(&oid, ObjectType::Tag);
                    let tag = Tag::parse_ref(format, &object.body)?;
                    oid = tag.object;
                }
                ObjectType::Commit => {
                    commit_stack.push(oid);
                    break;
                }
                ObjectType::Tree => {
                    // Root trees are expanded regardless of `include_objects`.
                    walk.mark_tree_closure(db, format, &oid)?;
                    break;
                }
                ObjectType::Blob => {
                    walk.mark(&oid, ObjectType::Blob);
                    break;
                }
            }
        }
    }

    while let Some(oid) = commit_stack.pop() {
        if let Some(position) = bitmap.pack_position(&oid) {
            if bitset_get(&walk.words, position) {
                continue;
            }
            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
                // The stored bitmap replaces walking this commit's history.
                bitset_or(&mut walk.words, stored);
                continue;
            }
            bitset_set(&mut walk.words, position);
        } else {
            // The commit has no bitmap position: track it in the extended list.
            if walk.extended_seen.contains(&oid) {
                continue;
            }
            walk.extended_seen.insert(oid);
            walk.extended.push((oid, ObjectType::Commit));
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
        if include_objects {
            walk.mark_tree_closure(db, format, &commit.tree)?;
        }
    }

    Ok(BitmapWalkResult {
        words: walk.words,
        extended: walk.extended,
    })
}
3343
/// Mutable state shared by the steps of a bitmap-assisted reachability walk.
struct BitmapFillWalk<'a> {
    /// Bitmap used to translate object ids into bit positions.
    bitmap: &'a LoadedPackBitmap,
    /// Bits of reached objects that have a position in `bitmap`.
    words: Vec<u64>,
    /// Reached objects without a bitmap position, in discovery order.
    extended: Vec<(ObjectId, ObjectType)>,
    /// Ids already recorded in `extended`, used for de-duplication.
    extended_seen: HashSet<ObjectId>,
}
3350
3351impl BitmapFillWalk<'_> {
3352 fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
3354 if let Some(position) = self.bitmap.pack_position(oid) {
3355 if bitset_get(&self.words, position) {
3356 return false;
3357 }
3358 bitset_set(&mut self.words, position);
3359 true
3360 } else {
3361 if !self.extended_seen.insert(*oid) {
3362 return false;
3363 }
3364 self.extended.push((*oid, object_type));
3365 true
3366 }
3367 }
3368
3369 fn mark_tree_closure(
3373 &mut self,
3374 db: &impl ObjectReader,
3375 format: ObjectFormat,
3376 tree: &ObjectId,
3377 ) -> Result<()> {
3378 if !self.mark(tree, ObjectType::Tree) {
3379 return Ok(());
3380 }
3381 let object = db.read_object(tree)?;
3382 for entry in TreeEntries::new(format, &object.body) {
3383 let entry = entry?;
3384 if entry.is_gitlink() {
3385 continue;
3386 }
3387 if entry.is_tree() {
3388 self.mark_tree_closure(db, format, &entry.oid)?;
3389 } else {
3390 self.mark(&entry.oid, ObjectType::Blob);
3391 }
3392 }
3393 Ok(())
3394 }
3395}
3396
/// An in-memory object store keyed by object id.
#[derive(Debug)]
pub struct ObjectDatabase {
    /// Hash format used to compute and validate object ids.
    format: ObjectFormat,
    /// Stored objects, guarded by a mutex so they can be added through `&self`.
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    /// Flag set via `with_promisor`; defaults to `false`.
    /// NOTE(review): where this flag is read is not visible in this section — confirm.
    promisor: bool,
}
3408
3409impl ObjectDatabase {
3410 pub fn new(format: ObjectFormat) -> Self {
3411 Self {
3412 format,
3413 objects: Mutex::new(HashMap::new()),
3414 promisor: false,
3415 }
3416 }
3417
3418 pub fn with_promisor(mut self, promisor: bool) -> Self {
3419 self.promisor = promisor;
3420 self
3421 }
3422
3423 pub fn contains(&self, oid: &ObjectId) -> bool {
3424 self.objects
3425 .lock()
3426 .map(|objects| objects.contains_key(oid))
3427 .unwrap_or(false)
3428 }
3429
3430 pub fn validate(&self, oid: &ObjectId) -> Result<()> {
3431 let object = self.read_object(oid)?;
3432 let actual = object.object_id(self.format)?;
3433 if &actual == oid {
3434 Ok(())
3435 } else {
3436 Err(GitError::InvalidObject(format!(
3437 "object id mismatch: expected {oid}, got {actual}"
3438 )))
3439 }
3440 }
3441}
3442
3443impl ObjectReader for ObjectDatabase {
3444 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3445 self.objects
3446 .lock()
3447 .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
3448 .get(oid)
3449 .map(Arc::clone)
3450 .or_else(|| implied_empty_tree_object(self.format, oid))
3451 .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
3452 }
3453}
3454
3455impl ObjectWriter for ObjectDatabase {
3456 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3457 let oid = object.object_id(self.format)?;
3458 self.objects
3459 .lock()
3460 .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
3461 .entry(oid)
3462 .or_insert_with(|| Arc::new(object));
3463 Ok(oid)
3464 }
3465}
3466
/// A single alternate location, identified by its filesystem path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Path recorded for this alternate.
    /// NOTE(review): presumably an alternate objects directory — confirm against callers.
    pub path: std::path::PathBuf,
}
3471
/// Settings describing how a partial clone should treat promised objects.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Optional promisor remote.
    /// NOTE(review): presumably a remote name — confirm against callers.
    pub promisor_remote: Option<String>,
    /// NOTE(review): by its name, whether a missing promised object is
    /// tolerated; the consumer is not visible here — confirm.
    pub allow_missing_promised_objects: bool,
}
3477
/// Shared, mutex-guarded map from a path to its loaded `PackData`.
/// NOTE(review): keys are presumably pack file paths — confirm against callers.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
3482
/// Bytes of a pack file, either memory-mapped or held on the heap.
///
/// Dereferences to `[u8]` in both cases.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file; only available with the `mmap` feature.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read fully into memory.
    Heap(Vec<u8>),
}
3491
3492impl std::ops::Deref for PackData {
3493 type Target = [u8];
3494
3495 fn deref(&self) -> &[u8] {
3496 match self {
3497 #[cfg(feature = "mmap")]
3498 Self::Mapped(mapped) => mapped,
3499 Self::Heap(bytes) => bytes,
3500 }
3501 }
3502}
3503
/// Loads the pack at `pack_path`, memory-mapping it when possible.
///
/// If mapping fails for any reason, the file is read into memory instead.
///
/// # Errors
///
/// Propagates the error from the fallback `fs::read`.
#[cfg(feature = "mmap")]
fn load_pack_data(pack_path: &Path) -> Result<PackData> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(pack_path) {
        return Ok(PackData::Mapped(mapped));
    }
    let bytes = fs::read(pack_path)?;
    Ok(PackData::Heap(bytes))
}
3513
3514#[cfg(not(feature = "mmap"))]
3515fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3516 Ok(PackData::Heap(fs::read(pack_path)?))
3517}
3518
/// Loads the bytes of the pack index at `index_path`, memory-mapping them when
/// possible and reading the file into memory otherwise.
///
/// NOTE(review): the mapping goes through `MappedFile::open_pack` even though
/// the file is an index — confirm that this is the intended opener.
///
/// # Errors
///
/// Propagates the error from the fallback `fs::read`.
#[cfg(feature = "mmap")]
fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(index_path) {
        return Ok(Arc::new(mapped));
    }
    let bytes = fs::read(index_path)?;
    Ok(Arc::new(bytes))
}
3526
3527#[cfg(not(feature = "mmap"))]
3528fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3529 Ok(Arc::new(fs::read(index_path)?))
3530}
3531
/// Loads the bytes of the multi-pack-index at `midx_path`, memory-mapping them
/// when possible and reading the file into memory otherwise.
///
/// # Errors
///
/// Propagates the error from the fallback `fs::read`.
#[cfg(feature = "mmap")]
fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_multi_pack_index(midx_path) {
        return Ok(Arc::new(mapped));
    }
    let bytes = fs::read(midx_path)?;
    Ok(Arc::new(bytes))
}
3539
3540#[cfg(not(feature = "mmap"))]
3541fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3542 Ok(Arc::new(fs::read(midx_path)?))
3543}
3544
/// Shared, mutex-guarded LRU cache of decoded objects.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;

/// Shared map from a path to that path's own mutex-guarded offset-keyed LRU cache.
/// NOTE(review): keys are presumably pack file paths — confirm against callers.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;

/// Shared map from a `u64` key to an object type paired with a `u64`.
/// NOTE(review): presumably pack offset -> (type, size) from an entry header — confirm.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Shared map from a path to that path's `PackHeaderTypeCache`.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
3570
/// Default budget (96 MiB) returned by `object_cache_budget` when
/// `SLEY_OBJECT_CACHE_BYTES` is unset or unparsable.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;

/// Default budget (96 MiB) returned by `delta_base_cache_budget` when
/// `SLEY_DELTA_BASE_CACHE_BYTES` is unset or unparsable.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
3582
3583fn cached_object_cost(object: &EncodedObject) -> usize {
3587 object.body.len().saturating_add(64)
3588}
3589
/// Reads a byte budget from environment variable `var`.
///
/// The value is trimmed and parsed as a `usize`. `default` is returned when the
/// variable is unset, is not valid Unicode, or does not parse.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|value| value.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
3598
3599fn object_cache_budget() -> usize {
3606 static BUDGET: OnceLock<usize> = OnceLock::new();
3607 *BUDGET.get_or_init(|| {
3608 cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
3609 })
3610}
3611
3612fn delta_base_cache_budget() -> usize {
3616 static BUDGET: OnceLock<usize> = OnceLock::new();
3617 *BUDGET.get_or_init(|| {
3618 cache_budget_from_env(
3619 "SLEY_DELTA_BASE_CACHE_BYTES",
3620 DEFAULT_DELTA_BASE_CACHE_BYTES,
3621 )
3622 })
3623}
3624
/// Reports whether `SLEY_VERIFY_READS` is switched on.
///
/// The flag is on when the variable is set to anything other than an empty (after trimming)
/// value or `0`. The answer is computed once per process and memoized.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    *VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|raw| {
                let flag = raw.trim();
                !(flag.is_empty() || flag == "0")
            })
            .unwrap_or(false)
    })
}
3642
3643#[derive(Debug)]
3651struct LruCache<K: std::hash::Hash + Eq + Clone> {
3652 budget: usize,
3653 used: usize,
3654 map: HashMap<K, LruEntry<K>>,
3655 head: Option<K>,
3656 tail: Option<K>,
3657}
3658
3659#[derive(Debug)]
3660struct LruEntry<K> {
3661 object: Arc<EncodedObject>,
3662 prev: Option<K>,
3663 next: Option<K>,
3664}
3665
3666impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
3667 fn new(budget: usize) -> Self {
3668 Self {
3669 budget,
3670 used: 0,
3671 map: HashMap::new(),
3672 head: None,
3673 tail: None,
3674 }
3675 }
3676
3677 fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
3678 let object = Arc::clone(&self.map.get(key)?.object);
3679 self.touch(key);
3680 Some(object)
3681 }
3682
3683 fn touch(&mut self, key: &K) {
3685 if self.tail.as_ref() == Some(key) {
3686 return;
3687 }
3688 if self.map.contains_key(key) {
3689 self.detach(key);
3690 self.attach_back(key.clone());
3691 }
3692 }
3693
3694 fn remove(&mut self, key: &K) {
3696 if let Some(entry) = self.map.get(key) {
3697 self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
3698 }
3699 self.detach(key);
3700 self.map.remove(key);
3701 }
3702
3703 fn detach(&mut self, key: &K) {
3704 let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
3705 let prev = entry.prev.take();
3706 let next = entry.next.take();
3707 (prev, next)
3708 }) else {
3709 return;
3710 };
3711
3712 match &prev {
3713 Some(prev_key) => {
3714 if let Some(prev_entry) = self.map.get_mut(prev_key) {
3715 prev_entry.next = next.clone();
3716 }
3717 }
3718 None => self.head = next.clone(),
3719 }
3720 match &next {
3721 Some(next_key) => {
3722 if let Some(next_entry) = self.map.get_mut(next_key) {
3723 next_entry.prev = prev.clone();
3724 }
3725 }
3726 None => self.tail = prev.clone(),
3727 }
3728 }
3729
3730 fn attach_back(&mut self, key: K) {
3731 let previous_tail = self.tail.replace(key.clone());
3732 match previous_tail {
3733 Some(tail_key) => {
3734 if let Some(tail_entry) = self.map.get_mut(&tail_key) {
3735 tail_entry.next = Some(key.clone());
3736 }
3737 if let Some(entry) = self.map.get_mut(&key) {
3738 entry.prev = Some(tail_key);
3739 entry.next = None;
3740 }
3741 }
3742 None => {
3743 self.head = Some(key.clone());
3744 if let Some(entry) = self.map.get_mut(&key) {
3745 entry.prev = None;
3746 entry.next = None;
3747 }
3748 }
3749 }
3750 }
3751
3752 fn clear(&mut self) {
3753 self.map.clear();
3754 self.head = None;
3755 self.tail = None;
3756 self.used = 0;
3757 }
3758
3759 fn put(&mut self, key: K, object: Arc<EncodedObject>) {
3760 if self.budget == 0 {
3761 return;
3762 }
3763 let cost = cached_object_cost(&object);
3764 if cost > self.budget {
3768 self.remove(&key);
3769 return;
3770 }
3771 if let Some(entry) = self.map.get_mut(&key) {
3772 let previous = std::mem::replace(&mut entry.object, object);
3773 self.used = self
3775 .used
3776 .saturating_sub(cached_object_cost(&previous))
3777 .saturating_add(cost);
3778 self.touch(&key);
3779 } else {
3780 self.used = self.used.saturating_add(cost);
3781 self.map.insert(
3782 key.clone(),
3783 LruEntry {
3784 object,
3785 prev: None,
3786 next: None,
3787 },
3788 );
3789 self.attach_back(key);
3790 }
3791 while self.used > self.budget {
3792 let Some(evicted) = self.head.clone() else {
3793 break;
3794 };
3795 self.remove(&evicted);
3796 }
3797 }
3798}
3799
3800type LruObjectCache = LruCache<ObjectId>;
3802type LruOffsetCache = LruCache<u64>;
3804
3805struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
3810
3811impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
3812 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
3813 self.0.lock().ok()?.get(&offset)
3814 }
3815
3816 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
3817 if let Ok(mut cache) = self.0.lock() {
3818 cache.put(offset, object);
3819 }
3820 }
3821}
3822
3823struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
3827
3828impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
3829 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
3830 self.0.lock().ok()?.get(&pack_offset).copied()
3831 }
3832
3833 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
3834 if let Ok(mut cache) = self.0.lock() {
3835 cache.insert(pack_offset, header);
3836 }
3837 }
3838}
3839
3840type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;
3845
3846type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
3850
3851type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
3855
3856#[derive(Debug)]
3861struct RegisteredPack {
3862 idx: PathBuf,
3863 pack: PathBuf,
3864 index: Mutex<Option<Arc<PackIndexViewData>>>,
3865 data: Mutex<Option<Arc<PackData>>>,
3866 delta_cache: Arc<Mutex<LruOffsetCache>>,
3867 header_type_cache: PackHeaderTypeCache,
3868}
3869
3870impl RegisteredPack {
3871 fn new(idx: PathBuf, pack: PathBuf) -> Self {
3872 Self {
3873 idx,
3874 pack,
3875 index: Mutex::new(None),
3876 data: Mutex::new(None),
3877 delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
3878 header_type_cache: Arc::new(Mutex::new(HashMap::new())),
3879 }
3880 }
3881
3882 fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
3883 if let Ok(cache) = self.index.lock()
3884 && let Some(index) = cache.as_ref()
3885 {
3886 return Ok(Arc::clone(index));
3887 }
3888 let index_bytes = load_pack_index_data(&self.idx)?;
3889 let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
3890 index_bytes,
3891 format,
3892 )?);
3893 if let Ok(mut cache) = self.index.lock() {
3894 *cache = Some(Arc::clone(&index));
3895 }
3896 Ok(index)
3897 }
3898
3899 fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
3900 if let Ok(cache) = self.data.lock()
3901 && let Some(bytes) = cache.as_ref()
3902 {
3903 return Ok(Arc::clone(bytes));
3904 }
3905 if let Ok(cache) = pack_bytes.lock()
3906 && let Some(bytes) = cache.get(&self.pack)
3907 {
3908 let bytes = Arc::clone(bytes);
3909 if let Ok(mut local_cache) = self.data.lock() {
3910 *local_cache = Some(Arc::clone(&bytes));
3911 }
3912 return Ok(bytes);
3913 }
3914 let bytes = Arc::new(load_pack_data(&self.pack)?);
3915 if let Ok(mut local_cache) = self.data.lock() {
3916 *local_cache = Some(Arc::clone(&bytes));
3917 }
3918 if let Ok(mut cache) = pack_bytes.lock() {
3919 cache.insert(self.pack.clone(), Arc::clone(&bytes));
3920 }
3921 Ok(bytes)
3922 }
3923}
3924
/// Cheap summary of a pack directory; two equal fingerprints are treated as "unchanged".
///
/// NOTE(review): presumably `modified` is the directory's mtime and the counts are the numbers
/// of `.idx` and `.pack` files found — confirm against the code that builds the fingerprint.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    /// Modification time, when one could be obtained.
    modified: Option<std::time::SystemTime>,
    /// Number of index files counted.
    idx_count: usize,
    /// Number of pack files counted.
    pack_count: usize,
}
3931
3932#[derive(Debug)]
3937struct PackRegistrySnapshot {
3938 fingerprint: PackDirFingerprint,
3939 packs: Vec<Arc<RegisteredPack>>,
3940 recent_pack: Mutex<Option<usize>>,
3941}
3942
3943impl PackRegistrySnapshot {
3944 fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
3945 Self {
3946 fingerprint,
3947 packs,
3948 recent_pack: Mutex::new(None),
3949 }
3950 }
3951
3952 fn cached_hint(&self) -> Option<usize> {
3953 self.recent_pack
3954 .lock()
3955 .ok()
3956 .and_then(|hint| *hint)
3957 .filter(|pack_index| *pack_index < self.packs.len())
3958 }
3959
3960 fn remember_hint(&self, pack_index: usize) {
3961 if let Ok(mut hint) = self.recent_pack.lock() {
3962 *hint = Some(pack_index);
3963 }
3964 }
3965}
3966
3967type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
3971
3972#[derive(Debug, Clone)]
3973struct PackLookup {
3974 pack: PathBuf,
3975 registered: Option<Arc<RegisteredPack>>,
3976 offset: u64,
3977}
3978
3979impl PackLookup {
3980 fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
3981 Self {
3982 pack: pack.pack.clone(),
3983 registered: Some(pack),
3984 offset,
3985 }
3986 }
3987
3988 fn from_path(pack: PathBuf, offset: u64) -> Self {
3989 Self {
3990 pack,
3991 registered: None,
3992 offset,
3993 }
3994 }
3995
3996 fn pack_path(&self) -> &Path {
3997 &self.pack
3998 }
3999
4000 fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
4001 match &self.registered {
4002 Some(pack) => pack.bytes(&database.pack_bytes),
4003 None => database.cached_pack_bytes(&self.pack),
4004 }
4005 }
4006
4007 fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
4008 match &self.registered {
4009 Some(pack) => database.cached_pack_index(&pack.idx),
4010 None => database.cached_pack_index(&self.pack.with_extension("idx")),
4011 }
4012 }
4013
4014 fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
4015 match &self.registered {
4016 Some(pack) => Some(Arc::clone(&pack.delta_cache)),
4017 None => database.pack_delta_cache(&self.pack),
4018 }
4019 }
4020
4021 fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
4022 match &self.registered {
4023 Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
4024 None => database.pack_header_type_cache(&self.pack),
4025 }
4026 }
4027}
4028
4029#[derive(Debug, Clone)]
4030pub struct FileObjectDatabase {
4031 loose: LooseObjectStore,
4032 objects_dir: PathBuf,
4033 alternates: Vec<PathBuf>,
4034 format: ObjectFormat,
4035 pack_bytes: PackBytesCache,
4036 pack_indexes: PackIndexCache,
4037 multi_pack_indexes: MultiPackIndexCache,
4038 multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
4039 pack_registry: PackRegistryCache,
4040 decoded: DecodedObjectCache,
4041 pack_deltas: PackDeltaCaches,
4042 pack_header_types: PackHeaderTypeCaches,
4043 promisor_objects: Arc<OnceLock<HashSet<ObjectId>>>,
4044 shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
4048}
4049
4050#[derive(Debug)]
4051pub struct ObjectPresenceChecker {
4052 db: FileObjectDatabase,
4053 pack_dir: PathBuf,
4054 midx: Option<Arc<MultiPackIndexOidLookup>>,
4055 registry: Option<Arc<PackRegistrySnapshot>>,
4056 registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
4057 recent_pack: Option<usize>,
4058 prepared_packs: bool,
4059 prepared_registry: bool,
4060}
4061
4062impl ObjectPresenceChecker {
4063 fn new(db: FileObjectDatabase) -> Self {
4064 let pack_dir = db.objects_dir.join("pack");
4065 Self {
4066 db,
4067 pack_dir,
4068 midx: None,
4069 registry: None,
4070 registry_indexes: Vec::new(),
4071 recent_pack: None,
4072 prepared_packs: false,
4073 prepared_registry: false,
4074 }
4075 }
4076
4077 pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
4078 if oid.format() != self.db.format {
4079 return Err(GitError::InvalidObjectId(format!(
4080 "object {oid} uses {}, store uses {}",
4081 oid.format().name(),
4082 self.db.format.name()
4083 )));
4084 }
4085 if self.db.loose.exists(oid)? {
4086 return Ok(true);
4087 }
4088 if self.find_packed(oid, false)? {
4089 return Ok(true);
4090 }
4091 if self.find_packed(oid, true)? {
4092 return Ok(true);
4093 }
4094 for alternate in &self.db.alternates {
4095 if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
4096 return Ok(true);
4097 }
4098 }
4099 self.db.loose.invalidate_cache();
4102 self.db.loose.exists(oid)
4103 }
4104
4105 fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
4106 self.prepare_packs(force_rescan)?;
4107 if let Some(midx) = &self.midx
4108 && midx.contains(oid)
4109 {
4110 return Ok(true);
4111 }
4112 self.prepare_registry(force_rescan)?;
4113 self.find_in_registry(oid)
4114 }
4115
4116 fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
4117 if self.prepared_packs && !force_rescan {
4118 return Ok(());
4119 }
4120 let midx_path = self.pack_dir.join("multi-pack-index");
4121 self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
4122 self.prepared_packs = true;
4123 Ok(())
4124 }
4125
4126 fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
4127 if self.prepared_registry && !force_rescan {
4128 return Ok(());
4129 }
4130 let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
4131 let registry_changed = match self.registry.as_ref() {
4132 Some(cached) => !Arc::ptr_eq(cached, ®istry),
4133 None => true,
4134 };
4135 if registry_changed {
4136 self.registry_indexes = vec![None; registry.packs.len()];
4137 self.recent_pack = None;
4138 self.registry = Some(registry);
4139 }
4140 self.prepared_registry = true;
4141 Ok(())
4142 }
4143
4144 fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
4145 let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
4146 return Ok(false);
4147 };
4148 if let Some(pack_index) = self
4149 .recent_pack
4150 .filter(|pack_index| *pack_index < registry.packs.len())
4151 {
4152 let index = self.registry_index(®istry, pack_index)?;
4153 if index.find(oid).is_some() {
4154 return Ok(true);
4155 }
4156 }
4157 for pack_index in 0..registry.packs.len() {
4158 if Some(pack_index) == self.recent_pack {
4159 continue;
4160 }
4161 let index = self.registry_index(®istry, pack_index)?;
4162 if index.find(oid).is_some() {
4163 self.recent_pack = Some(pack_index);
4164 return Ok(true);
4165 }
4166 }
4167 Ok(false)
4168 }
4169
4170 fn registry_index(
4171 &mut self,
4172 registry: &PackRegistrySnapshot,
4173 pack_index: usize,
4174 ) -> Result<Arc<PackIndexViewData>> {
4175 if self.registry_indexes.len() != registry.packs.len() {
4176 self.registry_indexes = vec![None; registry.packs.len()];
4177 self.recent_pack = None;
4178 }
4179 if let Some(index) = self
4180 .registry_indexes
4181 .get(pack_index)
4182 .and_then(|index| index.as_ref())
4183 {
4184 return Ok(Arc::clone(index));
4185 }
4186 let index = registry.packs[pack_index].index(self.db.format)?;
4187 if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
4188 *slot = Some(Arc::clone(&index));
4189 }
4190 Ok(index)
4191 }
4192}
4193
4194fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
4198 let Ok(contents) = std::fs::read_to_string(shallow_file) else {
4199 return HashSet::new();
4200 };
4201 contents
4202 .lines()
4203 .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
4204 .collect()
4205}
4206
4207pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
4208 env::var_os("GIT_OBJECT_DIRECTORY")
4209 .map(PathBuf::from)
4210 .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
4211}
4212
/// Returns the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is returned as-is.
/// 2. If `git_dir/commondir` is readable, its trimmed contents name the directory (relative
///    paths are taken relative to `git_dir`); the result is canonicalized when possible.
/// 3. Otherwise `git_dir` itself is returned.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return overridden.into();
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(pointer.trim());
    let common = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    // Fall back to the uncanonicalized path when canonicalization fails.
    fs::canonicalize(&common).unwrap_or(common)
}
4230
4231pub fn repository_object_ids(
4232 git_dir: impl AsRef<Path>,
4233 format: ObjectFormat,
4234) -> Result<Vec<ObjectId>> {
4235 object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
4236}
4237
4238pub fn object_ids_in_objects_dir(
4239 objects_dir: impl AsRef<Path>,
4240 format: ObjectFormat,
4241) -> Result<Vec<ObjectId>> {
4242 let objects_dir = objects_dir.as_ref();
4243 let mut oids = HashSet::new();
4244 collect_loose_object_ids(objects_dir, format, &mut oids)?;
4245 collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
4246 let mut oids = oids.into_iter().collect::<Vec<_>>();
4247 oids.sort_by_key(ObjectId::to_hex);
4248 Ok(oids)
4249}
4250
4251fn collect_loose_object_ids(
4252 objects_dir: &Path,
4253 format: ObjectFormat,
4254 oids: &mut HashSet<ObjectId>,
4255) -> Result<()> {
4256 if !objects_dir.exists() {
4257 return Ok(());
4258 }
4259 let hex_len = format.hex_len();
4260 for entry in fs::read_dir(objects_dir)? {
4261 let entry = entry?;
4262 if !entry.file_type()?.is_dir() {
4263 continue;
4264 }
4265 let name = entry.file_name();
4266 let Some(fanout) = name.to_str() else {
4267 continue;
4268 };
4269 if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4270 continue;
4271 }
4272 for object_entry in fs::read_dir(entry.path())? {
4273 let object_entry = object_entry?;
4274 if !object_entry.file_type()?.is_file() {
4275 continue;
4276 }
4277 let name = object_entry.file_name();
4278 let Some(suffix) = name.to_str() else {
4279 continue;
4280 };
4281 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4282 continue;
4283 }
4284 oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
4285 }
4286 }
4287 Ok(())
4288}
4289
4290fn collect_loose_fanout_object_ids(
4291 objects_dir: &Path,
4292 format: ObjectFormat,
4293 fanout: u8,
4294 oids: &mut HashSet<ObjectId>,
4295) -> Result<()> {
4296 let fanout_hex = format!("{fanout:02x}");
4297 let fanout_dir = objects_dir.join(&fanout_hex);
4298 let entries = match fs::read_dir(&fanout_dir) {
4299 Ok(entries) => entries,
4300 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
4301 Err(err) => return Err(GitError::Io(err.to_string())),
4302 };
4303 let hex_len = format.hex_len();
4304 for object_entry in entries {
4305 let object_entry = object_entry?;
4306 let name = object_entry.file_name();
4307 let Some(suffix) = name.to_str() else {
4308 continue;
4309 };
4310 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4311 continue;
4312 }
4313 oids.insert(ObjectId::from_hex(
4314 format,
4315 &format!("{fanout_hex}{suffix}"),
4316 )?);
4317 }
4318 Ok(())
4319}
4320
4321#[derive(Debug, Default)]
4322struct LoosePresenceCache {
4323 loaded_fanouts: HashSet<u8>,
4324 objects: HashSet<ObjectId>,
4325}
4326
4327pub fn packed_object_ids(
4332 objects_dir: impl AsRef<Path>,
4333 format: ObjectFormat,
4334) -> Result<HashSet<ObjectId>> {
4335 let mut oids = HashSet::new();
4336 collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
4337 Ok(oids)
4338}
4339
4340fn collect_packed_object_ids(
4341 pack_dir: &Path,
4342 format: ObjectFormat,
4343 oids: &mut HashSet<ObjectId>,
4344) -> Result<()> {
4345 if !pack_dir.exists() {
4346 return Ok(());
4347 }
4348 let mut midx_pack_names = HashSet::new();
4349 let midx_path = pack_dir.join("multi-pack-index");
4350 if midx_path.exists() {
4351 let midx = MultiPackIndex::parse_without_checksum(&fs::read(&midx_path)?, format)?;
4352 midx_pack_names.extend(midx.pack_names.iter().cloned());
4353 oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
4354 }
4355 for entry in fs::read_dir(pack_dir)? {
4356 let path = entry?.path();
4357 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
4358 continue;
4359 }
4360 if !path.with_extension("pack").exists() {
4361 continue;
4362 }
4363 let index = match PackIndex::parse(&fs::read(&path)?, format) {
4364 Ok(index) => index,
4365 Err(_err)
4366 if path
4367 .file_name()
4368 .and_then(|name| name.to_str())
4369 .is_some_and(|name| midx_pack_names.contains(name)) =>
4370 {
4371 eprintln!(
4372 "error: packfile {} index unavailable",
4373 path.with_extension("pack").display()
4374 );
4375 continue;
4376 }
4377 Err(err) => return Err(err),
4378 };
4379 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
4380 }
4381 Ok(())
4382}
4383
4384impl FileObjectDatabase {
4385 pub fn object_format(&self) -> ObjectFormat {
4387 self.format
4388 }
4389
4390 pub fn objects_dir(&self) -> &Path {
4392 &self.objects_dir
4393 }
4394
4395 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4396 let objects_dir = objects_dir.into();
4397 Self {
4398 loose: LooseObjectStore::new(objects_dir.clone(), format),
4399 alternates: alternate_object_dirs(&objects_dir),
4400 objects_dir,
4401 format,
4402 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4403 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4404 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4405 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4406 pack_registry: Arc::new(Mutex::new(None)),
4407 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4408 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4409 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4410 promisor_objects: Arc::new(OnceLock::new()),
4411 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4412 }
4413 }
4414
4415 fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4416 let objects_dir = objects_dir.into();
4417 Self {
4418 loose: LooseObjectStore::new(objects_dir.clone(), format),
4419 alternates: Vec::new(),
4420 objects_dir,
4421 format,
4422 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
4423 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4424 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
4425 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
4426 pack_registry: Arc::new(Mutex::new(None)),
4427 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
4428 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
4429 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
4430 promisor_objects: Arc::new(OnceLock::new()),
4431 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
4432 }
4433 }
4434
4435 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
4436 Self::new(repository_objects_dir(git_dir), format)
4437 }
4438
4439 pub fn refresh_read_cache(&self) {
4444 if let Ok(mut cache) = self.pack_registry.lock() {
4445 *cache = None;
4446 }
4447 if let Ok(mut cache) = self.pack_indexes.lock() {
4448 cache.clear();
4449 }
4450 if let Ok(mut cache) = self.multi_pack_indexes.lock() {
4451 cache.clear();
4452 }
4453 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
4454 cache.clear();
4455 }
4456 if let Ok(mut cache) = self.pack_bytes.lock() {
4457 cache.clear();
4458 }
4459 if let Ok(mut cache) = self.pack_deltas.lock() {
4460 cache.clear();
4461 }
4462 if let Ok(mut cache) = self.pack_header_types.lock() {
4463 cache.clear();
4464 }
4465 if let Ok(mut cache) = self.decoded.lock() {
4466 cache.clear();
4467 }
4468 self.loose.invalidate_cache();
4469 }
4470
4471 pub fn loose(&self) -> &LooseObjectStore {
4472 &self.loose
4473 }
4474
4475 pub fn presence_checker(&self) -> ObjectPresenceChecker {
4476 ObjectPresenceChecker::new(self.clone())
4477 }
4478
4479 pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
4480 self.install_pack_with_options(pack, RawPackInstallOptions::default())
4481 }
4482
4483 pub fn write_blob_as_pack(
4484 &self,
4485 oid: ObjectId,
4486 object: &EncodedObject,
4487 compression_level: u32,
4488 ) -> Result<ObjectId> {
4489 if object.object_type != ObjectType::Blob {
4490 return Err(GitError::InvalidObject(
4491 "write_blob_as_pack requires a blob object".into(),
4492 ));
4493 }
4494 if oid.format() != self.format {
4495 return Err(GitError::InvalidObjectId(format!(
4496 "object {oid} uses {}, store uses {}",
4497 oid.format().name(),
4498 self.format.name()
4499 )));
4500 }
4501 if self.contains(&oid)? {
4502 return Ok(oid);
4503 }
4504 let input = [PackInput {
4505 oid: &oid,
4506 object,
4507 }];
4508 let options = PackWriteOptions::new()
4509 .with_window(0)
4510 .with_depth(0)
4511 .with_reorder(false)
4512 .with_compression_level(compression_level);
4513 let pack = PackFile::write_packed_with_known_ids_and_options(&input, self.format, &options)?;
4514 self.install_pack(&pack)?;
4515 Ok(oid)
4516 }
4517
4518 pub fn write_blobs_as_pack(
4519 &self,
4520 objects: &[(ObjectId, EncodedObject)],
4521 compression_level: u32,
4522 ) -> Result<()> {
4523 let mut seen = HashSet::with_capacity(objects.len());
4524 let mut inputs = Vec::new();
4525 for (oid, object) in objects {
4526 if object.object_type != ObjectType::Blob {
4527 return Err(GitError::InvalidObject(
4528 "write_blobs_as_pack requires blob objects".into(),
4529 ));
4530 }
4531 if oid.format() != self.format {
4532 return Err(GitError::InvalidObjectId(format!(
4533 "object {oid} uses {}, store uses {}",
4534 oid.format().name(),
4535 self.format.name()
4536 )));
4537 }
4538 if seen.insert(*oid) && !self.contains(oid)? {
4539 inputs.push(PackInput { oid, object });
4540 }
4541 }
4542 if inputs.is_empty() {
4543 return Ok(());
4544 }
4545 let options = PackWriteOptions::new()
4546 .with_window(0)
4547 .with_depth(0)
4548 .with_reorder(false)
4549 .with_compression_level(compression_level);
4550 let pack = PackFile::write_packed_with_known_ids_and_options(&inputs, self.format, &options)?;
4551 self.install_pack(&pack)?;
4552 Ok(())
4553 }
4554
4555 pub fn install_pack_with_options(
4556 &self,
4557 pack: &PackWrite,
4558 options: RawPackInstallOptions,
4559 ) -> Result<PackInstallResult> {
4560 if pack.checksum.format() != self.format {
4561 return Err(GitError::InvalidObjectId(format!(
4562 "pack checksum uses {}, store uses {}",
4563 pack.checksum.format().name(),
4564 self.format.name()
4565 )));
4566 }
4567 for entry in &pack.entries {
4568 if entry.oid.format() != self.format {
4569 return Err(GitError::InvalidObjectId(format!(
4570 "pack entry {} uses {}, store uses {}",
4571 entry.oid,
4572 entry.oid.format().name(),
4573 self.format.name()
4574 )));
4575 }
4576 }
4577 let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
4578 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4579 if canonical_index.pack_checksum != pack.checksum
4580 || parsed_index.pack_checksum != pack.checksum
4581 {
4582 return Err(GitError::InvalidFormat(
4583 "pack and index checksums do not match pack write".into(),
4584 ));
4585 }
4586 if pack.index != canonical_index.index {
4587 return Err(GitError::InvalidFormat(
4588 "pack index does not match pack contents".into(),
4589 ));
4590 }
4591
4592 let pack_dir = self.objects_dir.join("pack");
4593 fs::create_dir_all(&pack_dir)?;
4594 let pack_name = format!("pack-{}", pack.checksum.to_hex());
4595 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4596 let index_path = pack_dir.join(format!("{pack_name}.idx"));
4597 if !pack_path.exists() || !index_path.exists() {
4598 write_pack_component(&pack_path, &pack.pack)?;
4599 write_pack_component(&index_path, &pack.index)?;
4600 }
4601 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4602 Ok(PackInstallResult {
4603 pack_name,
4604 pack_path,
4605 index_path,
4606 promisor_path,
4607 object_ids: canonical_index
4608 .entries
4609 .iter()
4610 .map(|entry| entry.oid)
4611 .collect(),
4612 })
4613 }
4614
4615 pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
4623 self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
4624 }
4625
4626 pub fn install_written_pack_with_options(
4627 &self,
4628 pack: &PackWrite,
4629 options: RawPackInstallOptions,
4630 ) -> Result<PackInstallResult> {
4631 validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
4632 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4633 if parsed_index.pack_checksum != pack.checksum {
4634 return Err(GitError::InvalidFormat(
4635 "pack write index checksum does not match pack".into(),
4636 ));
4637 }
4638 if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
4639 return Err(GitError::InvalidFormat(
4640 "pack write index does not match generated entries".into(),
4641 ));
4642 }
4643 self.install_generated_pack_unchecked(pack, options)
4644 }
4645
4646 fn install_generated_pack_unchecked(
4647 &self,
4648 pack: &PackWrite,
4649 options: RawPackInstallOptions,
4650 ) -> Result<PackInstallResult> {
4651 let pack_dir = self.objects_dir.join("pack");
4652 fs::create_dir_all(&pack_dir)?;
4653 let pack_name = format!("pack-{}", pack.checksum.to_hex());
4654 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4655 let index_path = pack_dir.join(format!("{pack_name}.idx"));
4656 if !pack_path.exists() || !index_path.exists() {
4657 write_pack_component(&pack_path, &pack.pack)?;
4658 write_pack_component(&index_path, &pack.index)?;
4659 }
4660 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4661 Ok(PackInstallResult {
4662 pack_name,
4663 pack_path,
4664 index_path,
4665 promisor_path,
4666 object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
4667 })
4668 }
4669
4670 pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
4671 self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
4672 }
4673
4674 pub fn install_raw_pack_with_options(
4675 &self,
4676 pack_bytes: &[u8],
4677 options: RawPackInstallOptions,
4678 ) -> Result<PackInstallResult> {
4679 let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
4680 let pack_dir = self.objects_dir.join("pack");
4681 fs::create_dir_all(&pack_dir)?;
4682 let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
4683 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4684 let index_path = pack_dir.join(format!("{pack_name}.idx"));
4685 if !pack_path.exists() || !index_path.exists() {
4686 write_pack_component(&pack_path, pack_bytes)?;
4687 write_pack_component(&index_path, &built.index)?;
4688 }
4689 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4690 Ok(PackInstallResult {
4691 pack_name,
4692 pack_path,
4693 index_path,
4694 promisor_path,
4695 object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
4696 })
4697 }
4698
4699 pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
4700 if self.loose.exists(oid)? {
4701 return Ok(true);
4702 }
4703 if self.find_pack_containing(oid)?.is_some() {
4704 return Ok(true);
4705 }
4706 for alternate in &self.alternates {
4707 if Self::without_alternates(alternate, self.format).contains(oid)? {
4708 return Ok(true);
4709 }
4710 }
4711 self.loose.invalidate_cache();
4714 self.loose.exists(oid)
4715 }
4716
4717 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4718 let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
4719 .into_iter()
4720 .collect::<HashSet<_>>();
4721 for alternate in &self.alternates {
4722 oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
4723 }
4724 let mut oids = oids.into_iter().collect::<Vec<_>>();
4725 oids.sort_by_key(ObjectId::to_hex);
4726 Ok(oids)
4727 }
4728
4729 pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
4730 if let Some(disk_size) = self.loose.disk_size(oid)? {
4731 return Ok(Some(ObjectStorageInfo {
4732 disk_size,
4733 deltabase: zero_oid(self.format)?,
4734 }));
4735 }
4736 if let Some(info) = self.packed_object_storage_info(oid)? {
4737 return Ok(Some(info));
4738 }
4739 for alternate in &self.alternates {
4740 if let Some(info) =
4741 Self::without_alternates(alternate, self.format).object_storage_info(oid)?
4742 {
4743 return Ok(Some(info));
4744 }
4745 }
4746 self.loose.invalidate_cache();
4749 if let Some(disk_size) = self.loose.disk_size(oid)? {
4750 return Ok(Some(ObjectStorageInfo {
4751 disk_size,
4752 deltabase: zero_oid(self.format)?,
4753 }));
4754 }
4755 Ok(None)
4756 }
4757
4758 pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
4759 let mut matches = self.object_ids_with_prefix(prefix)?;
4760 Ok(match matches.len() {
4761 0 => ObjectPrefixResolution::Missing,
4762 1 => ObjectPrefixResolution::Unique(matches.remove(0)),
4763 _ => ObjectPrefixResolution::Ambiguous(matches),
4764 })
4765 }
4766
4767 pub fn object_ids_with_prefix(&self, prefix: &str) -> Result<Vec<ObjectId>> {
4768 validate_object_id_prefix(self.format, prefix)?;
4769 let mut matches = Vec::new();
4770 for oid in self.object_ids()? {
4771 if object_id_matches_prefix(&oid, prefix) {
4772 matches.push(oid);
4773 }
4774 }
4775 Ok(matches)
4776 }
4777
    /// Looks up the type and size of `oid`.
    ///
    /// Sources are tried in this order: the implied empty tree, the decoded
    /// object cache, the loose store, the local packs, and finally each
    /// alternate (opened without its own alternates). After a full miss the
    /// loose-presence cache is invalidated and the loose store is asked again.
    ///
    /// Returns `Ok(None)` when no source has the object. Errors from the loose
    /// store, the pack lookup, or pack header decoding are propagated.
    pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
        // The empty tree is answered directly with a zero-length body.
        if implied_empty_tree_object(self.format, oid).is_some() {
            return Ok(Some((ObjectType::Tree, 0)));
        }
        // An already decoded object supplies its type and body length; a
        // poisoned cache lock is treated as a plain miss.
        if let Ok(mut cache) = self.decoded.lock()
            && let Some(object) = cache.get(oid)
        {
            return Ok(Some((object.object_type, object.body.len() as u64)));
        }
        if let Some(header) = self.loose.read_header(oid)? {
            return Ok(Some(header));
        }
        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
            let bytes = pack_lookup.pack_bytes(self)?;
            let type_cache = pack_lookup.header_type_cache(self);
            // Resolves the type of a delta base named by object id by
            // recursing into this method for that base.
            let resolve_ref_base = |base: &ObjectId| {
                self.read_object_header(base)
                    .map(|header| header.map(|(t, _)| t))
            };
            // Use the per-pack header type cache when one is available.
            let header = match &type_cache {
                Some(cache) => {
                    let mut adapter = PackHeaderTypeCacheAdapter(cache);
                    sley_pack::read_object_header_at_with_cache(
                        &bytes,
                        pack_lookup.offset,
                        self.format,
                        resolve_ref_base,
                        &mut adapter,
                    )?
                }
                None => sley_pack::read_object_header_at(
                    &bytes,
                    pack_lookup.offset,
                    self.format,
                    resolve_ref_base,
                )?,
            };
            return Ok(Some(header));
        }
        for alternate in &self.alternates {
            if let Some(header) =
                Self::without_alternates(alternate, self.format).read_object_header(oid)?
            {
                return Ok(Some(header));
            }
        }
        // Last resort: drop the cached loose listing and probe the loose
        // store once more.
        self.loose.invalidate_cache();
        if let Some(header) = self.loose.read_header(oid)? {
            return Ok(Some(header));
        }
        Ok(None)
    }
4845
4846 fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
4847 if let Ok(mut cache) = self.decoded.lock()
4850 && let Some(object) = cache.get(oid)
4851 {
4852 return Ok(Some(object));
4853 }
4854 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
4855 return Ok(None);
4856 };
4857 self.read_packed_object_at_lookup(oid, &pack_lookup)
4858 .map(Some)
4859 }
4860
    /// Decodes the object stored at `pack_lookup` and caches the result.
    ///
    /// A hit in the decoded-object cache short-circuits the read. Otherwise
    /// the pack bytes are obtained through `pack_lookup`, the object at
    /// `pack_lookup.offset` is decoded (using the per-pack delta cache when
    /// one is available), and the decoded object is stored under `oid`.
    ///
    /// # Errors
    ///
    /// Propagates pack loading and decoding failures. When
    /// `verify_reads_enabled()` is true, returns `GitError::InvalidObject` if
    /// the decoded object's id differs from `oid`.
    fn read_packed_object_at_lookup(
        &self,
        oid: &ObjectId,
        pack_lookup: &PackLookup,
    ) -> Result<Arc<EncodedObject>> {
        // A poisoned cache lock is treated as a miss.
        if let Ok(mut cache) = self.decoded.lock()
            && let Some(object) = cache.get(oid)
        {
            return Ok(object);
        }
        let bytes = pack_lookup.pack_bytes(self)?;
        let delta_cache = pack_lookup.delta_cache(self);
        let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
        // Delta bases named by object id are read through the full
        // `read_object` path of this database.
        let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
        let object = match &delta_adapter {
            Some(adapter) => sley_pack::read_object_at_with_cache_arc(
                &bytes,
                pack_lookup.offset,
                self.format,
                resolve_ref_base,
                adapter,
            )?,
            None => sley_pack::read_object_at_arc(
                &bytes,
                pack_lookup.offset,
                self.format,
                resolve_ref_base,
            )?,
        };
        if verify_reads_enabled() {
            // Recompute the id from the decoded object and compare it with
            // the id the lookup was made for.
            let actual = object.object_id(self.format)?;
            if actual != *oid {
                return Err(GitError::InvalidObject(format!(
                    "pack object id mismatch: index says {oid}, decoded {actual}"
                )));
            }
        }
        // Caching is best-effort: a poisoned lock only skips the insert.
        if let Ok(mut cache) = self.decoded.lock() {
            cache.put(*oid, Arc::clone(&object));
        }
        Ok(object)
    }
4915
4916 fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
4920 let mut caches = self.pack_deltas.lock().ok()?;
4921 let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
4922 Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
4923 });
4924 Some(Arc::clone(cache))
4925 }
4926
4927 fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
4931 let mut caches = self.pack_header_types.lock().ok()?;
4932 let cache = caches
4933 .entry(pack_path.to_path_buf())
4934 .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
4935 Some(Arc::clone(cache))
4936 }
4937
4938 fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
4943 if let Ok(cache) = self.pack_bytes.lock()
4944 && let Some(bytes) = cache.get(pack_path)
4945 {
4946 return Ok(Arc::clone(bytes));
4947 }
4948 let bytes = Arc::new(load_pack_data(pack_path)?);
4949 if let Ok(mut cache) = self.pack_bytes.lock() {
4950 cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
4951 }
4952 Ok(bytes)
4953 }
4954
4955 fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
4959 if let Ok(cache) = self.pack_indexes.lock()
4960 && let Some(index) = cache.get(index_path)
4961 {
4962 return Ok(Arc::clone(index));
4963 }
4964 let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
4965 if let Ok(mut cache) = self.pack_indexes.lock() {
4966 cache.insert(index_path.to_path_buf(), Arc::clone(&index));
4967 }
4968 Ok(index)
4969 }
4970
    /// Returns the object-id lookup view of the multi-pack-index at
    /// `midx_path`, loading and caching it on first use.
    ///
    /// Returns `Ok(None)` when the file does not exist, or when parsing fails
    /// with an `InvalidFormat` message that starts with
    /// `"multi-pack-index hash id "`; in that case a hash-version mismatch
    /// error is printed to stderr and the index is ignored. Any other parse
    /// error is propagated.
    fn cached_multi_pack_index_oid_lookup(
        &self,
        midx_path: &Path,
    ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
        if !midx_path.exists() {
            return Ok(None);
        }
        // A poisoned cache lock is treated as a miss.
        if let Ok(cache) = self.multi_pack_oid_lookups.lock()
            && let Some(midx) = cache.get(midx_path)
        {
            return Ok(Some(Arc::clone(midx)));
        }
        let bytes = load_multi_pack_index_lookup_data(midx_path)?;
        let midx = match MultiPackIndexOidLookup::parse(bytes, self.format) {
            Ok(midx) => Arc::new(midx),
            Err(GitError::InvalidFormat(message))
                if message.starts_with("multi-pack-index hash id ") =>
            {
                // The first whitespace-separated token after the prefix is
                // reported as the version found in the file; "0" is used when
                // nothing follows the prefix.
                let actual = message
                    .strip_prefix("multi-pack-index hash id ")
                    .and_then(|rest| rest.split_whitespace().next())
                    .unwrap_or("0");
                // Version number this store reports for its own hash format.
                let expected = match self.format {
                    ObjectFormat::Sha1 => 1,
                    ObjectFormat::Sha256 => 2,
                };
                eprintln!(
                    "error: multi-pack-index hash version {actual} does not match version {expected}"
                );
                return Ok(None);
            }
            Err(err) => return Err(err),
        };
        // Caching is best-effort: a poisoned lock only skips the insert.
        if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
            cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
        }
        Ok(Some(midx))
    }
5009
    /// Returns a snapshot of the packs in `pack_dir`, rescanning when needed.
    ///
    /// Unless `force_rescan` is set, an already cached snapshot is returned
    /// as-is. Otherwise the directory is scanned. If the cached snapshot has
    /// the same fingerprint and the same pack set as the fresh scan, the
    /// cached `Arc` is returned unchanged, so its pointer identity is
    /// preserved; otherwise the fresh scan replaces it.
    ///
    /// If the registry lock is poisoned, the fresh scan is returned without
    /// being cached.
    fn cached_pack_registry(
        &self,
        pack_dir: &Path,
        force_rescan: bool,
    ) -> Result<Arc<PackRegistrySnapshot>> {
        if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
            return Ok(registry);
        }
        let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
        if let Ok(mut cache) = self.pack_registry.lock() {
            match cache.as_ref() {
                // Nothing changed on disk: keep handing out the existing
                // snapshot instead of the equivalent fresh one.
                Some(existing)
                    if existing.fingerprint == scanned.fingerprint
                        && same_registered_pack_set(&existing.packs, &scanned.packs) =>
                {
                    return Ok(Arc::clone(existing));
                }
                _ => {
                    *cache = Some(Arc::clone(&scanned));
                }
            }
        }
        Ok(scanned)
    }
5038
5039 fn find_in_pack_registry(
5040 &self,
5041 registry: Arc<PackRegistrySnapshot>,
5042 oid: &ObjectId,
5043 ) -> Result<Option<PackLookup>> {
5044 let hinted_pack_index = registry.cached_hint();
5045 if let Some(pack_index) = hinted_pack_index {
5046 let pack = ®istry.packs[pack_index];
5047 match pack.index(self.format) {
5048 Ok(index) => {
5049 if let Some(entry) = index.find(oid) {
5050 return Ok(Some(PackLookup::from_registered(
5051 Arc::clone(pack),
5052 entry.offset,
5053 )));
5054 }
5055 }
5056 Err(_) => {
5057 eprintln!("error: packfile {} index unavailable", pack.pack.display());
5058 }
5059 }
5060 }
5061 for (pack_index, pack) in registry.packs.iter().enumerate() {
5062 if Some(pack_index) == hinted_pack_index {
5063 continue;
5064 }
5065 let index = match pack.index(self.format) {
5066 Ok(index) => index,
5067 Err(_) => {
5068 eprintln!("error: packfile {} index unavailable", pack.pack.display());
5069 continue;
5070 }
5071 };
5072 if let Some(entry) = index.find(oid) {
5073 registry.remember_hint(pack_index);
5074 return Ok(Some(PackLookup::from_registered(
5075 Arc::clone(pack),
5076 entry.offset,
5077 )));
5078 }
5079 }
5080 Ok(None)
5081 }
5082
5083 fn read_packed_object_from_other_packs(
5089 &self,
5090 oid: &ObjectId,
5091 exclude: &PackLookup,
5092 ) -> Result<Option<Arc<EncodedObject>>> {
5093 let pack_dir = self.objects_dir.join("pack");
5094 let Ok(entries) = fs::read_dir(&pack_dir) else {
5095 return Ok(None);
5096 };
5097 let excluded_pack = exclude.pack_path().to_path_buf();
5098 for entry in entries {
5099 let idx_path = entry?.path();
5100 if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
5101 continue;
5102 }
5103 let pack_path = idx_path.with_extension("pack");
5104 if pack_path == excluded_pack {
5105 continue;
5106 }
5107 let Ok(idx_bytes) = fs::read(&idx_path) else {
5108 continue;
5109 };
5110 let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
5111 continue;
5112 };
5113 let Some(entry) = index.find(oid) else {
5114 continue;
5115 };
5116 let candidate = PackLookup::from_path(pack_path, entry.offset);
5117 if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
5118 return Ok(Some(object));
5119 }
5120 }
5121 Ok(None)
5122 }
5123
5124 fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
5125 if oid.format() != self.format {
5126 return Err(GitError::InvalidObjectId(format!(
5127 "object {oid} uses {}, store uses {}",
5128 oid.format().name(),
5129 self.format.name()
5130 )));
5131 }
5132 let pack_dir = self.objects_dir.join("pack");
5133 if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
5138 && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
5139 {
5140 return Ok(Some(pack_paths));
5141 }
5142 if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
5143 && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
5144 {
5145 return Ok(Some(pack_paths));
5146 }
5147
5148 if !pack_dir.exists() {
5149 return Ok(None);
5150 }
5151 if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
5152 return Ok(Some(pack_paths));
5153 }
5154 let registry = self.cached_pack_registry(&pack_dir, false)?;
5158 if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(®istry), oid)? {
5159 return Ok(Some(pack_paths));
5160 }
5161 let refreshed = self.cached_pack_registry(&pack_dir, true)?;
5162 if Arc::ptr_eq(®istry, &refreshed) {
5163 return Ok(None);
5165 }
5166 self.find_in_pack_registry(refreshed, oid)
5167 }
5168
    /// Computes the on-disk size and delta base of `oid` inside a local pack.
    ///
    /// Returns `Ok(None)` when no local pack contains the object. The size is
    /// the distance from the entry's offset to the end offset reported by
    /// `scan_pack_index_offsets`, which is given the pack's trailer offset
    /// (file length minus one raw hash) as its upper limit.
    ///
    /// # Errors
    ///
    /// Returns `GitError::InvalidFormat` when the pack is shorter than its
    /// checksum, when the computed end offset lies before the entry, or when
    /// an offset-delta base has no id in the index. I/O and parse failures
    /// are propagated.
    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
        let Some(pack_lookup) = self.find_pack_containing(oid)? else {
            return Ok(None);
        };
        let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
        // The pack ends with a checksum of `raw_len` bytes; entries stop there.
        let trailer_offset = pack_len
            .checked_sub(self.format.raw_len() as u64)
            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
        let index = pack_lookup.pack_index(self)?;
        let pack = pack_lookup.pack_bytes(self)?;
        let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
        // Only an offset-style base needs to be mapped back to an id via the
        // index; a ref-style base already carries its id.
        let delta_base_offset = match &delta_base {
            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
            Some(PackDeltaBase::Ref(_)) | None => None,
        };
        let offset_info = scan_pack_index_offsets(
            &index,
            pack_lookup.offset,
            trailer_offset,
            delta_base_offset,
        )?;
        let disk_size = offset_info
            .end_offset
            .checked_sub(pack_lookup.offset)
            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
        let deltabase = match delta_base {
            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
            })?,
            Some(PackDeltaBase::Ref(oid)) => oid,
            // Non-delta entries report the zero id as their base.
            None => zero_oid(self.format)?,
        };
        Ok(Some(ObjectStorageInfo {
            disk_size,
            deltabase,
        }))
    }
5211
5212 fn find_midx_pack_containing(
5213 &self,
5214 pack_dir: &Path,
5215 oid: &ObjectId,
5216 ) -> Result<Option<PackLookup>> {
5217 let midx_path = pack_dir.join("multi-pack-index");
5218 let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
5219 return Ok(None);
5220 };
5221 self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
5222 }
5223
5224 fn midx_oid_lookup_pack_paths(
5225 &self,
5226 pack_dir: &Path,
5227 midx: &MultiPackIndexOidLookup,
5228 oid: &ObjectId,
5229 ) -> Result<Option<PackLookup>> {
5230 let Some(entry) = midx.find(oid)? else {
5231 return Ok(None);
5232 };
5233 let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
5234 return Err(GitError::InvalidFormat(
5235 "multi-pack-index object points past pack table".into(),
5236 ));
5237 };
5238 let pack_file_name = pack_name
5239 .strip_suffix(".idx")
5240 .map(|stem| format!("{stem}.pack"))
5241 .unwrap_or_else(|| pack_name.to_string());
5242 let pack = pack_dir.join(pack_file_name);
5243 Ok(Some(PackLookup::from_path(pack, entry.offset)))
5244 }
5245
5246 fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
5247 let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
5248 let cache = self.multi_pack_oid_lookups.lock().ok()?;
5249 cache.get(&midx_path).map(Arc::clone)
5250 }
5251
5252 fn cached_loaded_pack_registry(
5258 &self,
5259 _pack_dir: &Path,
5260 ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
5261 let cache = match self.pack_registry.lock() {
5262 Ok(cache) => cache,
5263 Err(_) => return Ok(None),
5264 };
5265 Ok(cache.as_ref().map(Arc::clone))
5266 }
5267}
5268
5269fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
5270 if prefix.len() < 4 || prefix.len() > format.hex_len() {
5271 return Err(GitError::InvalidObjectId(format!(
5272 "expected 4 to {} hex digits for {}, got {}",
5273 format.hex_len(),
5274 format.name(),
5275 prefix.len()
5276 )));
5277 }
5278 if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
5279 return Err(GitError::InvalidObjectId(format!(
5280 "non-hex object id prefix {prefix}"
5281 )));
5282 }
5283 Ok(())
5284}
5285
5286fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
5287 oid.to_hex()
5288 .as_bytes()
5289 .iter()
5290 .zip(prefix.as_bytes())
5291 .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
5292}
5293
5294fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
5295 match fs::metadata(pack_dir) {
5296 Ok(metadata) => Ok(metadata.modified().ok()),
5297 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
5298 Err(err) => Err(GitError::Io(err.to_string())),
5299 }
5300}
5301
5302fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
5307 let modified = pack_dir_modified(pack_dir)?;
5308 let entries = match fs::read_dir(pack_dir) {
5309 Ok(entries) => entries,
5310 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
5311 return Ok(PackRegistrySnapshot::new(
5312 PackDirFingerprint {
5313 modified,
5314 idx_count: 0,
5315 pack_count: 0,
5316 },
5317 Vec::new(),
5318 ));
5319 }
5320 Err(err) => return Err(GitError::Io(err.to_string())),
5321 };
5322
5323 let mut idx_paths = Vec::new();
5324 let mut idx_count = 0;
5325 let mut pack_count = 0;
5326 for entry in entries {
5327 let entry = entry?;
5328 let path = entry.path();
5329 match path.extension().and_then(|ext| ext.to_str()) {
5330 Some("idx") => {
5331 idx_count += 1;
5332 idx_paths.push(path);
5333 }
5334 Some("pack") => {
5335 pack_count += 1;
5336 }
5337 _ => {}
5338 }
5339 }
5340
5341 let mut packs = Vec::new();
5342 for idx in idx_paths {
5343 let pack = idx.with_extension("pack");
5344 let Ok(metadata) = fs::metadata(&pack) else {
5345 continue;
5346 };
5347 let modified = pack_sort_modified(&metadata);
5348 packs.push((
5349 modified,
5350 metadata.len(),
5351 Arc::new(RegisteredPack::new(idx, pack)),
5352 ));
5353 }
5354 packs.sort_by(|left, right| {
5359 right
5360 .0
5361 .cmp(&left.0)
5362 .then_with(|| right.1.cmp(&left.1))
5363 .then_with(|| left.2.idx.cmp(&right.2.idx))
5364 });
5365 let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
5366 Ok(PackRegistrySnapshot::new(
5367 PackDirFingerprint {
5368 modified,
5369 idx_count,
5370 pack_count,
5371 },
5372 packs,
5373 ))
5374}
5375
/// Converts a file's modification time into a sortable
/// `(seconds, nanoseconds)` pair measured from the Unix epoch.
///
/// Falls back to `(0, 0)` when the modification time is unavailable or lies
/// before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(modified) = metadata.modified() else {
        return (0, 0);
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
5388
5389fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
5392 left.len() == right.len()
5393 && left
5394 .iter()
5395 .zip(right.iter())
5396 .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
5397}
5398
/// Collects the alternate object directories configured for `objects_dir`.
///
/// Two sources are combined, in this order:
/// 1. the `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    the platform's path-list rules and with empty entries dropped;
/// 2. the `info/alternates` file, one path per line. Blank lines and lines
///    starting with `#` are ignored, a trailing `\r` is stripped, lines that
///    are not valid UTF-8 are skipped, and relative paths are resolved
///    against `objects_dir`.
///
/// A missing or unreadable alternates file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        // `split_paths` works on the raw OS string, so paths that are not
        // valid UTF-8 are preserved instead of being lossily rewritten, and
        // the platform's own list separator is used.
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
5429
/// `ObjectReader` backed by the on-disk database: loose objects, local packs,
/// and alternates.
impl ObjectReader for FileObjectDatabase {
    /// Reports whether `oid` is in the set returned by `promisor_objects`.
    fn is_promised_object(&self, oid: &ObjectId) -> bool {
        self.promisor_objects().contains(oid)
    }

    /// Reports whether any shallow graft is recorded for this repository.
    ///
    /// The graft set is loaded once, from the `shallow` file in the parent
    /// directory of the objects directory; it is empty when the objects
    /// directory has no parent.
    fn has_shallow_grafts(&self) -> bool {
        !self
            .shallow_grafts
            .get_or_init(|| {
                let shallow_file = self
                    .objects_dir
                    .parent()
                    .map(|git_dir| git_dir.join("shallow"));
                match shallow_file {
                    Some(path) => read_shallow_grafts(&path, self.format),
                    None => HashSet::new(),
                }
            })
            .is_empty()
    }

    /// Reports whether `oid` is listed as a shallow graft.
    ///
    /// Shares the lazily initialized graft set with `has_shallow_grafts`.
    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
        self.shallow_grafts
            .get_or_init(|| {
                let shallow_file = self
                    .objects_dir
                    .parent()
                    .map(|git_dir| git_dir.join("shallow"));
                match shallow_file {
                    Some(path) => read_shallow_grafts(&path, self.format),
                    None => HashSet::new(),
                }
            })
            .contains(oid)
    }

    /// Reads and decodes `oid`.
    ///
    /// The implied empty tree is answered directly. Otherwise local packs are
    /// tried first, then the loose store, then the packed path again (which
    /// also consults the decoded cache), then each alternate, and finally the
    /// loose store once more after its presence cache has been invalidated.
    ///
    /// # Errors
    ///
    /// When a pack entry exists but fails to decode with an error other than
    /// `NotFound`, the loose store, the other local packs, and the alternates
    /// are tried as fallbacks; if none succeeds the original pack error is
    /// returned. A non-`NotFound` loose-store error is remembered and
    /// returned if no later source provides the object. If nothing has the
    /// object, a not-found error with `MissingObjectContext::Read` is
    /// returned.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
        if let Some(object) = implied_empty_tree_object(self.format, oid) {
            return Ok(object);
        }
        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
            match self.read_packed_object_at_lookup(oid, &pack_lookup) {
                Ok(object) => return Ok(object),
                // Not found in the pack after all: continue with the normal
                // lookup order below.
                Err(GitError::NotFound(_)) => {}
                Err(packed_err) => {
                    // The pack copy could not be decoded; look for another
                    // copy before reporting the failure.
                    if let Ok(object) = self.loose.read_object(oid) {
                        return Ok(object);
                    }
                    if let Some(object) =
                        self.read_packed_object_from_other_packs(oid, &pack_lookup)?
                    {
                        return Ok(object);
                    }
                    for alternate in &self.alternates {
                        if let Ok(object) =
                            Self::without_alternates(alternate, self.format).read_object(oid)
                        {
                            return Ok(object);
                        }
                    }
                    return Err(packed_err);
                }
            }
        }
        // Keep a real loose-store failure (anything but "not found") so it
        // can be reported if every other source misses as well.
        let loose_err = match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => None,
            Err(err) => Some(err),
        };
        if let Some(object) = self.read_packed_object(oid)? {
            return Ok(object);
        }
        for alternate in &self.alternates {
            match Self::without_alternates(alternate, self.format).read_object(oid) {
                Ok(object) => return Ok(object),
                Err(GitError::NotFound(_)) => {}
                Err(err) => return Err(err),
            }
        }
        // Drop the cached loose listing and retry the loose store once.
        self.loose.invalidate_cache();
        match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => {}
            Err(err) => return Err(err),
        }
        if let Some(err) = loose_err {
            return Err(err);
        }
        Err(GitError::object_not_found_in(
            *oid,
            MissingObjectContext::Read,
        ))
    }
}
5546
5547impl FileObjectDatabase {
5548 fn promisor_objects(&self) -> &HashSet<ObjectId> {
5549 self.promisor_objects.get_or_init(|| {
5550 let mut promised =
5551 promisor_pack_object_ids(&self.objects_dir, self.format).unwrap_or_default();
5552 let mut pending = promised.iter().copied().collect::<Vec<_>>();
5553 while let Some(oid) = pending.pop() {
5554 let Ok(object) = self.read_object(&oid) else {
5555 continue;
5556 };
5557 for link in promisor_object_links(self.format, &object) {
5558 if promised.insert(link) {
5559 pending.push(link);
5560 }
5561 }
5562 }
5563 promised
5564 })
5565 }
5566}
5567
5568fn promisor_pack_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
5569 let pack_dir = objects_dir.join("pack");
5570 let mut oids = HashSet::new();
5571 if !pack_dir.exists() {
5572 return Ok(oids);
5573 }
5574 for entry in fs::read_dir(pack_dir)? {
5575 let path = entry?.path();
5576 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
5577 continue;
5578 }
5579 if !path.with_extension("pack").exists() || !path.with_extension("promisor").exists() {
5580 continue;
5581 }
5582 let index = PackIndex::parse(&fs::read(path)?, format)?;
5583 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
5584 }
5585 Ok(oids)
5586}
5587
5588fn promisor_object_links(format: ObjectFormat, object: &EncodedObject) -> Vec<ObjectId> {
5589 match object.object_type {
5590 ObjectType::Commit => Commit::parse_ref(format, &object.body)
5591 .map(|commit| {
5592 let mut links = Vec::with_capacity(commit.parents.len() + 1);
5593 links.push(commit.tree);
5594 links.extend(commit.parents);
5595 links
5596 })
5597 .unwrap_or_default(),
5598 ObjectType::Tree => TreeEntries::new(format, &object.body)
5599 .filter_map(|entry| entry.ok().map(|entry| entry.oid))
5600 .collect(),
5601 ObjectType::Tag => Tag::parse_ref(format, &object.body)
5602 .map(|tag| vec![tag.object])
5603 .unwrap_or_default(),
5604 ObjectType::Blob => Vec::new(),
5605 }
5606}
5607
5608impl ObjectWriter for FileObjectDatabase {
5609 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
5610 let oid = object.object_id(self.format)?;
5616 if self.contains(&oid)? {
5617 return Ok(oid);
5618 }
5619 self.loose.write_object(object)
5620 }
5621}
5622
5623fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
5624 if path.exists() {
5625 return Ok(());
5626 }
5627 let parent = path
5628 .parent()
5629 .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
5630 fs::create_dir_all(parent)?;
5631 let temp_path = unique_temp_path(parent);
5632 let write_result = (|| -> Result<()> {
5633 {
5634 let mut file = fs::OpenOptions::new()
5635 .write(true)
5636 .create_new(true)
5637 .open(&temp_path)?;
5638 file.write_all(bytes)?;
5639 file.sync_all()?;
5640 }
5641 match fs::rename(&temp_path, path) {
5642 Ok(()) => Ok(()),
5643 Err(_) if path.exists() => {
5644 let _ = fs::remove_file(&temp_path);
5645 Ok(())
5646 }
5647 Err(err) => Err(GitError::Io(err.to_string())),
5648 }
5649 })();
5650 if write_result.is_err() {
5651 let _ = fs::remove_file(&temp_path);
5652 }
5653 write_result
5654}
5655
5656fn write_promisor_pack_sidecar(
5657 pack_dir: &Path,
5658 pack_name: &str,
5659 promisor: bool,
5660) -> Result<Option<PathBuf>> {
5661 if !promisor {
5662 return Ok(None);
5663 }
5664 let path = pack_dir.join(format!("{pack_name}.promisor"));
5665 write_pack_component(&path, b"")?;
5666 Ok(Some(path))
5667}
5668
/// Number of inflated bytes inspected when looking for the NUL that ends a
/// loose object's header. `inflate_loose_header` sizes its output buffer
/// with it and `framed_loose_header_terminated` limits its scan to it.
const MAX_LOOSE_HEADER_LEN: usize = 32;
5674
5675fn loose_header_too_long(oid: &ObjectId) -> GitError {
5680 GitError::InvalidObject(format!(
5681 "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
5682 ))
5683}
5684
5685fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
5689 GitError::InvalidObject(format!("unable to unpack {oid} header"))
5690}
5691
/// Inspects the first two bytes of a zlib stream and names the first header
/// problem found, if any.
///
/// Checks run in this order: the 16-bit header must be a multiple of 31, the
/// compression method (low nibble of the first byte) must be 8, the window
/// size (high nibble of the first byte) must not exceed 7, and the
/// preset-dictionary flag (`0x20` in the second byte) must be clear.
/// Returns `None` when fewer than two bytes are supplied or all checks pass.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let header_check = u16::from_be_bytes([cmf, flg]);
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let wants_dictionary = flg & 0x20 != 0;

    if header_check % 31 != 0 {
        Some("inflate: data stream error (incorrect header check)")
    } else if method != 8 {
        Some("inflate: data stream error (unknown compression method)")
    } else if window_bits > 7 {
        Some("inflate: data stream error (invalid window size)")
    } else if wants_dictionary {
        Some("inflate: needs dictionary (no message)")
    } else {
        None
    }
}
5715
5716fn emit_inflate_diagnostic(input: &[u8]) {
5719 if let Some(diagnostic) = inflate_header_diagnostic(input) {
5720 eprintln!("error: {diagnostic}");
5721 }
5722}
5723
/// Outcome of verifying a single loose object file, as produced by
/// `LooseObjectStore::verify_object`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// The object inflated, parsed, and hashed to the expected id.
    Ok,
    /// The object inflated and parsed, but its recomputed id is `actual`
    /// rather than the id it was looked up by.
    HashMismatch { actual: ObjectId },
    /// The file could not be inflated or its framed content is malformed.
    Corrupt,
}
5737
/// Access to the loose (one file per object) part of an object directory.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Directory that holds the two-hex-digit fanout subdirectories.
    objects_dir: PathBuf,
    /// Hash format every object id handled by this store must use.
    format: ObjectFormat,
    /// Presence cache of loose object ids. It sits behind an `Arc`, so
    /// clones of the store share the same cache.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
5752
impl LooseObjectStore {
    /// Creates a store over `objects_dir` with an empty presence cache.
    pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
        Self {
            objects_dir: objects_dir.into(),
            format,
            loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
        }
    }

    /// Answers from the presence cache whether `oid` exists as a loose object.
    ///
    /// The fanout bucket selected by the id's first byte is loaded on first
    /// use. Returns `None` when the cache lock is poisoned or the bucket
    /// cannot be collected, in which case callers fall back to the
    /// filesystem.
    fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
        let mut guard = self.loose_cache.lock().ok()?;
        let fanout = oid.as_bytes()[0];
        if !guard.loaded_fanouts.contains(&fanout) {
            collect_loose_fanout_object_ids(
                &self.objects_dir,
                self.format,
                fanout,
                &mut guard.objects,
            )
            .ok()?;
            guard.loaded_fanouts.insert(fanout);
        }
        Some(guard.objects.contains(oid))
    }

    /// Lists all loose object ids, refreshing the presence cache as a side
    /// effect.
    ///
    /// The cached set is rebuilt from disk, every fanout is marked as loaded,
    /// and the ids are returned sorted by their raw bytes. If the cache lock
    /// is poisoned, the ids are listed without using the cache.
    fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
        if let Ok(mut guard) = self.loose_cache.lock() {
            guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
            guard.loaded_fanouts = (0..=u8::MAX).collect();
            let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
            ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
            return Ok(ids);
        }
        loose_object_ids(&self.objects_dir, self.format)
    }

    /// Records `oid` in the presence cache; a poisoned lock is ignored.
    fn note_loose_write(&self, oid: ObjectId) {
        if let Ok(mut guard) = self.loose_cache.lock() {
            guard.objects.insert(oid);
        }
    }

    /// Resets the presence cache to its default (empty) state, so later
    /// lookups reload fanout buckets from disk.
    pub(crate) fn invalidate_cache(&self) {
        if let Ok(mut guard) = self.loose_cache.lock() {
            *guard = LoosePresenceCache::default();
        }
    }

    /// Creates a store for the objects directory that
    /// `repository_objects_dir` derives from `git_dir`.
    pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
        Self::new(repository_objects_dir(git_dir), format)
    }

    /// Rejects ids whose hash format differs from the store's.
    fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
        if oid.format() != self.format {
            return Err(GitError::InvalidObjectId(format!(
                "object {oid} uses {}, store uses {}",
                oid.format().name(),
                self.format.name()
            )));
        }
        Ok(())
    }

    /// Returns the file path for `oid`: a directory named after the first
    /// two hex digits, containing a file named after the remaining digits.
    pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
        self.validate_oid_format(oid)?;
        let hex = oid.to_hex();
        Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
    }

    /// Reports whether a loose file exists for `oid`.
    ///
    /// A definite "absent" from the presence cache is trusted; any other
    /// cache answer is confirmed against the filesystem.
    pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
        self.validate_oid_format(oid)?;
        if self.cached_loose_presence(oid) == Some(false) {
            return Ok(false);
        }
        let path = self.object_path(oid)?;
        Ok(path.exists())
    }

    /// Returns the size in bytes of the loose file for `oid`, or `None` when
    /// there is no such file.
    pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
        self.validate_oid_format(oid)?;
        if self.cached_loose_presence(oid) == Some(false) {
            return Ok(None);
        }
        let path = self.object_path(oid)?;
        match fs::metadata(path) {
            Ok(metadata) => Ok(Some(metadata.len())),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
            Err(err) => Err(GitError::Io(err.to_string())),
        }
    }

    /// Reads the type and declared size from the header of the loose object
    /// `oid`.
    ///
    /// Returns `Ok(None)` when the object has no loose file.
    ///
    /// # Errors
    ///
    /// Returns `GitError::InvalidObject` when the header cannot be inflated,
    /// is not terminated within `MAX_LOOSE_HEADER_LEN` bytes, is not UTF-8,
    /// lacks the space between type and size, or has an unparsable size. An
    /// unknown type name surfaces as the error of the `ObjectType` parser.
    pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
        self.validate_oid_format(oid)?;
        if self.cached_loose_presence(oid) == Some(false) {
            return Ok(None);
        }
        let path = self.object_path(oid)?;
        let compressed = match fs::read(&path) {
            Ok(compressed) => compressed,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
            Err(err) => return Err(GitError::Io(err.to_string())),
        };
        match inflate_loose_header(&compressed)? {
            LooseHeader::Ok(header) => {
                // The header has the form "<type> <size>".
                let header = std::str::from_utf8(&header)
                    .map_err(|err| GitError::InvalidObject(err.to_string()))?;
                let (kind, size) = header
                    .split_once(' ')
                    .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
                let object_type = kind.parse::<ObjectType>()?;
                let size = size
                    .parse::<u64>()
                    .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
                Ok(Some((object_type, size)))
            }
            LooseHeader::Bad => {
                // Only the two zlib header bytes are needed for the
                // diagnostic.
                emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
                Err(loose_unpack_header_failed(oid))
            }
            LooseHeader::TooLong => {
                Err(loose_header_too_long(oid))
            }
        }
    }

    /// Lists all loose object ids, sorted by raw bytes, and refreshes the
    /// presence cache.
    pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
        self.loose_object_ids_cached()
    }

    /// Checks the loose file for `oid` and reports its integrity.
    ///
    /// Returns `Ok(None)` when the file does not exist. Problems with the
    /// file's content are reported as `LooseObjectIntegrity::Corrupt` after
    /// error lines have been printed to stderr, using `display_path` to name
    /// the file; they are not returned as `Err`. The checks run in this
    /// order: inflation, header termination, trailing garbage after the zlib
    /// stream, declared size versus actual body length, framed-object
    /// parsing, and finally the hash comparison.
    pub fn verify_object(
        &self,
        oid: &ObjectId,
        display_path: &str,
    ) -> Result<Option<LooseObjectIntegrity>> {
        let path = self.object_path(oid)?;
        let compressed = match fs::read(&path) {
            Ok(compressed) => compressed,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
            Err(err) => return Err(GitError::Io(err.to_string())),
        };
        let mut decoder = ZlibDecoder::new(compressed.as_slice());
        let mut framed = Vec::new();
        if decoder.read_to_end(&mut framed).is_err() {
            emit_inflate_diagnostic(&compressed);
            // The wording depends on whether a terminated header was seen in
            // the output produced before inflation failed.
            if framed_loose_header_terminated(&framed) {
                eprintln!("error: corrupt loose object '{oid}'");
                eprintln!("error: unable to unpack contents of {display_path}");
            } else {
                eprintln!("error: unable to unpack header of {display_path}");
            }
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        if !framed_loose_header_terminated(&framed) {
            eprintln!("error: unable to unpack header of {display_path}");
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        // The decoder stopped before consuming the whole file, so bytes
        // follow the end of the zlib stream.
        if (decoder.total_in() as usize) < compressed.len() {
            eprintln!("error: garbage at end of loose object '{oid}'");
            eprintln!("error: unable to unpack contents of {display_path}");
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        }
        if let Some(declared) = loose_header_declared_size(&framed) {
            // The body is everything after the header's NUL terminator.
            let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
            let body_len = framed.len() - (nul + 1).min(framed.len());
            if body_len < declared {
                eprintln!("error: corrupt loose object '{oid}'");
                eprintln!("error: unable to unpack contents of {display_path}");
                return Ok(Some(LooseObjectIntegrity::Corrupt));
            }
        }
        let Ok(object) = parse_framed_object(&framed) else {
            // Distinguish a header that is well-formed apart from its type
            // name from any other header problem.
            if let Some(header) = loose_header_with_unknown_type(&framed) {
                eprintln!("error: unable to parse type from header '{header}' of {display_path}");
            } else {
                eprintln!("error: unable to parse header of {display_path}");
            }
            return Ok(Some(LooseObjectIntegrity::Corrupt));
        };
        let actual = object.object_id(self.format)?;
        if &actual != oid {
            return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
        }
        Ok(Some(LooseObjectIntegrity::Ok))
    }
}
5996
5997fn framed_loose_header_terminated(framed: &[u8]) -> bool {
6001 framed
6002 .iter()
6003 .take(MAX_LOOSE_HEADER_LEN)
6004 .any(|byte| *byte == 0)
6005}
6006
6007fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
6012 let nul = framed.iter().position(|&b| b == 0)?;
6013 let header = std::str::from_utf8(&framed[..nul]).ok()?;
6014 let (kind, size) = header.split_once(' ')?;
6015 let size: usize = size.parse().ok()?;
6016 if framed.len() - (nul + 1) != size {
6019 return None;
6020 }
6021 if kind.parse::<ObjectType>().is_ok() {
6024 return None;
6025 }
6026 Some(header.to_string())
6027}
6028
/// Extracts the size declared in a loose object header.
///
/// The header is everything before the first NUL in `framed`; the size is
/// the text following the first space, parsed as a `usize`. Returns `None`
/// when there is no NUL, the header is not UTF-8, it contains no space, or
/// the size text does not parse.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let header_end = framed.iter().position(|&byte| byte == 0)?;
    std::str::from_utf8(&framed[..header_end])
        .ok()
        .and_then(|header| header.split_once(' '))
        .and_then(|(_type_name, size_text)| size_text.parse().ok())
}
6038
/// Outcome of inflating just the header window of a loose object, as
/// produced by `inflate_loose_header`.
///
/// The derives let callers log an outcome with `{:?}` and compare outcomes
/// directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LooseHeader {
    /// The header bytes that precede the NUL terminator (the NUL itself is
    /// not included).
    Ok(Vec<u8>),
    /// The zlib stream could not be inflated at all.
    Bad,
    /// The inflated header window contained no NUL terminator.
    TooLong,
}
6053
6054fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
6068 let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
6069 let mut decompress = Decompress::new(true);
6070 let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
6074 let produced = decompress.total_out() as usize;
6075 match status {
6076 Ok(_) => {
6077 let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
6078 match window.iter().position(|&byte| byte == 0) {
6079 Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
6080 None => Ok(LooseHeader::TooLong),
6084 }
6085 }
6086 Err(_) => Ok(LooseHeader::Bad),
6088 }
6089}
6090
6091impl ObjectReader for LooseObjectStore {
6092 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
6093 self.validate_oid_format(oid)?;
6094 if self.cached_loose_presence(oid) == Some(false) {
6098 return Err(GitError::object_not_found_in(
6099 *oid,
6100 MissingObjectContext::Read,
6101 ));
6102 }
6103 let path = self.object_path(oid)?;
6104 let compressed = match fs::read(&path) {
6105 Ok(compressed) => compressed,
6106 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
6107 return Err(GitError::object_not_found_in(
6108 *oid,
6109 MissingObjectContext::Read,
6110 ));
6111 }
6112 Err(err) => return Err(GitError::Io(err.to_string())),
6113 };
6114 let mut decoder = ZlibDecoder::new(compressed.as_slice());
6115 let mut framed = Vec::new();
6116 if decoder.read_to_end(&mut framed).is_err() {
6117 emit_inflate_diagnostic(&compressed);
6118 if !framed_loose_header_terminated(&framed) {
6123 return Err(loose_unpack_header_failed(oid));
6124 }
6125 return Err(GitError::InvalidObject(format!(
6126 "corrupt loose object '{oid}'"
6127 )));
6128 }
6129 if framed
6134 .iter()
6135 .take(MAX_LOOSE_HEADER_LEN)
6136 .all(|byte| *byte != 0)
6137 {
6138 return Err(loose_header_too_long(oid));
6139 }
6140 let object = parse_framed_object(&framed)?;
6141 if verify_reads_enabled() {
6145 let actual = object.object_id(self.format)?;
6146 if &actual != oid {
6147 return Err(GitError::InvalidObject(format!(
6148 "loose object {} hashes to {actual}",
6149 path.display()
6150 )));
6151 }
6152 }
6153 Ok(Arc::new(object))
6154 }
6155}
6156
6157impl ObjectWriter for LooseObjectStore {
6158 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
6159 let oid = object.object_id(self.format)?;
6160 let path = self.object_path(&oid)?;
6161 if path.exists() {
6162 self.note_loose_write(oid);
6163 return Ok(oid);
6164 }
6165 let parent = path
6166 .parent()
6167 .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
6168 fs::create_dir_all(parent)?;
6169 let temp_path = unique_temp_path(parent);
6170 let write_result = (|| -> Result<()> {
6171 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
6172 encoder.write_all(&object.framed_bytes())?;
6173 let compressed = encoder.finish()?;
6174 {
6175 let mut file = fs::OpenOptions::new()
6176 .write(true)
6177 .create_new(true)
6178 .open(&temp_path)?;
6179 file.write_all(&compressed)?;
6180 }
6190 match fs::rename(&temp_path, &path) {
6191 Ok(()) => Ok(()),
6192 Err(_) if path.exists() => {
6193 let _ = fs::remove_file(&temp_path);
6194 Ok(())
6195 }
6196 Err(err) => Err(GitError::Io(err.to_string())),
6197 }
6198 })();
6199 if write_result.is_err() {
6200 let _ = fs::remove_file(&temp_path);
6201 }
6202 write_result?;
6203 self.note_loose_write(oid);
6204 Ok(oid)
6205 }
6206}
6207
6208fn unique_temp_path(parent: &Path) -> PathBuf {
6209 let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
6210 parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
6211}
6212
6213#[cfg(test)]
6214mod tests {
6215 use super::*;
6216 use sley_core::BString;
6217 use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
6218 use sley_pack::{PackFile, PackWriteOptions};
6219
6220 fn blob_of(byte: u8, len: usize) -> EncodedObject {
6221 EncodedObject::new(ObjectType::Blob, vec![byte; len])
6222 }
6223
6224 fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
6225 Arc::new(blob_of(byte, len))
6226 }
6227
6228 fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
6229 reader
6230 .read_object(oid)
6231 .expect("test operation should succeed")
6232 .as_ref()
6233 .clone()
6234 }
6235
#[test]
fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
    // Budget: twice the cost of one 1000-byte blob plus 8 bytes of slack.
    let entry_cost = cached_object_cost(&blob_of(0, 1000));
    let budget = entry_cost * 2 + 8;
    let mut cache = LruCache::<u32>::new(budget);

    cache.put(1, cached_blob_of(b'a', 1000));
    cache.put(2, cached_blob_of(b'b', 1000));
    // Touch key 1 so key 2 is the least recently used entry.
    assert!(cache.get(&1).is_some());

    cache.put(3, cached_blob_of(b'c', 1000));
    for (key, expected_present) in [(1u32, true), (2, false), (3, true)] {
        assert_eq!(cache.get(&key).is_some(), expected_present, "key {key}");
    }
}
6251
#[test]
fn lru_cache_zero_budget_is_inert() {
    // With a zero byte budget nothing that is put can be read back.
    let mut inert = LruCache::<u32>::new(0);
    inert.put(1, cached_blob_of(b'a', 16));
    let lookup = inert.get(&1);
    assert!(lookup.is_none());
}
6258
#[test]
fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
    // The budget equals the cost of a single 100-byte blob.
    let budget = cached_object_cost(&blob_of(0, 100));
    let mut cache = LruCache::<u32>::new(budget);

    cache.put(1, cached_blob_of(b'a', 50));
    assert!(cache.get(&1).is_some());

    // Re-putting key 1 with an oversized value must not leave the old
    // value behind.
    cache.put(1, cached_blob_of(b'b', 10_000));
    assert!(cache.get(&1).is_none());

    // The cache remains usable for entries that fit.
    cache.put(2, cached_blob_of(b'c', 50));
    assert!(cache.get(&2).is_some());
}
6273
#[test]
fn lru_cache_replacing_entry_updates_byte_accounting() {
    // Budget: two 500-byte blobs plus 200 bytes of slack.
    let small_cost = cached_object_cost(&blob_of(0, 500));
    let budget = small_cost * 2 + 200;
    let mut cache = LruCache::<u32>::new(budget);

    cache.put(1, cached_blob_of(b'a', 500));
    cache.put(2, cached_blob_of(b'b', 500));
    for key in [1u32, 2] {
        assert!(cache.get(&key).is_some());
    }

    // Growing entry 2 in place is expected to push entry 1 out.
    cache.put(2, cached_blob_of(b'b', 1000));
    assert!(cache.get(&2).is_some());
    assert!(cache.get(&1).is_none());
}
6292
#[test]
fn write_and_validate_blob() {
    let database = ObjectDatabase::new(ObjectFormat::Sha1);
    let hello = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());

    let oid = database
        .write_object(hello)
        .expect("test operation should succeed");

    // Pin the SHA-1 id of the blob "hello\n".
    assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
    database
        .validate(&oid)
        .expect("test operation should succeed");
}
6302
#[test]
fn loose_store_writes_and_reads_object() {
    // Per-process, per-call scratch directory under the system temp dir.
    let sequence = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
    let dir_name = format!("sley-loose-store-{}-{}", std::process::id(), sequence);
    let root = std::env::temp_dir().join(dir_name);
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = store
        .write_object(object.clone())
        .expect("test operation should succeed");

    // The object round-trips and is backed by a file on disk.
    assert_eq!(read_object_for_assert(&store, &oid), object);
    let on_disk = store
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(on_disk.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6324
#[test]
fn read_header_detects_corruption_within_gits_header_window() {
    let root = temp_root("sley-loose-header-corrupt");
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
    let oid = store
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"content\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let path = store
        .object_path(&oid)
        .expect("test operation should succeed");

    // Zero out one byte near the start of the compressed file, then make
    // sure the store does not answer from cached state.
    let mut damaged = fs::read(&path).expect("test operation should succeed");
    damaged[10] = 0;
    fs::write(&path, &damaged).expect("test operation should succeed");
    store.invalidate_cache();

    let msg = store
        .read_header(&oid)
        .expect_err("corrupt loose header must fail like git's ULHR_BAD")
        .to_string();
    let mentions_unpack = msg.contains("unable to unpack");
    let mentions_oid = msg.contains(&oid.to_hex());
    assert!(
        mentions_unpack && mentions_oid,
        "expected git's ULHR_BAD message, got: {msg}"
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6360
#[test]
fn read_header_ignores_corruption_past_gits_header_window() {
    let root = temp_root("sley-loose-header-deep-corrupt");
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);

    // 4096 bytes derived from a multiplicative sequence.
    let body: Vec<u8> = (0..4096u32)
        .map(|index| index.wrapping_mul(2654435761) as u8)
        .collect();
    let expected_len = body.len() as u64;
    let oid = store
        .write_object(EncodedObject::new(ObjectType::Blob, body.clone()))
        .expect("test operation should succeed");
    let path = store
        .object_path(&oid)
        .expect("test operation should succeed");

    // Flip every bit of the byte in the middle of the compressed file.
    let mut damaged = fs::read(&path).expect("test operation should succeed");
    let midpoint = damaged.len() / 2;
    damaged[midpoint] ^= 0xff;
    fs::write(&path, &damaged).expect("test operation should succeed");
    store.invalidate_cache();

    let header = store
        .read_header(&oid)
        .expect("header-only read must still succeed for deep body corruption");
    assert_eq!(header, Some((ObjectType::Blob, expected_len)));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6392
#[test]
fn file_database_reads_object_from_pack_index() {
    let root = temp_root("sley-file-odb-pack");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Hand-place a single-object pack and its index under objects/pack.
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");
    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6422
#[test]
fn file_database_loose_cache_observes_same_process_write_after_miss() {
    let root = temp_root("sley-file-odb-loose-cache-write");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // First read misses: the object has not been written yet.
    let first_read = db.read_object(&oid);
    assert!(matches!(first_read, Err(GitError::NotFound(_))));

    // After a write through the same database's loose store, the read
    // must succeed.
    db.loose()
        .write_object(object.clone())
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6443
#[test]
fn object_presence_checker_observes_same_process_loose_write_after_miss() {
    let root = temp_root("sley-presence-checker-loose-cache-write");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let mut checker = db.presence_checker();

    let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // The checker is created before the write and first reports absence.
    let present_before = checker
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(!present_before);

    db.loose()
        .write_object(object)
        .expect("test operation should succeed");

    // The same checker instance must now see the object.
    let present_after = checker
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(present_after);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6473
#[test]
fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
    let root = temp_root("sley-read-object-header");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // One object written through the database's writer.
    let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
    let loose_oid = db
        .write_object(loose.clone())
        .expect("test operation should succeed");

    // Three packed objects: a base blob, a child that shares the base's
    // 4096-byte prefix, and a commit-typed object.
    let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
    let mut child_bytes = vec![b'a'; 4096];
    child_bytes.extend_from_slice(b" plus a deltified tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_bytes);
    let commitish =
        EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());

    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let commit_oid = commitish
        .object_id(format)
        .expect("test operation should succeed");

    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let packed_objects = [base.clone(), child.clone(), commitish.clone()];
    let pack = PackFile::write_packed_with_options(&packed_objects, format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    let expectations = [
        (&loose_oid, ObjectType::Blob, loose.body.len()),
        (&base_oid, ObjectType::Blob, base.body.len()),
        (&child_oid, ObjectType::Blob, child.body.len()),
        (&commit_oid, ObjectType::Commit, commitish.body.len()),
    ];
    for (oid, want_type, want_len) in expectations {
        // The header matches the values we constructed ...
        let header = db
            .read_object_header(oid)
            .expect("test operation should succeed");
        assert_eq!(header, Some((want_type, want_len as u64)), "header for {oid}");

        // ... and matches what a full read reports.
        let full = db.read_object(oid).expect("test operation should succeed");
        let header_after_full_read = db
            .read_object_header(oid)
            .expect("test operation should succeed");
        assert_eq!(
            header_after_full_read,
            Some((full.object_type, full.body.len() as u64))
        );
    }

    // An id that was never stored has no header.
    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_header = db
        .read_object_header(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_header, None);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6549
#[test]
fn object_storage_info_reports_loose_packed_and_delta_metadata() {
    let root = temp_root("sley-object-storage-info");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Loose object: disk size equals the file size, no delta base.
    let loose_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"loose storage object\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let loose_path = db
        .loose()
        .object_path(&loose_oid)
        .expect("test operation should succeed");
    let loose_size = fs::metadata(loose_path)
        .expect("test operation should succeed")
        .len();
    let loose_info = db
        .object_storage_info(&loose_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert_eq!(loose_info.disk_size, loose_size);
    assert_eq!(
        loose_info.deltabase,
        zero_oid(format).expect("test operation should succeed")
    );

    // Packed pair: a base blob and a child sharing its 4096-byte prefix.
    let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
    let mut child_bytes = vec![b'a'; 4096];
    child_bytes.extend_from_slice(b" changed tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_bytes);
    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    // The base occupies space and reports the zero id as its delta base.
    let base_info = db
        .object_storage_info(&base_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(base_info.disk_size > 0);
    assert_eq!(
        base_info.deltabase,
        zero_oid(format).expect("test operation should succeed")
    );

    // The child occupies space and names the base as its delta base.
    let child_info = db
        .object_storage_info(&child_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(child_info.disk_size > 0);
    assert_eq!(child_info.deltabase, base_oid);

    // Unknown ids have no storage info.
    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_info = db
        .object_storage_info(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_info, None);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6623
#[test]
fn file_database_resolves_unique_loose_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-loose");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"prefix loose\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let hex = oid.to_hex();
    let prefix = &hex[..8];

    // The 8-hex prefix resolves to the one object we wrote ...
    let resolution = db
        .resolve_prefix(prefix)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    // ... and the object is part of the database's id listing.
    let listed = db.object_ids().expect("test operation should succeed");
    assert!(listed.contains(&oid));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6648
#[test]
fn file_database_resolves_unique_packed_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-packed");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // The object exists only inside an installed pack.
    let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    let hex = oid.to_hex();
    let resolution = db
        .resolve_prefix(&hex[..8])
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6672
#[test]
fn file_database_reports_ambiguous_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-ambiguous");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Keep writing distinct blobs until two ids share a 4-hex prefix.
    let mut by_prefix = HashMap::new();
    let (prefix, first, second) = (0..10_000)
        .find_map(|idx| {
            let body = format!("ambiguous {idx}\n").into_bytes();
            let oid = db
                .write_object(EncodedObject::new(ObjectType::Blob, body))
                .expect("test operation should succeed");
            let short = oid.to_hex()[..4].to_string();
            // `insert` returns the earlier id only when the prefix repeats.
            let earlier = by_prefix.insert(short.clone(), oid)?;
            Some((short, earlier, oid))
        })
        .expect("test should find a 4-hex collision");

    let resolution = db
        .resolve_prefix(&prefix)
        .expect("test operation should succeed");
    let ObjectPrefixResolution::Ambiguous(mut matches) = resolution else {
        panic!("expected ambiguous prefix {prefix}");
    };

    // Compare order-independently.
    matches.sort_by_key(ObjectId::to_hex);
    let mut expected = vec![first, second];
    expected.sort_by_key(ObjectId::to_hex);
    assert_eq!(matches, expected);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6705
#[test]
fn file_database_rejects_too_short_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-short");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // A three-character prefix is rejected as an invalid object id.
    let outcome = db.resolve_prefix("abc");
    assert!(matches!(outcome, Err(GitError::InvalidObjectId(_))));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6719
#[test]
fn file_database_reads_sha256_object_from_pack_index() {
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-pack-sha256");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");

    // Hand-place a single-object SHA-256 pack and its index.
    let written = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6750
#[test]
fn file_database_installs_sha256_pack_without_loose_objects() {
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-install-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let result = db
        .install_pack(&pack)
        .expect("test operation should succeed");

    // The install report names the pack after its checksum and lists the
    // single object; both files exist and no promisor sidecar was created.
    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    // The object is served from the pack, not from a loose file.
    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6783
#[test]
fn file_database_installs_raw_sha256_pack_without_loose_objects() {
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-install-raw-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Only the raw pack bytes are handed over.
    let result = db
        .install_raw_pack(&pack.pack)
        .expect("test operation should succeed");

    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    // The object is served from the pack, not from a loose file.
    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6816
#[test]
fn file_database_rejects_noncanonical_pack_index() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-install-bad-index");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");

    // Rebuild the index from entries whose first CRC has one bit flipped.
    let mut tampered_entries = pack.entries.clone();
    tampered_entries[0].crc32 ^= 1;
    let mut bad_pack = pack.clone();
    bad_pack.index = PackIndex::write_v2(format, &tampered_entries, &pack.checksum)
        .expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let outcome = db.install_pack(&bad_pack);
    assert!(outcome.is_err());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6836
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-install-raw-promisor-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let install_options = RawPackInstallOptions { promisor: true };
    let result = db
        .install_raw_pack_with_options(&pack.pack, install_options)
        .expect("test operation should succeed");

    // The sidecar shares the pack's stem, ends in `.promisor`, exists, and
    // is empty.
    let promisor_path = result.promisor_path.expect("promisor sidecar");
    assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
    let sidecar_extension = promisor_path.extension().and_then(|ext| ext.to_str());
    assert_eq!(sidecar_extension, Some("promisor"));
    assert!(promisor_path.exists());
    let sidecar_contents = fs::read(&promisor_path).expect("test operation should succeed");
    assert_eq!(sidecar_contents, b"");

    // Pack and index are installed; the object is not stored loose.
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6876
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    let root = temp_root("sley-odb-common-dir");
    let common = root.join(".git");

    // Admin directory at `.git/worktrees/linked` whose `commondir` file
    // points two levels up.
    let admin = common.join("worktrees").join("linked");
    fs::create_dir_all(&admin).expect("test operation should succeed");
    fs::write(admin.join("commondir"), "../..\n").expect("test operation should succeed");

    let canonical_common = fs::canonicalize(common).expect("test operation should succeed");
    let expected_objects = canonical_common.join("objects");
    assert_eq!(repository_common_dir(&admin), canonical_common);
    assert_eq!(repository_objects_dir(&admin), expected_objects);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6891
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    let root = temp_root("sley-reachable-pack");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    // Build commit -> tree -> blob in the source database.
    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_oid = source
        .write_object(tree.clone())
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_oid = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    // Walking from the commit reaches all three objects.
    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
        .expect("test operation should succeed");
    for oid in [&commit_oid, &tree_oid, &blob_oid] {
        assert!(reachable.contains(oid));
    }

    // Installing the reachable set into the destination yields a pack with
    // exactly those three objects.
    let install =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(install.object_ids.len(), 3);

    let transferred = [
        (&commit_oid, &commit),
        (&tree_oid, &tree),
        (&blob_oid, &blob),
    ];
    for (oid, object) in transferred {
        // Present in the destination, readable, and not stored loose.
        let loose_path = destination
            .loose()
            .object_path(oid)
            .expect("test operation should succeed");
        assert!(!loose_path.exists());
        let present = destination
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&destination, oid), *object);
    }
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6971
6972 #[test]
6973 fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
6974 let root = temp_root("sley-reachable-exclusions");
6975 let git_dir = root.join("repo.git");
6976 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6977 let format = ObjectFormat::Sha1;
6978 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
6979
6980 let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
6981 let blob_oid = db
6982 .write_object(blob)
6983 .expect("test operation should succeed");
6984 let tree = EncodedObject::new(
6985 ObjectType::Tree,
6986 Tree {
6987 entries: vec![TreeEntry {
6988 mode: 0o100644,
6989 name: BString::from(b"payload.txt"),
6990 oid: blob_oid,
6991 }],
6992 }
6993 .write(),
6994 );
6995 let tree_oid = db
6996 .write_object(tree)
6997 .expect("test operation should succeed");
6998 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6999 let commit = EncodedObject::new(
7000 ObjectType::Commit,
7001 Commit {
7002 tree: tree_oid,
7003 parents: Vec::new(),
7004 author: identity.clone(),
7005 committer: identity,
7006 encoding: None,
7007 message: b"initial\n".to_vec(),
7008 }
7009 .write(),
7010 );
7011 let commit_oid = db
7012 .write_object(commit)
7013 .expect("test operation should succeed");
7014 let excluded = HashSet::from([tree_oid]);
7015
7016 let objects = collect_reachable_objects(&db, format, [commit_oid, commit_oid], &excluded)
7017 .expect("test operation should succeed");
7018
7019 assert_eq!(objects.len(), 1);
7020 assert_eq!(
7021 objects[0]
7022 .object_id(format)
7023 .expect("test operation should succeed"),
7024 commit_oid
7025 );
7026 fs::remove_dir_all(root).expect("test operation should succeed");
7027 }
7028
7029 #[test]
7030 fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
7031 let root = temp_root("sley-build-reachable-pack");
7032 let git_dir = root.join("repo.git");
7033 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7034 let format = ObjectFormat::Sha1;
7035 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7036
7037 let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
7038 let oid = db
7039 .write_object(object.clone())
7040 .expect("test operation should succeed");
7041 let pack = build_reachable_pack(&db, format, std::iter::once(oid), &HashSet::new())
7042 .expect("test operation should succeed")
7043 .expect("reachable pack should be built");
7044 assert!(pack.pack.starts_with(b"PACK"));
7045 assert_eq!(pack.entries.len(), 1);
7046 assert_eq!(pack.entries[0].oid, oid);
7047
7048 let excluded = HashSet::from([oid]);
7049 assert!(
7050 build_reachable_pack(
7051 &db,
7052 format,
7053 pack.entries.into_iter().map(|entry| entry.oid),
7054 &excluded
7055 )
7056 .expect("test operation should succeed")
7057 .is_none()
7058 );
7059 fs::remove_dir_all(root).expect("test operation should succeed");
7060 }
7061
7062 #[test]
7063 fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
7064 let root = temp_root("sley-reachable-tags");
7065 let git_dir = root.join("repo.git");
7066 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7067 let format = ObjectFormat::Sha1;
7068 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7069
7070 let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
7071 let blob_oid = db
7072 .write_object(blob)
7073 .expect("test operation should succeed");
7074 let tag = EncodedObject::new(
7075 ObjectType::Tag,
7076 Tag {
7077 object: blob_oid,
7078 object_type: ObjectType::Blob,
7079 name: b"v1".to_vec(),
7080 tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
7081 message: b"tag message\n".to_vec(),
7082 raw_body: None,
7083 }
7084 .write(),
7085 );
7086 let tag_oid = db.write_object(tag).expect("test operation should succeed");
7087
7088 let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
7089 .expect("test operation should succeed");
7090 assert!(reachable.contains(&tag_oid));
7091 assert!(reachable.contains(&blob_oid));
7092
7093 let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
7094 .expect("test operation should succeed");
7095 let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
7096 .expect_err("missing traversal root should error");
7097 let kind = err.not_found_kind().expect("typed not found");
7098 assert_eq!(kind.object_id(), Some(missing));
7099 assert_eq!(
7100 kind.missing_object_context(),
7101 Some(MissingObjectContext::Traversal)
7102 );
7103 fs::remove_dir_all(root).expect("test operation should succeed");
7104 }
7105
7106 #[test]
7107 fn install_reachable_pack_empty_starts_create_no_pack() {
7108 let root = temp_root("sley-reachable-empty");
7109 let source_git_dir = root.join("source.git");
7110 let destination_git_dir = root.join("destination.git");
7111 fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
7112 fs::create_dir_all(destination_git_dir.join("objects"))
7113 .expect("test operation should succeed");
7114 let format = ObjectFormat::Sha1;
7115 let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
7116 let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
7117
7118 let result = install_reachable_pack(&source, &destination, format, Vec::<ObjectId>::new())
7119 .expect("test operation should succeed");
7120
7121 assert!(result.is_none());
7122 assert!(!destination_git_dir.join("objects").join("pack").exists());
7123 fs::remove_dir_all(root).expect("test operation should succeed");
7124 }
7125
7126 #[test]
7127 fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
7128 let root = temp_root("sley-reachable-install-excluding");
7129 let source_git_dir = root.join("source.git");
7130 let destination_git_dir = root.join("destination.git");
7131 fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
7132 fs::create_dir_all(destination_git_dir.join("objects"))
7133 .expect("test operation should succeed");
7134 let format = ObjectFormat::Sha1;
7135 let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
7136 let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
7137 let object = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
7138 let oid = source
7139 .write_object(object)
7140 .expect("test operation should succeed");
7141 let excluded = HashSet::from([oid]);
7142
7143 let result = install_reachable_pack_excluding(
7144 &source,
7145 &destination,
7146 format,
7147 std::iter::once(oid),
7148 &excluded,
7149 )
7150 .expect("test operation should succeed");
7151
7152 assert!(result.is_none());
7153 assert!(!destination_git_dir.join("objects").join("pack").exists());
7154 fs::remove_dir_all(root).expect("test operation should succeed");
7155 }
7156
7157 #[test]
7158 fn install_reachable_pack_supports_sha256() {
7159 let root = temp_root("sley-reachable-pack-sha256");
7160 let source_git_dir = root.join("source.git");
7161 let destination_git_dir = root.join("destination.git");
7162 fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
7163 fs::create_dir_all(destination_git_dir.join("objects"))
7164 .expect("test operation should succeed");
7165 let format = ObjectFormat::Sha256;
7166 let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
7167 let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
7168 let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
7169 let oid = source
7170 .write_object(object.clone())
7171 .expect("test operation should succeed");
7172
7173 let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
7174 .expect("test operation should succeed")
7175 .expect("sha256 reachable pack should be built");
7176 assert!(pack.pack.starts_with(b"PACK"));
7177 assert_eq!(pack.entries[0].oid, oid);
7178
7179 let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
7180 .expect("test operation should succeed")
7181 .expect("sha256 reachable pack should be written");
7182
7183 assert_eq!(result.object_ids, vec![oid]);
7184 assert!(
7185 !destination
7186 .loose()
7187 .object_path(&oid)
7188 .expect("test operation should succeed")
7189 .exists()
7190 );
7191 assert_eq!(read_object_for_assert(&destination, &oid), object);
7192 fs::remove_dir_all(root).expect("test operation should succeed");
7193 }
7194
7195 #[test]
7196 fn install_helpers_accept_custom_raw_pack_installer() {
7197 #[derive(Default)]
7198 struct RecordingInstaller {
7199 packs: std::cell::RefCell<Vec<Vec<u8>>>,
7200 installed: std::cell::RefCell<Vec<ObjectId>>,
7201 }
7202
7203 impl RawPackInstaller for RecordingInstaller {
7204 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
7205 self.packs.borrow_mut().push(pack_bytes.to_vec());
7206 let object_ids = self.installed.borrow().clone();
7207 Ok(RawPackInstallResult { object_ids })
7208 }
7209 }
7210
7211 let format = ObjectFormat::Sha1;
7212 let source = ObjectDatabase::new(format);
7213 let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
7214 let oid = source
7215 .write_object(object)
7216 .expect("test operation should succeed");
7217 let installer = RecordingInstaller::default();
7218 installer.installed.borrow_mut().push(oid);
7219
7220 let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
7221 .expect("test operation should succeed")
7222 .expect("custom installer should receive pack");
7223
7224 assert_eq!(result.object_ids, installer.installed.into_inner());
7225 let packs = installer.packs.into_inner();
7226 assert_eq!(packs.len(), 1);
7227 assert!(packs[0].starts_with(b"PACK"));
7228 }
7229
7230 #[test]
7231 fn file_database_reads_object_from_multi_pack_index() {
7232 let root = temp_root("sley-file-odb-midx");
7233 let git_dir = root.join(".git");
7234 let pack_dir = git_dir.join("objects").join("pack");
7235 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
7236 let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
7237 let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
7238 let first_oid = first
7239 .object_id(ObjectFormat::Sha1)
7240 .expect("test operation should succeed");
7241 let second_oid = second
7242 .object_id(ObjectFormat::Sha1)
7243 .expect("test operation should succeed");
7244 let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
7245 .expect("test operation should succeed");
7246 let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
7247 .expect("test operation should succeed");
7248 let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
7249 let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
7250 fs::write(
7251 pack_dir.join(first_pack_name.replace(".idx", ".pack")),
7252 first_pack.pack,
7253 )
7254 .expect("test operation should succeed");
7255 fs::write(
7256 pack_dir.join(second_pack_name.replace(".idx", ".pack")),
7257 second_pack.pack,
7258 )
7259 .expect("test operation should succeed");
7260 let midx = MultiPackIndex::write(
7261 ObjectFormat::Sha1,
7262 2,
7263 &[first_pack_name, second_pack_name],
7264 &[
7265 sley_pack::MultiPackIndexEntry {
7266 oid: first_oid,
7267 pack_int_id: 0,
7268 offset: first_pack.entries[0].offset,
7269 force_large_offset: false,
7270 },
7271 sley_pack::MultiPackIndexEntry {
7272 oid: second_oid,
7273 pack_int_id: 1,
7274 offset: second_pack.entries[0].offset,
7275 force_large_offset: false,
7276 },
7277 ],
7278 )
7279 .expect("test operation should succeed");
7280 fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");
7281
7282 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7283 assert!(
7284 db.contains(&second_oid)
7285 .expect("test operation should succeed")
7286 );
7287 assert_eq!(
7288 db.resolve_prefix(&second_oid.to_hex()[..8])
7289 .expect("test operation should succeed"),
7290 ObjectPrefixResolution::Unique(second_oid)
7291 );
7292 assert_eq!(read_object_for_assert(&db, &second_oid), second);
7293 assert_eq!(read_object_for_assert(&db, &first_oid), first);
7294 fs::remove_dir_all(root).expect("test operation should succeed");
7295 }
7296
    // A pack installed after the database handle has already served reads
    // (and reported a miss) must still be found through that same handle.
    #[test]
    fn file_database_finds_pack_added_after_registry_was_cached() {
        let root = temp_root("sley-file-odb-pack-added-late");
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

        // Install a first pack and read from it.
        // NOTE(review): per the test name, this read is what populates the
        // cached pack registry — confirm against `FileObjectDatabase`.
        let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
        let first_oid = first
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
            .expect("test operation should succeed");
        db.install_pack(&first_pack)
            .expect("test operation should succeed");
        assert_eq!(read_object_for_assert(&db, &first_oid), first);

        // The second object has not been written anywhere yet, so looking it
        // up must fail with a not-found error.
        let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
        let second_oid = second
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        assert!(matches!(
            db.read_object(&second_oid),
            Err(GitError::NotFound(_))
        ));

        // Install the second pack afterwards; the same handle must now see it
        // and must still be able to read the first object.
        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
            .expect("test operation should succeed");
        db.install_pack(&second_pack)
            .expect("test operation should succeed");
        assert!(
            db.contains(&second_oid)
                .expect("test operation should succeed")
        );
        assert_eq!(read_object_for_assert(&db, &second_oid), second);
        assert_eq!(read_object_for_assert(&db, &first_oid), first);

        fs::remove_dir_all(root).expect("test operation should succeed");
    }
7345
    // A presence checker created before a pack is installed must report
    // objects from that later pack once it exists.
    #[test]
    fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
        let root = temp_root("sley-presence-checker-pack-added-late");
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

        // Only the first object is installed before the checker is created.
        let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
        let first_oid = first
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
            .expect("test operation should succeed");
        db.install_pack(&first_pack)
            .expect("test operation should succeed");

        let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
        let second_oid = second
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        // The checker is obtained while only the first pack is on disk: it
        // sees the first object and reports the second as absent.
        let mut checker = db.presence_checker();
        assert!(
            checker
                .contains(&first_oid)
                .expect("test operation should succeed")
        );
        assert!(
            !checker
                .contains(&second_oid)
                .expect("test operation should succeed")
        );

        // Install the second pack through the database, not the checker.
        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
            .expect("test operation should succeed");
        db.install_pack(&second_pack)
            .expect("test operation should succeed");

        // The pre-existing checker must now find the second object too.
        assert!(
            checker
                .contains(&second_oid)
                .expect("test operation should succeed")
        );
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
7390
    // Pins two properties of the cached pack registry: per-pack index and
    // data are loaded only when first needed, and a new registry is produced
    // once another pack has been installed.
    #[test]
    fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
        let root = temp_root("sley-file-odb-pack-registry-refresh");
        let git_dir = root.join(".git");
        let pack_dir = git_dir.join("objects").join("pack");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

        let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
        let first_oid = first
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
            .expect("test operation should succeed");
        db.install_pack(&first_pack)
            .expect("test operation should succeed");

        // Freshly obtained registry: one pack is listed, but neither its
        // index nor its data has been loaded yet.
        // NOTE(review): the `false` argument is presumably a "force refresh"
        // flag — confirm against `cached_pack_registry`.
        let first_registry = db
            .cached_pack_registry(&pack_dir, false)
            .expect("test operation should succeed");
        assert_eq!(first_registry.fingerprint.idx_count, 1);
        assert_eq!(first_registry.fingerprint.pack_count, 1);
        assert_eq!(first_registry.packs.len(), 1);
        assert!(
            first_registry.packs[0]
                .index
                .lock()
                .expect("test operation should succeed")
                .is_none()
        );
        assert!(
            first_registry.packs[0]
                .data
                .lock()
                .expect("test operation should succeed")
                .is_none()
        );

        // A presence check loads the index but still leaves the data unloaded.
        assert!(
            db.contains(&first_oid)
                .expect("test operation should succeed")
        );
        assert!(
            first_registry.packs[0]
                .index
                .lock()
                .expect("test operation should succeed")
                .is_some()
        );
        assert!(
            first_registry.packs[0]
                .data
                .lock()
                .expect("test operation should succeed")
                .is_none()
        );
        // Reading the object is what finally loads the pack data.
        assert_eq!(read_object_for_assert(&db, &first_oid), first);
        assert!(
            first_registry.packs[0]
                .data
                .lock()
                .expect("test operation should succeed")
                .is_some()
        );

        let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
        let second_oid = second
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
            .expect("test operation should succeed");
        db.install_pack(&second_pack)
            .expect("test operation should succeed");

        // After a second pack is installed, asking again (this time with
        // `true`) must yield a different registry instance listing both packs.
        let refreshed = db
            .cached_pack_registry(&pack_dir, true)
            .expect("test operation should succeed");
        assert!(!Arc::ptr_eq(&first_registry, &refreshed));
        assert_eq!(refreshed.fingerprint.idx_count, 2);
        assert_eq!(refreshed.fingerprint.pack_count, 2);
        assert_eq!(refreshed.packs.len(), 2);
        assert_eq!(read_object_for_assert(&db, &second_oid), second);

        fs::remove_dir_all(root).expect("test operation should succeed");
    }
7478
    // With two packs already in use, a third pack installed after a failed
    // lookup must become readable, and earlier packs must stay readable.
    // NOTE(review): per the test name this targets a pack "search hint" kept
    // by the database; that structure is not visible from this test — confirm.
    #[test]
    fn file_database_pack_search_hint_rebuilds_after_pack_added() {
        let root = temp_root("sley-file-odb-pack-lookup-added-late");
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

        let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
        let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
        let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
        let first_oid = first
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let second_oid = second
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let third_oid = third
            .object_id(ObjectFormat::Sha1)
            .expect("test operation should succeed");

        // Start with two installed packs; the third object exists only in
        // memory at this point.
        let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
            .expect("test operation should succeed");
        let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
            .expect("test operation should succeed");
        db.install_pack(&first_pack)
            .expect("test operation should succeed");
        db.install_pack(&second_pack)
            .expect("test operation should succeed");

        // Read from both packs, then confirm the third object is a miss.
        assert_eq!(read_object_for_assert(&db, &first_oid), first);
        assert_eq!(read_object_for_assert(&db, &second_oid), second);
        assert!(matches!(
            db.read_object(&third_oid),
            Err(GitError::NotFound(_))
        ));

        let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
            .expect("test operation should succeed");
        db.install_pack(&third_pack)
            .expect("test operation should succeed");

        // The late pack is found, and an earlier pack is still reachable.
        assert_eq!(read_object_for_assert(&db, &third_oid), third);
        assert_eq!(read_object_for_assert(&db, &first_oid), first);

        fs::remove_dir_all(root).expect("test operation should succeed");
    }
7529
7530 #[test]
7531 fn file_database_prefers_loose_object_over_packed_object() {
7532 let root = temp_root("sley-file-odb-prefer-loose");
7533 let git_dir = root.join(".git");
7534 let pack_dir = git_dir.join("objects").join("pack");
7535 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
7536 let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
7537 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7538 .expect("test operation should succeed");
7539 let pack_name = written.checksum.to_hex();
7540 fs::write(
7541 pack_dir.join(format!("pack-{pack_name}.pack")),
7542 written.pack,
7543 )
7544 .expect("test operation should succeed");
7545 fs::write(
7546 pack_dir.join(format!("pack-{pack_name}.idx")),
7547 written.index,
7548 )
7549 .expect("test operation should succeed");
7550
7551 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7552 let oid = db
7553 .write_object(object.clone())
7554 .expect("test operation should succeed");
7555 assert_eq!(read_object_for_assert(&db, &oid), object);
7556 fs::remove_dir_all(root).expect("test operation should succeed");
7557 }
7558
7559 #[test]
7560 fn bundle_prerequisite_verification_reads_existing_objects() {
7561 let db = ObjectDatabase::new(ObjectFormat::Sha1);
7562 let oid = db
7563 .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
7564 .expect("test operation should succeed");
7565 let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
7566 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7567 .expect("test operation should succeed");
7568
7569 verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
7570 }
7571
7572 #[test]
7573 fn bundle_prerequisite_verification_reports_missing_objects() {
7574 let db = ObjectDatabase::new(ObjectFormat::Sha1);
7575 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
7576 .expect("test operation should succeed");
7577 let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
7578 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7579 .expect("test operation should succeed");
7580
7581 assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
7582 }
7583
7584 #[test]
7585 fn unbundle_objects_writes_pack_entries_and_returns_refs() {
7586 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7587 let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
7588 let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
7589 let oid = object
7590 .object_id(ObjectFormat::Sha1)
7591 .expect("test operation should succeed");
7592 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7593 .expect("test operation should succeed");
7594 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
7595 .into_bytes()
7596 .into_iter()
7597 .chain(pack.pack)
7598 .collect::<Vec<_>>();
7599 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7600 .expect("test operation should succeed");
7601
7602 let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
7603 .expect("test operation should succeed");
7604 assert_eq!(result.written_objects, vec![oid]);
7605 assert_eq!(result.references, bundle.references);
7606 assert_eq!(read_object_for_assert(&writer, &oid), object);
7607 }
7608
7609 #[test]
7610 fn install_bundle_pack_writes_pack_and_returns_refs() {
7611 let root = temp_root("sley-install-bundle-pack");
7612 let git_dir = root.join(".git");
7613 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7614 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7615 let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
7616 let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
7617 let oid = object
7618 .object_id(ObjectFormat::Sha1)
7619 .expect("test operation should succeed");
7620 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7621 .expect("test operation should succeed");
7622 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
7623 .into_bytes()
7624 .into_iter()
7625 .chain(pack.pack)
7626 .collect::<Vec<_>>();
7627 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7628 .expect("test operation should succeed");
7629
7630 let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
7631 .expect("test operation should succeed");
7632
7633 assert_eq!(result.written_objects, vec![oid]);
7634 assert_eq!(result.references, bundle.references);
7635 assert!(
7636 database
7637 .contains(&oid)
7638 .expect("test operation should succeed")
7639 );
7640 assert_eq!(read_object_for_assert(&database, &oid), object);
7641 assert!(
7642 !database
7643 .loose()
7644 .object_path(&oid)
7645 .expect("test operation should succeed")
7646 .exists()
7647 );
7648 fs::remove_dir_all(root).expect("test operation should succeed");
7649 }
7650
7651 #[test]
7652 fn unpack_packfile_objects_writes_sha256_pack_entries() {
7653 let writer = ObjectDatabase::new(ObjectFormat::Sha256);
7654 let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
7655 let oid = object
7656 .object_id(ObjectFormat::Sha256)
7657 .expect("test operation should succeed");
7658 let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7659 .expect("test operation should succeed");
7660
7661 let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
7662 .expect("test operation should succeed");
7663
7664 assert_eq!(result.written_objects, vec![oid]);
7665 assert_eq!(read_object_for_assert(&writer, &oid), object);
7666 }
7667
7668 #[test]
7669 fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
7670 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7671 let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
7672 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
7673 .expect("test operation should succeed");
7674 let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
7675 let oid = object
7676 .object_id(ObjectFormat::Sha1)
7677 .expect("test operation should succeed");
7678 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7679 .expect("test operation should succeed");
7680 let bundle_bytes =
7681 format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
7682 .into_bytes()
7683 .into_iter()
7684 .chain(pack.pack)
7685 .collect::<Vec<_>>();
7686 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7687 .expect("test operation should succeed");
7688
7689 assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
7690 assert!(!writer.contains(&oid));
7691 }
7692
7693 fn write_commit_graph(
7696 db: &mut FileObjectDatabase,
7697 payload: &[u8],
7698 ) -> Vec<(ObjectId, EncodedObject)> {
7699 let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
7700 let blob_oid = db
7701 .write_object(blob.clone())
7702 .expect("test operation should succeed");
7703 let tree = EncodedObject::new(
7704 ObjectType::Tree,
7705 Tree {
7706 entries: vec![TreeEntry {
7707 mode: 0o100644,
7708 name: BString::from(b"payload.txt"),
7709 oid: blob_oid,
7710 }],
7711 }
7712 .write(),
7713 );
7714 let tree_oid = db
7715 .write_object(tree.clone())
7716 .expect("test operation should succeed");
7717 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7718 let commit = EncodedObject::new(
7719 ObjectType::Commit,
7720 Commit {
7721 tree: tree_oid,
7722 parents: Vec::new(),
7723 author: identity.clone(),
7724 committer: identity,
7725 encoding: None,
7726 message: b"initial\n".to_vec(),
7727 }
7728 .write(),
7729 );
7730 let commit_oid = db
7731 .write_object(commit.clone())
7732 .expect("test operation should succeed");
7733 vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
7734 }
7735
7736 fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
7737 let root = temp_root("sley-repack-all");
7738 let git_dir = root.join(".git");
7739 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7740 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7741
7742 let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
7744 let packed_oid = packed_blob
7745 .object_id(format)
7746 .expect("test operation should succeed");
7747 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
7748 .expect("test operation should succeed");
7749 let existing = db
7750 .install_pack(&existing_pack)
7751 .expect("test operation should succeed");
7752
7753 let graph = write_commit_graph(&mut db, b"repack payload\n");
7754
7755 let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
7756 expected.insert(packed_oid, packed_blob.clone());
7757
7758 let result = repack_all_objects(&git_dir, format)
7759 .expect("test operation should succeed")
7760 .expect("repository has objects");
7761
7762 assert_eq!(result.object_count, expected.len());
7764 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
7765 assert_eq!(parsed.entries.len(), expected.len());
7766 for entry in &parsed.entries {
7767 let want = expected
7768 .get(&entry.entry.oid)
7769 .expect("packed object was in the repository");
7770 assert_eq!(&entry.object, want);
7771 assert_eq!(
7772 entry
7773 .object
7774 .object_id(format)
7775 .expect("test operation should succeed"),
7776 entry.entry.oid
7777 );
7778 }
7779 let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
7781 assert_eq!(idx.pack_checksum, parsed.checksum);
7782 assert_eq!(idx.entries.len(), expected.len());
7783
7784 assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
7786 let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
7788 want_loose.sort_by_key(ObjectId::to_hex);
7789 assert_eq!(result.packed_loose, want_loose);
7790 assert!(!result.packed_loose.contains(&packed_oid));
7791
7792 fs::remove_dir_all(root).expect("test operation should succeed");
7793 }
7794
/// Runs the shared loose-plus-pack consolidation scenario with SHA-1 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha1() {
    let format = ObjectFormat::Sha1;
    repack_all_objects_consolidates_loose_and_pack(format);
}
7799
/// Runs the shared loose-plus-pack consolidation scenario with SHA-256 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha256() {
    let format = ObjectFormat::Sha256;
    repack_all_objects_consolidates_loose_and_pack(format);
}
7804
/// Repacking a git dir whose `objects` directory is empty succeeds and yields `None`.
#[test]
fn repack_all_objects_returns_none_for_empty_repository() {
    let scratch = temp_root("sley-repack-empty");
    let repo = scratch.join(".git");
    let objects_dir = repo.join("objects");
    fs::create_dir_all(&objects_dir).expect("test operation should succeed");

    // Nothing was written to the store, so there is nothing to consolidate.
    let outcome =
        repack_all_objects(&repo, ObjectFormat::Sha1).expect("test operation should succeed");
    assert!(outcome.is_none());

    fs::remove_dir_all(&scratch).expect("test operation should succeed");
}
7819
/// Installing a repack result with the prune flag off adds the new pack and index
/// while every loose object file stays on disk and remains readable.
#[test]
fn install_repack_result_writes_pack_without_pruning_by_default() {
    let scratch = temp_root("sley-repack-install-nodelete");
    let repo = scratch.join(".git");
    fs::create_dir_all(repo.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut odb = FileObjectDatabase::from_git_dir(&repo, format);
    let written = write_commit_graph(&mut odb, b"install no prune\n");

    let repacked = repack_all_objects(&repo, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    // The trailing `false` is the flag this test exercises: install without pruning.
    install_repack_result(&repo, format, &repacked, false)
        .expect("test operation should succeed");

    // The test expects the installed files to be named after the pack checksum.
    let pack = PackFile::parse(&repacked.pack, format).expect("test operation should succeed");
    let checksum_hex = pack.checksum.to_hex();
    let pack_dir = repo.join("objects").join("pack");
    let installed_pack = pack_dir.join(format!("pack-{}.pack", checksum_hex));
    let installed_idx = pack_dir.join(format!("pack-{}.idx", checksum_hex));
    assert!(installed_pack.exists());
    assert!(installed_idx.exists());

    // Without pruning, each loose file must survive and still decode to what was written.
    for (oid, expected) in written.iter() {
        let still_loose = odb
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(still_loose);
        assert_eq!(read_object_for_assert(&odb, oid), *expected);
    }

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
7855
/// With the prune flag on, installing a repack result removes the superseded pack
/// and the loose files, yet every object stays readable from a reopened database.
#[test]
fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
    let scratch = temp_root("sley-repack-install-prune");
    let repo = scratch.join(".git");
    fs::create_dir_all(repo.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut odb = FileObjectDatabase::from_git_dir(&repo, format);

    // Seed the store with one pre-existing pack plus a commit graph written afterwards.
    let old_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
    let old_pack_bytes = PackFile::write_undeltified(std::slice::from_ref(&old_blob), format)
        .expect("test operation should succeed");
    let old_install = odb
        .install_pack(&old_pack_bytes)
        .expect("test operation should succeed");
    let written = write_commit_graph(&mut odb, b"prune payload\n");

    let repacked = repack_all_objects(&repo, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    let new_checksum = PackFile::parse(&repacked.pack, format)
        .expect("test operation should succeed")
        .checksum;
    install_repack_result(&repo, format, &repacked, true)
        .expect("test operation should succeed");

    // The superseded pack and its index are gone.
    assert!(!old_install.pack_path.exists());
    assert!(!old_install.index_path.exists());

    // So are the loose files of the graph that was repacked.
    for (oid, _) in written.iter() {
        let loose_remains = odb
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!loose_remains);
    }

    // The consolidated pack sits in `objects/pack` under its checksum-derived name.
    let new_pack_name = format!("pack-{}.pack", new_checksum.to_hex());
    let new_pack_path = repo.join("objects").join("pack").join(new_pack_name);
    assert!(new_pack_path.exists());

    // A freshly opened database must still find and decode everything after pruning.
    let reopened = FileObjectDatabase::from_git_dir(&repo, format);
    for (oid, expected) in written.iter() {
        let present = reopened
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&reopened, oid), *expected);
    }
    let old_blob_oid = old_blob
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&reopened, &old_blob_oid), old_blob);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
7916
/// Packs accompanied by a `.keep` file or a promisor sidecar survive a pruning install
/// even though the repack result lists them as obsolete; loose objects are still pruned.
#[test]
fn install_repack_result_preserves_keep_and_promisor_packs() {
    let scratch = temp_root("sley-repack-install-keep-promisor");
    let repo = scratch.join(".git");
    fs::create_dir_all(repo.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut odb = FileObjectDatabase::from_git_dir(&repo, format);

    // First pack: marked by an empty `.keep` file written next to the pack.
    let kept_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
    let kept_bytes = PackFile::write_undeltified(std::slice::from_ref(&kept_blob), format)
        .expect("test operation should succeed");
    let kept = odb
        .install_pack(&kept_bytes)
        .expect("test operation should succeed");
    let keep_marker = kept.pack_path.with_extension("keep");
    fs::write(&keep_marker, b"").expect("test operation should succeed");

    // Second pack: installed with the promisor option, which reports a sidecar path.
    let promised_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
    let promised_bytes =
        PackFile::write_undeltified(std::slice::from_ref(&promised_blob), format)
            .expect("test operation should succeed");
    let install_options = RawPackInstallOptions { promisor: true };
    let promised = odb
        .install_pack_with_options(&promised_bytes, install_options)
        .expect("test operation should succeed");
    let promisor_marker = promised.promisor_path.clone().expect("promisor sidecar");

    let written = write_commit_graph(&mut odb, b"new consolidated payload\n");
    let repacked = repack_all_objects(&repo, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    // The repack itself still reports both protected packs as obsolete.
    for listed in [&kept.pack_path, &promised.pack_path] {
        assert!(repacked.obsolete_packs.contains(listed));
    }

    install_repack_result(&repo, format, &repacked, true)
        .expect("test operation should succeed");

    // Every file belonging to the two protected packs must still exist.
    let protected_files = [
        &kept.pack_path,
        &kept.index_path,
        &keep_marker,
        &promised.pack_path,
        &promised.index_path,
        &promisor_marker,
    ];
    for path in protected_files {
        assert!(path.exists(), "{} should be preserved", path.display());
    }

    // Loose files of the freshly written graph are pruned as usual.
    for (oid, _) in written.iter() {
        let loose_remains = odb
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!loose_remains);
    }

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
7977
/// A loose object listed in `packed_loose` but written after the repack (so it cannot
/// be in the new pack) must survive a pruning install.
#[test]
fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
    let scratch = temp_root("sley-repack-install-safety");
    let repo = scratch.join(".git");
    fs::create_dir_all(repo.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut odb = FileObjectDatabase::from_git_dir(&repo, format);
    let written = write_commit_graph(&mut odb, b"safety packed\n");

    let mut repacked = repack_all_objects(&repo, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");

    // Write a blob after the repack, then falsely claim the repack absorbed it.
    let unpacked_blob = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
    let unpacked_oid = odb
        .write_object(unpacked_blob.clone())
        .expect("test operation should succeed");
    assert!(!repacked.packed_loose.contains(&unpacked_oid));
    repacked.packed_loose.push(unpacked_oid);

    install_repack_result(&repo, format, &repacked, true)
        .expect("test operation should succeed");

    // The falsely listed blob keeps its loose file and still reads back intact.
    let stray_remains = odb
        .loose()
        .object_path(&unpacked_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(stray_remains);
    assert_eq!(read_object_for_assert(&odb, &unpacked_oid), unpacked_blob);

    // Objects that really were repacked lose their loose files.
    for (oid, _) in written.iter() {
        let loose_remains = odb
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!loose_remains);
    }

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
8024
/// `prune_unreachable_loose` reports the same unreachable loose object on both calls
/// and removes its file only when the final flag is `true`.
///
/// The first call passes `false` and must leave the dangling blob on disk; the
/// second passes `true` and must delete it. Objects from the written graph, with
/// `graph[0]` used as the reachability root, keep their loose files and contents.
#[test]
fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
    let root = temp_root("sley-prune-unreachable");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"reachable payload\n");
    // `ObjectId` is `Copy` (it is passed by value twice below), so a plain copy
    // replaces the previous redundant `.clone()`.
    // NOTE(review): assumes the first graph entry is the commit — confirm against
    // `write_commit_graph`.
    let commit_oid = graph[0].0;

    // A blob that nothing references: the only object expected to be unreachable.
    let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    // Report-only pass: the blob is listed but its loose file is untouched.
    let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
        .expect("test operation should succeed");
    assert_eq!(reported, vec![dangling_oid]);
    assert!(
        db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Deleting pass: the same list comes back and the loose file is gone.
    let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");
    assert_eq!(deleted, vec![dangling_oid]);
    assert!(
        !db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Objects from the written graph must be left alone.
    for (oid, object) in &graph {
        assert!(
            db.loose()
                .object_path(oid)
                .expect("test operation should succeed")
                .exists()
        );
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
8074
/// Pruning succeeds when a reachable tree holds a gitlink entry (mode `160000`) whose
/// target was never written to the store; only the dangling blob is reported and removed.
#[test]
fn prune_unreachable_loose_ignores_gitlink_targets() {
    let scratch = temp_root("sley-prune-gitlink");
    let repo = scratch.join(".git");
    fs::create_dir_all(repo.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let odb = FileObjectDatabase::from_git_dir(&repo, format);

    // This id is only referenced by the tree entry; no object with it is ever written.
    let gitlink_target = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let gitlink_entry = TreeEntry {
        mode: 0o160000,
        name: BString::from(b"submodule"),
        oid: gitlink_target,
    };
    let tree_body = Tree {
        entries: vec![gitlink_entry],
    }
    .write();
    let tree_oid = odb
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");

    // A parentless commit pointing at that tree serves as the reachability root.
    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"gitlink\n".to_vec(),
    }
    .write();
    let root_commit = odb
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    // An unreferenced blob: the only object the prune should remove.
    let orphan = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
    let orphan_oid = odb
        .write_object(orphan)
        .expect("test operation should succeed");

    let deleted = prune_unreachable_loose(&repo, format, [root_commit], true)
        .expect("test operation should succeed");

    assert_eq!(deleted, vec![orphan_oid]);
    let orphan_remains = odb
        .loose()
        .object_path(&orphan_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!orphan_remains);

    fs::remove_dir_all(scratch).expect("test operation should succeed");
}
8133
8134 fn temp_root(prefix: &str) -> PathBuf {
8135 std::env::temp_dir().join(format!(
8136 "{prefix}-{}-{}",
8137 std::process::id(),
8138 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
8139 ))
8140 }
8141}