1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
12use sley_pack::{
13 MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
14 PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
15};
16use std::collections::{HashMap, HashSet};
17use std::io::{Read, Write};
18use std::path::{Path, PathBuf};
19use std::sync::atomic::{AtomicU64, Ordering};
20use std::sync::{Arc, Mutex, OnceLock};
21use std::{env, fs};
22
// Process-wide atomic counter, initialised to zero.
// NOTE(review): presumably used to derive unique temporary file names; its
// users are not visible in this part of the file — confirm.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
24
/// Read access to an object store.
pub trait ObjectReader {
    /// Looks up the object identified by `oid` and returns it behind a shared pointer.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Reports whether `oid` is a shallow graft point.
    ///
    /// [`grafted_parents`] drops the parents of any id for which this returns `true`.
    /// The default implementation always returns `false`.
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }

    /// Reports whether this reader knows about any shallow grafts at all.
    ///
    /// The default implementation always returns `false`.
    fn has_shallow_grafts(&self) -> bool {
        false
    }
}
46
47fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
48 (*oid == ObjectId::empty_tree(format))
49 .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
50}
51
52fn with_missing_object_context(
53 err: GitError,
54 oid: ObjectId,
55 context: MissingObjectContext,
56) -> GitError {
57 let kind = err
58 .not_found_kind()
59 .and_then(sley_core::NotFoundKind::missing_object_kind);
60 match kind {
61 Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
62 None => err,
63 }
64}
65
66pub fn grafted_parents<R: ObjectReader + ?Sized>(
70 reader: &R,
71 oid: &ObjectId,
72 parents: Vec<ObjectId>,
73) -> Vec<ObjectId> {
74 if reader.is_shallow_graft(oid) {
75 Vec::new()
76 } else {
77 parents
78 }
79}
80
/// Write access to an object store.
pub trait ObjectWriter {
    /// Stores `object` and returns an object id for it.
    ///
    /// `write_pack_objects` in this file compares the returned id against the id
    /// recorded in the pack and fails on a mismatch.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
89
/// Outcome of importing a bundle (see `unbundle_objects` and `install_bundle_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids of the objects written or installed from the bundle's pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the references listed in the bundle.
    pub references: Vec<BundleReference>,
}
95
/// Outcome of writing every object of a pack through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids of the written objects, in pack entry order.
    pub written_objects: Vec<ObjectId>,
}
100
/// Describes a pack that was installed into an object database.
///
/// NOTE(review): the field descriptions below are inferred from the field names;
/// the code that fills this struct is not visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Name of the installed pack.
    pub pack_name: String,
    /// Location of the pack file.
    pub pack_path: PathBuf,
    /// Location of the pack index file.
    pub index_path: PathBuf,
    /// Location of the promisor marker file, if there is one.
    pub promisor_path: Option<PathBuf>,
    /// Ids of the objects in the pack.
    pub object_ids: Vec<ObjectId>,
}
109
/// Outcome of [`RawPackInstaller::install_raw_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Ids of the objects that the installer reports for the pack.
    pub object_ids: Vec<ObjectId>,
}
114
/// Options for installing a generated pack; the default has `promisor` set to `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// NOTE(review): presumably requests that the installed pack be marked as a
    /// promisor pack — confirm against `install_generated_pack_unchecked`.
    pub promisor: bool,
}
119
/// A destination that can ingest a complete pack given as raw bytes.
pub trait RawPackInstaller {
    /// Installs the pack in `pack_bytes` and reports the ids of its objects.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
123
/// Result of resolving an object id prefix.
///
/// NOTE(review): variant meanings are inferred from their names; the resolver is
/// not visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// No object matches.
    Missing,
    /// Exactly one object matches.
    Unique(ObjectId),
    /// Several objects match; carries the candidates.
    Ambiguous(Vec<ObjectId>),
}
130
/// Storage details for a single object.
///
/// NOTE(review): field meanings are inferred from their names; the producer of
/// this struct is not visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Size of the object's on-disk representation (presumably in bytes).
    pub disk_size: u64,
    /// Id of the object's delta base.
    pub deltabase: ObjectId,
}
136
137impl RawPackInstaller for FileObjectDatabase {
138 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
139 let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
140 Ok(RawPackInstallResult {
141 object_ids: result.object_ids,
142 })
143 }
144}
145
146impl RawPackInstaller for ObjectDatabase {
147 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
148 let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
149 Ok(RawPackInstallResult {
150 object_ids: result.written_objects,
151 })
152 }
153}
154
155pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
156 let mut missing = Vec::new();
157 for prerequisite in &bundle.prerequisites {
158 match reader.read_object(&prerequisite.oid) {
159 Ok(object) => {
160 let actual = object.object_id(bundle.format)?;
161 if actual != prerequisite.oid {
162 return Err(GitError::InvalidObject(format!(
163 "bundle prerequisite {} hashes to {actual}",
164 prerequisite.oid
165 )));
166 }
167 }
168 Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
169 Err(err) => return Err(err),
170 }
171 }
172 if missing.is_empty() {
173 return Ok(());
174 }
175 Err(GitError::object_not_found_in(
176 missing[0],
177 MissingObjectContext::PackInstall,
178 ))
179}
180
181pub fn unbundle_objects<R, W>(
182 bundle: &Bundle,
183 prerequisite_reader: &R,
184 writer: &mut W,
185) -> Result<BundleUnbundleResult>
186where
187 R: ObjectReader,
188 W: ObjectWriter,
189{
190 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
191 let pack = PackFile::parse_bundle(bundle)?;
192 let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
193 Ok(BundleUnbundleResult {
194 written_objects,
195 references: bundle.references.clone(),
196 })
197}
198
199pub fn install_bundle_pack<R>(
200 bundle: &Bundle,
201 prerequisite_reader: &R,
202 destination: &impl RawPackInstaller,
203) -> Result<BundleUnbundleResult>
204where
205 R: ObjectReader,
206{
207 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
208 let install = destination.install_raw_pack(&bundle.pack)?;
209 Ok(BundleUnbundleResult {
210 written_objects: install.object_ids,
211 references: bundle.references.clone(),
212 })
213}
214
215pub fn unpack_packfile_objects<W>(
216 pack_bytes: &[u8],
217 format: ObjectFormat,
218 writer: &W,
219) -> Result<PackUnpackResult>
220where
221 W: ObjectWriter,
222{
223 let pack = PackFile::parse(pack_bytes, format)?;
224 write_pack_objects(pack, writer, "pack")
225}
226
227fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
228where
229 W: ObjectWriter,
230{
231 let mut written_objects = Vec::with_capacity(pack.entries.len());
232 for entry in pack.entries {
233 let expected = entry.entry.oid;
234 let actual = writer.write_object(entry.object)?;
235 if actual != expected {
236 return Err(GitError::InvalidObject(format!(
237 "{source} object id mismatch: expected {expected}, wrote {actual}"
238 )));
239 }
240 written_objects.push(actual);
241 }
242 Ok(PackUnpackResult { written_objects })
243}
244
/// Collects the ids of all objects reachable from `starts`.
///
/// Thin wrapper over `walk_reachable_objects` with an empty exclusion set and a
/// no-op visitor.
pub fn collect_reachable_object_ids<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
}
256
/// Collects the ids of objects reachable from `starts`, passing `cut` on to the walk.
///
/// Thin wrapper over `walk_reachable_objects_with_cut` with an empty exclusion
/// set and a no-op visitor.
/// NOTE(review): the exact effect of `cut` is defined by
/// `walk_reachable_objects_with_cut`, which is not visible here — confirm.
pub fn collect_reachable_object_ids_with_cut<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    cut: &HashSet<ObjectId>,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
}
273
/// Collects the ids of objects reachable from `starts`, passing `excluded` on to the walk.
///
/// Thin wrapper over `walk_reachable_objects` with a no-op visitor.
pub fn collect_reachable_object_ids_excluding<R, I>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
}
289
290pub fn collect_reachable_objects<R, I>(
291 reader: &R,
292 format: ObjectFormat,
293 starts: I,
294 excluded: &HashSet<ObjectId>,
295) -> Result<Vec<Arc<EncodedObject>>>
296where
297 R: ObjectReader,
298 I: IntoIterator<Item = ObjectId>,
299{
300 let mut objects = Vec::new();
301 walk_reachable_objects(reader, format, starts, excluded, |_, object| {
302 objects.push(Arc::clone(object));
303 })?;
304 Ok(objects)
305}
306
/// An object paired with its already-known id, ready to be fed to the pack writer.
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id of `object`.
    oid: ObjectId,
    /// The object's encoded contents.
    object: Arc<EncodedObject>,
}
312
313fn collect_reachable_pack_objects<R, I>(
314 reader: &R,
315 format: ObjectFormat,
316 starts: I,
317 excluded: &HashSet<ObjectId>,
318) -> Result<Vec<ReachablePackObject>>
319where
320 R: ObjectReader,
321 I: IntoIterator<Item = ObjectId>,
322{
323 let mut objects = Vec::new();
324 walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
325 objects.push(ReachablePackObject {
326 oid: *oid,
327 object: Arc::clone(object),
328 });
329 })?;
330 Ok(objects)
331}
332
333fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
334 objects
335 .iter()
336 .map(|entry| PackInput {
337 oid: &entry.oid,
338 object: &entry.object,
339 })
340 .collect()
341}
342
/// Packs everything reachable from `starts` in `source` and installs it into `destination`.
///
/// Equivalent to [`install_reachable_pack_excluding`] with an empty exclusion set.
pub fn install_reachable_pack<I>(
    source: &impl ObjectReader,
    destination: &impl RawPackInstaller,
    format: ObjectFormat,
    starts: I,
) -> Result<Option<RawPackInstallResult>>
where
    I: IntoIterator<Item = ObjectId>,
{
    install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
}
354
355pub fn install_reachable_pack_excluding<I>(
356 source: &impl ObjectReader,
357 destination: &impl RawPackInstaller,
358 format: ObjectFormat,
359 starts: I,
360 excluded: &HashSet<ObjectId>,
361) -> Result<Option<RawPackInstallResult>>
362where
363 I: IntoIterator<Item = ObjectId>,
364{
365 let pack = match build_reachable_pack(source, format, starts, excluded)? {
366 Some(pack) => pack,
367 None => return Ok(None),
368 };
369 destination.install_raw_pack(&pack.pack).map(Some)
370}
371
372pub fn build_reachable_pack<R, I>(
373 reader: &R,
374 format: ObjectFormat,
375 starts: I,
376 excluded: &HashSet<ObjectId>,
377) -> Result<Option<PackWrite>>
378where
379 R: ObjectReader,
380 I: IntoIterator<Item = ObjectId>,
381{
382 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
383 if objects.is_empty() {
384 return Ok(None);
385 }
386 let inputs = pack_inputs(&objects);
391 PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
392}
393
/// Builds a pack of the objects reachable from `starts` and installs it into `destination`.
///
/// Equivalent to [`build_and_install_reachable_pack_filtered`] with no object
/// filter and no unpack limit.
pub fn build_and_install_reachable_pack<R, I>(
    source: &R,
    destination: &FileObjectDatabase,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
    options: RawPackInstallOptions,
) -> Result<Option<PackInstallResult>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
{
    build_and_install_reachable_pack_filtered(
        source,
        destination,
        format,
        starts,
        excluded,
        options,
        None, // no object filter
        None, // no unpack limit
    )
}
417
/// Object filter applied when building a pack.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// Omit blobs, except those that were explicitly requested as start ids
    /// (see `build_and_install_reachable_pack_filtered`).
    BlobNone,
}
429
430#[allow(clippy::too_many_arguments)]
434pub fn build_and_install_reachable_pack_filtered<R, I>(
435 source: &R,
436 destination: &FileObjectDatabase,
437 format: ObjectFormat,
438 starts: I,
439 excluded: &HashSet<ObjectId>,
440 options: RawPackInstallOptions,
441 filter: Option<PackObjectFilter>,
442 unpack_limit: Option<usize>,
443) -> Result<Option<PackInstallResult>>
444where
445 R: ObjectReader,
446 I: IntoIterator<Item = ObjectId>,
447{
448 let starts: Vec<ObjectId> = starts.into_iter().collect();
449 let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
450 let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
451 match filter {
452 Some(PackObjectFilter::BlobNone) => {
453 objects.retain(|entry| {
454 entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
455 });
456 }
457 None => {}
458 }
459 if objects.is_empty() {
460 return Ok(None);
461 }
462 if let Some(limit) = unpack_limit
466 && objects.len() < limit
467 {
468 for entry in &objects {
469 destination.loose().write_object((*entry.object).clone())?;
470 }
471 return Ok(None);
472 }
473 let inputs = pack_inputs(&objects);
474 let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
475 destination
476 .install_generated_pack_unchecked(&pack, options)
477 .map(Some)
478}
479
/// Single-pack convenience form of [`assemble_pack_with_verbatim_reuses`].
///
/// Returns the assembled pack bytes and the number of reused objects.
pub fn assemble_pack_with_verbatim_reuse(
    format: ObjectFormat,
    reused_pack_bytes: &[u8],
    appended: &[PackInput<'_>],
) -> Result<(Vec<u8>, u32)> {
    assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
}
496
497pub fn assemble_pack_with_verbatim_reuses(
500 format: ObjectFormat,
501 reused_packs: &[&[u8]],
502 appended: &[PackInput<'_>],
503) -> Result<(Vec<u8>, u32)> {
504 let hash_len = format.raw_len();
505 let mut reused_count = 0u32;
506 let mut capacity = 12 + hash_len + 64 * appended.len();
507 for reused_pack_bytes in reused_packs {
508 if reused_pack_bytes.len() < 12 + hash_len {
509 return Err(GitError::InvalidFormat("reused pack too short".into()));
510 }
511 if &reused_pack_bytes[..4] != b"PACK" {
512 return Err(GitError::InvalidFormat(
513 "reused pack has no signature".into(),
514 ));
515 }
516 let version = u32::from_be_bytes([
517 reused_pack_bytes[4],
518 reused_pack_bytes[5],
519 reused_pack_bytes[6],
520 reused_pack_bytes[7],
521 ]);
522 if version != 2 {
523 return Err(GitError::Unsupported(format!(
524 "reused pack version {version}"
525 )));
526 }
527 let count = u32::from_be_bytes([
528 reused_pack_bytes[8],
529 reused_pack_bytes[9],
530 reused_pack_bytes[10],
531 reused_pack_bytes[11],
532 ]);
533 reused_count = reused_count
534 .checked_add(count)
535 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
536 capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
537 }
538 let total = reused_count
539 .checked_add(appended.len() as u32)
540 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
541
542 let mut out = Vec::with_capacity(capacity);
543 out.extend_from_slice(b"PACK");
544 out.extend_from_slice(&2u32.to_be_bytes());
545 out.extend_from_slice(&total.to_be_bytes());
546 for reused_pack_bytes in reused_packs {
547 out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
548 }
549 for input in appended {
550 write_undeltified_pack_entry(&mut out, input.object)?;
551 }
552 let checksum = sley_core::digest_bytes(format, &out)?;
553 out.extend_from_slice(checksum.as_bytes());
554 Ok((out, reused_count))
555}
556
557pub fn assemble_pack_with_verbatim_entries(
560 format: ObjectFormat,
561 reused_entries: &[&[u8]],
562 appended: &[PackInput<'_>],
563) -> Result<(Vec<u8>, u32)> {
564 let reused_count = u32::try_from(reused_entries.len())
565 .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
566 let total = reused_count
567 .checked_add(appended.len() as u32)
568 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
569
570 let mut capacity = 12 + format.raw_len() + 64 * appended.len();
571 for entry in reused_entries {
572 capacity = capacity.saturating_add(entry.len());
573 }
574 let mut out = Vec::with_capacity(capacity);
575 out.extend_from_slice(b"PACK");
576 out.extend_from_slice(&2u32.to_be_bytes());
577 out.extend_from_slice(&total.to_be_bytes());
578 for entry in reused_entries {
579 out.extend_from_slice(entry);
580 }
581 for input in appended {
582 write_undeltified_pack_entry(&mut out, input.object)?;
583 }
584 let checksum = sley_core::digest_bytes(format, &out)?;
585 out.extend_from_slice(checksum.as_bytes());
586 Ok((out, reused_count))
587}
588
589fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
591 let type_bits: u8 = match object.object_type {
592 ObjectType::Commit => 1,
593 ObjectType::Tree => 2,
594 ObjectType::Blob => 3,
595 ObjectType::Tag => 4,
596 };
597 let mut size = object.body.len() as u64;
598 let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
599 size >>= 4;
600 while size > 0 {
601 out.push(byte | 0x80);
602 byte = (size & 0x7f) as u8;
603 size >>= 7;
604 }
605 out.push(byte);
606 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
607 encoder.write_all(&object.body)?;
608 out.extend_from_slice(&encoder.finish()?);
609 Ok(())
610}
611
/// A freshly generated pack, held in memory, plus the bookkeeping needed to
/// install it (see `install_repack_result_with_bitmap`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the new pack file.
    pub pack: Vec<u8>,
    /// Bytes of the new pack index.
    pub idx: Vec<u8>,
    /// Number of objects in the new pack.
    pub object_count: usize,
    /// Existing pack files the producing repack function considers superseded.
    pub obsolete_packs: Vec<PathBuf>,
    /// Ids of loose objects that are also contained in the new pack, sorted by
    /// their raw bytes.
    pub packed_loose: Vec<ObjectId>,
    // Checksum reported by the pack writer; used for validation and file naming on install.
    pack_checksum: ObjectId,
    // Index entries reported by the pack writer; compared with the parsed index on install.
    index_entries: Vec<PackIndexEntry>,
}
636
637pub fn repack_reachable_objects(
657 git_dir: &Path,
658 format: ObjectFormat,
659 roots: &[ObjectId],
660) -> Result<Option<RepackResult>> {
661 let objects_dir = repository_objects_dir(git_dir);
662 let database = FileObjectDatabase::new(objects_dir.clone(), format);
663
664 let mut seen: HashSet<ObjectId> = HashSet::new();
665 let mut objects: Vec<ReachablePackObject> = Vec::new();
666 let mut pending: Vec<ObjectId> = roots.to_vec();
667 while let Some(oid) = pending.pop() {
668 if !seen.insert(oid) {
669 continue;
670 }
671 let object = match database.read_object(&oid) {
672 Ok(object) => object,
673 Err(GitError::NotFound(_)) => continue,
674 Err(err) => return Err(err),
675 };
676 match object.object_type {
677 ObjectType::Commit => {
678 let commit = Commit::parse_ref(format, &object.body)?;
679 pending.extend(grafted_parents(&database, &oid, commit.parents));
680 pending.push(commit.tree);
681 }
682 ObjectType::Tree => {
683 for entry in TreeEntries::new(format, &object.body) {
684 let entry = entry?;
685 if !entry.is_gitlink() {
686 pending.push(entry.oid);
687 }
688 }
689 }
690 ObjectType::Tag => {
691 let tag = Tag::parse_ref(format, &object.body)?;
692 pending.push(tag.object);
693 }
694 ObjectType::Blob => {}
695 }
696 objects.push(ReachablePackObject { oid, object });
697 }
698 if objects.is_empty() {
699 return Ok(None);
700 }
701
702 let inputs = pack_inputs(&objects);
703 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
704 let object_count = written.entries.len();
705
706 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
709 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
710 .into_iter()
711 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
712 .collect();
713
714 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
715 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
716 .into_iter()
717 .filter(|oid| packed_oid_set.contains(oid))
718 .collect();
719 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
720
721 let pack_checksum = written.checksum;
722 let index_entries = written.entries.clone();
723 Ok(Some(RepackResult {
724 pack: written.pack,
725 idx: written.index,
726 object_count,
727 obsolete_packs,
728 packed_loose,
729 pack_checksum,
730 index_entries,
731 }))
732}
733
734pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
735 let objects_dir = repository_objects_dir(git_dir);
736 let database = FileObjectDatabase::new(objects_dir.clone(), format);
737
738 let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
742 if all_oids.is_empty() {
743 return Ok(None);
744 }
745
746 let mut objects = Vec::with_capacity(all_oids.len());
750 for oid in &all_oids {
751 objects.push(ReachablePackObject {
752 oid: *oid,
753 object: database.read_object(oid)?,
754 });
755 }
756
757 let inputs = pack_inputs(&objects);
758 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
759 let object_count = written.entries.len();
760
761 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
767 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
768 .into_iter()
769 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
770 .collect();
771
772 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
775 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
776 .into_iter()
777 .filter(|oid| packed_oid_set.contains(oid))
778 .collect();
779 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
780
781 Ok(Some(RepackResult {
782 pack: written.pack,
783 idx: written.index,
784 object_count,
785 obsolete_packs,
786 packed_loose,
787 pack_checksum: written.checksum,
788 index_entries: written.entries,
789 }))
790}
791
792pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
798 let objects_dir = repository_objects_dir(git_dir);
799 let database = FileObjectDatabase::new(objects_dir.clone(), format);
800 let loose_oids = loose_object_ids(&objects_dir, format)?;
801 if loose_oids.is_empty() {
802 return Ok(None);
803 }
804
805 let mut objects = Vec::with_capacity(loose_oids.len());
806 for oid in &loose_oids {
807 objects.push(ReachablePackObject {
808 oid: *oid,
809 object: database.read_object(oid)?,
810 });
811 }
812
813 let inputs = pack_inputs(&objects);
814 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
815 let object_count = written.entries.len();
816 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
817 let mut packed_loose: Vec<ObjectId> = loose_oids
818 .into_iter()
819 .filter(|oid| packed_oid_set.contains(oid))
820 .collect();
821 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
822
823 let pack_checksum = written.checksum;
824 let index_entries = written.entries.clone();
825 Ok(Some(RepackResult {
826 pack: written.pack,
827 idx: written.index,
828 object_count,
829 obsolete_packs: Vec::new(),
830 packed_loose,
831 pack_checksum,
832 index_entries,
833 }))
834}
835
/// An existing pack considered by the geometric repack (built by `collect_geometry_packs`).
#[derive(Debug, Clone)]
struct GeometryPack {
    /// Path of the `.pack` file.
    pack_path: PathBuf,
    /// Object ids listed in the pack's index.
    oids: Vec<ObjectId>,
    /// Size used for the geometric progression: the number of objects in the pack.
    weight: u64,
    /// Whether a `.promisor` file exists next to the pack.
    is_promisor: bool,
}
849
/// Outcome of `repack_geometric`.
#[derive(Debug, Clone)]
pub struct GeometricRepackResult {
    /// The new pack, or `None` when there was nothing to roll up.
    pub result: Option<RepackResult>,
    /// Paths of the existing packs whose objects were rolled into the new pack.
    pub rolled_up_packs: Vec<PathBuf>,
}
859
860fn collect_geometry_packs(
863 objects_dir: &Path,
864 format: ObjectFormat,
865 kept_pack_stems: &HashSet<String>,
866) -> Result<Vec<GeometryPack>> {
867 let pack_dir = objects_dir.join("pack");
868 let mut packs = Vec::new();
869 for pack_path in existing_pack_files(&pack_dir)? {
870 if pack_path.with_extension("mtimes").exists() {
873 continue;
874 }
875 if pack_path.with_extension("keep").exists() {
876 continue;
877 }
878 let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) else {
879 continue;
880 };
881 if kept_pack_stems.contains(stem) {
882 continue;
883 }
884 let index_path = pack_path.with_extension("idx");
885 if !index_path.exists() {
886 continue;
887 }
888 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
889 let oids: Vec<ObjectId> = index.entries.iter().map(|entry| entry.oid).collect();
890 let weight = oids.len() as u64;
891 packs.push(GeometryPack {
892 is_promisor: pack_path.with_extension("promisor").exists(),
893 pack_path,
894 oids,
895 weight,
896 });
897 }
898 packs.sort_by(|a, b| a.weight.cmp(&b.weight).then(a.pack_path.cmp(&b.pack_path)));
900 Ok(packs)
901}
902
903fn compute_geometry_split(packs: &[GeometryPack], split_factor: u64) -> usize {
907 let pack_nr = packs.len();
908 if pack_nr == 0 {
909 return 0;
910 }
911 let mut i = pack_nr - 1;
913 while i > 0 {
914 let ours = packs[i].weight;
915 let prev = packs[i - 1].weight;
916 if ours < split_factor.saturating_mul(prev) {
917 break;
918 }
919 i -= 1;
920 }
921 let mut split = i;
922 if split != 0 {
923 split += 1;
925 }
926
927 let mut total_size: u64 = packs[..split].iter().map(|p| p.weight).sum();
931 let mut split = split;
932 for pack in &packs[split..] {
933 if pack.weight < split_factor.saturating_mul(total_size) {
934 split += 1;
935 total_size = total_size.saturating_add(pack.weight);
936 } else {
937 break;
938 }
939 }
940 split
941}
942
943pub fn repack_geometric(
953 git_dir: &Path,
954 format: ObjectFormat,
955 split_factor: u64,
956 kept_pack_stems: &HashSet<String>,
957) -> Result<GeometricRepackResult> {
958 let objects_dir = repository_objects_dir(git_dir);
959 let database = FileObjectDatabase::new(objects_dir.clone(), format);
960
961 let all_packs = collect_geometry_packs(&objects_dir, format, kept_pack_stems)?;
965 let packs: Vec<GeometryPack> = all_packs
966 .into_iter()
967 .filter(|pack| !pack.is_promisor)
968 .collect();
969
970 let split = compute_geometry_split(&packs, split_factor);
971
972 let loose_oids = loose_object_ids(&objects_dir, format)?;
973
974 let mut excluded_oids: HashSet<ObjectId> = HashSet::new();
979 for pack in &packs[split..] {
980 excluded_oids.extend(pack.oids.iter().copied());
981 }
982
983 let mut included: Vec<ObjectId> = Vec::new();
984 let mut seen: HashSet<ObjectId> = HashSet::new();
985 for pack in &packs[..split] {
986 for oid in &pack.oids {
987 if excluded_oids.contains(oid) {
988 continue;
989 }
990 if seen.insert(*oid) {
991 included.push(*oid);
992 }
993 }
994 }
995 for oid in &loose_oids {
996 if excluded_oids.contains(oid) {
997 continue;
998 }
999 if seen.insert(*oid) {
1000 included.push(*oid);
1001 }
1002 }
1003
1004 if included.is_empty() {
1006 return Ok(GeometricRepackResult {
1007 result: None,
1008 rolled_up_packs: Vec::new(),
1009 });
1010 }
1011
1012 included.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1013 let mut objects = Vec::with_capacity(included.len());
1014 for oid in &included {
1015 objects.push(ReachablePackObject {
1016 oid: *oid,
1017 object: database.read_object(oid)?,
1018 });
1019 }
1020
1021 let inputs = pack_inputs(&objects);
1022 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1023 let object_count = written.entries.len();
1024
1025 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1026 let mut packed_loose: Vec<ObjectId> = loose_oids
1027 .into_iter()
1028 .filter(|oid| packed_oid_set.contains(oid))
1029 .collect();
1030 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1031
1032 let rolled_up_packs: Vec<PathBuf> = packs[..split]
1033 .iter()
1034 .map(|pack| pack.pack_path.clone())
1035 .collect();
1036
1037 let pack_checksum = written.checksum;
1038 let index_entries = written.entries.clone();
1039 Ok(GeometricRepackResult {
1040 result: Some(RepackResult {
1041 pack: written.pack,
1042 idx: written.index,
1043 object_count,
1044 obsolete_packs: rolled_up_packs.clone(),
1045 packed_loose,
1046 pack_checksum,
1047 index_entries,
1048 }),
1049 rolled_up_packs,
1050 })
1051}
1052
/// Installs a repack result without writing a bitmap.
///
/// Equivalent to [`install_repack_result_with_bitmap`] with `bitmap_tips` set to `None`.
pub fn install_repack_result(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
) -> Result<()> {
    install_repack_result_with_bitmap(git_dir, format, result, prune, None)
}
1075
/// Validates a repack result and writes it into the repository's pack directory.
///
/// The pack bytes must match the recorded checksum, and the index must both name
/// that checksum and agree with the entries reported by the pack writer. The
/// pack, its reverse index and its index are then written as
/// `pack-<checksum>.{pack,rev,idx}`.
///
/// * `bitmap_tips`: when given, a bitmap is built for those tips and, if one is
///   produced, written as `pack-<checksum>.bitmap`.
/// * `prune`: when `true`, packs whose objects are all contained in the new pack
///   and the loose objects listed in `result.packed_loose` are pruned afterwards.
///   NOTE(review): exact pruning rules live in `prune_packs_contained_in` and
///   `prune_loose_objects`, which are not visible here — confirm.
pub fn install_repack_result_with_bitmap(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // Validate everything before the first file is written.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }
    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    // NOTE(review): the index is written last, presumably so that readers never
    // see an index whose pack is still missing — confirm.
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            // Remove any existing bitmap of the same name before writing the new one.
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    // Ids present in the new pack, taken from the index that was just validated.
    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();

    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
    Ok(())
}
1158
/// Validates a geometric repack result and writes it into the pack directory.
///
/// Does nothing when `geometric.result` is `None`. Otherwise the new pack is
/// validated and written exactly as in `install_repack_result_with_bitmap`
/// (pack, reverse index, index, optional bitmap).
///
/// With `prune` set, the rolled-up packs are deleted together with their
/// companion files (`idx`, `rev`, `mtimes`, `bitmap`, `promisor`), skipping the
/// new pack itself and any pack that has a `.keep` file. The loose objects listed
/// in `result.packed_loose` are pruned, and the multi-pack index is pruned with
/// the stems of the rolled-up packs.
/// NOTE(review): the precise behaviour of `prune_loose_objects` and
/// `prune_stale_multi_pack_index` is defined outside this view — confirm.
pub fn install_geometric_repack_result(
    git_dir: &Path,
    format: ObjectFormat,
    geometric: &GeometricRepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let Some(result) = geometric.result.as_ref() else {
        return Ok(());
    };
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // Validate everything before the first file is written.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }
    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    // NOTE(review): the index is written last, presumably so that readers never
    // see an index whose pack is still missing — confirm.
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            // Remove any existing bitmap of the same name before writing the new one.
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    for pack_path in &geometric.rolled_up_packs {
        // Never delete the pack that was just written.
        if *pack_path == new_pack_path {
            continue;
        }
        // Packs with a `.keep` file are left in place.
        if pack_path.with_extension("keep").exists() {
            continue;
        }
        remove_file_if_exists(pack_path)?;
        remove_file_if_exists(&pack_path.with_extension("idx"))?;
        for ext in ["rev", "mtimes", "bitmap", "promisor"] {
            remove_file_if_exists(&pack_path.with_extension(ext))?;
        }
    }

    // Ids present in the new pack, taken from the index that was just validated.
    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;

    // File stems of all rolled-up packs, handed to the multi-pack-index pruning.
    let removed_stems: HashSet<String> = geometric
        .rolled_up_packs
        .iter()
        .filter_map(|p| p.file_stem().map(|s| s.to_string_lossy().into_owned()))
        .collect();
    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
    Ok(())
}
1251
1252fn validate_pack_checksum(
1253 pack: &[u8],
1254 format: ObjectFormat,
1255 expected: &ObjectId,
1256 context: &str,
1257) -> Result<()> {
1258 if expected.format() != format {
1259 return Err(GitError::InvalidObjectId(format!(
1260 "{context} checksum format does not match object format"
1261 )));
1262 }
1263 let hash_len = format.raw_len();
1264 if pack.len() < 12 + hash_len {
1265 return Err(GitError::InvalidFormat(format!(
1266 "{context} pack file too short"
1267 )));
1268 }
1269 if &pack[..4] != b"PACK" {
1270 return Err(GitError::InvalidFormat(format!(
1271 "{context} pack file missing PACK signature"
1272 )));
1273 }
1274 let trailer_offset = pack.len() - hash_len;
1275 let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
1276 let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
1277 if &actual != expected || trailer != *expected {
1278 return Err(GitError::InvalidFormat(format!(
1279 "{context} pack checksum does not match generated pack"
1280 )));
1281 }
1282 Ok(())
1283}
1284
/// Returns the modification time of `path` as whole seconds since the Unix
/// epoch.
///
/// Any failure (missing file, unsupported metadata, mtime before the epoch)
/// yields `0`. Timestamps that do not fit in a `u32` saturate at `u32::MAX`
/// instead of wrapping, so a far-future mtime never turns into a small value
/// that looks ancient to expiry logic.
fn path_mtime_secs(path: &Path) -> u32 {
    fs::metadata(path)
        .and_then(|metadata| metadata.modified())
        .ok()
        .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok())
        .map(|elapsed| u32::try_from(elapsed.as_secs()).unwrap_or(u32::MAX))
        .unwrap_or(0)
}
1294
/// In-memory cruft pack together with the sidecar files that accompany it.
///
/// Produced by `build_cruft_pack`; nothing here has been written to disk yet.
1295#[derive(Debug, Clone)]
1298pub struct CruftPack {
// Serialized pack bytes.
1299 pub pack: Vec<u8>,
// Serialized pack index (`.idx`) bytes.
1300 pub idx: Vec<u8>,
// Serialized reverse index (`.rev`) bytes.
1301 pub rev: Vec<u8>,
// Serialized per-object mtime table (`.mtimes`) bytes.
1302 pub mtimes: Vec<u8>,
// Pack checksum; also used to derive the `pack-<hex>` file name on install.
1303 pub checksum: ObjectId,
// Ids of the objects stored in the pack, in ascending raw-byte order.
1304 pub oids: Vec<ObjectId>,
1306}
1307
/// Outcome of `repack_cruft`: the packs to install plus the existing packs
/// they supersede.
1308#[derive(Debug, Clone)]
1311pub struct CruftRepackResult {
// Pack of every object reachable from the roots; `None` when nothing
// reachable could be read.
1312 pub reachable: Option<RepackResult>,
// Pack of the surviving unreachable objects; `None` when there are none.
1314 pub cruft: Option<CruftPack>,
// Existing packs without a `.keep` or `.mtimes` sidecar.
1316 pub obsolete_packs: Vec<PathBuf>,
// Existing packs without a `.keep` sidecar that do have a `.mtimes` sidecar.
1319 pub obsolete_cruft_packs: Vec<PathBuf>,
1322}
1323
1324pub fn object_mtimes_on_disk_pub(
1328 objects_dir: &Path,
1329 format: ObjectFormat,
1330) -> Result<HashMap<ObjectId, u32>> {
1331 object_mtimes_on_disk(objects_dir, format)
1332}
1333
1334fn object_mtimes_on_disk(
1335 objects_dir: &Path,
1336 format: ObjectFormat,
1337) -> Result<HashMap<ObjectId, u32>> {
1338 let mut mtimes: HashMap<ObjectId, u32> = HashMap::new();
1339 let mut record = |oid: ObjectId, mtime: u32| {
1340 mtimes
1341 .entry(oid)
1342 .and_modify(|existing| {
1343 if mtime > *existing {
1344 *existing = mtime;
1345 }
1346 })
1347 .or_insert(mtime);
1348 };
1349
1350 let pack_dir = objects_dir.join("pack");
1351 if let Ok(entries) = fs::read_dir(&pack_dir) {
1352 let mut idx_paths: Vec<PathBuf> = Vec::new();
1353 for entry in entries {
1354 let path = entry?.path();
1355 if path.extension().and_then(|ext| ext.to_str()) == Some("idx") {
1356 idx_paths.push(path);
1357 }
1358 }
1359 idx_paths.sort();
1360 for idx_path in idx_paths {
1361 let index = PackIndex::parse(&fs::read(&idx_path)?, format)?;
1362 let pack_path = idx_path.with_extension("pack");
1363 let mtimes_path = idx_path.with_extension("mtimes");
1364 let pack_object_mtimes: Option<Vec<u32>> =
1365 fs::read(&mtimes_path).ok().and_then(|bytes| {
1366 sley_pack::PackMtimes::parse(&bytes, format, index.entries.len())
1367 .ok()
1368 .map(|parsed| parsed.mtimes)
1369 });
1370 let pack_mtime = path_mtime_secs(&pack_path);
1371 for (pos, entry) in index.entries.iter().enumerate() {
1372 let mtime = pack_object_mtimes
1373 .as_ref()
1374 .and_then(|table| table.get(pos).copied())
1375 .unwrap_or(pack_mtime);
1376 record(entry.oid, mtime);
1377 }
1378 }
1379 }
1380
1381 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1382 for oid in loose_object_ids(objects_dir, format)? {
1383 let path = store.object_path(&oid)?;
1384 record(oid, path_mtime_secs(&path));
1385 }
1386 Ok(mtimes)
1387}
1388
1389pub fn build_cruft_pack_pub(
1391 database: &FileObjectDatabase,
1392 format: ObjectFormat,
1393 survivors: &HashMap<ObjectId, u32>,
1394) -> Result<Option<CruftPack>> {
1395 build_cruft_pack(database, format, survivors)
1396}
1397
1398fn build_cruft_pack(
1401 database: &FileObjectDatabase,
1402 format: ObjectFormat,
1403 survivors: &HashMap<ObjectId, u32>,
1404) -> Result<Option<CruftPack>> {
1405 if survivors.is_empty() {
1406 return Ok(None);
1407 }
1408 let mut ordered: Vec<(ObjectId, u32)> = survivors.iter().map(|(o, m)| (*o, *m)).collect();
1409 ordered.sort_by(|a, b| a.0.as_bytes().cmp(b.0.as_bytes()));
1410
1411 let mut oids: Vec<ObjectId> = Vec::with_capacity(ordered.len());
1412 let mut objects: Vec<Arc<EncodedObject>> = Vec::with_capacity(ordered.len());
1413 let mut mtime_by_oid: HashMap<ObjectId, u32> = HashMap::with_capacity(ordered.len());
1414 for (oid, mtime) in ordered {
1415 match database.read_object(&oid) {
1416 Ok(object) => {
1417 oids.push(oid);
1418 objects.push(object);
1419 mtime_by_oid.insert(oid, mtime);
1420 }
1421 Err(GitError::NotFound(_)) => {}
1422 Err(err) => return Err(err),
1423 }
1424 }
1425 if oids.is_empty() {
1426 return Ok(None);
1427 }
1428
1429 let inputs: Vec<PackInput<'_>> = oids
1430 .iter()
1431 .zip(&objects)
1432 .map(|(oid, object)| PackInput {
1433 oid,
1434 object: object.as_ref(),
1435 })
1436 .collect();
1437 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1438
1439 let mut sorted_entries: Vec<&sley_pack::PackIndexEntry> = written.entries.iter().collect();
1441 sorted_entries.sort_by(|a, b| a.oid.as_bytes().cmp(b.oid.as_bytes()));
1442 let mtimes_table: Vec<u32> = sorted_entries
1443 .iter()
1444 .map(|entry| mtime_by_oid.get(&entry.oid).copied().unwrap_or(0))
1445 .collect();
1446 let positions = sley_pack::pack_order_index_positions(&written.entries);
1447 let rev = sley_pack::PackReverseIndex::write(format, &positions, &written.checksum)?;
1448 let mtimes = sley_pack::PackMtimes::write(format, &mtimes_table, &written.checksum)?;
1449
1450 let mut cruft_oids: Vec<ObjectId> = sorted_entries.iter().map(|e| e.oid).collect();
1451 cruft_oids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1452 Ok(Some(CruftPack {
1453 pack: written.pack,
1454 idx: written.index,
1455 rev,
1456 mtimes,
1457 checksum: written.checksum,
1458 oids: cruft_oids,
1459 }))
1460}
1461
1462pub fn repack_cruft(
1472 git_dir: &Path,
1473 format: ObjectFormat,
1474 roots: &[ObjectId],
1475 cruft_expiration: Option<u32>,
1476) -> Result<CruftRepackResult> {
1477 let objects_dir = repository_objects_dir(git_dir);
1478 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1479
1480 let reachable_ids = collect_reachable_object_ids(&database, format, roots.iter().copied())?;
1482 let reachable_result = if reachable_ids.is_empty() {
1483 None
1484 } else {
1485 let mut ids: Vec<ObjectId> = reachable_ids.iter().copied().collect();
1486 ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1487 let mut objects = Vec::with_capacity(ids.len());
1488 for oid in &ids {
1489 match database.read_object(oid) {
1490 Ok(object) => objects.push(ReachablePackObject { oid: *oid, object }),
1491 Err(GitError::NotFound(_)) => {}
1492 Err(err) => return Err(err),
1493 }
1494 }
1495 if objects.is_empty() {
1496 None
1497 } else {
1498 let inputs = pack_inputs(&objects);
1499 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
1500 let packed_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
1501 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1502 .into_iter()
1503 .filter(|oid| packed_set.contains(oid))
1504 .collect();
1505 packed_loose.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
1506 Some(RepackResult {
1507 pack: written.pack,
1508 idx: written.index,
1509 object_count: written.entries.len(),
1510 obsolete_packs: Vec::new(),
1511 packed_loose,
1512 pack_checksum: written.checksum,
1513 index_entries: written.entries,
1514 })
1515 }
1516 };
1517
1518 let mut survivors: HashMap<ObjectId, u32> = object_mtimes_on_disk(&objects_dir, format)?
1521 .into_iter()
1522 .filter(|(oid, _)| !reachable_ids.contains(oid))
1523 .collect();
1524
1525 if let Some(expiration) = cruft_expiration {
1527 rescue_and_expire_cruft_objects(&database, format, &mut survivors, expiration)?;
1528 }
1529
1530 let cruft = build_cruft_pack(&database, format, &survivors)?;
1531
1532 let pack_dir = objects_dir.join("pack");
1535 let mut obsolete_packs = Vec::new();
1536 let mut obsolete_cruft_packs = Vec::new();
1537 for pack_path in existing_pack_files(&pack_dir)? {
1538 if pack_path.with_extension("keep").exists() {
1539 continue;
1540 }
1541 if pack_path.with_extension("mtimes").exists() {
1542 obsolete_cruft_packs.push(pack_path);
1543 } else {
1544 obsolete_packs.push(pack_path);
1545 }
1546 }
1547
1548 Ok(CruftRepackResult {
1549 reachable: reachable_result,
1550 cruft,
1551 obsolete_packs,
1552 obsolete_cruft_packs,
1553 })
1554}
1555
1556fn rescue_and_expire_cruft_objects(
1561 database: &FileObjectDatabase,
1562 format: ObjectFormat,
1563 survivors: &mut HashMap<ObjectId, u32>,
1564 expiration: u32,
1565) -> Result<()> {
1566 let recent: Vec<ObjectId> = survivors
1567 .iter()
1568 .filter(|(_, mtime)| **mtime > expiration)
1569 .map(|(oid, _)| *oid)
1570 .collect();
1571
1572 let mut keep: HashSet<ObjectId> = HashSet::new();
1573 let mut pending: Vec<ObjectId> = recent.clone();
1574 while let Some(oid) = pending.pop() {
1575 if !keep.insert(oid) {
1576 continue;
1577 }
1578 let Ok(object) = database.read_object(&oid) else {
1579 continue;
1580 };
1581 match object.object_type {
1582 ObjectType::Commit => {
1583 if let Ok(commit) = Commit::parse_ref(format, &object.body) {
1584 pending.extend(commit.parents.iter().copied());
1585 pending.push(commit.tree);
1586 }
1587 }
1588 ObjectType::Tree => {
1589 for entry in TreeEntries::new(format, &object.body).flatten() {
1590 if !entry.is_gitlink() {
1591 pending.push(entry.oid);
1592 }
1593 }
1594 }
1595 ObjectType::Tag => {
1596 if let Ok(tag) = Tag::parse_ref(format, &object.body) {
1597 pending.push(tag.object);
1598 }
1599 }
1600 ObjectType::Blob => {}
1601 }
1602 }
1603
1604 survivors.retain(|oid, mtime| *mtime > expiration || keep.contains(oid));
1607 Ok(())
1608}
1609
1610pub fn install_cruft_repack_result(
1614 git_dir: &Path,
1615 format: ObjectFormat,
1616 result: &CruftRepackResult,
1617 prune: bool,
1618) -> Result<()> {
1619 let objects_dir = repository_objects_dir(git_dir);
1620 let pack_dir = objects_dir.join("pack");
1621 fs::create_dir_all(&pack_dir)?;
1622
1623 let new_reachable_name = result
1625 .reachable
1626 .as_ref()
1627 .map(|r| format!("pack-{}.pack", r.pack_checksum.to_hex()));
1628 let new_cruft_name = result
1629 .cruft
1630 .as_ref()
1631 .map(|c| format!("pack-{}.pack", c.checksum.to_hex()));
1632
1633 if let Some(reachable) = result.reachable.as_ref() {
1635 let parsed_index = PackIndex::parse(&reachable.idx, format)?;
1636 let pack_name = format!("pack-{}", reachable.pack_checksum.to_hex());
1637 let reverse_index = sley_pack::PackReverseIndex::write(
1638 format,
1639 &sley_pack::pack_order_index_positions(&parsed_index.entries),
1640 &reachable.pack_checksum,
1641 )?;
1642 write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &reachable.pack)?;
1643 write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &reverse_index)?;
1644 write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &reachable.idx)?;
1645 }
1646
1647 if let Some(cruft) = result.cruft.as_ref() {
1649 let pack_name = format!("pack-{}", cruft.checksum.to_hex());
1650 write_pack_component(&pack_dir.join(format!("{pack_name}.pack")), &cruft.pack)?;
1651 write_pack_component(&pack_dir.join(format!("{pack_name}.rev")), &cruft.rev)?;
1652 write_pack_component(&pack_dir.join(format!("{pack_name}.mtimes")), &cruft.mtimes)?;
1653 write_pack_component(&pack_dir.join(format!("{pack_name}.idx")), &cruft.idx)?;
1654 }
1655
1656 if !prune {
1657 return Ok(());
1658 }
1659
1660 let mut present: HashSet<ObjectId> = HashSet::new();
1662 if let Some(reachable) = result.reachable.as_ref() {
1663 present.extend(reachable.index_entries.iter().map(|e| e.oid));
1664 }
1665 if let Some(cruft) = result.cruft.as_ref() {
1666 present.extend(cruft.oids.iter().copied());
1667 }
1668
1669 let mut removed_stems: HashSet<String> = HashSet::new();
1671 for pack_path in result
1672 .obsolete_packs
1673 .iter()
1674 .chain(result.obsolete_cruft_packs.iter())
1675 {
1676 let file_name = pack_path.file_name().and_then(|n| n.to_str());
1677 if file_name == new_reachable_name.as_deref() || file_name == new_cruft_name.as_deref() {
1678 continue;
1679 }
1680 if pack_path.with_extension("keep").exists() {
1681 continue;
1682 }
1683 if let Some(stem) = pack_path.file_stem().and_then(|s| s.to_str()) {
1684 removed_stems.insert(stem.to_string());
1685 }
1686 remove_file_if_exists(pack_path)?;
1687 remove_file_if_exists(&pack_path.with_extension("idx"))?;
1688 for ext in ["rev", "mtimes", "bitmap", "promisor"] {
1689 remove_file_if_exists(&pack_path.with_extension(ext))?;
1690 }
1691 }
1692
1693 let loose_now_packed: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1695 .into_iter()
1696 .filter(|oid| present.contains(oid))
1697 .collect();
1698 prune_loose_objects(&objects_dir, format, loose_now_packed.iter(), &present)?;
1699
1700 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1701 Ok(())
1702}
1703
1704fn pack_index_entries_match_writer(
1705 parsed: &[PackIndexEntry],
1706 writer_entries: &[PackIndexEntry],
1707) -> bool {
1708 if parsed.len() != writer_entries.len() {
1709 return false;
1710 }
1711 let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
1712 writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1713 parsed.iter().zip(writer_entries).all(|(left, right)| {
1714 left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
1715 })
1716}
1717
1718pub fn prune_unreachable_loose<I>(
1727 git_dir: &Path,
1728 format: ObjectFormat,
1729 roots: I,
1730 delete: bool,
1731) -> Result<Vec<ObjectId>>
1732where
1733 I: IntoIterator<Item = ObjectId>,
1734{
1735 let objects_dir = repository_objects_dir(git_dir);
1736 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1737 let reachable = collect_reachable_object_ids(&database, format, roots)?;
1738
1739 let store = LooseObjectStore::new(objects_dir.clone(), format);
1740 let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1741 .into_iter()
1742 .filter(|oid| !reachable.contains(oid))
1743 .collect();
1744 pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1745
1746 if delete {
1747 for oid in &pruned {
1748 let path = store.object_path(oid)?;
1749 match fs::remove_file(&path) {
1750 Ok(()) => {}
1751 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1752 Err(err) => return Err(GitError::Io(err.to_string())),
1753 }
1754 }
1755 }
1756 Ok(pruned)
1757}
1758
1759fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1762 let oids = loose_object_id_set(objects_dir, format)?;
1763 let mut oids = oids.into_iter().collect::<Vec<_>>();
1764 oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1765 Ok(oids)
1766}
1767
1768fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1769 let mut oids = HashSet::new();
1770 collect_loose_object_ids(objects_dir, format, &mut oids)?;
1771 Ok(oids)
1772}
1773
1774fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1777 if !pack_dir.exists() {
1778 return Ok(Vec::new());
1779 }
1780 let mut packs = Vec::new();
1781 for entry in fs::read_dir(pack_dir)? {
1782 let path = entry?.path();
1783 if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1784 packs.push(path);
1785 }
1786 }
1787 packs.sort();
1788 Ok(packs)
1789}
1790
1791fn prune_packs_contained_in(
1795 objects_dir: &Path,
1796 format: ObjectFormat,
1797 present: &HashSet<ObjectId>,
1798 keep: &Path,
1799) -> Result<()> {
1800 let pack_dir = objects_dir.join("pack");
1801 let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
1802 let mut removed_stems: HashSet<String> = HashSet::new();
1803
1804 for pack_path in existing_pack_files(&pack_dir)? {
1805 if pack_path == keep {
1806 continue;
1807 }
1808 let Some(stem) = pack_path.file_stem() else {
1809 continue;
1810 };
1811 if Some(stem) == keep_stem.as_deref() {
1812 continue;
1813 }
1814 if pack_path.with_extension("keep").exists()
1815 || pack_path.with_extension("promisor").exists()
1816 {
1817 continue;
1818 }
1819 let index_path = pack_path.with_extension("idx");
1820 if !index_path.exists() {
1821 continue;
1823 }
1824 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1825 if !index
1826 .entries
1827 .iter()
1828 .all(|entry| present.contains(&entry.oid))
1829 {
1830 continue;
1831 }
1832 remove_file_if_exists(&pack_path)?;
1836 remove_file_if_exists(&index_path)?;
1837 for ext in ["rev", "mtimes", "bitmap"] {
1838 remove_file_if_exists(&pack_path.with_extension(ext))?;
1839 }
1840 removed_stems.insert(stem.to_string_lossy().into_owned());
1841 }
1842
1843 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1844 Ok(())
1845}
1846
1847fn prune_stale_multi_pack_index(
1854 pack_dir: &Path,
1855 format: ObjectFormat,
1856 removed_stems: &HashSet<String>,
1857) -> Result<()> {
1858 if removed_stems.is_empty() {
1859 return Ok(());
1860 }
1861 let midx_path = pack_dir.join("multi-pack-index");
1862 if !midx_path.exists() {
1863 return Ok(());
1864 }
1865 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1866 let references_removed_pack = midx.pack_names.iter().any(|name| {
1867 let stem = name.strip_suffix(".idx").unwrap_or(name);
1868 removed_stems.contains(stem)
1869 });
1870 if references_removed_pack {
1871 remove_file_if_exists(&midx_path)?;
1872 }
1873 Ok(())
1874}
1875
1876fn prune_loose_objects<'a, I>(
1879 objects_dir: &Path,
1880 format: ObjectFormat,
1881 candidates: I,
1882 present: &HashSet<ObjectId>,
1883) -> Result<()>
1884where
1885 I: IntoIterator<Item = &'a ObjectId>,
1886{
1887 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1888 for oid in candidates {
1889 if !present.contains(oid) {
1890 continue;
1891 }
1892 remove_file_if_exists(&store.object_path(oid)?)?;
1893 }
1894 Ok(())
1895}
1896
/// How a delta entry in a pack identifies its base object.
1897enum PackDeltaBase {
// Entry type 6: absolute pack offset of the base, already resolved from the
// entry's relative encoding.
1898 Offset(u64),
// Entry type 7: object id of the base.
1899 Ref(ObjectId),
1900}
1901
/// Result of `scan_pack_index_offsets` for one pack entry.
1902struct PackIndexOffsetInfo {
// Offset at which the entry's data ends: the next larger indexed offset, or
// the trailer offset for the last entry. Equals the entry's own offset when
// several index entries share it.
1903 end_offset: u64,
// Object id of the entry at the requested delta-base offset, if one was
// requested.
1904 delta_base_oid: Option<ObjectId>,
1905}
1906
1907fn scan_pack_index_offsets(
1908 index: &PackIndex,
1909 target_offset: u64,
1910 trailer_offset: u64,
1911 delta_base_offset: Option<u64>,
1912) -> Result<PackIndexOffsetInfo> {
1913 let mut target_count = 0usize;
1914 let mut next_offset = None;
1915 let mut delta_base_oid = None;
1916
1917 for entry in &index.entries {
1918 if entry.offset == target_offset {
1919 target_count += 1;
1920 } else if entry.offset > target_offset {
1921 match next_offset {
1922 Some(current) if current <= entry.offset => {}
1923 _ => next_offset = Some(entry.offset),
1924 }
1925 }
1926 if Some(entry.offset) == delta_base_offset {
1927 delta_base_oid = Some(entry.oid);
1928 }
1929 }
1930
1931 if target_count == 0 {
1932 return Err(GitError::InvalidFormat(format!(
1933 "pack index offset {target_offset} not found"
1934 )));
1935 }
1936 if let Some(offset) = delta_base_offset
1937 && delta_base_oid.is_none()
1938 {
1939 return Err(GitError::InvalidFormat(format!(
1940 "ofs-delta base offset {offset} not found"
1941 )));
1942 }
1943
1944 Ok(PackIndexOffsetInfo {
1945 end_offset: if target_count > 1 {
1948 target_offset
1949 } else {
1950 next_offset.unwrap_or(trailer_offset)
1951 },
1952 delta_base_oid,
1953 })
1954}
1955
1956fn pack_entry_delta_base(
1957 format: ObjectFormat,
1958 pack: &[u8],
1959 entry_offset: u64,
1960) -> Result<Option<PackDeltaBase>> {
1961 let mut cursor = usize::try_from(entry_offset)
1962 .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1963 let first = pack_next_byte(pack, &mut cursor)?;
1964 let kind = (first >> 4) & 0x07;
1965 let mut byte = first;
1966 while byte & 0x80 != 0 {
1967 byte = pack_next_byte(pack, &mut cursor)?;
1968 }
1969 match kind {
1970 6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1971 pack,
1972 &mut cursor,
1973 entry_offset,
1974 )?))),
1975 7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1976 format,
1977 pack,
1978 &mut cursor,
1979 )?))),
1980 _ => Ok(None),
1981 }
1982}
1983
1984fn parse_ref_delta_base_oid(
1985 format: ObjectFormat,
1986 pack: &[u8],
1987 cursor: &mut usize,
1988) -> Result<ObjectId> {
1989 let raw_len = format.raw_len();
1990 if *cursor + raw_len > pack.len() {
1991 return Err(GitError::InvalidFormat(
1992 "truncated ref-delta base object id".into(),
1993 ));
1994 }
1995 let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1996 *cursor += raw_len;
1997 Ok(oid)
1998}
1999
2000fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
2001 let mut byte = pack_next_byte(pack, cursor)?;
2002 let mut relative = u64::from(byte & 0x7f);
2003 while byte & 0x80 != 0 {
2004 byte = pack_next_byte(pack, cursor)?;
2005 relative = relative
2006 .checked_add(1)
2007 .and_then(|value| value.checked_shl(7))
2008 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2009 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2010 }
2011 entry_offset
2012 .checked_sub(relative)
2013 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2014}
2015
2016fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
2017 let Some(byte) = pack.get(*cursor).copied() else {
2018 return Err(GitError::InvalidFormat("truncated pack entry".into()));
2019 };
2020 *cursor += 1;
2021 Ok(byte)
2022}
2023
2024fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
2025 Ok(ObjectId::null(format))
2026}
2027
2028fn remove_file_if_exists(path: &Path) -> Result<()> {
2030 match fs::remove_file(path) {
2031 Ok(()) => Ok(()),
2032 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
2033 Err(err) => Err(GitError::Io(err.to_string())),
2034 }
2035}
2036
2037fn walk_reachable_objects<R, I, F>(
2038 reader: &R,
2039 format: ObjectFormat,
2040 starts: I,
2041 excluded: &HashSet<ObjectId>,
2042 visit: F,
2043) -> Result<HashSet<ObjectId>>
2044where
2045 R: ObjectReader,
2046 I: IntoIterator<Item = ObjectId>,
2047 F: FnMut(&ObjectId, &Arc<EncodedObject>),
2048{
2049 walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
2050}
2051
/// Depth-first walk over the object graph from `starts`, calling `visit`
/// once per object reached, and returning the set of visited ids.
///
/// * `excluded`: ids that are skipped entirely; they are neither read,
///   visited, nor included in the returned set.
/// * `cut`: commits that are visited and whose tree is still walked, but
///   whose parents are not followed.
///
/// Parents are filtered through `grafted_parents`, so the reader's shallow
/// grafts are honoured. Gitlink tree entries are not followed.
///
/// # Errors
///
/// Read failures go through `with_missing_object_context` with the
/// `Traversal` context; commit, tree and tag parse errors are propagated.
2052fn walk_reachable_objects_with_cut<R, I, F>(
2056 reader: &R,
2057 format: ObjectFormat,
2058 starts: I,
2059 excluded: &HashSet<ObjectId>,
2060 cut: &HashSet<ObjectId>,
2061 mut visit: F,
2062) -> Result<HashSet<ObjectId>>
2063where
2064 R: ObjectReader,
2065 I: IntoIterator<Item = ObjectId>,
2066 F: FnMut(&ObjectId, &Arc<EncodedObject>),
2067{
2068 let mut seen = HashSet::new();
2069 let mut pending = Vec::new();
// Each start is pushed and its stack fully drained before the next start is
// considered; `seen` is shared across starts.
2070 for start in starts {
2071 pending.push(start);
2072 while let Some(oid) = pending.pop() {
// The exclusion check comes before `seen.insert`, so excluded ids never
// appear in the returned set.
2073 if excluded.contains(&oid) {
2074 continue;
2075 }
2076 if !seen.insert(oid) {
2077 continue;
2078 }
2079 let object = reader.read_object(&oid).map_err(|err| {
2080 with_missing_object_context(err, oid, MissingObjectContext::Traversal)
2081 })?;
2082 match object.object_type {
2083 ObjectType::Commit => {
// Parse before visiting so a malformed commit is never reported to `visit`.
2084 let (tree, parents) = {
2085 let commit = Commit::parse_ref(format, &object.body)?;
2086 (commit.tree, commit.parents)
2087 };
2088 visit(&oid, &object);
2089 if !cut.contains(&oid) {
// Reverse push order so the first parent is popped before later parents.
2090 for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
2091 pending.push(parent);
2092 }
2093 }
// The tree is pushed last, so it is popped (and walked) before any parent.
2094 pending.push(tree);
2095 }
2096 ObjectType::Tree => {
// Collect children first so an entry parse error aborts before `visit`.
2097 let mut child_oids = Vec::new();
2098 for entry in TreeEntries::new(format, &object.body) {
2099 let entry = entry?;
2100 if entry.is_gitlink() {
2101 continue;
2102 }
2103 child_oids.push(entry.oid);
2104 }
2105 visit(&oid, &object);
// Reverse push order so entries are popped in tree order.
2106 pending.extend(child_oids.into_iter().rev());
2107 }
2108 ObjectType::Tag => {
2109 let target = {
2110 let tag = Tag::parse_ref(format, &object.body)?;
2111 tag.object
2112 };
2113 visit(&oid, &object);
2114 pending.push(target);
2115 }
2116 ObjectType::Blob => visit(&oid, &object),
2117 }
2118 }
2119 }
2120 Ok(seen)
2121}
2122
/// Reports whether bit `position` is set in `words` (64 bits per word,
/// least-significant bit first). Positions past the end read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    match words.get((position / 64) as usize) {
        Some(word) => (word >> (position % 64)) & 1 == 1,
        None => false,
    }
}
2131
/// Sets bit `position` in `words` (64 bits per word, least-significant bit
/// first). Positions past the end are silently ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
2138
/// ORs `other` into `acc` word by word. Only the words both slices have in
/// common are touched.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other.iter())
        .for_each(|(target, source)| *target |= *source);
}
2144
/// Returns the positions of all set bits in `words`, in ascending order
/// (64 bits per word, least-significant bit first).
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    let mut positions = Vec::new();
    for (word_index, &word) in words.iter().enumerate() {
        positions.extend(
            (0..64u32)
                .filter(|bit| word & (1u64 << *bit) != 0)
                .map(|bit| word_index as u32 * 64 + bit),
        );
    }
    positions
}
2158
/// Extracts the timestamp from an identity line: the second-to-last
/// space-separated field, e.g. `Name <mail> 1700000000 +0100`.
///
/// Returns `0` when the field is missing, is not UTF-8, or does not parse
/// as an `i64`.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Splitting from the right: field 0 is the timezone, field 1 the timestamp.
    let timestamp_field = identity.rsplitn(3, |byte| *byte == b' ').nth(1);
    match timestamp_field.map(std::str::from_utf8) {
        Some(Ok(text)) => text.parse::<i64>().unwrap_or(0),
        _ => 0,
    }
}
2171
/// Returns how many commits to skip past index `idx` before the next
/// bitmap-selection candidate.
///
/// * `idx <= 100`: `0`.
/// * `100 < idx <= 20000`: `idx - 100`, capped at `100`.
/// * `idx > 20000`: `idx - 20000`, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
2190
2191pub fn build_pack_bitmap(
2205 db: &FileObjectDatabase,
2206 format: ObjectFormat,
2207 index_entries: &[PackIndexEntry],
2208 pack_checksum: &ObjectId,
2209 preferred_tips: &HashSet<ObjectId>,
2210) -> Result<Option<Vec<u8>>> {
2211 let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
2214 by_offset.sort_by_key(|&slot| index_entries[slot].offset);
2215 let bit_order: Vec<ObjectId> = by_offset
2216 .into_iter()
2217 .map(|slot| index_entries[slot].oid)
2218 .collect();
2219 build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
2220}
2221
2222pub fn build_midx_bitmap(
2228 db: &FileObjectDatabase,
2229 format: ObjectFormat,
2230 midx_entries: &[sley_pack::MultiPackIndexEntry],
2231 midx_checksum: &ObjectId,
2232 preferred_pack: u32,
2233 preferred_tips: &HashSet<ObjectId>,
2234) -> Result<Option<Vec<u8>>> {
2235 let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
2236 pseudo.sort_by_key(|&slot| {
2237 let entry = &midx_entries[slot];
2238 (
2239 entry.pack_int_id != preferred_pack,
2240 entry.pack_int_id,
2241 entry.offset,
2242 )
2243 });
2244 let bit_order: Vec<ObjectId> = pseudo
2245 .into_iter()
2246 .map(|slot| midx_entries[slot].oid)
2247 .collect();
2248 build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
2249}
2250
/// Counts the "maximal" commits among `selected`.
///
/// Each selected commit owns one bit. Bit masks are pushed from children to
/// their first parent along first-parent chains. A commit is counted when it
/// is still flagged maximal at the moment it is processed.
///
/// # Errors
///
/// Propagates failures to read or parse any commit on the first-parent
/// chains starting at `selected`.
2251fn bitmap_num_maximal_commits(
2259 db: &FileObjectDatabase,
2260 format: ObjectFormat,
2261 selected: &[ObjectId],
2262) -> Result<usize> {
// Step 1: record the first (graft-aware) parent of every commit on the
// first-parent chains that start at the selected commits.
2263 let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
2265 let mut stack: Vec<ObjectId> = selected.to_vec();
2266 while let Some(oid) = stack.pop() {
2267 if first_parent.contains_key(&oid) {
2268 continue;
2269 }
2270 let object = db.read_object(&oid)?;
2271 let commit = Commit::parse_ref(format, &object.body)?;
2272 let parent = grafted_parents(db, &oid, commit.parents).first().copied();
2273 first_parent.insert(oid, parent);
2274 if let Some(parent) = parent {
2275 stack.push(parent);
2276 }
2277 }
// Step 2: for each commit, count how many recorded commits name it as their
// first parent. A commit is processed only after all of those children.
2278 let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
2280 for parent in first_parent.values().flatten() {
2281 *pending_children.entry(*parent).or_default() += 1;
2282 }
2283 let word_count = selected.len().div_ceil(64);
2284 struct MaximalEnt {
2285 mask: Vec<u64>,
2286 maximal: bool,
2287 }
// Step 3: every selected commit starts with its own bit set and is
// provisionally maximal.
2288 let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
2289 for (bit, oid) in selected.iter().enumerate() {
2290 let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
2291 mask: vec![0u64; word_count],
2292 maximal: true,
2293 });
2294 ent.mask[bit / 64] |= 1u64 << (bit % 64);
2295 ent.maximal = true;
2296 }
// Seed the work list with commits that have no recorded first-parent child.
2297 let mut queue: Vec<ObjectId> = first_parent
2298 .keys()
2299 .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
2300 .copied()
2301 .collect();
2302 let mut num_maximal = 0usize;
2303 while let Some(oid) = queue.pop() {
2304 if let Some(ent) = ents.remove(&oid) {
2305 if ent.maximal {
2306 num_maximal += 1;
2307 }
2308 if let Some(Some(parent)) = first_parent.get(&oid) {
2309 match ents.entry(*parent) {
// Parent has no mask yet: it inherits the child's mask and is not maximal.
2310 std::collections::hash_map::Entry::Vacant(vacant) => {
2311 vacant.insert(MaximalEnt {
2313 mask: ent.mask.clone(),
2314 maximal: false,
2315 });
2316 }
2317 std::collections::hash_map::Entry::Occupied(mut occupied) => {
2318 let parent_ent = occupied.get_mut();
// Only merge when the child contributes bits the parent lacks.
2319 let c_not_p = ent
2320 .mask
2321 .iter()
2322 .zip(&parent_ent.mask)
2323 .any(|(child, parent)| child & !parent != 0);
2324 if c_not_p {
// Computed before the merge: after it, the parent stays maximal only if it
// already held bits the child did not.
2325 let p_not_c = parent_ent
2326 .mask
2327 .iter()
2328 .zip(&ent.mask)
2329 .any(|(parent, child)| parent & !child != 0);
2330 for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
2331 *parent |= child;
2332 }
2333 parent_ent.maximal = p_not_c;
2334 }
2335 }
2336 }
2337 }
2338 }
// This commit is done: release its first parent once all children are done.
2339 if let Some(Some(parent)) = first_parent.get(&oid)
2340 && let Some(remaining) = pending_children.get_mut(parent)
2341 {
2342 *remaining -= 1;
2343 if *remaining == 0 {
2344 queue.push(*parent);
2345 }
2346 }
2347 }
2348 Ok(num_maximal)
2349}
2350
/// Builds the serialized reachability bitmap for a pack whose objects are
/// listed in `bit_order`.
///
/// `bit_order[i]` is the object that owns bit `i`. `checksum` is handed to the
/// [`PackBitmapWriter`] unchanged, and commits contained in `preferred_tips`
/// win the per-window selection below.
///
/// Returns `Ok(None)` when no bitmap is produced: `bit_order` is empty, it has
/// more than `u32::MAX` entries, or a selected commit reaches an object that is
/// not part of `bit_order` (a warning is printed to stderr in that case).
/// Errors from reading or parsing objects and from the writer are propagated.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    // Bit positions are stored as `u32`, so the `as u32` casts below are only
    // valid once this guard has passed.
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // Slots of `bit_order` ordered by raw object-id bytes; `index_position`
    // maps each slot to its rank in that ordering.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    // Reverse lookup: object id -> bit position.
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    let mut object_types = Vec::with_capacity(object_count);
    // Per-commit data needed for selection and for the writer.
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Try the header-only read first; fall back to a full read when it
        // yields nothing.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                // Parents hidden by a shallow graft do not count.
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Largest `date` first; the sort is stable, so ties keep `bit_order` order.
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Few commits: every one of them gets a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        // Otherwise pick one commit per window; the window length comes from
        // `bitmap_next_commit_index`.
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default choice is the last commit of the window.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // A preferred tip wins immediately.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    // Otherwise the last merge commit seen in the window wins.
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // The maximal-commit count is only computed when trace2 event output is
    // requested through the environment.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    let word_count = object_count.div_ceil(64);
    // Finished bitsets keyed by commit. Walking `selected` in reverse lets a
    // later walk OR in a bitmap that was completed earlier instead of
    // re-walking that history.
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            // Already visited during this walk.
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            // A finished bitmap already covers this commit and what it reaches.
            if let Some(stored) = memo.get(&oid) {
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            // `false` means the tree walk hit an object outside the pack.
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    // Entries are handed to the writer in selection order.
    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
2502
2503fn bitmap_mark_tree(
2507 db: &impl ObjectReader,
2508 format: ObjectFormat,
2509 tree: &ObjectId,
2510 oid_to_pack: &HashMap<ObjectId, u32>,
2511 acc: &mut [u64],
2512) -> Result<bool> {
2513 let Some(&pack_pos) = oid_to_pack.get(tree) else {
2514 eprintln!(
2515 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
2516 );
2517 return Ok(false);
2518 };
2519 if bitset_get(acc, pack_pos) {
2520 return Ok(true);
2521 }
2522 bitset_set(acc, pack_pos);
2523 let object = db.read_object(tree)?;
2524 for entry in TreeEntries::new(format, &object.body) {
2525 let entry = entry?;
2526 if entry.is_gitlink() {
2527 continue;
2528 }
2529 if entry.is_tree() {
2530 if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
2531 return Ok(false);
2532 }
2533 } else {
2534 let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
2535 eprintln!(
2536 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
2537 entry.oid
2538 );
2539 return Ok(false);
2540 };
2541 bitset_set(acc, blob_pos);
2542 }
2543 }
2544 Ok(true)
2545}
2546
/// A reachability bitmap expanded into plain `u64` word vectors, together
/// with the tables needed to translate between object ids and bit positions.
pub struct LoadedPackBitmap {
    /// Number of objects covered by the bitmap.
    object_count: u32,
    /// Object id -> bit position.
    oid_to_pack: HashMap<ObjectId, u32>,
    /// Bit position -> object id (inverse of `oid_to_pack`).
    pack_to_oid: Vec<ObjectId>,
    /// Expanded bitmap words for every commit that has a stored bitmap.
    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
    /// Type bitmap words for commits.
    commits: Vec<u64>,
    /// Type bitmap words for trees.
    trees: Vec<u64>,
    /// Type bitmap words for blobs.
    blobs: Vec<u64>,
    /// Type bitmap words for tags.
    tags: Vec<u64>,
}
2560
2561impl LoadedPackBitmap {
2562 pub fn object_count(&self) -> u32 {
2563 self.object_count
2564 }
2565
2566 pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
2568 self.oid_to_pack.get(oid).copied()
2569 }
2570
2571 pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
2572 self.pack_to_oid.get(position as usize)
2573 }
2574
2575 pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
2578 self.commit_words.get(oid)
2579 }
2580
2581 pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
2583 self.commit_words.keys()
2584 }
2585
2586 pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
2588 match object_type {
2589 ObjectType::Commit => &self.commits,
2590 ObjectType::Tree => &self.trees,
2591 ObjectType::Blob => &self.blobs,
2592 ObjectType::Tag => &self.tags,
2593 }
2594 }
2595
2596 fn word_count(&self) -> usize {
2597 (self.object_count as usize).div_ceil(64)
2598 }
2599}
2600
2601pub fn load_pack_bitmap(
2608 objects_dir: &Path,
2609 format: ObjectFormat,
2610) -> Result<Option<LoadedPackBitmap>> {
2611 let pack_dir = objects_dir.join("pack");
2612 if !pack_dir.exists() {
2613 return Ok(None);
2614 }
2615 if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
2618 return Ok(Some(bitmap));
2619 }
2620 let mut bitmap_paths = Vec::new();
2621 for entry in fs::read_dir(&pack_dir)? {
2622 let path = entry?.path();
2623 if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
2624 && path
2625 .file_name()
2626 .and_then(|name| name.to_str())
2627 .is_some_and(|name| name.starts_with("pack-"))
2628 {
2629 bitmap_paths.push(path);
2630 }
2631 }
2632 bitmap_paths.sort();
2633 for bitmap_path in bitmap_paths {
2634 match load_pack_bitmap_file(&bitmap_path, format) {
2635 Ok(Some(bitmap)) => return Ok(Some(bitmap)),
2636 Ok(None) | Err(_) => continue,
2637 }
2638 }
2639 Ok(None)
2640}
2641
2642fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
2647 let midx_path = pack_dir.join("multi-pack-index");
2648 if !midx_path.exists() {
2649 return Ok(None);
2650 }
2651 let Ok(midx_bytes) = fs::read(&midx_path) else {
2652 return Ok(None);
2653 };
2654 let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
2655 return Ok(None);
2656 };
2657 let bitmap_path = pack_dir.join(format!(
2658 "multi-pack-index-{}.bitmap",
2659 midx.checksum.to_hex()
2660 ));
2661 if !bitmap_path.exists() {
2662 return Ok(None);
2663 }
2664 let object_count = midx.objects.len();
2665 let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
2670 .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
2671 .unwrap_or(true);
2672 let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
2673 (Some(chunk), true) => {
2674 sley_core::trace2::data("load_midx_revindex", "source", "midx");
2675 chunk.clone()
2676 }
2677 _ => {
2678 let rev_path =
2679 pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
2680 let Ok(rev_bytes) = fs::read(&rev_path) else {
2681 return Ok(None);
2683 };
2684 let Ok(parsed_rev) =
2685 sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
2686 else {
2687 return Ok(None);
2688 };
2689 sley_core::trace2::data("load_midx_revindex", "source", "rev");
2690 parsed_rev.positions
2691 }
2692 };
2693 let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
2694 return Ok(None);
2695 };
2696 let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
2697 Ok(parsed) => parsed,
2698 Err(_) => return Ok(None),
2699 };
2700 if parsed.pack_checksum != midx.checksum {
2701 return Ok(None);
2702 }
2703
2704 let mut pack_to_oid = Vec::with_capacity(object_count);
2707 for &midx_pos in &reverse_index {
2708 let Some(entry) = midx.objects.get(midx_pos as usize) else {
2709 return Ok(None);
2710 };
2711 pack_to_oid.push(entry.oid);
2712 }
2713 let mut oid_to_pack = HashMap::with_capacity(object_count);
2714 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2715 oid_to_pack.insert(*oid, pack_pos as u32);
2716 }
2717 match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2718 midx.objects.get(position).map(|entry| entry.oid)
2719 }) {
2720 Ok(loaded) => Ok(Some(loaded)),
2721 Err(_) => Ok(None),
2722 }
2723}
2724
2725fn load_pack_bitmap_file(
2726 bitmap_path: &Path,
2727 format: ObjectFormat,
2728) -> Result<Option<LoadedPackBitmap>> {
2729 let index_path = bitmap_path.with_extension("idx");
2730 if !index_path.exists() {
2731 return Ok(None);
2732 }
2733 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
2734 let object_count = index.entries.len();
2735 let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
2736 if parsed.pack_checksum != index.pack_checksum {
2737 return Ok(None);
2738 }
2739
2740 let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2741 pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2742 let mut pack_to_oid = Vec::with_capacity(object_count);
2743 for index_pos in &pack_order {
2744 pack_to_oid.push(index.entries[*index_pos as usize].oid);
2745 }
2746 let mut oid_to_pack = HashMap::with_capacity(object_count);
2747 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2748 oid_to_pack.insert(*oid, pack_pos as u32);
2749 }
2750
2751 assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2752 index.entries.get(position).map(|entry| entry.oid)
2753 })
2754 .map(Some)
2755}
2756
2757fn assemble_loaded_bitmap(
2762 parsed: PackBitmapIndex,
2763 object_count: usize,
2764 pack_to_oid: Vec<ObjectId>,
2765 oid_to_pack: HashMap<ObjectId, u32>,
2766 lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2767) -> Result<LoadedPackBitmap> {
2768 let word_count = object_count.div_ceil(64);
2769 let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2770 let mut words = bitmap.to_words()?;
2771 words.resize(word_count, 0);
2772 Ok(words)
2773 };
2774
2775 let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2776 let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2777 for (entry_index, entry) in parsed.entries.iter().enumerate() {
2778 let mut words = expand(&entry.bitmap)?;
2779 if entry.xor_offset > 0 {
2780 let base_index = entry_index - entry.xor_offset as usize;
2781 let base = &resolved[base_index];
2782 for (dst, src) in words.iter_mut().zip(base.iter()) {
2783 *dst ^= *src;
2784 }
2785 }
2786 let words = Arc::new(words);
2787 resolved.push(Arc::clone(&words));
2788 let commit_oid = lookup_oid(entry.object_position as usize)
2789 .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2790 commit_words.insert(commit_oid, words);
2791 }
2792
2793 Ok(LoadedPackBitmap {
2794 object_count: object_count as u32,
2795 oid_to_pack,
2796 pack_to_oid,
2797 commit_words,
2798 commits: expand(&parsed.type_bitmaps.commits)?,
2799 trees: expand(&parsed.type_bitmaps.trees)?,
2800 blobs: expand(&parsed.type_bitmaps.blobs)?,
2801 tags: expand(&parsed.type_bitmaps.tags)?,
2802 })
2803}
2804
/// Result of a bitmap-assisted reachability walk.
pub struct BitmapWalkResult {
    /// One bit per object covered by the bitmap; a set bit means reached.
    pub words: Vec<u64>,
    /// Reached objects that have no bit position in the bitmap, with their
    /// types, in discovery order.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
2812
2813impl BitmapWalkResult {
2814 pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2816 for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2817 *dst &= !*src;
2818 }
2819 let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2820 self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2821 }
2822}
2823
2824pub fn bitmap_reachable(
2835 bitmap: &LoadedPackBitmap,
2836 db: &impl ObjectReader,
2837 format: ObjectFormat,
2838 roots: &[ObjectId],
2839 include_objects: bool,
2840) -> Result<BitmapWalkResult> {
2841 let mut walk = BitmapFillWalk {
2842 bitmap,
2843 words: vec![0u64; bitmap.word_count()],
2844 extended: Vec::new(),
2845 extended_seen: HashSet::new(),
2846 };
2847 let mut commit_stack: Vec<ObjectId> = Vec::new();
2848
2849 for root in roots {
2850 let mut oid = *root;
2851 loop {
2853 let object = db.read_object(&oid)?;
2854 match object.object_type {
2855 ObjectType::Tag => {
2856 walk.mark(&oid, ObjectType::Tag);
2857 let tag = Tag::parse_ref(format, &object.body)?;
2858 oid = tag.object;
2859 }
2860 ObjectType::Commit => {
2861 commit_stack.push(oid);
2862 break;
2863 }
2864 ObjectType::Tree => {
2865 walk.mark_tree_closure(db, format, &oid)?;
2866 break;
2867 }
2868 ObjectType::Blob => {
2869 walk.mark(&oid, ObjectType::Blob);
2870 break;
2871 }
2872 }
2873 }
2874 }
2875
2876 while let Some(oid) = commit_stack.pop() {
2877 if let Some(position) = bitmap.pack_position(&oid) {
2878 if bitset_get(&walk.words, position) {
2879 continue;
2880 }
2881 if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
2882 bitset_or(&mut walk.words, stored);
2883 continue;
2884 }
2885 bitset_set(&mut walk.words, position);
2886 } else {
2887 if walk.extended_seen.contains(&oid) {
2888 continue;
2889 }
2890 walk.extended_seen.insert(oid);
2891 walk.extended.push((oid, ObjectType::Commit));
2892 }
2893 let object = db.read_object(&oid)?;
2894 let commit = Commit::parse_ref(format, &object.body)?;
2895 commit_stack.extend(grafted_parents(db, &oid, commit.parents));
2896 if include_objects {
2897 walk.mark_tree_closure(db, format, &commit.tree)?;
2898 }
2899 }
2900
2901 Ok(BitmapWalkResult {
2902 words: walk.words,
2903 extended: walk.extended,
2904 })
2905}
2906
/// Mutable state of one bitmap-assisted reachability walk.
struct BitmapFillWalk<'a> {
    /// Bitmap providing the object id -> bit position translation.
    bitmap: &'a LoadedPackBitmap,
    /// Bits of the objects reached so far that have a bit position.
    words: Vec<u64>,
    /// Reached objects without a bit position, in discovery order.
    extended: Vec<(ObjectId, ObjectType)>,
    /// Ids already recorded in `extended`, for de-duplication.
    extended_seen: HashSet<ObjectId>,
}
2913
2914impl BitmapFillWalk<'_> {
2915 fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2917 if let Some(position) = self.bitmap.pack_position(oid) {
2918 if bitset_get(&self.words, position) {
2919 return false;
2920 }
2921 bitset_set(&mut self.words, position);
2922 true
2923 } else {
2924 if !self.extended_seen.insert(*oid) {
2925 return false;
2926 }
2927 self.extended.push((*oid, object_type));
2928 true
2929 }
2930 }
2931
2932 fn mark_tree_closure(
2936 &mut self,
2937 db: &impl ObjectReader,
2938 format: ObjectFormat,
2939 tree: &ObjectId,
2940 ) -> Result<()> {
2941 if !self.mark(tree, ObjectType::Tree) {
2942 return Ok(());
2943 }
2944 let object = db.read_object(tree)?;
2945 for entry in TreeEntries::new(format, &object.body) {
2946 let entry = entry?;
2947 if entry.is_gitlink() {
2948 continue;
2949 }
2950 if entry.is_tree() {
2951 self.mark_tree_closure(db, format, &entry.oid)?;
2952 } else {
2953 self.mark(&entry.oid, ObjectType::Blob);
2954 }
2955 }
2956 Ok(())
2957 }
2958}
2959
/// In-memory object store: a mutex-protected map from object id to object.
#[derive(Debug)]
pub struct ObjectDatabase {
    /// Hash format used to compute and validate object ids.
    format: ObjectFormat,
    /// Stored objects, keyed by id.
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    /// Flag set through `with_promisor`.
    // NOTE(review): nothing in this part of the file reads the flag; confirm
    // its exact meaning against its users.
    promisor: bool,
}
2971
2972impl ObjectDatabase {
2973 pub fn new(format: ObjectFormat) -> Self {
2974 Self {
2975 format,
2976 objects: Mutex::new(HashMap::new()),
2977 promisor: false,
2978 }
2979 }
2980
2981 pub fn with_promisor(mut self, promisor: bool) -> Self {
2982 self.promisor = promisor;
2983 self
2984 }
2985
2986 pub fn contains(&self, oid: &ObjectId) -> bool {
2987 self.objects
2988 .lock()
2989 .map(|objects| objects.contains_key(oid))
2990 .unwrap_or(false)
2991 }
2992
2993 pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2994 let object = self.read_object(oid)?;
2995 let actual = object.object_id(self.format)?;
2996 if &actual == oid {
2997 Ok(())
2998 } else {
2999 Err(GitError::InvalidObject(format!(
3000 "object id mismatch: expected {oid}, got {actual}"
3001 )))
3002 }
3003 }
3004}
3005
3006impl ObjectReader for ObjectDatabase {
3007 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3008 self.objects
3009 .lock()
3010 .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
3011 .get(oid)
3012 .map(Arc::clone)
3013 .or_else(|| implied_empty_tree_object(self.format, oid))
3014 .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
3015 }
3016}
3017
3018impl ObjectWriter for ObjectDatabase {
3019 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3020 let oid = object.object_id(self.format)?;
3021 self.objects
3022 .lock()
3023 .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
3024 .entry(oid)
3025 .or_insert_with(|| Arc::new(object));
3026 Ok(oid)
3027 }
3028}
3029
/// An alternate object store, identified by its path.
// NOTE(review): presumably the path of another `objects` directory; confirm
// against the code that constructs this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
3034
/// Settings describing partial-clone behaviour.
// NOTE(review): field meanings below are inferred from their names only;
// confirm against the code that consumes this policy.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether missing promised objects are tolerated.
    pub allow_missing_promised_objects: bool,
}
3040
/// Shared cache of loaded pack file contents, keyed by pack path.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
3045
/// Bytes of a pack file, either memory-mapped or read onto the heap.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file; only available with the `mmap` feature.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read into memory.
    Heap(Vec<u8>),
}
3054
3055impl std::ops::Deref for PackData {
3056 type Target = [u8];
3057
3058 fn deref(&self) -> &[u8] {
3059 match self {
3060 #[cfg(feature = "mmap")]
3061 Self::Mapped(mapped) => mapped,
3062 Self::Heap(bytes) => bytes,
3063 }
3064 }
3065}
3066
3067#[cfg(feature = "mmap")]
3070fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3071 match sley_mmap::MappedFile::open_pack(pack_path) {
3072 Ok(mapped) => Ok(PackData::Mapped(mapped)),
3073 Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
3074 }
3075}
3076
3077#[cfg(not(feature = "mmap"))]
3078fn load_pack_data(pack_path: &Path) -> Result<PackData> {
3079 Ok(PackData::Heap(fs::read(pack_path)?))
3080}
3081
3082#[cfg(feature = "mmap")]
3083fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3084 match sley_mmap::MappedFile::open_pack(index_path) {
3085 Ok(mapped) => Ok(Arc::new(mapped)),
3086 Err(_) => Ok(Arc::new(fs::read(index_path)?)),
3087 }
3088}
3089
3090#[cfg(not(feature = "mmap"))]
3091fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3092 Ok(Arc::new(fs::read(index_path)?))
3093}
3094
3095#[cfg(feature = "mmap")]
3096fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3097 match sley_mmap::MappedFile::open_multi_pack_index(midx_path) {
3098 Ok(mapped) => Ok(Arc::new(mapped)),
3099 Err(_) => Ok(Arc::new(fs::read(midx_path)?)),
3100 }
3101}
3102
3103#[cfg(not(feature = "mmap"))]
3104fn load_multi_pack_index_lookup_data(midx_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
3105 Ok(Arc::new(fs::read(midx_path)?))
3106}
3107
/// Shared, byte-budgeted LRU cache of objects keyed by object id.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;

/// Per-pack LRU caches of objects keyed by pack offset; the outer map is
/// keyed by pack path.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;

/// Cache of `(object type, u64)` header data keyed by pack offset.
// NOTE(review): the `u64` is presumably the object size; confirm against
// `sley_pack::HeaderTypeCache`.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// One [`PackHeaderTypeCache`] per pack path.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;

/// Default object cache budget (96 MiB), used when `SLEY_OBJECT_CACHE_BYTES`
/// is unset or unparsable.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;

/// Default delta-base cache budget (96 MiB), used when
/// `SLEY_DELTA_BASE_CACHE_BYTES` is unset or unparsable.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
3145
3146fn cached_object_cost(object: &EncodedObject) -> usize {
3150 object.body.len().saturating_add(64)
3151}
3152
/// Reads a byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as `usize`; `default` is returned when the
/// variable is unset, not valid Unicode, or does not parse.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
3161
3162fn object_cache_budget() -> usize {
3169 static BUDGET: OnceLock<usize> = OnceLock::new();
3170 *BUDGET.get_or_init(|| {
3171 cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
3172 })
3173}
3174
3175fn delta_base_cache_budget() -> usize {
3179 static BUDGET: OnceLock<usize> = OnceLock::new();
3180 *BUDGET.get_or_init(|| {
3181 cache_budget_from_env(
3182 "SLEY_DELTA_BASE_CACHE_BYTES",
3183 DEFAULT_DELTA_BASE_CACHE_BYTES,
3184 )
3185 })
3186}
3187
/// Reports whether `SLEY_VERIFY_READS` was set to a value other than the
/// empty string or `0` (after trimming).
///
/// The environment is consulted once; later calls return the first answer.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    let enabled = VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS").is_ok_and(|raw| {
            let flag = raw.trim();
            !(flag.is_empty() || flag == "0")
        })
    });
    *enabled
}
3205
/// Byte-budgeted least-recently-used cache of objects.
///
/// Recency order is kept as a doubly linked list threaded through the map
/// entries by key: `head` is the next eviction candidate and `tail` the most
/// recently used entry.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    /// Maximum total cost (see `cached_object_cost`) the cache may hold.
    budget: usize,
    /// Total cost of the entries currently held.
    used: usize,
    /// Entries together with their list links.
    map: HashMap<K, LruEntry<K>>,
    /// Least recently used key, if any.
    head: Option<K>,
    /// Most recently used key, if any.
    tail: Option<K>,
}
3221
/// One cache entry plus its links in the recency list.
#[derive(Debug)]
struct LruEntry<K> {
    /// The cached object.
    object: Arc<EncodedObject>,
    /// Key of the neighbour towards the head (less recently used side).
    prev: Option<K>,
    /// Key of the neighbour towards the tail (more recently used side).
    next: Option<K>,
}
3228
impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
    /// Creates an empty cache that holds at most `budget` cost units.
    fn new(budget: usize) -> Self {
        Self {
            budget,
            used: 0,
            map: HashMap::new(),
            head: None,
            tail: None,
        }
    }

    /// Returns the object cached under `key` and marks it most recently used.
    fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
        let object = Arc::clone(&self.map.get(key)?.object);
        self.touch(key);
        Some(object)
    }

    /// Moves `key` to the tail of the recency list, if it is cached.
    fn touch(&mut self, key: &K) {
        // Already the most recent entry: nothing to relink.
        if self.tail.as_ref() == Some(key) {
            return;
        }
        if self.map.contains_key(key) {
            self.detach(key);
            self.attach_back(key.clone());
        }
    }

    /// Removes `key` from the cache and releases its cost; no-op when absent.
    fn remove(&mut self, key: &K) {
        if let Some(entry) = self.map.get(key) {
            self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
        }
        // Unlink before dropping the entry so the neighbours get re-joined.
        self.detach(key);
        self.map.remove(key);
    }

    /// Unlinks `key` from the recency list while leaving it in the map.
    fn detach(&mut self, key: &K) {
        // Take the entry's own links, leaving it unlinked.
        let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
            let prev = entry.prev.take();
            let next = entry.next.take();
            (prev, next)
        }) else {
            return;
        };

        // Join the predecessor (or the head pointer) to the successor.
        match &prev {
            Some(prev_key) => {
                if let Some(prev_entry) = self.map.get_mut(prev_key) {
                    prev_entry.next = next.clone();
                }
            }
            None => self.head = next.clone(),
        }
        // Join the successor (or the tail pointer) to the predecessor.
        match &next {
            Some(next_key) => {
                if let Some(next_entry) = self.map.get_mut(next_key) {
                    next_entry.prev = prev.clone();
                }
            }
            None => self.tail = prev.clone(),
        }
    }

    /// Links `key`, which must already be in the map, in as the new tail.
    fn attach_back(&mut self, key: K) {
        let previous_tail = self.tail.replace(key.clone());
        match previous_tail {
            Some(tail_key) => {
                if let Some(tail_entry) = self.map.get_mut(&tail_key) {
                    tail_entry.next = Some(key.clone());
                }
                if let Some(entry) = self.map.get_mut(&key) {
                    entry.prev = Some(tail_key);
                    entry.next = None;
                }
            }
            None => {
                // The list was empty: the entry becomes head and tail.
                self.head = Some(key.clone());
                if let Some(entry) = self.map.get_mut(&key) {
                    entry.prev = None;
                    entry.next = None;
                }
            }
        }
    }

    /// Drops every entry and resets the accounting.
    fn clear(&mut self) {
        self.map.clear();
        self.head = None;
        self.tail = None;
        self.used = 0;
    }

    /// Caches `object` under `key` as the most recently used entry, evicting
    /// from the head until the budget is respected again.
    ///
    /// Nothing is cached when the budget is zero. An object whose cost alone
    /// exceeds the budget is not cached, and any entry previously stored under
    /// `key` is removed in that case.
    fn put(&mut self, key: K, object: Arc<EncodedObject>) {
        if self.budget == 0 {
            return;
        }
        let cost = cached_object_cost(&object);
        if cost > self.budget {
            self.remove(&key);
            return;
        }
        if let Some(entry) = self.map.get_mut(&key) {
            // Replace in place and swap the old cost for the new one.
            let previous = std::mem::replace(&mut entry.object, object);
            self.used = self
                .used
                .saturating_sub(cached_object_cost(&previous))
                .saturating_add(cost);
            self.touch(&key);
        } else {
            self.used = self.used.saturating_add(cost);
            self.map.insert(
                key.clone(),
                LruEntry {
                    object,
                    prev: None,
                    next: None,
                },
            );
            self.attach_back(key);
        }
        // Evict least recently used entries until the budget holds.
        while self.used > self.budget {
            let Some(evicted) = self.head.clone() else {
                break;
            };
            self.remove(&evicted);
        }
    }
}
3362
/// LRU cache keyed by object id.
type LruObjectCache = LruCache<ObjectId>;
/// LRU cache keyed by a `u64` pack offset.
type LruOffsetCache = LruCache<u64>;
3367
3368struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
3373
3374impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
3375 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
3376 self.0.lock().ok()?.get(&offset)
3377 }
3378
3379 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
3380 if let Ok(mut cache) = self.0.lock() {
3381 cache.put(offset, object);
3382 }
3383 }
3384}
3385
3386struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
3390
3391impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
3392 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
3393 self.0.lock().ok()?.get(&pack_offset).copied()
3394 }
3395
3396 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
3397 if let Ok(mut cache) = self.0.lock() {
3398 cache.insert(pack_offset, header);
3399 }
3400 }
3401}
3402
/// Shared cache of parsed pack indexes, keyed by path.
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;

/// Shared cache of parsed multi-pack-indexes, keyed by path.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;

/// Shared cache of multi-pack-index object-id lookups, keyed by path.
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
3418
/// A pack known to the registry, with lazily loaded index and data and its
/// own per-pack caches.
#[derive(Debug)]
struct RegisteredPack {
    /// Path of the pack index file.
    idx: PathBuf,
    /// Path of the pack file.
    pack: PathBuf,
    /// Parsed index view, filled on first use.
    index: Mutex<Option<Arc<PackIndexViewData>>>,
    /// Pack bytes, filled on first use.
    data: Mutex<Option<Arc<PackData>>>,
    /// Offset-keyed object cache for this pack.
    delta_cache: Arc<Mutex<LruOffsetCache>>,
    /// Offset-keyed header cache for this pack.
    header_type_cache: PackHeaderTypeCache,
}
3432
3433impl RegisteredPack {
3434 fn new(idx: PathBuf, pack: PathBuf) -> Self {
3435 Self {
3436 idx,
3437 pack,
3438 index: Mutex::new(None),
3439 data: Mutex::new(None),
3440 delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
3441 header_type_cache: Arc::new(Mutex::new(HashMap::new())),
3442 }
3443 }
3444
3445 fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
3446 if let Ok(cache) = self.index.lock()
3447 && let Some(index) = cache.as_ref()
3448 {
3449 return Ok(Arc::clone(index));
3450 }
3451 let index_bytes = load_pack_index_data(&self.idx)?;
3452 let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
3453 index_bytes,
3454 format,
3455 )?);
3456 if let Ok(mut cache) = self.index.lock() {
3457 *cache = Some(Arc::clone(&index));
3458 }
3459 Ok(index)
3460 }
3461
3462 fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
3463 if let Ok(cache) = self.data.lock()
3464 && let Some(bytes) = cache.as_ref()
3465 {
3466 return Ok(Arc::clone(bytes));
3467 }
3468 if let Ok(cache) = pack_bytes.lock()
3469 && let Some(bytes) = cache.get(&self.pack)
3470 {
3471 let bytes = Arc::clone(bytes);
3472 if let Ok(mut local_cache) = self.data.lock() {
3473 *local_cache = Some(Arc::clone(&bytes));
3474 }
3475 return Ok(bytes);
3476 }
3477 let bytes = Arc::new(load_pack_data(&self.pack)?);
3478 if let Ok(mut local_cache) = self.data.lock() {
3479 *local_cache = Some(Arc::clone(&bytes));
3480 }
3481 if let Ok(mut cache) = pack_bytes.lock() {
3482 cache.insert(self.pack.clone(), Arc::clone(&bytes));
3483 }
3484 Ok(bytes)
3485 }
3486}
3487
/// Cheap summary of a pack directory's state; two fingerprints compare equal
/// when all three fields match.
// NOTE(review): the code that computes these values is outside this part of
// the file; the field descriptions are inferred from names and types.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    /// Modification time, when available.
    modified: Option<std::time::SystemTime>,
    /// Number of index files seen.
    idx_count: usize,
    /// Number of pack files seen.
    pack_count: usize,
}
3494
/// The set of registered packs together with the directory fingerprint it
/// was built for.
#[derive(Debug)]
struct PackRegistrySnapshot {
    /// Fingerprint associated with this snapshot.
    fingerprint: PackDirFingerprint,
    /// Registered packs; positions in this vector serve as pack indexes.
    packs: Vec<Arc<RegisteredPack>>,
    /// Position in `packs` most recently stored via `remember_hint`.
    recent_pack: Mutex<Option<usize>>,
}
3505
3506impl PackRegistrySnapshot {
3507 fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
3508 Self {
3509 fingerprint,
3510 packs,
3511 recent_pack: Mutex::new(None),
3512 }
3513 }
3514
3515 fn cached_hint(&self) -> Option<usize> {
3516 self.recent_pack
3517 .lock()
3518 .ok()
3519 .and_then(|hint| *hint)
3520 .filter(|pack_index| *pack_index < self.packs.len())
3521 }
3522
3523 fn remember_hint(&self, pack_index: usize) {
3524 if let Ok(mut hint) = self.recent_pack.lock() {
3525 *hint = Some(pack_index);
3526 }
3527 }
3528}
3529
/// Shared slot holding the current pack registry snapshot, if one was built.
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
3534
/// Location of an object inside a pack.
#[derive(Debug, Clone)]
struct PackLookup {
    /// Path of the pack file.
    pack: PathBuf,
    /// The registered pack, when the lookup came from the registry; its
    /// per-pack caches are used instead of the database-wide ones.
    registered: Option<Arc<RegisteredPack>>,
    /// Offset of the object within the pack.
    offset: u64,
}
3541
3542impl PackLookup {
3543 fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
3544 Self {
3545 pack: pack.pack.clone(),
3546 registered: Some(pack),
3547 offset,
3548 }
3549 }
3550
3551 fn from_path(pack: PathBuf, offset: u64) -> Self {
3552 Self {
3553 pack,
3554 registered: None,
3555 offset,
3556 }
3557 }
3558
3559 fn pack_path(&self) -> &Path {
3560 &self.pack
3561 }
3562
3563 fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
3564 match &self.registered {
3565 Some(pack) => pack.bytes(&database.pack_bytes),
3566 None => database.cached_pack_bytes(&self.pack),
3567 }
3568 }
3569
3570 fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
3571 match &self.registered {
3572 Some(pack) => database.cached_pack_index(&pack.idx),
3573 None => database.cached_pack_index(&self.pack.with_extension("idx")),
3574 }
3575 }
3576
3577 fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
3578 match &self.registered {
3579 Some(pack) => Some(Arc::clone(&pack.delta_cache)),
3580 None => database.pack_delta_cache(&self.pack),
3581 }
3582 }
3583
3584 fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
3585 match &self.registered {
3586 Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
3587 None => database.pack_header_type_cache(&self.pack),
3588 }
3589 }
3590}
3591
/// File-backed object database: loose objects, packs and alternates, plus a
/// set of shared caches.
///
/// The cache fields are `Arc`-wrapped, so clones of the database share them.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    /// Loose object store.
    loose: LooseObjectStore,
    /// Objects directory; packs are looked up in its `pack` subdirectory.
    objects_dir: PathBuf,
    /// Paths of alternate object stores.
    alternates: Vec<PathBuf>,
    /// Hash format of the store.
    format: ObjectFormat,
    /// Loaded pack contents by pack path.
    pack_bytes: PackBytesCache,
    /// Parsed pack indexes by path.
    pack_indexes: PackIndexCache,
    /// Parsed multi-pack-indexes by path.
    multi_pack_indexes: MultiPackIndexCache,
    /// Multi-pack-index object-id lookups by path.
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    /// Current pack registry snapshot.
    pack_registry: PackRegistryCache,
    /// LRU cache of objects keyed by object id.
    decoded: DecodedObjectCache,
    /// Per-pack offset-keyed object caches.
    pack_deltas: PackDeltaCaches,
    /// Per-pack header caches.
    pack_header_types: PackHeaderTypeCaches,
    /// Set of shallow-graft commit ids, initialized at most once.
    // NOTE(review): presumably populated from the repository's shallow file;
    // confirm against the initializer.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
3611
/// Answers repeated "is this object present?" queries against one database
/// while keeping the pack lookup structures it has already prepared.
#[derive(Debug)]
pub struct ObjectPresenceChecker {
    /// Database being queried.
    db: FileObjectDatabase,
    /// `<objects_dir>/pack`.
    pack_dir: PathBuf,
    /// Multi-pack-index lookup, when one was loaded.
    midx: Option<Arc<MultiPackIndexOidLookup>>,
    /// Registry snapshot currently in use.
    registry: Option<Arc<PackRegistrySnapshot>>,
    /// Index views loaded so far, parallel to the registry's `packs`.
    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
    /// Registry position of the pack that produced the last hit.
    recent_pack: Option<usize>,
    /// Whether the multi-pack-index lookup has been prepared.
    prepared_packs: bool,
    /// Whether the registry has been prepared.
    prepared_registry: bool,
}
3623
3624impl ObjectPresenceChecker {
3625 fn new(db: FileObjectDatabase) -> Self {
3626 let pack_dir = db.objects_dir.join("pack");
3627 Self {
3628 db,
3629 pack_dir,
3630 midx: None,
3631 registry: None,
3632 registry_indexes: Vec::new(),
3633 recent_pack: None,
3634 prepared_packs: false,
3635 prepared_registry: false,
3636 }
3637 }
3638
3639 pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
3640 if oid.format() != self.db.format {
3641 return Err(GitError::InvalidObjectId(format!(
3642 "object {oid} uses {}, store uses {}",
3643 oid.format().name(),
3644 self.db.format.name()
3645 )));
3646 }
3647 if self.db.loose.exists(oid)? {
3648 return Ok(true);
3649 }
3650 if self.find_packed(oid, false)? {
3651 return Ok(true);
3652 }
3653 if self.find_packed(oid, true)? {
3654 return Ok(true);
3655 }
3656 for alternate in &self.db.alternates {
3657 if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
3658 return Ok(true);
3659 }
3660 }
3661 self.db.loose.invalidate_cache();
3664 self.db.loose.exists(oid)
3665 }
3666
3667 fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
3668 self.prepare_packs(force_rescan)?;
3669 if let Some(midx) = &self.midx
3670 && midx.contains(oid)
3671 {
3672 return Ok(true);
3673 }
3674 self.prepare_registry(force_rescan)?;
3675 self.find_in_registry(oid)
3676 }
3677
3678 fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
3679 if self.prepared_packs && !force_rescan {
3680 return Ok(());
3681 }
3682 let midx_path = self.pack_dir.join("multi-pack-index");
3683 self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
3684 self.prepared_packs = true;
3685 Ok(())
3686 }
3687
3688 fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
3689 if self.prepared_registry && !force_rescan {
3690 return Ok(());
3691 }
3692 let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
3693 let registry_changed = match self.registry.as_ref() {
3694 Some(cached) => !Arc::ptr_eq(cached, ®istry),
3695 None => true,
3696 };
3697 if registry_changed {
3698 self.registry_indexes = vec![None; registry.packs.len()];
3699 self.recent_pack = None;
3700 self.registry = Some(registry);
3701 }
3702 self.prepared_registry = true;
3703 Ok(())
3704 }
3705
3706 fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
3707 let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
3708 return Ok(false);
3709 };
3710 if let Some(pack_index) = self
3711 .recent_pack
3712 .filter(|pack_index| *pack_index < registry.packs.len())
3713 {
3714 let index = self.registry_index(®istry, pack_index)?;
3715 if index.find(oid).is_some() {
3716 return Ok(true);
3717 }
3718 }
3719 for pack_index in 0..registry.packs.len() {
3720 if Some(pack_index) == self.recent_pack {
3721 continue;
3722 }
3723 let index = self.registry_index(®istry, pack_index)?;
3724 if index.find(oid).is_some() {
3725 self.recent_pack = Some(pack_index);
3726 return Ok(true);
3727 }
3728 }
3729 Ok(false)
3730 }
3731
3732 fn registry_index(
3733 &mut self,
3734 registry: &PackRegistrySnapshot,
3735 pack_index: usize,
3736 ) -> Result<Arc<PackIndexViewData>> {
3737 if self.registry_indexes.len() != registry.packs.len() {
3738 self.registry_indexes = vec![None; registry.packs.len()];
3739 self.recent_pack = None;
3740 }
3741 if let Some(index) = self
3742 .registry_indexes
3743 .get(pack_index)
3744 .and_then(|index| index.as_ref())
3745 {
3746 return Ok(Arc::clone(index));
3747 }
3748 let index = registry.packs[pack_index].index(self.db.format)?;
3749 if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
3750 *slot = Some(Arc::clone(&index));
3751 }
3752 Ok(index)
3753 }
3754}
3755
3756fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
3760 let Ok(contents) = std::fs::read_to_string(shallow_file) else {
3761 return HashSet::new();
3762 };
3763 contents
3764 .lines()
3765 .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
3766 .collect()
3767}
3768
3769pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
3770 env::var_os("GIT_OBJECT_DIRECTORY")
3771 .map(PathBuf::from)
3772 .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
3773}
3774
/// Resolves the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. the `GIT_COMMON_DIR` environment variable, returned verbatim;
/// 2. the trimmed contents of `<git_dir>/commondir`, taken relative to
///    `git_dir` unless absolute, and canonicalized when that succeeds;
/// 3. `git_dir` itself.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }
    let git_dir = git_dir.as_ref();
    match fs::read_to_string(git_dir.join("commondir")) {
        Ok(contents) => {
            // `Path::join` replaces the base when given an absolute path, so
            // one call covers both the absolute and the relative case.
            let common = git_dir.join(contents.trim());
            match fs::canonicalize(&common) {
                Ok(resolved) => resolved,
                Err(_) => common,
            }
        }
        Err(_) => git_dir.to_path_buf(),
    }
}
3792
/// Lists the object ids stored in the repository at `git_dir`.
///
/// Resolves the object directory with `repository_objects_dir` and delegates
/// to `object_ids_in_objects_dir`; errors from that call are returned as-is.
pub fn repository_object_ids(
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
) -> Result<Vec<ObjectId>> {
    object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
}
3799
3800pub fn object_ids_in_objects_dir(
3801 objects_dir: impl AsRef<Path>,
3802 format: ObjectFormat,
3803) -> Result<Vec<ObjectId>> {
3804 let objects_dir = objects_dir.as_ref();
3805 let mut oids = HashSet::new();
3806 collect_loose_object_ids(objects_dir, format, &mut oids)?;
3807 collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
3808 let mut oids = oids.into_iter().collect::<Vec<_>>();
3809 oids.sort_by_key(ObjectId::to_hex);
3810 Ok(oids)
3811}
3812
3813fn collect_loose_object_ids(
3814 objects_dir: &Path,
3815 format: ObjectFormat,
3816 oids: &mut HashSet<ObjectId>,
3817) -> Result<()> {
3818 if !objects_dir.exists() {
3819 return Ok(());
3820 }
3821 let hex_len = format.hex_len();
3822 for entry in fs::read_dir(objects_dir)? {
3823 let entry = entry?;
3824 if !entry.file_type()?.is_dir() {
3825 continue;
3826 }
3827 let name = entry.file_name();
3828 let Some(fanout) = name.to_str() else {
3829 continue;
3830 };
3831 if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3832 continue;
3833 }
3834 for object_entry in fs::read_dir(entry.path())? {
3835 let object_entry = object_entry?;
3836 if !object_entry.file_type()?.is_file() {
3837 continue;
3838 }
3839 let name = object_entry.file_name();
3840 let Some(suffix) = name.to_str() else {
3841 continue;
3842 };
3843 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3844 continue;
3845 }
3846 oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
3847 }
3848 }
3849 Ok(())
3850}
3851
3852fn collect_loose_fanout_object_ids(
3853 objects_dir: &Path,
3854 format: ObjectFormat,
3855 fanout: u8,
3856 oids: &mut HashSet<ObjectId>,
3857) -> Result<()> {
3858 let fanout_hex = format!("{fanout:02x}");
3859 let fanout_dir = objects_dir.join(&fanout_hex);
3860 let entries = match fs::read_dir(&fanout_dir) {
3861 Ok(entries) => entries,
3862 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
3863 Err(err) => return Err(GitError::Io(err.to_string())),
3864 };
3865 let hex_len = format.hex_len();
3866 for object_entry in entries {
3867 let object_entry = object_entry?;
3868 let name = object_entry.file_name();
3869 let Some(suffix) = name.to_str() else {
3870 continue;
3871 };
3872 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3873 continue;
3874 }
3875 oids.insert(ObjectId::from_hex(
3876 format,
3877 &format!("{fanout_hex}{suffix}"),
3878 )?);
3879 }
3880 Ok(())
3881}
3882
/// Cache of loose-object ids, tracked per fanout byte.
///
/// NOTE(review): the code that fills and reads this cache is outside this
/// part of the file; the field descriptions below are inferred from names
/// and types only — confirm against the users of this struct.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    // Presumably the fanout bytes whose directory has already been scanned.
    loaded_fanouts: HashSet<u8>,
    // Presumably the object ids discovered in the scanned fanout directories.
    objects: HashSet<ObjectId>,
}
3888
3889pub fn packed_object_ids(
3894 objects_dir: impl AsRef<Path>,
3895 format: ObjectFormat,
3896) -> Result<HashSet<ObjectId>> {
3897 let mut oids = HashSet::new();
3898 collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
3899 Ok(oids)
3900}
3901
3902fn collect_packed_object_ids(
3903 pack_dir: &Path,
3904 format: ObjectFormat,
3905 oids: &mut HashSet<ObjectId>,
3906) -> Result<()> {
3907 if !pack_dir.exists() {
3908 return Ok(());
3909 }
3910 let midx_path = pack_dir.join("multi-pack-index");
3911 if midx_path.exists() {
3912 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
3913 oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
3914 }
3915 for entry in fs::read_dir(pack_dir)? {
3916 let path = entry?.path();
3917 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3918 continue;
3919 }
3920 let index = PackIndex::parse(&fs::read(path)?, format)?;
3921 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
3922 }
3923 Ok(())
3924}
3925
3926impl FileObjectDatabase {
/// Returns the object format this database was opened with.
pub fn object_format(&self) -> ObjectFormat {
    self.format
}
3931
/// Returns the object directory this database reads from and writes to.
pub fn objects_dir(&self) -> &Path {
    &self.objects_dir
}
3936
3937 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3938 let objects_dir = objects_dir.into();
3939 Self {
3940 loose: LooseObjectStore::new(objects_dir.clone(), format),
3941 alternates: alternate_object_dirs(&objects_dir),
3942 objects_dir,
3943 format,
3944 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3945 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3946 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3947 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3948 pack_registry: Arc::new(Mutex::new(None)),
3949 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3950 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3951 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3952 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3953 }
3954 }
3955
3956 fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3957 let objects_dir = objects_dir.into();
3958 Self {
3959 loose: LooseObjectStore::new(objects_dir.clone(), format),
3960 alternates: Vec::new(),
3961 objects_dir,
3962 format,
3963 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3964 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3965 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3966 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3967 pack_registry: Arc::new(Mutex::new(None)),
3968 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3969 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3970 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3971 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3972 }
3973 }
3974
/// Opens the object database of the repository at `git_dir`, using the
/// object directory that `repository_objects_dir` resolves for it.
pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
    Self::new(repository_objects_dir(git_dir), format)
}
3978
3979 pub fn refresh_read_cache(&self) {
3984 if let Ok(mut cache) = self.pack_registry.lock() {
3985 *cache = None;
3986 }
3987 if let Ok(mut cache) = self.pack_indexes.lock() {
3988 cache.clear();
3989 }
3990 if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3991 cache.clear();
3992 }
3993 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3994 cache.clear();
3995 }
3996 if let Ok(mut cache) = self.pack_bytes.lock() {
3997 cache.clear();
3998 }
3999 if let Ok(mut cache) = self.pack_deltas.lock() {
4000 cache.clear();
4001 }
4002 if let Ok(mut cache) = self.pack_header_types.lock() {
4003 cache.clear();
4004 }
4005 if let Ok(mut cache) = self.decoded.lock() {
4006 cache.clear();
4007 }
4008 self.loose.invalidate_cache();
4009 }
4010
/// Returns the loose-object store backing this database.
pub fn loose(&self) -> &LooseObjectStore {
    &self.loose
}
4014
/// Creates an `ObjectPresenceChecker` that works on a clone of this database.
pub fn presence_checker(&self) -> ObjectPresenceChecker {
    ObjectPresenceChecker::new(self.clone())
}
4018
/// Installs `pack` with default install options.
///
/// See `install_pack_with_options` for the validation performed and the
/// errors returned.
pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
    self.install_pack_with_options(pack, RawPackInstallOptions::default())
}
4022
4023 pub fn install_pack_with_options(
4024 &self,
4025 pack: &PackWrite,
4026 options: RawPackInstallOptions,
4027 ) -> Result<PackInstallResult> {
4028 if pack.checksum.format() != self.format {
4029 return Err(GitError::InvalidObjectId(format!(
4030 "pack checksum uses {}, store uses {}",
4031 pack.checksum.format().name(),
4032 self.format.name()
4033 )));
4034 }
4035 for entry in &pack.entries {
4036 if entry.oid.format() != self.format {
4037 return Err(GitError::InvalidObjectId(format!(
4038 "pack entry {} uses {}, store uses {}",
4039 entry.oid,
4040 entry.oid.format().name(),
4041 self.format.name()
4042 )));
4043 }
4044 }
4045 let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
4046 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4047 if canonical_index.pack_checksum != pack.checksum
4048 || parsed_index.pack_checksum != pack.checksum
4049 {
4050 return Err(GitError::InvalidFormat(
4051 "pack and index checksums do not match pack write".into(),
4052 ));
4053 }
4054 if pack.index != canonical_index.index {
4055 return Err(GitError::InvalidFormat(
4056 "pack index does not match pack contents".into(),
4057 ));
4058 }
4059
4060 let pack_dir = self.objects_dir.join("pack");
4061 fs::create_dir_all(&pack_dir)?;
4062 let pack_name = format!("pack-{}", pack.checksum.to_hex());
4063 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4064 let index_path = pack_dir.join(format!("{pack_name}.idx"));
4065 if !pack_path.exists() || !index_path.exists() {
4066 write_pack_component(&pack_path, &pack.pack)?;
4067 write_pack_component(&index_path, &pack.index)?;
4068 }
4069 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4070 Ok(PackInstallResult {
4071 pack_name,
4072 pack_path,
4073 index_path,
4074 promisor_path,
4075 object_ids: canonical_index
4076 .entries
4077 .iter()
4078 .map(|entry| entry.oid)
4079 .collect(),
4080 })
4081 }
4082
/// Installs a pack produced by this process's pack writer, with default
/// install options.
///
/// See `install_written_pack_with_options` for the checks performed.
pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
    self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
}
4093
4094 pub fn install_written_pack_with_options(
4095 &self,
4096 pack: &PackWrite,
4097 options: RawPackInstallOptions,
4098 ) -> Result<PackInstallResult> {
4099 validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
4100 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
4101 if parsed_index.pack_checksum != pack.checksum {
4102 return Err(GitError::InvalidFormat(
4103 "pack write index checksum does not match pack".into(),
4104 ));
4105 }
4106 if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
4107 return Err(GitError::InvalidFormat(
4108 "pack write index does not match generated entries".into(),
4109 ));
4110 }
4111 self.install_generated_pack_unchecked(pack, options)
4112 }
4113
4114 fn install_generated_pack_unchecked(
4115 &self,
4116 pack: &PackWrite,
4117 options: RawPackInstallOptions,
4118 ) -> Result<PackInstallResult> {
4119 let pack_dir = self.objects_dir.join("pack");
4120 fs::create_dir_all(&pack_dir)?;
4121 let pack_name = format!("pack-{}", pack.checksum.to_hex());
4122 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4123 let index_path = pack_dir.join(format!("{pack_name}.idx"));
4124 if !pack_path.exists() || !index_path.exists() {
4125 write_pack_component(&pack_path, &pack.pack)?;
4126 write_pack_component(&index_path, &pack.index)?;
4127 }
4128 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4129 Ok(PackInstallResult {
4130 pack_name,
4131 pack_path,
4132 index_path,
4133 promisor_path,
4134 object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
4135 })
4136 }
4137
/// Installs the pack contained in `pack_bytes` with default install options.
///
/// See `install_raw_pack_with_options` for details.
pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
    self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
}
4141
4142 pub fn install_raw_pack_with_options(
4143 &self,
4144 pack_bytes: &[u8],
4145 options: RawPackInstallOptions,
4146 ) -> Result<PackInstallResult> {
4147 let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
4148 let pack_dir = self.objects_dir.join("pack");
4149 fs::create_dir_all(&pack_dir)?;
4150 let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
4151 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
4152 let index_path = pack_dir.join(format!("{pack_name}.idx"));
4153 if !pack_path.exists() || !index_path.exists() {
4154 write_pack_component(&pack_path, pack_bytes)?;
4155 write_pack_component(&index_path, &built.index)?;
4156 }
4157 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
4158 Ok(PackInstallResult {
4159 pack_name,
4160 pack_path,
4161 index_path,
4162 promisor_path,
4163 object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
4164 })
4165 }
4166
4167 pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
4168 if self.loose.exists(oid)? {
4169 return Ok(true);
4170 }
4171 if self.find_pack_containing(oid)?.is_some() {
4172 return Ok(true);
4173 }
4174 for alternate in &self.alternates {
4175 if Self::without_alternates(alternate, self.format).contains(oid)? {
4176 return Ok(true);
4177 }
4178 }
4179 self.loose.invalidate_cache();
4182 self.loose.exists(oid)
4183 }
4184
4185 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4186 let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
4187 .into_iter()
4188 .collect::<HashSet<_>>();
4189 for alternate in &self.alternates {
4190 oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
4191 }
4192 let mut oids = oids.into_iter().collect::<Vec<_>>();
4193 oids.sort_by_key(ObjectId::to_hex);
4194 Ok(oids)
4195 }
4196
4197 pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
4198 if let Some(disk_size) = self.loose.disk_size(oid)? {
4199 return Ok(Some(ObjectStorageInfo {
4200 disk_size,
4201 deltabase: zero_oid(self.format)?,
4202 }));
4203 }
4204 if let Some(info) = self.packed_object_storage_info(oid)? {
4205 return Ok(Some(info));
4206 }
4207 for alternate in &self.alternates {
4208 if let Some(info) =
4209 Self::without_alternates(alternate, self.format).object_storage_info(oid)?
4210 {
4211 return Ok(Some(info));
4212 }
4213 }
4214 self.loose.invalidate_cache();
4217 if let Some(disk_size) = self.loose.disk_size(oid)? {
4218 return Ok(Some(ObjectStorageInfo {
4219 disk_size,
4220 deltabase: zero_oid(self.format)?,
4221 }));
4222 }
4223 Ok(None)
4224 }
4225
4226 pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
4227 validate_object_id_prefix(self.format, prefix)?;
4228 let mut matches = Vec::new();
4229 for oid in self.object_ids()? {
4230 if object_id_matches_prefix(&oid, prefix) {
4231 matches.push(oid);
4232 }
4233 }
4234 Ok(match matches.len() {
4235 0 => ObjectPrefixResolution::Missing,
4236 1 => ObjectPrefixResolution::Unique(matches.remove(0)),
4237 _ => ObjectPrefixResolution::Ambiguous(matches),
4238 })
4239 }
4240
/// Returns the type and size of `oid` without necessarily decoding its body,
/// or `None` when the object cannot be found.
///
/// Sources are tried in this order: the implied empty tree, the
/// decoded-object cache, the loose store, the packs, each alternate, and
/// finally the loose store again after its cache has been invalidated.
///
/// # Errors
/// Propagates failures from the loose store, pack lookup, and pack header
/// parsing.
pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
    // The empty tree is answered without touching storage.
    if implied_empty_tree_object(self.format, oid).is_some() {
        return Ok(Some((ObjectType::Tree, 0)));
    }
    // An already-decoded object knows its own type and body length.
    if let Ok(mut cache) = self.decoded.lock()
        && let Some(object) = cache.get(oid)
    {
        return Ok(Some((object.object_type, object.body.len() as u64)));
    }
    if let Some(header) = self.loose.read_header(oid)? {
        return Ok(Some(header));
    }
    if let Some(pack_lookup) = self.find_pack_containing(oid)? {
        let bytes = pack_lookup.pack_bytes(self)?;
        let type_cache = pack_lookup.header_type_cache(self);
        // Base objects referenced by id are resolved through this same
        // method; only their type is handed back to the pack reader.
        let resolve_ref_base = |base: &ObjectId| {
            self.read_object_header(base)
                .map(|header| header.map(|(t, _)| t))
        };
        // Use the per-pack header-type cache when one is available.
        let header = match &type_cache {
            Some(cache) => {
                let mut adapter = PackHeaderTypeCacheAdapter(cache);
                sley_pack::read_object_header_at_with_cache(
                    &bytes,
                    pack_lookup.offset,
                    self.format,
                    resolve_ref_base,
                    &mut adapter,
                )?
            }
            None => sley_pack::read_object_header_at(
                &bytes,
                pack_lookup.offset,
                self.format,
                resolve_ref_base,
            )?,
        };
        return Ok(Some(header));
    }
    for alternate in &self.alternates {
        if let Some(header) =
            Self::without_alternates(alternate, self.format).read_object_header(oid)?
        {
            return Ok(Some(header));
        }
    }
    // Last resort: drop the loose-store cache and retry the loose lookup.
    self.loose.invalidate_cache();
    if let Some(header) = self.loose.read_header(oid)? {
        return Ok(Some(header));
    }
    Ok(None)
}
4308
4309 fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
4310 if let Ok(mut cache) = self.decoded.lock()
4313 && let Some(object) = cache.get(oid)
4314 {
4315 return Ok(Some(object));
4316 }
4317 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
4318 return Ok(None);
4319 };
4320 self.read_packed_object_at_lookup(oid, &pack_lookup)
4321 .map(Some)
4322 }
4323
/// Decodes the object at `pack_lookup` and stores it in the decoded-object
/// cache under `oid`.
///
/// A cached copy of `oid` is returned without touching the pack. When
/// `verify_reads_enabled()` is true, the decoded object's id is recomputed
/// and must equal `oid`.
///
/// # Errors
/// Propagates pack loading and decoding failures, and returns
/// `GitError::InvalidObject` when verification finds a different id.
fn read_packed_object_at_lookup(
    &self,
    oid: &ObjectId,
    pack_lookup: &PackLookup,
) -> Result<Arc<EncodedObject>> {
    if let Ok(mut cache) = self.decoded.lock()
        && let Some(object) = cache.get(oid)
    {
        return Ok(object);
    }
    let bytes = pack_lookup.pack_bytes(self)?;
    let delta_cache = pack_lookup.delta_cache(self);
    // Wrap the per-pack delta cache (if any) in the adapter type the pack
    // reader is called with.
    let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
    // Delta bases referenced by id are read through the full object reader.
    let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
    let object = match &delta_adapter {
        Some(adapter) => sley_pack::read_object_at_with_cache_arc(
            &bytes,
            pack_lookup.offset,
            self.format,
            resolve_ref_base,
            adapter,
        )?,
        None => sley_pack::read_object_at_arc(
            &bytes,
            pack_lookup.offset,
            self.format,
            resolve_ref_base,
        )?,
    };
    if verify_reads_enabled() {
        // Optional integrity check: the decoded bytes must hash to `oid`.
        let actual = object.object_id(self.format)?;
        if actual != *oid {
            return Err(GitError::InvalidObject(format!(
                "pack object id mismatch: index says {oid}, decoded {actual}"
            )));
        }
    }
    // A failed lock only skips caching; the object is still returned.
    if let Ok(mut cache) = self.decoded.lock() {
        cache.put(*oid, Arc::clone(&object));
    }
    Ok(object)
}
4378
4379 fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
4383 let mut caches = self.pack_deltas.lock().ok()?;
4384 let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
4385 Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
4386 });
4387 Some(Arc::clone(cache))
4388 }
4389
4390 fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
4394 let mut caches = self.pack_header_types.lock().ok()?;
4395 let cache = caches
4396 .entry(pack_path.to_path_buf())
4397 .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
4398 Some(Arc::clone(cache))
4399 }
4400
4401 fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
4406 if let Ok(cache) = self.pack_bytes.lock()
4407 && let Some(bytes) = cache.get(pack_path)
4408 {
4409 return Ok(Arc::clone(bytes));
4410 }
4411 let bytes = Arc::new(load_pack_data(pack_path)?);
4412 if let Ok(mut cache) = self.pack_bytes.lock() {
4413 cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
4414 }
4415 Ok(bytes)
4416 }
4417
4418 fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
4422 if let Ok(cache) = self.pack_indexes.lock()
4423 && let Some(index) = cache.get(index_path)
4424 {
4425 return Ok(Arc::clone(index));
4426 }
4427 let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
4428 if let Ok(mut cache) = self.pack_indexes.lock() {
4429 cache.insert(index_path.to_path_buf(), Arc::clone(&index));
4430 }
4431 Ok(index)
4432 }
4433
4434 fn cached_multi_pack_index_oid_lookup(
4435 &self,
4436 midx_path: &Path,
4437 ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
4438 if !midx_path.exists() {
4439 return Ok(None);
4440 }
4441 if let Ok(cache) = self.multi_pack_oid_lookups.lock()
4442 && let Some(midx) = cache.get(midx_path)
4443 {
4444 return Ok(Some(Arc::clone(midx)));
4445 }
4446 let bytes = load_multi_pack_index_lookup_data(midx_path)?;
4447 let midx = Arc::new(MultiPackIndexOidLookup::parse(bytes, self.format)?);
4448 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
4449 cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
4450 }
4451 Ok(Some(midx))
4452 }
4453
/// Returns the pack registry snapshot for `pack_dir`.
///
/// Without `force_rescan`, an already-cached snapshot is returned as-is.
/// Otherwise the directory is scanned; if the scan has the same fingerprint
/// and the same pack set as the cached snapshot, the cached `Arc` is kept
/// and returned so that callers comparing by pointer see "unchanged".
/// In every other case the fresh scan replaces the cache.
///
/// # Errors
/// Propagates failures from the cache lookup and the directory scan.
fn cached_pack_registry(
    &self,
    pack_dir: &Path,
    force_rescan: bool,
) -> Result<Arc<PackRegistrySnapshot>> {
    if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
        return Ok(registry);
    }
    let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
    // If the lock cannot be taken, the scan is returned without being cached.
    if let Ok(mut cache) = self.pack_registry.lock() {
        match cache.as_ref() {
            // Same directory state: keep the existing snapshot.
            Some(existing)
                if existing.fingerprint == scanned.fingerprint
                    && same_registered_pack_set(&existing.packs, &scanned.packs) =>
            {
                return Ok(Arc::clone(existing));
            }
            _ => {
                *cache = Some(Arc::clone(&scanned));
            }
        }
    }
    Ok(scanned)
}
4482
4483 fn find_in_pack_registry(
4484 &self,
4485 registry: Arc<PackRegistrySnapshot>,
4486 oid: &ObjectId,
4487 ) -> Result<Option<PackLookup>> {
4488 let hinted_pack_index = registry.cached_hint();
4489 if let Some(pack_index) = hinted_pack_index {
4490 let pack = ®istry.packs[pack_index];
4491 let index = pack.index(self.format)?;
4492 if let Some(entry) = index.find(oid) {
4493 return Ok(Some(PackLookup::from_registered(
4494 Arc::clone(pack),
4495 entry.offset,
4496 )));
4497 }
4498 }
4499 for (pack_index, pack) in registry.packs.iter().enumerate() {
4500 if Some(pack_index) == hinted_pack_index {
4501 continue;
4502 }
4503 let index = pack.index(self.format)?;
4504 if let Some(entry) = index.find(oid) {
4505 registry.remember_hint(pack_index);
4506 return Ok(Some(PackLookup::from_registered(
4507 Arc::clone(pack),
4508 entry.offset,
4509 )));
4510 }
4511 }
4512 Ok(None)
4513 }
4514
4515 fn read_packed_object_from_other_packs(
4521 &self,
4522 oid: &ObjectId,
4523 exclude: &PackLookup,
4524 ) -> Result<Option<Arc<EncodedObject>>> {
4525 let pack_dir = self.objects_dir.join("pack");
4526 let Ok(entries) = fs::read_dir(&pack_dir) else {
4527 return Ok(None);
4528 };
4529 let excluded_pack = exclude.pack_path().to_path_buf();
4530 for entry in entries {
4531 let idx_path = entry?.path();
4532 if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
4533 continue;
4534 }
4535 let pack_path = idx_path.with_extension("pack");
4536 if pack_path == excluded_pack {
4537 continue;
4538 }
4539 let Ok(idx_bytes) = fs::read(&idx_path) else {
4540 continue;
4541 };
4542 let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
4543 continue;
4544 };
4545 let Some(entry) = index.find(oid) else {
4546 continue;
4547 };
4548 let candidate = PackLookup::from_path(pack_path, entry.offset);
4549 if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
4550 return Ok(Some(object));
4551 }
4552 }
4553 Ok(None)
4554 }
4555
4556 fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
4557 if oid.format() != self.format {
4558 return Err(GitError::InvalidObjectId(format!(
4559 "object {oid} uses {}, store uses {}",
4560 oid.format().name(),
4561 self.format.name()
4562 )));
4563 }
4564 let pack_dir = self.objects_dir.join("pack");
4565 if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
4570 && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
4571 {
4572 return Ok(Some(pack_paths));
4573 }
4574 if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
4575 && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
4576 {
4577 return Ok(Some(pack_paths));
4578 }
4579
4580 if !pack_dir.exists() {
4581 return Ok(None);
4582 }
4583 if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
4584 return Ok(Some(pack_paths));
4585 }
4586 let registry = self.cached_pack_registry(&pack_dir, false)?;
4590 if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(®istry), oid)? {
4591 return Ok(Some(pack_paths));
4592 }
4593 let refreshed = self.cached_pack_registry(&pack_dir, true)?;
4594 if Arc::ptr_eq(®istry, &refreshed) {
4595 return Ok(None);
4597 }
4598 self.find_in_pack_registry(refreshed, oid)
4599 }
4600
/// Returns the on-disk size and delta base of `oid` as stored in a pack, or
/// `None` when no pack contains it.
///
/// The size is the distance from the entry's offset to the `end_offset`
/// reported by `scan_pack_index_offsets`. The delta base is the zero id for
/// non-delta entries, the referenced id for ref-deltas, and the id the index
/// scan reports for the base offset of ofs-deltas.
///
/// # Errors
/// `GitError::InvalidFormat` when the pack is shorter than its checksum
/// trailer, when the computed end precedes the entry offset, or when an
/// ofs-delta base id is not reported by the index scan; other lookup and
/// I/O failures are propagated.
fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
    let Some(pack_lookup) = self.find_pack_containing(oid)? else {
        return Ok(None);
    };
    let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
    // The pack ends with a checksum of `raw_len` bytes; entries stop before it.
    let trailer_offset = pack_len
        .checked_sub(self.format.raw_len() as u64)
        .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
    let index = pack_lookup.pack_index(self)?;
    let pack = pack_lookup.pack_bytes(self)?;
    let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
    // Only offset-based deltas need their base looked up by offset.
    let delta_base_offset = match &delta_base {
        Some(PackDeltaBase::Offset(offset)) => Some(*offset),
        Some(PackDeltaBase::Ref(_)) | None => None,
    };
    let offset_info = scan_pack_index_offsets(
        &index,
        pack_lookup.offset,
        trailer_offset,
        delta_base_offset,
    )?;
    let disk_size = offset_info
        .end_offset
        .checked_sub(pack_lookup.offset)
        .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
    let deltabase = match delta_base {
        Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
            GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
        })?,
        Some(PackDeltaBase::Ref(oid)) => oid,
        None => zero_oid(self.format)?,
    };
    Ok(Some(ObjectStorageInfo {
        disk_size,
        deltabase,
    }))
}
4643
4644 fn find_midx_pack_containing(
4645 &self,
4646 pack_dir: &Path,
4647 oid: &ObjectId,
4648 ) -> Result<Option<PackLookup>> {
4649 let midx_path = pack_dir.join("multi-pack-index");
4650 let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
4651 return Ok(None);
4652 };
4653 self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
4654 }
4655
4656 fn midx_oid_lookup_pack_paths(
4657 &self,
4658 pack_dir: &Path,
4659 midx: &MultiPackIndexOidLookup,
4660 oid: &ObjectId,
4661 ) -> Result<Option<PackLookup>> {
4662 let Some(entry) = midx.find(oid)? else {
4663 return Ok(None);
4664 };
4665 let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
4666 return Err(GitError::InvalidFormat(
4667 "multi-pack-index object points past pack table".into(),
4668 ));
4669 };
4670 let pack_file_name = pack_name
4671 .strip_suffix(".idx")
4672 .map(|stem| format!("{stem}.pack"))
4673 .unwrap_or_else(|| pack_name.to_string());
4674 let pack = pack_dir.join(pack_file_name);
4675 Ok(Some(PackLookup::from_path(pack, entry.offset)))
4676 }
4677
4678 fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
4679 let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
4680 let cache = self.multi_pack_oid_lookups.lock().ok()?;
4681 cache.get(&midx_path).map(Arc::clone)
4682 }
4683
4684 fn cached_loaded_pack_registry(
4690 &self,
4691 _pack_dir: &Path,
4692 ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
4693 let cache = match self.pack_registry.lock() {
4694 Ok(cache) => cache,
4695 Err(_) => return Ok(None),
4696 };
4697 Ok(cache.as_ref().map(Arc::clone))
4698 }
4699}
4700
4701fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
4702 if prefix.len() < 4 || prefix.len() > format.hex_len() {
4703 return Err(GitError::InvalidObjectId(format!(
4704 "expected 4 to {} hex digits for {}, got {}",
4705 format.hex_len(),
4706 format.name(),
4707 prefix.len()
4708 )));
4709 }
4710 if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
4711 return Err(GitError::InvalidObjectId(format!(
4712 "non-hex object id prefix {prefix}"
4713 )));
4714 }
4715 Ok(())
4716}
4717
4718fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
4719 oid.to_hex()
4720 .as_bytes()
4721 .iter()
4722 .zip(prefix.as_bytes())
4723 .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
4724}
4725
4726fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
4727 match fs::metadata(pack_dir) {
4728 Ok(metadata) => Ok(metadata.modified().ok()),
4729 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
4730 Err(err) => Err(GitError::Io(err.to_string())),
4731 }
4732}
4733
/// Scans `pack_dir` and builds a registry snapshot of its packs.
///
/// The snapshot's fingerprint records the directory's modification time and
/// the number of `.idx` and `.pack` files seen. A pack is registered for
/// every `.idx` file whose sibling `.pack` file has readable metadata.
/// Packs are ordered newest first by modification time, then larger file
/// first, then by index path ascending. `_format` is unused.
///
/// # Errors
/// A missing directory yields an empty snapshot; any other directory error
/// is returned as `GitError::Io`, and entry iteration errors are propagated.
fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
    // Taken before listing the directory.
    let modified = pack_dir_modified(pack_dir)?;
    let entries = match fs::read_dir(pack_dir) {
        Ok(entries) => entries,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            return Ok(PackRegistrySnapshot::new(
                PackDirFingerprint {
                    modified,
                    idx_count: 0,
                    pack_count: 0,
                },
                Vec::new(),
            ));
        }
        Err(err) => return Err(GitError::Io(err.to_string())),
    };

    // First pass: count both file kinds and remember the index paths.
    let mut idx_paths = Vec::new();
    let mut idx_count = 0;
    let mut pack_count = 0;
    for entry in entries {
        let entry = entry?;
        let path = entry.path();
        match path.extension().and_then(|ext| ext.to_str()) {
            Some("idx") => {
                idx_count += 1;
                idx_paths.push(path);
            }
            Some("pack") => {
                pack_count += 1;
            }
            _ => {}
        }
    }

    // Second pass: pair each index with its pack file.
    let mut packs = Vec::new();
    for idx in idx_paths {
        let pack = idx.with_extension("pack");
        // An index whose pack has no readable metadata is left out of the
        // registry (it still counts towards `idx_count`).
        let Ok(metadata) = fs::metadata(&pack) else {
            continue;
        };
        let modified = pack_sort_modified(&metadata);
        packs.push((
            modified,
            metadata.len(),
            Arc::new(RegisteredPack::new(idx, pack)),
        ));
    }
    // Newest first, then largest first, then index path ascending as a
    // deterministic tie-breaker.
    packs.sort_by(|left, right| {
        right
            .0
            .cmp(&left.0)
            .then_with(|| right.1.cmp(&left.1))
            .then_with(|| left.2.idx.cmp(&right.2.idx))
    });
    let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
    Ok(PackRegistrySnapshot::new(
        PackDirFingerprint {
            modified,
            idx_count,
            pack_count,
        },
        packs,
    ))
}
4807
/// Converts a file's modification time into a sortable
/// `(seconds, nanoseconds)` pair measured from the Unix epoch.
///
/// Falls back to `(0, 0)` when the modification time is unavailable or lies
/// before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(modified) = metadata.modified() else {
        return (0, 0);
    };
    match modified.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
4820
4821fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
4824 left.len() == right.len()
4825 && left
4826 .iter()
4827 .zip(right.iter())
4828 .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
4829}
4830
/// Collects the alternate object directories configured for `objects_dir`.
///
/// Two sources are consulted, in this order:
///
/// 1. The `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    the platform's path-list separator. Entries are used verbatim and empty
///    entries are ignored.
/// 2. The `info/alternates` file inside `objects_dir`, one path per line.
///    Blank lines and lines starting with `#` are skipped, a trailing `\r` is
///    stripped, and relative paths are resolved against `objects_dir`.
///
/// A missing or unreadable alternates file contributes nothing. Paths are not
/// checked for existence.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    // On Unix a path is an arbitrary byte string, so a line of the alternates
    // file is taken as-is instead of being dropped when it is not UTF-8.
    #[cfg(unix)]
    fn alternates_line_path(line: &[u8]) -> Option<PathBuf> {
        use std::os::unix::ffi::OsStrExt;
        Some(PathBuf::from(std::ffi::OsStr::from_bytes(line)))
    }
    // Elsewhere there is no lossless bytes-to-path conversion in std; keep
    // accepting only UTF-8 lines.
    #[cfg(not(unix))]
    fn alternates_line_path(line: &[u8]) -> Option<PathBuf> {
        std::str::from_utf8(line).ok().map(PathBuf::from)
    }

    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        // `split_paths` works on the raw OS string (no lossy UTF-8 round trip)
        // and uses the platform separator, so `C:\...` entries survive on
        // Windows.
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Some(path) = alternates_line_path(line) else {
                continue;
            };
            let absolute = if path.is_absolute() {
                path
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
4861
impl ObjectReader for FileObjectDatabase {
    /// Reports whether the lazily initialised shallow-graft set is non-empty.
    ///
    /// On first use the set is loaded via `read_shallow_grafts` from the
    /// `shallow` file located in the parent of the objects directory; when the
    /// objects directory has no parent the set is empty.
    fn has_shallow_grafts(&self) -> bool {
        !self
            .shallow_grafts
            .get_or_init(|| {
                let shallow_file = self
                    .objects_dir
                    .parent()
                    .map(|git_dir| git_dir.join("shallow"));
                match shallow_file {
                    Some(path) => read_shallow_grafts(&path, self.format),
                    None => HashSet::new(),
                }
            })
            .is_empty()
    }

    /// Reports whether `oid` is a member of the shallow-graft set.
    ///
    /// Uses the same lazy initialisation as `has_shallow_grafts`, so whichever
    /// of the two methods runs first performs the load.
    fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
        self.shallow_grafts
            .get_or_init(|| {
                let shallow_file = self
                    .objects_dir
                    .parent()
                    .map(|git_dir| git_dir.join("shallow"));
                match shallow_file {
                    Some(path) => read_shallow_grafts(&path, self.format),
                    None => HashSet::new(),
                }
            })
            .contains(oid)
    }

    /// Reads `oid`, trying each storage tier in turn.
    ///
    /// Lookup order, as implemented below:
    /// 1. the implied empty tree, synthesised without touching storage;
    /// 2. the pack reported by `find_pack_containing`, if any;
    /// 3. the loose store;
    /// 4. `read_packed_object`;
    /// 5. each configured alternate, opened without its own alternates;
    /// 6. the loose store once more, after its presence cache has been reset.
    ///
    /// # Errors
    ///
    /// Returns a "not found" error with the `Read` context when every tier
    /// misses. A non-"not found" error from the first loose read is remembered
    /// and returned only if no later tier produces the object. Errors from
    /// `find_pack_containing`, `read_packed_object`, an alternate (in step 5),
    /// or the retried loose read are returned immediately.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
        if let Some(object) = implied_empty_tree_object(self.format, oid) {
            return Ok(object);
        }
        if let Some(pack_lookup) = self.find_pack_containing(oid)? {
            match self.read_packed_object_at_lookup(oid, &pack_lookup) {
                Ok(object) => return Ok(object),
                // The located pack could not produce the object after all:
                // continue with the general lookup below.
                Err(GitError::NotFound(_)) => {}
                Err(packed_err) => {
                    // The located pack failed with a real error. Before
                    // surfacing it, look for any other readable copy: loose
                    // first, then `read_packed_object_from_other_packs`
                    // (presumably the packs other than `pack_lookup`), then
                    // the alternates.
                    if let Ok(object) = self.loose.read_object(oid) {
                        return Ok(object);
                    }
                    if let Some(object) =
                        self.read_packed_object_from_other_packs(oid, &pack_lookup)?
                    {
                        return Ok(object);
                    }
                    // Alternate failures are ignored here; the pack error
                    // below is the one reported.
                    for alternate in &self.alternates {
                        if let Ok(object) =
                            Self::without_alternates(alternate, self.format).read_object(oid)
                        {
                            return Ok(object);
                        }
                    }
                    return Err(packed_err);
                }
            }
        }
        // Keep a loose-store failure that is not a plain miss so it can be
        // reported if nothing else yields the object.
        let loose_err = match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => None,
            Err(err) => Some(err),
        };
        if let Some(object) = self.read_packed_object(oid)? {
            return Ok(object);
        }
        for alternate in &self.alternates {
            match Self::without_alternates(alternate, self.format).read_object(oid) {
                Ok(object) => return Ok(object),
                Err(GitError::NotFound(_)) => {}
                Err(err) => return Err(err),
            }
        }
        // A cached "absent" answer from the loose store may be stale; reset
        // the cache and ask once more before giving up.
        self.loose.invalidate_cache();
        match self.loose.read_object(oid) {
            Ok(object) => return Ok(object),
            Err(GitError::NotFound(_)) => {}
            Err(err) => return Err(err),
        }
        if let Some(err) = loose_err {
            return Err(err);
        }
        Err(GitError::object_not_found_in(
            *oid,
            MissingObjectContext::Read,
        ))
    }
}
4974
4975impl ObjectWriter for FileObjectDatabase {
4976 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4977 let oid = object.object_id(self.format)?;
4983 if self.contains(&oid)? {
4984 return Ok(oid);
4985 }
4986 self.loose.write_object(object)
4987 }
4988}
4989
4990fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
4991 if path.exists() {
4992 return Ok(());
4993 }
4994 let parent = path
4995 .parent()
4996 .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
4997 fs::create_dir_all(parent)?;
4998 let temp_path = unique_temp_path(parent);
4999 let write_result = (|| -> Result<()> {
5000 {
5001 let mut file = fs::OpenOptions::new()
5002 .write(true)
5003 .create_new(true)
5004 .open(&temp_path)?;
5005 file.write_all(bytes)?;
5006 file.sync_all()?;
5007 }
5008 match fs::rename(&temp_path, path) {
5009 Ok(()) => Ok(()),
5010 Err(_) if path.exists() => {
5011 let _ = fs::remove_file(&temp_path);
5012 Ok(())
5013 }
5014 Err(err) => Err(GitError::Io(err.to_string())),
5015 }
5016 })();
5017 if write_result.is_err() {
5018 let _ = fs::remove_file(&temp_path);
5019 }
5020 write_result
5021}
5022
5023fn write_promisor_pack_sidecar(
5024 pack_dir: &Path,
5025 pack_name: &str,
5026 promisor: bool,
5027) -> Result<Option<PathBuf>> {
5028 if !promisor {
5029 return Ok(None);
5030 }
5031 let path = pack_dir.join(format!("{pack_name}.promisor"));
5032 write_pack_component(&path, b"")?;
5033 Ok(Some(path))
5034}
5035
/// Number of leading inflated bytes of a loose object that are searched for
/// the NUL terminating its `<type> <size>` header. It is also the size of the
/// scratch buffer used for header-only inflation and the limit quoted in the
/// "header too long" error.
const MAX_LOOSE_HEADER_LEN: usize = 32;
5041
5042fn loose_header_too_long(oid: &ObjectId) -> GitError {
5047 GitError::InvalidObject(format!(
5048 "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
5049 ))
5050}
5051
5052fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
5056 GitError::InvalidObject(format!("unable to unpack {oid} header"))
5057}
5058
/// Inspects the two-byte zlib stream header at the start of `input` and
/// returns the diagnostic for the first problem found, if any.
///
/// Checks are made in this order: the 16-bit big-endian header must be a
/// multiple of 31, the compression method nibble must be 8, the window-size
/// nibble must not exceed 7, and the preset-dictionary flag (`0x20`) must be
/// clear. Returns `None` when the header passes every check or when `input`
/// holds fewer than two bytes.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let header = u16::from_be_bytes([cmf, flg]);
    let diagnostic = if header % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if (cmf & 0x0f) != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if (cmf >> 4) > 7 {
        "inflate: data stream error (invalid window size)"
    } else if (flg & 0x20) != 0 {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(diagnostic)
}
5082
5083fn emit_inflate_diagnostic(input: &[u8]) {
5086 if let Some(diagnostic) = inflate_header_diagnostic(input) {
5087 eprintln!("error: {diagnostic}");
5088 }
5089}
5090
/// Outcome of checking one loose object file with
/// `LooseObjectStore::verify_object`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// The file inflated and parsed, and its content hashes to the expected id.
    Ok,
    /// The file inflated and parsed, but its content hashes to `actual`
    /// instead of the id it was looked up under.
    HashMismatch { actual: ObjectId },
    /// The file could not be inflated or parsed, carried trailing bytes after
    /// the zlib stream, or held fewer body bytes than its header declares.
    Corrupt,
}
5104
/// Store of loose (one file per object) objects under an objects directory.
///
/// Cloning the store clones the `Arc`, so clones share one presence cache.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    // Directory that holds the two-hex-digit fanout subdirectories.
    objects_dir: PathBuf,
    // Hash format every object id handled by this store must use.
    format: ObjectFormat,
    // Presence cache filled per fanout byte; lets lookups of absent objects
    // return without touching the filesystem.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
5119
5120impl LooseObjectStore {
5121 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
5122 Self {
5123 objects_dir: objects_dir.into(),
5124 format,
5125 loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
5126 }
5127 }
5128
5129 fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
5134 let mut guard = self.loose_cache.lock().ok()?;
5135 let fanout = oid.as_bytes()[0];
5136 if !guard.loaded_fanouts.contains(&fanout) {
5137 collect_loose_fanout_object_ids(
5138 &self.objects_dir,
5139 self.format,
5140 fanout,
5141 &mut guard.objects,
5142 )
5143 .ok()?;
5144 guard.loaded_fanouts.insert(fanout);
5145 }
5146 Some(guard.objects.contains(oid))
5147 }
5148
5149 fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
5153 if let Ok(mut guard) = self.loose_cache.lock() {
5154 guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
5155 guard.loaded_fanouts = (0..=u8::MAX).collect();
5156 let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
5157 ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
5158 return Ok(ids);
5159 }
5160 loose_object_ids(&self.objects_dir, self.format)
5161 }
5162
5163 fn note_loose_write(&self, oid: ObjectId) {
5167 if let Ok(mut guard) = self.loose_cache.lock() {
5168 guard.objects.insert(oid);
5169 }
5170 }
5171
5172 pub(crate) fn invalidate_cache(&self) {
5175 if let Ok(mut guard) = self.loose_cache.lock() {
5176 *guard = LoosePresenceCache::default();
5177 }
5178 }
5179
5180 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
5181 Self::new(repository_objects_dir(git_dir), format)
5182 }
5183
5184 fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
5185 if oid.format() != self.format {
5186 return Err(GitError::InvalidObjectId(format!(
5187 "object {oid} uses {}, store uses {}",
5188 oid.format().name(),
5189 self.format.name()
5190 )));
5191 }
5192 Ok(())
5193 }
5194
5195 pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
5196 self.validate_oid_format(oid)?;
5197 let hex = oid.to_hex();
5198 Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
5199 }
5200
5201 pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
5202 self.validate_oid_format(oid)?;
5203 if self.cached_loose_presence(oid) == Some(false) {
5204 return Ok(false);
5205 }
5206 let path = self.object_path(oid)?;
5207 Ok(path.exists())
5208 }
5209
5210 pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
5211 self.validate_oid_format(oid)?;
5212 if self.cached_loose_presence(oid) == Some(false) {
5213 return Ok(None);
5214 }
5215 let path = self.object_path(oid)?;
5216 match fs::metadata(path) {
5217 Ok(metadata) => Ok(Some(metadata.len())),
5218 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
5219 Err(err) => Err(GitError::Io(err.to_string())),
5220 }
5221 }
5222
5223 pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
5228 self.validate_oid_format(oid)?;
5229 if self.cached_loose_presence(oid) == Some(false) {
5230 return Ok(None);
5231 }
5232 let path = self.object_path(oid)?;
5233 let compressed = match fs::read(&path) {
5234 Ok(compressed) => compressed,
5235 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
5236 Err(err) => return Err(GitError::Io(err.to_string())),
5237 };
5238 match inflate_loose_header(&compressed)? {
5239 LooseHeader::Ok(header) => {
5240 let header = std::str::from_utf8(&header)
5241 .map_err(|err| GitError::InvalidObject(err.to_string()))?;
5242 let (kind, size) = header
5243 .split_once(' ')
5244 .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
5245 let object_type = kind.parse::<ObjectType>()?;
5246 let size = size
5247 .parse::<u64>()
5248 .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
5249 Ok(Some((object_type, size)))
5250 }
5251 LooseHeader::Bad => {
5252 emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
5255 Err(loose_unpack_header_failed(oid))
5256 }
5257 LooseHeader::TooLong => {
5258 Err(loose_header_too_long(oid))
5263 }
5264 }
5265 }
5266
5267 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
5269 self.loose_object_ids_cached()
5270 }
5271
5272 pub fn verify_object(
5280 &self,
5281 oid: &ObjectId,
5282 display_path: &str,
5283 ) -> Result<Option<LooseObjectIntegrity>> {
5284 let path = self.object_path(oid)?;
5285 let compressed = match fs::read(&path) {
5286 Ok(compressed) => compressed,
5287 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
5288 Err(err) => return Err(GitError::Io(err.to_string())),
5289 };
5290 let mut decoder = ZlibDecoder::new(compressed.as_slice());
5291 let mut framed = Vec::new();
5292 if decoder.read_to_end(&mut framed).is_err() {
5293 emit_inflate_diagnostic(&compressed);
5294 if framed_loose_header_terminated(&framed) {
5302 eprintln!("error: corrupt loose object '{oid}'");
5303 eprintln!("error: unable to unpack contents of {display_path}");
5304 } else {
5305 eprintln!("error: unable to unpack header of {display_path}");
5306 }
5307 return Ok(Some(LooseObjectIntegrity::Corrupt));
5308 }
5309 if !framed_loose_header_terminated(&framed) {
5310 eprintln!("error: unable to unpack header of {display_path}");
5313 return Ok(Some(LooseObjectIntegrity::Corrupt));
5314 }
5315 if (decoder.total_in() as usize) < compressed.len() {
5322 eprintln!("error: garbage at end of loose object '{oid}'");
5326 eprintln!("error: unable to unpack contents of {display_path}");
5327 return Ok(Some(LooseObjectIntegrity::Corrupt));
5328 }
5329 if let Some(declared) = loose_header_declared_size(&framed) {
5336 let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
5337 let body_len = framed.len() - (nul + 1).min(framed.len());
5338 if body_len < declared {
5339 eprintln!("error: corrupt loose object '{oid}'");
5340 eprintln!("error: unable to unpack contents of {display_path}");
5341 return Ok(Some(LooseObjectIntegrity::Corrupt));
5342 }
5343 }
5344 let Ok(object) = parse_framed_object(&framed) else {
5345 if let Some(header) = loose_header_with_unknown_type(&framed) {
5350 eprintln!("error: unable to parse type from header '{header}' of {display_path}");
5351 } else {
5352 eprintln!("error: unable to parse header of {display_path}");
5353 }
5354 return Ok(Some(LooseObjectIntegrity::Corrupt));
5355 };
5356 let actual = object.object_id(self.format)?;
5357 if &actual != oid {
5358 return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
5359 }
5360 Ok(Some(LooseObjectIntegrity::Ok))
5361 }
5362}
5363
5364fn framed_loose_header_terminated(framed: &[u8]) -> bool {
5368 framed
5369 .iter()
5370 .take(MAX_LOOSE_HEADER_LEN)
5371 .any(|byte| *byte == 0)
5372}
5373
5374fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
5379 let nul = framed.iter().position(|&b| b == 0)?;
5380 let header = std::str::from_utf8(&framed[..nul]).ok()?;
5381 let (kind, size) = header.split_once(' ')?;
5382 let size: usize = size.parse().ok()?;
5383 if framed.len() - (nul + 1) != size {
5386 return None;
5387 }
5388 if kind.parse::<ObjectType>().is_ok() {
5391 return None;
5392 }
5393 Some(header.to_string())
5394}
5395
/// Extracts the size declared in the `<kind> <size>` header of `framed`.
///
/// Returns `None` when there is no NUL terminator, the header is not UTF-8,
/// it contains no space, or the text after the first space is not an integer.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let nul = framed.iter().position(|byte| *byte == 0)?;
    std::str::from_utf8(&framed[..nul])
        .ok()
        .and_then(|header| header.split_once(' '))
        .and_then(|(_kind, size)| size.parse().ok())
}
5405
/// Result of inflating just the header of a loose object
/// (see `inflate_loose_header`).
enum LooseHeader {
    /// The header bytes that precede the NUL terminator.
    Ok(Vec<u8>),
    /// The decompressor reported an error.
    Bad,
    /// No NUL terminator appeared in the inflated header window.
    TooLong,
}
5420
5421fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
5435 let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
5436 let mut decompress = Decompress::new(true);
5437 let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
5441 let produced = decompress.total_out() as usize;
5442 match status {
5443 Ok(_) => {
5444 let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
5445 match window.iter().position(|&byte| byte == 0) {
5446 Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
5447 None => Ok(LooseHeader::TooLong),
5451 }
5452 }
5453 Err(_) => Ok(LooseHeader::Bad),
5455 }
5456}
5457
5458impl ObjectReader for LooseObjectStore {
5459 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
5460 self.validate_oid_format(oid)?;
5461 if self.cached_loose_presence(oid) == Some(false) {
5465 return Err(GitError::object_not_found_in(
5466 *oid,
5467 MissingObjectContext::Read,
5468 ));
5469 }
5470 let path = self.object_path(oid)?;
5471 let compressed = match fs::read(&path) {
5472 Ok(compressed) => compressed,
5473 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
5474 return Err(GitError::object_not_found_in(
5475 *oid,
5476 MissingObjectContext::Read,
5477 ));
5478 }
5479 Err(err) => return Err(GitError::Io(err.to_string())),
5480 };
5481 let mut decoder = ZlibDecoder::new(compressed.as_slice());
5482 let mut framed = Vec::new();
5483 if decoder.read_to_end(&mut framed).is_err() {
5484 emit_inflate_diagnostic(&compressed);
5485 if !framed_loose_header_terminated(&framed) {
5490 return Err(loose_unpack_header_failed(oid));
5491 }
5492 return Err(GitError::InvalidObject(format!(
5493 "corrupt loose object '{oid}'"
5494 )));
5495 }
5496 if framed
5501 .iter()
5502 .take(MAX_LOOSE_HEADER_LEN)
5503 .all(|byte| *byte != 0)
5504 {
5505 return Err(loose_header_too_long(oid));
5506 }
5507 let object = parse_framed_object(&framed)?;
5508 if verify_reads_enabled() {
5512 let actual = object.object_id(self.format)?;
5513 if &actual != oid {
5514 return Err(GitError::InvalidObject(format!(
5515 "loose object {} hashes to {actual}",
5516 path.display()
5517 )));
5518 }
5519 }
5520 Ok(Arc::new(object))
5521 }
5522}
5523
5524impl ObjectWriter for LooseObjectStore {
5525 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
5526 let oid = object.object_id(self.format)?;
5527 let path = self.object_path(&oid)?;
5528 if path.exists() {
5529 self.note_loose_write(oid);
5530 return Ok(oid);
5531 }
5532 let parent = path
5533 .parent()
5534 .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
5535 fs::create_dir_all(parent)?;
5536 let temp_path = unique_temp_path(parent);
5537 let write_result = (|| -> Result<()> {
5538 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5539 encoder.write_all(&object.framed_bytes())?;
5540 let compressed = encoder.finish()?;
5541 {
5542 let mut file = fs::OpenOptions::new()
5543 .write(true)
5544 .create_new(true)
5545 .open(&temp_path)?;
5546 file.write_all(&compressed)?;
5547 }
5557 match fs::rename(&temp_path, &path) {
5558 Ok(()) => Ok(()),
5559 Err(_) if path.exists() => {
5560 let _ = fs::remove_file(&temp_path);
5561 Ok(())
5562 }
5563 Err(err) => Err(GitError::Io(err.to_string())),
5564 }
5565 })();
5566 if write_result.is_err() {
5567 let _ = fs::remove_file(&temp_path);
5568 }
5569 write_result?;
5570 self.note_loose_write(oid);
5571 Ok(oid)
5572 }
5573}
5574
5575fn unique_temp_path(parent: &Path) -> PathBuf {
5576 let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
5577 parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
5578}
5579
5580#[cfg(test)]
5581mod tests {
5582 use super::*;
5583 use sley_core::BString;
5584 use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
5585 use sley_pack::{PackFile, PackWriteOptions};
5586
5587 fn blob_of(byte: u8, len: usize) -> EncodedObject {
5588 EncodedObject::new(ObjectType::Blob, vec![byte; len])
5589 }
5590
5591 fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
5592 Arc::new(blob_of(byte, len))
5593 }
5594
5595 fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
5596 reader
5597 .read_object(oid)
5598 .expect("test operation should succeed")
5599 .as_ref()
5600 .clone()
5601 }
5602
// With a budget of two 1000-byte blobs plus 8 bytes of slack, inserting a
// third blob evicts key 2: key 1 was read just before the insert, which makes
// key 2 the least recently used entry.
#[test]
fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
    let one = cached_object_cost(&blob_of(0, 1000));
    let mut cache = LruCache::<u32>::new(one * 2 + 8);
    cache.put(1, cached_blob_of(b'a', 1000));
    cache.put(2, cached_blob_of(b'b', 1000));
    // Touch key 1 so that key 2 becomes the eviction candidate.
    assert!(cache.get(&1).is_some());
    cache.put(3, cached_blob_of(b'c', 1000));
    assert!(cache.get(&1).is_some());
    assert!(cache.get(&2).is_none());
    assert!(cache.get(&3).is_some());
}
5618
// A cache created with a zero byte budget never retains anything.
#[test]
fn lru_cache_zero_budget_is_inert() {
    let mut cache = LruCache::<u32>::new(0);
    cache.put(1, cached_blob_of(b'a', 16));
    assert!(cache.get(&1).is_none());
}
5625
// Re-inserting a key with an object larger than the whole budget must not
// leave the old, smaller entry behind, and the cache must remain usable for
// later inserts.
#[test]
fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
    let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
    cache.put(1, cached_blob_of(b'a', 50));
    assert!(cache.get(&1).is_some());
    // The 10 000-byte replacement exceeds the budget sized for 100 bytes.
    cache.put(1, cached_blob_of(b'b', 10_000));
    assert!(cache.get(&1).is_none());
    // A fitting object is still accepted afterwards.
    cache.put(2, cached_blob_of(b'c', 50));
    assert!(cache.get(&2).is_some());
}
5640
// Replacing key 2 with a larger blob must be charged against the budget: the
// budget holds two 500-byte blobs plus 200 bytes, so after key 2 grows to
// 1000 bytes key 1 has to be evicted.
#[test]
fn lru_cache_replacing_entry_updates_byte_accounting() {
    let small = cached_object_cost(&blob_of(0, 500));
    let mut cache = LruCache::<u32>::new(small * 2 + 200);
    cache.put(1, cached_blob_of(b'a', 500));
    cache.put(2, cached_blob_of(b'b', 500));
    assert!(cache.get(&1).is_some());
    assert!(cache.get(&2).is_some());
    cache.put(2, cached_blob_of(b'b', 1000));
    assert!(cache.get(&2).is_some());
    assert!(cache.get(&1).is_none());
}
5659
// Writing the blob "hello\n" into an `ObjectDatabase` yields the expected
// SHA-1 object id, and the stored object passes `validate`.
#[test]
fn write_and_validate_blob() {
    let db = ObjectDatabase::new(ObjectFormat::Sha1);
    let oid = db
        .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
        .expect("test operation should succeed");
    assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
    db.validate(&oid).expect("test operation should succeed");
}
5669
// Round trip through `LooseObjectStore`: the written object reads back equal
// and its file exists at `object_path`.
#[test]
fn loose_store_writes_and_reads_object() {
    // Unique scratch directory per process and per call.
    let root = std::env::temp_dir().join(format!(
        "sley-loose-store-{}-{}",
        std::process::id(),
        TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
    ));
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
    let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = store
        .write_object(object.clone())
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&store, &oid), object);
    assert!(
        store
            .object_path(&oid)
            .expect("test operation should succeed")
            .exists()
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5691
// Damaging a byte near the start of the compressed file must make
// `read_header` fail with the "unable to unpack <oid> header" error.
#[test]
fn read_header_detects_corruption_within_gits_header_window() {
    let root = temp_root("sley-loose-header-corrupt");
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
    let object = EncodedObject::new(ObjectType::Blob, b"content\n".to_vec());
    let oid = store
        .write_object(object)
        .expect("test operation should succeed");
    let path = store
        .object_path(&oid)
        .expect("test operation should succeed");
    let mut bytes = fs::read(&path).expect("test operation should succeed");
    // Overwrite byte 10 of the compressed stream.
    bytes[10] = 0;
    fs::write(&path, &bytes).expect("test operation should succeed");
    store.invalidate_cache();
    let err = store
        .read_header(&oid)
        .expect_err("corrupt loose header must fail like git's ULHR_BAD");
    let msg = err.to_string();
    assert!(
        msg.contains("unable to unpack") && msg.contains(&oid.to_hex()),
        "expected git's ULHR_BAD message, got: {msg}"
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5727
// Damage in the middle of a large object's compressed data must not affect
// `read_header`, which only inflates the start of the stream.
#[test]
fn read_header_ignores_corruption_past_gits_header_window() {
    let root = temp_root("sley-loose-header-deep-corrupt");
    let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
    // 4096 bytes of varied content derived from a multiplicative sequence.
    let body: Vec<u8> = (0..4096u32).map(|i| (i.wrapping_mul(2654435761)) as u8).collect();
    let object = EncodedObject::new(ObjectType::Blob, body.clone());
    let oid = store
        .write_object(object)
        .expect("test operation should succeed");
    let path = store
        .object_path(&oid)
        .expect("test operation should succeed");
    let mut bytes = fs::read(&path).expect("test operation should succeed");
    // Flip every bit of the byte halfway through the compressed file.
    let deep = bytes.len() / 2;
    bytes[deep] ^= 0xff;
    fs::write(&path, &bytes).expect("test operation should succeed");
    store.invalidate_cache();
    let header = store
        .read_header(&oid)
        .expect("header-only read must still succeed for deep body corruption");
    assert_eq!(header, Some((ObjectType::Blob, body.len() as u64)));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5757
// An object that exists only inside a pack (with its index) placed in
// `objects/pack` is found by `contains` and returned by `read_object`.
#[test]
fn file_database_reads_object_from_pack_index() {
    let root = temp_root("sley-file-odb-pack");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");
    let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");
    // Pack and index share the `pack-<checksum>` stem.
    let pack_name = written.checksum.to_hex();
    fs::write(
        pack_dir.join(format!("pack-{pack_name}.pack")),
        written.pack,
    )
    .expect("test operation should succeed");
    fs::write(
        pack_dir.join(format!("pack-{pack_name}.idx")),
        written.index,
    )
    .expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    assert!(db.contains(&oid).expect("test operation should succeed"));
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5787
// A failed read followed by a loose write in the same process must not leave
// a stale "absent" answer behind: the second read has to find the object.
#[test]
fn file_database_loose_cache_observes_same_process_write_after_miss() {
    let root = temp_root("sley-file-odb-loose-cache-write");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // First read: the object does not exist yet.
    assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
    db.loose()
        .write_object(object.clone())
        .expect("test operation should succeed");

    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5808
// Same scenario as the read-after-miss test, but through the presence
// checker: a negative `contains` answer must flip to positive once the object
// has been written loose in the same process.
#[test]
fn object_presence_checker_observes_same_process_loose_write_after_miss() {
    let root = temp_root("sley-presence-checker-loose-cache-write");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let mut checker = db.presence_checker();

    let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert!(
        !checker
            .contains(&oid)
            .expect("test operation should succeed")
    );
    db.loose()
        .write_object(object)
        .expect("test operation should succeed");

    assert!(
        checker
            .contains(&oid)
            .expect("test operation should succeed")
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5838
// `read_object_header` must report the same type and size as a full read for
// a loose object and for three objects installed from a pack, and `None` for
// an id that is not stored.
#[test]
fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
    let root = temp_root("sley-read-object-header");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
    let loose_oid = db
        .write_object(loose.clone())
        .expect("test operation should succeed");

    // `child` is `base` plus a short tail, which makes it a natural delta
    // candidate against `base` when the pack is written.
    let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
    let mut child_body = vec![b'a'; 4096];
    child_body.extend_from_slice(b" plus a deltified tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let commitish =
        EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let commit_oid = commitish
        .object_id(format)
        .expect("test operation should succeed");
    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(
        &[base.clone(), child.clone(), commitish.clone()],
        format,
        &options,
    )
    .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    for (oid, want_type, want_len) in [
        (&loose_oid, ObjectType::Blob, loose.body.len()),
        (&base_oid, ObjectType::Blob, base.body.len()),
        (&child_oid, ObjectType::Blob, child.body.len()),
        (&commit_oid, ObjectType::Commit, commitish.body.len()),
    ] {
        // The header must match the known type and size ...
        assert_eq!(
            db.read_object_header(oid)
                .expect("test operation should succeed"),
            Some((want_type, want_len as u64)),
            "header for {oid}"
        );
        // ... and agree with what a full read returns.
        let full = db.read_object(oid).expect("test operation should succeed");
        assert_eq!(
            db.read_object_header(oid)
                .expect("test operation should succeed"),
            Some((full.object_type, full.body.len() as u64))
        );
    }

    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    assert_eq!(
        db.read_object_header(&missing)
            .expect("test operation should succeed"),
        None
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5914
// `object_storage_info` must report the on-disk size and the delta base for a
// loose object, for a packed base object, and for a packed object stored as a
// delta against that base; an id that is not stored yields `None`.
#[test]
fn object_storage_info_reports_loose_packed_and_delta_metadata() {
    let root = temp_root("sley-object-storage-info");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Loose object: the size equals the file size and the delta base is the
    // zero id.
    let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
    let loose_oid = db
        .write_object(loose)
        .expect("test operation should succeed");
    let loose_size = fs::metadata(
        db.loose()
            .object_path(&loose_oid)
            .expect("test operation should succeed"),
    )
    .expect("test operation should succeed")
    .len();
    let loose_info = db
        .object_storage_info(&loose_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert_eq!(loose_info.disk_size, loose_size);
    assert_eq!(
        loose_info.deltabase,
        zero_oid(format).expect("test operation should succeed")
    );

    let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
    let mut child_body = vec![b'a'; 4096];
    child_body.extend_from_slice(b" changed tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    // Packed base object: it has a size, and the delta base is the zero id.
    let base_info = db
        .object_storage_info(&base_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(base_info.disk_size > 0);
    assert_eq!(
        base_info.deltabase,
        zero_oid(format).expect("test operation should succeed")
    );

    // Packed delta object: the delta base is the base object's id.
    let child_info = db
        .object_storage_info(&child_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(child_info.disk_size > 0);
    assert_eq!(child_info.deltabase, base_oid);

    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    assert_eq!(
        db.object_storage_info(&missing)
            .expect("test operation should succeed"),
        None
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5988
#[test]
fn file_database_resolves_unique_loose_object_prefix() {
    // The first 8 hex characters of a freshly written loose blob must resolve
    // to exactly that object, and `object_ids()` must list it.
    let root = temp_root("sley-file-odb-prefix-loose");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let blob = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
    let oid = db
        .write_object(blob)
        .expect("test operation should succeed");
    let hex = oid.to_hex();
    let short = &hex[..8];

    let resolution = db
        .resolve_prefix(short)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    let known_ids = db.object_ids().expect("test operation should succeed");
    assert!(known_ids.contains(&oid));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6013
#[test]
fn file_database_resolves_unique_packed_object_prefix() {
    // Same check as the loose-object variant, but the blob only exists inside
    // an installed pack.
    let root = temp_root("sley-file-odb-prefix-packed");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let blob = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
    let oid = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    let hex = oid.to_hex();
    let resolution = db
        .resolve_prefix(&hex[..8])
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6037
#[test]
fn file_database_reports_ambiguous_object_prefix() {
    // Writes blobs until two of them share their first four hex characters,
    // then expects that prefix to resolve as ambiguous between exactly those
    // two ids.
    let root = temp_root("sley-file-odb-prefix-ambiguous");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Maps each 4-hex prefix seen so far to the id that produced it.
    let mut by_prefix = HashMap::new();
    let mut collision = None;
    for idx in 0..10_000 {
        let blob =
            EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
        let oid = db
            .write_object(blob)
            .expect("test operation should succeed");
        let short = oid.to_hex()[..4].to_string();
        // `insert` hands back the previous holder of this prefix, if any.
        if let Some(earlier) = by_prefix.insert(short.clone(), oid) {
            collision = Some((short, earlier, oid));
            break;
        }
    }
    let (prefix, first, second) = collision.expect("test should find a 4-hex collision");

    let resolution = db
        .resolve_prefix(&prefix)
        .expect("test operation should succeed");
    let mut matches = match resolution {
        ObjectPrefixResolution::Ambiguous(found) => found,
        _ => panic!("expected ambiguous prefix {prefix}"),
    };

    // Compare order-insensitively by sorting both sides on their hex form.
    matches.sort_by_key(|oid| oid.to_hex());
    let mut expected = vec![first, second];
    expected.sort_by_key(|oid| oid.to_hex());
    assert_eq!(matches, expected);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6070
#[test]
fn file_database_rejects_too_short_object_prefix() {
    // A three-character prefix must be refused with `InvalidObjectId`.
    let root = temp_root("sley-file-odb-prefix-short");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let outcome = db.resolve_prefix("abc");
    assert!(matches!(outcome, Err(GitError::InvalidObjectId(_))));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6084
#[test]
fn file_database_reads_sha256_object_from_pack_index() {
    // Places a SHA-256 pack and its index into `objects/pack` by hand, then
    // checks that a database opened afterwards can find and read the blob.
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-pack-sha256");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");

    // Both files are named after the pack checksum.
    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6115
#[test]
fn file_database_installs_sha256_pack_without_loose_objects() {
    // `install_pack` on a SHA-256 database must create the pack and index
    // files, report the contained id, write no promisor sidecar, and leave no
    // loose copy of the object behind.
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-install-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let result = db
        .install_pack(&pack)
        .expect("test operation should succeed");

    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    let loose_copy_exists = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!loose_copy_exists);

    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6148
#[test]
fn file_database_installs_raw_sha256_pack_without_loose_objects() {
    // Like the `install_pack` variant, but only the raw pack bytes are handed
    // to `install_raw_pack`; the same files and result fields are expected.
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-install-raw-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let result = db
        .install_raw_pack(&pack.pack)
        .expect("test operation should succeed");

    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    let loose_copy_exists = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!loose_copy_exists);

    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6181
#[test]
fn file_database_rejects_noncanonical_pack_index() {
    // Rebuilds the index of a valid pack with one CRC bit flipped and expects
    // `install_pack` to refuse the resulting pack/index pair.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-install-bad-index");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");

    // Flip the lowest CRC bit of the only entry and re-encode the index.
    let mut tampered_entries = pack.entries.clone();
    tampered_entries[0].crc32 ^= 1;
    let tampered_index = PackIndex::write_v2(format, &tampered_entries, &pack.checksum)
        .expect("test operation should succeed");
    let mut bad_pack = pack.clone();
    bad_pack.index = tampered_index;

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let outcome = db.install_pack(&bad_pack);
    assert!(outcome.is_err());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6201
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
    // With `promisor: true`, a raw pack install must also produce an empty
    // `.promisor` file that shares the pack's file stem.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-install-raw-promisor-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let install_options = RawPackInstallOptions { promisor: true };
    let result = db
        .install_raw_pack_with_options(&pack.pack, install_options)
        .expect("test operation should succeed");

    // Sidecar naming, presence and content.
    let promisor_path = result.promisor_path.expect("promisor sidecar");
    assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
    let sidecar_extension = promisor_path.extension().and_then(|ext| ext.to_str());
    assert_eq!(sidecar_extension, Some("promisor"));
    assert!(promisor_path.exists());
    let sidecar_bytes = fs::read(&promisor_path).expect("test operation should succeed");
    assert_eq!(sidecar_bytes, b"");

    // The pack itself is installed as usual, with no loose copy of the blob.
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    let loose_copy_exists = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!loose_copy_exists);
    assert_eq!(read_object_for_assert(&db, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6241
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    // A `worktrees/linked` admin directory whose `commondir` file contains
    // `../..` must resolve to the canonicalized `.git` directory, and the
    // objects directory must be `objects` beneath it.
    let root = temp_root("sley-odb-common-dir");
    let dot_git = root.join(".git");
    let admin = dot_git.join("worktrees").join("linked");
    fs::create_dir_all(&admin).expect("test operation should succeed");
    let commondir_file = admin.join("commondir");
    fs::write(commondir_file, "../..\n").expect("test operation should succeed");

    let common = fs::canonicalize(dot_git).expect("test operation should succeed");
    let expected_objects = common.join("objects");
    assert_eq!(repository_common_dir(&admin), common);
    assert_eq!(repository_objects_dir(&admin), expected_objects);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6256
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    // Builds a commit -> tree -> blob chain in a source database, checks that
    // the reachability walk from the commit finds all three ids, then installs
    // them into a second database as a pack (no loose copies expected there).
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-pack");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_oid = source
        .write_object(tree.clone())
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_oid = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
        .expect("test operation should succeed");
    for expected_oid in [&commit_oid, &tree_oid, &blob_oid] {
        assert!(reachable.contains(expected_oid));
    }

    let install =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(install.object_ids.len(), 3);

    // Each object must be readable from the destination without a loose file.
    let assert_packed_only = |oid: &ObjectId, expected: &EncodedObject| {
        let loose_copy_exists = destination
            .loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!loose_copy_exists);
        let present = destination
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&destination, oid), *expected);
    };
    assert_packed_only(&commit_oid, &commit);
    assert_packed_only(&tree_oid, &tree);
    assert_packed_only(&blob_oid, &blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6336
#[test]
fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
    // Starts the walk from the same commit twice with its tree excluded and
    // expects exactly one collected object: the commit itself.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-exclusions");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"excluded payload\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    let excluded = HashSet::from([tree_oid]);
    let starts = [commit_oid, commit_oid];
    let objects = collect_reachable_objects(&db, format, starts, &excluded)
        .expect("test operation should succeed");

    assert_eq!(objects.len(), 1);
    let collected_oid = objects[0]
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(collected_oid, commit_oid);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6393
#[test]
fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
    // With nothing excluded, `build_reachable_pack` must return a one-entry
    // pack beginning with the `PACK` magic; with the only object excluded it
    // must return `None`.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-build-reachable-pack");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"raw reachable pack\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let nothing_excluded = HashSet::new();
    let pack = build_reachable_pack(&db, format, std::iter::once(oid), &nothing_excluded)
        .expect("test operation should succeed")
        .expect("reachable pack should be built");
    assert!(pack.pack.starts_with(b"PACK"));
    assert_eq!(pack.entries.len(), 1);
    assert_eq!(pack.entries[0].oid, oid);

    // Feed the ids of the first pack back in as starts, all of them excluded.
    let excluded = HashSet::from([oid]);
    let starts = pack.entries.into_iter().map(|entry| entry.oid);
    let second_attempt = build_reachable_pack(&db, format, starts, &excluded)
        .expect("test operation should succeed");
    assert!(second_attempt.is_none());
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6426
#[test]
fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
    // A walk from a tag must include the tagged blob; a walk from an id that
    // was never stored must fail with a typed not-found error that carries the
    // id and the `Traversal` context.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-tags");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"tagged payload\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let tag_body = Tag {
        object: blob_oid,
        object_type: ObjectType::Blob,
        name: b"v1".to_vec(),
        tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
        message: b"tag message\n".to_vec(),
        raw_body: None,
    }
    .write();
    let tag_oid = db
        .write_object(EncodedObject::new(ObjectType::Tag, tag_body))
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
        .expect("test operation should succeed");
    assert!(reachable.contains(&tag_oid));
    assert!(reachable.contains(&blob_oid));

    // This id is never written to the database.
    let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        .expect("test operation should succeed");
    let walk_outcome = collect_reachable_object_ids(&db, format, std::iter::once(missing));
    let err = walk_outcome.expect_err("missing traversal root should error");
    let kind = err.not_found_kind().expect("typed not found");
    assert_eq!(kind.object_id(), Some(missing));
    let reported_context = kind.missing_object_context();
    assert_eq!(reported_context, Some(MissingObjectContext::Traversal));
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6470
#[test]
fn install_reachable_pack_empty_starts_create_no_pack() {
    // An empty start list must yield `None` and must not create the
    // destination's `objects/pack` directory.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-empty");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let no_starts: Vec<ObjectId> = Vec::new();
    let result = install_reachable_pack(&source, &destination, format, no_starts)
        .expect("test operation should succeed");

    assert!(result.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6490
#[test]
fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
    // When the only start id is also excluded, nothing is installed: the call
    // returns `None` and no `objects/pack` directory appears in the destination.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-install-excluding");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let oid = source
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"excluded install\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let excluded = HashSet::from([oid]);

    let starts = std::iter::once(oid);
    let result =
        install_reachable_pack_excluding(&source, &destination, format, starts, &excluded)
            .expect("test operation should succeed");

    assert!(result.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6521
#[test]
fn install_reachable_pack_supports_sha256() {
    // Repeats the build/install round trip with SHA-256 ids: the built pack
    // starts with `PACK`, and the installed object is readable from the
    // destination without a loose copy.
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-reachable-pack-sha256");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
    let oid = source
        .write_object(object.clone())
        .expect("test operation should succeed");

    let nothing_excluded = HashSet::new();
    let pack = build_reachable_pack(&source, format, std::iter::once(oid), &nothing_excluded)
        .expect("test operation should succeed")
        .expect("sha256 reachable pack should be built");
    assert!(pack.pack.starts_with(b"PACK"));
    assert_eq!(pack.entries[0].oid, oid);

    let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
        .expect("test operation should succeed")
        .expect("sha256 reachable pack should be written");

    assert_eq!(result.object_ids, vec![oid]);
    let loose_copy_exists = destination
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!loose_copy_exists);
    assert_eq!(read_object_for_assert(&destination, &oid), object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6559
#[test]
fn install_helpers_accept_custom_raw_pack_installer() {
    // `install_reachable_pack` must work with any `RawPackInstaller`: a stub
    // installer records the bytes it receives and echoes back a preset id list.
    use std::cell::RefCell;

    #[derive(Default)]
    struct RecordingInstaller {
        // Every pack byte buffer passed to `install_raw_pack`, in call order.
        packs: RefCell<Vec<Vec<u8>>>,
        // Ids reported back as "installed" on each call.
        installed: RefCell<Vec<ObjectId>>,
    }

    impl RawPackInstaller for RecordingInstaller {
        fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
            self.packs.borrow_mut().push(pack_bytes.to_vec());
            Ok(RawPackInstallResult {
                object_ids: self.installed.borrow().clone(),
            })
        }
    }

    let format = ObjectFormat::Sha1;
    let source = ObjectDatabase::new(format);
    let oid = source
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"custom raw installer\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let installer = RecordingInstaller::default();
    installer.installed.borrow_mut().push(oid);

    let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
        .expect("test operation should succeed")
        .expect("custom installer should receive pack");

    let preset_ids = installer.installed.into_inner();
    assert_eq!(result.object_ids, preset_ids);
    let received = installer.packs.into_inner();
    assert_eq!(received.len(), 1);
    assert!(received[0].starts_with(b"PACK"));
}
6594
#[test]
fn file_database_reads_object_from_multi_pack_index() {
    // Writes two single-object packs plus a `multi-pack-index` that covers
    // both (no per-pack `.idx` files are written here), then checks lookup,
    // prefix resolution and reads through the database.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-midx");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
    let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");

    // The multi-pack index refers to packs by their `.idx` names; the pack
    // files on disk use the same stem with a `.pack` extension.
    let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
    let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
    let first_pack_path = pack_dir.join(first_pack_name.replace(".idx", ".pack"));
    fs::write(first_pack_path, first_pack.pack).expect("test operation should succeed");
    let second_pack_path = pack_dir.join(second_pack_name.replace(".idx", ".pack"));
    fs::write(second_pack_path, second_pack.pack).expect("test operation should succeed");

    let midx_entries = [
        sley_pack::MultiPackIndexEntry {
            oid: first_oid,
            pack_int_id: 0,
            offset: first_pack.entries[0].offset,
        },
        sley_pack::MultiPackIndexEntry {
            oid: second_oid,
            pack_int_id: 1,
            offset: second_pack.entries[0].offset,
        },
    ];
    let midx_pack_names = [first_pack_name, second_pack_name];
    let midx = MultiPackIndex::write(format, 2, &midx_pack_names, &midx_entries)
        .expect("test operation should succeed");
    fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let second_present = db
        .contains(&second_oid)
        .expect("test operation should succeed");
    assert!(second_present);

    let second_hex = second_oid.to_hex();
    let resolution = db
        .resolve_prefix(&second_hex[..8])
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(second_oid));

    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6659
#[test]
fn file_database_finds_pack_added_after_registry_was_cached() {
    // After the database has already served a read from one pack and reported
    // a second object as not found, installing a pack with that second object
    // must make it visible through the same database handle.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // First pack: install and read back.
    let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    // Second object is not stored yet, so the read must report `NotFound`.
    let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let early_read = db.read_object(&second_oid);
    assert!(matches!(early_read, Err(GitError::NotFound(_))));

    // Second pack: once installed, both objects must be readable.
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");
    let second_present = db
        .contains(&second_oid)
        .expect("test operation should succeed");
    assert!(second_present);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6708
#[test]
fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
    // A presence checker created before the second pack exists must still
    // report the second object once that pack has been installed.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-presence-checker-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");

    let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");

    // Before the second install: first is present, second is absent.
    let mut checker = db.presence_checker();
    let first_seen = checker
        .contains(&first_oid)
        .expect("test operation should succeed");
    assert!(first_seen);
    let second_seen_early = checker
        .contains(&second_oid)
        .expect("test operation should succeed");
    assert!(!second_seen_early);

    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    // Same checker instance, queried again after the install.
    let second_seen_late = checker
        .contains(&second_oid)
        .expect("test operation should succeed");
    assert!(second_seen_late);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6753
#[test]
fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
    // Tracks the first registry entry's `index` and `data` slots across a
    // `contains` call and a read, then checks that a refreshed registry taken
    // after a second install is a different `Arc` listing two packs.
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-pack-registry-refresh");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");

    let first_registry = db
        .cached_pack_registry(&pack_dir, false)
        .expect("test operation should succeed");
    assert_eq!(first_registry.fingerprint.idx_count, 1);
    assert_eq!(first_registry.fingerprint.pack_count, 1);
    assert_eq!(first_registry.packs.len(), 1);

    // Probes for whether the first pack's index / data slot is populated.
    let index_loaded = || {
        first_registry.packs[0]
            .index
            .lock()
            .expect("test operation should succeed")
            .is_some()
    };
    let data_loaded = || {
        first_registry.packs[0]
            .data
            .lock()
            .expect("test operation should succeed")
            .is_some()
    };

    // Freshly listed: neither slot is populated.
    assert!(!index_loaded());
    assert!(!data_loaded());

    // After `contains`: the index slot is populated, the data slot is not.
    let first_present = db
        .contains(&first_oid)
        .expect("test operation should succeed");
    assert!(first_present);
    assert!(index_loaded());
    assert!(!data_loaded());

    // After a read: the data slot is populated as well.
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    assert!(data_loaded());

    let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    // Second flag set to `true` this time; the result must be a new registry.
    let refreshed = db
        .cached_pack_registry(&pack_dir, true)
        .expect("test operation should succeed");
    assert!(!Arc::ptr_eq(&first_registry, &refreshed));
    assert_eq!(refreshed.fingerprint.idx_count, 2);
    assert_eq!(refreshed.fingerprint.pack_count, 2);
    assert_eq!(refreshed.packs.len(), 2);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6841
6842 #[test]
6843 fn file_database_pack_search_hint_rebuilds_after_pack_added() {
6844 let root = temp_root("sley-file-odb-pack-lookup-added-late");
6848 let git_dir = root.join(".git");
6849 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6850 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6851
6852 let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
6853 let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
6854 let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
6855 let first_oid = first
6856 .object_id(ObjectFormat::Sha1)
6857 .expect("test operation should succeed");
6858 let second_oid = second
6859 .object_id(ObjectFormat::Sha1)
6860 .expect("test operation should succeed");
6861 let third_oid = third
6862 .object_id(ObjectFormat::Sha1)
6863 .expect("test operation should succeed");
6864
6865 let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6866 .expect("test operation should succeed");
6867 let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6868 .expect("test operation should succeed");
6869 db.install_pack(&first_pack)
6870 .expect("test operation should succeed");
6871 db.install_pack(&second_pack)
6872 .expect("test operation should succeed");
6873
6874 assert_eq!(read_object_for_assert(&db, &first_oid), first);
6876 assert_eq!(read_object_for_assert(&db, &second_oid), second);
6877 assert!(matches!(
6878 db.read_object(&third_oid),
6879 Err(GitError::NotFound(_))
6880 ));
6881
6882 let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
6883 .expect("test operation should succeed");
6884 db.install_pack(&third_pack)
6885 .expect("test operation should succeed");
6886
6887 assert_eq!(read_object_for_assert(&db, &third_oid), third);
6888 assert_eq!(read_object_for_assert(&db, &first_oid), first);
6889
6890 fs::remove_dir_all(root).expect("test operation should succeed");
6891 }
6892
6893 #[test]
6894 fn file_database_prefers_loose_object_over_packed_object() {
6895 let root = temp_root("sley-file-odb-prefer-loose");
6896 let git_dir = root.join(".git");
6897 let pack_dir = git_dir.join("objects").join("pack");
6898 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6899 let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
6900 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6901 .expect("test operation should succeed");
6902 let pack_name = written.checksum.to_hex();
6903 fs::write(
6904 pack_dir.join(format!("pack-{pack_name}.pack")),
6905 written.pack,
6906 )
6907 .expect("test operation should succeed");
6908 fs::write(
6909 pack_dir.join(format!("pack-{pack_name}.idx")),
6910 written.index,
6911 )
6912 .expect("test operation should succeed");
6913
6914 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6915 let oid = db
6916 .write_object(object.clone())
6917 .expect("test operation should succeed");
6918 assert_eq!(read_object_for_assert(&db, &oid), object);
6919 fs::remove_dir_all(root).expect("test operation should succeed");
6920 }
6921
6922 #[test]
6923 fn bundle_prerequisite_verification_reads_existing_objects() {
6924 let db = ObjectDatabase::new(ObjectFormat::Sha1);
6925 let oid = db
6926 .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
6927 .expect("test operation should succeed");
6928 let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
6929 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6930 .expect("test operation should succeed");
6931
6932 verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
6933 }
6934
6935 #[test]
6936 fn bundle_prerequisite_verification_reports_missing_objects() {
6937 let db = ObjectDatabase::new(ObjectFormat::Sha1);
6938 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6939 .expect("test operation should succeed");
6940 let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
6941 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6942 .expect("test operation should succeed");
6943
6944 assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
6945 }
6946
6947 #[test]
6948 fn unbundle_objects_writes_pack_entries_and_returns_refs() {
6949 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6950 let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6951 let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6952 let oid = object
6953 .object_id(ObjectFormat::Sha1)
6954 .expect("test operation should succeed");
6955 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6956 .expect("test operation should succeed");
6957 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6958 .into_bytes()
6959 .into_iter()
6960 .chain(pack.pack)
6961 .collect::<Vec<_>>();
6962 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6963 .expect("test operation should succeed");
6964
6965 let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
6966 .expect("test operation should succeed");
6967 assert_eq!(result.written_objects, vec![oid]);
6968 assert_eq!(result.references, bundle.references);
6969 assert_eq!(read_object_for_assert(&writer, &oid), object);
6970 }
6971
6972 #[test]
6973 fn install_bundle_pack_writes_pack_and_returns_refs() {
6974 let root = temp_root("sley-install-bundle-pack");
6975 let git_dir = root.join(".git");
6976 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6977 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6978 let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6979 let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
6980 let oid = object
6981 .object_id(ObjectFormat::Sha1)
6982 .expect("test operation should succeed");
6983 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6984 .expect("test operation should succeed");
6985 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6986 .into_bytes()
6987 .into_iter()
6988 .chain(pack.pack)
6989 .collect::<Vec<_>>();
6990 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6991 .expect("test operation should succeed");
6992
6993 let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
6994 .expect("test operation should succeed");
6995
6996 assert_eq!(result.written_objects, vec![oid]);
6997 assert_eq!(result.references, bundle.references);
6998 assert!(
6999 database
7000 .contains(&oid)
7001 .expect("test operation should succeed")
7002 );
7003 assert_eq!(read_object_for_assert(&database, &oid), object);
7004 assert!(
7005 !database
7006 .loose()
7007 .object_path(&oid)
7008 .expect("test operation should succeed")
7009 .exists()
7010 );
7011 fs::remove_dir_all(root).expect("test operation should succeed");
7012 }
7013
7014 #[test]
7015 fn unpack_packfile_objects_writes_sha256_pack_entries() {
7016 let writer = ObjectDatabase::new(ObjectFormat::Sha256);
7017 let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
7018 let oid = object
7019 .object_id(ObjectFormat::Sha256)
7020 .expect("test operation should succeed");
7021 let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
7022 .expect("test operation should succeed");
7023
7024 let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
7025 .expect("test operation should succeed");
7026
7027 assert_eq!(result.written_objects, vec![oid]);
7028 assert_eq!(read_object_for_assert(&writer, &oid), object);
7029 }
7030
7031 #[test]
7032 fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
7033 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
7034 let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
7035 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
7036 .expect("test operation should succeed");
7037 let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
7038 let oid = object
7039 .object_id(ObjectFormat::Sha1)
7040 .expect("test operation should succeed");
7041 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
7042 .expect("test operation should succeed");
7043 let bundle_bytes =
7044 format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
7045 .into_bytes()
7046 .into_iter()
7047 .chain(pack.pack)
7048 .collect::<Vec<_>>();
7049 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
7050 .expect("test operation should succeed");
7051
7052 assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
7053 assert!(!writer.contains(&oid));
7054 }
7055
7056 fn write_commit_graph(
7059 db: &mut FileObjectDatabase,
7060 payload: &[u8],
7061 ) -> Vec<(ObjectId, EncodedObject)> {
7062 let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
7063 let blob_oid = db
7064 .write_object(blob.clone())
7065 .expect("test operation should succeed");
7066 let tree = EncodedObject::new(
7067 ObjectType::Tree,
7068 Tree {
7069 entries: vec![TreeEntry {
7070 mode: 0o100644,
7071 name: BString::from(b"payload.txt"),
7072 oid: blob_oid,
7073 }],
7074 }
7075 .write(),
7076 );
7077 let tree_oid = db
7078 .write_object(tree.clone())
7079 .expect("test operation should succeed");
7080 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7081 let commit = EncodedObject::new(
7082 ObjectType::Commit,
7083 Commit {
7084 tree: tree_oid,
7085 parents: Vec::new(),
7086 author: identity.clone(),
7087 committer: identity,
7088 encoding: None,
7089 message: b"initial\n".to_vec(),
7090 }
7091 .write(),
7092 );
7093 let commit_oid = db
7094 .write_object(commit.clone())
7095 .expect("test operation should succeed");
7096 vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
7097 }
7098
7099 fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
7100 let root = temp_root("sley-repack-all");
7101 let git_dir = root.join(".git");
7102 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7103 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7104
7105 let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
7107 let packed_oid = packed_blob
7108 .object_id(format)
7109 .expect("test operation should succeed");
7110 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
7111 .expect("test operation should succeed");
7112 let existing = db
7113 .install_pack(&existing_pack)
7114 .expect("test operation should succeed");
7115
7116 let graph = write_commit_graph(&mut db, b"repack payload\n");
7117
7118 let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
7119 expected.insert(packed_oid, packed_blob.clone());
7120
7121 let result = repack_all_objects(&git_dir, format)
7122 .expect("test operation should succeed")
7123 .expect("repository has objects");
7124
7125 assert_eq!(result.object_count, expected.len());
7127 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
7128 assert_eq!(parsed.entries.len(), expected.len());
7129 for entry in &parsed.entries {
7130 let want = expected
7131 .get(&entry.entry.oid)
7132 .expect("packed object was in the repository");
7133 assert_eq!(&entry.object, want);
7134 assert_eq!(
7135 entry
7136 .object
7137 .object_id(format)
7138 .expect("test operation should succeed"),
7139 entry.entry.oid
7140 );
7141 }
7142 let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
7144 assert_eq!(idx.pack_checksum, parsed.checksum);
7145 assert_eq!(idx.entries.len(), expected.len());
7146
7147 assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
7149 let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
7151 want_loose.sort_by_key(ObjectId::to_hex);
7152 assert_eq!(result.packed_loose, want_loose);
7153 assert!(!result.packed_loose.contains(&packed_oid));
7154
7155 fs::remove_dir_all(root).expect("test operation should succeed");
7156 }
7157
7158 #[test]
7159 fn repack_all_objects_consolidates_loose_and_pack_sha1() {
7160 repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
7161 }
7162
7163 #[test]
7164 fn repack_all_objects_consolidates_loose_and_pack_sha256() {
7165 repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
7166 }
7167
7168 #[test]
7169 fn repack_all_objects_returns_none_for_empty_repository() {
7170 let root = temp_root("sley-repack-empty");
7171 let git_dir = root.join(".git");
7172 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7173
7174 assert!(
7175 repack_all_objects(&git_dir, ObjectFormat::Sha1)
7176 .expect("test operation should succeed")
7177 .is_none()
7178 );
7179
7180 fs::remove_dir_all(root).expect("test operation should succeed");
7181 }
7182
7183 #[test]
7184 fn install_repack_result_writes_pack_without_pruning_by_default() {
7185 let root = temp_root("sley-repack-install-nodelete");
7186 let git_dir = root.join(".git");
7187 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7188 let format = ObjectFormat::Sha1;
7189 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7190 let graph = write_commit_graph(&mut db, b"install no prune\n");
7191
7192 let result = repack_all_objects(&git_dir, format)
7193 .expect("test operation should succeed")
7194 .expect("test operation should succeed");
7195 install_repack_result(&git_dir, format, &result, false)
7196 .expect("test operation should succeed");
7197
7198 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
7200 let pack_dir = git_dir.join("objects").join("pack");
7201 let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
7202 let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
7203 assert!(pack_path.exists());
7204 assert!(idx_path.exists());
7205 for (oid, object) in &graph {
7207 assert!(
7208 db.loose()
7209 .object_path(oid)
7210 .expect("test operation should succeed")
7211 .exists()
7212 );
7213 assert_eq!(read_object_for_assert(&db, oid), *object);
7214 }
7215
7216 fs::remove_dir_all(root).expect("test operation should succeed");
7217 }
7218
7219 #[test]
7220 fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
7221 let root = temp_root("sley-repack-install-prune");
7222 let git_dir = root.join(".git");
7223 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7224 let format = ObjectFormat::Sha1;
7225 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7226
7227 let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
7228 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
7229 .expect("test operation should succeed");
7230 let existing = db
7231 .install_pack(&existing_pack)
7232 .expect("test operation should succeed");
7233 let graph = write_commit_graph(&mut db, b"prune payload\n");
7234
7235 let result = repack_all_objects(&git_dir, format)
7236 .expect("test operation should succeed")
7237 .expect("test operation should succeed");
7238 let new_pack_checksum = PackFile::parse(&result.pack, format)
7239 .expect("test operation should succeed")
7240 .checksum;
7241 install_repack_result(&git_dir, format, &result, true)
7242 .expect("test operation should succeed");
7243
7244 assert!(!existing.pack_path.exists());
7246 assert!(!existing.index_path.exists());
7247 for (oid, _) in &graph {
7249 assert!(
7250 !db.loose()
7251 .object_path(oid)
7252 .expect("test operation should succeed")
7253 .exists()
7254 );
7255 }
7256 let pack_dir = git_dir.join("objects").join("pack");
7258 assert!(
7259 pack_dir
7260 .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
7261 .exists()
7262 );
7263 let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
7264 for (oid, object) in &graph {
7265 assert!(
7266 reopened
7267 .contains(oid)
7268 .expect("test operation should succeed")
7269 );
7270 assert_eq!(read_object_for_assert(&reopened, oid), *object);
7271 }
7272 let packed_oid = packed_blob
7273 .object_id(format)
7274 .expect("test operation should succeed");
7275 assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
7276
7277 fs::remove_dir_all(root).expect("test operation should succeed");
7278 }
7279
7280 #[test]
7281 fn install_repack_result_preserves_keep_and_promisor_packs() {
7282 let root = temp_root("sley-repack-install-keep-promisor");
7283 let git_dir = root.join(".git");
7284 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7285 let format = ObjectFormat::Sha1;
7286 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7287
7288 let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
7289 let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
7290 .expect("test operation should succeed");
7291 let keep_install = db
7292 .install_pack(&keep_pack)
7293 .expect("test operation should succeed");
7294 let keep_sidecar = keep_install.pack_path.with_extension("keep");
7295 fs::write(&keep_sidecar, b"").expect("test operation should succeed");
7296
7297 let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
7298 let promisor_pack =
7299 PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
7300 .expect("test operation should succeed");
7301 let promisor_install = db
7302 .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
7303 .expect("test operation should succeed");
7304 let promisor_sidecar = promisor_install
7305 .promisor_path
7306 .clone()
7307 .expect("promisor sidecar");
7308
7309 let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
7310 let result = repack_all_objects(&git_dir, format)
7311 .expect("test operation should succeed")
7312 .expect("test operation should succeed");
7313 assert!(result.obsolete_packs.contains(&keep_install.pack_path));
7314 assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
7315
7316 install_repack_result(&git_dir, format, &result, true)
7317 .expect("test operation should succeed");
7318
7319 for path in [
7320 &keep_install.pack_path,
7321 &keep_install.index_path,
7322 &keep_sidecar,
7323 &promisor_install.pack_path,
7324 &promisor_install.index_path,
7325 &promisor_sidecar,
7326 ] {
7327 assert!(path.exists(), "{} should be preserved", path.display());
7328 }
7329 for (oid, _) in &graph {
7330 assert!(
7331 !db.loose()
7332 .object_path(oid)
7333 .expect("test operation should succeed")
7334 .exists()
7335 );
7336 }
7337
7338 fs::remove_dir_all(root).expect("test operation should succeed");
7339 }
7340
7341 #[test]
7342 fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
7343 let root = temp_root("sley-repack-install-safety");
7346 let git_dir = root.join(".git");
7347 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7348 let format = ObjectFormat::Sha1;
7349 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7350 let graph = write_commit_graph(&mut db, b"safety packed\n");
7351
7352 let mut result = repack_all_objects(&git_dir, format)
7353 .expect("test operation should succeed")
7354 .expect("test operation should succeed");
7355
7356 let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
7358 let stray_oid = db
7359 .write_object(stray.clone())
7360 .expect("test operation should succeed");
7361 assert!(!result.packed_loose.contains(&stray_oid));
7362 result.packed_loose.push(stray_oid);
7363
7364 install_repack_result(&git_dir, format, &result, true)
7365 .expect("test operation should succeed");
7366
7367 assert!(
7369 db.loose()
7370 .object_path(&stray_oid)
7371 .expect("test operation should succeed")
7372 .exists()
7373 );
7374 assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
7375 for (oid, _) in &graph {
7377 assert!(
7378 !db.loose()
7379 .object_path(oid)
7380 .expect("test operation should succeed")
7381 .exists()
7382 );
7383 }
7384
7385 fs::remove_dir_all(root).expect("test operation should succeed");
7386 }
7387
7388 #[test]
7389 fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
7390 let root = temp_root("sley-prune-unreachable");
7391 let git_dir = root.join(".git");
7392 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7393 let format = ObjectFormat::Sha1;
7394 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
7395 let graph = write_commit_graph(&mut db, b"reachable payload\n");
7396 let commit_oid = graph[0].0.clone();
7397
7398 let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
7400 let dangling_oid = db
7401 .write_object(dangling)
7402 .expect("test operation should succeed");
7403
7404 let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
7406 .expect("test operation should succeed");
7407 assert_eq!(reported, vec![dangling_oid]);
7408 assert!(
7409 db.loose()
7410 .object_path(&dangling_oid)
7411 .expect("test operation should succeed")
7412 .exists()
7413 );
7414
7415 let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
7417 .expect("test operation should succeed");
7418 assert_eq!(deleted, vec![dangling_oid]);
7419 assert!(
7420 !db.loose()
7421 .object_path(&dangling_oid)
7422 .expect("test operation should succeed")
7423 .exists()
7424 );
7425 for (oid, object) in &graph {
7426 assert!(
7427 db.loose()
7428 .object_path(oid)
7429 .expect("test operation should succeed")
7430 .exists()
7431 );
7432 assert_eq!(read_object_for_assert(&db, oid), *object);
7433 }
7434
7435 fs::remove_dir_all(root).expect("test operation should succeed");
7436 }
7437
7438 #[test]
7439 fn prune_unreachable_loose_ignores_gitlink_targets() {
7440 let root = temp_root("sley-prune-gitlink");
7441 let git_dir = root.join(".git");
7442 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
7443 let format = ObjectFormat::Sha1;
7444 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
7445
7446 let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
7447 .expect("test operation should succeed");
7448 let tree = EncodedObject::new(
7449 ObjectType::Tree,
7450 Tree {
7451 entries: vec![TreeEntry {
7452 mode: 0o160000,
7453 name: BString::from(b"submodule"),
7454 oid: submodule_oid,
7455 }],
7456 }
7457 .write(),
7458 );
7459 let tree_oid = db
7460 .write_object(tree)
7461 .expect("test operation should succeed");
7462 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
7463 let commit = EncodedObject::new(
7464 ObjectType::Commit,
7465 Commit {
7466 tree: tree_oid,
7467 parents: Vec::new(),
7468 author: identity.clone(),
7469 committer: identity,
7470 encoding: None,
7471 message: b"gitlink\n".to_vec(),
7472 }
7473 .write(),
7474 );
7475 let commit_oid = db
7476 .write_object(commit)
7477 .expect("test operation should succeed");
7478 let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
7479 let dangling_oid = db
7480 .write_object(dangling)
7481 .expect("test operation should succeed");
7482
7483 let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
7484 .expect("test operation should succeed");
7485
7486 assert_eq!(deleted, vec![dangling_oid]);
7487 assert!(
7488 !db.loose()
7489 .object_path(&dangling_oid)
7490 .expect("test operation should succeed")
7491 .exists()
7492 );
7493
7494 fs::remove_dir_all(root).expect("test operation should succeed");
7495 }
7496
7497 fn temp_root(prefix: &str) -> PathBuf {
7498 std::env::temp_dir().join(format!(
7499 "{prefix}-{}-{}",
7500 std::process::id(),
7501 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
7502 ))
7503 }
7504}