1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
9use sley_formats::{Bundle, BundleReference};
10use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
11use sley_pack::{
12 MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
13 PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
14};
15use std::collections::{HashMap, HashSet};
16use std::io::{Read, Seek, SeekFrom, Write};
17use std::path::{Path, PathBuf};
18use std::sync::atomic::{AtomicU64, Ordering};
19use std::sync::{Arc, Mutex, OnceLock};
20use std::{env, fs};
21
/// Process-wide atomic counter, initialised to zero.
///
/// NOTE(review): presumably used to derive unique temporary file names; its users are outside
/// this view — confirm.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
23
/// Read access to an object store, addressed by object id.
pub trait ObjectReader {
    /// Looks up `oid` and returns the shared encoded object, or an error if it cannot be read.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Reports whether `oid` is a shallow graft point.
    ///
    /// The default implementation answers `false` for every id. [`grafted_parents`] drops the
    /// parents of any commit for which this returns `true`.
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }

    /// Reports whether this reader knows about any shallow grafts; defaults to `false`.
    fn has_shallow_grafts(&self) -> bool {
        false
    }
}
45
46fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
47 (*oid == ObjectId::empty_tree(format))
48 .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
49}
50
51fn with_missing_object_context(
52 err: GitError,
53 oid: ObjectId,
54 context: MissingObjectContext,
55) -> GitError {
56 let kind = err
57 .not_found_kind()
58 .and_then(sley_core::NotFoundKind::missing_object_kind);
59 match kind {
60 Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
61 None => err,
62 }
63}
64
65pub fn grafted_parents<R: ObjectReader + ?Sized>(
69 reader: &R,
70 oid: &ObjectId,
71 parents: Vec<ObjectId>,
72) -> Vec<ObjectId> {
73 if reader.is_shallow_graft(oid) {
74 Vec::new()
75 } else {
76 parents
77 }
78}
79
/// Write access to an object store.
pub trait ObjectWriter {
    /// Stores `object` and returns the id it was stored under.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
88
/// Outcome of importing a bundle (see `unbundle_objects` and `install_bundle_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids of the objects reported as written or installed from the bundle's pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the references advertised by the bundle.
    pub references: Vec<BundleReference>,
}
94
/// Outcome of unpacking a pack object-by-object through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids returned by the writer, in the order the pack entries were processed.
    pub written_objects: Vec<ObjectId>,
}
99
/// Description of a pack that was installed on disk.
///
/// NOTE(review): the code that fills these fields is outside this view; the field notes below
/// are inferred from names and types only — confirm against the installer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Name of the installed pack.
    pub pack_name: String,
    /// Path of the pack file.
    pub pack_path: PathBuf,
    /// Path of the pack's index file.
    pub index_path: PathBuf,
    /// Path of the promisor marker, when one exists for this pack.
    pub promisor_path: Option<PathBuf>,
    /// Ids of the objects in the pack.
    pub object_ids: Vec<ObjectId>,
}
108
/// Outcome of [`RawPackInstaller::install_raw_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Ids of the objects the installer reports for the pack.
    pub object_ids: Vec<ObjectId>,
}
113
/// Options controlling how a raw pack is installed. The default leaves every flag off.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct RawPackInstallOptions {
    /// NOTE(review): presumably marks the installed pack as a promisor pack; the consumer of
    /// this flag is outside this view — confirm.
    pub promisor: bool,
}
118
/// A destination that can take a complete pack as raw bytes.
pub trait RawPackInstaller {
    /// Installs the pack in `pack_bytes` and reports the ids of its objects.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
122
/// Result of resolving an object-id prefix.
///
/// NOTE(review): the resolver itself is outside this view; variant meanings follow the names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// No object matched.
    Missing,
    /// Exactly one object matched.
    Unique(ObjectId),
    /// More than one object matched; carries the candidates.
    Ambiguous(Vec<ObjectId>),
}
129
/// Storage details for a single object.
///
/// NOTE(review): producers of this struct are outside this view; field notes are inferred
/// from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Size of the object's on-disk representation (presumably in bytes).
    pub disk_size: u64,
    /// Id of the delta base. Presumably the null id when the object is not stored as a delta
    /// (see `zero_oid`) — TODO confirm.
    pub deltabase: ObjectId,
}
135
136impl RawPackInstaller for FileObjectDatabase {
137 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
138 let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
139 Ok(RawPackInstallResult {
140 object_ids: result.object_ids,
141 })
142 }
143}
144
145impl RawPackInstaller for ObjectDatabase {
146 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
147 let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
148 Ok(RawPackInstallResult {
149 object_ids: result.written_objects,
150 })
151 }
152}
153
154pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
155 let mut missing = Vec::new();
156 for prerequisite in &bundle.prerequisites {
157 match reader.read_object(&prerequisite.oid) {
158 Ok(object) => {
159 let actual = object.object_id(bundle.format)?;
160 if actual != prerequisite.oid {
161 return Err(GitError::InvalidObject(format!(
162 "bundle prerequisite {} hashes to {actual}",
163 prerequisite.oid
164 )));
165 }
166 }
167 Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
168 Err(err) => return Err(err),
169 }
170 }
171 if missing.is_empty() {
172 return Ok(());
173 }
174 Err(GitError::object_not_found_in(
175 missing[0],
176 MissingObjectContext::PackInstall,
177 ))
178}
179
180pub fn unbundle_objects<R, W>(
181 bundle: &Bundle,
182 prerequisite_reader: &R,
183 writer: &mut W,
184) -> Result<BundleUnbundleResult>
185where
186 R: ObjectReader,
187 W: ObjectWriter,
188{
189 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
190 let pack = PackFile::parse_bundle(bundle)?;
191 let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
192 Ok(BundleUnbundleResult {
193 written_objects,
194 references: bundle.references.clone(),
195 })
196}
197
198pub fn install_bundle_pack<R>(
199 bundle: &Bundle,
200 prerequisite_reader: &R,
201 destination: &impl RawPackInstaller,
202) -> Result<BundleUnbundleResult>
203where
204 R: ObjectReader,
205{
206 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
207 let install = destination.install_raw_pack(&bundle.pack)?;
208 Ok(BundleUnbundleResult {
209 written_objects: install.object_ids,
210 references: bundle.references.clone(),
211 })
212}
213
214pub fn unpack_packfile_objects<W>(
215 pack_bytes: &[u8],
216 format: ObjectFormat,
217 writer: &W,
218) -> Result<PackUnpackResult>
219where
220 W: ObjectWriter,
221{
222 let pack = PackFile::parse(pack_bytes, format)?;
223 write_pack_objects(pack, writer, "pack")
224}
225
226fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
227where
228 W: ObjectWriter,
229{
230 let mut written_objects = Vec::with_capacity(pack.entries.len());
231 for entry in pack.entries {
232 let expected = entry.entry.oid;
233 let actual = writer.write_object(entry.object)?;
234 if actual != expected {
235 return Err(GitError::InvalidObject(format!(
236 "{source} object id mismatch: expected {expected}, wrote {actual}"
237 )));
238 }
239 written_objects.push(actual);
240 }
241 Ok(PackUnpackResult { written_objects })
242}
243
244pub fn collect_reachable_object_ids<R, I>(
245 reader: &R,
246 format: ObjectFormat,
247 starts: I,
248) -> Result<HashSet<ObjectId>>
249where
250 R: ObjectReader,
251 I: IntoIterator<Item = ObjectId>,
252{
253 walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
254}
255
256pub fn collect_reachable_object_ids_with_cut<R, I>(
261 reader: &R,
262 format: ObjectFormat,
263 starts: I,
264 cut: &HashSet<ObjectId>,
265) -> Result<HashSet<ObjectId>>
266where
267 R: ObjectReader,
268 I: IntoIterator<Item = ObjectId>,
269{
270 walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
271}
272
273pub fn collect_reachable_object_ids_excluding<R, I>(
277 reader: &R,
278 format: ObjectFormat,
279 starts: I,
280 excluded: &HashSet<ObjectId>,
281) -> Result<HashSet<ObjectId>>
282where
283 R: ObjectReader,
284 I: IntoIterator<Item = ObjectId>,
285{
286 walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
287}
288
289pub fn collect_reachable_objects<R, I>(
290 reader: &R,
291 format: ObjectFormat,
292 starts: I,
293 excluded: &HashSet<ObjectId>,
294) -> Result<Vec<Arc<EncodedObject>>>
295where
296 R: ObjectReader,
297 I: IntoIterator<Item = ObjectId>,
298{
299 let mut objects = Vec::new();
300 walk_reachable_objects(reader, format, starts, excluded, |_, object| {
301 objects.push(Arc::clone(object));
302 })?;
303 Ok(objects)
304}
305
/// An object paired with its already-known id, ready to be handed to the pack writer.
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id of `object`.
    oid: ObjectId,
    /// The shared encoded object.
    object: Arc<EncodedObject>,
}
311
312fn collect_reachable_pack_objects<R, I>(
313 reader: &R,
314 format: ObjectFormat,
315 starts: I,
316 excluded: &HashSet<ObjectId>,
317) -> Result<Vec<ReachablePackObject>>
318where
319 R: ObjectReader,
320 I: IntoIterator<Item = ObjectId>,
321{
322 let mut objects = Vec::new();
323 walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
324 objects.push(ReachablePackObject {
325 oid: *oid,
326 object: Arc::clone(object),
327 });
328 })?;
329 Ok(objects)
330}
331
332fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
333 objects
334 .iter()
335 .map(|entry| PackInput {
336 oid: &entry.oid,
337 object: &entry.object,
338 })
339 .collect()
340}
341
342pub fn install_reachable_pack<I>(
343 source: &impl ObjectReader,
344 destination: &impl RawPackInstaller,
345 format: ObjectFormat,
346 starts: I,
347) -> Result<Option<RawPackInstallResult>>
348where
349 I: IntoIterator<Item = ObjectId>,
350{
351 install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
352}
353
354pub fn install_reachable_pack_excluding<I>(
355 source: &impl ObjectReader,
356 destination: &impl RawPackInstaller,
357 format: ObjectFormat,
358 starts: I,
359 excluded: &HashSet<ObjectId>,
360) -> Result<Option<RawPackInstallResult>>
361where
362 I: IntoIterator<Item = ObjectId>,
363{
364 let pack = match build_reachable_pack(source, format, starts, excluded)? {
365 Some(pack) => pack,
366 None => return Ok(None),
367 };
368 destination.install_raw_pack(&pack.pack).map(Some)
369}
370
371pub fn build_reachable_pack<R, I>(
372 reader: &R,
373 format: ObjectFormat,
374 starts: I,
375 excluded: &HashSet<ObjectId>,
376) -> Result<Option<PackWrite>>
377where
378 R: ObjectReader,
379 I: IntoIterator<Item = ObjectId>,
380{
381 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
382 if objects.is_empty() {
383 return Ok(None);
384 }
385 let inputs = pack_inputs(&objects);
390 PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
391}
392
393pub fn build_and_install_reachable_pack<R, I>(
394 source: &R,
395 destination: &FileObjectDatabase,
396 format: ObjectFormat,
397 starts: I,
398 excluded: &HashSet<ObjectId>,
399 options: RawPackInstallOptions,
400) -> Result<Option<PackInstallResult>>
401where
402 R: ObjectReader,
403 I: IntoIterator<Item = ObjectId>,
404{
405 build_and_install_reachable_pack_filtered(
406 source,
407 destination,
408 format,
409 starts,
410 excluded,
411 options,
412 None,
413 None,
414 )
415}
416
/// Object filter applied while assembling a pack.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// Leave out blobs, except those that were explicitly requested as start points
    /// (see `build_and_install_reachable_pack_filtered`).
    BlobNone,
}
428
429#[allow(clippy::too_many_arguments)]
433pub fn build_and_install_reachable_pack_filtered<R, I>(
434 source: &R,
435 destination: &FileObjectDatabase,
436 format: ObjectFormat,
437 starts: I,
438 excluded: &HashSet<ObjectId>,
439 options: RawPackInstallOptions,
440 filter: Option<PackObjectFilter>,
441 unpack_limit: Option<usize>,
442) -> Result<Option<PackInstallResult>>
443where
444 R: ObjectReader,
445 I: IntoIterator<Item = ObjectId>,
446{
447 let starts: Vec<ObjectId> = starts.into_iter().collect();
448 let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
449 let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
450 match filter {
451 Some(PackObjectFilter::BlobNone) => {
452 objects.retain(|entry| {
453 entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
454 });
455 }
456 None => {}
457 }
458 if objects.is_empty() {
459 return Ok(None);
460 }
461 if let Some(limit) = unpack_limit
465 && objects.len() < limit
466 {
467 for entry in &objects {
468 destination.loose().write_object((*entry.object).clone())?;
469 }
470 return Ok(None);
471 }
472 let inputs = pack_inputs(&objects);
473 let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
474 destination
475 .install_generated_pack_unchecked(&pack, options)
476 .map(Some)
477}
478
479pub fn assemble_pack_with_verbatim_reuse(
489 format: ObjectFormat,
490 reused_pack_bytes: &[u8],
491 appended: &[PackInput<'_>],
492) -> Result<(Vec<u8>, u32)> {
493 assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
494}
495
496pub fn assemble_pack_with_verbatim_reuses(
499 format: ObjectFormat,
500 reused_packs: &[&[u8]],
501 appended: &[PackInput<'_>],
502) -> Result<(Vec<u8>, u32)> {
503 let hash_len = format.raw_len();
504 let mut reused_count = 0u32;
505 let mut capacity = 12 + hash_len + 64 * appended.len();
506 for reused_pack_bytes in reused_packs {
507 if reused_pack_bytes.len() < 12 + hash_len {
508 return Err(GitError::InvalidFormat("reused pack too short".into()));
509 }
510 if &reused_pack_bytes[..4] != b"PACK" {
511 return Err(GitError::InvalidFormat(
512 "reused pack has no signature".into(),
513 ));
514 }
515 let version = u32::from_be_bytes([
516 reused_pack_bytes[4],
517 reused_pack_bytes[5],
518 reused_pack_bytes[6],
519 reused_pack_bytes[7],
520 ]);
521 if version != 2 {
522 return Err(GitError::Unsupported(format!(
523 "reused pack version {version}"
524 )));
525 }
526 let count = u32::from_be_bytes([
527 reused_pack_bytes[8],
528 reused_pack_bytes[9],
529 reused_pack_bytes[10],
530 reused_pack_bytes[11],
531 ]);
532 reused_count = reused_count
533 .checked_add(count)
534 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
535 capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
536 }
537 let total = reused_count
538 .checked_add(appended.len() as u32)
539 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
540
541 let mut out = Vec::with_capacity(capacity);
542 out.extend_from_slice(b"PACK");
543 out.extend_from_slice(&2u32.to_be_bytes());
544 out.extend_from_slice(&total.to_be_bytes());
545 for reused_pack_bytes in reused_packs {
546 out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
547 }
548 for input in appended {
549 write_undeltified_pack_entry(&mut out, input.object)?;
550 }
551 let checksum = sley_core::digest_bytes(format, &out)?;
552 out.extend_from_slice(checksum.as_bytes());
553 Ok((out, reused_count))
554}
555
556pub fn assemble_pack_with_verbatim_entries(
559 format: ObjectFormat,
560 reused_entries: &[&[u8]],
561 appended: &[PackInput<'_>],
562) -> Result<(Vec<u8>, u32)> {
563 let reused_count = u32::try_from(reused_entries.len())
564 .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
565 let total = reused_count
566 .checked_add(appended.len() as u32)
567 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
568
569 let mut capacity = 12 + format.raw_len() + 64 * appended.len();
570 for entry in reused_entries {
571 capacity = capacity.saturating_add(entry.len());
572 }
573 let mut out = Vec::with_capacity(capacity);
574 out.extend_from_slice(b"PACK");
575 out.extend_from_slice(&2u32.to_be_bytes());
576 out.extend_from_slice(&total.to_be_bytes());
577 for entry in reused_entries {
578 out.extend_from_slice(entry);
579 }
580 for input in appended {
581 write_undeltified_pack_entry(&mut out, input.object)?;
582 }
583 let checksum = sley_core::digest_bytes(format, &out)?;
584 out.extend_from_slice(checksum.as_bytes());
585 Ok((out, reused_count))
586}
587
588fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
590 let type_bits: u8 = match object.object_type {
591 ObjectType::Commit => 1,
592 ObjectType::Tree => 2,
593 ObjectType::Blob => 3,
594 ObjectType::Tag => 4,
595 };
596 let mut size = object.body.len() as u64;
597 let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
598 size >>= 4;
599 while size > 0 {
600 out.push(byte | 0x80);
601 byte = (size & 0x7f) as u8;
602 size >>= 7;
603 }
604 out.push(byte);
605 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
606 encoder.write_all(&object.body)?;
607 out.extend_from_slice(&encoder.finish()?);
608 Ok(())
609}
610
/// An in-memory repack, produced by the `repack_*` functions and written to disk by
/// `install_repack_result` / `install_repack_result_with_bitmap`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the new pack file.
    pub pack: Vec<u8>,
    /// Bytes of the new pack's index.
    pub idx: Vec<u8>,
    /// Number of objects in the new pack.
    pub object_count: usize,
    /// Pack files that existed when the repack was computed, other than the new pack itself.
    /// Empty for `repack_loose_objects`.
    pub obsolete_packs: Vec<PathBuf>,
    /// Loose objects that are also contained in the new pack, sorted by raw id bytes.
    pub packed_loose: Vec<ObjectId>,
    // Writer-reported checksum and index entries; install re-validates `pack` and `idx`
    // against these before anything is written to disk.
    pack_checksum: ObjectId,
    index_entries: Vec<PackIndexEntry>,
}
635
636pub fn repack_reachable_objects(
656 git_dir: &Path,
657 format: ObjectFormat,
658 roots: &[ObjectId],
659) -> Result<Option<RepackResult>> {
660 let objects_dir = repository_objects_dir(git_dir);
661 let database = FileObjectDatabase::new(objects_dir.clone(), format);
662
663 let mut seen: HashSet<ObjectId> = HashSet::new();
664 let mut objects: Vec<ReachablePackObject> = Vec::new();
665 let mut pending: Vec<ObjectId> = roots.to_vec();
666 while let Some(oid) = pending.pop() {
667 if !seen.insert(oid) {
668 continue;
669 }
670 let object = match database.read_object(&oid) {
671 Ok(object) => object,
672 Err(GitError::NotFound(_)) => continue,
673 Err(err) => return Err(err),
674 };
675 match object.object_type {
676 ObjectType::Commit => {
677 let commit = Commit::parse_ref(format, &object.body)?;
678 pending.extend(grafted_parents(&database, &oid, commit.parents));
679 pending.push(commit.tree);
680 }
681 ObjectType::Tree => {
682 for entry in TreeEntries::new(format, &object.body) {
683 let entry = entry?;
684 if !entry.is_gitlink() {
685 pending.push(entry.oid);
686 }
687 }
688 }
689 ObjectType::Tag => {
690 let tag = Tag::parse_ref(format, &object.body)?;
691 pending.push(tag.object);
692 }
693 ObjectType::Blob => {}
694 }
695 objects.push(ReachablePackObject { oid, object });
696 }
697 if objects.is_empty() {
698 return Ok(None);
699 }
700
701 let inputs = pack_inputs(&objects);
702 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
703 let object_count = written.entries.len();
704
705 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
708 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
709 .into_iter()
710 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
711 .collect();
712
713 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
714 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
715 .into_iter()
716 .filter(|oid| packed_oid_set.contains(oid))
717 .collect();
718 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
719
720 let pack_checksum = written.checksum;
721 let index_entries = written.entries.clone();
722 Ok(Some(RepackResult {
723 pack: written.pack,
724 idx: written.index,
725 object_count,
726 obsolete_packs,
727 packed_loose,
728 pack_checksum,
729 index_entries,
730 }))
731}
732
733pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
734 let objects_dir = repository_objects_dir(git_dir);
735 let database = FileObjectDatabase::new(objects_dir.clone(), format);
736
737 let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
741 if all_oids.is_empty() {
742 return Ok(None);
743 }
744
745 let mut objects = Vec::with_capacity(all_oids.len());
749 for oid in &all_oids {
750 objects.push(ReachablePackObject {
751 oid: *oid,
752 object: database.read_object(oid)?,
753 });
754 }
755
756 let inputs = pack_inputs(&objects);
757 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
758 let object_count = written.entries.len();
759
760 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
766 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
767 .into_iter()
768 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
769 .collect();
770
771 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
774 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
775 .into_iter()
776 .filter(|oid| packed_oid_set.contains(oid))
777 .collect();
778 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
779
780 Ok(Some(RepackResult {
781 pack: written.pack,
782 idx: written.index,
783 object_count,
784 obsolete_packs,
785 packed_loose,
786 pack_checksum: written.checksum,
787 index_entries: written.entries,
788 }))
789}
790
791pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
797 let objects_dir = repository_objects_dir(git_dir);
798 let database = FileObjectDatabase::new(objects_dir.clone(), format);
799 let loose_oids = loose_object_ids(&objects_dir, format)?;
800 if loose_oids.is_empty() {
801 return Ok(None);
802 }
803
804 let mut objects = Vec::with_capacity(loose_oids.len());
805 for oid in &loose_oids {
806 objects.push(ReachablePackObject {
807 oid: *oid,
808 object: database.read_object(oid)?,
809 });
810 }
811
812 let inputs = pack_inputs(&objects);
813 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
814 let object_count = written.entries.len();
815 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
816 let mut packed_loose: Vec<ObjectId> = loose_oids
817 .into_iter()
818 .filter(|oid| packed_oid_set.contains(oid))
819 .collect();
820 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
821
822 let pack_checksum = written.checksum;
823 let index_entries = written.entries.clone();
824 Ok(Some(RepackResult {
825 pack: written.pack,
826 idx: written.index,
827 object_count,
828 obsolete_packs: Vec::new(),
829 packed_loose,
830 pack_checksum,
831 index_entries,
832 }))
833}
834
835pub fn install_repack_result(
850 git_dir: &Path,
851 format: ObjectFormat,
852 result: &RepackResult,
853 prune: bool,
854) -> Result<()> {
855 install_repack_result_with_bitmap(git_dir, format, result, prune, None)
856}
857
/// Validates a computed repack and writes it into the repository's `objects/pack` directory.
///
/// Steps, in order:
/// 1. Validate `result.pack` against the recorded checksum, and `result.idx` against both the
///    checksum and the writer's index entries. Nothing is written if validation fails.
/// 2. Write the `.pack`, `.rev` and `.idx` files named after the pack checksum.
/// 3. If `bitmap_tips` is given and a bitmap can be built, replace the `.bitmap` file.
/// 4. If `prune` is set, remove packs fully contained in the new pack and the loose objects
///    listed in `result.packed_loose` that the new pack contains.
pub fn install_repack_result_with_bitmap(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // `pack` and `idx` are public fields and may have been altered since the repack was
    // computed; re-check them against the private writer-reported values before writing.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }
    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    // NOTE(review): the index is written last, presumably so that a reader never finds an
    // `.idx` whose `.pack` is not yet in place — confirm before reordering.
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            // Remove any existing bitmap of the same name before writing the new one.
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    // Pruning is driven by what the validated index actually contains.
    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();

    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
    Ok(())
}
940
941fn validate_pack_checksum(
942 pack: &[u8],
943 format: ObjectFormat,
944 expected: &ObjectId,
945 context: &str,
946) -> Result<()> {
947 if expected.format() != format {
948 return Err(GitError::InvalidObjectId(format!(
949 "{context} checksum format does not match object format"
950 )));
951 }
952 let hash_len = format.raw_len();
953 if pack.len() < 12 + hash_len {
954 return Err(GitError::InvalidFormat(format!(
955 "{context} pack file too short"
956 )));
957 }
958 if &pack[..4] != b"PACK" {
959 return Err(GitError::InvalidFormat(format!(
960 "{context} pack file missing PACK signature"
961 )));
962 }
963 let trailer_offset = pack.len() - hash_len;
964 let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
965 let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
966 if &actual != expected || trailer != *expected {
967 return Err(GitError::InvalidFormat(format!(
968 "{context} pack checksum does not match generated pack"
969 )));
970 }
971 Ok(())
972}
973
974fn pack_index_entries_match_writer(
975 parsed: &[PackIndexEntry],
976 writer_entries: &[PackIndexEntry],
977) -> bool {
978 if parsed.len() != writer_entries.len() {
979 return false;
980 }
981 let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
982 writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
983 parsed.iter().zip(writer_entries).all(|(left, right)| {
984 left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
985 })
986}
987
988pub fn prune_unreachable_loose<I>(
997 git_dir: &Path,
998 format: ObjectFormat,
999 roots: I,
1000 delete: bool,
1001) -> Result<Vec<ObjectId>>
1002where
1003 I: IntoIterator<Item = ObjectId>,
1004{
1005 let objects_dir = repository_objects_dir(git_dir);
1006 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1007 let reachable = collect_reachable_object_ids(&database, format, roots)?;
1008
1009 let store = LooseObjectStore::new(objects_dir.clone(), format);
1010 let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1011 .into_iter()
1012 .filter(|oid| !reachable.contains(oid))
1013 .collect();
1014 pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1015
1016 if delete {
1017 for oid in &pruned {
1018 let path = store.object_path(oid)?;
1019 match fs::remove_file(&path) {
1020 Ok(()) => {}
1021 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1022 Err(err) => return Err(GitError::Io(err.to_string())),
1023 }
1024 }
1025 }
1026 Ok(pruned)
1027}
1028
1029fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1032 let oids = loose_object_id_set(objects_dir, format)?;
1033 let mut oids = oids.into_iter().collect::<Vec<_>>();
1034 oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1035 Ok(oids)
1036}
1037
1038fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1039 let mut oids = HashSet::new();
1040 collect_loose_object_ids(objects_dir, format, &mut oids)?;
1041 Ok(oids)
1042}
1043
1044fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1047 if !pack_dir.exists() {
1048 return Ok(Vec::new());
1049 }
1050 let mut packs = Vec::new();
1051 for entry in fs::read_dir(pack_dir)? {
1052 let path = entry?.path();
1053 if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1054 packs.push(path);
1055 }
1056 }
1057 packs.sort();
1058 Ok(packs)
1059}
1060
1061fn prune_packs_contained_in(
1065 objects_dir: &Path,
1066 format: ObjectFormat,
1067 present: &HashSet<ObjectId>,
1068 keep: &Path,
1069) -> Result<()> {
1070 let pack_dir = objects_dir.join("pack");
1071 let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
1072 let mut removed_stems: HashSet<String> = HashSet::new();
1073
1074 for pack_path in existing_pack_files(&pack_dir)? {
1075 if pack_path == keep {
1076 continue;
1077 }
1078 let Some(stem) = pack_path.file_stem() else {
1079 continue;
1080 };
1081 if Some(stem) == keep_stem.as_deref() {
1082 continue;
1083 }
1084 if pack_path.with_extension("keep").exists()
1085 || pack_path.with_extension("promisor").exists()
1086 {
1087 continue;
1088 }
1089 let index_path = pack_path.with_extension("idx");
1090 if !index_path.exists() {
1091 continue;
1093 }
1094 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1095 if !index
1096 .entries
1097 .iter()
1098 .all(|entry| present.contains(&entry.oid))
1099 {
1100 continue;
1101 }
1102 remove_file_if_exists(&pack_path)?;
1106 remove_file_if_exists(&index_path)?;
1107 for ext in ["rev", "mtimes", "bitmap"] {
1108 remove_file_if_exists(&pack_path.with_extension(ext))?;
1109 }
1110 removed_stems.insert(stem.to_string_lossy().into_owned());
1111 }
1112
1113 prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
1114 Ok(())
1115}
1116
1117fn prune_stale_multi_pack_index(
1124 pack_dir: &Path,
1125 format: ObjectFormat,
1126 removed_stems: &HashSet<String>,
1127) -> Result<()> {
1128 if removed_stems.is_empty() {
1129 return Ok(());
1130 }
1131 let midx_path = pack_dir.join("multi-pack-index");
1132 if !midx_path.exists() {
1133 return Ok(());
1134 }
1135 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1136 let references_removed_pack = midx.pack_names.iter().any(|name| {
1137 let stem = name.strip_suffix(".idx").unwrap_or(name);
1138 removed_stems.contains(stem)
1139 });
1140 if references_removed_pack {
1141 remove_file_if_exists(&midx_path)?;
1142 }
1143 Ok(())
1144}
1145
1146fn prune_loose_objects<'a, I>(
1149 objects_dir: &Path,
1150 format: ObjectFormat,
1151 candidates: I,
1152 present: &HashSet<ObjectId>,
1153) -> Result<()>
1154where
1155 I: IntoIterator<Item = &'a ObjectId>,
1156{
1157 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1158 for oid in candidates {
1159 if !present.contains(oid) {
1160 continue;
1161 }
1162 remove_file_if_exists(&store.object_path(oid)?)?;
1163 }
1164 Ok(())
1165}
1166
/// How a delta pack entry names its base (see `pack_entry_delta_base`).
enum PackDeltaBase {
    /// Entry type 6 (ofs-delta): absolute pack offset of the base entry.
    Offset(u64),
    /// Entry type 7 (ref-delta): object id of the base.
    Ref(ObjectId),
}
1171
/// What `scan_pack_index_offsets` learns about one pack entry from the index.
struct PackIndexOffsetInfo {
    /// Offset at which the entry's bytes end: the next larger entry offset in the index, or
    /// the trailer offset if there is none.
    end_offset: u64,
    /// Id of the entry found at the requested delta base offset, if one was requested.
    delta_base_oid: Option<ObjectId>,
}
1176
1177fn scan_pack_index_offsets(
1178 index: &PackIndex,
1179 target_offset: u64,
1180 trailer_offset: u64,
1181 delta_base_offset: Option<u64>,
1182) -> Result<PackIndexOffsetInfo> {
1183 let mut target_count = 0usize;
1184 let mut next_offset = None;
1185 let mut delta_base_oid = None;
1186
1187 for entry in &index.entries {
1188 if entry.offset == target_offset {
1189 target_count += 1;
1190 } else if entry.offset > target_offset {
1191 match next_offset {
1192 Some(current) if current <= entry.offset => {}
1193 _ => next_offset = Some(entry.offset),
1194 }
1195 }
1196 if Some(entry.offset) == delta_base_offset {
1197 delta_base_oid = Some(entry.oid);
1198 }
1199 }
1200
1201 if target_count == 0 {
1202 return Err(GitError::InvalidFormat(format!(
1203 "pack index offset {target_offset} not found"
1204 )));
1205 }
1206 if let Some(offset) = delta_base_offset
1207 && delta_base_oid.is_none()
1208 {
1209 return Err(GitError::InvalidFormat(format!(
1210 "ofs-delta base offset {offset} not found"
1211 )));
1212 }
1213
1214 Ok(PackIndexOffsetInfo {
1215 end_offset: if target_count > 1 {
1218 target_offset
1219 } else {
1220 next_offset.unwrap_or(trailer_offset)
1221 },
1222 delta_base_oid,
1223 })
1224}
1225
1226fn pack_entry_delta_base(
1227 format: ObjectFormat,
1228 pack: &[u8],
1229 entry_offset: u64,
1230) -> Result<Option<PackDeltaBase>> {
1231 let mut cursor = usize::try_from(entry_offset)
1232 .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1233 let first = pack_next_byte(pack, &mut cursor)?;
1234 let kind = (first >> 4) & 0x07;
1235 let mut byte = first;
1236 while byte & 0x80 != 0 {
1237 byte = pack_next_byte(pack, &mut cursor)?;
1238 }
1239 match kind {
1240 6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1241 pack,
1242 &mut cursor,
1243 entry_offset,
1244 )?))),
1245 7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1246 format,
1247 pack,
1248 &mut cursor,
1249 )?))),
1250 _ => Ok(None),
1251 }
1252}
1253
1254fn parse_ref_delta_base_oid(
1255 format: ObjectFormat,
1256 pack: &[u8],
1257 cursor: &mut usize,
1258) -> Result<ObjectId> {
1259 let raw_len = format.raw_len();
1260 if *cursor + raw_len > pack.len() {
1261 return Err(GitError::InvalidFormat(
1262 "truncated ref-delta base object id".into(),
1263 ));
1264 }
1265 let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1266 *cursor += raw_len;
1267 Ok(oid)
1268}
1269
1270fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
1271 let mut byte = pack_next_byte(pack, cursor)?;
1272 let mut relative = u64::from(byte & 0x7f);
1273 while byte & 0x80 != 0 {
1274 byte = pack_next_byte(pack, cursor)?;
1275 relative = relative
1276 .checked_add(1)
1277 .and_then(|value| value.checked_shl(7))
1278 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
1279 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
1280 }
1281 entry_offset
1282 .checked_sub(relative)
1283 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
1284}
1285
1286fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
1287 let Some(byte) = pack.get(*cursor).copied() else {
1288 return Err(GitError::InvalidFormat("truncated pack entry".into()));
1289 };
1290 *cursor += 1;
1291 Ok(byte)
1292}
1293
/// Returns the null object id for `format`, as produced by `ObjectId::null`.
///
/// The body always returns `Ok`; the `Result` wrapper is part of the signature
/// callers use.
fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
    Ok(ObjectId::null(format))
}
1297
1298fn remove_file_if_exists(path: &Path) -> Result<()> {
1300 match fs::remove_file(path) {
1301 Ok(()) => Ok(()),
1302 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1303 Err(err) => Err(GitError::Io(err.to_string())),
1304 }
1305}
1306
/// Walks every object reachable from `starts`, calling `visit` once per object,
/// and returns the set of object ids that were visited.
///
/// This is `walk_reachable_objects_with_cut` with an empty cut set, so the
/// parents of every commit are followed. Objects listed in `excluded` are
/// skipped entirely.
///
/// # Errors
///
/// Propagates read and parse failures from the underlying walk.
fn walk_reachable_objects<R, I, F>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
    visit: F,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
    F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
    walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
}
1321
/// Depth-first walk over the object graph reachable from `starts`.
///
/// * `excluded` — objects that are neither read, visited, nor recorded.
/// * `cut` — commits that are still visited (and whose tree is still walked)
///   but whose parents are not followed.
/// * `visit` — called exactly once for each object, after it has been read and
///   (for commits, trees and tags) successfully parsed.
///
/// Commit parents are filtered through `grafted_parents`, and gitlink tree
/// entries are not followed. Each start is fully drained before the next one
/// is pushed. Returns the set of every object id that was visited.
///
/// # Errors
///
/// A failed read is passed through `with_missing_object_context` with the
/// `Traversal` context; commit, tree-entry and tag parse errors are propagated
/// unchanged.
fn walk_reachable_objects_with_cut<R, I, F>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
    cut: &HashSet<ObjectId>,
    mut visit: F,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
    F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
    let mut seen = HashSet::new();
    // LIFO work list: the most recently pushed object is processed next.
    let mut pending = Vec::new();
    for start in starts {
        pending.push(start);
        while let Some(oid) = pending.pop() {
            // Excluded objects are dropped before they can enter `seen`.
            if excluded.contains(&oid) {
                continue;
            }
            if !seen.insert(oid) {
                continue;
            }
            let object = reader.read_object(&oid).map_err(|err| {
                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
            })?;
            match object.object_type {
                ObjectType::Commit => {
                    let (tree, parents) = {
                        let commit = Commit::parse_ref(format, &object.body)?;
                        (commit.tree, commit.parents)
                    };
                    visit(&oid, &object);
                    if !cut.contains(&oid) {
                        // Pushed in reverse so the first parent is popped first.
                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
                            pending.push(parent);
                        }
                    }
                    // Pushed last, so the commit's tree is walked before its parents.
                    pending.push(tree);
                }
                ObjectType::Tree => {
                    let mut child_oids = Vec::new();
                    for entry in TreeEntries::new(format, &object.body) {
                        let entry = entry?;
                        // Gitlink entries are not followed.
                        if entry.is_gitlink() {
                            continue;
                        }
                        child_oids.push(entry.oid);
                    }
                    visit(&oid, &object);
                    // Reversed so children are popped in tree order.
                    pending.extend(child_oids.into_iter().rev());
                }
                ObjectType::Tag => {
                    let target = {
                        let tag = Tag::parse_ref(format, &object.body)?;
                        tag.object
                    };
                    visit(&oid, &object);
                    pending.push(target);
                }
                ObjectType::Blob => visit(&oid, &object),
            }
        }
    }
    Ok(seen)
}
1392
/// Reports whether bit `position` is set in `words` (64 bits per word, least
/// significant bit first). Positions beyond the end of `words` read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let slot = (position / 64) as usize;
    let mask = 1u64 << (position % 64);
    match words.get(slot) {
        Some(word) => (*word & mask) != 0,
        None => false,
    }
}
1401
/// Sets bit `position` in `words` (64 bits per word, least significant bit
/// first). Positions beyond the end of `words` are silently ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
1408
/// ORs `other` into `acc` word by word. Only the words both slices have in
/// common are touched; any excess in either slice is left alone.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    let shared = acc.len().min(other.len());
    for index in 0..shared {
        acc[index] |= other[index];
    }
}
1414
/// Lists the positions of all set bits in `words`, in ascending order
/// (64 bits per word, least significant bit first).
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    words
        .iter()
        .enumerate()
        .flat_map(|(word_index, &word)| {
            (0..64u32)
                .filter(move |bit| (word & (1u64 << *bit)) != 0)
                .map(move |bit| word_index as u32 * 64 + bit)
        })
        .collect()
}
1428
/// Extracts the timestamp from an identity line by taking the second
/// space-separated field counted from the end (the last field is skipped).
///
/// Returns `0` when that field is absent, is not valid UTF-8, or does not
/// parse as an `i64`.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    let Some(raw_seconds) = identity.rsplit(|byte| *byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw_seconds) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
1441
/// Returns how many commits to step over after position `idx` when picking
/// commits in the bitmap selection loop.
///
/// * `idx <= 100` — step of 0.
/// * `100 < idx <= 20000` — `idx - 100`, capped at 100.
/// * `idx > 20000` — `idx - 20000`, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
1460
1461pub fn build_pack_bitmap(
1475 db: &FileObjectDatabase,
1476 format: ObjectFormat,
1477 index_entries: &[PackIndexEntry],
1478 pack_checksum: &ObjectId,
1479 preferred_tips: &HashSet<ObjectId>,
1480) -> Result<Option<Vec<u8>>> {
1481 let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
1484 by_offset.sort_by_key(|&slot| index_entries[slot].offset);
1485 let bit_order: Vec<ObjectId> = by_offset
1486 .into_iter()
1487 .map(|slot| index_entries[slot].oid)
1488 .collect();
1489 build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
1490}
1491
1492pub fn build_midx_bitmap(
1498 db: &FileObjectDatabase,
1499 format: ObjectFormat,
1500 midx_entries: &[sley_pack::MultiPackIndexEntry],
1501 midx_checksum: &ObjectId,
1502 preferred_pack: u32,
1503 preferred_tips: &HashSet<ObjectId>,
1504) -> Result<Option<Vec<u8>>> {
1505 let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
1506 pseudo.sort_by_key(|&slot| {
1507 let entry = &midx_entries[slot];
1508 (
1509 entry.pack_int_id != preferred_pack,
1510 entry.pack_int_id,
1511 entry.offset,
1512 )
1513 });
1514 let bit_order: Vec<ObjectId> = pseudo
1515 .into_iter()
1516 .map(|slot| midx_entries[slot].oid)
1517 .collect();
1518 build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
1519}
1520
/// Counts the "maximal" commits among `selected` by pushing per-commit bit
/// masks down first-parent chains.
///
/// Every selected commit starts with a mask holding only its own bit and is
/// flagged maximal. Commits are then processed children-before-parents along
/// first-parent edges; a processed commit that is still flagged maximal is
/// counted, and its mask is merged into its first parent.
///
/// # Errors
///
/// Propagates object read and commit parse failures.
fn bitmap_num_maximal_commits(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    selected: &[ObjectId],
) -> Result<usize> {
    // First-parent edge (after graft filtering) for every commit on a
    // first-parent chain that starts at a selected commit.
    let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
    let mut stack: Vec<ObjectId> = selected.to_vec();
    while let Some(oid) = stack.pop() {
        if first_parent.contains_key(&oid) {
            continue;
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        let parent = grafted_parents(db, &oid, commit.parents).first().copied();
        first_parent.insert(oid, parent);
        if let Some(parent) = parent {
            stack.push(parent);
        }
    }
    // Number of not-yet-processed first-parent children per commit.
    let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
    for parent in first_parent.values().flatten() {
        *pending_children.entry(*parent).or_default() += 1;
    }
    let word_count = selected.len().div_ceil(64);
    struct MaximalEnt {
        mask: Vec<u64>,
        maximal: bool,
    }
    // Seed: each selected commit owns the bit matching its index in `selected`.
    let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
    for (bit, oid) in selected.iter().enumerate() {
        let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
            mask: vec![0u64; word_count],
            maximal: true,
        });
        ent.mask[bit / 64] |= 1u64 << (bit % 64);
        ent.maximal = true;
    }
    // Start from commits that are nobody's first parent.
    let mut queue: Vec<ObjectId> = first_parent
        .keys()
        .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
        .copied()
        .collect();
    let mut num_maximal = 0usize;
    while let Some(oid) = queue.pop() {
        if let Some(ent) = ents.remove(&oid) {
            if ent.maximal {
                num_maximal += 1;
            }
            if let Some(Some(parent)) = first_parent.get(&oid) {
                match ents.entry(*parent) {
                    // Parent had no mask yet: it inherits the child's mask but is
                    // not maximal itself.
                    std::collections::hash_map::Entry::Vacant(vacant) => {
                        vacant.insert(MaximalEnt {
                            mask: ent.mask.clone(),
                            maximal: false,
                        });
                    }
                    std::collections::hash_map::Entry::Occupied(mut occupied) => {
                        let parent_ent = occupied.get_mut();
                        // Does the child carry bits the parent lacks?
                        let c_not_p = ent
                            .mask
                            .iter()
                            .zip(&parent_ent.mask)
                            .any(|(child, parent)| child & !parent != 0);
                        if c_not_p {
                            // Does the parent carry bits the child lacks? Evaluated
                            // before the merge below overwrites the parent's mask.
                            let p_not_c = parent_ent
                                .mask
                                .iter()
                                .zip(&ent.mask)
                                .any(|(parent, child)| parent & !child != 0);
                            for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
                                *parent |= child;
                            }
                            parent_ent.maximal = p_not_c;
                        }
                    }
                }
            }
        }
        // A parent becomes ready once all of its first-parent children are done.
        if let Some(Some(parent)) = first_parent.get(&oid)
            && let Some(remaining) = pending_children.get_mut(parent)
        {
            *remaining -= 1;
            if *remaining == 0 {
                queue.push(*parent);
            }
        }
    }
    Ok(num_maximal)
}
1620
/// Builds a serialized reachability bitmap over the objects in `bit_order`.
///
/// `bit_order[i]` is the object assigned to bit `i`. The function selects a
/// subset of the commits in `bit_order`, computes for each selected commit the
/// set of bits reachable from it (commits, trees and non-gitlink tree
/// entries), and hands the results to `PackBitmapWriter`.
///
/// Returns `Ok(None)` when `bit_order` is empty or has more than `u32::MAX`
/// entries, and also (after printing a warning to stderr) when a reachable
/// object is not part of `bit_order`.
///
/// # Errors
///
/// Propagates object read, commit/tree parse and bitmap writer failures.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // `index_position[bit]` is the rank of that object when all objects are
    // sorted by raw object id bytes.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    // Reverse lookup: object id -> bit position.
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    let mut object_types = Vec::with_capacity(object_count);
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Use the header-only read when it yields a type; otherwise read the
        // whole object.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Largest committer timestamp first.
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Fewer than 100 commits: every commit gets a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default pick is the commit at the end of the window.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // The first preferred tip in the window wins outright.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    // Otherwise the last commit with two or more parents is kept.
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // Extra statistics are computed and emitted only when GIT_TRACE2_EVENT is set.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    let word_count = object_count.div_ceil(64);
    // Finished bitmaps, keyed by selected commit, reused by later walks.
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    // Reverse of the selection order, i.e. smallest timestamp first.
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            // A commit that already has a finished bitmap contributes it whole.
            if let Some(stored) = memo.get(&oid) {
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    // Bitmaps are added in selection order.
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
1772
/// Recursively sets the bits of `tree` and everything below it in `acc`.
///
/// `oid_to_pack` maps object ids to bit positions. Sub-trees are descended
/// into; other non-gitlink entries only get their bit set and are not read.
/// A tree whose bit is already set is not walked again.
///
/// Returns `Ok(true)` on success. Returns `Ok(false)` — after printing a
/// warning to stderr — as soon as the tree or one of its entries has no bit
/// position in `oid_to_pack`.
///
/// # Errors
///
/// Propagates tree read and tree-entry parse failures.
fn bitmap_mark_tree(
    db: &impl ObjectReader,
    format: ObjectFormat,
    tree: &ObjectId,
    oid_to_pack: &HashMap<ObjectId, u32>,
    acc: &mut [u64],
) -> Result<bool> {
    let Some(&pack_pos) = oid_to_pack.get(tree) else {
        eprintln!(
            "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
        );
        return Ok(false);
    };
    // Already marked: its whole subtree was handled by an earlier visit.
    if bitset_get(acc, pack_pos) {
        return Ok(true);
    }
    bitset_set(acc, pack_pos);
    let object = db.read_object(tree)?;
    for entry in TreeEntries::new(format, &object.body) {
        let entry = entry?;
        // Gitlink entries are not part of the closure.
        if entry.is_gitlink() {
            continue;
        }
        if entry.is_tree() {
            if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
                return Ok(false);
            }
        } else {
            let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
                    entry.oid
                );
                return Ok(false);
            };
            bitset_set(acc, blob_pos);
        }
    }
    Ok(true)
}
1816
/// A reachability bitmap loaded from disk and expanded into plain `u64` words.
pub struct LoadedPackBitmap {
    /// Number of objects (bit positions) the bitmap covers.
    object_count: u32,
    /// Object id -> bit position.
    oid_to_pack: HashMap<ObjectId, u32>,
    /// Bit position -> object id.
    pack_to_oid: Vec<ObjectId>,
    /// Fully resolved bitmap words for each commit that has a stored bitmap.
    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
    /// Type bitmap words for commits.
    commits: Vec<u64>,
    /// Type bitmap words for trees.
    trees: Vec<u64>,
    /// Type bitmap words for blobs.
    blobs: Vec<u64>,
    /// Type bitmap words for tags.
    tags: Vec<u64>,
}
1830
1831impl LoadedPackBitmap {
1832 pub fn object_count(&self) -> u32 {
1833 self.object_count
1834 }
1835
1836 pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
1838 self.oid_to_pack.get(oid).copied()
1839 }
1840
1841 pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
1842 self.pack_to_oid.get(position as usize)
1843 }
1844
1845 pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
1848 self.commit_words.get(oid)
1849 }
1850
1851 pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
1853 self.commit_words.keys()
1854 }
1855
1856 pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
1858 match object_type {
1859 ObjectType::Commit => &self.commits,
1860 ObjectType::Tree => &self.trees,
1861 ObjectType::Blob => &self.blobs,
1862 ObjectType::Tag => &self.tags,
1863 }
1864 }
1865
1866 fn word_count(&self) -> usize {
1867 (self.object_count as usize).div_ceil(64)
1868 }
1869}
1870
1871pub fn load_pack_bitmap(
1878 objects_dir: &Path,
1879 format: ObjectFormat,
1880) -> Result<Option<LoadedPackBitmap>> {
1881 let pack_dir = objects_dir.join("pack");
1882 if !pack_dir.exists() {
1883 return Ok(None);
1884 }
1885 if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
1888 return Ok(Some(bitmap));
1889 }
1890 let mut bitmap_paths = Vec::new();
1891 for entry in fs::read_dir(&pack_dir)? {
1892 let path = entry?.path();
1893 if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
1894 && path
1895 .file_name()
1896 .and_then(|name| name.to_str())
1897 .is_some_and(|name| name.starts_with("pack-"))
1898 {
1899 bitmap_paths.push(path);
1900 }
1901 }
1902 bitmap_paths.sort();
1903 for bitmap_path in bitmap_paths {
1904 match load_pack_bitmap_file(&bitmap_path, format) {
1905 Ok(Some(bitmap)) => return Ok(Some(bitmap)),
1906 Ok(None) | Err(_) => continue,
1907 }
1908 }
1909 Ok(None)
1910}
1911
/// Tries to load the bitmap that accompanies `<pack_dir>/multi-pack-index`.
///
/// Every failure along the way — missing or unreadable files, parse errors, a
/// checksum mismatch, an out-of-range reverse-index position — yields
/// `Ok(None)` rather than an error, so the caller can fall back to other
/// bitmaps. No path through this function returns `Err`.
fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
    let midx_path = pack_dir.join("multi-pack-index");
    if !midx_path.exists() {
        return Ok(None);
    }
    let Ok(midx_bytes) = fs::read(&midx_path) else {
        return Ok(None);
    };
    let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
        return Ok(None);
    };
    // The bitmap file name embeds the multi-pack-index checksum.
    let bitmap_path = pack_dir.join(format!(
        "multi-pack-index-{}.bitmap",
        midx.checksum.to_hex()
    ));
    if !bitmap_path.exists() {
        return Ok(None);
    }
    let object_count = midx.objects.len();
    // GIT_TEST_MIDX_READ_RIDX set to "0" or "false" (any case) disables use of
    // the reverse index embedded in the multi-pack-index; unset means enabled.
    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
        .unwrap_or(true);
    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
        (Some(chunk), true) => {
            sley_core::trace2::data("load_midx_revindex", "source", "midx");
            chunk.clone()
        }
        // No embedded reverse index (or it is disabled): use the separate
        // `.rev` file named after the same checksum.
        _ => {
            let rev_path =
                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
            let Ok(rev_bytes) = fs::read(&rev_path) else {
                return Ok(None);
            };
            let Ok(parsed_rev) =
                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
            else {
                return Ok(None);
            };
            sley_core::trace2::data("load_midx_revindex", "source", "rev");
            parsed_rev.positions
        }
    };
    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
        return Ok(None);
    };
    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
        Ok(parsed) => parsed,
        Err(_) => return Ok(None),
    };
    // The bitmap must have been written for exactly this multi-pack-index.
    if parsed.pack_checksum != midx.checksum {
        return Ok(None);
    }

    // The reverse index lists, per bit position, the object's position in
    // `midx.objects`.
    let mut pack_to_oid = Vec::with_capacity(object_count);
    for &midx_pos in &reverse_index {
        let Some(entry) = midx.objects.get(midx_pos as usize) else {
            return Ok(None);
        };
        pack_to_oid.push(entry.oid);
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }
    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
        midx.objects.get(position).map(|entry| entry.oid)
    }) {
        Ok(loaded) => Ok(Some(loaded)),
        Err(_) => Ok(None),
    }
}
1994
1995fn load_pack_bitmap_file(
1996 bitmap_path: &Path,
1997 format: ObjectFormat,
1998) -> Result<Option<LoadedPackBitmap>> {
1999 let index_path = bitmap_path.with_extension("idx");
2000 if !index_path.exists() {
2001 return Ok(None);
2002 }
2003 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
2004 let object_count = index.entries.len();
2005 let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
2006 if parsed.pack_checksum != index.pack_checksum {
2007 return Ok(None);
2008 }
2009
2010 let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2011 pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2012 let mut pack_to_oid = Vec::with_capacity(object_count);
2013 for index_pos in &pack_order {
2014 pack_to_oid.push(index.entries[*index_pos as usize].oid);
2015 }
2016 let mut oid_to_pack = HashMap::with_capacity(object_count);
2017 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2018 oid_to_pack.insert(*oid, pack_pos as u32);
2019 }
2020
2021 assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2022 index.entries.get(position).map(|entry| entry.oid)
2023 })
2024 .map(Some)
2025}
2026
2027fn assemble_loaded_bitmap(
2032 parsed: PackBitmapIndex,
2033 object_count: usize,
2034 pack_to_oid: Vec<ObjectId>,
2035 oid_to_pack: HashMap<ObjectId, u32>,
2036 lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2037) -> Result<LoadedPackBitmap> {
2038 let word_count = object_count.div_ceil(64);
2039 let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2040 let mut words = bitmap.to_words()?;
2041 words.resize(word_count, 0);
2042 Ok(words)
2043 };
2044
2045 let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2046 let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2047 for (entry_index, entry) in parsed.entries.iter().enumerate() {
2048 let mut words = expand(&entry.bitmap)?;
2049 if entry.xor_offset > 0 {
2050 let base_index = entry_index - entry.xor_offset as usize;
2051 let base = &resolved[base_index];
2052 for (dst, src) in words.iter_mut().zip(base.iter()) {
2053 *dst ^= *src;
2054 }
2055 }
2056 let words = Arc::new(words);
2057 resolved.push(Arc::clone(&words));
2058 let commit_oid = lookup_oid(entry.object_position as usize)
2059 .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2060 commit_words.insert(commit_oid, words);
2061 }
2062
2063 Ok(LoadedPackBitmap {
2064 object_count: object_count as u32,
2065 oid_to_pack,
2066 pack_to_oid,
2067 commit_words,
2068 commits: expand(&parsed.type_bitmaps.commits)?,
2069 trees: expand(&parsed.type_bitmaps.trees)?,
2070 blobs: expand(&parsed.type_bitmaps.blobs)?,
2071 tags: expand(&parsed.type_bitmaps.tags)?,
2072 })
2073}
2074
/// Outcome of a bitmap-assisted reachability walk.
pub struct BitmapWalkResult {
    /// One bit per object covered by the bitmap; set bits are reachable objects.
    pub words: Vec<u64>,
    /// Reachable objects that have no bit position in the bitmap, with their types.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
2082
2083impl BitmapWalkResult {
2084 pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2086 for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2087 *dst &= !*src;
2088 }
2089 let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2090 self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2091 }
2092}
2093
/// Computes the objects reachable from `roots`, using stored commit bitmaps
/// from `bitmap` where available and reading objects from `db` otherwise.
///
/// Objects that have a bit position in `bitmap` are reported through the
/// result's `words`; all others are listed in `extended`. When
/// `include_objects` is `false`, the trees of commits walked by hand are not
/// descended into.
///
/// # Errors
///
/// Propagates object read and commit/tag/tree parse failures.
pub fn bitmap_reachable(
    bitmap: &LoadedPackBitmap,
    db: &impl ObjectReader,
    format: ObjectFormat,
    roots: &[ObjectId],
    include_objects: bool,
) -> Result<BitmapWalkResult> {
    let mut walk = BitmapFillWalk {
        bitmap,
        words: vec![0u64; bitmap.word_count()],
        extended: Vec::new(),
        extended_seen: HashSet::new(),
    };
    let mut commit_stack: Vec<ObjectId> = Vec::new();

    for root in roots {
        let mut oid = *root;
        // Peel tags (marking each one) until a non-tag object is reached.
        loop {
            let object = db.read_object(&oid)?;
            match object.object_type {
                ObjectType::Tag => {
                    walk.mark(&oid, ObjectType::Tag);
                    let tag = Tag::parse_ref(format, &object.body)?;
                    oid = tag.object;
                }
                ObjectType::Commit => {
                    commit_stack.push(oid);
                    break;
                }
                // Tree roots are walked regardless of `include_objects`.
                ObjectType::Tree => {
                    walk.mark_tree_closure(db, format, &oid)?;
                    break;
                }
                ObjectType::Blob => {
                    walk.mark(&oid, ObjectType::Blob);
                    break;
                }
            }
        }
    }

    while let Some(oid) = commit_stack.pop() {
        if let Some(position) = bitmap.pack_position(&oid) {
            if bitset_get(&walk.words, position) {
                continue;
            }
            // A stored bitmap is merged in whole and this commit is not walked
            // any further.
            if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
                bitset_or(&mut walk.words, stored);
                continue;
            }
            bitset_set(&mut walk.words, position);
        } else {
            // Commit without a bit position: tracked in the extended list.
            if walk.extended_seen.contains(&oid) {
                continue;
            }
            walk.extended_seen.insert(oid);
            walk.extended.push((oid, ObjectType::Commit));
        }
        let object = db.read_object(&oid)?;
        let commit = Commit::parse_ref(format, &object.body)?;
        commit_stack.extend(grafted_parents(db, &oid, commit.parents));
        if include_objects {
            walk.mark_tree_closure(db, format, &commit.tree)?;
        }
    }

    Ok(BitmapWalkResult {
        words: walk.words,
        extended: walk.extended,
    })
}
2176
/// Mutable state of a reachability walk that fills in objects by hand.
struct BitmapFillWalk<'a> {
    /// Bitmap supplying the object-id -> bit-position mapping.
    bitmap: &'a LoadedPackBitmap,
    /// Result bits for objects that have a bit position.
    words: Vec<u64>,
    /// Objects without a bit position, in the order they were first marked.
    extended: Vec<(ObjectId, ObjectType)>,
    /// Ids already recorded in `extended`, used to avoid duplicates.
    extended_seen: HashSet<ObjectId>,
}
2183
2184impl BitmapFillWalk<'_> {
2185 fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2187 if let Some(position) = self.bitmap.pack_position(oid) {
2188 if bitset_get(&self.words, position) {
2189 return false;
2190 }
2191 bitset_set(&mut self.words, position);
2192 true
2193 } else {
2194 if !self.extended_seen.insert(*oid) {
2195 return false;
2196 }
2197 self.extended.push((*oid, object_type));
2198 true
2199 }
2200 }
2201
2202 fn mark_tree_closure(
2206 &mut self,
2207 db: &impl ObjectReader,
2208 format: ObjectFormat,
2209 tree: &ObjectId,
2210 ) -> Result<()> {
2211 if !self.mark(tree, ObjectType::Tree) {
2212 return Ok(());
2213 }
2214 let object = db.read_object(tree)?;
2215 for entry in TreeEntries::new(format, &object.body) {
2216 let entry = entry?;
2217 if entry.is_gitlink() {
2218 continue;
2219 }
2220 if entry.is_tree() {
2221 self.mark_tree_closure(db, format, &entry.oid)?;
2222 } else {
2223 self.mark(&entry.oid, ObjectType::Blob);
2224 }
2225 }
2226 Ok(())
2227 }
2228}
2229
/// In-memory object store keyed by object id.
#[derive(Debug)]
pub struct ObjectDatabase {
    /// Object format used to compute and check object ids.
    format: ObjectFormat,
    /// Stored objects, guarded by a mutex so the store can be used through `&self`.
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    /// Promisor flag set through `with_promisor`; `false` by default.
    promisor: bool,
}
2241
2242impl ObjectDatabase {
2243 pub fn new(format: ObjectFormat) -> Self {
2244 Self {
2245 format,
2246 objects: Mutex::new(HashMap::new()),
2247 promisor: false,
2248 }
2249 }
2250
2251 pub fn with_promisor(mut self, promisor: bool) -> Self {
2252 self.promisor = promisor;
2253 self
2254 }
2255
2256 pub fn contains(&self, oid: &ObjectId) -> bool {
2257 self.objects
2258 .lock()
2259 .map(|objects| objects.contains_key(oid))
2260 .unwrap_or(false)
2261 }
2262
2263 pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2264 let object = self.read_object(oid)?;
2265 let actual = object.object_id(self.format)?;
2266 if &actual == oid {
2267 Ok(())
2268 } else {
2269 Err(GitError::InvalidObject(format!(
2270 "object id mismatch: expected {oid}, got {actual}"
2271 )))
2272 }
2273 }
2274}
2275
2276impl ObjectReader for ObjectDatabase {
2277 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
2278 self.objects
2279 .lock()
2280 .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
2281 .get(oid)
2282 .map(Arc::clone)
2283 .or_else(|| implied_empty_tree_object(self.format, oid))
2284 .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
2285 }
2286}
2287
2288impl ObjectWriter for ObjectDatabase {
2289 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
2290 let oid = object.object_id(self.format)?;
2291 self.objects
2292 .lock()
2293 .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
2294 .entry(oid)
2295 .or_insert_with(|| Arc::new(object));
2296 Ok(oid)
2297 }
2298}
2299
/// An alternate location, identified by its path.
// NOTE(review): presumably an alternate object directory — confirm against the code that consumes it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
2304
/// Settings describing how a partial clone treats objects it does not have.
// NOTE(review): field meanings below are inferred from their names — confirm against the consumers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether missing promised objects are tolerated.
    pub allow_missing_promised_objects: bool,
}
2310
/// Shared, mutex-guarded map from a path to the shared `PackData` loaded for it.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
2315
/// Bytes of a pack, held either as a memory-mapped file or as a heap buffer.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped file; only available with the `mmap` feature.
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// Bytes held in an owned heap buffer.
    Heap(Vec<u8>),
}
2324
/// Lets a `PackData` be used wherever a byte slice is expected, whichever
/// variant holds the bytes.
impl std::ops::Deref for PackData {
    type Target = [u8];

    /// Borrows the underlying bytes of either variant.
    fn deref(&self) -> &[u8] {
        match self {
            #[cfg(feature = "mmap")]
            Self::Mapped(mapped) => mapped,
            Self::Heap(bytes) => bytes,
        }
    }
}
2336
2337#[cfg(feature = "mmap")]
2340fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2341 match sley_mmap::MappedFile::open_pack(pack_path) {
2342 Ok(mapped) => Ok(PackData::Mapped(mapped)),
2343 Err(_) => Ok(PackData::Heap(fs::read(pack_path)?)),
2344 }
2345}
2346
2347#[cfg(not(feature = "mmap"))]
2348fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2349 Ok(PackData::Heap(fs::read(pack_path)?))
2350}
2351
2352#[cfg(feature = "mmap")]
2353fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
2354 match sley_mmap::MappedFile::open_pack(index_path) {
2355 Ok(mapped) => Ok(Arc::new(mapped)),
2356 Err(_) => Ok(Arc::new(fs::read(index_path)?)),
2357 }
2358}
2359
2360#[cfg(not(feature = "mmap"))]
2361fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
2362 Ok(Arc::new(fs::read(index_path)?))
2363}
2364
/// Shared, mutex-guarded `LruObjectCache`.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;

/// Shared, mutex-guarded map from a path to that path's own shared,
/// mutex-guarded `LruOffsetCache`.
// NOTE(review): presumably keyed by pack file path — confirm against the call sites.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;

/// Shared, mutex-guarded map from a `u64` key to an `(ObjectType, u64)` pair.
// NOTE(review): presumably pack entry offset -> (object type, size) — confirm against the call sites.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Shared, mutex-guarded map from a path to that path's `PackHeaderTypeCache`.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;

/// Default byte budget (96 MiB) used when `SLEY_OBJECT_CACHE_BYTES` is unset or
/// unparsable.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;

/// Default byte budget (96 MiB) used when `SLEY_DELTA_BASE_CACHE_BYTES` is
/// unset or unparsable.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
2402
/// Cost charged against a cache budget for holding `object`: its body length
/// plus a flat 64, saturating instead of overflowing.
fn cached_object_cost(object: &EncodedObject) -> usize {
    object.body.len().saturating_add(64)
}
2409
/// Reads a cache budget from the environment variable `var`.
///
/// The value is trimmed and parsed as a `usize`; `default` is returned when the
/// variable is unset, not valid Unicode, or does not parse.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|value| value.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
2418
2419fn object_cache_budget() -> usize {
2426 static BUDGET: OnceLock<usize> = OnceLock::new();
2427 *BUDGET.get_or_init(|| {
2428 cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
2429 })
2430}
2431
2432fn delta_base_cache_budget() -> usize {
2436 static BUDGET: OnceLock<usize> = OnceLock::new();
2437 *BUDGET.get_or_init(|| {
2438 cache_budget_from_env(
2439 "SLEY_DELTA_BASE_CACHE_BYTES",
2440 DEFAULT_DELTA_BASE_CACHE_BYTES,
2441 )
2442 })
2443}
2444
/// Reports whether `SLEY_VERIFY_READS` is set to anything other than an empty
/// string or `0` (after trimming). Unset or non-Unicode values read as `false`.
///
/// The environment is consulted on the first call only; later calls return the
/// value computed then.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    let enabled = VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|value| {
                let setting = value.trim();
                !(setting.is_empty() || setting == "0")
            })
            .unwrap_or(false)
    });
    *enabled
}
2462
/// Byte-budgeted cache of encoded objects whose entries form a doubly linked
/// list threaded through the map by key.
///
/// `attach_back` appends at `tail` and `touch` moves an entry there, so `tail`
/// is the most recently used entry and `head` the least recently used one.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    /// Maximum total cost the cache may hold; `put` stores nothing when this is 0.
    budget: usize,
    /// Running total of `cached_object_cost` over the stored entries.
    used: usize,
    /// Entries by key; each entry carries the keys of its list neighbours.
    map: HashMap<K, LruEntry<K>>,
    /// Key of the least recently used entry, if any.
    head: Option<K>,
    /// Key of the most recently used entry, if any.
    tail: Option<K>,
}
2478
/// One cached object together with its links in the recency list.
#[derive(Debug)]
struct LruEntry<K> {
    /// The cached object.
    object: Arc<EncodedObject>,
    /// Key of the neighbour towards the head (less recently used), if any.
    prev: Option<K>,
    /// Key of the neighbour towards the tail (more recently used), if any.
    next: Option<K>,
}
2485
2486impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
    /// Creates an empty cache that may hold up to `budget` units of cost.
    fn new(budget: usize) -> Self {
        Self {
            budget,
            used: 0,
            map: HashMap::new(),
            head: None,
            tail: None,
        }
    }
2496
2497 fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
2498 let object = Arc::clone(&self.map.get(key)?.object);
2499 self.touch(key);
2500 Some(object)
2501 }
2502
2503 fn touch(&mut self, key: &K) {
2505 if self.tail.as_ref() == Some(key) {
2506 return;
2507 }
2508 if self.map.contains_key(key) {
2509 self.detach(key);
2510 self.attach_back(key.clone());
2511 }
2512 }
2513
2514 fn remove(&mut self, key: &K) {
2516 if let Some(entry) = self.map.get(key) {
2517 self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
2518 }
2519 self.detach(key);
2520 self.map.remove(key);
2521 }
2522
2523 fn detach(&mut self, key: &K) {
2524 let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
2525 let prev = entry.prev.take();
2526 let next = entry.next.take();
2527 (prev, next)
2528 }) else {
2529 return;
2530 };
2531
2532 match &prev {
2533 Some(prev_key) => {
2534 if let Some(prev_entry) = self.map.get_mut(prev_key) {
2535 prev_entry.next = next.clone();
2536 }
2537 }
2538 None => self.head = next.clone(),
2539 }
2540 match &next {
2541 Some(next_key) => {
2542 if let Some(next_entry) = self.map.get_mut(next_key) {
2543 next_entry.prev = prev.clone();
2544 }
2545 }
2546 None => self.tail = prev.clone(),
2547 }
2548 }
2549
2550 fn attach_back(&mut self, key: K) {
2551 let previous_tail = self.tail.replace(key.clone());
2552 match previous_tail {
2553 Some(tail_key) => {
2554 if let Some(tail_entry) = self.map.get_mut(&tail_key) {
2555 tail_entry.next = Some(key.clone());
2556 }
2557 if let Some(entry) = self.map.get_mut(&key) {
2558 entry.prev = Some(tail_key);
2559 entry.next = None;
2560 }
2561 }
2562 None => {
2563 self.head = Some(key.clone());
2564 if let Some(entry) = self.map.get_mut(&key) {
2565 entry.prev = None;
2566 entry.next = None;
2567 }
2568 }
2569 }
2570 }
2571
2572 fn clear(&mut self) {
2573 self.map.clear();
2574 self.head = None;
2575 self.tail = None;
2576 self.used = 0;
2577 }
2578
2579 fn put(&mut self, key: K, object: Arc<EncodedObject>) {
2580 if self.budget == 0 {
2581 return;
2582 }
2583 let cost = cached_object_cost(&object);
2584 if cost > self.budget {
2588 self.remove(&key);
2589 return;
2590 }
2591 if let Some(entry) = self.map.get_mut(&key) {
2592 let previous = std::mem::replace(&mut entry.object, object);
2593 self.used = self
2595 .used
2596 .saturating_sub(cached_object_cost(&previous))
2597 .saturating_add(cost);
2598 self.touch(&key);
2599 } else {
2600 self.used = self.used.saturating_add(cost);
2601 self.map.insert(
2602 key.clone(),
2603 LruEntry {
2604 object,
2605 prev: None,
2606 next: None,
2607 },
2608 );
2609 self.attach_back(key);
2610 }
2611 while self.used > self.budget {
2612 let Some(evicted) = self.head.clone() else {
2613 break;
2614 };
2615 self.remove(&evicted);
2616 }
2617 }
2618}
2619
/// LRU cache of encoded objects keyed by object id.
type LruObjectCache = LruCache<ObjectId>;
/// LRU cache of encoded objects keyed by a `u64` pack offset.
type LruOffsetCache = LruCache<u64>;
2624
/// Borrowed view of a shared [`LruOffsetCache`] that implements
/// `sley_pack::PackDeltaCache` on top of it.
struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
2630
2631impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
2632 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
2633 self.0.lock().ok()?.get(&offset)
2634 }
2635
2636 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
2637 if let Ok(mut cache) = self.0.lock() {
2638 cache.put(offset, object);
2639 }
2640 }
2641}
2642
/// Borrowed view of a [`PackHeaderTypeCache`] that implements
/// `sley_pack::HeaderTypeCache` on top of it.
struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
2647
2648impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
2649 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
2650 self.0.lock().ok()?.get(&pack_offset).copied()
2651 }
2652
2653 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
2654 if let Ok(mut cache) = self.0.lock() {
2655 cache.insert(pack_offset, header);
2656 }
2657 }
2658}
2659
/// Shared, mutex-guarded map from a path to a parsed [`PackIndex`].
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;

/// Shared, mutex-guarded map from a path to a parsed [`MultiPackIndex`].
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;

/// Shared, mutex-guarded map from a path to a parsed
/// [`MultiPackIndexOidLookup`].
type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
2675
/// A pack known to the registry, together with its lazily loaded state and
/// its per-pack caches.
#[derive(Debug)]
struct RegisteredPack {
    /// Path of the pack's index file.
    idx: PathBuf,
    /// Path of the pack file itself.
    pack: PathBuf,
    /// Parsed index view, filled in on first use.
    index: Mutex<Option<Arc<PackIndexViewData>>>,
    /// Loaded pack data, filled in on first use.
    data: Mutex<Option<Arc<PackData>>>,
    /// Offset-keyed object cache for this pack, handed to delta resolution.
    delta_cache: Arc<Mutex<LruOffsetCache>>,
    /// Offset-keyed header cache for this pack.
    header_type_cache: PackHeaderTypeCache,
}
2689
2690impl RegisteredPack {
2691 fn new(idx: PathBuf, pack: PathBuf) -> Self {
2692 Self {
2693 idx,
2694 pack,
2695 index: Mutex::new(None),
2696 data: Mutex::new(None),
2697 delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
2698 header_type_cache: Arc::new(Mutex::new(HashMap::new())),
2699 }
2700 }
2701
2702 fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
2703 if let Ok(cache) = self.index.lock()
2704 && let Some(index) = cache.as_ref()
2705 {
2706 return Ok(Arc::clone(index));
2707 }
2708 let index_bytes = load_pack_index_data(&self.idx)?;
2709 let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
2710 index_bytes,
2711 format,
2712 )?);
2713 if let Ok(mut cache) = self.index.lock() {
2714 *cache = Some(Arc::clone(&index));
2715 }
2716 Ok(index)
2717 }
2718
2719 fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
2720 if let Ok(cache) = self.data.lock()
2721 && let Some(bytes) = cache.as_ref()
2722 {
2723 return Ok(Arc::clone(bytes));
2724 }
2725 if let Ok(cache) = pack_bytes.lock()
2726 && let Some(bytes) = cache.get(&self.pack)
2727 {
2728 let bytes = Arc::clone(bytes);
2729 if let Ok(mut local_cache) = self.data.lock() {
2730 *local_cache = Some(Arc::clone(&bytes));
2731 }
2732 return Ok(bytes);
2733 }
2734 let bytes = Arc::new(load_pack_data(&self.pack)?);
2735 if let Ok(mut local_cache) = self.data.lock() {
2736 *local_cache = Some(Arc::clone(&bytes));
2737 }
2738 if let Ok(mut cache) = pack_bytes.lock() {
2739 cache.insert(self.pack.clone(), Arc::clone(&bytes));
2740 }
2741 Ok(bytes)
2742 }
2743}
2744
/// Comparable summary of a pack directory's state.
///
/// NOTE(review): presumably compared to decide whether a cached registry
/// snapshot is stale — confirm against the code that builds it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    /// Modification time, when one was available.
    modified: Option<std::time::SystemTime>,
    /// Count of `idx` entries.
    idx_count: usize,
    /// Count of `pack` entries.
    pack_count: usize,
}
2751
/// Snapshot of the registered packs, tagged with the fingerprint it was
/// built from.
#[derive(Debug)]
struct PackRegistrySnapshot {
    /// Fingerprint associated with this snapshot.
    fingerprint: PackDirFingerprint,
    /// The registered packs; hints index into this vector.
    packs: Vec<Arc<RegisteredPack>>,
    /// Position in `packs` remembered via `remember_hint`, if any.
    recent_pack: Mutex<Option<usize>>,
}
2762
2763impl PackRegistrySnapshot {
2764 fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
2765 Self {
2766 fingerprint,
2767 packs,
2768 recent_pack: Mutex::new(None),
2769 }
2770 }
2771
2772 fn cached_hint(&self) -> Option<usize> {
2773 self.recent_pack
2774 .lock()
2775 .ok()
2776 .and_then(|hint| *hint)
2777 .filter(|pack_index| *pack_index < self.packs.len())
2778 }
2779
2780 fn remember_hint(&self, pack_index: usize) {
2781 if let Ok(mut hint) = self.recent_pack.lock() {
2782 *hint = Some(pack_index);
2783 }
2784 }
2785}
2786
/// Shared, mutex-guarded slot holding the current [`PackRegistrySnapshot`],
/// or `None` when no snapshot is cached.
type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
2791
/// Location of an object inside a pack: which pack, and at what offset.
#[derive(Debug, Clone)]
struct PackLookup {
    /// Path of the pack file.
    pack: PathBuf,
    /// The registry entry for the pack, when the lookup came from one.
    registered: Option<Arc<RegisteredPack>>,
    /// Offset of the object within the pack.
    offset: u64,
}
2798
2799impl PackLookup {
2800 fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
2801 Self {
2802 pack: pack.pack.clone(),
2803 registered: Some(pack),
2804 offset,
2805 }
2806 }
2807
2808 fn from_path(pack: PathBuf, offset: u64) -> Self {
2809 Self {
2810 pack,
2811 registered: None,
2812 offset,
2813 }
2814 }
2815
2816 fn pack_path(&self) -> &Path {
2817 &self.pack
2818 }
2819
2820 fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
2821 match &self.registered {
2822 Some(pack) => pack.bytes(&database.pack_bytes),
2823 None => database.cached_pack_bytes(&self.pack),
2824 }
2825 }
2826
2827 fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
2828 match &self.registered {
2829 Some(pack) => database.cached_pack_index(&pack.idx),
2830 None => database.cached_pack_index(&self.pack.with_extension("idx")),
2831 }
2832 }
2833
2834 fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
2835 match &self.registered {
2836 Some(pack) => Some(Arc::clone(&pack.delta_cache)),
2837 None => database.pack_delta_cache(&self.pack),
2838 }
2839 }
2840
2841 fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
2842 match &self.registered {
2843 Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
2844 None => database.pack_header_type_cache(&self.pack),
2845 }
2846 }
2847}
2848
/// File-backed object database: loose objects, packs under
/// `<objects_dir>/pack`, and alternate object directories.
///
/// The cache fields are reference-counted, so clones of a database share them.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    /// Loose-object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    /// Root of the object directory.
    objects_dir: PathBuf,
    /// Alternate object directories consulted after the local store.
    alternates: Vec<PathBuf>,
    /// Object format (hash) used by this store.
    format: ObjectFormat,
    /// Loaded pack data keyed by pack path.
    pack_bytes: PackBytesCache,
    /// Parsed pack indexes keyed by index path.
    pack_indexes: PackIndexCache,
    /// Parsed multi-pack indexes keyed by path.
    multi_pack_indexes: MultiPackIndexCache,
    /// Parsed multi-pack-index OID lookups keyed by path.
    multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
    /// Current pack registry snapshot, if one has been built.
    pack_registry: PackRegistryCache,
    /// LRU cache of decoded objects keyed by object id.
    decoded: DecodedObjectCache,
    /// Per-pack delta caches for packs without a registry entry.
    pack_deltas: PackDeltaCaches,
    /// Per-pack header caches for packs without a registry entry.
    pack_header_types: PackHeaderTypeCaches,
    /// Lazily initialised set of shallow graft object ids.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
2868
/// Stateful helper answering "is this object present?" for many object ids,
/// keeping pack lookup state between calls.
#[derive(Debug)]
pub struct ObjectPresenceChecker {
    /// Database handle the checker queries.
    db: FileObjectDatabase,
    /// `<objects_dir>/pack`.
    pack_dir: PathBuf,
    /// Multi-pack-index OID lookup, when one was found.
    midx: Option<Arc<MultiPackIndexOidLookup>>,
    /// Registry snapshot currently in use.
    registry: Option<Arc<PackRegistrySnapshot>>,
    /// Per-pack index views, parallel to `registry.packs`, filled on demand.
    registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
    /// Position of the pack that satisfied the most recent registry hit.
    recent_pack: Option<usize>,
    /// Whether the multi-pack-index lookup has been prepared.
    prepared_packs: bool,
    /// Whether the registry snapshot has been prepared.
    prepared_registry: bool,
}
2880
2881impl ObjectPresenceChecker {
2882 fn new(db: FileObjectDatabase) -> Self {
2883 let pack_dir = db.objects_dir.join("pack");
2884 Self {
2885 db,
2886 pack_dir,
2887 midx: None,
2888 registry: None,
2889 registry_indexes: Vec::new(),
2890 recent_pack: None,
2891 prepared_packs: false,
2892 prepared_registry: false,
2893 }
2894 }
2895
2896 pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
2897 if oid.format() != self.db.format {
2898 return Err(GitError::InvalidObjectId(format!(
2899 "object {oid} uses {}, store uses {}",
2900 oid.format().name(),
2901 self.db.format.name()
2902 )));
2903 }
2904 if self.db.loose.exists(oid)? {
2905 return Ok(true);
2906 }
2907 if self.find_packed(oid, false)? {
2908 return Ok(true);
2909 }
2910 if self.find_packed(oid, true)? {
2911 return Ok(true);
2912 }
2913 for alternate in &self.db.alternates {
2914 if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
2915 return Ok(true);
2916 }
2917 }
2918 self.db.loose.invalidate_cache();
2921 self.db.loose.exists(oid)
2922 }
2923
2924 fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
2925 self.prepare_packs(force_rescan)?;
2926 if let Some(midx) = &self.midx
2927 && midx.contains(oid)
2928 {
2929 return Ok(true);
2930 }
2931 self.prepare_registry(force_rescan)?;
2932 self.find_in_registry(oid)
2933 }
2934
2935 fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
2936 if self.prepared_packs && !force_rescan {
2937 return Ok(());
2938 }
2939 let midx_path = self.pack_dir.join("multi-pack-index");
2940 self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
2941 self.prepared_packs = true;
2942 Ok(())
2943 }
2944
2945 fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
2946 if self.prepared_registry && !force_rescan {
2947 return Ok(());
2948 }
2949 let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
2950 let registry_changed = match self.registry.as_ref() {
2951 Some(cached) => !Arc::ptr_eq(cached, ®istry),
2952 None => true,
2953 };
2954 if registry_changed {
2955 self.registry_indexes = vec![None; registry.packs.len()];
2956 self.recent_pack = None;
2957 self.registry = Some(registry);
2958 }
2959 self.prepared_registry = true;
2960 Ok(())
2961 }
2962
2963 fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
2964 let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
2965 return Ok(false);
2966 };
2967 if let Some(pack_index) = self
2968 .recent_pack
2969 .filter(|pack_index| *pack_index < registry.packs.len())
2970 {
2971 let index = self.registry_index(®istry, pack_index)?;
2972 if index.find(oid).is_some() {
2973 return Ok(true);
2974 }
2975 }
2976 for pack_index in 0..registry.packs.len() {
2977 if Some(pack_index) == self.recent_pack {
2978 continue;
2979 }
2980 let index = self.registry_index(®istry, pack_index)?;
2981 if index.find(oid).is_some() {
2982 self.recent_pack = Some(pack_index);
2983 return Ok(true);
2984 }
2985 }
2986 Ok(false)
2987 }
2988
2989 fn registry_index(
2990 &mut self,
2991 registry: &PackRegistrySnapshot,
2992 pack_index: usize,
2993 ) -> Result<Arc<PackIndexViewData>> {
2994 if self.registry_indexes.len() != registry.packs.len() {
2995 self.registry_indexes = vec![None; registry.packs.len()];
2996 self.recent_pack = None;
2997 }
2998 if let Some(index) = self
2999 .registry_indexes
3000 .get(pack_index)
3001 .and_then(|index| index.as_ref())
3002 {
3003 return Ok(Arc::clone(index));
3004 }
3005 let index = registry.packs[pack_index].index(self.db.format)?;
3006 if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
3007 *slot = Some(Arc::clone(&index));
3008 }
3009 Ok(index)
3010 }
3011}
3012
3013fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
3017 let Ok(contents) = std::fs::read_to_string(shallow_file) else {
3018 return HashSet::new();
3019 };
3020 contents
3021 .lines()
3022 .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
3023 .collect()
3024}
3025
3026pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
3027 env::var_os("GIT_OBJECT_DIRECTORY")
3028 .map(PathBuf::from)
3029 .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
3030}
3031
/// Returns the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is returned as-is.
/// 2. If `<git_dir>/commondir` is readable, its trimmed contents name the
///    common directory (relative paths are taken relative to `git_dir`); the
///    result is canonicalised when possible.
/// 3. Otherwise `git_dir` itself is returned.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(pointer.trim());
    let resolved = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    // Fall back to the unresolved path when canonicalisation fails.
    fs::canonicalize(&resolved).unwrap_or(resolved)
}
3049
3050pub fn repository_object_ids(
3051 git_dir: impl AsRef<Path>,
3052 format: ObjectFormat,
3053) -> Result<Vec<ObjectId>> {
3054 object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
3055}
3056
3057pub fn object_ids_in_objects_dir(
3058 objects_dir: impl AsRef<Path>,
3059 format: ObjectFormat,
3060) -> Result<Vec<ObjectId>> {
3061 let objects_dir = objects_dir.as_ref();
3062 let mut oids = HashSet::new();
3063 collect_loose_object_ids(objects_dir, format, &mut oids)?;
3064 collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
3065 let mut oids = oids.into_iter().collect::<Vec<_>>();
3066 oids.sort_by_key(ObjectId::to_hex);
3067 Ok(oids)
3068}
3069
3070fn collect_loose_object_ids(
3071 objects_dir: &Path,
3072 format: ObjectFormat,
3073 oids: &mut HashSet<ObjectId>,
3074) -> Result<()> {
3075 if !objects_dir.exists() {
3076 return Ok(());
3077 }
3078 let hex_len = format.hex_len();
3079 for entry in fs::read_dir(objects_dir)? {
3080 let entry = entry?;
3081 if !entry.file_type()?.is_dir() {
3082 continue;
3083 }
3084 let name = entry.file_name();
3085 let Some(fanout) = name.to_str() else {
3086 continue;
3087 };
3088 if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3089 continue;
3090 }
3091 for object_entry in fs::read_dir(entry.path())? {
3092 let object_entry = object_entry?;
3093 if !object_entry.file_type()?.is_file() {
3094 continue;
3095 }
3096 let name = object_entry.file_name();
3097 let Some(suffix) = name.to_str() else {
3098 continue;
3099 };
3100 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3101 continue;
3102 }
3103 oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
3104 }
3105 }
3106 Ok(())
3107}
3108
3109fn collect_loose_fanout_object_ids(
3110 objects_dir: &Path,
3111 format: ObjectFormat,
3112 fanout: u8,
3113 oids: &mut HashSet<ObjectId>,
3114) -> Result<()> {
3115 let fanout_hex = format!("{fanout:02x}");
3116 let fanout_dir = objects_dir.join(&fanout_hex);
3117 let entries = match fs::read_dir(&fanout_dir) {
3118 Ok(entries) => entries,
3119 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
3120 Err(err) => return Err(GitError::Io(err.to_string())),
3121 };
3122 let hex_len = format.hex_len();
3123 for object_entry in entries {
3124 let object_entry = object_entry?;
3125 let name = object_entry.file_name();
3126 let Some(suffix) = name.to_str() else {
3127 continue;
3128 };
3129 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3130 continue;
3131 }
3132 oids.insert(ObjectId::from_hex(
3133 format,
3134 &format!("{fanout_hex}{suffix}"),
3135 )?);
3136 }
3137 Ok(())
3138}
3139
/// Cache of loose-object presence.
///
/// NOTE(review): presumably filled one fanout directory at a time — confirm
/// against the loose store that uses it.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    /// Fanout bytes recorded as loaded.
    loaded_fanouts: HashSet<u8>,
    /// Object ids recorded as present.
    objects: HashSet<ObjectId>,
}
3145
3146pub fn packed_object_ids(
3151 objects_dir: impl AsRef<Path>,
3152 format: ObjectFormat,
3153) -> Result<HashSet<ObjectId>> {
3154 let mut oids = HashSet::new();
3155 collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
3156 Ok(oids)
3157}
3158
3159fn collect_packed_object_ids(
3160 pack_dir: &Path,
3161 format: ObjectFormat,
3162 oids: &mut HashSet<ObjectId>,
3163) -> Result<()> {
3164 if !pack_dir.exists() {
3165 return Ok(());
3166 }
3167 let midx_path = pack_dir.join("multi-pack-index");
3168 if midx_path.exists() {
3169 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
3170 oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
3171 }
3172 for entry in fs::read_dir(pack_dir)? {
3173 let path = entry?.path();
3174 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3175 continue;
3176 }
3177 let index = PackIndex::parse(&fs::read(path)?, format)?;
3178 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
3179 }
3180 Ok(())
3181}
3182
3183impl FileObjectDatabase {
    /// Returns the object format this database was opened with.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
3188
    /// Returns the root of the object directory backing this database.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
3193
3194 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3195 let objects_dir = objects_dir.into();
3196 Self {
3197 loose: LooseObjectStore::new(objects_dir.clone(), format),
3198 alternates: alternate_object_dirs(&objects_dir),
3199 objects_dir,
3200 format,
3201 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3202 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3203 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3204 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3205 pack_registry: Arc::new(Mutex::new(None)),
3206 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3207 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3208 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3209 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3210 }
3211 }
3212
3213 fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3214 let objects_dir = objects_dir.into();
3215 Self {
3216 loose: LooseObjectStore::new(objects_dir.clone(), format),
3217 alternates: Vec::new(),
3218 objects_dir,
3219 format,
3220 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3221 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3222 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3223 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3224 pack_registry: Arc::new(Mutex::new(None)),
3225 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3226 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3227 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3228 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3229 }
3230 }
3231
3232 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
3233 Self::new(repository_objects_dir(git_dir), format)
3234 }
3235
3236 pub fn refresh_read_cache(&self) {
3241 if let Ok(mut cache) = self.pack_registry.lock() {
3242 *cache = None;
3243 }
3244 if let Ok(mut cache) = self.pack_indexes.lock() {
3245 cache.clear();
3246 }
3247 if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3248 cache.clear();
3249 }
3250 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3251 cache.clear();
3252 }
3253 if let Ok(mut cache) = self.pack_bytes.lock() {
3254 cache.clear();
3255 }
3256 if let Ok(mut cache) = self.pack_deltas.lock() {
3257 cache.clear();
3258 }
3259 if let Ok(mut cache) = self.pack_header_types.lock() {
3260 cache.clear();
3261 }
3262 if let Ok(mut cache) = self.decoded.lock() {
3263 cache.clear();
3264 }
3265 self.loose.invalidate_cache();
3266 }
3267
    /// Returns the loose-object store backing this database.
    pub fn loose(&self) -> &LooseObjectStore {
        &self.loose
    }
3271
3272 pub fn presence_checker(&self) -> ObjectPresenceChecker {
3273 ObjectPresenceChecker::new(self.clone())
3274 }
3275
3276 pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3277 self.install_pack_with_options(pack, RawPackInstallOptions::default())
3278 }
3279
3280 pub fn install_pack_with_options(
3281 &self,
3282 pack: &PackWrite,
3283 options: RawPackInstallOptions,
3284 ) -> Result<PackInstallResult> {
3285 if pack.checksum.format() != self.format {
3286 return Err(GitError::InvalidObjectId(format!(
3287 "pack checksum uses {}, store uses {}",
3288 pack.checksum.format().name(),
3289 self.format.name()
3290 )));
3291 }
3292 for entry in &pack.entries {
3293 if entry.oid.format() != self.format {
3294 return Err(GitError::InvalidObjectId(format!(
3295 "pack entry {} uses {}, store uses {}",
3296 entry.oid,
3297 entry.oid.format().name(),
3298 self.format.name()
3299 )));
3300 }
3301 }
3302 let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
3303 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3304 if canonical_index.pack_checksum != pack.checksum
3305 || parsed_index.pack_checksum != pack.checksum
3306 {
3307 return Err(GitError::InvalidFormat(
3308 "pack and index checksums do not match pack write".into(),
3309 ));
3310 }
3311 if pack.index != canonical_index.index {
3312 return Err(GitError::InvalidFormat(
3313 "pack index does not match pack contents".into(),
3314 ));
3315 }
3316
3317 let pack_dir = self.objects_dir.join("pack");
3318 fs::create_dir_all(&pack_dir)?;
3319 let pack_name = format!("pack-{}", pack.checksum.to_hex());
3320 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3321 let index_path = pack_dir.join(format!("{pack_name}.idx"));
3322 if !pack_path.exists() || !index_path.exists() {
3323 write_pack_component(&pack_path, &pack.pack)?;
3324 write_pack_component(&index_path, &pack.index)?;
3325 }
3326 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3327 Ok(PackInstallResult {
3328 pack_name,
3329 pack_path,
3330 index_path,
3331 promisor_path,
3332 object_ids: canonical_index
3333 .entries
3334 .iter()
3335 .map(|entry| entry.oid)
3336 .collect(),
3337 })
3338 }
3339
3340 pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3348 self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
3349 }
3350
3351 pub fn install_written_pack_with_options(
3352 &self,
3353 pack: &PackWrite,
3354 options: RawPackInstallOptions,
3355 ) -> Result<PackInstallResult> {
3356 validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
3357 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3358 if parsed_index.pack_checksum != pack.checksum {
3359 return Err(GitError::InvalidFormat(
3360 "pack write index checksum does not match pack".into(),
3361 ));
3362 }
3363 if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
3364 return Err(GitError::InvalidFormat(
3365 "pack write index does not match generated entries".into(),
3366 ));
3367 }
3368 self.install_generated_pack_unchecked(pack, options)
3369 }
3370
3371 fn install_generated_pack_unchecked(
3372 &self,
3373 pack: &PackWrite,
3374 options: RawPackInstallOptions,
3375 ) -> Result<PackInstallResult> {
3376 let pack_dir = self.objects_dir.join("pack");
3377 fs::create_dir_all(&pack_dir)?;
3378 let pack_name = format!("pack-{}", pack.checksum.to_hex());
3379 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3380 let index_path = pack_dir.join(format!("{pack_name}.idx"));
3381 if !pack_path.exists() || !index_path.exists() {
3382 write_pack_component(&pack_path, &pack.pack)?;
3383 write_pack_component(&index_path, &pack.index)?;
3384 }
3385 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3386 Ok(PackInstallResult {
3387 pack_name,
3388 pack_path,
3389 index_path,
3390 promisor_path,
3391 object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
3392 })
3393 }
3394
3395 pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
3396 self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
3397 }
3398
3399 pub fn install_raw_pack_with_options(
3400 &self,
3401 pack_bytes: &[u8],
3402 options: RawPackInstallOptions,
3403 ) -> Result<PackInstallResult> {
3404 let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
3405 let pack_dir = self.objects_dir.join("pack");
3406 fs::create_dir_all(&pack_dir)?;
3407 let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
3408 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3409 let index_path = pack_dir.join(format!("{pack_name}.idx"));
3410 if !pack_path.exists() || !index_path.exists() {
3411 write_pack_component(&pack_path, pack_bytes)?;
3412 write_pack_component(&index_path, &built.index)?;
3413 }
3414 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3415 Ok(PackInstallResult {
3416 pack_name,
3417 pack_path,
3418 index_path,
3419 promisor_path,
3420 object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
3421 })
3422 }
3423
3424 pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
3425 if self.loose.exists(oid)? {
3426 return Ok(true);
3427 }
3428 if self.find_pack_containing(oid)?.is_some() {
3429 return Ok(true);
3430 }
3431 for alternate in &self.alternates {
3432 if Self::without_alternates(alternate, self.format).contains(oid)? {
3433 return Ok(true);
3434 }
3435 }
3436 self.loose.invalidate_cache();
3439 self.loose.exists(oid)
3440 }
3441
3442 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3443 let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
3444 .into_iter()
3445 .collect::<HashSet<_>>();
3446 for alternate in &self.alternates {
3447 oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
3448 }
3449 let mut oids = oids.into_iter().collect::<Vec<_>>();
3450 oids.sort_by_key(ObjectId::to_hex);
3451 Ok(oids)
3452 }
3453
3454 pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3455 if let Some(disk_size) = self.loose.disk_size(oid)? {
3456 return Ok(Some(ObjectStorageInfo {
3457 disk_size,
3458 deltabase: zero_oid(self.format)?,
3459 }));
3460 }
3461 if let Some(info) = self.packed_object_storage_info(oid)? {
3462 return Ok(Some(info));
3463 }
3464 for alternate in &self.alternates {
3465 if let Some(info) =
3466 Self::without_alternates(alternate, self.format).object_storage_info(oid)?
3467 {
3468 return Ok(Some(info));
3469 }
3470 }
3471 self.loose.invalidate_cache();
3474 if let Some(disk_size) = self.loose.disk_size(oid)? {
3475 return Ok(Some(ObjectStorageInfo {
3476 disk_size,
3477 deltabase: zero_oid(self.format)?,
3478 }));
3479 }
3480 Ok(None)
3481 }
3482
3483 pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
3484 validate_object_id_prefix(self.format, prefix)?;
3485 let mut matches = Vec::new();
3486 for oid in self.object_ids()? {
3487 if object_id_matches_prefix(&oid, prefix) {
3488 matches.push(oid);
3489 }
3490 }
3491 Ok(match matches.len() {
3492 0 => ObjectPrefixResolution::Missing,
3493 1 => ObjectPrefixResolution::Unique(matches.remove(0)),
3494 _ => ObjectPrefixResolution::Ambiguous(matches),
3495 })
3496 }
3497
3498 pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
3508 if implied_empty_tree_object(self.format, oid).is_some() {
3509 return Ok(Some((ObjectType::Tree, 0)));
3510 }
3511 if let Ok(mut cache) = self.decoded.lock()
3512 && let Some(object) = cache.get(oid)
3513 {
3514 return Ok(Some((object.object_type, object.body.len() as u64)));
3515 }
3516 if let Some(header) = self.loose.read_header(oid)? {
3517 return Ok(Some(header));
3518 }
3519 if let Some(pack_lookup) = self.find_pack_containing(oid)? {
3520 let bytes = pack_lookup.pack_bytes(self)?;
3521 let type_cache = pack_lookup.header_type_cache(self);
3526 let resolve_ref_base = |base: &ObjectId| {
3527 self.read_object_header(base)
3528 .map(|header| header.map(|(t, _)| t))
3529 };
3530 let header = match &type_cache {
3531 Some(cache) => {
3532 let mut adapter = PackHeaderTypeCacheAdapter(cache);
3533 sley_pack::read_object_header_at_with_cache(
3534 &bytes,
3535 pack_lookup.offset,
3536 self.format,
3537 resolve_ref_base,
3538 &mut adapter,
3539 )?
3540 }
3541 None => sley_pack::read_object_header_at(
3542 &bytes,
3543 pack_lookup.offset,
3544 self.format,
3545 resolve_ref_base,
3546 )?,
3547 };
3548 return Ok(Some(header));
3549 }
3550 for alternate in &self.alternates {
3551 if let Some(header) =
3552 Self::without_alternates(alternate, self.format).read_object_header(oid)?
3553 {
3554 return Ok(Some(header));
3555 }
3556 }
3557 self.loose.invalidate_cache();
3560 if let Some(header) = self.loose.read_header(oid)? {
3561 return Ok(Some(header));
3562 }
3563 Ok(None)
3564 }
3565
3566 fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
3567 if let Ok(mut cache) = self.decoded.lock()
3570 && let Some(object) = cache.get(oid)
3571 {
3572 return Ok(Some(object));
3573 }
3574 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3575 return Ok(None);
3576 };
3577 self.read_packed_object_at_lookup(oid, &pack_lookup).map(Some)
3578 }
3579
3580 fn read_packed_object_at_lookup(
3581 &self,
3582 oid: &ObjectId,
3583 pack_lookup: &PackLookup,
3584 ) -> Result<Arc<EncodedObject>> {
3585 if let Ok(mut cache) = self.decoded.lock()
3586 && let Some(object) = cache.get(oid)
3587 {
3588 return Ok(object);
3589 }
3590 let bytes = pack_lookup.pack_bytes(self)?;
3591 let delta_cache = pack_lookup.delta_cache(self);
3596 let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
3597 let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
3603 let object = match &delta_adapter {
3604 Some(adapter) => sley_pack::read_object_at_with_cache_arc(
3605 &bytes,
3606 pack_lookup.offset,
3607 self.format,
3608 resolve_ref_base,
3609 adapter,
3610 )?,
3611 None => sley_pack::read_object_at_arc(
3612 &bytes,
3613 pack_lookup.offset,
3614 self.format,
3615 resolve_ref_base,
3616 )?,
3617 };
3618 if verify_reads_enabled() {
3622 let actual = object.object_id(self.format)?;
3623 if actual != *oid {
3624 return Err(GitError::InvalidObject(format!(
3625 "pack object id mismatch: index says {oid}, decoded {actual}"
3626 )));
3627 }
3628 }
3629 if let Ok(mut cache) = self.decoded.lock() {
3630 cache.put(*oid, Arc::clone(&object));
3631 }
3632 Ok(object)
3633 }
3634
3635 fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
3639 let mut caches = self.pack_deltas.lock().ok()?;
3640 let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
3641 Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
3642 });
3643 Some(Arc::clone(cache))
3644 }
3645
3646 fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
3650 let mut caches = self.pack_header_types.lock().ok()?;
3651 let cache = caches
3652 .entry(pack_path.to_path_buf())
3653 .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
3654 Some(Arc::clone(cache))
3655 }
3656
3657 fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
3662 if let Ok(cache) = self.pack_bytes.lock()
3663 && let Some(bytes) = cache.get(pack_path)
3664 {
3665 return Ok(Arc::clone(bytes));
3666 }
3667 let bytes = Arc::new(load_pack_data(pack_path)?);
3668 if let Ok(mut cache) = self.pack_bytes.lock() {
3669 cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
3670 }
3671 Ok(bytes)
3672 }
3673
3674 fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
3678 if let Ok(cache) = self.pack_indexes.lock()
3679 && let Some(index) = cache.get(index_path)
3680 {
3681 return Ok(Arc::clone(index));
3682 }
3683 let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
3684 if let Ok(mut cache) = self.pack_indexes.lock() {
3685 cache.insert(index_path.to_path_buf(), Arc::clone(&index));
3686 }
3687 Ok(index)
3688 }
3689
3690 fn cached_multi_pack_index_oid_lookup(
3691 &self,
3692 midx_path: &Path,
3693 ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
3694 if !midx_path.exists() {
3695 return Ok(None);
3696 }
3697 if let Ok(cache) = self.multi_pack_oid_lookups.lock()
3698 && let Some(midx) = cache.get(midx_path)
3699 {
3700 return Ok(Some(Arc::clone(midx)));
3701 }
3702 let bytes = Arc::new(fs::read(midx_path)?);
3703 let midx = Arc::new(MultiPackIndexOidLookup::parse(bytes, self.format)?);
3704 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3705 cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
3706 }
3707 Ok(Some(midx))
3708 }
3709
3710 fn cached_pack_registry(
3715 &self,
3716 pack_dir: &Path,
3717 force_rescan: bool,
3718 ) -> Result<Arc<PackRegistrySnapshot>> {
3719 if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
3720 return Ok(registry);
3721 }
3722 let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
3723 if let Ok(mut cache) = self.pack_registry.lock() {
3724 match cache.as_ref() {
3725 Some(existing)
3726 if existing.fingerprint == scanned.fingerprint
3727 && same_registered_pack_set(&existing.packs, &scanned.packs) =>
3728 {
3729 return Ok(Arc::clone(existing));
3730 }
3731 _ => {
3732 *cache = Some(Arc::clone(&scanned));
3733 }
3734 }
3735 }
3736 Ok(scanned)
3737 }
3738
3739 fn find_in_pack_registry(
3740 &self,
3741 registry: Arc<PackRegistrySnapshot>,
3742 oid: &ObjectId,
3743 ) -> Result<Option<PackLookup>> {
3744 let hinted_pack_index = registry.cached_hint();
3745 if let Some(pack_index) = hinted_pack_index {
3746 let pack = ®istry.packs[pack_index];
3747 let index = pack.index(self.format)?;
3748 if let Some(entry) = index.find(oid) {
3749 return Ok(Some(PackLookup::from_registered(
3750 Arc::clone(pack),
3751 entry.offset,
3752 )));
3753 }
3754 }
3755 for (pack_index, pack) in registry.packs.iter().enumerate() {
3756 if Some(pack_index) == hinted_pack_index {
3757 continue;
3758 }
3759 let index = pack.index(self.format)?;
3760 if let Some(entry) = index.find(oid) {
3761 registry.remember_hint(pack_index);
3762 return Ok(Some(PackLookup::from_registered(
3763 Arc::clone(pack),
3764 entry.offset,
3765 )));
3766 }
3767 }
3768 Ok(None)
3769 }
3770
3771 fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
3772 if oid.format() != self.format {
3773 return Err(GitError::InvalidObjectId(format!(
3774 "object {oid} uses {}, store uses {}",
3775 oid.format().name(),
3776 self.format.name()
3777 )));
3778 }
3779 let pack_dir = self.objects_dir.join("pack");
3780 if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
3785 && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
3786 {
3787 return Ok(Some(pack_paths));
3788 }
3789 if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
3790 && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
3791 {
3792 return Ok(Some(pack_paths));
3793 }
3794
3795 if !pack_dir.exists() {
3796 return Ok(None);
3797 }
3798 if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
3799 return Ok(Some(pack_paths));
3800 }
3801 let registry = self.cached_pack_registry(&pack_dir, false)?;
3805 if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(®istry), oid)? {
3806 return Ok(Some(pack_paths));
3807 }
3808 let refreshed = self.cached_pack_registry(&pack_dir, true)?;
3809 if Arc::ptr_eq(®istry, &refreshed) {
3810 return Ok(None);
3812 }
3813 self.find_in_pack_registry(refreshed, oid)
3814 }
3815
3816 fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3817 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3818 return Ok(None);
3819 };
3820 let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
3821 let trailer_offset = pack_len
3822 .checked_sub(self.format.raw_len() as u64)
3823 .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
3824 let index = pack_lookup.pack_index(self)?;
3825 let pack = pack_lookup.pack_bytes(self)?;
3826 let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
3827 let delta_base_offset = match &delta_base {
3828 Some(PackDeltaBase::Offset(offset)) => Some(*offset),
3829 Some(PackDeltaBase::Ref(_)) | None => None,
3830 };
3831 let offset_info = scan_pack_index_offsets(
3832 &index,
3833 pack_lookup.offset,
3834 trailer_offset,
3835 delta_base_offset,
3836 )?;
3837 let disk_size = offset_info
3838 .end_offset
3839 .checked_sub(pack_lookup.offset)
3840 .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
3841 let deltabase = match delta_base {
3842 Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
3843 GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
3849 })?,
3850 Some(PackDeltaBase::Ref(oid)) => oid,
3851 None => zero_oid(self.format)?,
3852 };
3853 Ok(Some(ObjectStorageInfo {
3854 disk_size,
3855 deltabase,
3856 }))
3857 }
3858
3859 fn find_midx_pack_containing(
3860 &self,
3861 pack_dir: &Path,
3862 oid: &ObjectId,
3863 ) -> Result<Option<PackLookup>> {
3864 let midx_path = pack_dir.join("multi-pack-index");
3865 let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
3866 return Ok(None);
3867 };
3868 self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
3869 }
3870
3871 fn midx_oid_lookup_pack_paths(
3872 &self,
3873 pack_dir: &Path,
3874 midx: &MultiPackIndexOidLookup,
3875 oid: &ObjectId,
3876 ) -> Result<Option<PackLookup>> {
3877 let Some(entry) = midx.find(oid)? else {
3878 return Ok(None);
3879 };
3880 let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
3881 return Err(GitError::InvalidFormat(
3882 "multi-pack-index object points past pack table".into(),
3883 ));
3884 };
3885 let pack_file_name = pack_name
3886 .strip_suffix(".idx")
3887 .map(|stem| format!("{stem}.pack"))
3888 .unwrap_or_else(|| pack_name.to_string());
3889 let pack = pack_dir.join(pack_file_name);
3890 Ok(Some(PackLookup::from_path(pack, entry.offset)))
3891 }
3892
3893 fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
3894 let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
3895 let cache = self.multi_pack_oid_lookups.lock().ok()?;
3896 cache.get(&midx_path).map(Arc::clone)
3897 }
3898
3899 fn cached_loaded_pack_registry(
3905 &self,
3906 _pack_dir: &Path,
3907 ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
3908 let cache = match self.pack_registry.lock() {
3909 Ok(cache) => cache,
3910 Err(_) => return Ok(None),
3911 };
3912 Ok(cache.as_ref().map(Arc::clone))
3913 }
3914}
3915
3916fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
3917 if prefix.len() < 4 || prefix.len() > format.hex_len() {
3918 return Err(GitError::InvalidObjectId(format!(
3919 "expected 4 to {} hex digits for {}, got {}",
3920 format.hex_len(),
3921 format.name(),
3922 prefix.len()
3923 )));
3924 }
3925 if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3926 return Err(GitError::InvalidObjectId(format!(
3927 "non-hex object id prefix {prefix}"
3928 )));
3929 }
3930 Ok(())
3931}
3932
3933fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
3934 oid.to_hex()
3935 .as_bytes()
3936 .iter()
3937 .zip(prefix.as_bytes())
3938 .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
3939}
3940
3941fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
3942 match fs::metadata(pack_dir) {
3943 Ok(metadata) => Ok(metadata.modified().ok()),
3944 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
3945 Err(err) => Err(GitError::Io(err.to_string())),
3946 }
3947}
3948
3949fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
3954 let modified = pack_dir_modified(pack_dir)?;
3955 let entries = match fs::read_dir(pack_dir) {
3956 Ok(entries) => entries,
3957 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3958 return Ok(PackRegistrySnapshot::new(
3959 PackDirFingerprint {
3960 modified,
3961 idx_count: 0,
3962 pack_count: 0,
3963 },
3964 Vec::new(),
3965 ));
3966 }
3967 Err(err) => return Err(GitError::Io(err.to_string())),
3968 };
3969
3970 let mut idx_paths = Vec::new();
3971 let mut idx_count = 0;
3972 let mut pack_count = 0;
3973 for entry in entries {
3974 let entry = entry?;
3975 let path = entry.path();
3976 match path.extension().and_then(|ext| ext.to_str()) {
3977 Some("idx") => {
3978 idx_count += 1;
3979 idx_paths.push(path);
3980 }
3981 Some("pack") => {
3982 pack_count += 1;
3983 }
3984 _ => {}
3985 }
3986 }
3987
3988 let mut packs = Vec::new();
3989 for idx in idx_paths {
3990 let pack = idx.with_extension("pack");
3991 let Ok(metadata) = fs::metadata(&pack) else {
3992 continue;
3993 };
3994 let modified = pack_sort_modified(&metadata);
3995 packs.push((modified, metadata.len(), Arc::new(RegisteredPack::new(idx, pack))));
3996 }
3997 packs.sort_by(|left, right| {
4002 right
4003 .0
4004 .cmp(&left.0)
4005 .then_with(|| right.1.cmp(&left.1))
4006 .then_with(|| left.2.idx.cmp(&right.2.idx))
4007 });
4008 let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
4009 Ok(PackRegistrySnapshot::new(
4010 PackDirFingerprint {
4011 modified,
4012 idx_count,
4013 pack_count,
4014 },
4015 packs,
4016 ))
4017}
4018
/// Converts a file's modification time into a `(seconds, nanoseconds)` pair
/// since the Unix epoch, for use as a sort key.
///
/// Falls back to `(0, 0)` when the modification time is unavailable or lies
/// before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let since_epoch = match metadata.modified() {
        Ok(mtime) => mtime.duration_since(std::time::UNIX_EPOCH).ok(),
        Err(_) => None,
    };
    match since_epoch {
        Some(elapsed) => (elapsed.as_secs(), elapsed.subsec_nanos()),
        None => (0, 0),
    }
}
4031
4032fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
4035 left.len() == right.len()
4036 && left
4037 .iter()
4038 .zip(right.iter())
4039 .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
4040}
4041
/// Collects the alternate object directories configured for `objects_dir`.
///
/// Sources, in order:
/// 1. `GIT_ALTERNATE_OBJECT_DIRECTORIES`, split with the platform's
///    path-list separator via [`env::split_paths`] (`:` on Unix, `;` on
///    Windows). Entries are kept as OS strings, so non-UTF-8 paths survive
///    intact; empty entries are dropped.
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines,
///    lines starting with `#`, and lines that are not valid UTF-8 are
///    skipped; a trailing `\r` is stripped; relative paths are resolved
///    against `objects_dir`.
///
/// An unreadable or missing alternates file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
4072
4073impl ObjectReader for FileObjectDatabase {
4074 fn has_shallow_grafts(&self) -> bool {
4075 !self
4076 .shallow_grafts
4077 .get_or_init(|| {
4078 let shallow_file = self
4079 .objects_dir
4080 .parent()
4081 .map(|git_dir| git_dir.join("shallow"));
4082 match shallow_file {
4083 Some(path) => read_shallow_grafts(&path, self.format),
4084 None => HashSet::new(),
4085 }
4086 })
4087 .is_empty()
4088 }
4089
4090 fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
4091 self.shallow_grafts
4092 .get_or_init(|| {
4093 let shallow_file = self
4094 .objects_dir
4095 .parent()
4096 .map(|git_dir| git_dir.join("shallow"));
4097 match shallow_file {
4098 Some(path) => read_shallow_grafts(&path, self.format),
4099 None => HashSet::new(),
4100 }
4101 })
4102 .contains(oid)
4103 }
4104
4105 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4106 if let Some(object) = implied_empty_tree_object(self.format, oid) {
4107 return Ok(object);
4108 }
4109 if let Some(pack_lookup) = self.find_pack_containing(oid)? {
4117 return self.read_packed_object_at_lookup(oid, &pack_lookup);
4118 }
4119 let loose_err = match self.loose.read_object(oid) {
4120 Ok(object) => return Ok(object),
4121 Err(GitError::NotFound(_)) => None,
4122 Err(err) => Some(err),
4123 };
4124 if let Some(object) = self.read_packed_object(oid)? {
4125 return Ok(object);
4126 }
4127 for alternate in &self.alternates {
4128 match Self::without_alternates(alternate, self.format).read_object(oid) {
4129 Ok(object) => return Ok(object),
4130 Err(GitError::NotFound(_)) => {}
4131 Err(err) => return Err(err),
4132 }
4133 }
4134 self.loose.invalidate_cache();
4140 match self.loose.read_object(oid) {
4141 Ok(object) => return Ok(object),
4142 Err(GitError::NotFound(_)) => {}
4143 Err(err) => return Err(err),
4144 }
4145 if let Some(err) = loose_err {
4149 return Err(err);
4150 }
4151 Err(GitError::object_not_found_in(
4152 *oid,
4153 MissingObjectContext::Read,
4154 ))
4155 }
4156}
4157
4158impl ObjectWriter for FileObjectDatabase {
4159 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4160 let oid = object.object_id(self.format)?;
4166 if self.contains(&oid)? {
4167 return Ok(oid);
4168 }
4169 self.loose.write_object(object)
4170 }
4171}
4172
4173fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
4174 if path.exists() {
4175 return Ok(());
4176 }
4177 let parent = path
4178 .parent()
4179 .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
4180 fs::create_dir_all(parent)?;
4181 let temp_path = unique_temp_path(parent);
4182 let write_result = (|| -> Result<()> {
4183 {
4184 let mut file = fs::OpenOptions::new()
4185 .write(true)
4186 .create_new(true)
4187 .open(&temp_path)?;
4188 file.write_all(bytes)?;
4189 file.sync_all()?;
4190 }
4191 match fs::rename(&temp_path, path) {
4192 Ok(()) => Ok(()),
4193 Err(_) if path.exists() => {
4194 let _ = fs::remove_file(&temp_path);
4195 Ok(())
4196 }
4197 Err(err) => Err(GitError::Io(err.to_string())),
4198 }
4199 })();
4200 if write_result.is_err() {
4201 let _ = fs::remove_file(&temp_path);
4202 }
4203 write_result
4204}
4205
4206fn write_promisor_pack_sidecar(
4207 pack_dir: &Path,
4208 pack_name: &str,
4209 promisor: bool,
4210) -> Result<Option<PathBuf>> {
4211 if !promisor {
4212 return Ok(None);
4213 }
4214 let path = pack_dir.join(format!("{pack_name}.promisor"));
4215 write_pack_component(&path, b"")?;
4216 Ok(Some(path))
4217}
4218
/// Maximum number of bytes in which a loose object's header must end with a
/// NUL terminator.
///
/// Header readers in this file stop collecting header bytes once this many
/// have been seen without a NUL, and the terminator check only inspects this
/// many leading bytes of the inflated data.
const MAX_LOOSE_HEADER_LEN: usize = 32;
4224
4225fn loose_header_too_long(oid: &ObjectId) -> GitError {
4230 GitError::InvalidObject(format!(
4231 "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
4232 ))
4233}
4234
4235fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
4239 GitError::InvalidObject(format!("unable to unpack {oid} header"))
4240}
4241
/// Inspects the two-byte zlib stream header at the start of `input` and
/// returns the diagnostic text for the first problem found, if any.
///
/// Checks run in this order: header checksum (the 16-bit big-endian header
/// must be a multiple of 31), compression method (low nibble of the first
/// byte must be 8), window size (high nibble of the first byte must be at
/// most 7), and the preset-dictionary flag (bit `0x20` of the second byte).
/// Returns `None` for inputs shorter than two bytes or with a clean header.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let header_word = (u16::from(cmf) << 8) | u16::from(flg);
    let diagnostic = if header_word % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if cmf & 0x0f != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if cmf >> 4 > 7 {
        "inflate: data stream error (invalid window size)"
    } else if flg & 0x20 != 0 {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(diagnostic)
}
4265
4266fn emit_inflate_diagnostic(input: &[u8]) {
4269 if let Some(diagnostic) = inflate_header_diagnostic(input) {
4270 eprintln!("error: {diagnostic}");
4271 }
4272}
4273
/// Result of checking a loose object file with
/// `LooseObjectStore::verify_object`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// The file inflated and parsed, and its content hashes to the expected id.
    Ok,
    /// The file inflated and parsed, but its content hashes to `actual`
    /// instead of the id it was looked up under.
    HashMismatch { actual: ObjectId },
    /// The file could not be inflated or parsed, has trailing bytes after the
    /// zlib stream, or holds fewer body bytes than its header declares.
    Corrupt,
}
4287
/// Access to loose objects stored as individual files under an objects
/// directory.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    /// Root directory; object files live at `<objects_dir>/<2 hex>/<rest>`.
    objects_dir: PathBuf,
    /// Hash format every object id handled by this store must use.
    format: ObjectFormat,
    /// Presence cache of loose object ids, filled per fanout byte on demand
    /// and shared between clones of the store.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
4302
4303impl LooseObjectStore {
4304 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4305 Self {
4306 objects_dir: objects_dir.into(),
4307 format,
4308 loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
4309 }
4310 }
4311
4312 fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
4317 let mut guard = self.loose_cache.lock().ok()?;
4318 let fanout = oid.as_bytes()[0];
4319 if !guard.loaded_fanouts.contains(&fanout) {
4320 collect_loose_fanout_object_ids(
4321 &self.objects_dir,
4322 self.format,
4323 fanout,
4324 &mut guard.objects,
4325 )
4326 .ok()?;
4327 guard.loaded_fanouts.insert(fanout);
4328 }
4329 Some(guard.objects.contains(oid))
4330 }
4331
4332 fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
4336 if let Ok(mut guard) = self.loose_cache.lock() {
4337 guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
4338 guard.loaded_fanouts = (0..=u8::MAX).collect();
4339 let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
4340 ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
4341 return Ok(ids);
4342 }
4343 loose_object_ids(&self.objects_dir, self.format)
4344 }
4345
4346 fn note_loose_write(&self, oid: ObjectId) {
4350 if let Ok(mut guard) = self.loose_cache.lock() {
4351 guard.objects.insert(oid);
4352 }
4353 }
4354
4355 pub(crate) fn invalidate_cache(&self) {
4358 if let Ok(mut guard) = self.loose_cache.lock() {
4359 *guard = LoosePresenceCache::default();
4360 }
4361 }
4362
4363 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
4364 Self::new(repository_objects_dir(git_dir), format)
4365 }
4366
4367 fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
4368 if oid.format() != self.format {
4369 return Err(GitError::InvalidObjectId(format!(
4370 "object {oid} uses {}, store uses {}",
4371 oid.format().name(),
4372 self.format.name()
4373 )));
4374 }
4375 Ok(())
4376 }
4377
4378 pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
4379 self.validate_oid_format(oid)?;
4380 let hex = oid.to_hex();
4381 Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
4382 }
4383
4384 pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
4385 self.validate_oid_format(oid)?;
4386 if self.cached_loose_presence(oid) == Some(false) {
4387 return Ok(false);
4388 }
4389 let path = self.object_path(oid)?;
4390 Ok(path.exists())
4391 }
4392
4393 pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
4394 self.validate_oid_format(oid)?;
4395 if self.cached_loose_presence(oid) == Some(false) {
4396 return Ok(None);
4397 }
4398 let path = self.object_path(oid)?;
4399 match fs::metadata(path) {
4400 Ok(metadata) => Ok(Some(metadata.len())),
4401 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
4402 Err(err) => Err(GitError::Io(err.to_string())),
4403 }
4404 }
4405
4406 pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
4411 self.validate_oid_format(oid)?;
4412 if self.cached_loose_presence(oid) == Some(false) {
4413 return Ok(None);
4414 }
4415 let path = self.object_path(oid)?;
4416 let mut file = match fs::File::open(&path) {
4417 Ok(file) => file,
4418 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4419 Err(err) => return Err(GitError::Io(err.to_string())),
4420 };
4421 let mut stream_prefix = [0u8; 2];
4426 let prefix_len = read_full_prefix(&mut file, &mut stream_prefix)?;
4427 file.seek(SeekFrom::Start(0))
4428 .map_err(|err| GitError::Io(err.to_string()))?;
4429 let mut decoder = ZlibDecoder::new(file);
4430 let mut header = Vec::new();
4431 let mut byte = [0u8; 1];
4432 loop {
4433 let read = match decoder.read(&mut byte) {
4442 Ok(read) => read,
4443 Err(_) => {
4444 emit_inflate_diagnostic(&stream_prefix[..prefix_len]);
4445 return Err(loose_unpack_header_failed(oid));
4446 }
4447 };
4448 if read == 0 {
4449 return Err(loose_header_too_long(oid));
4450 }
4451 if byte[0] == 0 {
4452 break;
4453 }
4454 header.push(byte[0]);
4455 if header.len() >= MAX_LOOSE_HEADER_LEN {
4458 return Err(loose_header_too_long(oid));
4459 }
4460 }
4461 let header =
4462 std::str::from_utf8(&header).map_err(|err| GitError::InvalidObject(err.to_string()))?;
4463 let (kind, size) = header
4464 .split_once(' ')
4465 .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
4466 let object_type = kind.parse::<ObjectType>()?;
4467 let size = size
4468 .parse::<u64>()
4469 .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
4470 Ok(Some((object_type, size)))
4471 }
4472
4473 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4475 self.loose_object_ids_cached()
4476 }
4477
4478 pub fn verify_object(
4486 &self,
4487 oid: &ObjectId,
4488 display_path: &str,
4489 ) -> Result<Option<LooseObjectIntegrity>> {
4490 let path = self.object_path(oid)?;
4491 let compressed = match fs::read(&path) {
4492 Ok(compressed) => compressed,
4493 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4494 Err(err) => return Err(GitError::Io(err.to_string())),
4495 };
4496 let mut decoder = ZlibDecoder::new(compressed.as_slice());
4497 let mut framed = Vec::new();
4498 if decoder.read_to_end(&mut framed).is_err() {
4499 emit_inflate_diagnostic(&compressed);
4500 if framed_loose_header_terminated(&framed) {
4508 eprintln!("error: corrupt loose object '{oid}'");
4509 eprintln!("error: unable to unpack contents of {display_path}");
4510 } else {
4511 eprintln!("error: unable to unpack header of {display_path}");
4512 }
4513 return Ok(Some(LooseObjectIntegrity::Corrupt));
4514 }
4515 if !framed_loose_header_terminated(&framed) {
4516 eprintln!("error: unable to unpack header of {display_path}");
4519 return Ok(Some(LooseObjectIntegrity::Corrupt));
4520 }
4521 if (decoder.total_in() as usize) < compressed.len() {
4528 eprintln!("error: garbage at end of loose object '{oid}'");
4532 eprintln!("error: unable to unpack contents of {display_path}");
4533 return Ok(Some(LooseObjectIntegrity::Corrupt));
4534 }
4535 if let Some(declared) = loose_header_declared_size(&framed) {
4542 let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
4543 let body_len = framed.len() - (nul + 1).min(framed.len());
4544 if body_len < declared {
4545 eprintln!("error: corrupt loose object '{oid}'");
4546 eprintln!("error: unable to unpack contents of {display_path}");
4547 return Ok(Some(LooseObjectIntegrity::Corrupt));
4548 }
4549 }
4550 let Ok(object) = parse_framed_object(&framed) else {
4551 if let Some(header) = loose_header_with_unknown_type(&framed) {
4556 eprintln!(
4557 "error: unable to parse type from header '{header}' of {display_path}"
4558 );
4559 } else {
4560 eprintln!("error: unable to parse header of {display_path}");
4561 }
4562 return Ok(Some(LooseObjectIntegrity::Corrupt));
4563 };
4564 let actual = object.object_id(self.format)?;
4565 if &actual != oid {
4566 return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
4567 }
4568 Ok(Some(LooseObjectIntegrity::Ok))
4569 }
4570}
4571
4572fn framed_loose_header_terminated(framed: &[u8]) -> bool {
4576 framed
4577 .iter()
4578 .take(MAX_LOOSE_HEADER_LEN)
4579 .any(|byte| *byte == 0)
4580}
4581
4582fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
4587 let nul = framed.iter().position(|&b| b == 0)?;
4588 let header = std::str::from_utf8(&framed[..nul]).ok()?;
4589 let (kind, size) = header.split_once(' ')?;
4590 let size: usize = size.parse().ok()?;
4591 if framed.len() - (nul + 1) != size {
4594 return None;
4595 }
4596 if kind.parse::<ObjectType>().is_ok() {
4599 return None;
4600 }
4601 Some(header.to_string())
4602}
4603
/// Extracts the size declared in a framed object's `<type> <size>\0` header.
///
/// Returns `None` when there is no NUL terminator, the header is not UTF-8,
/// it contains no space, or the text after the first space is not a valid
/// `usize`.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    framed
        .iter()
        .position(|byte| *byte == 0)
        .and_then(|nul| std::str::from_utf8(&framed[..nul]).ok())
        .and_then(|header| header.split_once(' '))
        .and_then(|(_kind, size)| size.parse::<usize>().ok())
}
4613
4614fn read_full_prefix(file: &mut fs::File, prefix: &mut [u8]) -> Result<usize> {
4617 let mut len = 0;
4618 while len < prefix.len() {
4619 let read = file
4620 .read(&mut prefix[len..])
4621 .map_err(|err| GitError::Io(err.to_string()))?;
4622 if read == 0 {
4623 break;
4624 }
4625 len += read;
4626 }
4627 Ok(len)
4628}
4629
4630impl ObjectReader for LooseObjectStore {
4631 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4632 self.validate_oid_format(oid)?;
4633 if self.cached_loose_presence(oid) == Some(false) {
4637 return Err(GitError::object_not_found_in(
4638 *oid,
4639 MissingObjectContext::Read,
4640 ));
4641 }
4642 let path = self.object_path(oid)?;
4643 let compressed = match fs::read(&path) {
4644 Ok(compressed) => compressed,
4645 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4646 return Err(GitError::object_not_found_in(
4647 *oid,
4648 MissingObjectContext::Read,
4649 ));
4650 }
4651 Err(err) => return Err(GitError::Io(err.to_string())),
4652 };
4653 let mut decoder = ZlibDecoder::new(compressed.as_slice());
4654 let mut framed = Vec::new();
4655 if decoder.read_to_end(&mut framed).is_err() {
4656 emit_inflate_diagnostic(&compressed);
4657 if !framed_loose_header_terminated(&framed) {
4662 return Err(loose_unpack_header_failed(oid));
4663 }
4664 return Err(GitError::InvalidObject(format!(
4665 "corrupt loose object '{oid}'"
4666 )));
4667 }
4668 if framed
4673 .iter()
4674 .take(MAX_LOOSE_HEADER_LEN)
4675 .all(|byte| *byte != 0)
4676 {
4677 return Err(loose_header_too_long(oid));
4678 }
4679 let object = parse_framed_object(&framed)?;
4680 if verify_reads_enabled() {
4684 let actual = object.object_id(self.format)?;
4685 if &actual != oid {
4686 return Err(GitError::InvalidObject(format!(
4687 "loose object {} hashes to {actual}",
4688 path.display()
4689 )));
4690 }
4691 }
4692 Ok(Arc::new(object))
4693 }
4694}
4695
4696impl ObjectWriter for LooseObjectStore {
4697 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4698 let oid = object.object_id(self.format)?;
4699 let path = self.object_path(&oid)?;
4700 if path.exists() {
4701 self.note_loose_write(oid);
4702 return Ok(oid);
4703 }
4704 let parent = path
4705 .parent()
4706 .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
4707 fs::create_dir_all(parent)?;
4708 let temp_path = unique_temp_path(parent);
4709 let write_result = (|| -> Result<()> {
4710 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4711 encoder.write_all(&object.framed_bytes())?;
4712 let compressed = encoder.finish()?;
4713 {
4714 let mut file = fs::OpenOptions::new()
4715 .write(true)
4716 .create_new(true)
4717 .open(&temp_path)?;
4718 file.write_all(&compressed)?;
4719 }
4729 match fs::rename(&temp_path, &path) {
4730 Ok(()) => Ok(()),
4731 Err(_) if path.exists() => {
4732 let _ = fs::remove_file(&temp_path);
4733 Ok(())
4734 }
4735 Err(err) => Err(GitError::Io(err.to_string())),
4736 }
4737 })();
4738 if write_result.is_err() {
4739 let _ = fs::remove_file(&temp_path);
4740 }
4741 write_result?;
4742 self.note_loose_write(oid);
4743 Ok(oid)
4744 }
4745}
4746
4747fn unique_temp_path(parent: &Path) -> PathBuf {
4748 let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
4749 parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
4750}
4751
4752#[cfg(test)]
4753mod tests {
4754 use super::*;
4755 use sley_core::BString;
4756 use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
4757 use sley_pack::{PackFile, PackWriteOptions};
4758
4759 fn blob_of(byte: u8, len: usize) -> EncodedObject {
4760 EncodedObject::new(ObjectType::Blob, vec![byte; len])
4761 }
4762
4763 fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
4764 Arc::new(blob_of(byte, len))
4765 }
4766
4767 fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
4768 reader
4769 .read_object(oid)
4770 .expect("test operation should succeed")
4771 .as_ref()
4772 .clone()
4773 }
4774
4775 #[test]
4776 fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
4777 let one = cached_object_cost(&blob_of(0, 1000));
4779 let mut cache = LruCache::<u32>::new(one * 2 + 8);
4780 cache.put(1, cached_blob_of(b'a', 1000));
4781 cache.put(2, cached_blob_of(b'b', 1000));
4782 assert!(cache.get(&1).is_some());
4784 cache.put(3, cached_blob_of(b'c', 1000));
4785 assert!(cache.get(&1).is_some());
4787 assert!(cache.get(&2).is_none());
4788 assert!(cache.get(&3).is_some());
4789 }
4790
4791 #[test]
4792 fn lru_cache_zero_budget_is_inert() {
4793 let mut cache = LruCache::<u32>::new(0);
4794 cache.put(1, cached_blob_of(b'a', 16));
4795 assert!(cache.get(&1).is_none());
4796 }
4797
4798 #[test]
4799 fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
4800 let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
4801 cache.put(1, cached_blob_of(b'a', 50));
4802 assert!(cache.get(&1).is_some());
4803 cache.put(1, cached_blob_of(b'b', 10_000));
4806 assert!(cache.get(&1).is_none());
4807 cache.put(2, cached_blob_of(b'c', 50));
4810 assert!(cache.get(&2).is_some());
4811 }
4812
4813 #[test]
4814 fn lru_cache_replacing_entry_updates_byte_accounting() {
4815 let small = cached_object_cost(&blob_of(0, 500));
4818 let mut cache = LruCache::<u32>::new(small * 2 + 200);
4819 cache.put(1, cached_blob_of(b'a', 500));
4820 cache.put(2, cached_blob_of(b'b', 500));
4821 assert!(cache.get(&1).is_some());
4822 assert!(cache.get(&2).is_some());
4823 cache.put(2, cached_blob_of(b'b', 1000));
4828 assert!(cache.get(&2).is_some());
4829 assert!(cache.get(&1).is_none());
4830 }
4831
4832 #[test]
4833 fn write_and_validate_blob() {
4834 let db = ObjectDatabase::new(ObjectFormat::Sha1);
4835 let oid = db
4836 .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
4837 .expect("test operation should succeed");
4838 assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
4839 db.validate(&oid).expect("test operation should succeed");
4840 }
4841
4842 #[test]
4843 fn loose_store_writes_and_reads_object() {
4844 let root = std::env::temp_dir().join(format!(
4845 "sley-loose-store-{}-{}",
4846 std::process::id(),
4847 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
4848 ));
4849 let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
4850 let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
4851 let oid = store
4852 .write_object(object.clone())
4853 .expect("test operation should succeed");
4854 assert_eq!(read_object_for_assert(&store, &oid), object);
4855 assert!(
4856 store
4857 .object_path(&oid)
4858 .expect("test operation should succeed")
4859 .exists()
4860 );
4861 fs::remove_dir_all(root).expect("test operation should succeed");
4862 }
4863
/// Drops a hand-written SHA-1 pack and its index into `objects/pack` and
/// checks that a `FileObjectDatabase` opened afterwards can find and read
/// the packed blob.
#[test]
fn file_database_reads_object_from_pack_index() {
    let root = temp_root("sley-file-odb-pack");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Build a one-object pack and write the pack and idx files, both named
    // after the pack checksum.
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");
    let pack_name = written.checksum.to_hex();
    let pack_file = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_file = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_file, written.pack).expect("test operation should succeed");
    fs::write(index_file, written.index).expect("test operation should succeed");

    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let present = database
        .contains(&blob_id)
        .expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&database, &blob_id), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
4893
/// A read that misses must not prevent the same database handle from
/// seeing the object once it is written through `db.loose()`.
#[test]
fn file_database_loose_cache_observes_same_process_write_after_miss() {
    let root = temp_root("sley-file-odb-loose-cache-write");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let blob = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // First lookup happens before the object exists and must be NotFound.
    let miss = database.read_object(&blob_id);
    assert!(matches!(miss, Err(GitError::NotFound(_))));

    // Write through the loose store of the same database handle.
    database
        .loose()
        .write_object(blob.clone())
        .expect("test operation should succeed");

    assert_eq!(read_object_for_assert(&database, &blob_id), blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4914
/// A presence checker that already answered "absent" for an id must answer
/// "present" after that object is written through the database's loose
/// store.
#[test]
fn object_presence_checker_observes_same_process_loose_write_after_miss() {
    let root = temp_root("sley-presence-checker-loose-cache-write");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let mut checker = database.presence_checker();

    let blob = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Query once while the object does not exist yet.
    let present_before = checker
        .contains(&blob_id)
        .expect("test operation should succeed");
    assert!(!present_before);

    database
        .loose()
        .write_object(blob)
        .expect("test operation should succeed");

    // The same checker instance is queried again after the write.
    let present_after = checker
        .contains(&blob_id)
        .expect("test operation should succeed");
    assert!(present_after);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
4944
/// `read_object_header` must report the same `(type, length)` pair as a
/// full `read_object` for a loose object and for three objects installed
/// from a pack, and must return `None` for an id that is not stored.
#[test]
fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-read-object-header");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // One object written loose.
    let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
    let loose_oid = db
        .write_object(loose.clone())
        .expect("test operation should succeed");

    // `child` shares a 4096-byte run with `base` and adds a short tail; the
    // pack is written with ofs-delta preferred and reordering disabled.
    let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
    let tail: &[u8] = b" plus a deltified tail\n";
    let child_body = [&[b'a'; 4096][..], tail].concat();
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let commitish = EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());

    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let commit_oid = commitish
        .object_id(format)
        .expect("test operation should succeed");

    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let packed_objects = [base.clone(), child.clone(), commitish.clone()];
    let pack = PackFile::write_packed_with_options(&packed_objects, format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    let expectations = [
        (&loose_oid, ObjectType::Blob, loose.body.len()),
        (&base_oid, ObjectType::Blob, base.body.len()),
        (&child_oid, ObjectType::Blob, child.body.len()),
        (&commit_oid, ObjectType::Commit, commitish.body.len()),
    ];
    for (oid, want_type, want_len) in expectations {
        // Header against the values the test constructed.
        let header = db
            .read_object_header(oid)
            .expect("test operation should succeed");
        assert_eq!(header, Some((want_type, want_len as u64)), "header for {oid}");

        // Header against what a full read of the same id reports.
        let full = db.read_object(oid).expect("test operation should succeed");
        let header_after_read = db
            .read_object_header(oid)
            .expect("test operation should succeed");
        assert_eq!(
            header_after_read,
            Some((full.object_type, full.body.len() as u64))
        );
    }

    // An id that was never written yields `None` rather than an error.
    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_header = db
        .read_object_header(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_header, None);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5020
/// Exercises `object_storage_info` for a loose object, a packed base, a
/// packed child expected to name the base as its delta base, and a missing
/// id.
#[test]
fn object_storage_info_reports_loose_packed_and_delta_metadata() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-object-storage-info");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Loose object: disk size must equal the size of its file, and the
    // delta base must be the zero id.
    let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
    let loose_oid = db
        .write_object(loose)
        .expect("test operation should succeed");
    let loose_path = db
        .loose()
        .object_path(&loose_oid)
        .expect("test operation should succeed");
    let loose_size = fs::metadata(loose_path)
        .expect("test operation should succeed")
        .len();
    let loose_info = db
        .object_storage_info(&loose_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert_eq!(loose_info.disk_size, loose_size);
    let loose_zero = zero_oid(format).expect("test operation should succeed");
    assert_eq!(loose_info.deltabase, loose_zero);

    // Pack a base and a near-identical child with ofs-delta preferred and
    // reordering disabled.
    let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
    let tail: &[u8] = b" changed tail\n";
    let child_body = [&[b'a'; 4096][..], tail].concat();
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    // Packed base: non-empty on disk and no delta base.
    let base_info = db
        .object_storage_info(&base_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(base_info.disk_size > 0);
    let base_zero = zero_oid(format).expect("test operation should succeed");
    assert_eq!(base_info.deltabase, base_zero);

    // Packed child: non-empty on disk and deltified against the base.
    let child_info = db
        .object_storage_info(&child_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(child_info.disk_size > 0);
    assert_eq!(child_info.deltabase, base_oid);

    // Unknown id: `None`, not an error.
    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_info = db
        .object_storage_info(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_info, None);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5094
/// An 8-hex-digit prefix of the only loose object must resolve to that
/// object, and the object must appear in `object_ids()`.
#[test]
fn file_database_resolves_unique_loose_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-loose");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let blob = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
    let blob_id = database
        .write_object(blob)
        .expect("test operation should succeed");
    let full_hex = blob_id.to_hex();
    let short = &full_hex[..8];

    let resolution = database
        .resolve_prefix(short)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(blob_id));

    let listed = database
        .object_ids()
        .expect("test operation should succeed");
    assert!(listed.contains(&blob_id));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5119
/// An 8-hex-digit prefix of an object that exists only in an installed
/// pack must resolve to that object.
#[test]
fn file_database_resolves_unique_packed_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-packed");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let blob = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // The blob is never written loose; it only reaches the database via
    // the installed pack.
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");
    database
        .install_pack(&pack)
        .expect("test operation should succeed");

    let full_hex = blob_id.to_hex();
    let short = &full_hex[..8];
    let resolution = database
        .resolve_prefix(short)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(blob_id));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5143
/// Writes blobs until two of them share their first four hex digits, then
/// checks that resolving that prefix reports exactly those two ids as
/// ambiguous.
#[test]
fn file_database_reports_ambiguous_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-ambiguous");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Map from 4-hex prefix to the first id seen with it; stop at the first
    // prefix that is seen twice.
    let mut seen = HashMap::new();
    let mut collision = None;
    for idx in 0..10_000 {
        let body = format!("ambiguous {idx}\n").into_bytes();
        let oid = db
            .write_object(EncodedObject::new(ObjectType::Blob, body))
            .expect("test operation should succeed");
        let prefix = oid.to_hex()[..4].to_string();
        if let Some(first) = seen.insert(prefix.clone(), oid) {
            collision = Some((prefix, first, oid));
            break;
        }
    }
    let (prefix, first, second) = collision.expect("test should find a 4-hex collision");

    let resolution = db
        .resolve_prefix(&prefix)
        .expect("test operation should succeed");
    let mut found = match resolution {
        ObjectPrefixResolution::Ambiguous(candidates) => candidates,
        _ => panic!("expected ambiguous prefix {prefix}"),
    };

    // Compare order-independently by sorting both sides on their hex form.
    found.sort_by_key(ObjectId::to_hex);
    let mut expected = vec![first, second];
    expected.sort_by_key(ObjectId::to_hex);
    assert_eq!(found, expected);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5176
/// Resolving the three-character prefix `abc` must fail with
/// `GitError::InvalidObjectId`.
#[test]
fn file_database_rejects_too_short_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-short");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let outcome = database.resolve_prefix("abc");
    assert!(matches!(outcome, Err(GitError::InvalidObjectId(_))));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5190
/// SHA-256 variant of the pack-index read test: the pack and idx files are
/// written by hand into `objects/pack`, then the blob is looked up through
/// a `FileObjectDatabase` opened with `ObjectFormat::Sha256`.
#[test]
fn file_database_reads_sha256_object_from_pack_index() {
    let root = temp_root("sley-file-odb-pack-sha256");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // One-object pack; both files are named after the pack checksum.
    let written = PackFile::write_undeltified(std::slice::from_ref(&blob), ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let pack_name = written.checksum.to_hex();
    let pack_file = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_file = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_file, written.pack).expect("test operation should succeed");
    fs::write(index_file, written.index).expect("test operation should succeed");

    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
    let present = database
        .contains(&blob_id)
        .expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&database, &blob_id), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5221
/// `install_pack` on a SHA-256 database must report the pack name, ids and
/// file paths, must not create a loose copy of the object, and must leave
/// the object readable.
#[test]
fn file_database_installs_sha256_pack_without_loose_objects() {
    let root = temp_root("sley-file-odb-install-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&blob), ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);

    let installed = database
        .install_pack(&pack)
        .expect("test operation should succeed");

    // Reported metadata.
    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(installed.pack_name, expected_name);
    assert_eq!(installed.object_ids, vec![blob_id]);
    assert!(installed.pack_path.exists());
    assert!(installed.index_path.exists());
    assert_eq!(installed.promisor_path, None);

    // No loose file, yet the object is reachable through the database.
    let loose_path = database
        .loose()
        .object_path(&blob_id)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    let present = database
        .contains(&blob_id)
        .expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&database, &blob_id), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5254
/// Same expectations as the `install_pack` test, but the database is given
/// only the raw pack bytes via `install_raw_pack`.
#[test]
fn file_database_installs_raw_sha256_pack_without_loose_objects() {
    let root = temp_root("sley-file-odb-install-raw-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&blob), ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);

    // Only `pack.pack` is handed over; the prebuilt index is not passed in.
    let installed = database
        .install_raw_pack(&pack.pack)
        .expect("test operation should succeed");

    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(installed.pack_name, expected_name);
    assert_eq!(installed.object_ids, vec![blob_id]);
    assert!(installed.pack_path.exists());
    assert!(installed.index_path.exists());
    assert_eq!(installed.promisor_path, None);

    let loose_path = database
        .loose()
        .object_path(&blob_id)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    let present = database
        .contains(&blob_id)
        .expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&database, &blob_id), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5287
/// `install_pack` must refuse a pack whose index was regenerated from
/// entries with a tampered CRC32.
#[test]
fn file_database_rejects_noncanonical_pack_index() {
    let root = temp_root("sley-file-odb-install-bad-index");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
    let pack = PackFile::write_undeltified(std::slice::from_ref(&blob), ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Flip the low bit of the first entry's CRC and rebuild a v2 index
    // around the otherwise unchanged pack.
    let mut tampered_entries = pack.entries.clone();
    tampered_entries[0].crc32 ^= 1;
    let tampered_index = PackIndex::write_v2(ObjectFormat::Sha1, &tampered_entries, &pack.checksum)
        .expect("test operation should succeed");
    let mut bad_pack = pack.clone();
    bad_pack.index = tampered_index;

    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let outcome = database.install_pack(&bad_pack);
    assert!(outcome.is_err());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5307
/// Installing raw pack bytes with `promisor: true` must create an empty
/// `.promisor` file sharing the pack's file stem, alongside the pack and
/// index, without writing a loose object.
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
    let root = temp_root("sley-file-odb-install-raw-promisor-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
    let blob_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&blob), ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let install_options = RawPackInstallOptions { promisor: true };
    let installed = database
        .install_raw_pack_with_options(&pack.pack, install_options)
        .expect("test operation should succeed");

    // Sidecar: same stem as the pack, `promisor` extension, zero bytes.
    let promisor_path = installed.promisor_path.expect("promisor sidecar");
    assert_eq!(promisor_path.file_stem(), installed.pack_path.file_stem());
    let sidecar_extension = promisor_path.extension().and_then(|ext| ext.to_str());
    assert_eq!(sidecar_extension, Some("promisor"));
    assert!(promisor_path.exists());
    assert_eq!(
        fs::read(&promisor_path).expect("test operation should succeed"),
        b""
    );

    // Pack and index exist; the object is not loose but is readable.
    assert!(installed.pack_path.exists());
    assert!(installed.index_path.exists());
    let loose_path = database
        .loose()
        .object_path(&blob_id)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    assert_eq!(read_object_for_assert(&database, &blob_id), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5347
/// For an admin directory at `.git/worktrees/linked` whose `commondir`
/// file contains `../..`, both helpers must resolve to the canonicalized
/// `.git` directory (and its `objects` child).
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    let root = temp_root("sley-odb-common-dir");
    let common_dir = root.join(".git");
    let admin_dir = common_dir.join("worktrees").join("linked");
    fs::create_dir_all(&admin_dir).expect("test operation should succeed");
    fs::write(admin_dir.join("commondir"), "../..\n").expect("test operation should succeed");

    // Expected values are built from the canonical form of `.git`.
    let canonical_common = fs::canonicalize(common_dir).expect("test operation should succeed");
    assert_eq!(repository_common_dir(&admin_dir), canonical_common);
    let expected_objects = canonical_common.join("objects");
    assert_eq!(repository_objects_dir(&admin_dir), expected_objects);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5362
/// Builds a commit -> tree -> blob chain in a source repository, checks
/// that the reachability walk finds all three, then installs them into a
/// second repository as a pack with no loose copies.
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-pack");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    // Blob.
    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // Tree with a single regular-file entry pointing at the blob.
    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_oid = source
        .write_object(tree.clone())
        .expect("test operation should succeed");

    // Parentless commit pointing at the tree.
    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_oid = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    // The walk from the commit must cover every object in the chain.
    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
        .expect("test operation should succeed");
    for expected in [&commit_oid, &tree_oid, &blob_oid] {
        assert!(reachable.contains(expected));
    }

    let install =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(install.object_ids.len(), 3);

    // Each object is present in the destination, byte-identical, and not
    // stored loose.
    let transferred = [
        (&commit_oid, &commit),
        (&tree_oid, &tree),
        (&blob_oid, &blob),
    ];
    for (oid, object) in transferred {
        let loose_path = destination
            .loose()
            .object_path(oid)
            .expect("test operation should succeed");
        assert!(!loose_path.exists());
        let present = destination
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&destination, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5442
/// With the tree excluded and the commit listed twice as a start, the
/// collected objects must consist of the commit alone.
#[test]
fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-exclusions");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // commit -> tree -> blob
    let blob = EncodedObject::new(ObjectType::Blob, b"excluded payload\n".to_vec());
    let blob_oid = db
        .write_object(blob)
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    // Exclude the tree; pass the same start id twice.
    let excluded = HashSet::from([tree_oid]);
    let starts = [commit_oid, commit_oid];
    let objects = collect_reachable_objects(&db, format, starts, &excluded)
        .expect("test operation should succeed");

    assert_eq!(objects.len(), 1);
    let only_id = objects[0]
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(only_id, commit_oid);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5499
/// `build_reachable_pack` must yield a pack starting with the `PACK`
/// signature for a single blob, and `None` once that blob is excluded.
#[test]
fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-build-reachable-pack");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
    let blob_oid = db
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // No exclusions: exactly one entry, for the blob.
    let pack = build_reachable_pack(&db, format, std::iter::once(blob_oid), &HashSet::new())
        .expect("test operation should succeed")
        .expect("reachable pack should be built");
    assert!(pack.pack.starts_with(b"PACK"));
    assert_eq!(pack.entries.len(), 1);
    assert_eq!(pack.entries[0].oid, blob_oid);

    // Everything excluded: the starts are taken from the first pack's
    // entries and nothing is left to pack.
    let excluded = HashSet::from([blob_oid]);
    let starts = pack.entries.into_iter().map(|entry| entry.oid);
    let rebuilt = build_reachable_pack(&db, format, starts, &excluded)
        .expect("test operation should succeed");
    assert!(rebuilt.is_none());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5532
/// The reachability walk must follow a tag to its target blob, and a
/// missing start id must produce a typed not-found error carrying the id
/// and the `Traversal` context.
#[test]
fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-tags");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"tagged payload\n".to_vec());
    let blob_oid = db
        .write_object(blob)
        .expect("test operation should succeed");

    // Annotated tag `v1` pointing directly at the blob.
    let tag_body = Tag {
        object: blob_oid,
        object_type: ObjectType::Blob,
        name: b"v1".to_vec(),
        tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
        message: b"tag message\n".to_vec(),
        raw_body: None,
    }
    .write();
    let tag_oid = db
        .write_object(EncodedObject::new(ObjectType::Tag, tag_body))
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
        .expect("test operation should succeed");
    assert!(reachable.contains(&tag_oid));
    assert!(reachable.contains(&blob_oid));

    // Starting from an id that was never written must fail.
    let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        .expect("test operation should succeed");
    let walk = collect_reachable_object_ids(&db, format, std::iter::once(missing));
    let err = walk.expect_err("missing traversal root should error");
    let kind = err.not_found_kind().expect("typed not found");
    assert_eq!(kind.object_id(), Some(missing));
    assert_eq!(
        kind.missing_object_context(),
        Some(MissingObjectContext::Traversal)
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5576
/// With no start ids, `install_reachable_pack` must return `None` and must
/// not create the destination's `objects/pack` directory.
#[test]
fn install_reachable_pack_empty_starts_create_no_pack() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-empty");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let no_starts = Vec::<ObjectId>::new();
    let outcome = install_reachable_pack(&source, &destination, format, no_starts)
        .expect("test operation should succeed");

    assert!(outcome.is_none());
    let pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5596
/// When the only start id is also in the exclusion set,
/// `install_reachable_pack_excluding` must return `None` and leave the
/// destination without an `objects/pack` directory.
#[test]
fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-install-excluding");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
    let blob_oid = source
        .write_object(blob)
        .expect("test operation should succeed");

    // The single start is itself excluded.
    let excluded = HashSet::from([blob_oid]);
    let starts = std::iter::once(blob_oid);
    let outcome =
        install_reachable_pack_excluding(&source, &destination, format, starts, &excluded)
            .expect("test operation should succeed");

    assert!(outcome.is_none());
    let pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5627
/// Runs both `build_reachable_pack` and `install_reachable_pack` with
/// `ObjectFormat::Sha256` for a single blob and checks the destination
/// ends up with a packed, non-loose, readable copy.
#[test]
fn install_reachable_pack_supports_sha256() {
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-reachable-pack-sha256");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(destination_git_dir.join("objects"))
        .expect("test operation should succeed");
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // Building alone: raw bytes carry the PACK signature and the first
    // entry is the blob.
    let pack = build_reachable_pack(&source, format, std::iter::once(blob_oid), &HashSet::new())
        .expect("test operation should succeed")
        .expect("sha256 reachable pack should be built");
    assert!(pack.pack.starts_with(b"PACK"));
    assert_eq!(pack.entries[0].oid, blob_oid);

    // Building and installing into the destination.
    let installed =
        install_reachable_pack(&source, &destination, format, std::iter::once(blob_oid))
            .expect("test operation should succeed")
            .expect("sha256 reachable pack should be written");
    assert_eq!(installed.object_ids, vec![blob_oid]);

    let loose_path = destination
        .loose()
        .object_path(&blob_oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    assert_eq!(read_object_for_assert(&destination, &blob_oid), blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5665
/// `install_reachable_pack` must work with any `RawPackInstaller`: a
/// recording stub receives exactly one pack and its canned result is
/// passed back to the caller.
#[test]
fn install_helpers_accept_custom_raw_pack_installer() {
    use std::cell::RefCell;

    /// Stub installer that stores every pack it is given and answers with a
    /// preconfigured list of object ids.
    #[derive(Default)]
    struct RecordingInstaller {
        packs: RefCell<Vec<Vec<u8>>>,
        installed: RefCell<Vec<ObjectId>>,
    }

    impl RawPackInstaller for RecordingInstaller {
        fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
            let received = pack_bytes.to_vec();
            self.packs.borrow_mut().push(received);
            let object_ids = self.installed.borrow().clone();
            Ok(RawPackInstallResult { object_ids })
        }
    }

    let format = ObjectFormat::Sha1;
    let source = ObjectDatabase::new(format);
    let blob = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
    let blob_oid = source
        .write_object(blob)
        .expect("test operation should succeed");

    // Preload the id the stub will report as installed.
    let installer = RecordingInstaller::default();
    installer.installed.borrow_mut().push(blob_oid);

    let outcome = install_reachable_pack(&source, &installer, format, std::iter::once(blob_oid))
        .expect("test operation should succeed")
        .expect("custom installer should receive pack");

    let canned_ids = installer.installed.into_inner();
    assert_eq!(outcome.object_ids, canned_ids);

    let recorded = installer.packs.into_inner();
    assert_eq!(recorded.len(), 1);
    assert!(recorded[0].starts_with(b"PACK"));
}
5700
/// With only `.pack` files and a `multi-pack-index` in the pack directory (this
/// test writes no per-pack `.idx` file), the database must still answer
/// `contains`, prefix resolution and reads for objects from either pack.
#[test]
fn file_database_reads_object_from_multi_pack_index() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-midx");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    // Two blobs, each stored in its own single-object pack.
    let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
    let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");

    // The multi-pack index refers to packs by their `.idx` name; the data
    // files on disk use the matching `.pack` name.
    let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
    let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
    let first_pack_path = pack_dir.join(first_pack_name.replace(".idx", ".pack"));
    let second_pack_path = pack_dir.join(second_pack_name.replace(".idx", ".pack"));
    fs::write(first_pack_path, first_pack.pack).expect("test operation should succeed");
    fs::write(second_pack_path, second_pack.pack).expect("test operation should succeed");

    let index_names = [first_pack_name, second_pack_name];
    let index_entries = [
        sley_pack::MultiPackIndexEntry {
            oid: first_oid,
            pack_int_id: 0,
            offset: first_pack.entries[0].offset,
        },
        sley_pack::MultiPackIndexEntry {
            oid: second_oid,
            pack_int_id: 1,
            offset: second_pack.entries[0].offset,
        },
    ];
    let midx = MultiPackIndex::write(format, 2, &index_names, &index_entries)
        .expect("test operation should succeed");
    fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let has_second = db
        .contains(&second_oid)
        .expect("test operation should succeed");
    assert!(has_second);

    // An eight-character prefix must resolve to the second object alone.
    let resolution = db
        .resolve_prefix(&second_oid.to_hex()[..8])
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(second_oid));

    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5765
/// A database handle that has already served reads (and one failed lookup)
/// must still find objects from a pack installed afterwards, while the earlier
/// pack stays readable.
#[test]
fn file_database_finds_pack_added_after_registry_was_cached() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Install one pack and read from it through the same handle.
    let early = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
    let early_oid = early
        .object_id(format)
        .expect("test operation should succeed");
    let early_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&early))
        .expect("test operation should succeed");
    db.install_pack(&early_pack)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &early_oid), early);

    // The second object is in no pack yet, so the lookup must miss.
    let late = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
    let late_oid = late
        .object_id(format)
        .expect("test operation should succeed");
    let premature_lookup = db.read_object(&late_oid);
    assert!(matches!(premature_lookup, Err(GitError::NotFound(_))));

    // Once its pack is installed, the same handle must see it.
    let late_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&late))
        .expect("test operation should succeed");
    db.install_pack(&late_pack)
        .expect("test operation should succeed");
    let has_late = db
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(has_late);
    assert_eq!(read_object_for_assert(&db, &late_oid), late);
    assert_eq!(read_object_for_assert(&db, &early_oid), early);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5814
/// A presence checker created before a pack is installed must report that
/// pack's objects once the pack has been added.
#[test]
fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-presence-checker-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let early = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
    let early_oid = early
        .object_id(format)
        .expect("test operation should succeed");
    let early_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&early))
        .expect("test operation should succeed");
    db.install_pack(&early_pack)
        .expect("test operation should succeed");

    let late = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
    let late_oid = late
        .object_id(format)
        .expect("test operation should succeed");

    // The checker is created while only the first pack exists.
    let mut checker = db.presence_checker();
    let sees_early = checker
        .contains(&early_oid)
        .expect("test operation should succeed");
    assert!(sees_early);
    let sees_late_too_soon = checker
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(!sees_late_too_soon);

    let late_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&late))
        .expect("test operation should succeed");
    db.install_pack(&late_pack)
        .expect("test operation should succeed");

    // The very same checker must now see the late object.
    let sees_late = checker
        .contains(&late_oid)
        .expect("test operation should succeed");
    assert!(sees_late);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5859
/// Walks a pack registry entry through its loading stages and then checks that
/// the registry is rebuilt after a second pack is installed.
///
/// The assertions are order-sensitive: each one inspects the `index` / `data`
/// slots of the first registry entry right after a specific database call.
#[test]
fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
    let root = temp_root("sley-file-odb-pack-registry-refresh");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
    let first_oid = first
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");

    // Stage 1: the registry lists the single pack, but neither its index nor
    // its data has been loaded yet.
    // NOTE(review): the boolean argument presumably controls whether the
    // directory is re-scanned; confirm against `cached_pack_registry`.
    let first_registry = db
        .cached_pack_registry(&pack_dir, false)
        .expect("test operation should succeed");
    assert_eq!(first_registry.fingerprint.idx_count, 1);
    assert_eq!(first_registry.fingerprint.pack_count, 1);
    assert_eq!(first_registry.packs.len(), 1);
    assert!(
        first_registry.packs[0]
            .index
            .lock()
            .expect("test operation should succeed")
            .is_none()
    );
    assert!(
        first_registry.packs[0]
            .data
            .lock()
            .expect("test operation should succeed")
            .is_none()
    );

    // Stage 2: a membership query fills the index slot and leaves the data
    // slot empty.
    assert!(
        db.contains(&first_oid)
            .expect("test operation should succeed")
    );
    assert!(
        first_registry.packs[0]
            .index
            .lock()
            .expect("test operation should succeed")
            .is_some()
    );
    assert!(
        first_registry.packs[0]
            .data
            .lock()
            .expect("test operation should succeed")
            .is_none()
    );
    // Stage 3: reading the object fills the data slot as well.
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    assert!(
        first_registry.packs[0]
            .data
            .lock()
            .expect("test operation should succeed")
            .is_some()
    );

    let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
    let second_oid = second
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    // Stage 4: with two packs on disk, requesting the registry with the flag
    // set must yield a different `Arc` whose counts reflect both packs.
    let refreshed = db
        .cached_pack_registry(&pack_dir, true)
        .expect("test operation should succeed");
    assert!(!Arc::ptr_eq(&first_registry, &refreshed));
    assert_eq!(refreshed.fingerprint.idx_count, 2);
    assert_eq!(refreshed.fingerprint.pack_count, 2);
    assert_eq!(refreshed.packs.len(), 2);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5947
/// After reads have hit two packs and a lookup has missed, installing a third
/// pack must make its object readable without breaking reads from the first.
#[test]
fn file_database_pack_search_hint_rebuilds_after_pack_added() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-pack-lookup-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Small helpers so the three objects are prepared the same way.
    let id_of = |object: &EncodedObject| {
        object
            .object_id(format)
            .expect("test operation should succeed")
    };
    let single_pack = |object: &EncodedObject| {
        PackFile::write_undeltified_sha1(std::slice::from_ref(object))
            .expect("test operation should succeed")
    };

    let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
    let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
    let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
    let first_oid = id_of(&first);
    let second_oid = id_of(&second);
    let third_oid = id_of(&third);

    let first_pack = single_pack(&first);
    let second_pack = single_pack(&second);
    db.install_pack(&first_pack)
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");

    // Touch both installed packs, then miss on the object not yet packed.
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    let premature_lookup = db.read_object(&third_oid);
    assert!(matches!(premature_lookup, Err(GitError::NotFound(_))));

    let third_pack = single_pack(&third);
    db.install_pack(&third_pack)
        .expect("test operation should succeed");

    assert_eq!(read_object_for_assert(&db, &third_oid), third);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5998
/// Writes the same blob both into a pack (pack + idx placed on disk by hand)
/// and through `write_object`, then checks the object reads back intact.
#[test]
fn file_database_prefers_loose_object_over_packed_object() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-prefer-loose");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");

    // Drop the pack and its index into the pack directory under the
    // checksum-derived name.
    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let oid = db
        .write_object(object.clone())
        .expect("test operation should succeed");
    let read_back = read_object_for_assert(&db, &oid);
    assert_eq!(read_back, object);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6027
/// A bundle whose only prerequisite is already present in the database must
/// pass prerequisite verification.
#[test]
fn bundle_prerequisite_verification_reads_existing_objects() {
    let format = ObjectFormat::Sha1;
    let db = ObjectDatabase::new(format);
    let base = EncodedObject::new(ObjectType::Blob, b"base\n".to_vec());
    let oid = db
        .write_object(base)
        .expect("test operation should succeed");

    // Header-only bundle: one `-<oid>` prerequisite line and no references.
    let header = format!("# v2 git bundle\n-{oid} base\n\n");
    let bundle_bytes = header.into_bytes();
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
}
6040
/// A bundle naming a prerequisite that the (empty) database does not hold must
/// fail prerequisite verification.
#[test]
fn bundle_prerequisite_verification_reports_missing_objects() {
    let format = ObjectFormat::Sha1;
    let db = ObjectDatabase::new(format);

    // Compute an id for content that is never written to `db`.
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");
    let header = format!("# v2 git bundle\n-{missing} missing\n\n");
    let bundle_bytes = header.into_bytes();
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let verdict = verify_bundle_prerequisites(&bundle, &db);
    assert!(verdict.is_err());
}
6052
/// Unbundling a bundle with no prerequisites must write the packed object into
/// the writer and hand back the bundle's references unchanged.
#[test]
fn unbundle_objects_writes_pack_entries_and_returns_refs() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");

    // Bundle layout: textual header followed directly by the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
        .expect("test operation should succeed");
    assert_eq!(outcome.written_objects, vec![oid]);
    assert_eq!(outcome.references, bundle.references);
    assert_eq!(read_object_for_assert(&writer, &oid), object);
}
6077
/// Installing a bundle's pack into a file-backed database must make the object
/// readable from the database without creating a loose object file, and must
/// return the bundle's references.
#[test]
fn install_bundle_pack_writes_pack_and_returns_refs() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-install-bundle-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let prerequisite_reader = ObjectDatabase::new(format);
    let database = FileObjectDatabase::from_git_dir(&git_dir, format);

    let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");

    // Bundle layout: textual header followed directly by the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = install_bundle_pack(&bundle, &prerequisite_reader, &database)
        .expect("test operation should succeed");

    assert_eq!(outcome.written_objects, vec![oid]);
    assert_eq!(outcome.references, bundle.references);

    let is_present = database
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(is_present);
    assert_eq!(read_object_for_assert(&database, &oid), object);

    // The object must not have been exploded into a loose file.
    let has_loose_file = database
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!has_loose_file);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
6119
/// Unpacking a SHA-256 pack must write its single blob into the writer and
/// report that blob's id.
#[test]
fn unpack_packfile_objects_writes_sha256_pack_entries() {
    let format = ObjectFormat::Sha256;
    let writer = ObjectDatabase::new(format);

    let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");

    let outcome = unpack_packfile_objects(&pack.pack, format, &writer)
        .expect("test operation should succeed");

    assert_eq!(outcome.written_objects, vec![oid]);
    let stored = read_object_for_assert(&writer, &oid);
    assert_eq!(stored, object);
}
6136
/// When a bundle lists a prerequisite the reader does not have, unbundling
/// must fail and the packed object must not appear in the writer.
#[test]
fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    // Id of content that is never stored anywhere.
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");

    // Header with one prerequisite line and one reference, then the pack.
    let mut bundle_bytes =
        format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle =
        Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let attempt = unbundle_objects(&bundle, &prerequisite_reader, &mut writer);
    assert!(attempt.is_err());
    assert!(!writer.contains(&oid));
}
6161
6162 fn write_commit_graph(
6165 db: &mut FileObjectDatabase,
6166 payload: &[u8],
6167 ) -> Vec<(ObjectId, EncodedObject)> {
6168 let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
6169 let blob_oid = db
6170 .write_object(blob.clone())
6171 .expect("test operation should succeed");
6172 let tree = EncodedObject::new(
6173 ObjectType::Tree,
6174 Tree {
6175 entries: vec![TreeEntry {
6176 mode: 0o100644,
6177 name: BString::from(b"payload.txt"),
6178 oid: blob_oid,
6179 }],
6180 }
6181 .write(),
6182 );
6183 let tree_oid = db
6184 .write_object(tree.clone())
6185 .expect("test operation should succeed");
6186 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6187 let commit = EncodedObject::new(
6188 ObjectType::Commit,
6189 Commit {
6190 tree: tree_oid,
6191 parents: Vec::new(),
6192 author: identity.clone(),
6193 committer: identity,
6194 encoding: None,
6195 message: b"initial\n".to_vec(),
6196 }
6197 .write(),
6198 );
6199 let commit_oid = db
6200 .write_object(commit.clone())
6201 .expect("test operation should succeed");
6202 vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
6203 }
6204
6205 fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
6206 let root = temp_root("sley-repack-all");
6207 let git_dir = root.join(".git");
6208 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6209 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6210
6211 let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
6213 let packed_oid = packed_blob
6214 .object_id(format)
6215 .expect("test operation should succeed");
6216 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
6217 .expect("test operation should succeed");
6218 let existing = db
6219 .install_pack(&existing_pack)
6220 .expect("test operation should succeed");
6221
6222 let graph = write_commit_graph(&mut db, b"repack payload\n");
6223
6224 let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
6225 expected.insert(packed_oid, packed_blob.clone());
6226
6227 let result = repack_all_objects(&git_dir, format)
6228 .expect("test operation should succeed")
6229 .expect("repository has objects");
6230
6231 assert_eq!(result.object_count, expected.len());
6233 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
6234 assert_eq!(parsed.entries.len(), expected.len());
6235 for entry in &parsed.entries {
6236 let want = expected
6237 .get(&entry.entry.oid)
6238 .expect("packed object was in the repository");
6239 assert_eq!(&entry.object, want);
6240 assert_eq!(
6241 entry
6242 .object
6243 .object_id(format)
6244 .expect("test operation should succeed"),
6245 entry.entry.oid
6246 );
6247 }
6248 let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
6250 assert_eq!(idx.pack_checksum, parsed.checksum);
6251 assert_eq!(idx.entries.len(), expected.len());
6252
6253 assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
6255 let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
6257 want_loose.sort_by_key(ObjectId::to_hex);
6258 assert_eq!(result.packed_loose, want_loose);
6259 assert!(!result.packed_loose.contains(&packed_oid));
6260
6261 fs::remove_dir_all(root).expect("test operation should succeed");
6262 }
6263
/// Runs the shared repack consolidation scenario with SHA-1 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha1() {
    let format = ObjectFormat::Sha1;
    repack_all_objects_consolidates_loose_and_pack(format);
}
6268
/// Runs the shared repack consolidation scenario with SHA-256 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha256() {
    let format = ObjectFormat::Sha256;
    repack_all_objects_consolidates_loose_and_pack(format);
}
6273
/// With nothing but an empty `objects` directory, `repack_all_objects` must
/// succeed and return `None`.
#[test]
fn repack_all_objects_returns_none_for_empty_repository() {
    let root = temp_root("sley-repack-empty");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let outcome = repack_all_objects(&git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert!(outcome.is_none());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6288
/// Installing a repack result with the final flag set to `false` must write
/// the new pack and index while leaving every loose object in place and
/// readable.
#[test]
fn install_repack_result_writes_pack_without_pruning_by_default() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-nodelete");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"install no prune\n");

    let result = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    install_repack_result(&git_dir, format, &result, false)
        .expect("test operation should succeed");

    // The installed files are named after the new pack's checksum.
    let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
    let pack_dir = git_dir.join("objects").join("pack");
    let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
    let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
    assert!(pack_path.exists());
    assert!(idx_path.exists());

    let is_loose = |oid: &ObjectId| {
        db.loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists()
    };
    for (oid, object) in graph.iter() {
        assert!(is_loose(oid));
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6324
/// Installing a repack result with the final flag set to `true` must remove
/// the superseded pack (and its index) and the now-packed loose objects, while
/// a freshly opened database still reads every object from the new pack.
#[test]
fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-prune");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Starting state: one installed pack plus a loose commit graph.
    let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
    let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
        .expect("test operation should succeed");
    let existing = db
        .install_pack(&existing_pack)
        .expect("test operation should succeed");
    let graph = write_commit_graph(&mut db, b"prune payload\n");

    let result = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    let parsed_new_pack =
        PackFile::parse(&result.pack, format).expect("test operation should succeed");
    let new_pack_checksum = parsed_new_pack.checksum;
    install_repack_result(&git_dir, format, &result, true)
        .expect("test operation should succeed");

    // The old pack and its index are gone.
    assert!(!existing.pack_path.exists());
    assert!(!existing.index_path.exists());

    // So are the loose copies of the graph objects.
    let is_loose = |oid: &ObjectId| {
        db.loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists()
    };
    for (oid, _) in graph.iter() {
        assert!(!is_loose(oid));
    }

    // The consolidated pack is on disk under its checksum-derived name.
    let pack_dir = git_dir.join("objects").join("pack");
    let new_pack_path = pack_dir.join(format!("pack-{}.pack", new_pack_checksum.to_hex()));
    assert!(new_pack_path.exists());

    // A fresh handle must serve everything from the new pack.
    let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
    for (oid, object) in graph.iter() {
        let present = reopened
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&reopened, oid), *object);
    }
    let packed_oid = packed_blob
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6385
/// Packs with a `.keep` sidecar or installed as promisor packs must survive a
/// pruning install even though `repack_all_objects` lists them as obsolete;
/// the packed loose objects are still removed.
#[test]
fn install_repack_result_preserves_keep_and_promisor_packs() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-keep-promisor");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Pack protected by an empty `.keep` file written next to it.
    let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
    let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
        .expect("test operation should succeed");
    let keep_install = db
        .install_pack(&keep_pack)
        .expect("test operation should succeed");
    let keep_sidecar = keep_install.pack_path.with_extension("keep");
    fs::write(&keep_sidecar, b"").expect("test operation should succeed");

    // Pack installed with the promisor option, which yields a sidecar path.
    let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
    let promisor_pack =
        PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
            .expect("test operation should succeed");
    let promisor_options = RawPackInstallOptions { promisor: true };
    let promisor_install = db
        .install_pack_with_options(&promisor_pack, promisor_options)
        .expect("test operation should succeed");
    let promisor_sidecar = promisor_install
        .promisor_path
        .clone()
        .expect("promisor sidecar");

    let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
    let result = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");

    // Both protected packs are reported as obsolete by the repack itself.
    assert!(result.obsolete_packs.contains(&keep_install.pack_path));
    assert!(result.obsolete_packs.contains(&promisor_install.pack_path));

    install_repack_result(&git_dir, format, &result, true)
        .expect("test operation should succeed");

    // None of the protected files may have been deleted by the install.
    let protected_paths = [
        &keep_install.pack_path,
        &keep_install.index_path,
        &keep_sidecar,
        &promisor_install.pack_path,
        &promisor_install.index_path,
        &promisor_sidecar,
    ];
    for path in protected_paths {
        assert!(path.exists(), "{} should be preserved", path.display());
    }

    let is_loose = |oid: &ObjectId| {
        db.loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists()
    };
    for (oid, _) in graph.iter() {
        assert!(!is_loose(oid));
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6446
/// Safety check: a loose object listed in `packed_loose` but not contained in
/// the new pack must not be deleted by a pruning install.
#[test]
fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-repack-install-safety");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"safety packed\n");

    let mut result = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");

    // Written after the repack ran, so the new pack cannot contain it. Its id
    // is then smuggled into the list of loose objects to delete.
    let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
    let stray_oid = db
        .write_object(stray.clone())
        .expect("test operation should succeed");
    assert!(!result.packed_loose.contains(&stray_oid));
    result.packed_loose.push(stray_oid);

    install_repack_result(&git_dir, format, &result, true)
        .expect("test operation should succeed");

    let is_loose = |oid: &ObjectId| {
        db.loose()
            .object_path(oid)
            .expect("test operation should succeed")
            .exists()
    };

    // The stray object survives and is still readable.
    assert!(is_loose(&stray_oid));
    assert_eq!(read_object_for_assert(&db, &stray_oid), stray);

    // The objects that really were packed are pruned as usual.
    for (oid, _) in graph.iter() {
        assert!(!is_loose(oid));
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6493
/// `prune_unreachable_loose` must single out the one loose object that is not
/// reachable from the given commit: with the final flag `false` it only
/// reports it, with `true` it also deletes it. Reachable objects stay on disk
/// and readable in both cases.
#[test]
fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
    let root = temp_root("sley-prune-unreachable");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"reachable payload\n");
    // `write_commit_graph` returns the commit first. `ObjectId` is `Copy` (it
    // is passed by value twice below), so no `.clone()` is needed.
    let commit_oid = graph[0].0;

    // A blob nothing refers to.
    let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    // Report-only pass: the dangling object is listed but left on disk.
    let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
        .expect("test operation should succeed");
    assert_eq!(reported, vec![dangling_oid]);
    assert!(
        db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Deleting pass: same listing, and the file is gone afterwards.
    let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");
    assert_eq!(deleted, vec![dangling_oid]);
    assert!(
        !db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists()
    );

    // Everything reachable from the commit is untouched.
    for (oid, object) in &graph {
        assert!(
            db.loose()
                .object_path(oid)
                .expect("test operation should succeed")
                .exists()
        );
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6543
/// A tree entry with gitlink mode (`0o160000`) names an object that is not in
/// this database; pruning must still succeed and delete only the dangling
/// blob.
#[test]
fn prune_unreachable_loose_ignores_gitlink_targets() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-prune-gitlink");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Tree whose only entry is a gitlink to an id never written to `db`.
    let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let gitlink_entry = TreeEntry {
        mode: 0o160000,
        name: BString::from(b"submodule"),
        oid: submodule_oid,
    };
    let tree_body = Tree {
        entries: vec![gitlink_entry],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");

    // Parentless commit pointing at that tree.
    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"gitlink\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    // A blob nothing refers to.
    let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");

    assert_eq!(deleted, vec![dangling_oid]);
    let dangling_still_on_disk = db
        .loose()
        .object_path(&dangling_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!dangling_still_on_disk);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6602
6603 fn temp_root(prefix: &str) -> PathBuf {
6604 std::env::temp_dir().join(format!(
6605 "{prefix}-{}-{}",
6606 std::process::id(),
6607 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
6608 ))
6609 }
6610}