1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
9use sley_formats::{Bundle, BundleReference};
10use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
11use sley_pack::{
12 MultiPackIndex, PackBitmapIndex, PackBitmapWriter, PackFile, PackIndex, PackIndexEntry,
13 PackInput, PackWrite,
14};
15use std::collections::{HashMap, HashSet, VecDeque};
16use std::io::{Read, Seek, SeekFrom, Write};
17use std::path::{Path, PathBuf};
18use std::sync::atomic::{AtomicU64, Ordering};
19use std::sync::{Arc, Mutex, OnceLock};
20use std::{env, fs};
21
/// Process-wide atomic counter, starting at zero.
///
/// NOTE(review): presumably used to make temporary file names unique — its uses are outside
/// this chunk; confirm.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
23
/// Read access to an object store keyed by [`ObjectId`].
pub trait ObjectReader {
    /// Looks up `oid` and returns the encoded object behind a shared pointer.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Reports whether `oid` is a shallow graft point.
    ///
    /// The default implementation answers `false` for every id. `grafted_parents` treats a
    /// commit for which this returns `true` as having no parents.
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }
}
38
39fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
40 (*oid == ObjectId::empty_tree(format))
41 .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
42}
43
44fn with_missing_object_context(
45 err: GitError,
46 oid: ObjectId,
47 context: MissingObjectContext,
48) -> GitError {
49 let kind = err
50 .not_found_kind()
51 .and_then(sley_core::NotFoundKind::missing_object_kind);
52 match kind {
53 Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
54 None => err,
55 }
56}
57
58pub fn grafted_parents<R: ObjectReader + ?Sized>(
62 reader: &R,
63 oid: &ObjectId,
64 parents: Vec<ObjectId>,
65) -> Vec<ObjectId> {
66 if reader.is_shallow_graft(oid) {
67 Vec::new()
68 } else {
69 parents
70 }
71}
72
/// Write access to an object store.
pub trait ObjectWriter {
    /// Stores `object` and returns the id it was stored under.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
81
/// Outcome of unbundling a [`Bundle`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids of the objects written or installed from the bundle's pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the references carried by the bundle.
    pub references: Vec<BundleReference>,
}
87
/// Outcome of writing every entry of a pack through an [`ObjectWriter`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids returned by the writer, one per pack entry, in pack entry order.
    pub written_objects: Vec<ObjectId>,
}
92
/// Describes a pack installed on disk.
///
/// NOTE(review): this struct is populated outside this chunk; the field notes below are read
/// off the field names and types only — confirm against the installer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Name of the installed pack.
    pub pack_name: String,
    /// Path of the pack file.
    pub pack_path: PathBuf,
    /// Path of the pack's index file.
    pub index_path: PathBuf,
    /// Path of the promisor marker, when there is one.
    pub promisor_path: Option<PathBuf>,
    /// Object ids associated with the installed pack.
    pub object_ids: Vec<ObjectId>,
}
101
/// Result returned by [`RawPackInstaller::install_raw_pack`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Object ids reported by the installer for the installed pack.
    pub object_ids: Vec<ObjectId>,
}
106
/// Options forwarded to the pack installer; the default leaves every flag off.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// NOTE(review): presumably marks the installed pack as a promisor pack — the consumer is
    /// outside this chunk; confirm.
    pub promisor: bool,
}
111
/// A destination that can ingest a complete pack given as raw bytes.
pub trait RawPackInstaller {
    /// Installs the pack in `pack_bytes` and reports the object ids it contributed.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
115
/// Result of resolving an object-id prefix.
///
/// NOTE(review): the producer of this enum is outside this chunk; the variant notes are read
/// off the variant names and payloads — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// No object matched.
    Missing,
    /// Exactly one object matched.
    Unique(ObjectId),
    /// Several objects matched; carries the candidate ids.
    Ambiguous(Vec<ObjectId>),
}
122
/// Storage details for a single object.
///
/// NOTE(review): produced outside this chunk; field meanings below are inferred from names —
/// confirm (in particular what `deltabase` holds for non-delta objects).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Size of the object as stored on disk.
    pub disk_size: u64,
    /// Id of the delta base.
    pub deltabase: ObjectId,
}
128
129impl RawPackInstaller for FileObjectDatabase {
130 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
131 let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
132 Ok(RawPackInstallResult {
133 object_ids: result.object_ids,
134 })
135 }
136}
137
138impl RawPackInstaller for ObjectDatabase {
139 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
140 let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
141 Ok(RawPackInstallResult {
142 object_ids: result.written_objects,
143 })
144 }
145}
146
147pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
148 let mut missing = Vec::new();
149 for prerequisite in &bundle.prerequisites {
150 match reader.read_object(&prerequisite.oid) {
151 Ok(object) => {
152 let actual = object.object_id(bundle.format)?;
153 if actual != prerequisite.oid {
154 return Err(GitError::InvalidObject(format!(
155 "bundle prerequisite {} hashes to {actual}",
156 prerequisite.oid
157 )));
158 }
159 }
160 Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
161 Err(err) => return Err(err),
162 }
163 }
164 if missing.is_empty() {
165 return Ok(());
166 }
167 Err(GitError::object_not_found_in(
168 missing[0],
169 MissingObjectContext::PackInstall,
170 ))
171}
172
173pub fn unbundle_objects<R, W>(
174 bundle: &Bundle,
175 prerequisite_reader: &R,
176 writer: &mut W,
177) -> Result<BundleUnbundleResult>
178where
179 R: ObjectReader,
180 W: ObjectWriter,
181{
182 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
183 let pack = PackFile::parse_bundle(bundle)?;
184 let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
185 Ok(BundleUnbundleResult {
186 written_objects,
187 references: bundle.references.clone(),
188 })
189}
190
191pub fn install_bundle_pack<R>(
192 bundle: &Bundle,
193 prerequisite_reader: &R,
194 destination: &impl RawPackInstaller,
195) -> Result<BundleUnbundleResult>
196where
197 R: ObjectReader,
198{
199 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
200 let install = destination.install_raw_pack(&bundle.pack)?;
201 Ok(BundleUnbundleResult {
202 written_objects: install.object_ids,
203 references: bundle.references.clone(),
204 })
205}
206
207pub fn unpack_packfile_objects<W>(
208 pack_bytes: &[u8],
209 format: ObjectFormat,
210 writer: &W,
211) -> Result<PackUnpackResult>
212where
213 W: ObjectWriter,
214{
215 let pack = PackFile::parse(pack_bytes, format)?;
216 write_pack_objects(pack, writer, "pack")
217}
218
219fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
220where
221 W: ObjectWriter,
222{
223 let mut written_objects = Vec::with_capacity(pack.entries.len());
224 for entry in pack.entries {
225 let expected = entry.entry.oid;
226 let actual = writer.write_object(entry.object)?;
227 if actual != expected {
228 return Err(GitError::InvalidObject(format!(
229 "{source} object id mismatch: expected {expected}, wrote {actual}"
230 )));
231 }
232 written_objects.push(actual);
233 }
234 Ok(PackUnpackResult { written_objects })
235}
236
237pub fn collect_reachable_object_ids<R, I>(
238 reader: &R,
239 format: ObjectFormat,
240 starts: I,
241) -> Result<HashSet<ObjectId>>
242where
243 R: ObjectReader,
244 I: IntoIterator<Item = ObjectId>,
245{
246 walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
247}
248
249pub fn collect_reachable_object_ids_with_cut<R, I>(
254 reader: &R,
255 format: ObjectFormat,
256 starts: I,
257 cut: &HashSet<ObjectId>,
258) -> Result<HashSet<ObjectId>>
259where
260 R: ObjectReader,
261 I: IntoIterator<Item = ObjectId>,
262{
263 walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
264}
265
266pub fn collect_reachable_object_ids_excluding<R, I>(
270 reader: &R,
271 format: ObjectFormat,
272 starts: I,
273 excluded: &HashSet<ObjectId>,
274) -> Result<HashSet<ObjectId>>
275where
276 R: ObjectReader,
277 I: IntoIterator<Item = ObjectId>,
278{
279 walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
280}
281
282pub fn collect_reachable_objects<R, I>(
283 reader: &R,
284 format: ObjectFormat,
285 starts: I,
286 excluded: &HashSet<ObjectId>,
287) -> Result<Vec<Arc<EncodedObject>>>
288where
289 R: ObjectReader,
290 I: IntoIterator<Item = ObjectId>,
291{
292 let mut objects = Vec::new();
293 walk_reachable_objects(reader, format, starts, excluded, |_, object| {
294 objects.push(Arc::clone(object));
295 })?;
296 Ok(objects)
297}
298
/// An object paired with its already-known id, ready to be fed to the pack writer.
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id of `object`; carried along so the pack writer need not recompute it.
    oid: ObjectId,
    /// The shared encoded object.
    object: Arc<EncodedObject>,
}
304
305fn collect_reachable_pack_objects<R, I>(
306 reader: &R,
307 format: ObjectFormat,
308 starts: I,
309 excluded: &HashSet<ObjectId>,
310) -> Result<Vec<ReachablePackObject>>
311where
312 R: ObjectReader,
313 I: IntoIterator<Item = ObjectId>,
314{
315 let mut objects = Vec::new();
316 walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
317 objects.push(ReachablePackObject {
318 oid: *oid,
319 object: Arc::clone(object),
320 });
321 })?;
322 Ok(objects)
323}
324
325fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
326 objects
327 .iter()
328 .map(|entry| PackInput {
329 oid: &entry.oid,
330 object: &entry.object,
331 })
332 .collect()
333}
334
335pub fn install_reachable_pack<I>(
336 source: &impl ObjectReader,
337 destination: &impl RawPackInstaller,
338 format: ObjectFormat,
339 starts: I,
340) -> Result<Option<RawPackInstallResult>>
341where
342 I: IntoIterator<Item = ObjectId>,
343{
344 install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
345}
346
347pub fn install_reachable_pack_excluding<I>(
348 source: &impl ObjectReader,
349 destination: &impl RawPackInstaller,
350 format: ObjectFormat,
351 starts: I,
352 excluded: &HashSet<ObjectId>,
353) -> Result<Option<RawPackInstallResult>>
354where
355 I: IntoIterator<Item = ObjectId>,
356{
357 let pack = match build_reachable_pack(source, format, starts, excluded)? {
358 Some(pack) => pack,
359 None => return Ok(None),
360 };
361 destination.install_raw_pack(&pack.pack).map(Some)
362}
363
364pub fn build_reachable_pack<R, I>(
365 reader: &R,
366 format: ObjectFormat,
367 starts: I,
368 excluded: &HashSet<ObjectId>,
369) -> Result<Option<PackWrite>>
370where
371 R: ObjectReader,
372 I: IntoIterator<Item = ObjectId>,
373{
374 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
375 if objects.is_empty() {
376 return Ok(None);
377 }
378 let inputs = pack_inputs(&objects);
383 PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
384}
385
386pub fn build_and_install_reachable_pack<R, I>(
387 source: &R,
388 destination: &FileObjectDatabase,
389 format: ObjectFormat,
390 starts: I,
391 excluded: &HashSet<ObjectId>,
392 options: RawPackInstallOptions,
393) -> Result<Option<PackInstallResult>>
394where
395 R: ObjectReader,
396 I: IntoIterator<Item = ObjectId>,
397{
398 build_and_install_reachable_pack_filtered(
399 source,
400 destination,
401 format,
402 starts,
403 excluded,
404 options,
405 None,
406 None,
407 )
408}
409
/// Object filter applied when building a pack from reachable objects.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// Omit blobs, except those that were explicitly requested as start ids
    /// (see `build_and_install_reachable_pack_filtered`).
    BlobNone,
}
421
422#[allow(clippy::too_many_arguments)]
426pub fn build_and_install_reachable_pack_filtered<R, I>(
427 source: &R,
428 destination: &FileObjectDatabase,
429 format: ObjectFormat,
430 starts: I,
431 excluded: &HashSet<ObjectId>,
432 options: RawPackInstallOptions,
433 filter: Option<PackObjectFilter>,
434 unpack_limit: Option<usize>,
435) -> Result<Option<PackInstallResult>>
436where
437 R: ObjectReader,
438 I: IntoIterator<Item = ObjectId>,
439{
440 let starts: Vec<ObjectId> = starts.into_iter().collect();
441 let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
442 let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
443 match filter {
444 Some(PackObjectFilter::BlobNone) => {
445 objects.retain(|entry| {
446 entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
447 });
448 }
449 None => {}
450 }
451 if objects.is_empty() {
452 return Ok(None);
453 }
454 if let Some(limit) = unpack_limit
458 && objects.len() < limit
459 {
460 for entry in &objects {
461 destination.loose().write_object((*entry.object).clone())?;
462 }
463 return Ok(None);
464 }
465 let inputs = pack_inputs(&objects);
466 let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
467 destination
468 .install_generated_pack_unchecked(&pack, options)
469 .map(Some)
470}
471
472pub fn assemble_pack_with_verbatim_reuse(
482 format: ObjectFormat,
483 reused_pack_bytes: &[u8],
484 appended: &[PackInput<'_>],
485) -> Result<(Vec<u8>, u32)> {
486 assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
487}
488
489pub fn assemble_pack_with_verbatim_reuses(
492 format: ObjectFormat,
493 reused_packs: &[&[u8]],
494 appended: &[PackInput<'_>],
495) -> Result<(Vec<u8>, u32)> {
496 let hash_len = format.raw_len();
497 let mut reused_count = 0u32;
498 let mut capacity = 12 + hash_len + 64 * appended.len();
499 for reused_pack_bytes in reused_packs {
500 if reused_pack_bytes.len() < 12 + hash_len {
501 return Err(GitError::InvalidFormat("reused pack too short".into()));
502 }
503 if &reused_pack_bytes[..4] != b"PACK" {
504 return Err(GitError::InvalidFormat(
505 "reused pack has no signature".into(),
506 ));
507 }
508 let version = u32::from_be_bytes([
509 reused_pack_bytes[4],
510 reused_pack_bytes[5],
511 reused_pack_bytes[6],
512 reused_pack_bytes[7],
513 ]);
514 if version != 2 {
515 return Err(GitError::Unsupported(format!(
516 "reused pack version {version}"
517 )));
518 }
519 let count = u32::from_be_bytes([
520 reused_pack_bytes[8],
521 reused_pack_bytes[9],
522 reused_pack_bytes[10],
523 reused_pack_bytes[11],
524 ]);
525 reused_count = reused_count
526 .checked_add(count)
527 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
528 capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
529 }
530 let total = reused_count
531 .checked_add(appended.len() as u32)
532 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
533
534 let mut out = Vec::with_capacity(capacity);
535 out.extend_from_slice(b"PACK");
536 out.extend_from_slice(&2u32.to_be_bytes());
537 out.extend_from_slice(&total.to_be_bytes());
538 for reused_pack_bytes in reused_packs {
539 out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
540 }
541 for input in appended {
542 write_undeltified_pack_entry(&mut out, input.object)?;
543 }
544 let checksum = sley_core::digest_bytes(format, &out)?;
545 out.extend_from_slice(checksum.as_bytes());
546 Ok((out, reused_count))
547}
548
549pub fn assemble_pack_with_verbatim_entries(
552 format: ObjectFormat,
553 reused_entries: &[&[u8]],
554 appended: &[PackInput<'_>],
555) -> Result<(Vec<u8>, u32)> {
556 let reused_count = u32::try_from(reused_entries.len())
557 .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
558 let total = reused_count
559 .checked_add(appended.len() as u32)
560 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
561
562 let mut capacity = 12 + format.raw_len() + 64 * appended.len();
563 for entry in reused_entries {
564 capacity = capacity.saturating_add(entry.len());
565 }
566 let mut out = Vec::with_capacity(capacity);
567 out.extend_from_slice(b"PACK");
568 out.extend_from_slice(&2u32.to_be_bytes());
569 out.extend_from_slice(&total.to_be_bytes());
570 for entry in reused_entries {
571 out.extend_from_slice(entry);
572 }
573 for input in appended {
574 write_undeltified_pack_entry(&mut out, input.object)?;
575 }
576 let checksum = sley_core::digest_bytes(format, &out)?;
577 out.extend_from_slice(checksum.as_bytes());
578 Ok((out, reused_count))
579}
580
581fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
583 let type_bits: u8 = match object.object_type {
584 ObjectType::Commit => 1,
585 ObjectType::Tree => 2,
586 ObjectType::Blob => 3,
587 ObjectType::Tag => 4,
588 };
589 let mut size = object.body.len() as u64;
590 let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
591 size >>= 4;
592 while size > 0 {
593 out.push(byte | 0x80);
594 byte = (size & 0x7f) as u8;
595 size >>= 7;
596 }
597 out.push(byte);
598 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
599 encoder.write_all(&object.body)?;
600 out.extend_from_slice(&encoder.finish()?);
601 Ok(())
602}
603
/// A freshly built pack plus the bookkeeping needed to install it and prune what it replaces.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the new pack file.
    pub pack: Vec<u8>,
    /// Bytes of the new pack's index.
    pub idx: Vec<u8>,
    /// Number of entries the pack writer reported.
    pub object_count: usize,
    /// Existing `.pack` files other than the new pack's own file name (empty for a
    /// loose-only repack).
    pub obsolete_packs: Vec<PathBuf>,
    /// Loose object ids that are also in the new pack, sorted by raw id bytes.
    pub packed_loose: Vec<ObjectId>,
    /// Checksum of the new pack as reported by the writer.
    pack_checksum: ObjectId,
    /// Index entries as reported by the writer; used to cross-check `idx` at install time.
    index_entries: Vec<PackIndexEntry>,
}
628
629pub fn repack_reachable_objects(
649 git_dir: &Path,
650 format: ObjectFormat,
651 roots: &[ObjectId],
652) -> Result<Option<RepackResult>> {
653 let objects_dir = repository_objects_dir(git_dir);
654 let database = FileObjectDatabase::new(objects_dir.clone(), format);
655
656 let mut seen: HashSet<ObjectId> = HashSet::new();
657 let mut objects: Vec<ReachablePackObject> = Vec::new();
658 let mut pending: Vec<ObjectId> = roots.to_vec();
659 while let Some(oid) = pending.pop() {
660 if !seen.insert(oid) {
661 continue;
662 }
663 let object = match database.read_object(&oid) {
664 Ok(object) => object,
665 Err(GitError::NotFound(_)) => continue,
666 Err(err) => return Err(err),
667 };
668 match object.object_type {
669 ObjectType::Commit => {
670 let commit = Commit::parse_ref(format, &object.body)?;
671 pending.extend(grafted_parents(&database, &oid, commit.parents));
672 pending.push(commit.tree);
673 }
674 ObjectType::Tree => {
675 for entry in TreeEntries::new(format, &object.body) {
676 let entry = entry?;
677 if !entry.is_gitlink() {
678 pending.push(entry.oid);
679 }
680 }
681 }
682 ObjectType::Tag => {
683 let tag = Tag::parse_ref(format, &object.body)?;
684 pending.push(tag.object);
685 }
686 ObjectType::Blob => {}
687 }
688 objects.push(ReachablePackObject { oid, object });
689 }
690 if objects.is_empty() {
691 return Ok(None);
692 }
693
694 let inputs = pack_inputs(&objects);
695 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
696 let object_count = written.entries.len();
697
698 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
701 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
702 .into_iter()
703 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
704 .collect();
705
706 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
707 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
708 .into_iter()
709 .filter(|oid| packed_oid_set.contains(oid))
710 .collect();
711 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
712
713 let pack_checksum = written.checksum;
714 let index_entries = written.entries.clone();
715 Ok(Some(RepackResult {
716 pack: written.pack,
717 idx: written.index,
718 object_count,
719 obsolete_packs,
720 packed_loose,
721 pack_checksum,
722 index_entries,
723 }))
724}
725
726pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
727 let objects_dir = repository_objects_dir(git_dir);
728 let database = FileObjectDatabase::new(objects_dir.clone(), format);
729
730 let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
734 if all_oids.is_empty() {
735 return Ok(None);
736 }
737
738 let mut objects = Vec::with_capacity(all_oids.len());
742 for oid in &all_oids {
743 objects.push(ReachablePackObject {
744 oid: *oid,
745 object: database.read_object(oid)?,
746 });
747 }
748
749 let inputs = pack_inputs(&objects);
750 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
751 let object_count = written.entries.len();
752
753 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
759 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
760 .into_iter()
761 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
762 .collect();
763
764 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
767 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
768 .into_iter()
769 .filter(|oid| packed_oid_set.contains(oid))
770 .collect();
771 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
772
773 Ok(Some(RepackResult {
774 pack: written.pack,
775 idx: written.index,
776 object_count,
777 obsolete_packs,
778 packed_loose,
779 pack_checksum: written.checksum,
780 index_entries: written.entries,
781 }))
782}
783
784pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
790 let objects_dir = repository_objects_dir(git_dir);
791 let database = FileObjectDatabase::new(objects_dir.clone(), format);
792 let loose_oids = loose_object_ids(&objects_dir, format)?;
793 if loose_oids.is_empty() {
794 return Ok(None);
795 }
796
797 let mut objects = Vec::with_capacity(loose_oids.len());
798 for oid in &loose_oids {
799 objects.push(ReachablePackObject {
800 oid: *oid,
801 object: database.read_object(oid)?,
802 });
803 }
804
805 let inputs = pack_inputs(&objects);
806 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
807 let object_count = written.entries.len();
808 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
809 let mut packed_loose: Vec<ObjectId> = loose_oids
810 .into_iter()
811 .filter(|oid| packed_oid_set.contains(oid))
812 .collect();
813 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
814
815 let pack_checksum = written.checksum;
816 let index_entries = written.entries.clone();
817 Ok(Some(RepackResult {
818 pack: written.pack,
819 idx: written.index,
820 object_count,
821 obsolete_packs: Vec::new(),
822 packed_loose,
823 pack_checksum,
824 index_entries,
825 }))
826}
827
828pub fn install_repack_result(
843 git_dir: &Path,
844 format: ObjectFormat,
845 result: &RepackResult,
846 prune: bool,
847) -> Result<()> {
848 install_repack_result_with_bitmap(git_dir, format, result, prune, None)
849}
850
/// Writes a repack result into `<objects>/pack` and optionally prunes what it supersedes.
///
/// Steps, in order:
/// 1. Validate the pack bytes against `result.pack_checksum`, then check that the index
///    parses, names the same checksum, and lists the entries the writer reported.
/// 2. Write the `.pack`, a freshly generated `.rev`, and the `.idx` file.
/// 3. When `bitmap_tips` is `Some` and `build_pack_bitmap` yields a bitmap, replace any
///    existing `.bitmap` for this pack with it.
/// 4. When `prune` is true, remove packs whose objects are all in the new pack and remove
///    the loose objects listed in `result.packed_loose`.
///
/// # Errors
///
/// `GitError::InvalidFormat` when the index disagrees with the pack; otherwise whatever the
/// validation, parsing, writing, or pruning helpers return.
pub fn install_repack_result_with_bitmap(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // Nothing is written until the pack and its index have been cross-checked.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }
    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    // The reverse index is derived from the parsed index entries, not taken from `result`.
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    // Write order: pack, then reverse index, then index.
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            // Any previous bitmap for this pack name is removed before the new one is written.
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    // Ids present in the new pack; only data fully covered by this set is pruned.
    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();

    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
    Ok(())
}
933
934fn validate_pack_checksum(
935 pack: &[u8],
936 format: ObjectFormat,
937 expected: &ObjectId,
938 context: &str,
939) -> Result<()> {
940 if expected.format() != format {
941 return Err(GitError::InvalidObjectId(format!(
942 "{context} checksum format does not match object format"
943 )));
944 }
945 let hash_len = format.raw_len();
946 if pack.len() < 12 + hash_len {
947 return Err(GitError::InvalidFormat(format!(
948 "{context} pack file too short"
949 )));
950 }
951 if &pack[..4] != b"PACK" {
952 return Err(GitError::InvalidFormat(format!(
953 "{context} pack file missing PACK signature"
954 )));
955 }
956 let trailer_offset = pack.len() - hash_len;
957 let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
958 let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
959 if &actual != expected || trailer != *expected {
960 return Err(GitError::InvalidFormat(format!(
961 "{context} pack checksum does not match generated pack"
962 )));
963 }
964 Ok(())
965}
966
967fn pack_index_entries_match_writer(
968 parsed: &[PackIndexEntry],
969 writer_entries: &[PackIndexEntry],
970) -> bool {
971 if parsed.len() != writer_entries.len() {
972 return false;
973 }
974 let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
975 writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
976 parsed.iter().zip(writer_entries).all(|(left, right)| {
977 left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
978 })
979}
980
981pub fn prune_unreachable_loose<I>(
990 git_dir: &Path,
991 format: ObjectFormat,
992 roots: I,
993 delete: bool,
994) -> Result<Vec<ObjectId>>
995where
996 I: IntoIterator<Item = ObjectId>,
997{
998 let objects_dir = repository_objects_dir(git_dir);
999 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1000 let reachable = collect_reachable_object_ids(&database, format, roots)?;
1001
1002 let store = LooseObjectStore::new(objects_dir.clone(), format);
1003 let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1004 .into_iter()
1005 .filter(|oid| !reachable.contains(oid))
1006 .collect();
1007 pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1008
1009 if delete {
1010 for oid in &pruned {
1011 let path = store.object_path(oid)?;
1012 match fs::remove_file(&path) {
1013 Ok(()) => {}
1014 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1015 Err(err) => return Err(GitError::Io(err.to_string())),
1016 }
1017 }
1018 }
1019 Ok(pruned)
1020}
1021
1022fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1025 let oids = loose_object_id_set(objects_dir, format)?;
1026 let mut oids = oids.into_iter().collect::<Vec<_>>();
1027 oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1028 Ok(oids)
1029}
1030
1031fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1032 let mut oids = HashSet::new();
1033 collect_loose_object_ids(objects_dir, format, &mut oids)?;
1034 Ok(oids)
1035}
1036
1037fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1040 if !pack_dir.exists() {
1041 return Ok(Vec::new());
1042 }
1043 let mut packs = Vec::new();
1044 for entry in fs::read_dir(pack_dir)? {
1045 let path = entry?.path();
1046 if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1047 packs.push(path);
1048 }
1049 }
1050 packs.sort();
1051 Ok(packs)
1052}
1053
/// Deletes every pack in `<objects_dir>/pack` whose indexed objects are all in `present`.
///
/// A pack is left alone when it is `keep` (or shares its file stem), has a `.keep` or
/// `.promisor` sibling, or has no `.idx` file. For each removed pack the `.pack`, `.idx`,
/// `.rev`, `.mtimes` and `.bitmap` files are deleted. Afterwards the multi-pack-index is
/// handed to `prune_stale_multi_pack_index` together with the removed stems.
fn prune_packs_contained_in(
    objects_dir: &Path,
    format: ObjectFormat,
    present: &HashSet<ObjectId>,
    keep: &Path,
) -> Result<()> {
    let pack_dir = objects_dir.join("pack");
    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
    let mut removed_stems: HashSet<String> = HashSet::new();

    for pack_path in existing_pack_files(&pack_dir)? {
        if pack_path == keep {
            continue;
        }
        let Some(stem) = pack_path.file_stem() else {
            continue;
        };
        // Same stem as the kept pack: treat it as the kept pack even if the path differs.
        if Some(stem) == keep_stem.as_deref() {
            continue;
        }
        // Marker files protect a pack from pruning.
        if pack_path.with_extension("keep").exists()
            || pack_path.with_extension("promisor").exists()
        {
            continue;
        }
        let index_path = pack_path.with_extension("idx");
        // Without an index the pack's contents cannot be checked, so it is not removed.
        if !index_path.exists() {
            continue;
        }
        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
        // Only packs that are fully covered by `present` are redundant.
        if !index
            .entries
            .iter()
            .all(|entry| present.contains(&entry.oid))
        {
            continue;
        }
        remove_file_if_exists(&pack_path)?;
        remove_file_if_exists(&index_path)?;
        for ext in ["rev", "mtimes", "bitmap"] {
            remove_file_if_exists(&pack_path.with_extension(ext))?;
        }
        removed_stems.insert(stem.to_string_lossy().into_owned());
    }

    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
    Ok(())
}
1109
1110fn prune_stale_multi_pack_index(
1117 pack_dir: &Path,
1118 format: ObjectFormat,
1119 removed_stems: &HashSet<String>,
1120) -> Result<()> {
1121 if removed_stems.is_empty() {
1122 return Ok(());
1123 }
1124 let midx_path = pack_dir.join("multi-pack-index");
1125 if !midx_path.exists() {
1126 return Ok(());
1127 }
1128 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1129 let references_removed_pack = midx.pack_names.iter().any(|name| {
1130 let stem = name.strip_suffix(".idx").unwrap_or(name);
1131 removed_stems.contains(stem)
1132 });
1133 if references_removed_pack {
1134 remove_file_if_exists(&midx_path)?;
1135 }
1136 Ok(())
1137}
1138
1139fn prune_loose_objects<'a, I>(
1142 objects_dir: &Path,
1143 format: ObjectFormat,
1144 candidates: I,
1145 present: &HashSet<ObjectId>,
1146) -> Result<()>
1147where
1148 I: IntoIterator<Item = &'a ObjectId>,
1149{
1150 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1151 for oid in candidates {
1152 if !present.contains(oid) {
1153 continue;
1154 }
1155 remove_file_if_exists(&store.object_path(oid)?)?;
1156 }
1157 Ok(())
1158}
1159
/// How a delta pack entry names its base object.
enum PackDeltaBase {
    /// Absolute pack offset of the base, decoded from an ofs-delta entry (type 6).
    Offset(u64),
    /// Object id of the base, read from a ref-delta entry (type 7).
    Ref(ObjectId),
}
1164
/// Result of `scan_pack_index_offsets` for one pack entry.
struct PackIndexOffsetInfo {
    /// The smallest indexed offset after the entry, or the trailer offset when the entry is
    /// last. Equal to the entry's own offset when several index entries share that offset.
    end_offset: u64,
    /// Id of the index entry located at the requested delta base offset, if one was requested.
    delta_base_oid: Option<ObjectId>,
}
1169
1170fn scan_pack_index_offsets(
1171 index: &PackIndex,
1172 target_offset: u64,
1173 trailer_offset: u64,
1174 delta_base_offset: Option<u64>,
1175) -> Result<PackIndexOffsetInfo> {
1176 let mut target_count = 0usize;
1177 let mut next_offset = None;
1178 let mut delta_base_oid = None;
1179
1180 for entry in &index.entries {
1181 if entry.offset == target_offset {
1182 target_count += 1;
1183 } else if entry.offset > target_offset {
1184 match next_offset {
1185 Some(current) if current <= entry.offset => {}
1186 _ => next_offset = Some(entry.offset),
1187 }
1188 }
1189 if Some(entry.offset) == delta_base_offset {
1190 delta_base_oid = Some(entry.oid);
1191 }
1192 }
1193
1194 if target_count == 0 {
1195 return Err(GitError::InvalidFormat(format!(
1196 "pack index offset {target_offset} not found"
1197 )));
1198 }
1199 if let Some(offset) = delta_base_offset
1200 && delta_base_oid.is_none()
1201 {
1202 return Err(GitError::InvalidFormat(format!(
1203 "ofs-delta base offset {offset} not found"
1204 )));
1205 }
1206
1207 Ok(PackIndexOffsetInfo {
1208 end_offset: if target_count > 1 {
1211 target_offset
1212 } else {
1213 next_offset.unwrap_or(trailer_offset)
1214 },
1215 delta_base_oid,
1216 })
1217}
1218
1219fn pack_entry_delta_base(
1220 format: ObjectFormat,
1221 pack: &[u8],
1222 entry_offset: u64,
1223) -> Result<Option<PackDeltaBase>> {
1224 let mut cursor = usize::try_from(entry_offset)
1225 .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1226 let first = pack_next_byte(pack, &mut cursor)?;
1227 let kind = (first >> 4) & 0x07;
1228 let mut byte = first;
1229 while byte & 0x80 != 0 {
1230 byte = pack_next_byte(pack, &mut cursor)?;
1231 }
1232 match kind {
1233 6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1234 pack,
1235 &mut cursor,
1236 entry_offset,
1237 )?))),
1238 7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1239 format,
1240 pack,
1241 &mut cursor,
1242 )?))),
1243 _ => Ok(None),
1244 }
1245}
1246
1247fn parse_ref_delta_base_oid(
1248 format: ObjectFormat,
1249 pack: &[u8],
1250 cursor: &mut usize,
1251) -> Result<ObjectId> {
1252 let raw_len = format.raw_len();
1253 if *cursor + raw_len > pack.len() {
1254 return Err(GitError::InvalidFormat(
1255 "truncated ref-delta base object id".into(),
1256 ));
1257 }
1258 let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1259 *cursor += raw_len;
1260 Ok(oid)
1261}
1262
1263fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
1264 let mut byte = pack_next_byte(pack, cursor)?;
1265 let mut relative = u64::from(byte & 0x7f);
1266 while byte & 0x80 != 0 {
1267 byte = pack_next_byte(pack, cursor)?;
1268 relative = relative
1269 .checked_add(1)
1270 .and_then(|value| value.checked_shl(7))
1271 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
1272 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
1273 }
1274 entry_offset
1275 .checked_sub(relative)
1276 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
1277}
1278
1279fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
1280 let Some(byte) = pack.get(*cursor).copied() else {
1281 return Err(GitError::InvalidFormat("truncated pack entry".into()));
1282 };
1283 *cursor += 1;
1284 Ok(byte)
1285}
1286
1287fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
1288 Ok(ObjectId::null(format))
1289}
1290
1291fn remove_file_if_exists(path: &Path) -> Result<()> {
1293 match fs::remove_file(path) {
1294 Ok(()) => Ok(()),
1295 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1296 Err(err) => Err(GitError::Io(err.to_string())),
1297 }
1298}
1299
1300fn walk_reachable_objects<R, I, F>(
1301 reader: &R,
1302 format: ObjectFormat,
1303 starts: I,
1304 excluded: &HashSet<ObjectId>,
1305 visit: F,
1306) -> Result<HashSet<ObjectId>>
1307where
1308 R: ObjectReader,
1309 I: IntoIterator<Item = ObjectId>,
1310 F: FnMut(&ObjectId, &Arc<EncodedObject>),
1311{
1312 walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
1313}
1314
/// Depth-first walk over the objects reachable from `starts`.
///
/// Every object that is read is passed to `visit` exactly once, after its
/// body has been parsed successfully. Ids in `excluded` are neither read nor
/// followed. A commit whose id is in `cut` is still visited and its tree is
/// still followed, but its parents are not. Gitlink tree entries are skipped.
///
/// Returns the set of ids that were visited.
///
/// # Errors
/// A failed read is passed through `with_missing_object_context` with
/// `MissingObjectContext::Traversal`; parse errors are returned unchanged.
fn walk_reachable_objects_with_cut<R, I, F>(
    reader: &R,
    format: ObjectFormat,
    starts: I,
    excluded: &HashSet<ObjectId>,
    cut: &HashSet<ObjectId>,
    mut visit: F,
) -> Result<HashSet<ObjectId>>
where
    R: ObjectReader,
    I: IntoIterator<Item = ObjectId>,
    F: FnMut(&ObjectId, &Arc<EncodedObject>),
{
    let mut seen = HashSet::new();
    // LIFO work list; it is drained completely for one start before the next
    // start is pushed.
    let mut pending = Vec::new();
    for start in starts {
        pending.push(start);
        while let Some(oid) = pending.pop() {
            if excluded.contains(&oid) {
                continue;
            }
            if !seen.insert(oid) {
                continue;
            }
            let object = reader.read_object(&oid).map_err(|err| {
                with_missing_object_context(err, oid, MissingObjectContext::Traversal)
            })?;
            match object.object_type {
                ObjectType::Commit => {
                    let (tree, parents) = {
                        let commit = Commit::parse_ref(format, &object.body)?;
                        (commit.tree, commit.parents)
                    };
                    visit(&oid, &object);
                    if !cut.contains(&oid) {
                        // Pushed in reverse so the first parent is popped first.
                        for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
                            pending.push(parent);
                        }
                    }
                    // Pushed last, so the tree is walked before any parent.
                    pending.push(tree);
                }
                ObjectType::Tree => {
                    let mut child_oids = Vec::new();
                    for entry in TreeEntries::new(format, &object.body) {
                        let entry = entry?;
                        if entry.is_gitlink() {
                            continue;
                        }
                        child_oids.push(entry.oid);
                    }
                    visit(&oid, &object);
                    // Reversed so children are popped in tree-entry order.
                    pending.extend(child_oids.into_iter().rev());
                }
                ObjectType::Tag => {
                    let target = {
                        let tag = Tag::parse_ref(format, &object.body)?;
                        tag.object
                    };
                    visit(&oid, &object);
                    pending.push(target);
                }
                ObjectType::Blob => visit(&oid, &object),
            }
        }
    }
    Ok(seen)
}
1385
/// Reports whether bit `position` is set in `words` (64 bits per word, least
/// significant bit first). Positions beyond the slice read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    words
        .get((position / 64) as usize)
        .is_some_and(|word| word & mask != 0)
}
1394
/// Sets bit `position` in `words` (64 bits per word, least significant bit
/// first). Positions beyond the slice are ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(slot) = words.get_mut((position / 64) as usize) {
        *slot |= 1u64 << (position % 64);
    }
}
1401
/// ORs `other` into `acc` word by word. Only the words present in both slices
/// are combined; any extra words on either side are left alone.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other.iter())
        .for_each(|(target, source)| *target |= *source);
}
1407
/// Lists the positions of all set bits in `words`, in ascending order.
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    words
        .iter()
        .enumerate()
        .flat_map(|(word_index, &word)| {
            // Yield the word, then the word with its lowest set bit cleared,
            // and so on until nothing is left.
            std::iter::successors((word != 0).then_some(word), |&rest| {
                let cleared = rest & (rest - 1);
                (cleared != 0).then_some(cleared)
            })
            .map(move |rest| word_index as u32 * 64 + rest.trailing_zeros())
        })
        .collect()
}
1421
/// Extracts the second-to-last space-separated field of `identity` and parses
/// it as a signed integer timestamp.
///
/// Returns 0 when that field is missing, is not valid UTF-8, or does not
/// parse as an `i64`.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Splitting from the right yields the last field first, so index 1 is the
    // field just before it.
    let Some(raw) = identity.rsplitn(3, |&byte| byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
1434
/// Returns the stride used after commit index `idx` when choosing which
/// commits receive a bitmap.
///
/// - `idx <= 100`: 0.
/// - `100 < idx <= 20000`: `idx - 100`, capped at 100.
/// - `idx > 20000`: `idx - 20000`, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    match idx {
        0..=MUST_REGION => 0,
        _ if idx <= MIN_REGION => (idx - MUST_REGION).min(MIN_COMMITS),
        _ => (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS),
    }
}
1453
1454pub fn build_pack_bitmap(
1468 db: &FileObjectDatabase,
1469 format: ObjectFormat,
1470 index_entries: &[PackIndexEntry],
1471 pack_checksum: &ObjectId,
1472 preferred_tips: &HashSet<ObjectId>,
1473) -> Result<Option<Vec<u8>>> {
1474 let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
1477 by_offset.sort_by_key(|&slot| index_entries[slot].offset);
1478 let bit_order: Vec<ObjectId> = by_offset
1479 .into_iter()
1480 .map(|slot| index_entries[slot].oid)
1481 .collect();
1482 build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
1483}
1484
1485pub fn build_midx_bitmap(
1491 db: &FileObjectDatabase,
1492 format: ObjectFormat,
1493 midx_entries: &[sley_pack::MultiPackIndexEntry],
1494 midx_checksum: &ObjectId,
1495 preferred_pack: u32,
1496 preferred_tips: &HashSet<ObjectId>,
1497) -> Result<Option<Vec<u8>>> {
1498 let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
1499 pseudo.sort_by_key(|&slot| {
1500 let entry = &midx_entries[slot];
1501 (
1502 entry.pack_int_id != preferred_pack,
1503 entry.pack_int_id,
1504 entry.offset,
1505 )
1506 });
1507 let bit_order: Vec<ObjectId> = pseudo
1508 .into_iter()
1509 .map(|slot| midx_entries[slot].oid)
1510 .collect();
1511 build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
1512}
1513
1514fn bitmap_num_maximal_commits(
1522 db: &FileObjectDatabase,
1523 format: ObjectFormat,
1524 selected: &[ObjectId],
1525) -> Result<usize> {
1526 let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
1528 let mut stack: Vec<ObjectId> = selected.to_vec();
1529 while let Some(oid) = stack.pop() {
1530 if first_parent.contains_key(&oid) {
1531 continue;
1532 }
1533 let object = db.read_object(&oid)?;
1534 let commit = Commit::parse_ref(format, &object.body)?;
1535 let parent = grafted_parents(db, &oid, commit.parents).first().copied();
1536 first_parent.insert(oid, parent);
1537 if let Some(parent) = parent {
1538 stack.push(parent);
1539 }
1540 }
1541 let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
1543 for parent in first_parent.values().flatten() {
1544 *pending_children.entry(*parent).or_default() += 1;
1545 }
1546 let word_count = selected.len().div_ceil(64);
1547 struct MaximalEnt {
1548 mask: Vec<u64>,
1549 maximal: bool,
1550 }
1551 let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
1552 for (bit, oid) in selected.iter().enumerate() {
1553 let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
1554 mask: vec![0u64; word_count],
1555 maximal: true,
1556 });
1557 ent.mask[bit / 64] |= 1u64 << (bit % 64);
1558 ent.maximal = true;
1559 }
1560 let mut queue: Vec<ObjectId> = first_parent
1561 .keys()
1562 .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
1563 .copied()
1564 .collect();
1565 let mut num_maximal = 0usize;
1566 while let Some(oid) = queue.pop() {
1567 if let Some(ent) = ents.remove(&oid) {
1568 if ent.maximal {
1569 num_maximal += 1;
1570 }
1571 if let Some(Some(parent)) = first_parent.get(&oid) {
1572 match ents.entry(*parent) {
1573 std::collections::hash_map::Entry::Vacant(vacant) => {
1574 vacant.insert(MaximalEnt {
1576 mask: ent.mask.clone(),
1577 maximal: false,
1578 });
1579 }
1580 std::collections::hash_map::Entry::Occupied(mut occupied) => {
1581 let parent_ent = occupied.get_mut();
1582 let c_not_p = ent
1583 .mask
1584 .iter()
1585 .zip(&parent_ent.mask)
1586 .any(|(child, parent)| child & !parent != 0);
1587 if c_not_p {
1588 let p_not_c = parent_ent
1589 .mask
1590 .iter()
1591 .zip(&ent.mask)
1592 .any(|(parent, child)| parent & !child != 0);
1593 for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
1594 *parent |= child;
1595 }
1596 parent_ent.maximal = p_not_c;
1597 }
1598 }
1599 }
1600 }
1601 }
1602 if let Some(Some(parent)) = first_parent.get(&oid)
1603 && let Some(remaining) = pending_children.get_mut(parent)
1604 {
1605 *remaining -= 1;
1606 if *remaining == 0 {
1607 queue.push(*parent);
1608 }
1609 }
1610 }
1611 Ok(num_maximal)
1612}
1613
/// Builds a serialized reachability bitmap over the objects in `bit_order`.
///
/// `bit_order[i]` is the object that owns bit `i`. `checksum` is handed to the
/// `PackBitmapWriter` together with the per-bit object types.
///
/// Returns `Ok(None)` when `bit_order` is empty or has more than `u32::MAX`
/// entries, and when a selected commit reaches an object that is not in
/// `bit_order` (a warning is printed to stderr in that case).
///
/// # Errors
/// Propagates object read, commit parse and bitmap writer errors.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // Rank every bit position by the byte order of its object id;
    // `index_position[bit]` is that rank.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    // Object id -> bit position.
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    let mut object_types = Vec::with_capacity(object_count);
    // What is recorded about each commit found in `bit_order`.
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Use the header when it is available; otherwise read the whole object.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Newest committer timestamp first (stable for equal dates).
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Few commits: every one gets a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default to the commit at the end of the window `i..=i + next`.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    // The first preferred tip in the window wins outright.
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    // Otherwise the last commit with two or more parents wins.
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // The maximal-commit count is only computed when GIT_TRACE2_EVENT is set.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    let word_count = object_count.div_ceil(64);
    // Finished bitmaps of selected commits, reused when a later walk reaches
    // one of them.
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    // Walk the selection in reverse, i.e. oldest selected commit first.
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            if let Some(stored) = memo.get(&oid) {
                // The stored bitmap already holds this commit's whole closure.
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    // Entries are added in selection order, not in the reverse walk order.
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
1765
1766fn bitmap_mark_tree(
1770 db: &impl ObjectReader,
1771 format: ObjectFormat,
1772 tree: &ObjectId,
1773 oid_to_pack: &HashMap<ObjectId, u32>,
1774 acc: &mut [u64],
1775) -> Result<bool> {
1776 let Some(&pack_pos) = oid_to_pack.get(tree) else {
1777 eprintln!(
1778 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
1779 );
1780 return Ok(false);
1781 };
1782 if bitset_get(acc, pack_pos) {
1783 return Ok(true);
1784 }
1785 bitset_set(acc, pack_pos);
1786 let object = db.read_object(tree)?;
1787 for entry in TreeEntries::new(format, &object.body) {
1788 let entry = entry?;
1789 if entry.is_gitlink() {
1790 continue;
1791 }
1792 if entry.is_tree() {
1793 if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
1794 return Ok(false);
1795 }
1796 } else {
1797 let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
1798 eprintln!(
1799 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
1800 entry.oid
1801 );
1802 return Ok(false);
1803 };
1804 bitset_set(acc, blob_pos);
1805 }
1806 }
1807 Ok(true)
1808}
1809
/// A reachability bitmap loaded from disk and expanded into plain `u64` words.
pub struct LoadedPackBitmap {
    /// Number of objects (bit positions) the bitmap covers.
    object_count: u32,
    /// Object id -> bit position.
    oid_to_pack: HashMap<ObjectId, u32>,
    /// Bit position -> object id.
    pack_to_oid: Vec<ObjectId>,
    /// Expanded bitmap words for each commit that has a stored bitmap.
    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
    /// Type bitmap words for commits.
    commits: Vec<u64>,
    /// Type bitmap words for trees.
    trees: Vec<u64>,
    /// Type bitmap words for blobs.
    blobs: Vec<u64>,
    /// Type bitmap words for tags.
    tags: Vec<u64>,
}
1823
impl LoadedPackBitmap {
    /// Number of objects (bit positions) covered by this bitmap.
    pub fn object_count(&self) -> u32 {
        self.object_count
    }

    /// Bit position of `oid`, or `None` when the object is not covered.
    pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
        self.oid_to_pack.get(oid).copied()
    }

    /// Object id stored at bit `position`, or `None` when out of range.
    pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
        self.pack_to_oid.get(position as usize)
    }

    /// Stored bitmap words for commit `oid`, or `None` when that commit has no
    /// stored bitmap.
    pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
        self.commit_words.get(oid)
    }

    /// Iterates, in unspecified order, over the commits that have a stored
    /// bitmap.
    pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
        self.commit_words.keys()
    }

    /// Type bitmap words for `object_type`.
    pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
        match object_type {
            ObjectType::Commit => &self.commits,
            ObjectType::Tree => &self.trees,
            ObjectType::Blob => &self.blobs,
            ObjectType::Tag => &self.tags,
        }
    }

    /// Number of `u64` words needed to hold `object_count` bits.
    fn word_count(&self) -> usize {
        (self.object_count as usize).div_ceil(64)
    }
}
1863
1864pub fn load_pack_bitmap(
1871 objects_dir: &Path,
1872 format: ObjectFormat,
1873) -> Result<Option<LoadedPackBitmap>> {
1874 let pack_dir = objects_dir.join("pack");
1875 if !pack_dir.exists() {
1876 return Ok(None);
1877 }
1878 if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
1881 return Ok(Some(bitmap));
1882 }
1883 let mut bitmap_paths = Vec::new();
1884 for entry in fs::read_dir(&pack_dir)? {
1885 let path = entry?.path();
1886 if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
1887 && path
1888 .file_name()
1889 .and_then(|name| name.to_str())
1890 .is_some_and(|name| name.starts_with("pack-"))
1891 {
1892 bitmap_paths.push(path);
1893 }
1894 }
1895 bitmap_paths.sort();
1896 for bitmap_path in bitmap_paths {
1897 match load_pack_bitmap_file(&bitmap_path, format) {
1898 Ok(Some(bitmap)) => return Ok(Some(bitmap)),
1899 Ok(None) | Err(_) => continue,
1900 }
1901 }
1902 Ok(None)
1903}
1904
/// Tries to load the bitmap that belongs to `pack_dir/multi-pack-index`.
///
/// Every failure along the way — missing or unreadable files, parse errors, a
/// checksum mismatch, an out-of-range reverse-index position — results in
/// `Ok(None)`; as written, this function never returns `Err`.
fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
    let midx_path = pack_dir.join("multi-pack-index");
    if !midx_path.exists() {
        return Ok(None);
    }
    let Ok(midx_bytes) = fs::read(&midx_path) else {
        return Ok(None);
    };
    let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
        return Ok(None);
    };
    // The bitmap file name embeds the multi-pack-index checksum.
    let bitmap_path = pack_dir.join(format!(
        "multi-pack-index-{}.bitmap",
        midx.checksum.to_hex()
    ));
    if !bitmap_path.exists() {
        return Ok(None);
    }
    let object_count = midx.objects.len();
    // GIT_TEST_MIDX_READ_RIDX set to "0" or "false" (any case) disables the
    // embedded reverse index; unset or unreadable counts as enabled.
    let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
        .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
        .unwrap_or(true);
    let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
        (Some(chunk), true) => {
            sley_core::trace2::data("load_midx_revindex", "source", "midx");
            chunk.clone()
        }
        _ => {
            // Fall back to the separate `.rev` file next to the index.
            let rev_path =
                pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
            let Ok(rev_bytes) = fs::read(&rev_path) else {
                return Ok(None);
            };
            let Ok(parsed_rev) =
                sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
            else {
                return Ok(None);
            };
            sley_core::trace2::data("load_midx_revindex", "source", "rev");
            parsed_rev.positions
        }
    };
    let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
        return Ok(None);
    };
    let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
        Ok(parsed) => parsed,
        Err(_) => return Ok(None),
    };
    // The bitmap must have been written for exactly this multi-pack-index.
    if parsed.pack_checksum != midx.checksum {
        return Ok(None);
    }

    // Each reverse-index element is a position into `midx.objects`; the order
    // of the elements defines the bit positions.
    let mut pack_to_oid = Vec::with_capacity(object_count);
    for &midx_pos in &reverse_index {
        let Some(entry) = midx.objects.get(midx_pos as usize) else {
            return Ok(None);
        };
        pack_to_oid.push(entry.oid);
    }
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }
    // Bitmap entries name their commit by position in `midx.objects`.
    match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
        midx.objects.get(position).map(|entry| entry.oid)
    }) {
        Ok(loaded) => Ok(Some(loaded)),
        Err(_) => Ok(None),
    }
}
1987
1988fn load_pack_bitmap_file(
1989 bitmap_path: &Path,
1990 format: ObjectFormat,
1991) -> Result<Option<LoadedPackBitmap>> {
1992 let index_path = bitmap_path.with_extension("idx");
1993 if !index_path.exists() {
1994 return Ok(None);
1995 }
1996 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
1997 let object_count = index.entries.len();
1998 let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
1999 if parsed.pack_checksum != index.pack_checksum {
2000 return Ok(None);
2001 }
2002
2003 let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2004 pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2005 let mut pack_to_oid = Vec::with_capacity(object_count);
2006 for index_pos in &pack_order {
2007 pack_to_oid.push(index.entries[*index_pos as usize].oid);
2008 }
2009 let mut oid_to_pack = HashMap::with_capacity(object_count);
2010 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2011 oid_to_pack.insert(*oid, pack_pos as u32);
2012 }
2013
2014 assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2015 index.entries.get(position).map(|entry| entry.oid)
2016 })
2017 .map(Some)
2018}
2019
2020fn assemble_loaded_bitmap(
2025 parsed: PackBitmapIndex,
2026 object_count: usize,
2027 pack_to_oid: Vec<ObjectId>,
2028 oid_to_pack: HashMap<ObjectId, u32>,
2029 lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2030) -> Result<LoadedPackBitmap> {
2031 let word_count = object_count.div_ceil(64);
2032 let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2033 let mut words = bitmap.to_words()?;
2034 words.resize(word_count, 0);
2035 Ok(words)
2036 };
2037
2038 let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2039 let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2040 for (entry_index, entry) in parsed.entries.iter().enumerate() {
2041 let mut words = expand(&entry.bitmap)?;
2042 if entry.xor_offset > 0 {
2043 let base_index = entry_index - entry.xor_offset as usize;
2044 let base = &resolved[base_index];
2045 for (dst, src) in words.iter_mut().zip(base.iter()) {
2046 *dst ^= *src;
2047 }
2048 }
2049 let words = Arc::new(words);
2050 resolved.push(Arc::clone(&words));
2051 let commit_oid = lookup_oid(entry.object_position as usize)
2052 .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2053 commit_words.insert(commit_oid, words);
2054 }
2055
2056 Ok(LoadedPackBitmap {
2057 object_count: object_count as u32,
2058 oid_to_pack,
2059 pack_to_oid,
2060 commit_words,
2061 commits: expand(&parsed.type_bitmaps.commits)?,
2062 trees: expand(&parsed.type_bitmaps.trees)?,
2063 blobs: expand(&parsed.type_bitmaps.blobs)?,
2064 tags: expand(&parsed.type_bitmaps.tags)?,
2065 })
2066}
2067
/// Outcome of a bitmap-assisted reachability walk.
pub struct BitmapWalkResult {
    /// One bit per bitmap position; a set bit means the object at that
    /// position was reached.
    pub words: Vec<u64>,
    /// Reached objects that have no position in the bitmap, with the type
    /// they were reached as, in discovery order.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
2075
2076impl BitmapWalkResult {
2077 pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2079 for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2080 *dst &= !*src;
2081 }
2082 let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2083 self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2084 }
2085}
2086
2087pub fn bitmap_reachable(
2098 bitmap: &LoadedPackBitmap,
2099 db: &impl ObjectReader,
2100 format: ObjectFormat,
2101 roots: &[ObjectId],
2102 include_objects: bool,
2103) -> Result<BitmapWalkResult> {
2104 let mut walk = BitmapFillWalk {
2105 bitmap,
2106 words: vec![0u64; bitmap.word_count()],
2107 extended: Vec::new(),
2108 extended_seen: HashSet::new(),
2109 };
2110 let mut commit_stack: Vec<ObjectId> = Vec::new();
2111
2112 for root in roots {
2113 let mut oid = *root;
2114 loop {
2116 let object = db.read_object(&oid)?;
2117 match object.object_type {
2118 ObjectType::Tag => {
2119 walk.mark(&oid, ObjectType::Tag);
2120 let tag = Tag::parse_ref(format, &object.body)?;
2121 oid = tag.object;
2122 }
2123 ObjectType::Commit => {
2124 commit_stack.push(oid);
2125 break;
2126 }
2127 ObjectType::Tree => {
2128 walk.mark_tree_closure(db, format, &oid)?;
2129 break;
2130 }
2131 ObjectType::Blob => {
2132 walk.mark(&oid, ObjectType::Blob);
2133 break;
2134 }
2135 }
2136 }
2137 }
2138
2139 while let Some(oid) = commit_stack.pop() {
2140 if let Some(position) = bitmap.pack_position(&oid) {
2141 if bitset_get(&walk.words, position) {
2142 continue;
2143 }
2144 if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
2145 bitset_or(&mut walk.words, stored);
2146 continue;
2147 }
2148 bitset_set(&mut walk.words, position);
2149 } else {
2150 if walk.extended_seen.contains(&oid) {
2151 continue;
2152 }
2153 walk.extended_seen.insert(oid);
2154 walk.extended.push((oid, ObjectType::Commit));
2155 }
2156 let object = db.read_object(&oid)?;
2157 let commit = Commit::parse_ref(format, &object.body)?;
2158 commit_stack.extend(grafted_parents(db, &oid, commit.parents));
2159 if include_objects {
2160 walk.mark_tree_closure(db, format, &commit.tree)?;
2161 }
2162 }
2163
2164 Ok(BitmapWalkResult {
2165 words: walk.words,
2166 extended: walk.extended,
2167 })
2168}
2169
/// Mutable state of one `bitmap_reachable` walk.
struct BitmapFillWalk<'a> {
    /// The loaded bitmap that supplies bit positions for object ids.
    bitmap: &'a LoadedPackBitmap,
    /// Accumulated result bits, one per bitmap position.
    words: Vec<u64>,
    /// Reached objects that have no bitmap position, in discovery order.
    extended: Vec<(ObjectId, ObjectType)>,
    /// Ids already recorded in `extended`, used to avoid duplicates.
    extended_seen: HashSet<ObjectId>,
}
2176
2177impl BitmapFillWalk<'_> {
2178 fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2180 if let Some(position) = self.bitmap.pack_position(oid) {
2181 if bitset_get(&self.words, position) {
2182 return false;
2183 }
2184 bitset_set(&mut self.words, position);
2185 true
2186 } else {
2187 if !self.extended_seen.insert(*oid) {
2188 return false;
2189 }
2190 self.extended.push((*oid, object_type));
2191 true
2192 }
2193 }
2194
2195 fn mark_tree_closure(
2199 &mut self,
2200 db: &impl ObjectReader,
2201 format: ObjectFormat,
2202 tree: &ObjectId,
2203 ) -> Result<()> {
2204 if !self.mark(tree, ObjectType::Tree) {
2205 return Ok(());
2206 }
2207 let object = db.read_object(tree)?;
2208 for entry in TreeEntries::new(format, &object.body) {
2209 let entry = entry?;
2210 if entry.is_gitlink() {
2211 continue;
2212 }
2213 if entry.is_tree() {
2214 self.mark_tree_closure(db, format, &entry.oid)?;
2215 } else {
2216 self.mark(&entry.oid, ObjectType::Blob);
2217 }
2218 }
2219 Ok(())
2220 }
2221}
2222
/// An in-memory object store keyed by object id.
#[derive(Debug)]
pub struct ObjectDatabase {
    /// Hash format used to compute and validate object ids.
    format: ObjectFormat,
    /// Stored objects, guarded by a mutex so they can be written through `&self`.
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    /// Promisor flag set via `with_promisor`; it is not read anywhere in the
    /// code visible here.
    promisor: bool,
}
2234
2235impl ObjectDatabase {
2236 pub fn new(format: ObjectFormat) -> Self {
2237 Self {
2238 format,
2239 objects: Mutex::new(HashMap::new()),
2240 promisor: false,
2241 }
2242 }
2243
2244 pub fn with_promisor(mut self, promisor: bool) -> Self {
2245 self.promisor = promisor;
2246 self
2247 }
2248
2249 pub fn contains(&self, oid: &ObjectId) -> bool {
2250 self.objects
2251 .lock()
2252 .map(|objects| objects.contains_key(oid))
2253 .unwrap_or(false)
2254 }
2255
2256 pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2257 let object = self.read_object(oid)?;
2258 let actual = object.object_id(self.format)?;
2259 if &actual == oid {
2260 Ok(())
2261 } else {
2262 Err(GitError::InvalidObject(format!(
2263 "object id mismatch: expected {oid}, got {actual}"
2264 )))
2265 }
2266 }
2267}
2268
2269impl ObjectReader for ObjectDatabase {
2270 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
2271 self.objects
2272 .lock()
2273 .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
2274 .get(oid)
2275 .map(Arc::clone)
2276 .or_else(|| implied_empty_tree_object(self.format, oid))
2277 .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
2278 }
2279}
2280
2281impl ObjectWriter for ObjectDatabase {
2282 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
2283 let oid = object.object_id(self.format)?;
2284 self.objects
2285 .lock()
2286 .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
2287 .entry(oid)
2288 .or_insert_with(|| Arc::new(object));
2289 Ok(oid)
2290 }
2291}
2292
/// An alternate object location, identified by its path.
// NOTE(review): presumably the path of an alternate objects directory — confirm against callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
2297
/// Settings describing how missing objects are treated in a partial clone.
// NOTE(review): the field meanings below are inferred from their names; no
// consumer is visible in this part of the file — confirm against callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether promised-but-missing objects are tolerated.
    pub allow_missing_promised_objects: bool,
}
2303
/// Shared, mutex-guarded map from a path to the loaded bytes of a pack.
// NOTE(review): presumably keyed by the pack file path — confirm against callers.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
2308
/// The bytes of a pack file, either memory-mapped or held on the heap.
#[derive(Debug)]
enum PackData {
    /// Memory-mapped pack contents (only with the `mmap` feature).
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// Pack contents read fully into memory.
    Heap(Vec<u8>),
}
2317
/// Lets either kind of `PackData` be used as a plain byte slice.
impl std::ops::Deref for PackData {
    type Target = [u8];

    /// Returns the pack bytes, whichever variant holds them.
    fn deref(&self) -> &[u8] {
        match self {
            #[cfg(feature = "mmap")]
            Self::Mapped(mapped) => mapped,
            Self::Heap(bytes) => bytes,
        }
    }
}
2329
/// Loads the pack at `pack_path`, memory-mapping it when possible and reading
/// it into memory when mapping fails.
///
/// # Errors
/// Propagates the read error when the fallback read fails.
#[cfg(feature = "mmap")]
fn load_pack_data(pack_path: &Path) -> Result<PackData> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(pack_path) {
        return Ok(PackData::Mapped(mapped));
    }
    // Mapping failed: read the file into memory instead.
    let bytes = fs::read(pack_path)?;
    Ok(PackData::Heap(bytes))
}
2339
2340#[cfg(not(feature = "mmap"))]
2341fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2342 Ok(PackData::Heap(fs::read(pack_path)?))
2343}
2344
/// Shared, mutex-guarded LRU cache of decoded objects keyed by object id.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;

/// Shared map from a path to that path's own offset-keyed LRU cache.
// NOTE(review): presumably one delta-base cache per pack file — confirm against callers.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;

/// Shared map from a pack offset to an `(ObjectType, u64)` header pair.
// NOTE(review): the `u64` is presumably the object size from the entry header — confirm.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Shared map from a path to that path's `PackHeaderTypeCache`.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;

/// Default decoded-object cache budget (96 MiB), used when
/// `SLEY_OBJECT_CACHE_BYTES` is unset or unparsable.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;

/// Default delta-base cache budget (96 MiB), used when
/// `SLEY_DELTA_BASE_CACHE_BYTES` is unset or unparsable.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
2382
2383fn cached_object_cost(object: &EncodedObject) -> usize {
2387 object.body.len().saturating_add(64)
2388}
2389
/// Reads a byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as a `usize`. `default` is returned when
/// the variable is unset, is not valid Unicode, or does not parse.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|raw| raw.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
2398
2399fn object_cache_budget() -> usize {
2406 static BUDGET: OnceLock<usize> = OnceLock::new();
2407 *BUDGET.get_or_init(|| {
2408 cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
2409 })
2410}
2411
2412fn delta_base_cache_budget() -> usize {
2416 static BUDGET: OnceLock<usize> = OnceLock::new();
2417 *BUDGET.get_or_init(|| {
2418 cache_budget_from_env(
2419 "SLEY_DELTA_BASE_CACHE_BYTES",
2420 DEFAULT_DELTA_BASE_CACHE_BYTES,
2421 )
2422 })
2423}
2424
/// Reports whether `SLEY_VERIFY_READS` is switched on.
///
/// The variable counts as on when it is set to anything other than an empty
/// (after trimming) value or `0`. It is evaluated on first use and remembered
/// for the rest of the process.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    let enabled = VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS")
            .map(|value| !matches!(value.trim(), "" | "0"))
            .unwrap_or(false)
    });
    *enabled
}
2442
/// A byte-budgeted least-recently-used cache of encoded objects.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    /// Maximum total cost the cache may hold; 0 disables caching.
    budget: usize,
    /// Sum of `cached_object_cost` over the cached objects.
    used: usize,
    /// Cached objects by key.
    map: HashMap<K, Arc<EncodedObject>>,
    /// Keys in recency order: least recently used at the front.
    order: VecDeque<K>,
}
2457
2458impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
2459 fn new(budget: usize) -> Self {
2460 Self {
2461 budget,
2462 used: 0,
2463 map: HashMap::new(),
2464 order: VecDeque::new(),
2465 }
2466 }
2467
2468 fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
2469 let object = Arc::clone(self.map.get(key)?);
2470 self.touch(key);
2471 Some(object)
2472 }
2473
2474 fn touch(&mut self, key: &K) {
2477 if let Some(position) = self.order.iter().position(|existing| existing == key)
2478 && let Some(found) = self.order.remove(position)
2479 {
2480 self.order.push_back(found);
2481 }
2482 }
2483
2484 fn remove(&mut self, key: &K) {
2486 if let Some(object) = self.map.remove(key) {
2487 self.used = self.used.saturating_sub(cached_object_cost(&object));
2488 }
2489 if let Some(position) = self.order.iter().position(|existing| existing == key) {
2490 self.order.remove(position);
2491 }
2492 }
2493
2494 fn clear(&mut self) {
2495 self.map.clear();
2496 self.order.clear();
2497 self.used = 0;
2498 }
2499
2500 fn put(&mut self, key: K, object: Arc<EncodedObject>) {
2501 if self.budget == 0 {
2502 return;
2503 }
2504 let cost = cached_object_cost(&object);
2505 if cost > self.budget {
2509 self.remove(&key);
2510 return;
2511 }
2512 if let Some(previous) = self.map.insert(key.clone(), object) {
2513 self.used = self
2515 .used
2516 .saturating_sub(cached_object_cost(&previous))
2517 .saturating_add(cost);
2518 self.touch(&key);
2519 } else {
2520 self.used = self.used.saturating_add(cost);
2521 self.order.push_back(key);
2522 }
2523 while self.used > self.budget {
2524 let Some(evicted) = self.order.pop_front() else {
2525 break;
2526 };
2527 if let Some(object) = self.map.remove(&evicted) {
2528 self.used = self.used.saturating_sub(cached_object_cost(&object));
2529 }
2530 }
2531 }
2532}
2533
/// LRU of decoded objects keyed by object id.
type LruObjectCache = LruCache<ObjectId>;
/// LRU of decoded objects keyed by a `u64`; `PackDeltaCacheAdapter` uses the
/// key as a pack offset.
type LruOffsetCache = LruCache<u64>;
2538
2539struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
2544
2545impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
2546 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
2547 self.0.lock().ok()?.get(&offset)
2548 }
2549
2550 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
2551 if let Ok(mut cache) = self.0.lock() {
2552 cache.put(offset, object);
2553 }
2554 }
2555}
2556
2557struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
2561
2562impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
2563 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
2564 self.0.lock().ok()?.get(&pack_offset).copied()
2565 }
2566
2567 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
2568 if let Ok(mut cache) = self.0.lock() {
2569 cache.insert(pack_offset, header);
2570 }
2571 }
2572}
2573
/// Shared map from a pack index (`.idx`) path to its parsed `PackIndex`.
type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;

/// Shared map from a multi-pack-index path to its parsed `MultiPackIndex`.
type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
2583
/// One pack found by `scan_pack_listing`: an index file together with the
/// pack file that shares its stem.
#[derive(Debug, Clone)]
struct DiscoveredPack {
    /// Path of the `.idx` file.
    idx: PathBuf,
    /// Path of the matching `.pack` file.
    pack: PathBuf,
}
2590
/// Shared map from a pack directory to the most recently scanned listing of
/// the packs inside it.
type PackListingCache = Arc<Mutex<HashMap<PathBuf, Arc<Vec<DiscoveredPack>>>>>;
2598
/// On-disk object database combining loose objects, pack files and alternate
/// object directories.
///
/// Every cache field is held behind an `Arc`, so the derived `Clone` yields
/// a handle that shares the same caches.
#[derive(Debug, Clone)]
pub struct FileObjectDatabase {
    /// Loose-object store rooted at `objects_dir`.
    loose: LooseObjectStore,
    /// The `objects` directory this database reads from and writes to.
    objects_dir: PathBuf,
    /// Additional object directories consulted on a miss (see
    /// `alternate_object_dirs`); empty for `without_alternates`.
    alternates: Vec<PathBuf>,
    /// Hash format every object id in this store must use.
    format: ObjectFormat,
    /// Loaded pack data keyed by pack path (see `cached_pack_bytes`).
    pack_bytes: PackBytesCache,
    /// Parsed pack indexes keyed by index path.
    pack_indexes: PackIndexCache,
    /// Parsed multi-pack-indexes keyed by their path.
    multi_pack_indexes: MultiPackIndexCache,
    /// Scanned pack listings keyed by pack directory.
    pack_listing: PackListingCache,
    /// LRU of decoded objects keyed by object id.
    decoded: DecodedObjectCache,
    /// Per-pack delta caches keyed by pack path (see `pack_delta_cache`).
    pack_deltas: PackDeltaCaches,
    /// Per-pack header caches keyed by pack path (see `pack_header_type_cache`).
    pack_header_types: PackHeaderTypeCaches,
    /// Object ids listed in the `shallow` file, loaded on first use by
    /// `is_shallow_graft`.
    shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
}
2617
2618fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
2622 let Ok(contents) = std::fs::read_to_string(shallow_file) else {
2623 return HashSet::new();
2624 };
2625 contents
2626 .lines()
2627 .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
2628 .collect()
2629}
2630
2631pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
2632 env::var_os("GIT_OBJECT_DIRECTORY")
2633 .map(PathBuf::from)
2634 .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
2635}
2636
/// Returns the common directory for the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, used verbatim when set.
/// 2. The trimmed contents of `<git_dir>/commondir`; a relative value is
///    resolved against `git_dir`, and the result is canonicalized when
///    possible (falling back to the non-canonical path).
/// 3. `git_dir` itself.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return PathBuf::from(overridden);
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = PathBuf::from(pointer.trim());
    let resolved = if target.is_absolute() {
        target
    } else {
        git_dir.join(target)
    };
    fs::canonicalize(&resolved).unwrap_or(resolved)
}
2654
2655pub fn repository_object_ids(
2656 git_dir: impl AsRef<Path>,
2657 format: ObjectFormat,
2658) -> Result<Vec<ObjectId>> {
2659 object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
2660}
2661
2662pub fn object_ids_in_objects_dir(
2663 objects_dir: impl AsRef<Path>,
2664 format: ObjectFormat,
2665) -> Result<Vec<ObjectId>> {
2666 let objects_dir = objects_dir.as_ref();
2667 let mut oids = HashSet::new();
2668 collect_loose_object_ids(objects_dir, format, &mut oids)?;
2669 collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
2670 let mut oids = oids.into_iter().collect::<Vec<_>>();
2671 oids.sort_by_key(ObjectId::to_hex);
2672 Ok(oids)
2673}
2674
2675fn collect_loose_object_ids(
2676 objects_dir: &Path,
2677 format: ObjectFormat,
2678 oids: &mut HashSet<ObjectId>,
2679) -> Result<()> {
2680 if !objects_dir.exists() {
2681 return Ok(());
2682 }
2683 let hex_len = format.hex_len();
2684 for entry in fs::read_dir(objects_dir)? {
2685 let entry = entry?;
2686 if !entry.file_type()?.is_dir() {
2687 continue;
2688 }
2689 let name = entry.file_name();
2690 let Some(fanout) = name.to_str() else {
2691 continue;
2692 };
2693 if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
2694 continue;
2695 }
2696 for object_entry in fs::read_dir(entry.path())? {
2697 let object_entry = object_entry?;
2698 if !object_entry.file_type()?.is_file() {
2699 continue;
2700 }
2701 let name = object_entry.file_name();
2702 let Some(suffix) = name.to_str() else {
2703 continue;
2704 };
2705 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
2706 continue;
2707 }
2708 oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
2709 }
2710 }
2711 Ok(())
2712}
2713
2714fn collect_loose_fanout_object_ids(
2715 objects_dir: &Path,
2716 format: ObjectFormat,
2717 fanout: u8,
2718 oids: &mut HashSet<ObjectId>,
2719) -> Result<()> {
2720 let fanout_hex = format!("{fanout:02x}");
2721 let fanout_dir = objects_dir.join(&fanout_hex);
2722 let entries = match fs::read_dir(&fanout_dir) {
2723 Ok(entries) => entries,
2724 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
2725 Err(err) => return Err(GitError::Io(err.to_string())),
2726 };
2727 let hex_len = format.hex_len();
2728 for object_entry in entries {
2729 let object_entry = object_entry?;
2730 let name = object_entry.file_name();
2731 let Some(suffix) = name.to_str() else {
2732 continue;
2733 };
2734 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
2735 continue;
2736 }
2737 oids.insert(ObjectId::from_hex(format, &format!("{fanout_hex}{suffix}"))?);
2738 }
2739 Ok(())
2740}
2741
/// Bookkeeping for which loose objects are known to exist.
///
/// NOTE(review): the code that fills and queries this cache is outside this
/// part of the file; the field notes below are inferred from names and types
/// and should be confirmed against those use sites.
#[derive(Debug, Default)]
struct LoosePresenceCache {
    /// Presumably the fan-out bytes whose directories have already been scanned.
    loaded_fanouts: HashSet<u8>,
    /// Presumably the object ids found in the scanned fan-out directories.
    objects: HashSet<ObjectId>,
}
2747
2748pub fn packed_object_ids(
2753 objects_dir: impl AsRef<Path>,
2754 format: ObjectFormat,
2755) -> Result<HashSet<ObjectId>> {
2756 let mut oids = HashSet::new();
2757 collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
2758 Ok(oids)
2759}
2760
2761fn collect_packed_object_ids(
2762 pack_dir: &Path,
2763 format: ObjectFormat,
2764 oids: &mut HashSet<ObjectId>,
2765) -> Result<()> {
2766 if !pack_dir.exists() {
2767 return Ok(());
2768 }
2769 let midx_path = pack_dir.join("multi-pack-index");
2770 if midx_path.exists() {
2771 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
2772 oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
2773 }
2774 for entry in fs::read_dir(pack_dir)? {
2775 let path = entry?.path();
2776 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
2777 continue;
2778 }
2779 let index = PackIndex::parse(&fs::read(path)?, format)?;
2780 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
2781 }
2782 Ok(())
2783}
2784
2785impl FileObjectDatabase {
    /// Returns the object hash format this database was created with.
    pub fn object_format(&self) -> ObjectFormat {
        self.format
    }
2790
    /// Returns the `objects` directory this database is rooted at.
    pub fn objects_dir(&self) -> &Path {
        &self.objects_dir
    }
2795
2796 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
2797 let objects_dir = objects_dir.into();
2798 Self {
2799 loose: LooseObjectStore::new(objects_dir.clone(), format),
2800 alternates: alternate_object_dirs(&objects_dir),
2801 objects_dir,
2802 format,
2803 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
2804 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2805 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2806 pack_listing: Arc::new(Mutex::new(HashMap::new())),
2807 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
2808 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
2809 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
2810 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
2811 }
2812 }
2813
2814 fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
2815 let objects_dir = objects_dir.into();
2816 Self {
2817 loose: LooseObjectStore::new(objects_dir.clone(), format),
2818 alternates: Vec::new(),
2819 objects_dir,
2820 format,
2821 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
2822 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2823 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
2824 pack_listing: Arc::new(Mutex::new(HashMap::new())),
2825 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
2826 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
2827 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
2828 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
2829 }
2830 }
2831
2832 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
2833 Self::new(repository_objects_dir(git_dir), format)
2834 }
2835
2836 pub fn refresh_read_cache(&self) {
2841 if let Ok(mut cache) = self.pack_listing.lock() {
2842 cache.clear();
2843 }
2844 if let Ok(mut cache) = self.pack_indexes.lock() {
2845 cache.clear();
2846 }
2847 if let Ok(mut cache) = self.multi_pack_indexes.lock() {
2848 cache.clear();
2849 }
2850 if let Ok(mut cache) = self.pack_bytes.lock() {
2851 cache.clear();
2852 }
2853 if let Ok(mut cache) = self.pack_deltas.lock() {
2854 cache.clear();
2855 }
2856 if let Ok(mut cache) = self.pack_header_types.lock() {
2857 cache.clear();
2858 }
2859 if let Ok(mut cache) = self.decoded.lock() {
2860 cache.clear();
2861 }
2862 self.loose.invalidate_cache();
2863 }
2864
    /// Returns the loose-object store backing this database.
    pub fn loose(&self) -> &LooseObjectStore {
        &self.loose
    }
2868
2869 pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
2870 self.install_pack_with_options(pack, RawPackInstallOptions::default())
2871 }
2872
2873 pub fn install_pack_with_options(
2874 &self,
2875 pack: &PackWrite,
2876 options: RawPackInstallOptions,
2877 ) -> Result<PackInstallResult> {
2878 if pack.checksum.format() != self.format {
2879 return Err(GitError::InvalidObjectId(format!(
2880 "pack checksum uses {}, store uses {}",
2881 pack.checksum.format().name(),
2882 self.format.name()
2883 )));
2884 }
2885 for entry in &pack.entries {
2886 if entry.oid.format() != self.format {
2887 return Err(GitError::InvalidObjectId(format!(
2888 "pack entry {} uses {}, store uses {}",
2889 entry.oid,
2890 entry.oid.format().name(),
2891 self.format.name()
2892 )));
2893 }
2894 }
2895 let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
2896 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
2897 if canonical_index.pack_checksum != pack.checksum
2898 || parsed_index.pack_checksum != pack.checksum
2899 {
2900 return Err(GitError::InvalidFormat(
2901 "pack and index checksums do not match pack write".into(),
2902 ));
2903 }
2904 if pack.index != canonical_index.index {
2905 return Err(GitError::InvalidFormat(
2906 "pack index does not match pack contents".into(),
2907 ));
2908 }
2909
2910 let pack_dir = self.objects_dir.join("pack");
2911 fs::create_dir_all(&pack_dir)?;
2912 let pack_name = format!("pack-{}", pack.checksum.to_hex());
2913 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
2914 let index_path = pack_dir.join(format!("{pack_name}.idx"));
2915 if !pack_path.exists() || !index_path.exists() {
2916 write_pack_component(&pack_path, &pack.pack)?;
2917 write_pack_component(&index_path, &pack.index)?;
2918 }
2919 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
2920 Ok(PackInstallResult {
2921 pack_name,
2922 pack_path,
2923 index_path,
2924 promisor_path,
2925 object_ids: canonical_index
2926 .entries
2927 .iter()
2928 .map(|entry| entry.oid)
2929 .collect(),
2930 })
2931 }
2932
2933 pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
2941 self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
2942 }
2943
2944 pub fn install_written_pack_with_options(
2945 &self,
2946 pack: &PackWrite,
2947 options: RawPackInstallOptions,
2948 ) -> Result<PackInstallResult> {
2949 validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
2950 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
2951 if parsed_index.pack_checksum != pack.checksum {
2952 return Err(GitError::InvalidFormat(
2953 "pack write index checksum does not match pack".into(),
2954 ));
2955 }
2956 if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
2957 return Err(GitError::InvalidFormat(
2958 "pack write index does not match generated entries".into(),
2959 ));
2960 }
2961 self.install_generated_pack_unchecked(pack, options)
2962 }
2963
2964 fn install_generated_pack_unchecked(
2965 &self,
2966 pack: &PackWrite,
2967 options: RawPackInstallOptions,
2968 ) -> Result<PackInstallResult> {
2969 let pack_dir = self.objects_dir.join("pack");
2970 fs::create_dir_all(&pack_dir)?;
2971 let pack_name = format!("pack-{}", pack.checksum.to_hex());
2972 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
2973 let index_path = pack_dir.join(format!("{pack_name}.idx"));
2974 if !pack_path.exists() || !index_path.exists() {
2975 write_pack_component(&pack_path, &pack.pack)?;
2976 write_pack_component(&index_path, &pack.index)?;
2977 }
2978 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
2979 Ok(PackInstallResult {
2980 pack_name,
2981 pack_path,
2982 index_path,
2983 promisor_path,
2984 object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
2985 })
2986 }
2987
2988 pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
2989 self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
2990 }
2991
2992 pub fn install_raw_pack_with_options(
2993 &self,
2994 pack_bytes: &[u8],
2995 options: RawPackInstallOptions,
2996 ) -> Result<PackInstallResult> {
2997 let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
2998 let pack_dir = self.objects_dir.join("pack");
2999 fs::create_dir_all(&pack_dir)?;
3000 let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
3001 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3002 let index_path = pack_dir.join(format!("{pack_name}.idx"));
3003 if !pack_path.exists() || !index_path.exists() {
3004 write_pack_component(&pack_path, pack_bytes)?;
3005 write_pack_component(&index_path, &built.index)?;
3006 }
3007 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3008 Ok(PackInstallResult {
3009 pack_name,
3010 pack_path,
3011 index_path,
3012 promisor_path,
3013 object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
3014 })
3015 }
3016
3017 pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
3018 if self.loose.exists(oid)? {
3019 return Ok(true);
3020 }
3021 if self.find_pack_containing(oid)?.is_some() {
3022 return Ok(true);
3023 }
3024 for alternate in &self.alternates {
3025 if Self::without_alternates(alternate, self.format).contains(oid)? {
3026 return Ok(true);
3027 }
3028 }
3029 self.loose.invalidate_cache();
3032 self.loose.exists(oid)
3033 }
3034
3035 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3036 let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
3037 .into_iter()
3038 .collect::<HashSet<_>>();
3039 for alternate in &self.alternates {
3040 oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
3041 }
3042 let mut oids = oids.into_iter().collect::<Vec<_>>();
3043 oids.sort_by_key(ObjectId::to_hex);
3044 Ok(oids)
3045 }
3046
3047 pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3048 if let Some(disk_size) = self.loose.disk_size(oid)? {
3049 return Ok(Some(ObjectStorageInfo {
3050 disk_size,
3051 deltabase: zero_oid(self.format)?,
3052 }));
3053 }
3054 if let Some(info) = self.packed_object_storage_info(oid)? {
3055 return Ok(Some(info));
3056 }
3057 for alternate in &self.alternates {
3058 if let Some(info) =
3059 Self::without_alternates(alternate, self.format).object_storage_info(oid)?
3060 {
3061 return Ok(Some(info));
3062 }
3063 }
3064 self.loose.invalidate_cache();
3067 if let Some(disk_size) = self.loose.disk_size(oid)? {
3068 return Ok(Some(ObjectStorageInfo {
3069 disk_size,
3070 deltabase: zero_oid(self.format)?,
3071 }));
3072 }
3073 Ok(None)
3074 }
3075
3076 pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
3077 validate_object_id_prefix(self.format, prefix)?;
3078 let mut matches = Vec::new();
3079 for oid in self.object_ids()? {
3080 if object_id_matches_prefix(&oid, prefix) {
3081 matches.push(oid);
3082 }
3083 }
3084 Ok(match matches.len() {
3085 0 => ObjectPrefixResolution::Missing,
3086 1 => ObjectPrefixResolution::Unique(matches.remove(0)),
3087 _ => ObjectPrefixResolution::Ambiguous(matches),
3088 })
3089 }
3090
3091 pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
3101 if implied_empty_tree_object(self.format, oid).is_some() {
3102 return Ok(Some((ObjectType::Tree, 0)));
3103 }
3104 if let Ok(mut cache) = self.decoded.lock()
3105 && let Some(object) = cache.get(oid)
3106 {
3107 return Ok(Some((object.object_type, object.body.len() as u64)));
3108 }
3109 if let Some(header) = self.loose.read_header(oid)? {
3110 return Ok(Some(header));
3111 }
3112 if let Some(pack_paths) = self.find_pack_containing(oid)? {
3113 let bytes = self.cached_pack_bytes(&pack_paths.pack)?;
3114 let type_cache = self.pack_header_type_cache(&pack_paths.pack);
3119 let resolve_ref_base = |base: &ObjectId| {
3120 self.read_object_header(base)
3121 .map(|header| header.map(|(t, _)| t))
3122 };
3123 let header = match &type_cache {
3124 Some(cache) => {
3125 let mut adapter = PackHeaderTypeCacheAdapter(cache);
3126 sley_pack::read_object_header_at_with_cache(
3127 &bytes,
3128 pack_paths.offset,
3129 self.format,
3130 resolve_ref_base,
3131 &mut adapter,
3132 )?
3133 }
3134 None => sley_pack::read_object_header_at(
3135 &bytes,
3136 pack_paths.offset,
3137 self.format,
3138 resolve_ref_base,
3139 )?,
3140 };
3141 return Ok(Some(header));
3142 }
3143 for alternate in &self.alternates {
3144 if let Some(header) =
3145 Self::without_alternates(alternate, self.format).read_object_header(oid)?
3146 {
3147 return Ok(Some(header));
3148 }
3149 }
3150 self.loose.invalidate_cache();
3153 if let Some(header) = self.loose.read_header(oid)? {
3154 return Ok(Some(header));
3155 }
3156 Ok(None)
3157 }
3158
3159 fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
3160 if let Ok(mut cache) = self.decoded.lock()
3163 && let Some(object) = cache.get(oid)
3164 {
3165 return Ok(Some(object));
3166 }
3167 let Some(pack_paths) = self.find_pack_containing(oid)? else {
3168 return Ok(None);
3169 };
3170 let bytes = self.cached_pack_bytes(&pack_paths.pack)?;
3171 let delta_cache = self.pack_delta_cache(&pack_paths.pack);
3176 let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
3177 let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
3183 let object = match &delta_adapter {
3184 Some(adapter) => sley_pack::read_object_at_with_cache_arc(
3185 &bytes,
3186 pack_paths.offset,
3187 self.format,
3188 resolve_ref_base,
3189 adapter,
3190 )?,
3191 None => sley_pack::read_object_at_arc(
3192 &bytes,
3193 pack_paths.offset,
3194 self.format,
3195 resolve_ref_base,
3196 )?,
3197 };
3198 if verify_reads_enabled() {
3202 let actual = object.object_id(self.format)?;
3203 if actual != *oid {
3204 return Err(GitError::InvalidObject(format!(
3205 "pack object id mismatch: index says {oid}, decoded {actual}"
3206 )));
3207 }
3208 }
3209 if let Ok(mut cache) = self.decoded.lock() {
3210 cache.put(*oid, Arc::clone(&object));
3211 }
3212 Ok(Some(object))
3213 }
3214
3215 fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
3219 let mut caches = self.pack_deltas.lock().ok()?;
3220 let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
3221 Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
3222 });
3223 Some(Arc::clone(cache))
3224 }
3225
3226 fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
3230 let mut caches = self.pack_header_types.lock().ok()?;
3231 let cache = caches
3232 .entry(pack_path.to_path_buf())
3233 .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
3234 Some(Arc::clone(cache))
3235 }
3236
3237 fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
3242 if let Ok(cache) = self.pack_bytes.lock()
3243 && let Some(bytes) = cache.get(pack_path)
3244 {
3245 return Ok(Arc::clone(bytes));
3246 }
3247 let bytes = Arc::new(load_pack_data(pack_path)?);
3248 if let Ok(mut cache) = self.pack_bytes.lock() {
3249 cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
3250 }
3251 Ok(bytes)
3252 }
3253
3254 fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
3258 if let Ok(cache) = self.pack_indexes.lock()
3259 && let Some(index) = cache.get(index_path)
3260 {
3261 return Ok(Arc::clone(index));
3262 }
3263 let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
3264 if let Ok(mut cache) = self.pack_indexes.lock() {
3265 cache.insert(index_path.to_path_buf(), Arc::clone(&index));
3266 }
3267 Ok(index)
3268 }
3269
3270 fn cached_multi_pack_index(&self, midx_path: &Path) -> Result<Option<Arc<MultiPackIndex>>> {
3274 if !midx_path.exists() {
3275 return Ok(None);
3276 }
3277 if let Ok(cache) = self.multi_pack_indexes.lock()
3278 && let Some(midx) = cache.get(midx_path)
3279 {
3280 return Ok(Some(Arc::clone(midx)));
3281 }
3282 let midx = Arc::new(MultiPackIndex::parse(&fs::read(midx_path)?, self.format)?);
3283 if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3284 cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
3285 }
3286 Ok(Some(midx))
3287 }
3288
3289 fn cached_pack_listing(
3296 &self,
3297 pack_dir: &Path,
3298 force_rescan: bool,
3299 ) -> Result<Arc<Vec<DiscoveredPack>>> {
3300 if !force_rescan
3301 && let Ok(cache) = self.pack_listing.lock()
3302 && let Some(listing) = cache.get(pack_dir)
3303 {
3304 return Ok(Arc::clone(listing));
3305 }
3306 let scanned = Arc::new(scan_pack_listing(pack_dir)?);
3307 if let Ok(mut cache) = self.pack_listing.lock() {
3308 match cache.get(pack_dir) {
3309 Some(existing) if same_pack_set(existing, &scanned) => {
3312 return Ok(Arc::clone(existing));
3313 }
3314 _ => {
3315 cache.insert(pack_dir.to_path_buf(), Arc::clone(&scanned));
3316 }
3317 }
3318 }
3319 Ok(scanned)
3320 }
3321
3322 fn find_in_pack_listing(
3325 &self,
3326 listing: &[DiscoveredPack],
3327 oid: &ObjectId,
3328 ) -> Result<Option<PackPaths>> {
3329 for pack in listing {
3330 let index = self.cached_pack_index(&pack.idx)?;
3331 if let Some(entry) = index.find(oid) {
3332 return Ok(Some(PackPaths {
3333 pack: pack.pack.clone(),
3334 offset: entry.offset,
3335 }));
3336 }
3337 }
3338 Ok(None)
3339 }
3340
    /// Locates the pack file and entry offset that hold `oid` in this
    /// database's own `pack` directory. Alternates are not consulted here.
    ///
    /// Returns `Ok(None)` when no index lists the object.
    ///
    /// # Errors
    ///
    /// `InvalidObjectId` when `oid` uses a different hash format than the
    /// store; otherwise any error from loading an index or scanning the
    /// directory.
    fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackPaths>> {
        if oid.format() != self.format {
            return Err(GitError::InvalidObjectId(format!(
                "object {oid} uses {}, store uses {}",
                oid.format().name(),
                self.format.name()
            )));
        }
        let pack_dir = self.objects_dir.join("pack");
        // Fast path: consult only what is already cached in memory, the
        // multi-pack-index first and then the pack listing.
        if let Some(midx) = self.cached_loaded_multi_pack_index()
            && let Some(pack_paths) = self.midx_pack_paths(&pack_dir, &midx, oid)?
        {
            return Ok(Some(pack_paths));
        }
        if let Some(listing) = self.cached_loaded_pack_listing(&pack_dir)
            && let Some(pack_paths) = self.find_in_pack_listing(&listing, oid)?
        {
            return Ok(Some(pack_paths));
        }

        // Slow path: go to the filesystem, loading the multi-pack-index and
        // the pack listing if they were not cached yet.
        if !pack_dir.exists() {
            return Ok(None);
        }
        if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
            return Ok(Some(pack_paths));
        }
        let listing = self.cached_pack_listing(&pack_dir, false)?;
        if let Some(pack_paths) = self.find_in_pack_listing(&listing, oid)? {
            return Ok(Some(pack_paths));
        }
        // Still missing: a pack may have appeared since the listing was
        // cached, so force a rescan. `cached_pack_listing` hands back the
        // previously cached `Arc` when the set of packs is unchanged, so
        // pointer equality means there is nothing new to search.
        let refreshed = self.cached_pack_listing(&pack_dir, true)?;
        if Arc::ptr_eq(&listing, &refreshed) {
            return Ok(None);
        }
        self.find_in_pack_listing(&refreshed, oid)
    }
3387
    /// Returns the packed size and delta base of `oid`, or `None` when no
    /// local pack contains it.
    ///
    /// `disk_size` is the distance from the entry's offset to the end offset
    /// reported by `scan_pack_index_offsets`. `deltabase` is the base object
    /// id for delta entries and the zero object id otherwise.
    ///
    /// # Errors
    ///
    /// `InvalidFormat` when the pack is shorter than its trailing checksum,
    /// when the computed end offset lies before the entry, or when an
    /// offset-delta base cannot be mapped back to an object id.
    fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
        let Some(pack_paths) = self.find_pack_containing(oid)? else {
            return Ok(None);
        };
        // The pack ends with a checksum of `raw_len()` bytes; its start is
        // passed on as the upper bound for entry data.
        let pack_len = fs::metadata(&pack_paths.pack)?.len();
        let trailer_offset = pack_len
            .checked_sub(self.format.raw_len() as u64)
            .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
        let index_path = pack_paths.pack.with_extension("idx");
        let index = self.cached_pack_index(&index_path)?;
        let pack = self.cached_pack_bytes(&pack_paths.pack)?;
        let delta_base = pack_entry_delta_base(self.format, &pack, pack_paths.offset)?;
        // Only offset deltas need the index scan to translate the base
        // offset into an object id; ref deltas already carry the id.
        let delta_base_offset = match &delta_base {
            Some(PackDeltaBase::Offset(offset)) => Some(*offset),
            Some(PackDeltaBase::Ref(_)) | None => None,
        };
        let offset_info =
            scan_pack_index_offsets(&index, pack_paths.offset, trailer_offset, delta_base_offset)?;
        let disk_size = offset_info
            .end_offset
            .checked_sub(pack_paths.offset)
            .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
        let deltabase = match delta_base {
            Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
                GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
            })?,
            Some(PackDeltaBase::Ref(oid)) => oid,
            None => zero_oid(self.format)?,
        };
        Ok(Some(ObjectStorageInfo {
            disk_size,
            deltabase,
        }))
    }
3427
3428 fn find_midx_pack_containing(
3429 &self,
3430 pack_dir: &Path,
3431 oid: &ObjectId,
3432 ) -> Result<Option<PackPaths>> {
3433 let midx_path = pack_dir.join("multi-pack-index");
3434 let Some(midx) = self.cached_multi_pack_index(&midx_path)? else {
3435 return Ok(None);
3436 };
3437 self.midx_pack_paths(pack_dir, &midx, oid)
3438 }
3439
3440 fn midx_pack_paths(
3447 &self,
3448 pack_dir: &Path,
3449 midx: &MultiPackIndex,
3450 oid: &ObjectId,
3451 ) -> Result<Option<PackPaths>> {
3452 let Some(entry) = midx.find(oid) else {
3453 return Ok(None);
3454 };
3455 let Some(pack_name) = midx.pack_names.get(entry.pack_int_id as usize) else {
3456 return Err(GitError::InvalidFormat(
3457 "multi-pack-index object points past pack table".into(),
3458 ));
3459 };
3460 let pack_file_name = pack_name
3461 .strip_suffix(".idx")
3462 .map(|stem| format!("{stem}.pack"))
3463 .unwrap_or_else(|| pack_name.clone());
3464 let pack = pack_dir.join(pack_file_name);
3465 Ok(Some(PackPaths {
3466 pack,
3467 offset: entry.offset,
3468 }))
3469 }
3470
3471 fn cached_loaded_multi_pack_index(&self) -> Option<Arc<MultiPackIndex>> {
3475 let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
3476 let cache = self.multi_pack_indexes.lock().ok()?;
3477 cache.get(&midx_path).map(Arc::clone)
3478 }
3479
3480 fn cached_loaded_pack_listing(&self, pack_dir: &Path) -> Option<Arc<Vec<DiscoveredPack>>> {
3486 let cache = self.pack_listing.lock().ok()?;
3487 cache.get(pack_dir).map(Arc::clone)
3488 }
3489}
3490
3491fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
3492 if prefix.len() < 4 || prefix.len() > format.hex_len() {
3493 return Err(GitError::InvalidObjectId(format!(
3494 "expected 4 to {} hex digits for {}, got {}",
3495 format.hex_len(),
3496 format.name(),
3497 prefix.len()
3498 )));
3499 }
3500 if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3501 return Err(GitError::InvalidObjectId(format!(
3502 "non-hex object id prefix {prefix}"
3503 )));
3504 }
3505 Ok(())
3506}
3507
3508fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
3509 oid.to_hex()
3510 .as_bytes()
3511 .iter()
3512 .zip(prefix.as_bytes())
3513 .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
3514}
3515
3516fn scan_pack_listing(pack_dir: &Path) -> Result<Vec<DiscoveredPack>> {
3520 let mut packs = Vec::new();
3521 for entry in fs::read_dir(pack_dir)? {
3522 let entry = entry?;
3523 let idx = entry.path();
3524 if idx.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3525 continue;
3526 }
3527 let Some(stem) = idx.file_stem() else {
3528 continue;
3529 };
3530 let pack = idx.with_file_name(format!("{}.pack", stem.to_string_lossy()));
3531 if !pack.exists() {
3532 continue;
3533 }
3534 packs.push(DiscoveredPack { idx, pack });
3535 }
3536 packs.sort_by(|left, right| left.idx.cmp(&right.idx));
3538 Ok(packs)
3539}
3540
3541fn same_pack_set(left: &[DiscoveredPack], right: &[DiscoveredPack]) -> bool {
3544 left.len() == right.len()
3545 && left
3546 .iter()
3547 .zip(right.iter())
3548 .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
3549}
3550
/// Collects the alternate object directories configured for `objects_dir`.
///
/// Sources, in order:
/// 1. `GIT_ALTERNATE_OBJECT_DIRECTORIES`, split with the platform's path-list
///    rules (`:` on Unix, `;` on Windows); empty segments are dropped.
/// 2. `objects_dir/info/alternates`, one path per line. Blank lines, lines
///    starting with `#` and lines that are not valid UTF-8 are skipped, a
///    trailing `\r` is removed, and relative paths are resolved against
///    `objects_dir`.
///
/// A missing or unreadable alternates file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        // `split_paths` works on the raw OS string, so non-UTF-8 paths stay
        // intact and Windows drive letters are not split at their colon.
        alternates.extend(env::split_paths(&value).filter(|path| !path.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
3581
3582impl ObjectReader for FileObjectDatabase {
3583 fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
3584 self.shallow_grafts
3585 .get_or_init(|| {
3586 let shallow_file = self
3587 .objects_dir
3588 .parent()
3589 .map(|git_dir| git_dir.join("shallow"));
3590 match shallow_file {
3591 Some(path) => read_shallow_grafts(&path, self.format),
3592 None => HashSet::new(),
3593 }
3594 })
3595 .contains(oid)
3596 }
3597
3598 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
3599 if let Some(object) = implied_empty_tree_object(self.format, oid) {
3600 return Ok(object);
3601 }
3602 match self.loose.read_object(oid) {
3603 Ok(object) => return Ok(object),
3604 Err(GitError::NotFound(_)) => {}
3605 Err(err) => return Err(err),
3606 }
3607 if let Some(object) = self.read_packed_object(oid)? {
3608 return Ok(object);
3609 }
3610 for alternate in &self.alternates {
3611 match Self::without_alternates(alternate, self.format).read_object(oid) {
3612 Ok(object) => return Ok(object),
3613 Err(GitError::NotFound(_)) => {}
3614 Err(err) => return Err(err),
3615 }
3616 }
3617 self.loose.invalidate_cache();
3623 match self.loose.read_object(oid) {
3624 Ok(object) => return Ok(object),
3625 Err(GitError::NotFound(_)) => {}
3626 Err(err) => return Err(err),
3627 }
3628 Err(GitError::object_not_found_in(
3629 *oid,
3630 MissingObjectContext::Read,
3631 ))
3632 }
3633}
3634
3635impl ObjectWriter for FileObjectDatabase {
3636 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
3637 let oid = object.object_id(self.format)?;
3643 if self.contains(&oid)? {
3644 return Ok(oid);
3645 }
3646 self.loose.write_object(object)
3647 }
3648}
3649
/// A pack file path paired with a byte offset.
// NOTE(review): presumably `offset` locates an entry inside `pack`; the users
// of this type are outside this part of the file — confirm.
#[derive(Debug, Clone)]
struct PackPaths {
    /// Path of the pack file.
    pack: PathBuf,
    /// Byte offset associated with `pack`.
    offset: u64,
}
3655
3656fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
3657 if path.exists() {
3658 return Ok(());
3659 }
3660 let parent = path
3661 .parent()
3662 .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
3663 fs::create_dir_all(parent)?;
3664 let temp_path = unique_temp_path(parent);
3665 let write_result = (|| -> Result<()> {
3666 {
3667 let mut file = fs::OpenOptions::new()
3668 .write(true)
3669 .create_new(true)
3670 .open(&temp_path)?;
3671 file.write_all(bytes)?;
3672 file.sync_all()?;
3673 }
3674 match fs::rename(&temp_path, path) {
3675 Ok(()) => Ok(()),
3676 Err(_) if path.exists() => {
3677 let _ = fs::remove_file(&temp_path);
3678 Ok(())
3679 }
3680 Err(err) => Err(GitError::Io(err.to_string())),
3681 }
3682 })();
3683 if write_result.is_err() {
3684 let _ = fs::remove_file(&temp_path);
3685 }
3686 write_result
3687}
3688
3689fn write_promisor_pack_sidecar(
3690 pack_dir: &Path,
3691 pack_name: &str,
3692 promisor: bool,
3693) -> Result<Option<PathBuf>> {
3694 if !promisor {
3695 return Ok(None);
3696 }
3697 let path = pack_dir.join(format!("{pack_name}.promisor"));
3698 write_pack_component(&path, b"")?;
3699 Ok(Some(path))
3700}
3701
/// Number of leading inflated bytes within which a loose object header must
/// contain its terminating NUL; headers without one in that window are
/// reported as too long.
const MAX_LOOSE_HEADER_LEN: usize = 32;
3707
3708fn loose_header_too_long(oid: &ObjectId) -> GitError {
3713 GitError::InvalidObject(format!(
3714 "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
3715 ))
3716}
3717
3718fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
3722 GitError::InvalidObject(format!("unable to unpack {oid} header"))
3723}
3724
/// Explains why the first two bytes of `input` are not an acceptable zlib
/// stream header, if they are not.
///
/// The bytes are treated as the CMF/FLG pair and checked in this order:
/// the 16-bit big-endian value must be a multiple of 31, the low nibble of
/// CMF must be 8, the high nibble of CMF must not exceed 7, and bit `0x20`
/// of FLG (preset dictionary) must be clear. The first failing check decides
/// the message.
///
/// Returns `None` when fewer than two bytes are available or every check
/// passes.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [cmf, flg, ..] => (*cmf, *flg),
        _ => return None,
    };
    let header_word = u16::from_be_bytes([cmf, flg]);
    let diagnostic = if header_word % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if cmf & 0x0f != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if cmf >> 4 > 7 {
        "inflate: data stream error (invalid window size)"
    } else if flg & 0x20 != 0 {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(diagnostic)
}
3748
3749fn emit_inflate_diagnostic(input: &[u8]) {
3752 if let Some(diagnostic) = inflate_header_diagnostic(input) {
3753 eprintln!("error: {diagnostic}");
3754 }
3755}
3756
3757#[derive(Debug, Clone, PartialEq, Eq)]
3760pub enum LooseObjectIntegrity {
3761 Ok,
3763 HashMismatch { actual: ObjectId },
3766 Corrupt,
3769}
3770
3771#[derive(Debug, Clone)]
3772pub struct LooseObjectStore {
3773 objects_dir: PathBuf,
3774 format: ObjectFormat,
3775 loose_cache: Arc<Mutex<LoosePresenceCache>>,
3784}
3785
3786impl LooseObjectStore {
3787 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3788 Self {
3789 objects_dir: objects_dir.into(),
3790 format,
3791 loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
3792 }
3793 }
3794
3795 fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
3800 let mut guard = self.loose_cache.lock().ok()?;
3801 let fanout = oid.as_bytes()[0];
3802 if !guard.loaded_fanouts.contains(&fanout) {
3803 collect_loose_fanout_object_ids(
3804 &self.objects_dir,
3805 self.format,
3806 fanout,
3807 &mut guard.objects,
3808 )
3809 .ok()?;
3810 guard.loaded_fanouts.insert(fanout);
3811 }
3812 Some(guard.objects.contains(oid))
3813 }
3814
3815 fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
3819 if let Ok(mut guard) = self.loose_cache.lock() {
3820 guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
3821 guard.loaded_fanouts = (0..=u8::MAX).collect();
3822 let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
3823 ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
3824 return Ok(ids);
3825 }
3826 loose_object_ids(&self.objects_dir, self.format)
3827 }
3828
3829 fn note_loose_write(&self, oid: ObjectId) {
3833 if let Ok(mut guard) = self.loose_cache.lock() {
3834 guard.objects.insert(oid);
3835 }
3836 }
3837
3838 pub(crate) fn invalidate_cache(&self) {
3841 if let Ok(mut guard) = self.loose_cache.lock() {
3842 *guard = LoosePresenceCache::default();
3843 }
3844 }
3845
3846 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
3847 Self::new(repository_objects_dir(git_dir), format)
3848 }
3849
3850 pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
3851 if oid.format() != self.format {
3852 return Err(GitError::InvalidObjectId(format!(
3853 "object {oid} uses {}, store uses {}",
3854 oid.format().name(),
3855 self.format.name()
3856 )));
3857 }
3858 let hex = oid.to_hex();
3859 Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
3860 }
3861
3862 pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
3863 let path = self.object_path(oid)?;
3864 if self.cached_loose_presence(oid) == Some(false) {
3865 return Ok(false);
3866 }
3867 Ok(path.exists())
3868 }
3869
3870 pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
3871 let path = self.object_path(oid)?;
3872 if self.cached_loose_presence(oid) == Some(false) {
3873 return Ok(None);
3874 }
3875 match fs::metadata(path) {
3876 Ok(metadata) => Ok(Some(metadata.len())),
3877 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
3878 Err(err) => Err(GitError::Io(err.to_string())),
3879 }
3880 }
3881
3882 pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
3887 let path = self.object_path(oid)?;
3888 if self.cached_loose_presence(oid) == Some(false) {
3889 return Ok(None);
3890 }
3891 let mut file = match fs::File::open(&path) {
3892 Ok(file) => file,
3893 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3894 Err(err) => return Err(GitError::Io(err.to_string())),
3895 };
3896 let mut stream_prefix = [0u8; 2];
3901 let prefix_len = read_full_prefix(&mut file, &mut stream_prefix)?;
3902 file.seek(SeekFrom::Start(0))
3903 .map_err(|err| GitError::Io(err.to_string()))?;
3904 let mut decoder = ZlibDecoder::new(file);
3905 let mut header = Vec::new();
3906 let mut byte = [0u8; 1];
3907 loop {
3908 let read = match decoder.read(&mut byte) {
3917 Ok(read) => read,
3918 Err(_) => {
3919 emit_inflate_diagnostic(&stream_prefix[..prefix_len]);
3920 return Err(loose_unpack_header_failed(oid));
3921 }
3922 };
3923 if read == 0 {
3924 return Err(loose_header_too_long(oid));
3925 }
3926 if byte[0] == 0 {
3927 break;
3928 }
3929 header.push(byte[0]);
3930 if header.len() >= MAX_LOOSE_HEADER_LEN {
3933 return Err(loose_header_too_long(oid));
3934 }
3935 }
3936 let header =
3937 std::str::from_utf8(&header).map_err(|err| GitError::InvalidObject(err.to_string()))?;
3938 let (kind, size) = header
3939 .split_once(' ')
3940 .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
3941 let object_type = kind.parse::<ObjectType>()?;
3942 let size = size
3943 .parse::<u64>()
3944 .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
3945 Ok(Some((object_type, size)))
3946 }
3947
3948 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3950 self.loose_object_ids_cached()
3951 }
3952
3953 pub fn verify_object(
3961 &self,
3962 oid: &ObjectId,
3963 display_path: &str,
3964 ) -> Result<Option<LooseObjectIntegrity>> {
3965 let path = self.object_path(oid)?;
3966 let compressed = match fs::read(&path) {
3967 Ok(compressed) => compressed,
3968 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3969 Err(err) => return Err(GitError::Io(err.to_string())),
3970 };
3971 let mut decoder = ZlibDecoder::new(compressed.as_slice());
3972 let mut framed = Vec::new();
3973 if decoder.read_to_end(&mut framed).is_err() {
3974 emit_inflate_diagnostic(&compressed);
3975 if framed_loose_header_terminated(&framed) {
3980 eprintln!("error: unable to unpack contents of {display_path}");
3981 } else {
3982 eprintln!("error: unable to unpack header of {display_path}");
3983 }
3984 return Ok(Some(LooseObjectIntegrity::Corrupt));
3985 }
3986 if !framed_loose_header_terminated(&framed) {
3987 eprintln!("error: unable to unpack header of {display_path}");
3990 return Ok(Some(LooseObjectIntegrity::Corrupt));
3991 }
3992 let Ok(object) = parse_framed_object(&framed) else {
3993 eprintln!("error: unable to parse header of {display_path}");
3994 return Ok(Some(LooseObjectIntegrity::Corrupt));
3995 };
3996 let actual = object.object_id(self.format)?;
3997 if &actual != oid {
3998 return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
3999 }
4000 Ok(Some(LooseObjectIntegrity::Ok))
4001 }
4002}
4003
4004fn framed_loose_header_terminated(framed: &[u8]) -> bool {
4008 framed
4009 .iter()
4010 .take(MAX_LOOSE_HEADER_LEN)
4011 .any(|byte| *byte == 0)
4012}
4013
4014fn read_full_prefix(file: &mut fs::File, prefix: &mut [u8]) -> Result<usize> {
4017 let mut len = 0;
4018 while len < prefix.len() {
4019 let read = file
4020 .read(&mut prefix[len..])
4021 .map_err(|err| GitError::Io(err.to_string()))?;
4022 if read == 0 {
4023 break;
4024 }
4025 len += read;
4026 }
4027 Ok(len)
4028}
4029
4030impl ObjectReader for LooseObjectStore {
4031 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4032 let path = self.object_path(oid)?;
4033 if self.cached_loose_presence(oid) == Some(false) {
4037 return Err(GitError::object_not_found_in(
4038 *oid,
4039 MissingObjectContext::Read,
4040 ));
4041 }
4042 let compressed = match fs::read(&path) {
4043 Ok(compressed) => compressed,
4044 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4045 return Err(GitError::object_not_found_in(
4046 *oid,
4047 MissingObjectContext::Read,
4048 ));
4049 }
4050 Err(err) => return Err(GitError::Io(err.to_string())),
4051 };
4052 let mut decoder = ZlibDecoder::new(compressed.as_slice());
4053 let mut framed = Vec::new();
4054 if decoder.read_to_end(&mut framed).is_err() {
4055 emit_inflate_diagnostic(&compressed);
4056 if !framed_loose_header_terminated(&framed) {
4061 return Err(loose_unpack_header_failed(oid));
4062 }
4063 return Err(GitError::InvalidObject(format!(
4064 "corrupt loose object '{oid}'"
4065 )));
4066 }
4067 if framed
4072 .iter()
4073 .take(MAX_LOOSE_HEADER_LEN)
4074 .all(|byte| *byte != 0)
4075 {
4076 return Err(loose_header_too_long(oid));
4077 }
4078 let object = parse_framed_object(&framed)?;
4079 if verify_reads_enabled() {
4083 let actual = object.object_id(self.format)?;
4084 if &actual != oid {
4085 return Err(GitError::InvalidObject(format!(
4086 "loose object {} hashes to {actual}",
4087 path.display()
4088 )));
4089 }
4090 }
4091 Ok(Arc::new(object))
4092 }
4093}
4094
4095impl ObjectWriter for LooseObjectStore {
4096 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4097 let oid = object.object_id(self.format)?;
4098 let path = self.object_path(&oid)?;
4099 if path.exists() {
4100 self.note_loose_write(oid);
4101 return Ok(oid);
4102 }
4103 let parent = path
4104 .parent()
4105 .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
4106 fs::create_dir_all(parent)?;
4107 let temp_path = unique_temp_path(parent);
4108 let write_result = (|| -> Result<()> {
4109 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4110 encoder.write_all(&object.framed_bytes())?;
4111 let compressed = encoder.finish()?;
4112 {
4113 let mut file = fs::OpenOptions::new()
4114 .write(true)
4115 .create_new(true)
4116 .open(&temp_path)?;
4117 file.write_all(&compressed)?;
4118 }
4128 match fs::rename(&temp_path, &path) {
4129 Ok(()) => Ok(()),
4130 Err(_) if path.exists() => {
4131 let _ = fs::remove_file(&temp_path);
4132 Ok(())
4133 }
4134 Err(err) => Err(GitError::Io(err.to_string())),
4135 }
4136 })();
4137 if write_result.is_err() {
4138 let _ = fs::remove_file(&temp_path);
4139 }
4140 write_result?;
4141 self.note_loose_write(oid);
4142 Ok(oid)
4143 }
4144}
4145
4146fn unique_temp_path(parent: &Path) -> PathBuf {
4147 let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
4148 parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
4149}
4150
4151#[cfg(test)]
4152mod tests {
4153 use super::*;
4154 use sley_core::BString;
4155 use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
4156 use sley_pack::{PackFile, PackWriteOptions};
4157
4158 fn blob_of(byte: u8, len: usize) -> EncodedObject {
4159 EncodedObject::new(ObjectType::Blob, vec![byte; len])
4160 }
4161
4162 fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
4163 Arc::new(blob_of(byte, len))
4164 }
4165
4166 fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
4167 reader
4168 .read_object(oid)
4169 .expect("test operation should succeed")
4170 .as_ref()
4171 .clone()
4172 }
4173
4174 #[test]
4175 fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
4176 let one = cached_object_cost(&blob_of(0, 1000));
4178 let mut cache = LruCache::<u32>::new(one * 2 + 8);
4179 cache.put(1, cached_blob_of(b'a', 1000));
4180 cache.put(2, cached_blob_of(b'b', 1000));
4181 assert!(cache.get(&1).is_some());
4183 cache.put(3, cached_blob_of(b'c', 1000));
4184 assert!(cache.get(&1).is_some());
4186 assert!(cache.get(&2).is_none());
4187 assert!(cache.get(&3).is_some());
4188 }
4189
4190 #[test]
4191 fn lru_cache_zero_budget_is_inert() {
4192 let mut cache = LruCache::<u32>::new(0);
4193 cache.put(1, cached_blob_of(b'a', 16));
4194 assert!(cache.get(&1).is_none());
4195 }
4196
4197 #[test]
4198 fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
4199 let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
4200 cache.put(1, cached_blob_of(b'a', 50));
4201 assert!(cache.get(&1).is_some());
4202 cache.put(1, cached_blob_of(b'b', 10_000));
4205 assert!(cache.get(&1).is_none());
4206 cache.put(2, cached_blob_of(b'c', 50));
4209 assert!(cache.get(&2).is_some());
4210 }
4211
4212 #[test]
4213 fn lru_cache_replacing_entry_updates_byte_accounting() {
4214 let small = cached_object_cost(&blob_of(0, 500));
4217 let mut cache = LruCache::<u32>::new(small * 2 + 200);
4218 cache.put(1, cached_blob_of(b'a', 500));
4219 cache.put(2, cached_blob_of(b'b', 500));
4220 assert!(cache.get(&1).is_some());
4221 assert!(cache.get(&2).is_some());
4222 cache.put(2, cached_blob_of(b'b', 1000));
4227 assert!(cache.get(&2).is_some());
4228 assert!(cache.get(&1).is_none());
4229 }
4230
4231 #[test]
4232 fn write_and_validate_blob() {
4233 let db = ObjectDatabase::new(ObjectFormat::Sha1);
4234 let oid = db
4235 .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
4236 .expect("test operation should succeed");
4237 assert_eq!(oid.to_hex(), "ce013625030ba8dba906f756967f9e9ca394464a");
4238 db.validate(&oid).expect("test operation should succeed");
4239 }
4240
4241 #[test]
4242 fn loose_store_writes_and_reads_object() {
4243 let root = std::env::temp_dir().join(format!(
4244 "sley-loose-store-{}-{}",
4245 std::process::id(),
4246 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
4247 ));
4248 let store = LooseObjectStore::new(root.join("objects"), ObjectFormat::Sha1);
4249 let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
4250 let oid = store
4251 .write_object(object.clone())
4252 .expect("test operation should succeed");
4253 assert_eq!(read_object_for_assert(&store, &oid), object);
4254 assert!(
4255 store
4256 .object_path(&oid)
4257 .expect("test operation should succeed")
4258 .exists()
4259 );
4260 fs::remove_dir_all(root).expect("test operation should succeed");
4261 }
4262
4263 #[test]
4264 fn file_database_reads_object_from_pack_index() {
4265 let root = temp_root("sley-file-odb-pack");
4266 let git_dir = root.join(".git");
4267 let pack_dir = git_dir.join("objects").join("pack");
4268 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
4269 let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
4270 let oid = object
4271 .object_id(ObjectFormat::Sha1)
4272 .expect("test operation should succeed");
4273 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
4274 .expect("test operation should succeed");
4275 let pack_name = written.checksum.to_hex();
4276 fs::write(
4277 pack_dir.join(format!("pack-{pack_name}.pack")),
4278 written.pack,
4279 )
4280 .expect("test operation should succeed");
4281 fs::write(
4282 pack_dir.join(format!("pack-{pack_name}.idx")),
4283 written.index,
4284 )
4285 .expect("test operation should succeed");
4286
4287 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4288 assert!(db.contains(&oid).expect("test operation should succeed"));
4289 assert_eq!(read_object_for_assert(&db, &oid), object);
4290 fs::remove_dir_all(root).expect("test operation should succeed");
4291 }
4292
4293 #[test]
4294 fn file_database_loose_cache_observes_same_process_write_after_miss() {
4295 let root = temp_root("sley-file-odb-loose-cache-write");
4296 let git_dir = root.join(".git");
4297 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4298 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4299
4300 let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
4301 let oid = object
4302 .object_id(ObjectFormat::Sha1)
4303 .expect("test operation should succeed");
4304
4305 assert!(matches!(db.read_object(&oid), Err(GitError::NotFound(_))));
4306 db.loose()
4307 .write_object(object.clone())
4308 .expect("test operation should succeed");
4309
4310 assert_eq!(read_object_for_assert(&db, &oid), object);
4311 fs::remove_dir_all(root).expect("test operation should succeed");
4312 }
4313
4314 #[test]
4315 fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
4316 let root = temp_root("sley-read-object-header");
4317 let git_dir = root.join(".git");
4318 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4319 let format = ObjectFormat::Sha1;
4320 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4321
4322 let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
4324 let loose_oid = db
4325 .write_object(loose.clone())
4326 .expect("test operation should succeed");
4327
4328 let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
4332 let mut child_body = vec![b'a'; 4096];
4333 child_body.extend_from_slice(b" plus a deltified tail\n");
4334 let child = EncodedObject::new(ObjectType::Blob, child_body);
4335 let commitish =
4336 EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());
4337 let base_oid = base
4338 .object_id(format)
4339 .expect("test operation should succeed");
4340 let child_oid = child
4341 .object_id(format)
4342 .expect("test operation should succeed");
4343 let commit_oid = commitish
4344 .object_id(format)
4345 .expect("test operation should succeed");
4346 let options = PackWriteOptions::new()
4347 .with_prefer_ofs_delta(true)
4348 .with_reorder(false);
4349 let pack = PackFile::write_packed_with_options(
4350 &[base.clone(), child.clone(), commitish.clone()],
4351 format,
4352 &options,
4353 )
4354 .expect("test operation should succeed");
4355 db.install_pack(&pack)
4356 .expect("test operation should succeed");
4357
4358 for (oid, want_type, want_len) in [
4361 (&loose_oid, ObjectType::Blob, loose.body.len()),
4362 (&base_oid, ObjectType::Blob, base.body.len()),
4363 (&child_oid, ObjectType::Blob, child.body.len()),
4364 (&commit_oid, ObjectType::Commit, commitish.body.len()),
4365 ] {
4366 assert_eq!(
4367 db.read_object_header(oid)
4368 .expect("test operation should succeed"),
4369 Some((want_type, want_len as u64)),
4370 "header for {oid}"
4371 );
4372 let full = db.read_object(oid).expect("test operation should succeed");
4373 assert_eq!(
4374 db.read_object_header(oid)
4375 .expect("test operation should succeed"),
4376 Some((full.object_type, full.body.len() as u64))
4377 );
4378 }
4379
4380 let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
4381 .expect("test operation should succeed");
4382 assert_eq!(
4383 db.read_object_header(&missing)
4384 .expect("test operation should succeed"),
4385 None
4386 );
4387 fs::remove_dir_all(root).expect("test operation should succeed");
4388 }
4389
4390 #[test]
4391 fn object_storage_info_reports_loose_packed_and_delta_metadata() {
4392 let root = temp_root("sley-object-storage-info");
4393 let git_dir = root.join(".git");
4394 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4395 let format = ObjectFormat::Sha1;
4396 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
4397
4398 let loose = EncodedObject::new(ObjectType::Blob, b"loose storage object\n".to_vec());
4399 let loose_oid = db
4400 .write_object(loose)
4401 .expect("test operation should succeed");
4402 let loose_size = fs::metadata(
4403 db.loose()
4404 .object_path(&loose_oid)
4405 .expect("test operation should succeed"),
4406 )
4407 .expect("test operation should succeed")
4408 .len();
4409 let loose_info = db
4410 .object_storage_info(&loose_oid)
4411 .expect("test operation should succeed")
4412 .expect("test operation should succeed");
4413 assert_eq!(loose_info.disk_size, loose_size);
4414 assert_eq!(
4415 loose_info.deltabase,
4416 zero_oid(format).expect("test operation should succeed")
4417 );
4418
4419 let base = EncodedObject::new(ObjectType::Blob, vec![b'a'; 4096]);
4420 let mut child_body = vec![b'a'; 4096];
4421 child_body.extend_from_slice(b" changed tail\n");
4422 let child = EncodedObject::new(ObjectType::Blob, child_body);
4423 let base_oid = base
4424 .object_id(format)
4425 .expect("test operation should succeed");
4426 let child_oid = child
4427 .object_id(format)
4428 .expect("test operation should succeed");
4429 let options = PackWriteOptions::new()
4430 .with_prefer_ofs_delta(true)
4431 .with_reorder(false);
4432 let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
4433 .expect("test operation should succeed");
4434 db.install_pack(&pack)
4435 .expect("test operation should succeed");
4436
4437 let base_info = db
4438 .object_storage_info(&base_oid)
4439 .expect("test operation should succeed")
4440 .expect("test operation should succeed");
4441 assert!(base_info.disk_size > 0);
4442 assert_eq!(
4443 base_info.deltabase,
4444 zero_oid(format).expect("test operation should succeed")
4445 );
4446
4447 let child_info = db
4448 .object_storage_info(&child_oid)
4449 .expect("test operation should succeed")
4450 .expect("test operation should succeed");
4451 assert!(child_info.disk_size > 0);
4452 assert_eq!(child_info.deltabase, base_oid);
4453
4454 let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
4455 .expect("test operation should succeed");
4456 assert_eq!(
4457 db.object_storage_info(&missing)
4458 .expect("test operation should succeed"),
4459 None
4460 );
4461 fs::remove_dir_all(root).expect("test operation should succeed");
4462 }
4463
4464 #[test]
4465 fn file_database_resolves_unique_loose_object_prefix() {
4466 let root = temp_root("sley-file-odb-prefix-loose");
4467 let git_dir = root.join(".git");
4468 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4469 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4470 let object = EncodedObject::new(ObjectType::Blob, b"prefix loose\n".to_vec());
4471 let oid = db
4472 .write_object(object)
4473 .expect("test operation should succeed");
4474 let prefix = &oid.to_hex()[..8];
4475
4476 assert_eq!(
4477 db.resolve_prefix(prefix)
4478 .expect("test operation should succeed"),
4479 ObjectPrefixResolution::Unique(oid)
4480 );
4481 assert!(
4482 db.object_ids()
4483 .expect("test operation should succeed")
4484 .contains(&oid)
4485 );
4486 fs::remove_dir_all(root).expect("test operation should succeed");
4487 }
4488
4489 #[test]
4490 fn file_database_resolves_unique_packed_object_prefix() {
4491 let root = temp_root("sley-file-odb-prefix-packed");
4492 let git_dir = root.join(".git");
4493 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4494 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4495 let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
4496 let oid = object
4497 .object_id(ObjectFormat::Sha1)
4498 .expect("test operation should succeed");
4499 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
4500 .expect("test operation should succeed");
4501 db.install_pack(&pack)
4502 .expect("test operation should succeed");
4503 let prefix = &oid.to_hex()[..8];
4504
4505 assert_eq!(
4506 db.resolve_prefix(prefix)
4507 .expect("test operation should succeed"),
4508 ObjectPrefixResolution::Unique(oid)
4509 );
4510 fs::remove_dir_all(root).expect("test operation should succeed");
4511 }
4512
4513 #[test]
4514 fn file_database_reports_ambiguous_object_prefix() {
4515 let root = temp_root("sley-file-odb-prefix-ambiguous");
4516 let git_dir = root.join(".git");
4517 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4518 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4519 let mut seen = HashMap::new();
4520 let (prefix, first, second) = (0..10_000)
4521 .find_map(|idx| {
4522 let object =
4523 EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
4524 let oid = db
4525 .write_object(object)
4526 .expect("test operation should succeed");
4527 let prefix = oid.to_hex()[..4].to_string();
4528 seen.insert(prefix.clone(), oid)
4529 .map(|first| (prefix, first, oid))
4530 })
4531 .expect("test should find a 4-hex collision");
4532
4533 let ObjectPrefixResolution::Ambiguous(mut matches) = db
4534 .resolve_prefix(&prefix)
4535 .expect("test operation should succeed")
4536 else {
4537 panic!("expected ambiguous prefix {prefix}");
4538 };
4539 matches.sort_by_key(ObjectId::to_hex);
4540 let mut expected = vec![first, second];
4541 expected.sort_by_key(ObjectId::to_hex);
4542 assert_eq!(matches, expected);
4543 fs::remove_dir_all(root).expect("test operation should succeed");
4544 }
4545
4546 #[test]
4547 fn file_database_rejects_too_short_object_prefix() {
4548 let root = temp_root("sley-file-odb-prefix-short");
4549 let git_dir = root.join(".git");
4550 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4551 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
4552
4553 assert!(matches!(
4554 db.resolve_prefix("abc"),
4555 Err(GitError::InvalidObjectId(_))
4556 ));
4557 fs::remove_dir_all(root).expect("test operation should succeed");
4558 }
4559
4560 #[test]
4561 fn file_database_reads_sha256_object_from_pack_index() {
4562 let root = temp_root("sley-file-odb-pack-sha256");
4563 let git_dir = root.join(".git");
4564 let pack_dir = git_dir.join("objects").join("pack");
4565 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
4566 let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
4567 let oid = object
4568 .object_id(ObjectFormat::Sha256)
4569 .expect("test operation should succeed");
4570 let written =
4571 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4572 .expect("test operation should succeed");
4573 let pack_name = written.checksum.to_hex();
4574 fs::write(
4575 pack_dir.join(format!("pack-{pack_name}.pack")),
4576 written.pack,
4577 )
4578 .expect("test operation should succeed");
4579 fs::write(
4580 pack_dir.join(format!("pack-{pack_name}.idx")),
4581 written.index,
4582 )
4583 .expect("test operation should succeed");
4584
4585 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
4586 assert!(db.contains(&oid).expect("test operation should succeed"));
4587 assert_eq!(read_object_for_assert(&db, &oid), object);
4588 fs::remove_dir_all(root).expect("test operation should succeed");
4589 }
4590
4591 #[test]
4592 fn file_database_installs_sha256_pack_without_loose_objects() {
4593 let root = temp_root("sley-file-odb-install-pack");
4594 let git_dir = root.join(".git");
4595 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4596 let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
4597 let oid = object
4598 .object_id(ObjectFormat::Sha256)
4599 .expect("test operation should succeed");
4600 let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4601 .expect("test operation should succeed");
4602 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
4603
4604 let result = db
4605 .install_pack(&pack)
4606 .expect("test operation should succeed");
4607
4608 assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
4609 assert_eq!(result.object_ids, vec![oid]);
4610 assert!(result.pack_path.exists());
4611 assert!(result.index_path.exists());
4612 assert_eq!(result.promisor_path, None);
4613 assert!(
4614 !db.loose()
4615 .object_path(&oid)
4616 .expect("test operation should succeed")
4617 .exists()
4618 );
4619 assert!(db.contains(&oid).expect("test operation should succeed"));
4620 assert_eq!(read_object_for_assert(&db, &oid), object);
4621 fs::remove_dir_all(root).expect("test operation should succeed");
4622 }
4623
4624 #[test]
4625 fn file_database_installs_raw_sha256_pack_without_loose_objects() {
4626 let root = temp_root("sley-file-odb-install-raw-pack");
4627 let git_dir = root.join(".git");
4628 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
4629 let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
4630 let oid = object
4631 .object_id(ObjectFormat::Sha256)
4632 .expect("test operation should succeed");
4633 let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
4634 .expect("test operation should succeed");
4635 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
4636
4637 let result = db
4638 .install_raw_pack(&pack.pack)
4639 .expect("test operation should succeed");
4640
4641 assert_eq!(result.pack_name, format!("pack-{}", pack.checksum.to_hex()));
4642 assert_eq!(result.object_ids, vec![oid]);
4643 assert!(result.pack_path.exists());
4644 assert!(result.index_path.exists());
4645 assert_eq!(result.promisor_path, None);
4646 assert!(
4647 !db.loose()
4648 .object_path(&oid)
4649 .expect("test operation should succeed")
4650 .exists()
4651 );
4652 assert!(db.contains(&oid).expect("test operation should succeed"));
4653 assert_eq!(read_object_for_assert(&db, &oid), object);
4654 fs::remove_dir_all(root).expect("test operation should succeed");
4655 }
4656
/// `install_pack` must refuse a pack whose index does not match the pack data.
///
/// The index is regenerated from the pack's own entries after flipping one bit
/// of the first entry's CRC32; the test expects installation to return an error.
#[test]
fn file_database_rejects_noncanonical_pack_index() {
    let root = temp_root("sley-file-odb-install-bad-index");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;

    let blob = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
    let good_pack = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");

    // Tamper with the only entry's CRC, then rebuild the index around it.
    let mut tampered_entries = good_pack.entries.clone();
    tampered_entries[0].crc32 ^= 1;
    let mut bad_pack = good_pack.clone();
    bad_pack.index = PackIndex::write_v2(format, &tampered_entries, &good_pack.checksum)
        .expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let outcome = db.install_pack(&bad_pack);
    assert!(outcome.is_err());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
4676
/// Installing a raw pack with `promisor: true` reports a `.promisor` sidecar.
///
/// The sidecar must share the pack's file stem, exist on disk, and be empty.
/// The pack and index must exist, the object must not be stored loose, and it
/// must read back unchanged.
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
    let root = temp_root("sley-file-odb-install-raw-promisor-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;

    let blob = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
    let blob_oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let options = RawPackInstallOptions { promisor: true };
    let installed = db
        .install_raw_pack_with_options(&written.pack, options)
        .expect("test operation should succeed");

    // Sidecar checks: same stem as the pack, `.promisor` extension, empty file.
    let sidecar = installed.promisor_path.expect("promisor sidecar");
    assert_eq!(sidecar.file_stem(), installed.pack_path.file_stem());
    assert_eq!(
        sidecar.extension().and_then(|ext| ext.to_str()),
        Some("promisor")
    );
    assert!(sidecar.exists());
    assert_eq!(
        fs::read(&sidecar).expect("test operation should succeed"),
        b""
    );

    // The object lives in the pack only, never as a loose file.
    assert!(installed.pack_path.exists());
    assert!(installed.index_path.exists());
    assert!(
        !db.loose()
            .object_path(&blob_oid)
            .expect("test operation should succeed")
            .exists()
    );
    assert_eq!(read_object_for_assert(&db, &blob_oid), blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4716
/// A worktree admin directory with a `commondir` file resolves to the main
/// git directory, and its objects directory is `<common>/objects`.
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    let root = temp_root("sley-odb-common-dir");
    let main_git_dir = root.join(".git");
    let worktree_admin = main_git_dir.join("worktrees").join("linked");
    fs::create_dir_all(&worktree_admin).expect("test operation should succeed");
    // `commondir` points two levels up from `worktrees/linked`, i.e. at `.git`.
    fs::write(worktree_admin.join("commondir"), "../..\n")
        .expect("test operation should succeed");

    let expected_common = fs::canonicalize(main_git_dir).expect("test operation should succeed");
    assert_eq!(repository_common_dir(&worktree_admin), expected_common);
    assert_eq!(
        repository_objects_dir(&worktree_admin),
        expected_common.join("objects")
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
4731
/// Walks a commit -> tree -> blob graph and installs it as a pack elsewhere.
///
/// The reachability walk from the commit must report all three objects. The
/// installed pack must list three object ids, and the destination must serve
/// each object without holding a loose file for it.
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    let root = temp_root("sley-reachable-pack");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    // Blob at the bottom of the graph.
    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // Tree with a single entry naming the blob.
    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_oid = source
        .write_object(tree.clone())
        .expect("test operation should succeed");

    // Parentless commit pointing at the tree.
    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_oid = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
        .expect("test operation should succeed");
    assert!(reachable.contains(&commit_oid));
    assert!(reachable.contains(&tree_oid));
    assert!(reachable.contains(&blob_oid));

    let installed =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(installed.object_ids.len(), 3);

    let expected = [
        (&commit_oid, &commit),
        (&tree_oid, &tree),
        (&blob_oid, &blob),
    ];
    for (oid, object) in expected {
        // Packed only: no loose file, yet present and readable.
        assert!(
            !destination
                .loose()
                .object_path(oid)
                .expect("test operation should succeed")
                .exists()
        );
        assert!(
            destination
                .contains(oid)
                .expect("test operation should succeed")
        );
        assert_eq!(read_object_for_assert(&destination, oid), *object);
    }
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4811
/// The walk honours the exclusion set and does not repeat a duplicated start.
///
/// The commit is passed twice as a start and its tree is excluded; the test
/// expects exactly one collected object, whose id is the commit's.
#[test]
fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
    let root = temp_root("sley-reachable-exclusions");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"excluded payload\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    // Same start twice, with the commit's tree excluded.
    let excluded = HashSet::from([tree_oid]);
    let starts = [commit_oid, commit_oid];
    let collected = collect_reachable_objects(&db, format, starts, &excluded)
        .expect("test operation should succeed");

    assert_eq!(collected.len(), 1);
    assert_eq!(
        collected[0]
            .object_id(format)
            .expect("test operation should succeed"),
        commit_oid
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4868
/// `build_reachable_pack` yields a raw pack for a reachable blob, and `None`
/// once every start is excluded.
#[test]
fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
    let root = temp_root("sley-build-reachable-pack");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
    let blob_oid = db
        .write_object(blob)
        .expect("test operation should succeed");

    // With nothing excluded, the pack holds exactly the one blob.
    let nothing_excluded = HashSet::new();
    let built = build_reachable_pack(&db, format, std::iter::once(blob_oid), &nothing_excluded)
        .expect("test operation should succeed")
        .expect("reachable pack should be built");
    assert!(built.pack.starts_with(b"PACK"));
    assert_eq!(built.entries.len(), 1);
    assert_eq!(built.entries[0].oid, blob_oid);

    // Re-running from the pack's own entries with the blob excluded builds nothing.
    let everything_excluded = HashSet::from([blob_oid]);
    let starts = built.entries.into_iter().map(|entry| entry.oid);
    let rebuilt = build_reachable_pack(&db, format, starts, &everything_excluded)
        .expect("test operation should succeed");
    assert!(rebuilt.is_none());
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4901
/// The walk follows a tag to its target, and a missing start produces a typed
/// not-found error.
///
/// The error must carry the missing object id and the `Traversal` context.
#[test]
fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
    let root = temp_root("sley-reachable-tags");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"tagged payload\n".to_vec(),
        ))
        .expect("test operation should succeed");

    // Annotated tag pointing straight at the blob.
    let tag_body = Tag {
        object: blob_oid,
        object_type: ObjectType::Blob,
        name: b"v1".to_vec(),
        tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
        message: b"tag message\n".to_vec(),
        raw_body: None,
    }
    .write();
    let tag_oid = db
        .write_object(EncodedObject::new(ObjectType::Tag, tag_body))
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
        .expect("test operation should succeed");
    assert!(reachable.contains(&tag_oid));
    assert!(reachable.contains(&blob_oid));

    // An id that was never written to the database.
    let absent = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        .expect("test operation should succeed");
    let failure = collect_reachable_object_ids(&db, format, std::iter::once(absent))
        .expect_err("missing traversal root should error");
    let not_found = failure.not_found_kind().expect("typed not found");
    assert_eq!(not_found.object_id(), Some(absent));
    assert_eq!(
        not_found.missing_object_context(),
        Some(MissingObjectContext::Traversal)
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4945
/// With no start objects, `install_reachable_pack` returns `None` and the
/// destination gets no `objects/pack` directory.
#[test]
fn install_reachable_pack_empty_starts_create_no_pack() {
    let root = temp_root("sley-reachable-empty");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let no_starts = Vec::<ObjectId>::new();
    let outcome = install_reachable_pack(&source, &destination, format, no_starts)
        .expect("test operation should succeed");

    assert!(outcome.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4965
/// When the only start is itself excluded, nothing is installed: the call
/// returns `None` and no `objects/pack` directory appears in the destination.
#[test]
fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
    let root = temp_root("sley-reachable-install-excluding");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let format = ObjectFormat::Sha1;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"excluded install\n".to_vec());
    let blob_oid = source
        .write_object(blob)
        .expect("test operation should succeed");
    let excluded = HashSet::from([blob_oid]);

    let starts = std::iter::once(blob_oid);
    let outcome =
        install_reachable_pack_excluding(&source, &destination, format, starts, &excluded)
            .expect("test operation should succeed");

    assert!(outcome.is_none());
    let destination_pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!destination_pack_dir.exists());
    fs::remove_dir_all(root).expect("test operation should succeed");
}
4996
/// Building and installing a reachable pack also works for SHA-256 databases.
///
/// The built pack must start with `PACK` and list the blob. The installed pack
/// must report the blob's id and be readable without a loose copy.
#[test]
fn install_reachable_pack_supports_sha256() {
    let root = temp_root("sley-reachable-pack-sha256");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let format = ObjectFormat::Sha256;
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    // In-memory build first.
    let nothing_excluded = HashSet::new();
    let built = build_reachable_pack(
        &source,
        format,
        std::iter::once(blob_oid),
        &nothing_excluded,
    )
    .expect("test operation should succeed")
    .expect("sha256 reachable pack should be built");
    assert!(built.pack.starts_with(b"PACK"));
    assert_eq!(built.entries[0].oid, blob_oid);

    // Then the on-disk installation into the destination.
    let installed =
        install_reachable_pack(&source, &destination, format, std::iter::once(blob_oid))
            .expect("test operation should succeed")
            .expect("sha256 reachable pack should be written");

    assert_eq!(installed.object_ids, vec![blob_oid]);
    assert!(
        !destination
            .loose()
            .object_path(&blob_oid)
            .expect("test operation should succeed")
            .exists()
    );
    assert_eq!(read_object_for_assert(&destination, &blob_oid), blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5034
/// `install_reachable_pack` accepts any `RawPackInstaller`, not only a
/// file-backed database.
///
/// A recording double captures the bytes it is handed and answers with a
/// preset id list. The test expects exactly one pack starting with `PACK`, and
/// the preset ids echoed back in the result.
#[test]
fn install_helpers_accept_custom_raw_pack_installer() {
    use std::cell::RefCell;

    #[derive(Default)]
    struct RecordingInstaller {
        // Every pack byte buffer passed to `install_raw_pack`, in call order.
        packs: RefCell<Vec<Vec<u8>>>,
        // Ids reported back as "installed" on every call.
        installed: RefCell<Vec<ObjectId>>,
    }

    impl RawPackInstaller for RecordingInstaller {
        fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
            self.packs.borrow_mut().push(pack_bytes.to_vec());
            Ok(RawPackInstallResult {
                object_ids: self.installed.borrow().clone(),
            })
        }
    }

    let format = ObjectFormat::Sha1;
    let source = ObjectDatabase::new(format);
    let blob = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
    let blob_oid = source
        .write_object(blob)
        .expect("test operation should succeed");
    let installer = RecordingInstaller {
        installed: RefCell::new(vec![blob_oid]),
        ..RecordingInstaller::default()
    };

    let outcome = install_reachable_pack(&source, &installer, format, std::iter::once(blob_oid))
        .expect("test operation should succeed")
        .expect("custom installer should receive pack");

    let RecordingInstaller { packs, installed } = installer;
    assert_eq!(outcome.object_ids, installed.into_inner());
    let recorded_packs = packs.into_inner();
    assert_eq!(recorded_packs.len(), 1);
    assert!(recorded_packs[0].starts_with(b"PACK"));
}
5069
/// Objects in two packs are found through a `multi-pack-index` alone.
///
/// Only the `.pack` files and the multi-pack-index are written; no per-pack
/// `.idx` file is. Lookup, prefix resolution and reads must still succeed for
/// objects in both packs.
#[test]
fn file_database_reads_object_from_multi_pack_index() {
    let root = temp_root("sley-file-odb-midx");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;

    let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
    let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");

    // The multi-pack-index is given the `.idx` names; the files written to disk
    // are the matching `.pack` files.
    let first_stem = format!("pack-{}", first_pack.checksum.to_hex());
    let second_stem = format!("pack-{}", second_pack.checksum.to_hex());
    let first_idx_name = format!("{first_stem}.idx");
    let second_idx_name = format!("{second_stem}.idx");
    fs::write(
        pack_dir.join(format!("{first_stem}.pack")),
        first_pack.pack,
    )
    .expect("test operation should succeed");
    fs::write(
        pack_dir.join(format!("{second_stem}.pack")),
        second_pack.pack,
    )
    .expect("test operation should succeed");

    let midx_entries = [
        sley_pack::MultiPackIndexEntry {
            oid: first_oid,
            pack_int_id: 0,
            offset: first_pack.entries[0].offset,
        },
        sley_pack::MultiPackIndexEntry {
            oid: second_oid,
            pack_int_id: 1,
            offset: second_pack.entries[0].offset,
        },
    ];
    let midx = MultiPackIndex::write(
        format,
        2,
        &[first_idx_name, second_idx_name],
        &midx_entries,
    )
    .expect("test operation should succeed");
    fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    assert!(
        db.contains(&second_oid)
            .expect("test operation should succeed")
    );
    assert_eq!(
        db.resolve_prefix(&second_oid.to_hex()[..8])
            .expect("test operation should succeed"),
        ObjectPrefixResolution::Unique(second_oid)
    );
    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5134
/// A pack installed after the database has already served reads is still found.
///
/// Sequence: install and read a first pack, confirm a second object is
/// `NotFound`, install the second pack, then expect both objects to read back.
/// NOTE(review): the test name implies the earlier reads prime a cached pack
/// listing; that caching is not visible here. Confirm against the database
/// implementation.
#[test]
fn file_database_finds_pack_added_after_listing_was_cached() {
    let root = temp_root("sley-file-odb-pack-added-late");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // First pack: installed and read through the same handle.
    let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
    let first_oid = first
        .object_id(format)
        .expect("test operation should succeed");
    let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
        .expect("test operation should succeed");
    db.install_pack(&first_pack)
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    // The second object is not installed yet, so reading it must miss.
    let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
    let second_oid = second
        .object_id(format)
        .expect("test operation should succeed");
    let premature_read = db.read_object(&second_oid);
    assert!(matches!(premature_read, Err(GitError::NotFound(_))));

    // After installing the second pack, the same handle must see both objects.
    let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
        .expect("test operation should succeed");
    db.install_pack(&second_pack)
        .expect("test operation should succeed");
    assert!(
        db.contains(&second_oid)
            .expect("test operation should succeed")
    );
    assert_eq!(read_object_for_assert(&db, &second_oid), second);
    assert_eq!(read_object_for_assert(&db, &first_oid), first);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5183
/// An object present both in a pack and as a loose file reads back correctly.
///
/// The pack and its index are written by hand, then the same object is written
/// through the database and read back.
#[test]
fn file_database_prefers_loose_object_over_packed_object() {
    let root = temp_root("sley-file-odb-prefer-loose");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let packed = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");
    let checksum_hex = packed.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{checksum_hex}.pack"));
    let index_path = pack_dir.join(format!("pack-{checksum_hex}.idx"));
    fs::write(pack_path, packed.pack).expect("test operation should succeed");
    fs::write(index_path, packed.index).expect("test operation should succeed");

    // Now store the very same object through the database as well.
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let blob_oid = db
        .write_object(blob.clone())
        .expect("test operation should succeed");
    assert_eq!(read_object_for_assert(&db, &blob_oid), blob);
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5212
/// Prerequisite verification succeeds when the bundle's single prerequisite
/// object is already in the database.
#[test]
fn bundle_prerequisite_verification_reads_existing_objects() {
    let format = ObjectFormat::Sha1;
    let db = ObjectDatabase::new(format);
    let base = EncodedObject::new(ObjectType::Blob, b"base\n".to_vec());
    let oid = db
        .write_object(base)
        .expect("test operation should succeed");

    // Header-only bundle: one `-<oid>` prerequisite line and no pack data.
    let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
}
5225
/// Prerequisite verification fails when the prerequisite object was never
/// written to the database.
#[test]
fn bundle_prerequisite_verification_reports_missing_objects() {
    let format = ObjectFormat::Sha1;
    let db = ObjectDatabase::new(format);
    // The id is computed only; the object itself is never stored in `db`.
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");

    let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let verdict = verify_bundle_prerequisites(&bundle, &db);
    assert!(verdict.is_err());
}
5237
/// Unbundling writes the pack's objects into the writer and returns the
/// bundle's references unchanged.
#[test]
fn unbundle_objects_writes_pack_entries_and_returns_refs() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let blob_oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Bundle = text header naming one ref, followed by the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{blob_oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
        .expect("test operation should succeed");
    assert_eq!(outcome.written_objects, vec![blob_oid]);
    assert_eq!(outcome.references, bundle.references);
    assert_eq!(read_object_for_assert(&writer, &blob_oid), blob);
}
5262
/// Installing a bundle's pack into a file database stores the object packed,
/// not loose, and returns the bundle's references.
#[test]
fn install_bundle_pack_writes_pack_and_returns_refs() {
    let root = temp_root("sley-install-bundle-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let database = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
    let blob_oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    // Bundle = text header naming one ref, followed by the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{blob_oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = install_bundle_pack(&bundle, &prerequisite_reader, &database)
        .expect("test operation should succeed");

    assert_eq!(outcome.written_objects, vec![blob_oid]);
    assert_eq!(outcome.references, bundle.references);
    assert!(
        database
            .contains(&blob_oid)
            .expect("test operation should succeed")
    );
    assert_eq!(read_object_for_assert(&database, &blob_oid), blob);
    // The object must not have been materialised as a loose file.
    assert!(
        !database
            .loose()
            .object_path(&blob_oid)
            .expect("test operation should succeed")
            .exists()
    );
    fs::remove_dir_all(root).expect("test operation should succeed");
}
5304
/// Unpacking a SHA-256 pack writes its single object into the writer and
/// reports that object's id.
#[test]
fn unpack_packfile_objects_writes_sha256_pack_entries() {
    let format = ObjectFormat::Sha256;
    let writer = ObjectDatabase::new(format);

    let blob = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
    let blob_oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");

    let outcome = unpack_packfile_objects(&pack.pack, format, &writer)
        .expect("test operation should succeed");

    assert_eq!(outcome.written_objects, vec![blob_oid]);
    assert_eq!(read_object_for_assert(&writer, &blob_oid), blob);
}
5321
/// A bundle whose prerequisite is absent is rejected, and none of its pack
/// objects reach the writer.
#[test]
fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
    let format = ObjectFormat::Sha1;
    let prerequisite_reader = ObjectDatabase::new(format);
    let mut writer = ObjectDatabase::new(format);

    // Prerequisite id that is never stored in `prerequisite_reader`.
    let missing = sley_core::object_id_for_bytes(format, "blob", b"missing\n")
        .expect("test operation should succeed");

    let blob = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
    let blob_oid = blob
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");

    let mut bundle_bytes =
        format!("# v2 git bundle\n-{missing} missing\n{blob_oid} refs/heads/main\n\n")
            .into_bytes();
    bundle_bytes.extend(pack.pack);
    let bundle = Bundle::parse(&bundle_bytes, format).expect("test operation should succeed");

    let outcome = unbundle_objects(&bundle, &prerequisite_reader, &mut writer);
    assert!(outcome.is_err());
    assert!(!writer.contains(&blob_oid));
}
5346
5347 fn write_commit_graph(
5350 db: &mut FileObjectDatabase,
5351 payload: &[u8],
5352 ) -> Vec<(ObjectId, EncodedObject)> {
5353 let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
5354 let blob_oid = db
5355 .write_object(blob.clone())
5356 .expect("test operation should succeed");
5357 let tree = EncodedObject::new(
5358 ObjectType::Tree,
5359 Tree {
5360 entries: vec![TreeEntry {
5361 mode: 0o100644,
5362 name: BString::from(b"payload.txt"),
5363 oid: blob_oid,
5364 }],
5365 }
5366 .write(),
5367 );
5368 let tree_oid = db
5369 .write_object(tree.clone())
5370 .expect("test operation should succeed");
5371 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5372 let commit = EncodedObject::new(
5373 ObjectType::Commit,
5374 Commit {
5375 tree: tree_oid,
5376 parents: Vec::new(),
5377 author: identity.clone(),
5378 committer: identity,
5379 encoding: None,
5380 message: b"initial\n".to_vec(),
5381 }
5382 .write(),
5383 );
5384 let commit_oid = db
5385 .write_object(commit.clone())
5386 .expect("test operation should succeed");
5387 vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
5388 }
5389
5390 fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
5391 let root = temp_root("sley-repack-all");
5392 let git_dir = root.join(".git");
5393 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5394 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5395
5396 let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
5398 let packed_oid = packed_blob
5399 .object_id(format)
5400 .expect("test operation should succeed");
5401 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
5402 .expect("test operation should succeed");
5403 let existing = db
5404 .install_pack(&existing_pack)
5405 .expect("test operation should succeed");
5406
5407 let graph = write_commit_graph(&mut db, b"repack payload\n");
5408
5409 let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
5410 expected.insert(packed_oid, packed_blob.clone());
5411
5412 let result = repack_all_objects(&git_dir, format)
5413 .expect("test operation should succeed")
5414 .expect("repository has objects");
5415
5416 assert_eq!(result.object_count, expected.len());
5418 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
5419 assert_eq!(parsed.entries.len(), expected.len());
5420 for entry in &parsed.entries {
5421 let want = expected
5422 .get(&entry.entry.oid)
5423 .expect("packed object was in the repository");
5424 assert_eq!(&entry.object, want);
5425 assert_eq!(
5426 entry
5427 .object
5428 .object_id(format)
5429 .expect("test operation should succeed"),
5430 entry.entry.oid
5431 );
5432 }
5433 let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
5435 assert_eq!(idx.pack_checksum, parsed.checksum);
5436 assert_eq!(idx.entries.len(), expected.len());
5437
5438 assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
5440 let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
5442 want_loose.sort_by_key(ObjectId::to_hex);
5443 assert_eq!(result.packed_loose, want_loose);
5444 assert!(!result.packed_loose.contains(&packed_oid));
5445
5446 fs::remove_dir_all(root).expect("test operation should succeed");
5447 }
5448
/// Runs the shared repack consolidation scenario with SHA-1 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha1() {
    let format = ObjectFormat::Sha1;
    repack_all_objects_consolidates_loose_and_pack(format);
}
5453
/// Runs the shared repack consolidation scenario with SHA-256 object ids.
#[test]
fn repack_all_objects_consolidates_loose_and_pack_sha256() {
    let format = ObjectFormat::Sha256;
    repack_all_objects_consolidates_loose_and_pack(format);
}
5458
/// Repacking a repository whose `objects` directory is empty yields `None`.
#[test]
fn repack_all_objects_returns_none_for_empty_repository() {
    let root = temp_root("sley-repack-empty");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let outcome =
        repack_all_objects(&git_dir, ObjectFormat::Sha1).expect("test operation should succeed");
    assert!(outcome.is_none());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5473
/// Installing a repack result with the final flag `false` writes the new pack
/// and index but leaves the loose objects in place and readable.
#[test]
fn install_repack_result_writes_pack_without_pruning_by_default() {
    let root = temp_root("sley-repack-install-nodelete");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"install no prune\n");

    let repacked = repack_all_objects(&git_dir, format)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    install_repack_result(&git_dir, format, &repacked, false)
        .expect("test operation should succeed");

    // The installed files are named after the new pack's checksum.
    let parsed = PackFile::parse(&repacked.pack, format).expect("test operation should succeed");
    let pack_dir = git_dir.join("objects").join("pack");
    let checksum_hex = parsed.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{checksum_hex}.pack"));
    let idx_path = pack_dir.join(format!("pack-{checksum_hex}.idx"));
    assert!(pack_path.exists());
    assert!(idx_path.exists());

    // Every loose object written earlier is still on disk and readable.
    for (oid, object) in &graph {
        assert!(
            db.loose()
                .object_path(oid)
                .expect("test operation should succeed")
                .exists()
        );
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5509
5510 #[test]
5511 fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
5512 let root = temp_root("sley-repack-install-prune");
5513 let git_dir = root.join(".git");
5514 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5515 let format = ObjectFormat::Sha1;
5516 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5517
5518 let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
5519 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
5520 .expect("test operation should succeed");
5521 let existing = db
5522 .install_pack(&existing_pack)
5523 .expect("test operation should succeed");
5524 let graph = write_commit_graph(&mut db, b"prune payload\n");
5525
5526 let result = repack_all_objects(&git_dir, format)
5527 .expect("test operation should succeed")
5528 .expect("test operation should succeed");
5529 let new_pack_checksum = PackFile::parse(&result.pack, format)
5530 .expect("test operation should succeed")
5531 .checksum;
5532 install_repack_result(&git_dir, format, &result, true)
5533 .expect("test operation should succeed");
5534
5535 assert!(!existing.pack_path.exists());
5537 assert!(!existing.index_path.exists());
5538 for (oid, _) in &graph {
5540 assert!(
5541 !db.loose()
5542 .object_path(oid)
5543 .expect("test operation should succeed")
5544 .exists()
5545 );
5546 }
5547 let pack_dir = git_dir.join("objects").join("pack");
5549 assert!(
5550 pack_dir
5551 .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
5552 .exists()
5553 );
5554 let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
5555 for (oid, object) in &graph {
5556 assert!(
5557 reopened
5558 .contains(oid)
5559 .expect("test operation should succeed")
5560 );
5561 assert_eq!(read_object_for_assert(&reopened, oid), *object);
5562 }
5563 let packed_oid = packed_blob
5564 .object_id(format)
5565 .expect("test operation should succeed");
5566 assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
5567
5568 fs::remove_dir_all(root).expect("test operation should succeed");
5569 }
5570
5571 #[test]
5572 fn install_repack_result_preserves_keep_and_promisor_packs() {
5573 let root = temp_root("sley-repack-install-keep-promisor");
5574 let git_dir = root.join(".git");
5575 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5576 let format = ObjectFormat::Sha1;
5577 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5578
5579 let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
5580 let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
5581 .expect("test operation should succeed");
5582 let keep_install = db
5583 .install_pack(&keep_pack)
5584 .expect("test operation should succeed");
5585 let keep_sidecar = keep_install.pack_path.with_extension("keep");
5586 fs::write(&keep_sidecar, b"").expect("test operation should succeed");
5587
5588 let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
5589 let promisor_pack =
5590 PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
5591 .expect("test operation should succeed");
5592 let promisor_install = db
5593 .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
5594 .expect("test operation should succeed");
5595 let promisor_sidecar = promisor_install
5596 .promisor_path
5597 .clone()
5598 .expect("promisor sidecar");
5599
5600 let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
5601 let result = repack_all_objects(&git_dir, format)
5602 .expect("test operation should succeed")
5603 .expect("test operation should succeed");
5604 assert!(result.obsolete_packs.contains(&keep_install.pack_path));
5605 assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
5606
5607 install_repack_result(&git_dir, format, &result, true)
5608 .expect("test operation should succeed");
5609
5610 for path in [
5611 &keep_install.pack_path,
5612 &keep_install.index_path,
5613 &keep_sidecar,
5614 &promisor_install.pack_path,
5615 &promisor_install.index_path,
5616 &promisor_sidecar,
5617 ] {
5618 assert!(path.exists(), "{} should be preserved", path.display());
5619 }
5620 for (oid, _) in &graph {
5621 assert!(
5622 !db.loose()
5623 .object_path(oid)
5624 .expect("test operation should succeed")
5625 .exists()
5626 );
5627 }
5628
5629 fs::remove_dir_all(root).expect("test operation should succeed");
5630 }
5631
5632 #[test]
5633 fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
5634 let root = temp_root("sley-repack-install-safety");
5637 let git_dir = root.join(".git");
5638 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5639 let format = ObjectFormat::Sha1;
5640 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5641 let graph = write_commit_graph(&mut db, b"safety packed\n");
5642
5643 let mut result = repack_all_objects(&git_dir, format)
5644 .expect("test operation should succeed")
5645 .expect("test operation should succeed");
5646
5647 let stray = EncodedObject::new(ObjectType::Blob, b"never packed\n".to_vec());
5649 let stray_oid = db
5650 .write_object(stray.clone())
5651 .expect("test operation should succeed");
5652 assert!(!result.packed_loose.contains(&stray_oid));
5653 result.packed_loose.push(stray_oid);
5654
5655 install_repack_result(&git_dir, format, &result, true)
5656 .expect("test operation should succeed");
5657
5658 assert!(
5660 db.loose()
5661 .object_path(&stray_oid)
5662 .expect("test operation should succeed")
5663 .exists()
5664 );
5665 assert_eq!(read_object_for_assert(&db, &stray_oid), stray);
5666 for (oid, _) in &graph {
5668 assert!(
5669 !db.loose()
5670 .object_path(oid)
5671 .expect("test operation should succeed")
5672 .exists()
5673 );
5674 }
5675
5676 fs::remove_dir_all(root).expect("test operation should succeed");
5677 }
5678
5679 #[test]
5680 fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
5681 let root = temp_root("sley-prune-unreachable");
5682 let git_dir = root.join(".git");
5683 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5684 let format = ObjectFormat::Sha1;
5685 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
5686 let graph = write_commit_graph(&mut db, b"reachable payload\n");
5687 let commit_oid = graph[0].0.clone();
5688
5689 let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
5691 let dangling_oid = db
5692 .write_object(dangling)
5693 .expect("test operation should succeed");
5694
5695 let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
5697 .expect("test operation should succeed");
5698 assert_eq!(reported, vec![dangling_oid]);
5699 assert!(
5700 db.loose()
5701 .object_path(&dangling_oid)
5702 .expect("test operation should succeed")
5703 .exists()
5704 );
5705
5706 let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
5708 .expect("test operation should succeed");
5709 assert_eq!(deleted, vec![dangling_oid]);
5710 assert!(
5711 !db.loose()
5712 .object_path(&dangling_oid)
5713 .expect("test operation should succeed")
5714 .exists()
5715 );
5716 for (oid, object) in &graph {
5717 assert!(
5718 db.loose()
5719 .object_path(oid)
5720 .expect("test operation should succeed")
5721 .exists()
5722 );
5723 assert_eq!(read_object_for_assert(&db, oid), *object);
5724 }
5725
5726 fs::remove_dir_all(root).expect("test operation should succeed");
5727 }
5728
5729 #[test]
5730 fn prune_unreachable_loose_ignores_gitlink_targets() {
5731 let root = temp_root("sley-prune-gitlink");
5732 let git_dir = root.join(".git");
5733 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5734 let format = ObjectFormat::Sha1;
5735 let db = FileObjectDatabase::from_git_dir(&git_dir, format);
5736
5737 let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
5738 .expect("test operation should succeed");
5739 let tree = EncodedObject::new(
5740 ObjectType::Tree,
5741 Tree {
5742 entries: vec![TreeEntry {
5743 mode: 0o160000,
5744 name: BString::from(b"submodule"),
5745 oid: submodule_oid,
5746 }],
5747 }
5748 .write(),
5749 );
5750 let tree_oid = db
5751 .write_object(tree)
5752 .expect("test operation should succeed");
5753 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
5754 let commit = EncodedObject::new(
5755 ObjectType::Commit,
5756 Commit {
5757 tree: tree_oid,
5758 parents: Vec::new(),
5759 author: identity.clone(),
5760 committer: identity,
5761 encoding: None,
5762 message: b"gitlink\n".to_vec(),
5763 }
5764 .write(),
5765 );
5766 let commit_oid = db
5767 .write_object(commit)
5768 .expect("test operation should succeed");
5769 let dangling = EncodedObject::new(ObjectType::Blob, b"dangling with gitlink\n".to_vec());
5770 let dangling_oid = db
5771 .write_object(dangling)
5772 .expect("test operation should succeed");
5773
5774 let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
5775 .expect("test operation should succeed");
5776
5777 assert_eq!(deleted, vec![dangling_oid]);
5778 assert!(
5779 !db.loose()
5780 .object_path(&dangling_oid)
5781 .expect("test operation should succeed")
5782 .exists()
5783 );
5784
5785 fs::remove_dir_all(root).expect("test operation should succeed");
5786 }
5787
5788 fn temp_root(prefix: &str) -> PathBuf {
5789 std::env::temp_dir().join(format!(
5790 "{prefix}-{}-{}",
5791 std::process::id(),
5792 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
5793 ))
5794 }
5795}