1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::Compression;
6use flate2::read::ZlibDecoder;
7use flate2::write::ZlibEncoder;
8use flate2::{Decompress, FlushDecompress};
9use sley_core::{GitError, MissingObjectContext, ObjectFormat, ObjectId, Result};
10use sley_formats::{Bundle, BundleReference};
11use sley_object::{Commit, EncodedObject, ObjectType, Tag, TreeEntries, parse_framed_object};
12use sley_pack::{
13 MultiPackIndex, MultiPackIndexOidLookup, PackBitmapIndex, PackBitmapWriter, PackFile,
14 PackIndex, PackIndexByteSource, PackIndexEntry, PackIndexViewData, PackInput, PackWrite,
15};
16use std::collections::{HashMap, HashSet};
17use std::io::{Read, Write};
18use std::path::{Path, PathBuf};
19use std::sync::atomic::{AtomicU64, Ordering};
20use std::sync::{Arc, Mutex, OnceLock};
21use std::{env, fs};
22
// Process-wide atomic counter, initialised to zero.
// NOTE(review): presumably used to make temporary file names unique; its users
// are outside this chunk — confirm.
static TEMPFILE_COUNTER: AtomicU64 = AtomicU64::new(0);
24
/// Read access to a store of encoded objects.
pub trait ObjectReader {
    /// Looks up `oid` and returns the encoded object behind a shared pointer,
    /// or an error when the lookup fails.
    fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>>;

    /// Reports whether `oid` is a shallow graft point.
    ///
    /// The default implementation always answers `false`. [`grafted_parents`]
    /// treats a `true` answer as "this object has no parents".
    fn is_shallow_graft(&self, _oid: &ObjectId) -> bool {
        false
    }

    /// Reports whether this reader knows about any shallow grafts.
    ///
    /// The default implementation always answers `false`.
    fn has_shallow_grafts(&self) -> bool {
        false
    }
}
46
47fn implied_empty_tree_object(format: ObjectFormat, oid: &ObjectId) -> Option<Arc<EncodedObject>> {
48 (*oid == ObjectId::empty_tree(format))
49 .then(|| Arc::new(EncodedObject::new(ObjectType::Tree, Vec::new())))
50}
51
52fn with_missing_object_context(
53 err: GitError,
54 oid: ObjectId,
55 context: MissingObjectContext,
56) -> GitError {
57 let kind = err
58 .not_found_kind()
59 .and_then(sley_core::NotFoundKind::missing_object_kind);
60 match kind {
61 Some(kind) => GitError::object_kind_not_found_in(oid, kind, context),
62 None => err,
63 }
64}
65
66pub fn grafted_parents<R: ObjectReader + ?Sized>(
70 reader: &R,
71 oid: &ObjectId,
72 parents: Vec<ObjectId>,
73) -> Vec<ObjectId> {
74 if reader.is_shallow_graft(oid) {
75 Vec::new()
76 } else {
77 parents
78 }
79}
80
/// Write access to a store of encoded objects.
pub trait ObjectWriter {
    /// Stores `object` and returns the id it was stored under.
    fn write_object(&self, object: EncodedObject) -> Result<ObjectId>;
}
89
/// Outcome of importing a bundle's pack into an object store.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BundleUnbundleResult {
    /// Ids of the objects that were written or installed from the bundle pack.
    pub written_objects: Vec<ObjectId>,
    /// Copy of the references listed in the bundle.
    pub references: Vec<BundleReference>,
}
95
/// Outcome of unpacking a pack file object by object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackUnpackResult {
    /// Ids returned by the writer, in the order the pack entries were written.
    pub written_objects: Vec<ObjectId>,
}
100
/// Description of a pack that was installed on disk.
// NOTE(review): the code that fills this struct is outside this chunk; the
// field notes below are inferred from names and types only — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackInstallResult {
    /// Name of the installed pack (presumably without a file extension).
    pub pack_name: String,
    /// Location of the pack file.
    pub pack_path: PathBuf,
    /// Location of the pack's index file.
    pub index_path: PathBuf,
    /// Location of a promisor marker file, when one exists.
    pub promisor_path: Option<PathBuf>,
    /// Ids of the objects associated with the installed pack.
    pub object_ids: Vec<ObjectId>,
}
109
/// Outcome reported by a [`RawPackInstaller`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawPackInstallResult {
    /// Ids of the objects that the installed pack provided.
    pub object_ids: Vec<ObjectId>,
}
114
/// Options controlling how a raw pack is installed. `Default` leaves every
/// flag off.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct RawPackInstallOptions {
    /// NOTE(review): presumably requests that the pack be marked as a promisor
    /// pack; the code that consumes this flag is outside this chunk — confirm.
    pub promisor: bool,
}
119
/// A destination that can ingest a complete pack given as raw bytes.
pub trait RawPackInstaller {
    /// Installs the pack contained in `pack_bytes` and reports the ids of the
    /// objects it provided.
    fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult>;
}
123
/// Result of resolving an object-id prefix.
// NOTE(review): the resolver itself is outside this chunk; the variant notes
// are inferred from the variant names and payload types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectPrefixResolution {
    /// No object matched.
    Missing,
    /// Exactly one object matched.
    Unique(ObjectId),
    /// Several objects matched; the candidates are carried in the payload.
    Ambiguous(Vec<ObjectId>),
}
130
/// Storage-level details about a single object.
// NOTE(review): producers of this struct are outside this chunk; the field
// notes are inferred from names only — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectStorageInfo {
    /// Presumably the number of bytes the object occupies on disk.
    pub disk_size: u64,
    /// Presumably the id of the delta base (possibly the null id when the
    /// object is not stored as a delta).
    pub deltabase: ObjectId,
}
136
137impl RawPackInstaller for FileObjectDatabase {
138 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
139 let result = FileObjectDatabase::install_raw_pack(self, pack_bytes)?;
140 Ok(RawPackInstallResult {
141 object_ids: result.object_ids,
142 })
143 }
144}
145
146impl RawPackInstaller for ObjectDatabase {
147 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
148 let result = unpack_packfile_objects(pack_bytes, self.format, self)?;
149 Ok(RawPackInstallResult {
150 object_ids: result.written_objects,
151 })
152 }
153}
154
155pub fn verify_bundle_prerequisites<R: ObjectReader>(bundle: &Bundle, reader: &R) -> Result<()> {
156 let mut missing = Vec::new();
157 for prerequisite in &bundle.prerequisites {
158 match reader.read_object(&prerequisite.oid) {
159 Ok(object) => {
160 let actual = object.object_id(bundle.format)?;
161 if actual != prerequisite.oid {
162 return Err(GitError::InvalidObject(format!(
163 "bundle prerequisite {} hashes to {actual}",
164 prerequisite.oid
165 )));
166 }
167 }
168 Err(GitError::NotFound(_)) => missing.push(prerequisite.oid),
169 Err(err) => return Err(err),
170 }
171 }
172 if missing.is_empty() {
173 return Ok(());
174 }
175 Err(GitError::object_not_found_in(
176 missing[0],
177 MissingObjectContext::PackInstall,
178 ))
179}
180
181pub fn unbundle_objects<R, W>(
182 bundle: &Bundle,
183 prerequisite_reader: &R,
184 writer: &mut W,
185) -> Result<BundleUnbundleResult>
186where
187 R: ObjectReader,
188 W: ObjectWriter,
189{
190 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
191 let pack = PackFile::parse_bundle(bundle)?;
192 let written_objects = write_pack_objects(pack, writer, "bundle")?.written_objects;
193 Ok(BundleUnbundleResult {
194 written_objects,
195 references: bundle.references.clone(),
196 })
197}
198
199pub fn install_bundle_pack<R>(
200 bundle: &Bundle,
201 prerequisite_reader: &R,
202 destination: &impl RawPackInstaller,
203) -> Result<BundleUnbundleResult>
204where
205 R: ObjectReader,
206{
207 verify_bundle_prerequisites(bundle, prerequisite_reader)?;
208 let install = destination.install_raw_pack(&bundle.pack)?;
209 Ok(BundleUnbundleResult {
210 written_objects: install.object_ids,
211 references: bundle.references.clone(),
212 })
213}
214
215pub fn unpack_packfile_objects<W>(
216 pack_bytes: &[u8],
217 format: ObjectFormat,
218 writer: &W,
219) -> Result<PackUnpackResult>
220where
221 W: ObjectWriter,
222{
223 let pack = PackFile::parse(pack_bytes, format)?;
224 write_pack_objects(pack, writer, "pack")
225}
226
227fn write_pack_objects<W>(pack: PackFile, writer: &W, source: &str) -> Result<PackUnpackResult>
228where
229 W: ObjectWriter,
230{
231 let mut written_objects = Vec::with_capacity(pack.entries.len());
232 for entry in pack.entries {
233 let expected = entry.entry.oid;
234 let actual = writer.write_object(entry.object)?;
235 if actual != expected {
236 return Err(GitError::InvalidObject(format!(
237 "{source} object id mismatch: expected {expected}, wrote {actual}"
238 )));
239 }
240 written_objects.push(actual);
241 }
242 Ok(PackUnpackResult { written_objects })
243}
244
245pub fn collect_reachable_object_ids<R, I>(
246 reader: &R,
247 format: ObjectFormat,
248 starts: I,
249) -> Result<HashSet<ObjectId>>
250where
251 R: ObjectReader,
252 I: IntoIterator<Item = ObjectId>,
253{
254 walk_reachable_objects(reader, format, starts, &HashSet::new(), |_, _| {})
255}
256
257pub fn collect_reachable_object_ids_with_cut<R, I>(
262 reader: &R,
263 format: ObjectFormat,
264 starts: I,
265 cut: &HashSet<ObjectId>,
266) -> Result<HashSet<ObjectId>>
267where
268 R: ObjectReader,
269 I: IntoIterator<Item = ObjectId>,
270{
271 walk_reachable_objects_with_cut(reader, format, starts, &HashSet::new(), cut, |_, _| {})
272}
273
274pub fn collect_reachable_object_ids_excluding<R, I>(
278 reader: &R,
279 format: ObjectFormat,
280 starts: I,
281 excluded: &HashSet<ObjectId>,
282) -> Result<HashSet<ObjectId>>
283where
284 R: ObjectReader,
285 I: IntoIterator<Item = ObjectId>,
286{
287 walk_reachable_objects(reader, format, starts, excluded, |_, _| {})
288}
289
290pub fn collect_reachable_objects<R, I>(
291 reader: &R,
292 format: ObjectFormat,
293 starts: I,
294 excluded: &HashSet<ObjectId>,
295) -> Result<Vec<Arc<EncodedObject>>>
296where
297 R: ObjectReader,
298 I: IntoIterator<Item = ObjectId>,
299{
300 let mut objects = Vec::new();
301 walk_reachable_objects(reader, format, starts, excluded, |_, object| {
302 objects.push(Arc::clone(object));
303 })?;
304 Ok(objects)
305}
306
/// An object paired with its already-known id, queued for pack writing.
#[derive(Debug, Clone)]
struct ReachablePackObject {
    /// Id of `object`.
    oid: ObjectId,
    /// Shared handle to the encoded object.
    object: Arc<EncodedObject>,
}
312
313fn collect_reachable_pack_objects<R, I>(
314 reader: &R,
315 format: ObjectFormat,
316 starts: I,
317 excluded: &HashSet<ObjectId>,
318) -> Result<Vec<ReachablePackObject>>
319where
320 R: ObjectReader,
321 I: IntoIterator<Item = ObjectId>,
322{
323 let mut objects = Vec::new();
324 walk_reachable_objects(reader, format, starts, excluded, |oid, object| {
325 objects.push(ReachablePackObject {
326 oid: *oid,
327 object: Arc::clone(object),
328 });
329 })?;
330 Ok(objects)
331}
332
333fn pack_inputs(objects: &[ReachablePackObject]) -> Vec<PackInput<'_>> {
334 objects
335 .iter()
336 .map(|entry| PackInput {
337 oid: &entry.oid,
338 object: &entry.object,
339 })
340 .collect()
341}
342
343pub fn install_reachable_pack<I>(
344 source: &impl ObjectReader,
345 destination: &impl RawPackInstaller,
346 format: ObjectFormat,
347 starts: I,
348) -> Result<Option<RawPackInstallResult>>
349where
350 I: IntoIterator<Item = ObjectId>,
351{
352 install_reachable_pack_excluding(source, destination, format, starts, &HashSet::new())
353}
354
355pub fn install_reachable_pack_excluding<I>(
356 source: &impl ObjectReader,
357 destination: &impl RawPackInstaller,
358 format: ObjectFormat,
359 starts: I,
360 excluded: &HashSet<ObjectId>,
361) -> Result<Option<RawPackInstallResult>>
362where
363 I: IntoIterator<Item = ObjectId>,
364{
365 let pack = match build_reachable_pack(source, format, starts, excluded)? {
366 Some(pack) => pack,
367 None => return Ok(None),
368 };
369 destination.install_raw_pack(&pack.pack).map(Some)
370}
371
372pub fn build_reachable_pack<R, I>(
373 reader: &R,
374 format: ObjectFormat,
375 starts: I,
376 excluded: &HashSet<ObjectId>,
377) -> Result<Option<PackWrite>>
378where
379 R: ObjectReader,
380 I: IntoIterator<Item = ObjectId>,
381{
382 let objects = collect_reachable_pack_objects(reader, format, starts, excluded)?;
383 if objects.is_empty() {
384 return Ok(None);
385 }
386 let inputs = pack_inputs(&objects);
391 PackFile::write_packed_with_known_ids(&inputs, format).map(Some)
392}
393
394pub fn build_and_install_reachable_pack<R, I>(
395 source: &R,
396 destination: &FileObjectDatabase,
397 format: ObjectFormat,
398 starts: I,
399 excluded: &HashSet<ObjectId>,
400 options: RawPackInstallOptions,
401) -> Result<Option<PackInstallResult>>
402where
403 R: ObjectReader,
404 I: IntoIterator<Item = ObjectId>,
405{
406 build_and_install_reachable_pack_filtered(
407 source,
408 destination,
409 format,
410 starts,
411 excluded,
412 options,
413 None,
414 None,
415 )
416}
417
/// Object filter applied when building a pack from reachable objects.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PackObjectFilter {
    /// Omit blobs from the pack, except blobs that were themselves requested
    /// as start ids.
    BlobNone,
}
429
430#[allow(clippy::too_many_arguments)]
434pub fn build_and_install_reachable_pack_filtered<R, I>(
435 source: &R,
436 destination: &FileObjectDatabase,
437 format: ObjectFormat,
438 starts: I,
439 excluded: &HashSet<ObjectId>,
440 options: RawPackInstallOptions,
441 filter: Option<PackObjectFilter>,
442 unpack_limit: Option<usize>,
443) -> Result<Option<PackInstallResult>>
444where
445 R: ObjectReader,
446 I: IntoIterator<Item = ObjectId>,
447{
448 let starts: Vec<ObjectId> = starts.into_iter().collect();
449 let wanted: HashSet<ObjectId> = starts.iter().copied().collect();
450 let mut objects = collect_reachable_pack_objects(source, format, starts, excluded)?;
451 match filter {
452 Some(PackObjectFilter::BlobNone) => {
453 objects.retain(|entry| {
454 entry.object.object_type != ObjectType::Blob || wanted.contains(&entry.oid)
455 });
456 }
457 None => {}
458 }
459 if objects.is_empty() {
460 return Ok(None);
461 }
462 if let Some(limit) = unpack_limit
466 && objects.len() < limit
467 {
468 for entry in &objects {
469 destination.loose().write_object((*entry.object).clone())?;
470 }
471 return Ok(None);
472 }
473 let inputs = pack_inputs(&objects);
474 let pack = PackFile::write_packed_with_known_ids(&inputs, format)?;
475 destination
476 .install_generated_pack_unchecked(&pack, options)
477 .map(Some)
478}
479
480pub fn assemble_pack_with_verbatim_reuse(
490 format: ObjectFormat,
491 reused_pack_bytes: &[u8],
492 appended: &[PackInput<'_>],
493) -> Result<(Vec<u8>, u32)> {
494 assemble_pack_with_verbatim_reuses(format, &[reused_pack_bytes], appended)
495}
496
497pub fn assemble_pack_with_verbatim_reuses(
500 format: ObjectFormat,
501 reused_packs: &[&[u8]],
502 appended: &[PackInput<'_>],
503) -> Result<(Vec<u8>, u32)> {
504 let hash_len = format.raw_len();
505 let mut reused_count = 0u32;
506 let mut capacity = 12 + hash_len + 64 * appended.len();
507 for reused_pack_bytes in reused_packs {
508 if reused_pack_bytes.len() < 12 + hash_len {
509 return Err(GitError::InvalidFormat("reused pack too short".into()));
510 }
511 if &reused_pack_bytes[..4] != b"PACK" {
512 return Err(GitError::InvalidFormat(
513 "reused pack has no signature".into(),
514 ));
515 }
516 let version = u32::from_be_bytes([
517 reused_pack_bytes[4],
518 reused_pack_bytes[5],
519 reused_pack_bytes[6],
520 reused_pack_bytes[7],
521 ]);
522 if version != 2 {
523 return Err(GitError::Unsupported(format!(
524 "reused pack version {version}"
525 )));
526 }
527 let count = u32::from_be_bytes([
528 reused_pack_bytes[8],
529 reused_pack_bytes[9],
530 reused_pack_bytes[10],
531 reused_pack_bytes[11],
532 ]);
533 reused_count = reused_count
534 .checked_add(count)
535 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
536 capacity = capacity.saturating_add(reused_pack_bytes.len().saturating_sub(12 + hash_len));
537 }
538 let total = reused_count
539 .checked_add(appended.len() as u32)
540 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
541
542 let mut out = Vec::with_capacity(capacity);
543 out.extend_from_slice(b"PACK");
544 out.extend_from_slice(&2u32.to_be_bytes());
545 out.extend_from_slice(&total.to_be_bytes());
546 for reused_pack_bytes in reused_packs {
547 out.extend_from_slice(&reused_pack_bytes[12..reused_pack_bytes.len() - hash_len]);
548 }
549 for input in appended {
550 write_undeltified_pack_entry(&mut out, input.object)?;
551 }
552 let checksum = sley_core::digest_bytes(format, &out)?;
553 out.extend_from_slice(checksum.as_bytes());
554 Ok((out, reused_count))
555}
556
557pub fn assemble_pack_with_verbatim_entries(
560 format: ObjectFormat,
561 reused_entries: &[&[u8]],
562 appended: &[PackInput<'_>],
563) -> Result<(Vec<u8>, u32)> {
564 let reused_count = u32::try_from(reused_entries.len())
565 .map_err(|_| GitError::InvalidFormat("too many pack objects".into()))?;
566 let total = reused_count
567 .checked_add(appended.len() as u32)
568 .ok_or_else(|| GitError::InvalidFormat("too many pack objects".into()))?;
569
570 let mut capacity = 12 + format.raw_len() + 64 * appended.len();
571 for entry in reused_entries {
572 capacity = capacity.saturating_add(entry.len());
573 }
574 let mut out = Vec::with_capacity(capacity);
575 out.extend_from_slice(b"PACK");
576 out.extend_from_slice(&2u32.to_be_bytes());
577 out.extend_from_slice(&total.to_be_bytes());
578 for entry in reused_entries {
579 out.extend_from_slice(entry);
580 }
581 for input in appended {
582 write_undeltified_pack_entry(&mut out, input.object)?;
583 }
584 let checksum = sley_core::digest_bytes(format, &out)?;
585 out.extend_from_slice(checksum.as_bytes());
586 Ok((out, reused_count))
587}
588
589fn write_undeltified_pack_entry(out: &mut Vec<u8>, object: &EncodedObject) -> Result<()> {
591 let type_bits: u8 = match object.object_type {
592 ObjectType::Commit => 1,
593 ObjectType::Tree => 2,
594 ObjectType::Blob => 3,
595 ObjectType::Tag => 4,
596 };
597 let mut size = object.body.len() as u64;
598 let mut byte = (type_bits << 4) | (size & 0x0f) as u8;
599 size >>= 4;
600 while size > 0 {
601 out.push(byte | 0x80);
602 byte = (size & 0x7f) as u8;
603 size >>= 7;
604 }
605 out.push(byte);
606 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
607 encoder.write_all(&object.body)?;
608 out.extend_from_slice(&encoder.finish()?);
609 Ok(())
610}
611
/// In-memory result of a repack: the new pack and index plus the bookkeeping
/// needed to install it and prune what it replaces.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackResult {
    /// Bytes of the newly written pack.
    pub pack: Vec<u8>,
    /// Bytes of the index belonging to `pack`.
    pub idx: Vec<u8>,
    /// Number of objects in the new pack.
    pub object_count: usize,
    /// Existing `.pack` files other than the new pack; left empty by
    /// `repack_loose_objects`.
    pub obsolete_packs: Vec<PathBuf>,
    /// Loose objects that are also contained in the new pack, sorted by id bytes.
    pub packed_loose: Vec<ObjectId>,
    /// Checksum of the new pack as reported by the pack writer.
    pack_checksum: ObjectId,
    /// Index entries as reported by the pack writer; used to cross-check `idx`
    /// at install time.
    index_entries: Vec<PackIndexEntry>,
}
636
637pub fn repack_reachable_objects(
657 git_dir: &Path,
658 format: ObjectFormat,
659 roots: &[ObjectId],
660) -> Result<Option<RepackResult>> {
661 let objects_dir = repository_objects_dir(git_dir);
662 let database = FileObjectDatabase::new(objects_dir.clone(), format);
663
664 let mut seen: HashSet<ObjectId> = HashSet::new();
665 let mut objects: Vec<ReachablePackObject> = Vec::new();
666 let mut pending: Vec<ObjectId> = roots.to_vec();
667 while let Some(oid) = pending.pop() {
668 if !seen.insert(oid) {
669 continue;
670 }
671 let object = match database.read_object(&oid) {
672 Ok(object) => object,
673 Err(GitError::NotFound(_)) => continue,
674 Err(err) => return Err(err),
675 };
676 match object.object_type {
677 ObjectType::Commit => {
678 let commit = Commit::parse_ref(format, &object.body)?;
679 pending.extend(grafted_parents(&database, &oid, commit.parents));
680 pending.push(commit.tree);
681 }
682 ObjectType::Tree => {
683 for entry in TreeEntries::new(format, &object.body) {
684 let entry = entry?;
685 if !entry.is_gitlink() {
686 pending.push(entry.oid);
687 }
688 }
689 }
690 ObjectType::Tag => {
691 let tag = Tag::parse_ref(format, &object.body)?;
692 pending.push(tag.object);
693 }
694 ObjectType::Blob => {}
695 }
696 objects.push(ReachablePackObject { oid, object });
697 }
698 if objects.is_empty() {
699 return Ok(None);
700 }
701
702 let inputs = pack_inputs(&objects);
703 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
704 let object_count = written.entries.len();
705
706 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
709 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
710 .into_iter()
711 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
712 .collect();
713
714 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
715 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
716 .into_iter()
717 .filter(|oid| packed_oid_set.contains(oid))
718 .collect();
719 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
720
721 let pack_checksum = written.checksum;
722 let index_entries = written.entries.clone();
723 Ok(Some(RepackResult {
724 pack: written.pack,
725 idx: written.index,
726 object_count,
727 obsolete_packs,
728 packed_loose,
729 pack_checksum,
730 index_entries,
731 }))
732}
733
734pub fn repack_all_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
735 let objects_dir = repository_objects_dir(git_dir);
736 let database = FileObjectDatabase::new(objects_dir.clone(), format);
737
738 let all_oids = object_ids_in_objects_dir(&objects_dir, format)?;
742 if all_oids.is_empty() {
743 return Ok(None);
744 }
745
746 let mut objects = Vec::with_capacity(all_oids.len());
750 for oid in &all_oids {
751 objects.push(ReachablePackObject {
752 oid: *oid,
753 object: database.read_object(oid)?,
754 });
755 }
756
757 let inputs = pack_inputs(&objects);
758 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
759 let object_count = written.entries.len();
760
761 let new_pack_file_name = format!("pack-{}.pack", written.checksum.to_hex());
767 let obsolete_packs = existing_pack_files(&objects_dir.join("pack"))?
768 .into_iter()
769 .filter(|path| path.file_name().and_then(|name| name.to_str()) != Some(&new_pack_file_name))
770 .collect();
771
772 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
775 let mut packed_loose: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
776 .into_iter()
777 .filter(|oid| packed_oid_set.contains(oid))
778 .collect();
779 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
780
781 Ok(Some(RepackResult {
782 pack: written.pack,
783 idx: written.index,
784 object_count,
785 obsolete_packs,
786 packed_loose,
787 pack_checksum: written.checksum,
788 index_entries: written.entries,
789 }))
790}
791
792pub fn repack_loose_objects(git_dir: &Path, format: ObjectFormat) -> Result<Option<RepackResult>> {
798 let objects_dir = repository_objects_dir(git_dir);
799 let database = FileObjectDatabase::new(objects_dir.clone(), format);
800 let loose_oids = loose_object_ids(&objects_dir, format)?;
801 if loose_oids.is_empty() {
802 return Ok(None);
803 }
804
805 let mut objects = Vec::with_capacity(loose_oids.len());
806 for oid in &loose_oids {
807 objects.push(ReachablePackObject {
808 oid: *oid,
809 object: database.read_object(oid)?,
810 });
811 }
812
813 let inputs = pack_inputs(&objects);
814 let written = PackFile::write_packed_with_known_ids(&inputs, format)?;
815 let object_count = written.entries.len();
816 let packed_oid_set: HashSet<&ObjectId> = written.entries.iter().map(|e| &e.oid).collect();
817 let mut packed_loose: Vec<ObjectId> = loose_oids
818 .into_iter()
819 .filter(|oid| packed_oid_set.contains(oid))
820 .collect();
821 packed_loose.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
822
823 let pack_checksum = written.checksum;
824 let index_entries = written.entries.clone();
825 Ok(Some(RepackResult {
826 pack: written.pack,
827 idx: written.index,
828 object_count,
829 obsolete_packs: Vec::new(),
830 packed_loose,
831 pack_checksum,
832 index_entries,
833 }))
834}
835
836pub fn install_repack_result(
851 git_dir: &Path,
852 format: ObjectFormat,
853 result: &RepackResult,
854 prune: bool,
855) -> Result<()> {
856 install_repack_result_with_bitmap(git_dir, format, result, prune, None)
857}
858
/// Validates a repack result and writes it into the repository's pack directory.
///
/// Steps, in order:
/// 1. Ensure `<objects>/pack` exists.
/// 2. Validate the pack bytes against the recorded checksum, parse the index,
///    and require that the index names the same checksum and lists the same
///    entries the writer reported.
/// 3. Write the `.pack`, `.rev` and `.idx` files (the index last).
/// 4. If `bitmap_tips` is given and a bitmap could be built, replace any
///    existing `.bitmap` for this pack with the new one.
/// 5. If `prune` is set, remove packs fully contained in the new pack and the
///    loose objects listed in `result.packed_loose` that the new pack contains.
///
/// # Errors
///
/// `GitError::InvalidFormat` when the index does not match the pack, plus any
/// validation, parse, or filesystem error from the steps above.
pub fn install_repack_result_with_bitmap(
    git_dir: &Path,
    format: ObjectFormat,
    result: &RepackResult,
    prune: bool,
    bitmap_tips: Option<&HashSet<ObjectId>>,
) -> Result<()> {
    let objects_dir = repository_objects_dir(git_dir);
    let pack_dir = objects_dir.join("pack");
    fs::create_dir_all(&pack_dir)?;

    // Nothing is written until pack and index have been cross-checked.
    validate_pack_checksum(&result.pack, format, &result.pack_checksum, "repack")?;
    let parsed_index = PackIndex::parse(&result.idx, format)?;
    if parsed_index.pack_checksum != result.pack_checksum {
        return Err(GitError::InvalidFormat(
            "repack index checksum does not match the new pack".into(),
        ));
    }
    if !pack_index_entries_match_writer(&parsed_index.entries, &result.index_entries) {
        return Err(GitError::InvalidFormat(
            "repack index does not match the new pack contents".into(),
        ));
    }
    let pack_name = format!("pack-{}", result.pack_checksum.to_hex());
    let new_pack_path = pack_dir.join(format!("{pack_name}.pack"));
    let new_rev_path = pack_dir.join(format!("{pack_name}.rev"));
    let new_index_path = pack_dir.join(format!("{pack_name}.idx"));
    // The reverse index is derived from the parsed index entries.
    let reverse_index = sley_pack::PackReverseIndex::write(
        format,
        &sley_pack::pack_order_index_positions(&parsed_index.entries),
        &result.pack_checksum,
    )?;
    // NOTE(review): the index is written last, presumably so readers never see
    // an index whose pack is not yet in place — confirm.
    write_pack_component(&new_pack_path, &result.pack)?;
    write_pack_component(&new_rev_path, &reverse_index)?;
    write_pack_component(&new_index_path, &result.idx)?;

    if let Some(tips) = bitmap_tips {
        let database = FileObjectDatabase::new(objects_dir.clone(), format);
        if let Some(bitmap) = build_pack_bitmap(
            &database,
            format,
            &result.index_entries,
            &result.pack_checksum,
            tips,
        )? {
            let bitmap_path = pack_dir.join(format!("{pack_name}.bitmap"));
            // Drop any previous bitmap for this pack name before writing the new one.
            remove_file_if_exists(&bitmap_path)?;
            write_pack_component(&bitmap_path, &bitmap)?;
        }
    }

    if !prune {
        return Ok(());
    }

    // Ids taken from the parsed index: exactly what the installed pack provides.
    let present: HashSet<ObjectId> = parsed_index.entries.iter().map(|entry| entry.oid).collect();

    prune_packs_contained_in(&objects_dir, format, &present, &new_pack_path)?;
    prune_loose_objects(&objects_dir, format, result.packed_loose.iter(), &present)?;
    Ok(())
}
941
942fn validate_pack_checksum(
943 pack: &[u8],
944 format: ObjectFormat,
945 expected: &ObjectId,
946 context: &str,
947) -> Result<()> {
948 if expected.format() != format {
949 return Err(GitError::InvalidObjectId(format!(
950 "{context} checksum format does not match object format"
951 )));
952 }
953 let hash_len = format.raw_len();
954 if pack.len() < 12 + hash_len {
955 return Err(GitError::InvalidFormat(format!(
956 "{context} pack file too short"
957 )));
958 }
959 if &pack[..4] != b"PACK" {
960 return Err(GitError::InvalidFormat(format!(
961 "{context} pack file missing PACK signature"
962 )));
963 }
964 let trailer_offset = pack.len() - hash_len;
965 let actual = sley_core::digest_bytes(format, &pack[..trailer_offset])?;
966 let trailer = ObjectId::from_raw(format, &pack[trailer_offset..])?;
967 if &actual != expected || trailer != *expected {
968 return Err(GitError::InvalidFormat(format!(
969 "{context} pack checksum does not match generated pack"
970 )));
971 }
972 Ok(())
973}
974
975fn pack_index_entries_match_writer(
976 parsed: &[PackIndexEntry],
977 writer_entries: &[PackIndexEntry],
978) -> bool {
979 if parsed.len() != writer_entries.len() {
980 return false;
981 }
982 let mut writer_entries = writer_entries.iter().collect::<Vec<_>>();
983 writer_entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
984 parsed.iter().zip(writer_entries).all(|(left, right)| {
985 left.oid == right.oid && left.crc32 == right.crc32 && left.offset == right.offset
986 })
987}
988
989pub fn prune_unreachable_loose<I>(
998 git_dir: &Path,
999 format: ObjectFormat,
1000 roots: I,
1001 delete: bool,
1002) -> Result<Vec<ObjectId>>
1003where
1004 I: IntoIterator<Item = ObjectId>,
1005{
1006 let objects_dir = repository_objects_dir(git_dir);
1007 let database = FileObjectDatabase::new(objects_dir.clone(), format);
1008 let reachable = collect_reachable_object_ids(&database, format, roots)?;
1009
1010 let store = LooseObjectStore::new(objects_dir.clone(), format);
1011 let mut pruned: Vec<ObjectId> = loose_object_ids(&objects_dir, format)?
1012 .into_iter()
1013 .filter(|oid| !reachable.contains(oid))
1014 .collect();
1015 pruned.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1016
1017 if delete {
1018 for oid in &pruned {
1019 let path = store.object_path(oid)?;
1020 match fs::remove_file(&path) {
1021 Ok(()) => {}
1022 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1023 Err(err) => return Err(GitError::Io(err.to_string())),
1024 }
1025 }
1026 }
1027 Ok(pruned)
1028}
1029
1030fn loose_object_ids(objects_dir: &Path, format: ObjectFormat) -> Result<Vec<ObjectId>> {
1033 let oids = loose_object_id_set(objects_dir, format)?;
1034 let mut oids = oids.into_iter().collect::<Vec<_>>();
1035 oids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
1036 Ok(oids)
1037}
1038
1039fn loose_object_id_set(objects_dir: &Path, format: ObjectFormat) -> Result<HashSet<ObjectId>> {
1040 let mut oids = HashSet::new();
1041 collect_loose_object_ids(objects_dir, format, &mut oids)?;
1042 Ok(oids)
1043}
1044
1045fn existing_pack_files(pack_dir: &Path) -> Result<Vec<PathBuf>> {
1048 if !pack_dir.exists() {
1049 return Ok(Vec::new());
1050 }
1051 let mut packs = Vec::new();
1052 for entry in fs::read_dir(pack_dir)? {
1053 let path = entry?.path();
1054 if path.extension().and_then(|ext| ext.to_str()) == Some("pack") && path.is_file() {
1055 packs.push(path);
1056 }
1057 }
1058 packs.sort();
1059 Ok(packs)
1060}
1061
/// Deletes every pack in `<objects_dir>/pack` whose objects are all contained
/// in `present`, except the pack at `keep`.
///
/// A pack is left alone when it is `keep` (by path or by file stem), when a
/// sibling `.keep` or `.promisor` file exists, when it has no `.idx`, or when
/// its index lists an object that is not in `present`. For a pack that
/// qualifies, the `.pack`, `.idx`, `.rev`, `.mtimes` and `.bitmap` files are
/// removed. Afterwards a multi-pack-index that references a removed pack is
/// removed as well.
fn prune_packs_contained_in(
    objects_dir: &Path,
    format: ObjectFormat,
    present: &HashSet<ObjectId>,
    keep: &Path,
) -> Result<()> {
    let pack_dir = objects_dir.join("pack");
    let keep_stem = keep.file_stem().map(|stem| stem.to_owned());
    // Stems of the packs deleted below, used for the multi-pack-index check.
    let mut removed_stems: HashSet<String> = HashSet::new();

    for pack_path in existing_pack_files(&pack_dir)? {
        if pack_path == keep {
            continue;
        }
        let Some(stem) = pack_path.file_stem() else {
            continue;
        };
        // Also skip a pack with the same stem as `keep` under a different path spelling.
        if Some(stem) == keep_stem.as_deref() {
            continue;
        }
        // Packs with a `.keep` or `.promisor` sibling are never pruned.
        if pack_path.with_extension("keep").exists()
            || pack_path.with_extension("promisor").exists()
        {
            continue;
        }
        let index_path = pack_path.with_extension("idx");
        // Without an index the pack's contents cannot be checked, so leave it.
        if !index_path.exists() {
            continue;
        }
        let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
        if !index
            .entries
            .iter()
            .all(|entry| present.contains(&entry.oid))
        {
            continue;
        }
        remove_file_if_exists(&pack_path)?;
        remove_file_if_exists(&index_path)?;
        for ext in ["rev", "mtimes", "bitmap"] {
            remove_file_if_exists(&pack_path.with_extension(ext))?;
        }
        removed_stems.insert(stem.to_string_lossy().into_owned());
    }

    prune_stale_multi_pack_index(&pack_dir, format, &removed_stems)?;
    Ok(())
}
1117
1118fn prune_stale_multi_pack_index(
1125 pack_dir: &Path,
1126 format: ObjectFormat,
1127 removed_stems: &HashSet<String>,
1128) -> Result<()> {
1129 if removed_stems.is_empty() {
1130 return Ok(());
1131 }
1132 let midx_path = pack_dir.join("multi-pack-index");
1133 if !midx_path.exists() {
1134 return Ok(());
1135 }
1136 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
1137 let references_removed_pack = midx.pack_names.iter().any(|name| {
1138 let stem = name.strip_suffix(".idx").unwrap_or(name);
1139 removed_stems.contains(stem)
1140 });
1141 if references_removed_pack {
1142 remove_file_if_exists(&midx_path)?;
1143 }
1144 Ok(())
1145}
1146
1147fn prune_loose_objects<'a, I>(
1150 objects_dir: &Path,
1151 format: ObjectFormat,
1152 candidates: I,
1153 present: &HashSet<ObjectId>,
1154) -> Result<()>
1155where
1156 I: IntoIterator<Item = &'a ObjectId>,
1157{
1158 let store = LooseObjectStore::new(objects_dir.to_path_buf(), format);
1159 for oid in candidates {
1160 if !present.contains(oid) {
1161 continue;
1162 }
1163 remove_file_if_exists(&store.object_path(oid)?)?;
1164 }
1165 Ok(())
1166}
1167
/// How a delta pack entry names its base object.
enum PackDeltaBase {
    /// Absolute pack offset of the base entry, resolved from an ofs-delta entry.
    Offset(u64),
    /// Object id of the base, read from a ref-delta entry.
    Ref(ObjectId),
}
1172
/// Result of scanning a pack index for one entry offset.
struct PackIndexOffsetInfo {
    /// Offset at which the entry's data ends, as computed by
    /// `scan_pack_index_offsets`.
    end_offset: u64,
    /// Id of the entry located at the requested delta-base offset, when one
    /// was requested.
    delta_base_oid: Option<ObjectId>,
}
1177
1178fn scan_pack_index_offsets(
1179 index: &PackIndex,
1180 target_offset: u64,
1181 trailer_offset: u64,
1182 delta_base_offset: Option<u64>,
1183) -> Result<PackIndexOffsetInfo> {
1184 let mut target_count = 0usize;
1185 let mut next_offset = None;
1186 let mut delta_base_oid = None;
1187
1188 for entry in &index.entries {
1189 if entry.offset == target_offset {
1190 target_count += 1;
1191 } else if entry.offset > target_offset {
1192 match next_offset {
1193 Some(current) if current <= entry.offset => {}
1194 _ => next_offset = Some(entry.offset),
1195 }
1196 }
1197 if Some(entry.offset) == delta_base_offset {
1198 delta_base_oid = Some(entry.oid);
1199 }
1200 }
1201
1202 if target_count == 0 {
1203 return Err(GitError::InvalidFormat(format!(
1204 "pack index offset {target_offset} not found"
1205 )));
1206 }
1207 if let Some(offset) = delta_base_offset
1208 && delta_base_oid.is_none()
1209 {
1210 return Err(GitError::InvalidFormat(format!(
1211 "ofs-delta base offset {offset} not found"
1212 )));
1213 }
1214
1215 Ok(PackIndexOffsetInfo {
1216 end_offset: if target_count > 1 {
1219 target_offset
1220 } else {
1221 next_offset.unwrap_or(trailer_offset)
1222 },
1223 delta_base_oid,
1224 })
1225}
1226
1227fn pack_entry_delta_base(
1228 format: ObjectFormat,
1229 pack: &[u8],
1230 entry_offset: u64,
1231) -> Result<Option<PackDeltaBase>> {
1232 let mut cursor = usize::try_from(entry_offset)
1233 .map_err(|_| GitError::InvalidFormat("pack entry offset overflows usize".into()))?;
1234 let first = pack_next_byte(pack, &mut cursor)?;
1235 let kind = (first >> 4) & 0x07;
1236 let mut byte = first;
1237 while byte & 0x80 != 0 {
1238 byte = pack_next_byte(pack, &mut cursor)?;
1239 }
1240 match kind {
1241 6 => Ok(Some(PackDeltaBase::Offset(parse_ofs_delta_base_offset(
1242 pack,
1243 &mut cursor,
1244 entry_offset,
1245 )?))),
1246 7 => Ok(Some(PackDeltaBase::Ref(parse_ref_delta_base_oid(
1247 format,
1248 pack,
1249 &mut cursor,
1250 )?))),
1251 _ => Ok(None),
1252 }
1253}
1254
1255fn parse_ref_delta_base_oid(
1256 format: ObjectFormat,
1257 pack: &[u8],
1258 cursor: &mut usize,
1259) -> Result<ObjectId> {
1260 let raw_len = format.raw_len();
1261 if *cursor + raw_len > pack.len() {
1262 return Err(GitError::InvalidFormat(
1263 "truncated ref-delta base object id".into(),
1264 ));
1265 }
1266 let oid = ObjectId::from_raw(format, &pack[*cursor..*cursor + raw_len])?;
1267 *cursor += raw_len;
1268 Ok(oid)
1269}
1270
1271fn parse_ofs_delta_base_offset(pack: &[u8], cursor: &mut usize, entry_offset: u64) -> Result<u64> {
1272 let mut byte = pack_next_byte(pack, cursor)?;
1273 let mut relative = u64::from(byte & 0x7f);
1274 while byte & 0x80 != 0 {
1275 byte = pack_next_byte(pack, cursor)?;
1276 relative = relative
1277 .checked_add(1)
1278 .and_then(|value| value.checked_shl(7))
1279 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
1280 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
1281 }
1282 entry_offset
1283 .checked_sub(relative)
1284 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
1285}
1286
1287fn pack_next_byte(pack: &[u8], cursor: &mut usize) -> Result<u8> {
1288 let Some(byte) = pack.get(*cursor).copied() else {
1289 return Err(GitError::InvalidFormat("truncated pack entry".into()));
1290 };
1291 *cursor += 1;
1292 Ok(byte)
1293}
1294
1295fn zero_oid(format: ObjectFormat) -> Result<ObjectId> {
1296 Ok(ObjectId::null(format))
1297}
1298
1299fn remove_file_if_exists(path: &Path) -> Result<()> {
1301 match fs::remove_file(path) {
1302 Ok(()) => Ok(()),
1303 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
1304 Err(err) => Err(GitError::Io(err.to_string())),
1305 }
1306}
1307
1308fn walk_reachable_objects<R, I, F>(
1309 reader: &R,
1310 format: ObjectFormat,
1311 starts: I,
1312 excluded: &HashSet<ObjectId>,
1313 visit: F,
1314) -> Result<HashSet<ObjectId>>
1315where
1316 R: ObjectReader,
1317 I: IntoIterator<Item = ObjectId>,
1318 F: FnMut(&ObjectId, &Arc<EncodedObject>),
1319{
1320 walk_reachable_objects_with_cut(reader, format, starts, excluded, &HashSet::new(), visit)
1321}
1322
1323fn walk_reachable_objects_with_cut<R, I, F>(
1327 reader: &R,
1328 format: ObjectFormat,
1329 starts: I,
1330 excluded: &HashSet<ObjectId>,
1331 cut: &HashSet<ObjectId>,
1332 mut visit: F,
1333) -> Result<HashSet<ObjectId>>
1334where
1335 R: ObjectReader,
1336 I: IntoIterator<Item = ObjectId>,
1337 F: FnMut(&ObjectId, &Arc<EncodedObject>),
1338{
1339 let mut seen = HashSet::new();
1340 let mut pending = Vec::new();
1341 for start in starts {
1342 pending.push(start);
1343 while let Some(oid) = pending.pop() {
1344 if excluded.contains(&oid) {
1345 continue;
1346 }
1347 if !seen.insert(oid) {
1348 continue;
1349 }
1350 let object = reader.read_object(&oid).map_err(|err| {
1351 with_missing_object_context(err, oid, MissingObjectContext::Traversal)
1352 })?;
1353 match object.object_type {
1354 ObjectType::Commit => {
1355 let (tree, parents) = {
1356 let commit = Commit::parse_ref(format, &object.body)?;
1357 (commit.tree, commit.parents)
1358 };
1359 visit(&oid, &object);
1360 if !cut.contains(&oid) {
1361 for parent in grafted_parents(reader, &oid, parents).into_iter().rev() {
1362 pending.push(parent);
1363 }
1364 }
1365 pending.push(tree);
1366 }
1367 ObjectType::Tree => {
1368 let mut child_oids = Vec::new();
1369 for entry in TreeEntries::new(format, &object.body) {
1370 let entry = entry?;
1371 if entry.is_gitlink() {
1372 continue;
1373 }
1374 child_oids.push(entry.oid);
1375 }
1376 visit(&oid, &object);
1377 pending.extend(child_oids.into_iter().rev());
1378 }
1379 ObjectType::Tag => {
1380 let target = {
1381 let tag = Tag::parse_ref(format, &object.body)?;
1382 tag.object
1383 };
1384 visit(&oid, &object);
1385 pending.push(target);
1386 }
1387 ObjectType::Blob => visit(&oid, &object),
1388 }
1389 }
1390 }
1391 Ok(seen)
1392}
1393
/// Reports whether bit `position` is set in `words` (64 bits per word,
/// least-significant bit first). Positions beyond the slice read as unset.
fn bitset_get(words: &[u64], position: u32) -> bool {
    let mask = 1u64 << (position % 64);
    words
        .get((position / 64) as usize)
        .is_some_and(|word| word & mask != 0)
}
1402
/// Sets bit `position` in `words` (64 bits per word, least-significant bit
/// first). Positions beyond the slice are silently ignored.
fn bitset_set(words: &mut [u64], position: u32) {
    if let Some(word) = words.get_mut((position / 64) as usize) {
        *word |= 1u64 << (position % 64);
    }
}
1409
/// ORs `other` into `acc` word by word. Only the common prefix of the two
/// slices is touched; extra words on either side are left alone.
fn bitset_or(acc: &mut [u64], other: &[u64]) {
    acc.iter_mut()
        .zip(other)
        .for_each(|(target, source)| *target |= *source);
}
1415
/// Lists the positions of all set bits in `words`, in ascending order
/// (word index times 64 plus the bit index within the word).
fn bitset_positions(words: &[u64]) -> Vec<u32> {
    let mut found = Vec::new();
    for (word_index, &word) in words.iter().enumerate() {
        let mut pending = word;
        while pending != 0 {
            let lowest = pending.trailing_zeros();
            found.push(word_index as u32 * 64 + lowest);
            // Clear the bit just reported.
            pending ^= 1u64 << lowest;
        }
    }
    found
}
1429
/// Extracts the timestamp from an identity line by taking the
/// second-to-last space-separated field and parsing it as `i64`.
///
/// Returns 0 when that field is missing, is not UTF-8, or is not a valid
/// integer.
fn commit_identity_timestamp(identity: &[u8]) -> i64 {
    // Fields from the right: [0] is the last field, [1] the one before it.
    let Some(raw) = identity.rsplit(|byte| *byte == b' ').nth(1) else {
        return 0;
    };
    match std::str::from_utf8(raw) {
        Ok(text) => text.parse::<i64>().unwrap_or(0),
        Err(_) => 0,
    }
}
1442
/// Computes how many commits to skip ahead from index `idx` when choosing
/// commits for bitmap selection.
///
/// * `idx <= 100`: 0.
/// * `100 < idx <= 20000`: `idx - 100`, capped at 100.
/// * `idx > 20000`: `idx - 20000`, clamped to `100..=5000`.
fn bitmap_next_commit_index(idx: u32) -> u32 {
    const MIN_COMMITS: u32 = 100;
    const MAX_COMMITS: u32 = 5000;
    const MUST_REGION: u32 = 100;
    const MIN_REGION: u32 = 20000;

    if idx <= MUST_REGION {
        0
    } else if idx <= MIN_REGION {
        (idx - MUST_REGION).min(MIN_COMMITS)
    } else {
        (idx - MIN_REGION).clamp(MIN_COMMITS, MAX_COMMITS)
    }
}
1461
1462pub fn build_pack_bitmap(
1476 db: &FileObjectDatabase,
1477 format: ObjectFormat,
1478 index_entries: &[PackIndexEntry],
1479 pack_checksum: &ObjectId,
1480 preferred_tips: &HashSet<ObjectId>,
1481) -> Result<Option<Vec<u8>>> {
1482 let mut by_offset: Vec<usize> = (0..index_entries.len()).collect();
1485 by_offset.sort_by_key(|&slot| index_entries[slot].offset);
1486 let bit_order: Vec<ObjectId> = by_offset
1487 .into_iter()
1488 .map(|slot| index_entries[slot].oid)
1489 .collect();
1490 build_reachability_bitmap(db, format, pack_checksum, &bit_order, preferred_tips)
1491}
1492
1493pub fn build_midx_bitmap(
1499 db: &FileObjectDatabase,
1500 format: ObjectFormat,
1501 midx_entries: &[sley_pack::MultiPackIndexEntry],
1502 midx_checksum: &ObjectId,
1503 preferred_pack: u32,
1504 preferred_tips: &HashSet<ObjectId>,
1505) -> Result<Option<Vec<u8>>> {
1506 let mut pseudo: Vec<usize> = (0..midx_entries.len()).collect();
1507 pseudo.sort_by_key(|&slot| {
1508 let entry = &midx_entries[slot];
1509 (
1510 entry.pack_int_id != preferred_pack,
1511 entry.pack_int_id,
1512 entry.offset,
1513 )
1514 });
1515 let bit_order: Vec<ObjectId> = pseudo
1516 .into_iter()
1517 .map(|slot| midx_entries[slot].oid)
1518 .collect();
1519 build_reachability_bitmap(db, format, midx_checksum, &bit_order, preferred_tips)
1520}
1521
1522fn bitmap_num_maximal_commits(
1530 db: &FileObjectDatabase,
1531 format: ObjectFormat,
1532 selected: &[ObjectId],
1533) -> Result<usize> {
1534 let mut first_parent: HashMap<ObjectId, Option<ObjectId>> = HashMap::new();
1536 let mut stack: Vec<ObjectId> = selected.to_vec();
1537 while let Some(oid) = stack.pop() {
1538 if first_parent.contains_key(&oid) {
1539 continue;
1540 }
1541 let object = db.read_object(&oid)?;
1542 let commit = Commit::parse_ref(format, &object.body)?;
1543 let parent = grafted_parents(db, &oid, commit.parents).first().copied();
1544 first_parent.insert(oid, parent);
1545 if let Some(parent) = parent {
1546 stack.push(parent);
1547 }
1548 }
1549 let mut pending_children: HashMap<ObjectId, usize> = HashMap::new();
1551 for parent in first_parent.values().flatten() {
1552 *pending_children.entry(*parent).or_default() += 1;
1553 }
1554 let word_count = selected.len().div_ceil(64);
1555 struct MaximalEnt {
1556 mask: Vec<u64>,
1557 maximal: bool,
1558 }
1559 let mut ents: HashMap<ObjectId, MaximalEnt> = HashMap::new();
1560 for (bit, oid) in selected.iter().enumerate() {
1561 let ent = ents.entry(*oid).or_insert_with(|| MaximalEnt {
1562 mask: vec![0u64; word_count],
1563 maximal: true,
1564 });
1565 ent.mask[bit / 64] |= 1u64 << (bit % 64);
1566 ent.maximal = true;
1567 }
1568 let mut queue: Vec<ObjectId> = first_parent
1569 .keys()
1570 .filter(|oid| pending_children.get(*oid).copied().unwrap_or(0) == 0)
1571 .copied()
1572 .collect();
1573 let mut num_maximal = 0usize;
1574 while let Some(oid) = queue.pop() {
1575 if let Some(ent) = ents.remove(&oid) {
1576 if ent.maximal {
1577 num_maximal += 1;
1578 }
1579 if let Some(Some(parent)) = first_parent.get(&oid) {
1580 match ents.entry(*parent) {
1581 std::collections::hash_map::Entry::Vacant(vacant) => {
1582 vacant.insert(MaximalEnt {
1584 mask: ent.mask.clone(),
1585 maximal: false,
1586 });
1587 }
1588 std::collections::hash_map::Entry::Occupied(mut occupied) => {
1589 let parent_ent = occupied.get_mut();
1590 let c_not_p = ent
1591 .mask
1592 .iter()
1593 .zip(&parent_ent.mask)
1594 .any(|(child, parent)| child & !parent != 0);
1595 if c_not_p {
1596 let p_not_c = parent_ent
1597 .mask
1598 .iter()
1599 .zip(&ent.mask)
1600 .any(|(parent, child)| parent & !child != 0);
1601 for (parent, child) in parent_ent.mask.iter_mut().zip(&ent.mask) {
1602 *parent |= child;
1603 }
1604 parent_ent.maximal = p_not_c;
1605 }
1606 }
1607 }
1608 }
1609 }
1610 if let Some(Some(parent)) = first_parent.get(&oid)
1611 && let Some(remaining) = pending_children.get_mut(parent)
1612 {
1613 *remaining -= 1;
1614 if *remaining == 0 {
1615 queue.push(*parent);
1616 }
1617 }
1618 }
1619 Ok(num_maximal)
1620}
1621
/// Builds a serialized reachability bitmap for the objects in `bit_order`.
///
/// `bit_order[i]` is the object assigned bit position `i`. `checksum` is
/// passed to the bitmap writer, and `preferred_tips` biases which commits get
/// a stored bitmap.
///
/// Returns `Ok(None)` when `bit_order` is empty or has more than `u32::MAX`
/// entries, or when a selected commit reaches an object that is not in
/// `bit_order` (a warning is printed to stderr in that case).
///
/// # Errors
///
/// Propagates object read/parse failures and bitmap writer failures.
fn build_reachability_bitmap(
    db: &FileObjectDatabase,
    format: ObjectFormat,
    checksum: &ObjectId,
    bit_order: &[ObjectId],
    preferred_tips: &HashSet<ObjectId>,
) -> Result<Option<Vec<u8>>> {
    if bit_order.is_empty() || bit_order.len() > u32::MAX as usize {
        return Ok(None);
    }
    let object_count = bit_order.len();

    // Rank of every bit position when the objects are sorted by raw id bytes.
    let mut oid_sorted: Vec<u32> = (0..object_count as u32).collect();
    oid_sorted.sort_by(|&left, &right| {
        bit_order[left as usize]
            .as_bytes()
            .cmp(bit_order[right as usize].as_bytes())
    });
    let mut index_position = vec![0u32; object_count];
    for (position, &slot) in oid_sorted.iter().enumerate() {
        index_position[slot as usize] = position as u32;
    }
    // Object id -> bit position (a later duplicate id overwrites an earlier one).
    let mut oid_to_pack = HashMap::with_capacity(object_count);
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        oid_to_pack.insert(*oid, pack_pos as u32);
    }

    // Object type per bit position, plus the metadata needed to pick commits.
    let mut object_types = Vec::with_capacity(object_count);
    struct IndexedCommit {
        oid: ObjectId,
        pack_pos: u32,
        index_pos: u32,
        date: i64,
        parent_count: usize,
    }
    let mut indexed_commits = Vec::new();
    for (pack_pos, oid) in bit_order.iter().enumerate() {
        // Use the header lookup when it yields a type; otherwise read the
        // whole object to learn its type.
        let object_type = match db.read_object_header(oid)? {
            Some((object_type, _)) => object_type,
            None => db.read_object(oid)?.object_type,
        };
        object_types.push(object_type);
        if object_type == ObjectType::Commit {
            let object = db.read_object(oid)?;
            let commit = Commit::parse_ref(format, &object.body)?;
            indexed_commits.push(IndexedCommit {
                oid: *oid,
                pack_pos: pack_pos as u32,
                index_pos: index_position[pack_pos],
                date: commit_identity_timestamp(commit.committer),
                parent_count: grafted_parents(db, oid, commit.parents).len(),
            });
        }
    }

    // Newest committer timestamp first (stable for equal timestamps).
    indexed_commits.sort_by_key(|commit| std::cmp::Reverse(commit.date));
    let mut selected: Vec<&IndexedCommit> = Vec::new();
    let commit_count = indexed_commits.len() as u32;
    if commit_count < 100 {
        // Few commits: every commit gets a bitmap.
        selected.extend(indexed_commits.iter());
    } else {
        let mut i = 0u32;
        loop {
            let next = bitmap_next_commit_index(i);
            if i + next >= commit_count {
                break;
            }
            // Default pick is the commit `next` steps ahead; within the window
            // a preferred tip wins immediately, otherwise the last commit with
            // two or more parents wins.
            let mut chosen = &indexed_commits[(i + next) as usize];
            if next > 0 {
                for j in 0..=next {
                    let candidate = &indexed_commits[(i + j) as usize];
                    if preferred_tips.contains(&candidate.oid) {
                        chosen = candidate;
                        break;
                    }
                    if candidate.parent_count >= 2 {
                        chosen = candidate;
                    }
                }
            }
            selected.push(chosen);
            i += next + 1;
        }
    }

    // Extra statistics are computed and emitted only when GIT_TRACE2_EVENT is set.
    if std::env::var_os("GIT_TRACE2_EVENT").is_some() {
        let selected_oids: Vec<ObjectId> = selected.iter().map(|commit| commit.oid).collect();
        let num_maximal = bitmap_num_maximal_commits(db, format, &selected_oids)?;
        sley_core::trace2::data("pack-bitmap-write", "num_selected_commits", selected.len());
        sley_core::trace2::data("pack-bitmap-write", "num_maximal_commits", num_maximal);
    }

    let word_count = object_count.div_ceil(64);
    // Finished bitmaps of selected commits, reused when a later walk reaches
    // one of them. Walking in reverse selection order processes the commits
    // that sorted last (oldest timestamps) first.
    let mut memo: HashMap<ObjectId, Arc<Vec<u64>>> = HashMap::new();
    for commit in selected.iter().rev() {
        let mut acc = vec![0u64; word_count];
        let mut pending = vec![commit.oid];
        while let Some(oid) = pending.pop() {
            let Some(&pack_pos) = oid_to_pack.get(&oid) else {
                eprintln!(
                    "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {oid} is missing)"
                );
                return Ok(None);
            };
            if bitset_get(&acc, pack_pos) {
                continue;
            }
            if let Some(stored) = memo.get(&oid) {
                // Already-computed closure: merge it and stop descending here.
                bitset_or(&mut acc, stored);
                continue;
            }
            bitset_set(&mut acc, pack_pos);
            let object = db.read_object(&oid)?;
            let tree = {
                let parsed = Commit::parse_ref(format, &object.body)?;
                pending.extend(grafted_parents(db, &oid, parsed.parents));
                parsed.tree
            };
            if !bitmap_mark_tree(db, format, &tree, &oid_to_pack, &mut acc)? {
                return Ok(None);
            }
        }
        memo.insert(commit.oid, Arc::new(acc));
    }

    // Emit one bitmap entry per selected commit, in selection order.
    let mut writer = PackBitmapWriter::new(format, *checksum, &object_types)?;
    for commit in &selected {
        let words = match memo.get(&commit.oid) {
            Some(words) => words,
            None => continue,
        };
        writer.add_commit(commit.pack_pos, commit.index_pos, &bitset_positions(words))?;
    }
    writer.write().map(Some)
}
1773
1774fn bitmap_mark_tree(
1778 db: &impl ObjectReader,
1779 format: ObjectFormat,
1780 tree: &ObjectId,
1781 oid_to_pack: &HashMap<ObjectId, u32>,
1782 acc: &mut [u64],
1783) -> Result<bool> {
1784 let Some(&pack_pos) = oid_to_pack.get(tree) else {
1785 eprintln!(
1786 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {tree} is missing)"
1787 );
1788 return Ok(false);
1789 };
1790 if bitset_get(acc, pack_pos) {
1791 return Ok(true);
1792 }
1793 bitset_set(acc, pack_pos);
1794 let object = db.read_object(tree)?;
1795 for entry in TreeEntries::new(format, &object.body) {
1796 let entry = entry?;
1797 if entry.is_gitlink() {
1798 continue;
1799 }
1800 if entry.is_tree() {
1801 if !bitmap_mark_tree(db, format, &entry.oid, oid_to_pack, acc)? {
1802 return Ok(false);
1803 }
1804 } else {
1805 let Some(&blob_pos) = oid_to_pack.get(&entry.oid) else {
1806 eprintln!(
1807 "warning: Failed to write bitmap index. Packfile doesn't have full closure (object {} is missing)",
1808 entry.oid
1809 );
1810 return Ok(false);
1811 };
1812 bitset_set(acc, blob_pos);
1813 }
1814 }
1815 Ok(true)
1816}
1817
/// A reachability bitmap loaded from disk and expanded into plain word
/// vectors (64 bits per word).
pub struct LoadedPackBitmap {
    /// Number of objects covered by the bitmap.
    object_count: u32,
    /// Object id -> bit position.
    oid_to_pack: HashMap<ObjectId, u32>,
    /// Bit position -> object id.
    pack_to_oid: Vec<ObjectId>,
    /// Stored reachability bitmap for each commit that has one.
    commit_words: HashMap<ObjectId, Arc<Vec<u64>>>,
    /// Type bitmap returned by `type_words` for `ObjectType::Commit`.
    commits: Vec<u64>,
    /// Type bitmap returned by `type_words` for `ObjectType::Tree`.
    trees: Vec<u64>,
    /// Type bitmap returned by `type_words` for `ObjectType::Blob`.
    blobs: Vec<u64>,
    /// Type bitmap returned by `type_words` for `ObjectType::Tag`.
    tags: Vec<u64>,
}
1831
1832impl LoadedPackBitmap {
1833 pub fn object_count(&self) -> u32 {
1834 self.object_count
1835 }
1836
1837 pub fn pack_position(&self, oid: &ObjectId) -> Option<u32> {
1839 self.oid_to_pack.get(oid).copied()
1840 }
1841
1842 pub fn oid_at(&self, position: u32) -> Option<&ObjectId> {
1843 self.pack_to_oid.get(position as usize)
1844 }
1845
1846 pub fn bitmap_for_commit(&self, oid: &ObjectId) -> Option<&Arc<Vec<u64>>> {
1849 self.commit_words.get(oid)
1850 }
1851
1852 pub fn bitmapped_commits(&self) -> impl Iterator<Item = &ObjectId> {
1854 self.commit_words.keys()
1855 }
1856
1857 pub fn type_words(&self, object_type: ObjectType) -> &[u64] {
1859 match object_type {
1860 ObjectType::Commit => &self.commits,
1861 ObjectType::Tree => &self.trees,
1862 ObjectType::Blob => &self.blobs,
1863 ObjectType::Tag => &self.tags,
1864 }
1865 }
1866
1867 fn word_count(&self) -> usize {
1868 (self.object_count as usize).div_ceil(64)
1869 }
1870}
1871
1872pub fn load_pack_bitmap(
1879 objects_dir: &Path,
1880 format: ObjectFormat,
1881) -> Result<Option<LoadedPackBitmap>> {
1882 let pack_dir = objects_dir.join("pack");
1883 if !pack_dir.exists() {
1884 return Ok(None);
1885 }
1886 if let Some(bitmap) = load_midx_bitmap(&pack_dir, format)? {
1889 return Ok(Some(bitmap));
1890 }
1891 let mut bitmap_paths = Vec::new();
1892 for entry in fs::read_dir(&pack_dir)? {
1893 let path = entry?.path();
1894 if path.extension().and_then(|ext| ext.to_str()) == Some("bitmap")
1895 && path
1896 .file_name()
1897 .and_then(|name| name.to_str())
1898 .is_some_and(|name| name.starts_with("pack-"))
1899 {
1900 bitmap_paths.push(path);
1901 }
1902 }
1903 bitmap_paths.sort();
1904 for bitmap_path in bitmap_paths {
1905 match load_pack_bitmap_file(&bitmap_path, format) {
1906 Ok(Some(bitmap)) => return Ok(Some(bitmap)),
1907 Ok(None) | Err(_) => continue,
1908 }
1909 }
1910 Ok(None)
1911}
1912
1913fn load_midx_bitmap(pack_dir: &Path, format: ObjectFormat) -> Result<Option<LoadedPackBitmap>> {
1918 let midx_path = pack_dir.join("multi-pack-index");
1919 if !midx_path.exists() {
1920 return Ok(None);
1921 }
1922 let Ok(midx_bytes) = fs::read(&midx_path) else {
1923 return Ok(None);
1924 };
1925 let Ok(midx) = MultiPackIndex::parse(&midx_bytes, format) else {
1926 return Ok(None);
1927 };
1928 let bitmap_path = pack_dir.join(format!(
1929 "multi-pack-index-{}.bitmap",
1930 midx.checksum.to_hex()
1931 ));
1932 if !bitmap_path.exists() {
1933 return Ok(None);
1934 }
1935 let object_count = midx.objects.len();
1936 let read_ridx_chunk = env::var("GIT_TEST_MIDX_READ_RIDX")
1941 .map(|value| value != "0" && !value.eq_ignore_ascii_case("false"))
1942 .unwrap_or(true);
1943 let reverse_index: Vec<u32> = match (&midx.reverse_index, read_ridx_chunk) {
1944 (Some(chunk), true) => {
1945 sley_core::trace2::data("load_midx_revindex", "source", "midx");
1946 chunk.clone()
1947 }
1948 _ => {
1949 let rev_path =
1950 pack_dir.join(format!("multi-pack-index-{}.rev", midx.checksum.to_hex()));
1951 let Ok(rev_bytes) = fs::read(&rev_path) else {
1952 return Ok(None);
1954 };
1955 let Ok(parsed_rev) =
1956 sley_pack::PackReverseIndex::parse(&rev_bytes, format, object_count)
1957 else {
1958 return Ok(None);
1959 };
1960 sley_core::trace2::data("load_midx_revindex", "source", "rev");
1961 parsed_rev.positions
1962 }
1963 };
1964 let Ok(bitmap_bytes) = fs::read(&bitmap_path) else {
1965 return Ok(None);
1966 };
1967 let parsed = match PackBitmapIndex::parse(&bitmap_bytes, format, object_count) {
1968 Ok(parsed) => parsed,
1969 Err(_) => return Ok(None),
1970 };
1971 if parsed.pack_checksum != midx.checksum {
1972 return Ok(None);
1973 }
1974
1975 let mut pack_to_oid = Vec::with_capacity(object_count);
1978 for &midx_pos in &reverse_index {
1979 let Some(entry) = midx.objects.get(midx_pos as usize) else {
1980 return Ok(None);
1981 };
1982 pack_to_oid.push(entry.oid);
1983 }
1984 let mut oid_to_pack = HashMap::with_capacity(object_count);
1985 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
1986 oid_to_pack.insert(*oid, pack_pos as u32);
1987 }
1988 match assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
1989 midx.objects.get(position).map(|entry| entry.oid)
1990 }) {
1991 Ok(loaded) => Ok(Some(loaded)),
1992 Err(_) => Ok(None),
1993 }
1994}
1995
1996fn load_pack_bitmap_file(
1997 bitmap_path: &Path,
1998 format: ObjectFormat,
1999) -> Result<Option<LoadedPackBitmap>> {
2000 let index_path = bitmap_path.with_extension("idx");
2001 if !index_path.exists() {
2002 return Ok(None);
2003 }
2004 let index = PackIndex::parse(&fs::read(&index_path)?, format)?;
2005 let object_count = index.entries.len();
2006 let parsed = PackBitmapIndex::parse(&fs::read(bitmap_path)?, format, object_count)?;
2007 if parsed.pack_checksum != index.pack_checksum {
2008 return Ok(None);
2009 }
2010
2011 let mut pack_order: Vec<u32> = (0..object_count as u32).collect();
2012 pack_order.sort_by_key(|index_pos| index.entries[*index_pos as usize].offset);
2013 let mut pack_to_oid = Vec::with_capacity(object_count);
2014 for index_pos in &pack_order {
2015 pack_to_oid.push(index.entries[*index_pos as usize].oid);
2016 }
2017 let mut oid_to_pack = HashMap::with_capacity(object_count);
2018 for (pack_pos, oid) in pack_to_oid.iter().enumerate() {
2019 oid_to_pack.insert(*oid, pack_pos as u32);
2020 }
2021
2022 assemble_loaded_bitmap(parsed, object_count, pack_to_oid, oid_to_pack, |position| {
2023 index.entries.get(position).map(|entry| entry.oid)
2024 })
2025 .map(Some)
2026}
2027
2028fn assemble_loaded_bitmap(
2033 parsed: PackBitmapIndex,
2034 object_count: usize,
2035 pack_to_oid: Vec<ObjectId>,
2036 oid_to_pack: HashMap<ObjectId, u32>,
2037 lookup_oid: impl Fn(usize) -> Option<ObjectId>,
2038) -> Result<LoadedPackBitmap> {
2039 let word_count = object_count.div_ceil(64);
2040 let expand = |bitmap: &sley_pack::EwahBitmap| -> Result<Vec<u64>> {
2041 let mut words = bitmap.to_words()?;
2042 words.resize(word_count, 0);
2043 Ok(words)
2044 };
2045
2046 let mut resolved: Vec<Arc<Vec<u64>>> = Vec::with_capacity(parsed.entries.len());
2047 let mut commit_words = HashMap::with_capacity(parsed.entries.len());
2048 for (entry_index, entry) in parsed.entries.iter().enumerate() {
2049 let mut words = expand(&entry.bitmap)?;
2050 if entry.xor_offset > 0 {
2051 let base_index = entry_index - entry.xor_offset as usize;
2052 let base = &resolved[base_index];
2053 for (dst, src) in words.iter_mut().zip(base.iter()) {
2054 *dst ^= *src;
2055 }
2056 }
2057 let words = Arc::new(words);
2058 resolved.push(Arc::clone(&words));
2059 let commit_oid = lookup_oid(entry.object_position as usize)
2060 .ok_or_else(|| GitError::InvalidFormat("bitmap entry position out of range".into()))?;
2061 commit_words.insert(commit_oid, words);
2062 }
2063
2064 Ok(LoadedPackBitmap {
2065 object_count: object_count as u32,
2066 oid_to_pack,
2067 pack_to_oid,
2068 commit_words,
2069 commits: expand(&parsed.type_bitmaps.commits)?,
2070 trees: expand(&parsed.type_bitmaps.trees)?,
2071 blobs: expand(&parsed.type_bitmaps.blobs)?,
2072 tags: expand(&parsed.type_bitmaps.tags)?,
2073 })
2074}
2075
/// Outcome of a bitmap-assisted reachability walk (see `bitmap_reachable`).
pub struct BitmapWalkResult {
    /// Reachable objects that have a bit position in the bitmap, one bit per
    /// object, 64 bits per word.
    pub words: Vec<u64>,
    /// Reachable objects without a bit position in the bitmap, with their
    /// type, in discovery order.
    pub extended: Vec<(ObjectId, ObjectType)>,
}
2083
2084impl BitmapWalkResult {
2085 pub fn subtract(&mut self, haves: &BitmapWalkResult) {
2087 for (dst, src) in self.words.iter_mut().zip(haves.words.iter()) {
2088 *dst &= !*src;
2089 }
2090 let have_ext: HashSet<ObjectId> = haves.extended.iter().map(|(oid, _)| *oid).collect();
2091 self.extended.retain(|(oid, _)| !have_ext.contains(oid));
2092 }
2093}
2094
2095pub fn bitmap_reachable(
2106 bitmap: &LoadedPackBitmap,
2107 db: &impl ObjectReader,
2108 format: ObjectFormat,
2109 roots: &[ObjectId],
2110 include_objects: bool,
2111) -> Result<BitmapWalkResult> {
2112 let mut walk = BitmapFillWalk {
2113 bitmap,
2114 words: vec![0u64; bitmap.word_count()],
2115 extended: Vec::new(),
2116 extended_seen: HashSet::new(),
2117 };
2118 let mut commit_stack: Vec<ObjectId> = Vec::new();
2119
2120 for root in roots {
2121 let mut oid = *root;
2122 loop {
2124 let object = db.read_object(&oid)?;
2125 match object.object_type {
2126 ObjectType::Tag => {
2127 walk.mark(&oid, ObjectType::Tag);
2128 let tag = Tag::parse_ref(format, &object.body)?;
2129 oid = tag.object;
2130 }
2131 ObjectType::Commit => {
2132 commit_stack.push(oid);
2133 break;
2134 }
2135 ObjectType::Tree => {
2136 walk.mark_tree_closure(db, format, &oid)?;
2137 break;
2138 }
2139 ObjectType::Blob => {
2140 walk.mark(&oid, ObjectType::Blob);
2141 break;
2142 }
2143 }
2144 }
2145 }
2146
2147 while let Some(oid) = commit_stack.pop() {
2148 if let Some(position) = bitmap.pack_position(&oid) {
2149 if bitset_get(&walk.words, position) {
2150 continue;
2151 }
2152 if let Some(stored) = bitmap.bitmap_for_commit(&oid) {
2153 bitset_or(&mut walk.words, stored);
2154 continue;
2155 }
2156 bitset_set(&mut walk.words, position);
2157 } else {
2158 if walk.extended_seen.contains(&oid) {
2159 continue;
2160 }
2161 walk.extended_seen.insert(oid);
2162 walk.extended.push((oid, ObjectType::Commit));
2163 }
2164 let object = db.read_object(&oid)?;
2165 let commit = Commit::parse_ref(format, &object.body)?;
2166 commit_stack.extend(grafted_parents(db, &oid, commit.parents));
2167 if include_objects {
2168 walk.mark_tree_closure(db, format, &commit.tree)?;
2169 }
2170 }
2171
2172 Ok(BitmapWalkResult {
2173 words: walk.words,
2174 extended: walk.extended,
2175 })
2176}
2177
/// Mutable state of one `bitmap_reachable` walk.
struct BitmapFillWalk<'a> {
    /// Bitmap used to translate object ids into bit positions.
    bitmap: &'a LoadedPackBitmap,
    /// Bits of the objects reached so far that have a bit position.
    words: Vec<u64>,
    /// Objects reached so far that have no bit position, in discovery order.
    extended: Vec<(ObjectId, ObjectType)>,
    /// Ids already recorded in `extended`, for de-duplication.
    extended_seen: HashSet<ObjectId>,
}
2184
2185impl BitmapFillWalk<'_> {
2186 fn mark(&mut self, oid: &ObjectId, object_type: ObjectType) -> bool {
2188 if let Some(position) = self.bitmap.pack_position(oid) {
2189 if bitset_get(&self.words, position) {
2190 return false;
2191 }
2192 bitset_set(&mut self.words, position);
2193 true
2194 } else {
2195 if !self.extended_seen.insert(*oid) {
2196 return false;
2197 }
2198 self.extended.push((*oid, object_type));
2199 true
2200 }
2201 }
2202
2203 fn mark_tree_closure(
2207 &mut self,
2208 db: &impl ObjectReader,
2209 format: ObjectFormat,
2210 tree: &ObjectId,
2211 ) -> Result<()> {
2212 if !self.mark(tree, ObjectType::Tree) {
2213 return Ok(());
2214 }
2215 let object = db.read_object(tree)?;
2216 for entry in TreeEntries::new(format, &object.body) {
2217 let entry = entry?;
2218 if entry.is_gitlink() {
2219 continue;
2220 }
2221 if entry.is_tree() {
2222 self.mark_tree_closure(db, format, &entry.oid)?;
2223 } else {
2224 self.mark(&entry.oid, ObjectType::Blob);
2225 }
2226 }
2227 Ok(())
2228 }
2229}
2230
/// An object store that keeps every object in memory.
#[derive(Debug)]
pub struct ObjectDatabase {
    /// Hash format used to compute and validate object ids.
    format: ObjectFormat,
    /// All stored objects keyed by id, guarded by a mutex so reads and writes
    /// work through `&self`.
    objects: Mutex<HashMap<ObjectId, Arc<EncodedObject>>>,
    /// Flag set through `with_promisor`; defaults to `false`.
    // NOTE(review): no reader of this field is visible in this part of the
    // file — confirm where it is consumed.
    promisor: bool,
}
2242
2243impl ObjectDatabase {
2244 pub fn new(format: ObjectFormat) -> Self {
2245 Self {
2246 format,
2247 objects: Mutex::new(HashMap::new()),
2248 promisor: false,
2249 }
2250 }
2251
2252 pub fn with_promisor(mut self, promisor: bool) -> Self {
2253 self.promisor = promisor;
2254 self
2255 }
2256
2257 pub fn contains(&self, oid: &ObjectId) -> bool {
2258 self.objects
2259 .lock()
2260 .map(|objects| objects.contains_key(oid))
2261 .unwrap_or(false)
2262 }
2263
2264 pub fn validate(&self, oid: &ObjectId) -> Result<()> {
2265 let object = self.read_object(oid)?;
2266 let actual = object.object_id(self.format)?;
2267 if &actual == oid {
2268 Ok(())
2269 } else {
2270 Err(GitError::InvalidObject(format!(
2271 "object id mismatch: expected {oid}, got {actual}"
2272 )))
2273 }
2274 }
2275}
2276
2277impl ObjectReader for ObjectDatabase {
2278 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
2279 self.objects
2280 .lock()
2281 .map_err(|_| GitError::object_not_found_in(*oid, MissingObjectContext::Read))?
2282 .get(oid)
2283 .map(Arc::clone)
2284 .or_else(|| implied_empty_tree_object(self.format, oid))
2285 .ok_or_else(|| GitError::object_not_found_in(*oid, MissingObjectContext::Read))
2286 }
2287}
2288
2289impl ObjectWriter for ObjectDatabase {
2290 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
2291 let oid = object.object_id(self.format)?;
2292 self.objects
2293 .lock()
2294 .map_err(|_| GitError::Io("object cache lock poisoned".into()))?
2295 .entry(oid)
2296 .or_insert_with(|| Arc::new(object));
2297 Ok(oid)
2298 }
2299}
2300
/// A single alternate entry, identified by a filesystem path.
// NOTE(review): presumably the path of an alternate object directory —
// confirm against the code that constructs this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Alternate {
    /// Filesystem path of the alternate.
    pub path: std::path::PathBuf,
}
2305
/// Settings describing how a partial clone treats objects it does not have.
// NOTE(review): the semantics below are inferred from the field names only;
// the consuming code is not visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PartialClonePolicy {
    /// Name of the promisor remote, if one is configured.
    pub promisor_remote: Option<String>,
    /// Whether missing promised objects are tolerated.
    pub allow_missing_promised_objects: bool,
}
2311
/// Shared, mutex-guarded map from a path to the loaded `PackData` for it.
// NOTE(review): keys are presumably pack file paths — confirm at the use sites.
type PackBytesCache = Arc<Mutex<HashMap<PathBuf, Arc<PackData>>>>;
2316
/// Bytes of a pack file, held either as a `sley_mmap::MappedFile` or as an
/// owned buffer. Dereferences to `[u8]`.
#[derive(Debug)]
enum PackData {
    /// File opened through `sley_mmap::MappedFile` (only with the `mmap`
    /// feature).
    #[cfg(feature = "mmap")]
    Mapped(sley_mmap::MappedFile),
    /// File contents read fully into memory.
    Heap(Vec<u8>),
}
2325
2326impl std::ops::Deref for PackData {
2327 type Target = [u8];
2328
2329 fn deref(&self) -> &[u8] {
2330 match self {
2331 #[cfg(feature = "mmap")]
2332 Self::Mapped(mapped) => mapped,
2333 Self::Heap(bytes) => bytes,
2334 }
2335 }
2336}
2337
/// Loads the pack at `pack_path`, preferring `MappedFile::open_pack` and
/// falling back to reading the whole file when that fails.
///
/// # Errors
///
/// Returns the read error when the fallback read fails.
#[cfg(feature = "mmap")]
fn load_pack_data(pack_path: &Path) -> Result<PackData> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(pack_path) {
        return Ok(PackData::Mapped(mapped));
    }
    let bytes = fs::read(pack_path)?;
    Ok(PackData::Heap(bytes))
}
2347
2348#[cfg(not(feature = "mmap"))]
2349fn load_pack_data(pack_path: &Path) -> Result<PackData> {
2350 Ok(PackData::Heap(fs::read(pack_path)?))
2351}
2352
/// Loads the pack index at `index_path` as a shared byte source, preferring
/// `MappedFile::open_pack` and falling back to reading the whole file when
/// that fails.
///
/// # Errors
///
/// Returns the read error when the fallback read fails.
#[cfg(feature = "mmap")]
fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
    if let Ok(mapped) = sley_mmap::MappedFile::open_pack(index_path) {
        return Ok(Arc::new(mapped));
    }
    let bytes = fs::read(index_path)?;
    Ok(Arc::new(bytes))
}
2360
2361#[cfg(not(feature = "mmap"))]
2362fn load_pack_index_data(index_path: &Path) -> Result<Arc<dyn PackIndexByteSource>> {
2363 Ok(Arc::new(fs::read(index_path)?))
2364}
2365
/// Shared, mutex-guarded `LruObjectCache`.
// NOTE(review): presumably caches decoded objects by id, as the name
// suggests — `LruObjectCache` is defined outside this view.
type DecodedObjectCache = Arc<Mutex<LruObjectCache>>;

/// Shared map from a path to that path's own mutex-guarded `LruOffsetCache`.
// NOTE(review): presumably one delta-base cache per pack file, keyed by pack
// offset — confirm at the use sites.
type PackDeltaCaches = Arc<Mutex<HashMap<PathBuf, Arc<Mutex<LruOffsetCache>>>>>;

/// Shared map from a `u64` key to an `(ObjectType, u64)` pair.
// NOTE(review): presumably entry offset -> (resolved type, size) for one
// pack — confirm at the use sites.
type PackHeaderTypeCache = Arc<Mutex<HashMap<u64, (ObjectType, u64)>>>;

/// Shared map from a path to that path's `PackHeaderTypeCache`.
type PackHeaderTypeCaches = Arc<Mutex<HashMap<PathBuf, PackHeaderTypeCache>>>;
2391
/// Default object cache budget: 96 MiB. Used by `object_cache_budget` when
/// `SLEY_OBJECT_CACHE_BYTES` is unset or unparsable.
const DEFAULT_OBJECT_CACHE_BYTES: usize = 96 * 1024 * 1024;

/// Default delta base cache budget: 96 MiB. Used by `delta_base_cache_budget`
/// when `SLEY_DELTA_BASE_CACHE_BYTES` is unset or unparsable.
const DEFAULT_DELTA_BASE_CACHE_BYTES: usize = 96 * 1024 * 1024;
2403
2404fn cached_object_cost(object: &EncodedObject) -> usize {
2408 object.body.len().saturating_add(64)
2409}
2410
/// Reads a byte budget from the environment variable `var`.
///
/// The value is trimmed and parsed as `usize`; `default` is returned when the
/// variable is unset, not valid Unicode, or does not parse.
fn cache_budget_from_env(var: &str, default: usize) -> usize {
    env::var(var)
        .ok()
        .and_then(|value| value.trim().parse::<usize>().ok())
        .unwrap_or(default)
}
2419
2420fn object_cache_budget() -> usize {
2427 static BUDGET: OnceLock<usize> = OnceLock::new();
2428 *BUDGET.get_or_init(|| {
2429 cache_budget_from_env("SLEY_OBJECT_CACHE_BYTES", DEFAULT_OBJECT_CACHE_BYTES)
2430 })
2431}
2432
2433fn delta_base_cache_budget() -> usize {
2437 static BUDGET: OnceLock<usize> = OnceLock::new();
2438 *BUDGET.get_or_init(|| {
2439 cache_budget_from_env(
2440 "SLEY_DELTA_BASE_CACHE_BYTES",
2441 DEFAULT_DELTA_BASE_CACHE_BYTES,
2442 )
2443 })
2444}
2445
/// Reports whether `SLEY_VERIFY_READS` requests read verification.
///
/// Enabled when the variable is set to anything other than an empty string
/// or `0` (after trimming). The answer is computed on first use and cached
/// for the rest of the process.
fn verify_reads_enabled() -> bool {
    static VERIFY: OnceLock<bool> = OnceLock::new();
    let enabled = VERIFY.get_or_init(|| {
        env::var("SLEY_VERIFY_READS").is_ok_and(|value| {
            let setting = value.trim();
            !(setting.is_empty() || setting == "0")
        })
    });
    *enabled
}
2463
/// Byte-budgeted cache of encoded objects with recency ordering.
///
/// Entries form a doubly linked list threaded through `map` by key: each
/// entry stores the keys of its neighbours. `touch` moves an entry to the
/// `tail` end, so `tail` is the most recently used entry.
#[derive(Debug)]
struct LruCache<K: std::hash::Hash + Eq + Clone> {
    /// Maximum total cost the cache may hold; `put` does nothing when this
    /// is 0.
    budget: usize,
    /// Running total of `cached_object_cost` for the held entries.
    used: usize,
    /// Entries by key, each carrying its list links.
    map: HashMap<K, LruEntry<K>>,
    /// Key at the front of the list (the end opposite to `tail`).
    // NOTE(review): presumably the eviction end — the eviction code is not
    // visible here.
    head: Option<K>,
    /// Key at the back of the list: the most recently attached or touched
    /// entry.
    tail: Option<K>,
}
2479
/// One cached object plus its links in the recency list of `LruCache`.
#[derive(Debug)]
struct LruEntry<K> {
    /// The cached object.
    object: Arc<EncodedObject>,
    /// Key of the neighbour towards `head`, or `None` at the head.
    prev: Option<K>,
    /// Key of the neighbour towards `tail`, or `None` at the tail.
    next: Option<K>,
}
2486
2487impl<K: std::hash::Hash + Eq + Clone> LruCache<K> {
2488 fn new(budget: usize) -> Self {
2489 Self {
2490 budget,
2491 used: 0,
2492 map: HashMap::new(),
2493 head: None,
2494 tail: None,
2495 }
2496 }
2497
2498 fn get(&mut self, key: &K) -> Option<Arc<EncodedObject>> {
2499 let object = Arc::clone(&self.map.get(key)?.object);
2500 self.touch(key);
2501 Some(object)
2502 }
2503
2504 fn touch(&mut self, key: &K) {
2506 if self.tail.as_ref() == Some(key) {
2507 return;
2508 }
2509 if self.map.contains_key(key) {
2510 self.detach(key);
2511 self.attach_back(key.clone());
2512 }
2513 }
2514
2515 fn remove(&mut self, key: &K) {
2517 if let Some(entry) = self.map.get(key) {
2518 self.used = self.used.saturating_sub(cached_object_cost(&entry.object));
2519 }
2520 self.detach(key);
2521 self.map.remove(key);
2522 }
2523
2524 fn detach(&mut self, key: &K) {
2525 let Some((prev, next)) = self.map.get_mut(key).map(|entry| {
2526 let prev = entry.prev.take();
2527 let next = entry.next.take();
2528 (prev, next)
2529 }) else {
2530 return;
2531 };
2532
2533 match &prev {
2534 Some(prev_key) => {
2535 if let Some(prev_entry) = self.map.get_mut(prev_key) {
2536 prev_entry.next = next.clone();
2537 }
2538 }
2539 None => self.head = next.clone(),
2540 }
2541 match &next {
2542 Some(next_key) => {
2543 if let Some(next_entry) = self.map.get_mut(next_key) {
2544 next_entry.prev = prev.clone();
2545 }
2546 }
2547 None => self.tail = prev.clone(),
2548 }
2549 }
2550
2551 fn attach_back(&mut self, key: K) {
2552 let previous_tail = self.tail.replace(key.clone());
2553 match previous_tail {
2554 Some(tail_key) => {
2555 if let Some(tail_entry) = self.map.get_mut(&tail_key) {
2556 tail_entry.next = Some(key.clone());
2557 }
2558 if let Some(entry) = self.map.get_mut(&key) {
2559 entry.prev = Some(tail_key);
2560 entry.next = None;
2561 }
2562 }
2563 None => {
2564 self.head = Some(key.clone());
2565 if let Some(entry) = self.map.get_mut(&key) {
2566 entry.prev = None;
2567 entry.next = None;
2568 }
2569 }
2570 }
2571 }
2572
2573 fn clear(&mut self) {
2574 self.map.clear();
2575 self.head = None;
2576 self.tail = None;
2577 self.used = 0;
2578 }
2579
2580 fn put(&mut self, key: K, object: Arc<EncodedObject>) {
2581 if self.budget == 0 {
2582 return;
2583 }
2584 let cost = cached_object_cost(&object);
2585 if cost > self.budget {
2589 self.remove(&key);
2590 return;
2591 }
2592 if let Some(entry) = self.map.get_mut(&key) {
2593 let previous = std::mem::replace(&mut entry.object, object);
2594 self.used = self
2596 .used
2597 .saturating_sub(cached_object_cost(&previous))
2598 .saturating_add(cost);
2599 self.touch(&key);
2600 } else {
2601 self.used = self.used.saturating_add(cost);
2602 self.map.insert(
2603 key.clone(),
2604 LruEntry {
2605 object,
2606 prev: None,
2607 next: None,
2608 },
2609 );
2610 self.attach_back(key);
2611 }
2612 while self.used > self.budget {
2613 let Some(evicted) = self.head.clone() else {
2614 break;
2615 };
2616 self.remove(&evicted);
2617 }
2618 }
2619}
2620
2621type LruObjectCache = LruCache<ObjectId>;
2623type LruOffsetCache = LruCache<u64>;
2625
2626struct PackDeltaCacheAdapter<'a>(&'a Arc<Mutex<LruOffsetCache>>);
2631
2632impl sley_pack::PackDeltaCache for PackDeltaCacheAdapter<'_> {
2633 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
2634 self.0.lock().ok()?.get(&offset)
2635 }
2636
2637 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
2638 if let Ok(mut cache) = self.0.lock() {
2639 cache.put(offset, object);
2640 }
2641 }
2642}
2643
2644struct PackHeaderTypeCacheAdapter<'a>(&'a PackHeaderTypeCache);
2648
2649impl sley_pack::HeaderTypeCache for PackHeaderTypeCacheAdapter<'_> {
2650 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
2651 self.0.lock().ok()?.get(&pack_offset).copied()
2652 }
2653
2654 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
2655 if let Ok(mut cache) = self.0.lock() {
2656 cache.insert(pack_offset, header);
2657 }
2658 }
2659}
2660
2661type PackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<PackIndex>>>>;
2666
2667type MultiPackIndexCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndex>>>>;
2671
2672type MultiPackIndexOidLookupCache = Arc<Mutex<HashMap<PathBuf, Arc<MultiPackIndexOidLookup>>>>;
2676
2677#[derive(Debug)]
2682struct RegisteredPack {
2683 idx: PathBuf,
2684 pack: PathBuf,
2685 index: Mutex<Option<Arc<PackIndexViewData>>>,
2686 data: Mutex<Option<Arc<PackData>>>,
2687 delta_cache: Arc<Mutex<LruOffsetCache>>,
2688 header_type_cache: PackHeaderTypeCache,
2689}
2690
2691impl RegisteredPack {
2692 fn new(idx: PathBuf, pack: PathBuf) -> Self {
2693 Self {
2694 idx,
2695 pack,
2696 index: Mutex::new(None),
2697 data: Mutex::new(None),
2698 delta_cache: Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget()))),
2699 header_type_cache: Arc::new(Mutex::new(HashMap::new())),
2700 }
2701 }
2702
2703 fn index(&self, format: ObjectFormat) -> Result<Arc<PackIndexViewData>> {
2704 if let Ok(cache) = self.index.lock()
2705 && let Some(index) = cache.as_ref()
2706 {
2707 return Ok(Arc::clone(index));
2708 }
2709 let index_bytes = load_pack_index_data(&self.idx)?;
2710 let index = Arc::new(PackIndexViewData::parse_trusted_source_without_checksum(
2711 index_bytes,
2712 format,
2713 )?);
2714 if let Ok(mut cache) = self.index.lock() {
2715 *cache = Some(Arc::clone(&index));
2716 }
2717 Ok(index)
2718 }
2719
2720 fn bytes(&self, pack_bytes: &PackBytesCache) -> Result<Arc<PackData>> {
2721 if let Ok(cache) = self.data.lock()
2722 && let Some(bytes) = cache.as_ref()
2723 {
2724 return Ok(Arc::clone(bytes));
2725 }
2726 if let Ok(cache) = pack_bytes.lock()
2727 && let Some(bytes) = cache.get(&self.pack)
2728 {
2729 let bytes = Arc::clone(bytes);
2730 if let Ok(mut local_cache) = self.data.lock() {
2731 *local_cache = Some(Arc::clone(&bytes));
2732 }
2733 return Ok(bytes);
2734 }
2735 let bytes = Arc::new(load_pack_data(&self.pack)?);
2736 if let Ok(mut local_cache) = self.data.lock() {
2737 *local_cache = Some(Arc::clone(&bytes));
2738 }
2739 if let Ok(mut cache) = pack_bytes.lock() {
2740 cache.insert(self.pack.clone(), Arc::clone(&bytes));
2741 }
2742 Ok(bytes)
2743 }
2744}
2745
/// Comparable summary of a pack directory's state.
// NOTE(review): the code that fills these fields is outside this section;
// the field meanings below are presumed from their names.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PackDirFingerprint {
    /// Presumably the directory's modification time, when available.
    modified: Option<std::time::SystemTime>,
    /// Presumably the number of index files seen.
    idx_count: usize,
    /// Presumably the number of pack files seen.
    pack_count: usize,
}
2752
2753#[derive(Debug)]
2758struct PackRegistrySnapshot {
2759 fingerprint: PackDirFingerprint,
2760 packs: Vec<Arc<RegisteredPack>>,
2761 recent_pack: Mutex<Option<usize>>,
2762}
2763
2764impl PackRegistrySnapshot {
2765 fn new(fingerprint: PackDirFingerprint, packs: Vec<Arc<RegisteredPack>>) -> Self {
2766 Self {
2767 fingerprint,
2768 packs,
2769 recent_pack: Mutex::new(None),
2770 }
2771 }
2772
2773 fn cached_hint(&self) -> Option<usize> {
2774 self.recent_pack
2775 .lock()
2776 .ok()
2777 .and_then(|hint| *hint)
2778 .filter(|pack_index| *pack_index < self.packs.len())
2779 }
2780
2781 fn remember_hint(&self, pack_index: usize) {
2782 if let Ok(mut hint) = self.recent_pack.lock() {
2783 *hint = Some(pack_index);
2784 }
2785 }
2786}
2787
2788type PackRegistryCache = Arc<Mutex<Option<Arc<PackRegistrySnapshot>>>>;
2792
2793#[derive(Debug, Clone)]
2794struct PackLookup {
2795 pack: PathBuf,
2796 registered: Option<Arc<RegisteredPack>>,
2797 offset: u64,
2798}
2799
2800impl PackLookup {
2801 fn from_registered(pack: Arc<RegisteredPack>, offset: u64) -> Self {
2802 Self {
2803 pack: pack.pack.clone(),
2804 registered: Some(pack),
2805 offset,
2806 }
2807 }
2808
2809 fn from_path(pack: PathBuf, offset: u64) -> Self {
2810 Self {
2811 pack,
2812 registered: None,
2813 offset,
2814 }
2815 }
2816
2817 fn pack_path(&self) -> &Path {
2818 &self.pack
2819 }
2820
2821 fn pack_bytes(&self, database: &FileObjectDatabase) -> Result<Arc<PackData>> {
2822 match &self.registered {
2823 Some(pack) => pack.bytes(&database.pack_bytes),
2824 None => database.cached_pack_bytes(&self.pack),
2825 }
2826 }
2827
2828 fn pack_index(&self, database: &FileObjectDatabase) -> Result<Arc<PackIndex>> {
2829 match &self.registered {
2830 Some(pack) => database.cached_pack_index(&pack.idx),
2831 None => database.cached_pack_index(&self.pack.with_extension("idx")),
2832 }
2833 }
2834
2835 fn delta_cache(&self, database: &FileObjectDatabase) -> Option<Arc<Mutex<LruOffsetCache>>> {
2836 match &self.registered {
2837 Some(pack) => Some(Arc::clone(&pack.delta_cache)),
2838 None => database.pack_delta_cache(&self.pack),
2839 }
2840 }
2841
2842 fn header_type_cache(&self, database: &FileObjectDatabase) -> Option<PackHeaderTypeCache> {
2843 match &self.registered {
2844 Some(pack) => Some(Arc::clone(&pack.header_type_cache)),
2845 None => database.pack_header_type_cache(&self.pack),
2846 }
2847 }
2848}
2849
2850#[derive(Debug, Clone)]
2851pub struct FileObjectDatabase {
2852 loose: LooseObjectStore,
2853 objects_dir: PathBuf,
2854 alternates: Vec<PathBuf>,
2855 format: ObjectFormat,
2856 pack_bytes: PackBytesCache,
2857 pack_indexes: PackIndexCache,
2858 multi_pack_indexes: MultiPackIndexCache,
2859 multi_pack_oid_lookups: MultiPackIndexOidLookupCache,
2860 pack_registry: PackRegistryCache,
2861 decoded: DecodedObjectCache,
2862 pack_deltas: PackDeltaCaches,
2863 pack_header_types: PackHeaderTypeCaches,
2864 shallow_grafts: Arc<std::sync::OnceLock<HashSet<ObjectId>>>,
2868}
2869
2870#[derive(Debug)]
2871pub struct ObjectPresenceChecker {
2872 db: FileObjectDatabase,
2873 pack_dir: PathBuf,
2874 midx: Option<Arc<MultiPackIndexOidLookup>>,
2875 registry: Option<Arc<PackRegistrySnapshot>>,
2876 registry_indexes: Vec<Option<Arc<PackIndexViewData>>>,
2877 recent_pack: Option<usize>,
2878 prepared_packs: bool,
2879 prepared_registry: bool,
2880}
2881
2882impl ObjectPresenceChecker {
2883 fn new(db: FileObjectDatabase) -> Self {
2884 let pack_dir = db.objects_dir.join("pack");
2885 Self {
2886 db,
2887 pack_dir,
2888 midx: None,
2889 registry: None,
2890 registry_indexes: Vec::new(),
2891 recent_pack: None,
2892 prepared_packs: false,
2893 prepared_registry: false,
2894 }
2895 }
2896
2897 pub fn contains(&mut self, oid: &ObjectId) -> Result<bool> {
2898 if oid.format() != self.db.format {
2899 return Err(GitError::InvalidObjectId(format!(
2900 "object {oid} uses {}, store uses {}",
2901 oid.format().name(),
2902 self.db.format.name()
2903 )));
2904 }
2905 if self.db.loose.exists(oid)? {
2906 return Ok(true);
2907 }
2908 if self.find_packed(oid, false)? {
2909 return Ok(true);
2910 }
2911 if self.find_packed(oid, true)? {
2912 return Ok(true);
2913 }
2914 for alternate in &self.db.alternates {
2915 if FileObjectDatabase::without_alternates(alternate, self.db.format).contains(oid)? {
2916 return Ok(true);
2917 }
2918 }
2919 self.db.loose.invalidate_cache();
2922 self.db.loose.exists(oid)
2923 }
2924
2925 fn find_packed(&mut self, oid: &ObjectId, force_rescan: bool) -> Result<bool> {
2926 self.prepare_packs(force_rescan)?;
2927 if let Some(midx) = &self.midx
2928 && midx.contains(oid)
2929 {
2930 return Ok(true);
2931 }
2932 self.prepare_registry(force_rescan)?;
2933 self.find_in_registry(oid)
2934 }
2935
2936 fn prepare_packs(&mut self, force_rescan: bool) -> Result<()> {
2937 if self.prepared_packs && !force_rescan {
2938 return Ok(());
2939 }
2940 let midx_path = self.pack_dir.join("multi-pack-index");
2941 self.midx = self.db.cached_multi_pack_index_oid_lookup(&midx_path)?;
2942 self.prepared_packs = true;
2943 Ok(())
2944 }
2945
2946 fn prepare_registry(&mut self, force_rescan: bool) -> Result<()> {
2947 if self.prepared_registry && !force_rescan {
2948 return Ok(());
2949 }
2950 let registry = self.db.cached_pack_registry(&self.pack_dir, force_rescan)?;
2951 let registry_changed = match self.registry.as_ref() {
2952 Some(cached) => !Arc::ptr_eq(cached, ®istry),
2953 None => true,
2954 };
2955 if registry_changed {
2956 self.registry_indexes = vec![None; registry.packs.len()];
2957 self.recent_pack = None;
2958 self.registry = Some(registry);
2959 }
2960 self.prepared_registry = true;
2961 Ok(())
2962 }
2963
2964 fn find_in_registry(&mut self, oid: &ObjectId) -> Result<bool> {
2965 let Some(registry) = self.registry.as_ref().map(Arc::clone) else {
2966 return Ok(false);
2967 };
2968 if let Some(pack_index) = self
2969 .recent_pack
2970 .filter(|pack_index| *pack_index < registry.packs.len())
2971 {
2972 let index = self.registry_index(®istry, pack_index)?;
2973 if index.find(oid).is_some() {
2974 return Ok(true);
2975 }
2976 }
2977 for pack_index in 0..registry.packs.len() {
2978 if Some(pack_index) == self.recent_pack {
2979 continue;
2980 }
2981 let index = self.registry_index(®istry, pack_index)?;
2982 if index.find(oid).is_some() {
2983 self.recent_pack = Some(pack_index);
2984 return Ok(true);
2985 }
2986 }
2987 Ok(false)
2988 }
2989
2990 fn registry_index(
2991 &mut self,
2992 registry: &PackRegistrySnapshot,
2993 pack_index: usize,
2994 ) -> Result<Arc<PackIndexViewData>> {
2995 if self.registry_indexes.len() != registry.packs.len() {
2996 self.registry_indexes = vec![None; registry.packs.len()];
2997 self.recent_pack = None;
2998 }
2999 if let Some(index) = self
3000 .registry_indexes
3001 .get(pack_index)
3002 .and_then(|index| index.as_ref())
3003 {
3004 return Ok(Arc::clone(index));
3005 }
3006 let index = registry.packs[pack_index].index(self.db.format)?;
3007 if let Some(slot) = self.registry_indexes.get_mut(pack_index) {
3008 *slot = Some(Arc::clone(&index));
3009 }
3010 Ok(index)
3011 }
3012}
3013
3014fn read_shallow_grafts(shallow_file: &Path, format: ObjectFormat) -> HashSet<ObjectId> {
3018 let Ok(contents) = std::fs::read_to_string(shallow_file) else {
3019 return HashSet::new();
3020 };
3021 contents
3022 .lines()
3023 .filter_map(|line| ObjectId::from_hex(format, line.trim()).ok())
3024 .collect()
3025}
3026
3027pub fn repository_objects_dir(git_dir: impl AsRef<Path>) -> PathBuf {
3028 env::var_os("GIT_OBJECT_DIRECTORY")
3029 .map(PathBuf::from)
3030 .unwrap_or_else(|| repository_common_dir(git_dir).join("objects"))
3031}
3032
/// Returns the common directory of the repository at `git_dir`.
///
/// Resolution order:
/// 1. `GIT_COMMON_DIR`, when set, is returned as is.
/// 2. If `<git_dir>/commondir` is readable, its trimmed contents name the
///    directory; a relative value is resolved against `git_dir`. The result
///    is canonicalized when possible and returned uncanonicalized otherwise.
/// 3. Otherwise `git_dir` itself is the common directory.
pub fn repository_common_dir(git_dir: impl AsRef<Path>) -> PathBuf {
    if let Some(overridden) = env::var_os("GIT_COMMON_DIR") {
        return overridden.into();
    }
    let git_dir = git_dir.as_ref();
    let Ok(pointer) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(pointer.trim());
    let common = if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    };
    fs::canonicalize(&common).unwrap_or(common)
}
3050
3051pub fn repository_object_ids(
3052 git_dir: impl AsRef<Path>,
3053 format: ObjectFormat,
3054) -> Result<Vec<ObjectId>> {
3055 object_ids_in_objects_dir(repository_objects_dir(git_dir), format)
3056}
3057
3058pub fn object_ids_in_objects_dir(
3059 objects_dir: impl AsRef<Path>,
3060 format: ObjectFormat,
3061) -> Result<Vec<ObjectId>> {
3062 let objects_dir = objects_dir.as_ref();
3063 let mut oids = HashSet::new();
3064 collect_loose_object_ids(objects_dir, format, &mut oids)?;
3065 collect_packed_object_ids(&objects_dir.join("pack"), format, &mut oids)?;
3066 let mut oids = oids.into_iter().collect::<Vec<_>>();
3067 oids.sort_by_key(ObjectId::to_hex);
3068 Ok(oids)
3069}
3070
3071fn collect_loose_object_ids(
3072 objects_dir: &Path,
3073 format: ObjectFormat,
3074 oids: &mut HashSet<ObjectId>,
3075) -> Result<()> {
3076 if !objects_dir.exists() {
3077 return Ok(());
3078 }
3079 let hex_len = format.hex_len();
3080 for entry in fs::read_dir(objects_dir)? {
3081 let entry = entry?;
3082 if !entry.file_type()?.is_dir() {
3083 continue;
3084 }
3085 let name = entry.file_name();
3086 let Some(fanout) = name.to_str() else {
3087 continue;
3088 };
3089 if fanout.len() != 2 || !fanout.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3090 continue;
3091 }
3092 for object_entry in fs::read_dir(entry.path())? {
3093 let object_entry = object_entry?;
3094 if !object_entry.file_type()?.is_file() {
3095 continue;
3096 }
3097 let name = object_entry.file_name();
3098 let Some(suffix) = name.to_str() else {
3099 continue;
3100 };
3101 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3102 continue;
3103 }
3104 oids.insert(ObjectId::from_hex(format, &format!("{fanout}{suffix}"))?);
3105 }
3106 }
3107 Ok(())
3108}
3109
3110fn collect_loose_fanout_object_ids(
3111 objects_dir: &Path,
3112 format: ObjectFormat,
3113 fanout: u8,
3114 oids: &mut HashSet<ObjectId>,
3115) -> Result<()> {
3116 let fanout_hex = format!("{fanout:02x}");
3117 let fanout_dir = objects_dir.join(&fanout_hex);
3118 let entries = match fs::read_dir(&fanout_dir) {
3119 Ok(entries) => entries,
3120 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
3121 Err(err) => return Err(GitError::Io(err.to_string())),
3122 };
3123 let hex_len = format.hex_len();
3124 for object_entry in entries {
3125 let object_entry = object_entry?;
3126 let name = object_entry.file_name();
3127 let Some(suffix) = name.to_str() else {
3128 continue;
3129 };
3130 if suffix.len() != hex_len - 2 || !suffix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3131 continue;
3132 }
3133 oids.insert(ObjectId::from_hex(
3134 format,
3135 &format!("{fanout_hex}{suffix}"),
3136 )?);
3137 }
3138 Ok(())
3139}
3140
3141#[derive(Debug, Default)]
3142struct LoosePresenceCache {
3143 loaded_fanouts: HashSet<u8>,
3144 objects: HashSet<ObjectId>,
3145}
3146
3147pub fn packed_object_ids(
3152 objects_dir: impl AsRef<Path>,
3153 format: ObjectFormat,
3154) -> Result<HashSet<ObjectId>> {
3155 let mut oids = HashSet::new();
3156 collect_packed_object_ids(&objects_dir.as_ref().join("pack"), format, &mut oids)?;
3157 Ok(oids)
3158}
3159
3160fn collect_packed_object_ids(
3161 pack_dir: &Path,
3162 format: ObjectFormat,
3163 oids: &mut HashSet<ObjectId>,
3164) -> Result<()> {
3165 if !pack_dir.exists() {
3166 return Ok(());
3167 }
3168 let midx_path = pack_dir.join("multi-pack-index");
3169 if midx_path.exists() {
3170 let midx = MultiPackIndex::parse(&fs::read(&midx_path)?, format)?;
3171 oids.extend(midx.objects.into_iter().map(|entry| entry.oid));
3172 }
3173 for entry in fs::read_dir(pack_dir)? {
3174 let path = entry?.path();
3175 if path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3176 continue;
3177 }
3178 let index = PackIndex::parse(&fs::read(path)?, format)?;
3179 oids.extend(index.entries.into_iter().map(|entry| entry.oid));
3180 }
3181 Ok(())
3182}
3183
3184impl FileObjectDatabase {
3185 pub fn object_format(&self) -> ObjectFormat {
3187 self.format
3188 }
3189
3190 pub fn objects_dir(&self) -> &Path {
3192 &self.objects_dir
3193 }
3194
3195 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3196 let objects_dir = objects_dir.into();
3197 Self {
3198 loose: LooseObjectStore::new(objects_dir.clone(), format),
3199 alternates: alternate_object_dirs(&objects_dir),
3200 objects_dir,
3201 format,
3202 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3203 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3204 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3205 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3206 pack_registry: Arc::new(Mutex::new(None)),
3207 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3208 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3209 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3210 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3211 }
3212 }
3213
3214 fn without_alternates(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
3215 let objects_dir = objects_dir.into();
3216 Self {
3217 loose: LooseObjectStore::new(objects_dir.clone(), format),
3218 alternates: Vec::new(),
3219 objects_dir,
3220 format,
3221 pack_bytes: Arc::new(Mutex::new(HashMap::new())),
3222 pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3223 multi_pack_indexes: Arc::new(Mutex::new(HashMap::new())),
3224 multi_pack_oid_lookups: Arc::new(Mutex::new(HashMap::new())),
3225 pack_registry: Arc::new(Mutex::new(None)),
3226 decoded: Arc::new(Mutex::new(LruObjectCache::new(object_cache_budget()))),
3227 pack_deltas: Arc::new(Mutex::new(HashMap::new())),
3228 pack_header_types: Arc::new(Mutex::new(HashMap::new())),
3229 shallow_grafts: Arc::new(std::sync::OnceLock::new()),
3230 }
3231 }
3232
3233 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
3234 Self::new(repository_objects_dir(git_dir), format)
3235 }
3236
3237 pub fn refresh_read_cache(&self) {
3242 if let Ok(mut cache) = self.pack_registry.lock() {
3243 *cache = None;
3244 }
3245 if let Ok(mut cache) = self.pack_indexes.lock() {
3246 cache.clear();
3247 }
3248 if let Ok(mut cache) = self.multi_pack_indexes.lock() {
3249 cache.clear();
3250 }
3251 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3252 cache.clear();
3253 }
3254 if let Ok(mut cache) = self.pack_bytes.lock() {
3255 cache.clear();
3256 }
3257 if let Ok(mut cache) = self.pack_deltas.lock() {
3258 cache.clear();
3259 }
3260 if let Ok(mut cache) = self.pack_header_types.lock() {
3261 cache.clear();
3262 }
3263 if let Ok(mut cache) = self.decoded.lock() {
3264 cache.clear();
3265 }
3266 self.loose.invalidate_cache();
3267 }
3268
3269 pub fn loose(&self) -> &LooseObjectStore {
3270 &self.loose
3271 }
3272
3273 pub fn presence_checker(&self) -> ObjectPresenceChecker {
3274 ObjectPresenceChecker::new(self.clone())
3275 }
3276
3277 pub fn install_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3278 self.install_pack_with_options(pack, RawPackInstallOptions::default())
3279 }
3280
3281 pub fn install_pack_with_options(
3282 &self,
3283 pack: &PackWrite,
3284 options: RawPackInstallOptions,
3285 ) -> Result<PackInstallResult> {
3286 if pack.checksum.format() != self.format {
3287 return Err(GitError::InvalidObjectId(format!(
3288 "pack checksum uses {}, store uses {}",
3289 pack.checksum.format().name(),
3290 self.format.name()
3291 )));
3292 }
3293 for entry in &pack.entries {
3294 if entry.oid.format() != self.format {
3295 return Err(GitError::InvalidObjectId(format!(
3296 "pack entry {} uses {}, store uses {}",
3297 entry.oid,
3298 entry.oid.format().name(),
3299 self.format.name()
3300 )));
3301 }
3302 }
3303 let canonical_index = PackIndex::write_v2_for_pack(&pack.pack, self.format)?;
3304 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3305 if canonical_index.pack_checksum != pack.checksum
3306 || parsed_index.pack_checksum != pack.checksum
3307 {
3308 return Err(GitError::InvalidFormat(
3309 "pack and index checksums do not match pack write".into(),
3310 ));
3311 }
3312 if pack.index != canonical_index.index {
3313 return Err(GitError::InvalidFormat(
3314 "pack index does not match pack contents".into(),
3315 ));
3316 }
3317
3318 let pack_dir = self.objects_dir.join("pack");
3319 fs::create_dir_all(&pack_dir)?;
3320 let pack_name = format!("pack-{}", pack.checksum.to_hex());
3321 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3322 let index_path = pack_dir.join(format!("{pack_name}.idx"));
3323 if !pack_path.exists() || !index_path.exists() {
3324 write_pack_component(&pack_path, &pack.pack)?;
3325 write_pack_component(&index_path, &pack.index)?;
3326 }
3327 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3328 Ok(PackInstallResult {
3329 pack_name,
3330 pack_path,
3331 index_path,
3332 promisor_path,
3333 object_ids: canonical_index
3334 .entries
3335 .iter()
3336 .map(|entry| entry.oid)
3337 .collect(),
3338 })
3339 }
3340
3341 pub fn install_written_pack(&self, pack: &PackWrite) -> Result<PackInstallResult> {
3349 self.install_written_pack_with_options(pack, RawPackInstallOptions::default())
3350 }
3351
3352 pub fn install_written_pack_with_options(
3353 &self,
3354 pack: &PackWrite,
3355 options: RawPackInstallOptions,
3356 ) -> Result<PackInstallResult> {
3357 validate_pack_checksum(&pack.pack, self.format, &pack.checksum, "pack write")?;
3358 let parsed_index = PackIndex::parse(&pack.index, self.format)?;
3359 if parsed_index.pack_checksum != pack.checksum {
3360 return Err(GitError::InvalidFormat(
3361 "pack write index checksum does not match pack".into(),
3362 ));
3363 }
3364 if !pack_index_entries_match_writer(&parsed_index.entries, &pack.entries) {
3365 return Err(GitError::InvalidFormat(
3366 "pack write index does not match generated entries".into(),
3367 ));
3368 }
3369 self.install_generated_pack_unchecked(pack, options)
3370 }
3371
3372 fn install_generated_pack_unchecked(
3373 &self,
3374 pack: &PackWrite,
3375 options: RawPackInstallOptions,
3376 ) -> Result<PackInstallResult> {
3377 let pack_dir = self.objects_dir.join("pack");
3378 fs::create_dir_all(&pack_dir)?;
3379 let pack_name = format!("pack-{}", pack.checksum.to_hex());
3380 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3381 let index_path = pack_dir.join(format!("{pack_name}.idx"));
3382 if !pack_path.exists() || !index_path.exists() {
3383 write_pack_component(&pack_path, &pack.pack)?;
3384 write_pack_component(&index_path, &pack.index)?;
3385 }
3386 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3387 Ok(PackInstallResult {
3388 pack_name,
3389 pack_path,
3390 index_path,
3391 promisor_path,
3392 object_ids: pack.entries.iter().map(|entry| entry.oid).collect(),
3393 })
3394 }
3395
3396 pub fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<PackInstallResult> {
3397 self.install_raw_pack_with_options(pack_bytes, RawPackInstallOptions::default())
3398 }
3399
3400 pub fn install_raw_pack_with_options(
3401 &self,
3402 pack_bytes: &[u8],
3403 options: RawPackInstallOptions,
3404 ) -> Result<PackInstallResult> {
3405 let built = PackIndex::write_v2_for_pack(pack_bytes, self.format)?;
3406 let pack_dir = self.objects_dir.join("pack");
3407 fs::create_dir_all(&pack_dir)?;
3408 let pack_name = format!("pack-{}", built.pack_checksum.to_hex());
3409 let pack_path = pack_dir.join(format!("{pack_name}.pack"));
3410 let index_path = pack_dir.join(format!("{pack_name}.idx"));
3411 if !pack_path.exists() || !index_path.exists() {
3412 write_pack_component(&pack_path, pack_bytes)?;
3413 write_pack_component(&index_path, &built.index)?;
3414 }
3415 let promisor_path = write_promisor_pack_sidecar(&pack_dir, &pack_name, options.promisor)?;
3416 Ok(PackInstallResult {
3417 pack_name,
3418 pack_path,
3419 index_path,
3420 promisor_path,
3421 object_ids: built.entries.iter().map(|entry| entry.oid).collect(),
3422 })
3423 }
3424
3425 pub fn contains(&self, oid: &ObjectId) -> Result<bool> {
3426 if self.loose.exists(oid)? {
3427 return Ok(true);
3428 }
3429 if self.find_pack_containing(oid)?.is_some() {
3430 return Ok(true);
3431 }
3432 for alternate in &self.alternates {
3433 if Self::without_alternates(alternate, self.format).contains(oid)? {
3434 return Ok(true);
3435 }
3436 }
3437 self.loose.invalidate_cache();
3440 self.loose.exists(oid)
3441 }
3442
3443 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
3444 let mut oids = object_ids_in_objects_dir(&self.objects_dir, self.format)?
3445 .into_iter()
3446 .collect::<HashSet<_>>();
3447 for alternate in &self.alternates {
3448 oids.extend(Self::without_alternates(alternate, self.format).object_ids()?);
3449 }
3450 let mut oids = oids.into_iter().collect::<Vec<_>>();
3451 oids.sort_by_key(ObjectId::to_hex);
3452 Ok(oids)
3453 }
3454
3455 pub fn object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3456 if let Some(disk_size) = self.loose.disk_size(oid)? {
3457 return Ok(Some(ObjectStorageInfo {
3458 disk_size,
3459 deltabase: zero_oid(self.format)?,
3460 }));
3461 }
3462 if let Some(info) = self.packed_object_storage_info(oid)? {
3463 return Ok(Some(info));
3464 }
3465 for alternate in &self.alternates {
3466 if let Some(info) =
3467 Self::without_alternates(alternate, self.format).object_storage_info(oid)?
3468 {
3469 return Ok(Some(info));
3470 }
3471 }
3472 self.loose.invalidate_cache();
3475 if let Some(disk_size) = self.loose.disk_size(oid)? {
3476 return Ok(Some(ObjectStorageInfo {
3477 disk_size,
3478 deltabase: zero_oid(self.format)?,
3479 }));
3480 }
3481 Ok(None)
3482 }
3483
3484 pub fn resolve_prefix(&self, prefix: &str) -> Result<ObjectPrefixResolution> {
3485 validate_object_id_prefix(self.format, prefix)?;
3486 let mut matches = Vec::new();
3487 for oid in self.object_ids()? {
3488 if object_id_matches_prefix(&oid, prefix) {
3489 matches.push(oid);
3490 }
3491 }
3492 Ok(match matches.len() {
3493 0 => ObjectPrefixResolution::Missing,
3494 1 => ObjectPrefixResolution::Unique(matches.remove(0)),
3495 _ => ObjectPrefixResolution::Ambiguous(matches),
3496 })
3497 }
3498
3499 pub fn read_object_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
3509 if implied_empty_tree_object(self.format, oid).is_some() {
3510 return Ok(Some((ObjectType::Tree, 0)));
3511 }
3512 if let Ok(mut cache) = self.decoded.lock()
3513 && let Some(object) = cache.get(oid)
3514 {
3515 return Ok(Some((object.object_type, object.body.len() as u64)));
3516 }
3517 if let Some(header) = self.loose.read_header(oid)? {
3518 return Ok(Some(header));
3519 }
3520 if let Some(pack_lookup) = self.find_pack_containing(oid)? {
3521 let bytes = pack_lookup.pack_bytes(self)?;
3522 let type_cache = pack_lookup.header_type_cache(self);
3527 let resolve_ref_base = |base: &ObjectId| {
3528 self.read_object_header(base)
3529 .map(|header| header.map(|(t, _)| t))
3530 };
3531 let header = match &type_cache {
3532 Some(cache) => {
3533 let mut adapter = PackHeaderTypeCacheAdapter(cache);
3534 sley_pack::read_object_header_at_with_cache(
3535 &bytes,
3536 pack_lookup.offset,
3537 self.format,
3538 resolve_ref_base,
3539 &mut adapter,
3540 )?
3541 }
3542 None => sley_pack::read_object_header_at(
3543 &bytes,
3544 pack_lookup.offset,
3545 self.format,
3546 resolve_ref_base,
3547 )?,
3548 };
3549 return Ok(Some(header));
3550 }
3551 for alternate in &self.alternates {
3552 if let Some(header) =
3553 Self::without_alternates(alternate, self.format).read_object_header(oid)?
3554 {
3555 return Ok(Some(header));
3556 }
3557 }
3558 self.loose.invalidate_cache();
3561 if let Some(header) = self.loose.read_header(oid)? {
3562 return Ok(Some(header));
3563 }
3564 Ok(None)
3565 }
3566
3567 fn read_packed_object(&self, oid: &ObjectId) -> Result<Option<Arc<EncodedObject>>> {
3568 if let Ok(mut cache) = self.decoded.lock()
3571 && let Some(object) = cache.get(oid)
3572 {
3573 return Ok(Some(object));
3574 }
3575 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3576 return Ok(None);
3577 };
3578 self.read_packed_object_at_lookup(oid, &pack_lookup).map(Some)
3579 }
3580
3581 fn read_packed_object_at_lookup(
3582 &self,
3583 oid: &ObjectId,
3584 pack_lookup: &PackLookup,
3585 ) -> Result<Arc<EncodedObject>> {
3586 if let Ok(mut cache) = self.decoded.lock()
3587 && let Some(object) = cache.get(oid)
3588 {
3589 return Ok(object);
3590 }
3591 let bytes = pack_lookup.pack_bytes(self)?;
3592 let delta_cache = pack_lookup.delta_cache(self);
3597 let delta_adapter = delta_cache.as_ref().map(PackDeltaCacheAdapter);
3598 let resolve_ref_base = |base: &ObjectId| self.read_object(base).map(Some);
3604 let object = match &delta_adapter {
3605 Some(adapter) => sley_pack::read_object_at_with_cache_arc(
3606 &bytes,
3607 pack_lookup.offset,
3608 self.format,
3609 resolve_ref_base,
3610 adapter,
3611 )?,
3612 None => sley_pack::read_object_at_arc(
3613 &bytes,
3614 pack_lookup.offset,
3615 self.format,
3616 resolve_ref_base,
3617 )?,
3618 };
3619 if verify_reads_enabled() {
3623 let actual = object.object_id(self.format)?;
3624 if actual != *oid {
3625 return Err(GitError::InvalidObject(format!(
3626 "pack object id mismatch: index says {oid}, decoded {actual}"
3627 )));
3628 }
3629 }
3630 if let Ok(mut cache) = self.decoded.lock() {
3631 cache.put(*oid, Arc::clone(&object));
3632 }
3633 Ok(object)
3634 }
3635
3636 fn pack_delta_cache(&self, pack_path: &Path) -> Option<Arc<Mutex<LruOffsetCache>>> {
3640 let mut caches = self.pack_deltas.lock().ok()?;
3641 let cache = caches.entry(pack_path.to_path_buf()).or_insert_with(|| {
3642 Arc::new(Mutex::new(LruOffsetCache::new(delta_base_cache_budget())))
3643 });
3644 Some(Arc::clone(cache))
3645 }
3646
3647 fn pack_header_type_cache(&self, pack_path: &Path) -> Option<PackHeaderTypeCache> {
3651 let mut caches = self.pack_header_types.lock().ok()?;
3652 let cache = caches
3653 .entry(pack_path.to_path_buf())
3654 .or_insert_with(|| Arc::new(Mutex::new(HashMap::new())));
3655 Some(Arc::clone(cache))
3656 }
3657
3658 fn cached_pack_bytes(&self, pack_path: &Path) -> Result<Arc<PackData>> {
3663 if let Ok(cache) = self.pack_bytes.lock()
3664 && let Some(bytes) = cache.get(pack_path)
3665 {
3666 return Ok(Arc::clone(bytes));
3667 }
3668 let bytes = Arc::new(load_pack_data(pack_path)?);
3669 if let Ok(mut cache) = self.pack_bytes.lock() {
3670 cache.insert(pack_path.to_path_buf(), Arc::clone(&bytes));
3671 }
3672 Ok(bytes)
3673 }
3674
3675 fn cached_pack_index(&self, index_path: &Path) -> Result<Arc<PackIndex>> {
3679 if let Ok(cache) = self.pack_indexes.lock()
3680 && let Some(index) = cache.get(index_path)
3681 {
3682 return Ok(Arc::clone(index));
3683 }
3684 let index = Arc::new(PackIndex::parse(&fs::read(index_path)?, self.format)?);
3685 if let Ok(mut cache) = self.pack_indexes.lock() {
3686 cache.insert(index_path.to_path_buf(), Arc::clone(&index));
3687 }
3688 Ok(index)
3689 }
3690
3691 fn cached_multi_pack_index_oid_lookup(
3692 &self,
3693 midx_path: &Path,
3694 ) -> Result<Option<Arc<MultiPackIndexOidLookup>>> {
3695 if !midx_path.exists() {
3696 return Ok(None);
3697 }
3698 if let Ok(cache) = self.multi_pack_oid_lookups.lock()
3699 && let Some(midx) = cache.get(midx_path)
3700 {
3701 return Ok(Some(Arc::clone(midx)));
3702 }
3703 let bytes = Arc::new(fs::read(midx_path)?);
3704 let midx = Arc::new(MultiPackIndexOidLookup::parse(bytes, self.format)?);
3705 if let Ok(mut cache) = self.multi_pack_oid_lookups.lock() {
3706 cache.insert(midx_path.to_path_buf(), Arc::clone(&midx));
3707 }
3708 Ok(Some(midx))
3709 }
3710
3711 fn cached_pack_registry(
3716 &self,
3717 pack_dir: &Path,
3718 force_rescan: bool,
3719 ) -> Result<Arc<PackRegistrySnapshot>> {
3720 if !force_rescan && let Some(registry) = self.cached_loaded_pack_registry(pack_dir)? {
3721 return Ok(registry);
3722 }
3723 let scanned = Arc::new(scan_pack_registry(pack_dir, self.format)?);
3724 if let Ok(mut cache) = self.pack_registry.lock() {
3725 match cache.as_ref() {
3726 Some(existing)
3727 if existing.fingerprint == scanned.fingerprint
3728 && same_registered_pack_set(&existing.packs, &scanned.packs) =>
3729 {
3730 return Ok(Arc::clone(existing));
3731 }
3732 _ => {
3733 *cache = Some(Arc::clone(&scanned));
3734 }
3735 }
3736 }
3737 Ok(scanned)
3738 }
3739
3740 fn find_in_pack_registry(
3741 &self,
3742 registry: Arc<PackRegistrySnapshot>,
3743 oid: &ObjectId,
3744 ) -> Result<Option<PackLookup>> {
3745 let hinted_pack_index = registry.cached_hint();
3746 if let Some(pack_index) = hinted_pack_index {
3747 let pack = ®istry.packs[pack_index];
3748 let index = pack.index(self.format)?;
3749 if let Some(entry) = index.find(oid) {
3750 return Ok(Some(PackLookup::from_registered(
3751 Arc::clone(pack),
3752 entry.offset,
3753 )));
3754 }
3755 }
3756 for (pack_index, pack) in registry.packs.iter().enumerate() {
3757 if Some(pack_index) == hinted_pack_index {
3758 continue;
3759 }
3760 let index = pack.index(self.format)?;
3761 if let Some(entry) = index.find(oid) {
3762 registry.remember_hint(pack_index);
3763 return Ok(Some(PackLookup::from_registered(
3764 Arc::clone(pack),
3765 entry.offset,
3766 )));
3767 }
3768 }
3769 Ok(None)
3770 }
3771
3772 fn read_packed_object_from_other_packs(
3778 &self,
3779 oid: &ObjectId,
3780 exclude: &PackLookup,
3781 ) -> Result<Option<Arc<EncodedObject>>> {
3782 let pack_dir = self.objects_dir.join("pack");
3783 let Ok(entries) = fs::read_dir(&pack_dir) else {
3784 return Ok(None);
3785 };
3786 let excluded_pack = exclude.pack_path().to_path_buf();
3787 for entry in entries {
3788 let idx_path = entry?.path();
3789 if idx_path.extension().and_then(|ext| ext.to_str()) != Some("idx") {
3790 continue;
3791 }
3792 let pack_path = idx_path.with_extension("pack");
3793 if pack_path == excluded_pack {
3794 continue;
3795 }
3796 let Ok(idx_bytes) = fs::read(&idx_path) else {
3797 continue;
3798 };
3799 let Ok(index) = PackIndex::parse(&idx_bytes, self.format) else {
3800 continue;
3801 };
3802 let Some(entry) = index.find(oid) else {
3803 continue;
3804 };
3805 let candidate = PackLookup::from_path(pack_path, entry.offset);
3806 if let Ok(object) = self.read_packed_object_at_lookup(oid, &candidate) {
3807 return Ok(Some(object));
3808 }
3809 }
3810 Ok(None)
3811 }
3812
3813 fn find_pack_containing(&self, oid: &ObjectId) -> Result<Option<PackLookup>> {
3814 if oid.format() != self.format {
3815 return Err(GitError::InvalidObjectId(format!(
3816 "object {oid} uses {}, store uses {}",
3817 oid.format().name(),
3818 self.format.name()
3819 )));
3820 }
3821 let pack_dir = self.objects_dir.join("pack");
3822 if let Some(midx) = self.cached_loaded_multi_pack_index_oid_lookup()
3827 && let Some(pack_paths) = self.midx_oid_lookup_pack_paths(&pack_dir, &midx, oid)?
3828 {
3829 return Ok(Some(pack_paths));
3830 }
3831 if let Some(registry) = self.cached_loaded_pack_registry(&pack_dir)?
3832 && let Some(pack_paths) = self.find_in_pack_registry(registry, oid)?
3833 {
3834 return Ok(Some(pack_paths));
3835 }
3836
3837 if !pack_dir.exists() {
3838 return Ok(None);
3839 }
3840 if let Some(pack_paths) = self.find_midx_pack_containing(&pack_dir, oid)? {
3841 return Ok(Some(pack_paths));
3842 }
3843 let registry = self.cached_pack_registry(&pack_dir, false)?;
3847 if let Some(pack_paths) = self.find_in_pack_registry(Arc::clone(®istry), oid)? {
3848 return Ok(Some(pack_paths));
3849 }
3850 let refreshed = self.cached_pack_registry(&pack_dir, true)?;
3851 if Arc::ptr_eq(®istry, &refreshed) {
3852 return Ok(None);
3854 }
3855 self.find_in_pack_registry(refreshed, oid)
3856 }
3857
3858 fn packed_object_storage_info(&self, oid: &ObjectId) -> Result<Option<ObjectStorageInfo>> {
3859 let Some(pack_lookup) = self.find_pack_containing(oid)? else {
3860 return Ok(None);
3861 };
3862 let pack_len = fs::metadata(pack_lookup.pack_path())?.len();
3863 let trailer_offset = pack_len
3864 .checked_sub(self.format.raw_len() as u64)
3865 .ok_or_else(|| GitError::InvalidFormat("pack file shorter than checksum".into()))?;
3866 let index = pack_lookup.pack_index(self)?;
3867 let pack = pack_lookup.pack_bytes(self)?;
3868 let delta_base = pack_entry_delta_base(self.format, &pack, pack_lookup.offset)?;
3869 let delta_base_offset = match &delta_base {
3870 Some(PackDeltaBase::Offset(offset)) => Some(*offset),
3871 Some(PackDeltaBase::Ref(_)) | None => None,
3872 };
3873 let offset_info = scan_pack_index_offsets(
3874 &index,
3875 pack_lookup.offset,
3876 trailer_offset,
3877 delta_base_offset,
3878 )?;
3879 let disk_size = offset_info
3880 .end_offset
3881 .checked_sub(pack_lookup.offset)
3882 .ok_or_else(|| GitError::InvalidFormat("pack index offsets are not sorted".into()))?;
3883 let deltabase = match delta_base {
3884 Some(PackDeltaBase::Offset(_)) => offset_info.delta_base_oid.ok_or_else(|| {
3885 GitError::InvalidFormat("ofs-delta base oid missing from pack index".into())
3891 })?,
3892 Some(PackDeltaBase::Ref(oid)) => oid,
3893 None => zero_oid(self.format)?,
3894 };
3895 Ok(Some(ObjectStorageInfo {
3896 disk_size,
3897 deltabase,
3898 }))
3899 }
3900
3901 fn find_midx_pack_containing(
3902 &self,
3903 pack_dir: &Path,
3904 oid: &ObjectId,
3905 ) -> Result<Option<PackLookup>> {
3906 let midx_path = pack_dir.join("multi-pack-index");
3907 let Some(midx) = self.cached_multi_pack_index_oid_lookup(&midx_path)? else {
3908 return Ok(None);
3909 };
3910 self.midx_oid_lookup_pack_paths(pack_dir, &midx, oid)
3911 }
3912
3913 fn midx_oid_lookup_pack_paths(
3914 &self,
3915 pack_dir: &Path,
3916 midx: &MultiPackIndexOidLookup,
3917 oid: &ObjectId,
3918 ) -> Result<Option<PackLookup>> {
3919 let Some(entry) = midx.find(oid)? else {
3920 return Ok(None);
3921 };
3922 let Some(pack_name) = midx.pack_name(entry.pack_int_id) else {
3923 return Err(GitError::InvalidFormat(
3924 "multi-pack-index object points past pack table".into(),
3925 ));
3926 };
3927 let pack_file_name = pack_name
3928 .strip_suffix(".idx")
3929 .map(|stem| format!("{stem}.pack"))
3930 .unwrap_or_else(|| pack_name.to_string());
3931 let pack = pack_dir.join(pack_file_name);
3932 Ok(Some(PackLookup::from_path(pack, entry.offset)))
3933 }
3934
3935 fn cached_loaded_multi_pack_index_oid_lookup(&self) -> Option<Arc<MultiPackIndexOidLookup>> {
3936 let midx_path = self.objects_dir.join("pack").join("multi-pack-index");
3937 let cache = self.multi_pack_oid_lookups.lock().ok()?;
3938 cache.get(&midx_path).map(Arc::clone)
3939 }
3940
3941 fn cached_loaded_pack_registry(
3947 &self,
3948 _pack_dir: &Path,
3949 ) -> Result<Option<Arc<PackRegistrySnapshot>>> {
3950 let cache = match self.pack_registry.lock() {
3951 Ok(cache) => cache,
3952 Err(_) => return Ok(None),
3953 };
3954 Ok(cache.as_ref().map(Arc::clone))
3955 }
3956}
3957
3958fn validate_object_id_prefix(format: ObjectFormat, prefix: &str) -> Result<()> {
3959 if prefix.len() < 4 || prefix.len() > format.hex_len() {
3960 return Err(GitError::InvalidObjectId(format!(
3961 "expected 4 to {} hex digits for {}, got {}",
3962 format.hex_len(),
3963 format.name(),
3964 prefix.len()
3965 )));
3966 }
3967 if !prefix.bytes().all(|byte| byte.is_ascii_hexdigit()) {
3968 return Err(GitError::InvalidObjectId(format!(
3969 "non-hex object id prefix {prefix}"
3970 )));
3971 }
3972 Ok(())
3973}
3974
3975fn object_id_matches_prefix(oid: &ObjectId, prefix: &str) -> bool {
3976 oid.to_hex()
3977 .as_bytes()
3978 .iter()
3979 .zip(prefix.as_bytes())
3980 .all(|(actual, expected)| actual.eq_ignore_ascii_case(expected))
3981}
3982
3983fn pack_dir_modified(pack_dir: &Path) -> Result<Option<std::time::SystemTime>> {
3984 match fs::metadata(pack_dir) {
3985 Ok(metadata) => Ok(metadata.modified().ok()),
3986 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
3987 Err(err) => Err(GitError::Io(err.to_string())),
3988 }
3989}
3990
3991fn scan_pack_registry(pack_dir: &Path, _format: ObjectFormat) -> Result<PackRegistrySnapshot> {
3996 let modified = pack_dir_modified(pack_dir)?;
3997 let entries = match fs::read_dir(pack_dir) {
3998 Ok(entries) => entries,
3999 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4000 return Ok(PackRegistrySnapshot::new(
4001 PackDirFingerprint {
4002 modified,
4003 idx_count: 0,
4004 pack_count: 0,
4005 },
4006 Vec::new(),
4007 ));
4008 }
4009 Err(err) => return Err(GitError::Io(err.to_string())),
4010 };
4011
4012 let mut idx_paths = Vec::new();
4013 let mut idx_count = 0;
4014 let mut pack_count = 0;
4015 for entry in entries {
4016 let entry = entry?;
4017 let path = entry.path();
4018 match path.extension().and_then(|ext| ext.to_str()) {
4019 Some("idx") => {
4020 idx_count += 1;
4021 idx_paths.push(path);
4022 }
4023 Some("pack") => {
4024 pack_count += 1;
4025 }
4026 _ => {}
4027 }
4028 }
4029
4030 let mut packs = Vec::new();
4031 for idx in idx_paths {
4032 let pack = idx.with_extension("pack");
4033 let Ok(metadata) = fs::metadata(&pack) else {
4034 continue;
4035 };
4036 let modified = pack_sort_modified(&metadata);
4037 packs.push((modified, metadata.len(), Arc::new(RegisteredPack::new(idx, pack))));
4038 }
4039 packs.sort_by(|left, right| {
4044 right
4045 .0
4046 .cmp(&left.0)
4047 .then_with(|| right.1.cmp(&left.1))
4048 .then_with(|| left.2.idx.cmp(&right.2.idx))
4049 });
4050 let packs = packs.into_iter().map(|(_, _, pack)| pack).collect();
4051 Ok(PackRegistrySnapshot::new(
4052 PackDirFingerprint {
4053 modified,
4054 idx_count,
4055 pack_count,
4056 },
4057 packs,
4058 ))
4059}
4060
/// Converts a file's modification time into a `(seconds, nanoseconds)` pair
/// measured from the Unix epoch, for use as a sort key.
///
/// Falls back to `(0, 0)` when the modification time is unavailable or lies
/// before the epoch.
fn pack_sort_modified(metadata: &fs::Metadata) -> (u64, u32) {
    let Ok(mtime) = metadata.modified() else {
        return (0, 0);
    };
    match mtime.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => (since_epoch.as_secs(), since_epoch.subsec_nanos()),
        Err(_) => (0, 0),
    }
}
4073
4074fn same_registered_pack_set(left: &[Arc<RegisteredPack>], right: &[Arc<RegisteredPack>]) -> bool {
4077 left.len() == right.len()
4078 && left
4079 .iter()
4080 .zip(right.iter())
4081 .all(|(a, b)| a.idx == b.idx && a.pack == b.pack)
4082}
4083
/// Collects the alternate object directories configured for `objects_dir`.
///
/// Entries come from two sources, in this order:
/// 1. the `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment variable, split with
///    [`env::split_paths`] so the platform's path-list separator is honoured
///    and non-UTF-8 path bytes are kept intact; empty entries are dropped;
/// 2. `<objects_dir>/info/alternates`, one path per line. Blank lines, lines
///    starting with `#`, and lines that are not valid UTF-8 are skipped; a
///    trailing `\r` is removed; relative paths are resolved against
///    `objects_dir`.
///
/// A missing or unreadable alternates file contributes nothing.
fn alternate_object_dirs(objects_dir: &Path) -> Vec<PathBuf> {
    let mut alternates = Vec::new();
    if let Some(value) = env::var_os("GIT_ALTERNATE_OBJECT_DIRECTORIES") {
        alternates.extend(env::split_paths(&value).filter(|dir| !dir.as_os_str().is_empty()));
    }
    let alternates_path = objects_dir.join("info").join("alternates");
    if let Ok(contents) = fs::read(&alternates_path) {
        for raw in contents.split(|byte| *byte == b'\n') {
            let line = raw.strip_suffix(b"\r").unwrap_or(raw);
            if line.is_empty() || line.starts_with(b"#") {
                continue;
            }
            let Ok(value) = std::str::from_utf8(line) else {
                continue;
            };
            let path = Path::new(value);
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                objects_dir.join(path)
            };
            alternates.push(absolute);
        }
    }
    alternates
}
4114
4115impl ObjectReader for FileObjectDatabase {
4116 fn has_shallow_grafts(&self) -> bool {
4117 !self
4118 .shallow_grafts
4119 .get_or_init(|| {
4120 let shallow_file = self
4121 .objects_dir
4122 .parent()
4123 .map(|git_dir| git_dir.join("shallow"));
4124 match shallow_file {
4125 Some(path) => read_shallow_grafts(&path, self.format),
4126 None => HashSet::new(),
4127 }
4128 })
4129 .is_empty()
4130 }
4131
4132 fn is_shallow_graft(&self, oid: &ObjectId) -> bool {
4133 self.shallow_grafts
4134 .get_or_init(|| {
4135 let shallow_file = self
4136 .objects_dir
4137 .parent()
4138 .map(|git_dir| git_dir.join("shallow"));
4139 match shallow_file {
4140 Some(path) => read_shallow_grafts(&path, self.format),
4141 None => HashSet::new(),
4142 }
4143 })
4144 .contains(oid)
4145 }
4146
4147 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4148 if let Some(object) = implied_empty_tree_object(self.format, oid) {
4149 return Ok(object);
4150 }
4151 if let Some(pack_lookup) = self.find_pack_containing(oid)? {
4159 match self.read_packed_object_at_lookup(oid, &pack_lookup) {
4160 Ok(object) => return Ok(object),
4161 Err(GitError::NotFound(_)) => {}
4162 Err(packed_err) => {
4168 if let Ok(object) = self.loose.read_object(oid) {
4169 return Ok(object);
4170 }
4171 if let Some(object) =
4174 self.read_packed_object_from_other_packs(oid, &pack_lookup)?
4175 {
4176 return Ok(object);
4177 }
4178 for alternate in &self.alternates {
4179 if let Ok(object) =
4180 Self::without_alternates(alternate, self.format).read_object(oid)
4181 {
4182 return Ok(object);
4183 }
4184 }
4185 return Err(packed_err);
4186 }
4187 }
4188 }
4189 let loose_err = match self.loose.read_object(oid) {
4190 Ok(object) => return Ok(object),
4191 Err(GitError::NotFound(_)) => None,
4192 Err(err) => Some(err),
4193 };
4194 if let Some(object) = self.read_packed_object(oid)? {
4195 return Ok(object);
4196 }
4197 for alternate in &self.alternates {
4198 match Self::without_alternates(alternate, self.format).read_object(oid) {
4199 Ok(object) => return Ok(object),
4200 Err(GitError::NotFound(_)) => {}
4201 Err(err) => return Err(err),
4202 }
4203 }
4204 self.loose.invalidate_cache();
4210 match self.loose.read_object(oid) {
4211 Ok(object) => return Ok(object),
4212 Err(GitError::NotFound(_)) => {}
4213 Err(err) => return Err(err),
4214 }
4215 if let Some(err) = loose_err {
4219 return Err(err);
4220 }
4221 Err(GitError::object_not_found_in(
4222 *oid,
4223 MissingObjectContext::Read,
4224 ))
4225 }
4226}
4227
4228impl ObjectWriter for FileObjectDatabase {
4229 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4230 let oid = object.object_id(self.format)?;
4236 if self.contains(&oid)? {
4237 return Ok(oid);
4238 }
4239 self.loose.write_object(object)
4240 }
4241}
4242
4243fn write_pack_component(path: &Path, bytes: &[u8]) -> Result<()> {
4244 if path.exists() {
4245 return Ok(());
4246 }
4247 let parent = path
4248 .parent()
4249 .ok_or_else(|| GitError::InvalidPath("pack component path has no parent".into()))?;
4250 fs::create_dir_all(parent)?;
4251 let temp_path = unique_temp_path(parent);
4252 let write_result = (|| -> Result<()> {
4253 {
4254 let mut file = fs::OpenOptions::new()
4255 .write(true)
4256 .create_new(true)
4257 .open(&temp_path)?;
4258 file.write_all(bytes)?;
4259 file.sync_all()?;
4260 }
4261 match fs::rename(&temp_path, path) {
4262 Ok(()) => Ok(()),
4263 Err(_) if path.exists() => {
4264 let _ = fs::remove_file(&temp_path);
4265 Ok(())
4266 }
4267 Err(err) => Err(GitError::Io(err.to_string())),
4268 }
4269 })();
4270 if write_result.is_err() {
4271 let _ = fs::remove_file(&temp_path);
4272 }
4273 write_result
4274}
4275
4276fn write_promisor_pack_sidecar(
4277 pack_dir: &Path,
4278 pack_name: &str,
4279 promisor: bool,
4280) -> Result<Option<PathBuf>> {
4281 if !promisor {
4282 return Ok(None);
4283 }
4284 let path = pack_dir.join(format!("{pack_name}.promisor"));
4285 write_pack_component(&path, b"")?;
4286 Ok(Some(path))
4287}
4288
/// Size, in bytes, of the window of inflated data in which a loose object's
/// header terminator (the NUL byte) must appear. Used as the output buffer
/// size when inflating only the header and as the scan limit when checking
/// already inflated data.
const MAX_LOOSE_HEADER_LEN: usize = 32;
4294
4295fn loose_header_too_long(oid: &ObjectId) -> GitError {
4300 GitError::InvalidObject(format!(
4301 "header for {oid} too long, exceeds {MAX_LOOSE_HEADER_LEN} bytes"
4302 ))
4303}
4304
4305fn loose_unpack_header_failed(oid: &ObjectId) -> GitError {
4309 GitError::InvalidObject(format!("unable to unpack {oid} header"))
4310}
4311
/// Inspects the first two bytes of a zlib stream and names the first header
/// problem found, if any.
///
/// Checks, in order: the 16-bit big-endian header word must be a multiple of
/// 31; the low nibble of the first byte must be 8; the high nibble of the
/// first byte must not exceed 7; bit `0x20` of the second byte must be clear.
/// Returns `None` when fewer than two bytes are given or every check passes.
fn inflate_header_diagnostic(input: &[u8]) -> Option<&'static str> {
    let (cmf, flg) = match input {
        [first, second, ..] => (*first, *second),
        _ => return None,
    };
    let header_word = u16::from_be_bytes([cmf, flg]);
    let method = cmf & 0x0f;
    let window_bits = cmf >> 4;
    let wants_dictionary = flg & 0x20 != 0;

    let problem = if header_word % 31 != 0 {
        "inflate: data stream error (incorrect header check)"
    } else if method != 8 {
        "inflate: data stream error (unknown compression method)"
    } else if window_bits > 7 {
        "inflate: data stream error (invalid window size)"
    } else if wants_dictionary {
        "inflate: needs dictionary (no message)"
    } else {
        return None;
    };
    Some(problem)
}
4335
4336fn emit_inflate_diagnostic(input: &[u8]) {
4339 if let Some(diagnostic) = inflate_header_diagnostic(input) {
4340 eprintln!("error: {diagnostic}");
4341 }
4342}
4343
/// Outcome of checking one loose object file with
/// `LooseObjectStore::verify_object`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LooseObjectIntegrity {
    /// The file inflated and parsed, and its content hashes to the expected id.
    Ok,
    /// The file inflated and parsed, but its content hashes to `actual`
    /// rather than the expected id.
    HashMismatch { actual: ObjectId },
    /// The file could not be inflated, has an unusable header, has trailing
    /// data, is shorter than its declared size, or failed to parse.
    Corrupt,
}
4357
/// Store for loose objects kept under `<objects_dir>/<2 hex>/<remaining hex>`.
#[derive(Debug, Clone)]
pub struct LooseObjectStore {
    // Directory that holds the fan-out subdirectories.
    objects_dir: PathBuf,
    // Object format every id handled by this store must use.
    format: ObjectFormat,
    // Presence cache; behind an `Arc`, so clones of the store share it.
    loose_cache: Arc<Mutex<LoosePresenceCache>>,
}
4372
4373impl LooseObjectStore {
4374 pub fn new(objects_dir: impl Into<PathBuf>, format: ObjectFormat) -> Self {
4375 Self {
4376 objects_dir: objects_dir.into(),
4377 format,
4378 loose_cache: Arc::new(Mutex::new(LoosePresenceCache::default())),
4379 }
4380 }
4381
4382 fn cached_loose_presence(&self, oid: &ObjectId) -> Option<bool> {
4387 let mut guard = self.loose_cache.lock().ok()?;
4388 let fanout = oid.as_bytes()[0];
4389 if !guard.loaded_fanouts.contains(&fanout) {
4390 collect_loose_fanout_object_ids(
4391 &self.objects_dir,
4392 self.format,
4393 fanout,
4394 &mut guard.objects,
4395 )
4396 .ok()?;
4397 guard.loaded_fanouts.insert(fanout);
4398 }
4399 Some(guard.objects.contains(oid))
4400 }
4401
4402 fn loose_object_ids_cached(&self) -> Result<Vec<ObjectId>> {
4406 if let Ok(mut guard) = self.loose_cache.lock() {
4407 guard.objects = loose_object_id_set(&self.objects_dir, self.format)?;
4408 guard.loaded_fanouts = (0..=u8::MAX).collect();
4409 let mut ids = guard.objects.iter().copied().collect::<Vec<_>>();
4410 ids.sort_by(|left, right| left.as_bytes().cmp(right.as_bytes()));
4411 return Ok(ids);
4412 }
4413 loose_object_ids(&self.objects_dir, self.format)
4414 }
4415
4416 fn note_loose_write(&self, oid: ObjectId) {
4420 if let Ok(mut guard) = self.loose_cache.lock() {
4421 guard.objects.insert(oid);
4422 }
4423 }
4424
4425 pub(crate) fn invalidate_cache(&self) {
4428 if let Ok(mut guard) = self.loose_cache.lock() {
4429 *guard = LoosePresenceCache::default();
4430 }
4431 }
4432
4433 pub fn from_git_dir(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Self {
4434 Self::new(repository_objects_dir(git_dir), format)
4435 }
4436
4437 fn validate_oid_format(&self, oid: &ObjectId) -> Result<()> {
4438 if oid.format() != self.format {
4439 return Err(GitError::InvalidObjectId(format!(
4440 "object {oid} uses {}, store uses {}",
4441 oid.format().name(),
4442 self.format.name()
4443 )));
4444 }
4445 Ok(())
4446 }
4447
4448 pub fn object_path(&self, oid: &ObjectId) -> Result<PathBuf> {
4449 self.validate_oid_format(oid)?;
4450 let hex = oid.to_hex();
4451 Ok(self.objects_dir.join(&hex[..2]).join(&hex[2..]))
4452 }
4453
4454 pub fn exists(&self, oid: &ObjectId) -> Result<bool> {
4455 self.validate_oid_format(oid)?;
4456 if self.cached_loose_presence(oid) == Some(false) {
4457 return Ok(false);
4458 }
4459 let path = self.object_path(oid)?;
4460 Ok(path.exists())
4461 }
4462
4463 pub fn disk_size(&self, oid: &ObjectId) -> Result<Option<u64>> {
4464 self.validate_oid_format(oid)?;
4465 if self.cached_loose_presence(oid) == Some(false) {
4466 return Ok(None);
4467 }
4468 let path = self.object_path(oid)?;
4469 match fs::metadata(path) {
4470 Ok(metadata) => Ok(Some(metadata.len())),
4471 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
4472 Err(err) => Err(GitError::Io(err.to_string())),
4473 }
4474 }
4475
4476 pub fn read_header(&self, oid: &ObjectId) -> Result<Option<(ObjectType, u64)>> {
4481 self.validate_oid_format(oid)?;
4482 if self.cached_loose_presence(oid) == Some(false) {
4483 return Ok(None);
4484 }
4485 let path = self.object_path(oid)?;
4486 let compressed = match fs::read(&path) {
4487 Ok(compressed) => compressed,
4488 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4489 Err(err) => return Err(GitError::Io(err.to_string())),
4490 };
4491 match inflate_loose_header(&compressed)? {
4492 LooseHeader::Ok(header) => {
4493 let header = std::str::from_utf8(&header)
4494 .map_err(|err| GitError::InvalidObject(err.to_string()))?;
4495 let (kind, size) = header
4496 .split_once(' ')
4497 .ok_or_else(|| GitError::InvalidObject("missing object size".into()))?;
4498 let object_type = kind.parse::<ObjectType>()?;
4499 let size = size
4500 .parse::<u64>()
4501 .map_err(|_| GitError::InvalidObject("invalid object size".into()))?;
4502 Ok(Some((object_type, size)))
4503 }
4504 LooseHeader::Bad => {
4505 emit_inflate_diagnostic(compressed.get(..2).unwrap_or(&compressed));
4508 Err(loose_unpack_header_failed(oid))
4509 }
4510 LooseHeader::TooLong => {
4511 Err(loose_header_too_long(oid))
4516 }
4517 }
4518 }
4519
4520 pub fn object_ids(&self) -> Result<Vec<ObjectId>> {
4522 self.loose_object_ids_cached()
4523 }
4524
4525 pub fn verify_object(
4533 &self,
4534 oid: &ObjectId,
4535 display_path: &str,
4536 ) -> Result<Option<LooseObjectIntegrity>> {
4537 let path = self.object_path(oid)?;
4538 let compressed = match fs::read(&path) {
4539 Ok(compressed) => compressed,
4540 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4541 Err(err) => return Err(GitError::Io(err.to_string())),
4542 };
4543 let mut decoder = ZlibDecoder::new(compressed.as_slice());
4544 let mut framed = Vec::new();
4545 if decoder.read_to_end(&mut framed).is_err() {
4546 emit_inflate_diagnostic(&compressed);
4547 if framed_loose_header_terminated(&framed) {
4555 eprintln!("error: corrupt loose object '{oid}'");
4556 eprintln!("error: unable to unpack contents of {display_path}");
4557 } else {
4558 eprintln!("error: unable to unpack header of {display_path}");
4559 }
4560 return Ok(Some(LooseObjectIntegrity::Corrupt));
4561 }
4562 if !framed_loose_header_terminated(&framed) {
4563 eprintln!("error: unable to unpack header of {display_path}");
4566 return Ok(Some(LooseObjectIntegrity::Corrupt));
4567 }
4568 if (decoder.total_in() as usize) < compressed.len() {
4575 eprintln!("error: garbage at end of loose object '{oid}'");
4579 eprintln!("error: unable to unpack contents of {display_path}");
4580 return Ok(Some(LooseObjectIntegrity::Corrupt));
4581 }
4582 if let Some(declared) = loose_header_declared_size(&framed) {
4589 let nul = framed.iter().position(|&b| b == 0).unwrap_or(framed.len());
4590 let body_len = framed.len() - (nul + 1).min(framed.len());
4591 if body_len < declared {
4592 eprintln!("error: corrupt loose object '{oid}'");
4593 eprintln!("error: unable to unpack contents of {display_path}");
4594 return Ok(Some(LooseObjectIntegrity::Corrupt));
4595 }
4596 }
4597 let Ok(object) = parse_framed_object(&framed) else {
4598 if let Some(header) = loose_header_with_unknown_type(&framed) {
4603 eprintln!(
4604 "error: unable to parse type from header '{header}' of {display_path}"
4605 );
4606 } else {
4607 eprintln!("error: unable to parse header of {display_path}");
4608 }
4609 return Ok(Some(LooseObjectIntegrity::Corrupt));
4610 };
4611 let actual = object.object_id(self.format)?;
4612 if &actual != oid {
4613 return Ok(Some(LooseObjectIntegrity::HashMismatch { actual }));
4614 }
4615 Ok(Some(LooseObjectIntegrity::Ok))
4616 }
4617}
4618
4619fn framed_loose_header_terminated(framed: &[u8]) -> bool {
4623 framed
4624 .iter()
4625 .take(MAX_LOOSE_HEADER_LEN)
4626 .any(|byte| *byte == 0)
4627}
4628
4629fn loose_header_with_unknown_type(framed: &[u8]) -> Option<String> {
4634 let nul = framed.iter().position(|&b| b == 0)?;
4635 let header = std::str::from_utf8(&framed[..nul]).ok()?;
4636 let (kind, size) = header.split_once(' ')?;
4637 let size: usize = size.parse().ok()?;
4638 if framed.len() - (nul + 1) != size {
4641 return None;
4642 }
4643 if kind.parse::<ObjectType>().is_ok() {
4646 return None;
4647 }
4648 Some(header.to_string())
4649}
4650
/// Extracts the size declared in a `<kind> <size>\0...` loose object header.
///
/// Returns `None` when there is no NUL terminator, the header is not UTF-8,
/// it has no space separator, or the size does not parse as `usize`.
fn loose_header_declared_size(framed: &[u8]) -> Option<usize> {
    let nul = framed.iter().position(|&byte| byte == 0)?;
    std::str::from_utf8(&framed[..nul])
        .ok()
        .and_then(|header| header.split_once(' '))
        .and_then(|(_kind, size)| size.parse::<usize>().ok())
}
4660
/// Result of inflating just the header of a loose object
/// (see `inflate_loose_header`).
enum LooseHeader {
    /// The header bytes that precede the NUL terminator.
    Ok(Vec<u8>),
    /// The decompressor reported an error.
    Bad,
    /// No NUL terminator was found in the inflated header window.
    TooLong,
}
4675
4676fn inflate_loose_header(compressed: &[u8]) -> Result<LooseHeader> {
4690 let mut out = [0u8; MAX_LOOSE_HEADER_LEN];
4691 let mut decompress = Decompress::new(true);
4692 let status = decompress.decompress(compressed, &mut out, FlushDecompress::None);
4696 let produced = decompress.total_out() as usize;
4697 match status {
4698 Ok(_) => {
4699 let window = &out[..produced.min(MAX_LOOSE_HEADER_LEN)];
4700 match window.iter().position(|&byte| byte == 0) {
4701 Some(nul) => Ok(LooseHeader::Ok(window[..nul].to_vec())),
4702 None => Ok(LooseHeader::TooLong),
4706 }
4707 }
4708 Err(_) => Ok(LooseHeader::Bad),
4710 }
4711}
4712
4713impl ObjectReader for LooseObjectStore {
4714 fn read_object(&self, oid: &ObjectId) -> Result<Arc<EncodedObject>> {
4715 self.validate_oid_format(oid)?;
4716 if self.cached_loose_presence(oid) == Some(false) {
4720 return Err(GitError::object_not_found_in(
4721 *oid,
4722 MissingObjectContext::Read,
4723 ));
4724 }
4725 let path = self.object_path(oid)?;
4726 let compressed = match fs::read(&path) {
4727 Ok(compressed) => compressed,
4728 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4729 return Err(GitError::object_not_found_in(
4730 *oid,
4731 MissingObjectContext::Read,
4732 ));
4733 }
4734 Err(err) => return Err(GitError::Io(err.to_string())),
4735 };
4736 let mut decoder = ZlibDecoder::new(compressed.as_slice());
4737 let mut framed = Vec::new();
4738 if decoder.read_to_end(&mut framed).is_err() {
4739 emit_inflate_diagnostic(&compressed);
4740 if !framed_loose_header_terminated(&framed) {
4745 return Err(loose_unpack_header_failed(oid));
4746 }
4747 return Err(GitError::InvalidObject(format!(
4748 "corrupt loose object '{oid}'"
4749 )));
4750 }
4751 if framed
4756 .iter()
4757 .take(MAX_LOOSE_HEADER_LEN)
4758 .all(|byte| *byte != 0)
4759 {
4760 return Err(loose_header_too_long(oid));
4761 }
4762 let object = parse_framed_object(&framed)?;
4763 if verify_reads_enabled() {
4767 let actual = object.object_id(self.format)?;
4768 if &actual != oid {
4769 return Err(GitError::InvalidObject(format!(
4770 "loose object {} hashes to {actual}",
4771 path.display()
4772 )));
4773 }
4774 }
4775 Ok(Arc::new(object))
4776 }
4777}
4778
4779impl ObjectWriter for LooseObjectStore {
4780 fn write_object(&self, object: EncodedObject) -> Result<ObjectId> {
4781 let oid = object.object_id(self.format)?;
4782 let path = self.object_path(&oid)?;
4783 if path.exists() {
4784 self.note_loose_write(oid);
4785 return Ok(oid);
4786 }
4787 let parent = path
4788 .parent()
4789 .ok_or_else(|| GitError::InvalidPath("loose object path has no parent".into()))?;
4790 fs::create_dir_all(parent)?;
4791 let temp_path = unique_temp_path(parent);
4792 let write_result = (|| -> Result<()> {
4793 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
4794 encoder.write_all(&object.framed_bytes())?;
4795 let compressed = encoder.finish()?;
4796 {
4797 let mut file = fs::OpenOptions::new()
4798 .write(true)
4799 .create_new(true)
4800 .open(&temp_path)?;
4801 file.write_all(&compressed)?;
4802 }
4812 match fs::rename(&temp_path, &path) {
4813 Ok(()) => Ok(()),
4814 Err(_) if path.exists() => {
4815 let _ = fs::remove_file(&temp_path);
4816 Ok(())
4817 }
4818 Err(err) => Err(GitError::Io(err.to_string())),
4819 }
4820 })();
4821 if write_result.is_err() {
4822 let _ = fs::remove_file(&temp_path);
4823 }
4824 write_result?;
4825 self.note_loose_write(oid);
4826 Ok(oid)
4827 }
4828}
4829
4830fn unique_temp_path(parent: &Path) -> PathBuf {
4831 let id = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
4832 parent.join(format!("tmp_obj_{}_{}", std::process::id(), id))
4833}
4834
4835#[cfg(test)]
4836mod tests {
4837 use super::*;
4838 use sley_core::BString;
4839 use sley_object::{Commit, EncodedObject, ObjectType, Tag, Tree, TreeEntry};
4840 use sley_pack::{PackFile, PackWriteOptions};
4841
4842 fn blob_of(byte: u8, len: usize) -> EncodedObject {
4843 EncodedObject::new(ObjectType::Blob, vec![byte; len])
4844 }
4845
4846 fn cached_blob_of(byte: u8, len: usize) -> Arc<EncodedObject> {
4847 Arc::new(blob_of(byte, len))
4848 }
4849
4850 fn read_object_for_assert(reader: &impl ObjectReader, oid: &ObjectId) -> EncodedObject {
4851 reader
4852 .read_object(oid)
4853 .expect("test operation should succeed")
4854 .as_ref()
4855 .clone()
4856 }
4857
4858 #[test]
4859 fn lru_cache_evicts_by_byte_budget_least_recently_used_first() {
4860 let one = cached_object_cost(&blob_of(0, 1000));
4862 let mut cache = LruCache::<u32>::new(one * 2 + 8);
4863 cache.put(1, cached_blob_of(b'a', 1000));
4864 cache.put(2, cached_blob_of(b'b', 1000));
4865 assert!(cache.get(&1).is_some());
4867 cache.put(3, cached_blob_of(b'c', 1000));
4868 assert!(cache.get(&1).is_some());
4870 assert!(cache.get(&2).is_none());
4871 assert!(cache.get(&3).is_some());
4872 }
4873
4874 #[test]
4875 fn lru_cache_zero_budget_is_inert() {
4876 let mut cache = LruCache::<u32>::new(0);
4877 cache.put(1, cached_blob_of(b'a', 16));
4878 assert!(cache.get(&1).is_none());
4879 }
4880
4881 #[test]
4882 fn lru_cache_skips_object_larger_than_budget_and_clears_stale_entry() {
4883 let mut cache = LruCache::<u32>::new(cached_object_cost(&blob_of(0, 100)));
4884 cache.put(1, cached_blob_of(b'a', 50));
4885 assert!(cache.get(&1).is_some());
4886 cache.put(1, cached_blob_of(b'b', 10_000));
4889 assert!(cache.get(&1).is_none());
4890 cache.put(2, cached_blob_of(b'c', 50));
4893 assert!(cache.get(&2).is_some());
4894 }
4895
/// Replacing key 2 with a body twice as large must keep key 2 and push key 1
/// out, given a budget only slightly above two 500-byte entries.
#[test]
fn lru_cache_replacing_entry_updates_byte_accounting() {
    let small_cost = cached_object_cost(&blob_of(0, 500));
    let budget = small_cost * 2 + 200;
    let mut cache = LruCache::<u32>::new(budget);

    for (key, fill) in [(1u32, b'a'), (2u32, b'b')] {
        cache.put(key, cached_blob_of(fill, 500));
    }
    for key in [1u32, 2u32] {
        assert!(cache.get(&key).is_some());
    }

    // Grow the entry stored under key 2.
    let grown = cached_blob_of(b'b', 1000);
    cache.put(2, grown);

    assert!(cache.get(&2).is_some());
    assert!(cache.get(&1).is_none());
}
4914
/// Writing the blob `hello\n` to a SHA-1 database yields the expected object id,
/// and that id then passes `validate`.
#[test]
fn write_and_validate_blob() {
    let db = ObjectDatabase::new(ObjectFormat::Sha1);
    let blob = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = db
        .write_object(blob)
        .expect("test operation should succeed");
    let hex = oid.to_hex();
    assert_eq!(hex, "ce013625030ba8dba906f756967f9e9ca394464a");
    db.validate(&oid).expect("test operation should succeed");
}
4924
/// Round-trips one blob through a `LooseObjectStore` rooted in a fresh temp
/// directory and checks that the file reported by `object_path` exists.
#[test]
fn loose_store_writes_and_reads_object() {
    // Process id plus a per-process counter keeps the directory name unique.
    let pid = std::process::id();
    let seq = TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed);
    let root = std::env::temp_dir().join(format!("sley-loose-store-{}-{}", pid, seq));
    let objects_dir = root.join("objects");
    let store = LooseObjectStore::new(objects_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let oid = store
        .write_object(object.clone())
        .expect("test operation should succeed");

    let round_tripped = read_object_for_assert(&store, &oid);
    assert_eq!(round_tripped, object);

    let on_disk = store
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(on_disk.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
4946
/// Zeroing byte 10 of a loose object file must make `read_header` fail with a
/// message that contains "unable to unpack" and the object's hex id.
#[test]
fn read_header_detects_corruption_within_gits_header_window() {
    let root = temp_root("sley-loose-header-corrupt");
    let objects_dir = root.join("objects");
    let store = LooseObjectStore::new(objects_dir, ObjectFormat::Sha1);

    let oid = store
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"content\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let path = store
        .object_path(&oid)
        .expect("test operation should succeed");

    // Overwrite one early byte of the stored file and write it back.
    let mut corrupted = fs::read(&path).expect("test operation should succeed");
    corrupted[10] = 0;
    fs::write(&path, &corrupted).expect("test operation should succeed");
    store.invalidate_cache();

    let msg = store
        .read_header(&oid)
        .expect_err("corrupt loose header must fail like git's ULHR_BAD")
        .to_string();
    let mentions_unpack = msg.contains("unable to unpack");
    let mentions_oid = msg.contains(&oid.to_hex());
    assert!(
        mentions_unpack && mentions_oid,
        "expected git's ULHR_BAD message, got: {msg}"
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
4982
/// Flipping every bit of the byte at the midpoint of a 4096-byte blob's loose
/// file must not stop `read_header` from reporting the blob's type and size.
#[test]
fn read_header_ignores_corruption_past_gits_header_window() {
    let root = temp_root("sley-loose-header-deep-corrupt");
    let objects_dir = root.join("objects");
    let store = LooseObjectStore::new(objects_dir, ObjectFormat::Sha1);

    // Body bytes derived from a multiplicative scramble of the index.
    let body = (0..4096u32)
        .map(|i| i.wrapping_mul(2654435761) as u8)
        .collect::<Vec<u8>>();
    let expected_len = body.len() as u64;
    let oid = store
        .write_object(EncodedObject::new(ObjectType::Blob, body))
        .expect("test operation should succeed");
    let path = store
        .object_path(&oid)
        .expect("test operation should succeed");

    let mut bytes = fs::read(&path).expect("test operation should succeed");
    let deep = bytes.len() / 2;
    bytes[deep] = !bytes[deep];
    fs::write(&path, &bytes).expect("test operation should succeed");
    store.invalidate_cache();

    let header = store
        .read_header(&oid)
        .expect("header-only read must still succeed for deep body corruption");
    assert_eq!(header, Some((ObjectType::Blob, expected_len)));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5012
/// Writes a one-object SHA-1 pack and its index by hand into `objects/pack`
/// and checks that a `FileObjectDatabase` can find and read the object.
#[test]
fn file_database_reads_object_from_pack_index() {
    let root = temp_root("sley-file-odb-pack");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"packed\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");

    // Both files share the `pack-<checksum>` stem.
    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    let read_back = read_object_for_assert(&db, &oid);
    assert_eq!(read_back, object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5042
/// After a read of an absent object reports `NotFound`, writing that object
/// through the same database's loose store must make the next read succeed.
#[test]
fn file_database_loose_cache_observes_same_process_write_after_miss() {
    let root = temp_root("sley-file-odb-loose-cache-write");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"written after miss\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // First lookup happens before the object exists.
    let first_read = db.read_object(&oid);
    assert!(matches!(first_read, Err(GitError::NotFound(_))));

    db.loose()
        .write_object(object.clone())
        .expect("test operation should succeed");

    let second_read = read_object_for_assert(&db, &oid);
    assert_eq!(second_read, object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5063
/// A presence checker that has already reported an object as absent must
/// report it as present once the object is written to the loose store.
#[test]
fn object_presence_checker_observes_same_process_loose_write_after_miss() {
    let root = temp_root("sley-presence-checker-loose-cache-write");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let mut checker = db.presence_checker();

    let object = EncodedObject::new(ObjectType::Blob, b"checker loose after miss\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    let present_before = checker
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(!present_before);

    db.loose()
        .write_object(object)
        .expect("test operation should succeed");

    let present_after = checker
        .contains(&oid)
        .expect("test operation should succeed");
    assert!(present_after);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5093
/// `read_object_header` must report the same type and size as a full
/// `read_object` for one loose object and three objects installed from a pack
/// written with ofs-delta preferred, and `None` for an id that was never stored.
#[test]
fn read_object_header_matches_full_read_for_loose_and_packed_and_delta() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-read-object-header");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let id_of = |object: &EncodedObject| {
        object
            .object_id(format)
            .expect("test operation should succeed")
    };

    let loose = EncodedObject::new(ObjectType::Blob, b"loose header object\n".to_vec());
    let loose_oid = db
        .write_object(loose.clone())
        .expect("test operation should succeed");

    // `child` shares a 4096-byte prefix with `base` and appends a short tail.
    let shared_prefix = vec![b'a'; 4096];
    let base = EncodedObject::new(ObjectType::Blob, shared_prefix.clone());
    let mut child_body = shared_prefix;
    child_body.extend_from_slice(b" plus a deltified tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let commitish = EncodedObject::new(ObjectType::Commit, b"header-only type probe\n".to_vec());

    let base_oid = id_of(&base);
    let child_oid = id_of(&child);
    let commit_oid = id_of(&commitish);

    // Record the expectations before the objects are moved into the pack writer.
    let cases = [
        (loose_oid, ObjectType::Blob, loose.body.len()),
        (base_oid, ObjectType::Blob, base.body.len()),
        (child_oid, ObjectType::Blob, child.body.len()),
        (commit_oid, ObjectType::Commit, commitish.body.len()),
    ];

    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(&[base, child, commitish], format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    for (oid, want_type, want_len) in cases {
        let header_before_full_read = db
            .read_object_header(&oid)
            .expect("test operation should succeed");
        assert_eq!(
            header_before_full_read,
            Some((want_type, want_len as u64)),
            "header for {oid}"
        );

        let full = db.read_object(&oid).expect("test operation should succeed");
        let header_after_full_read = db
            .read_object_header(&oid)
            .expect("test operation should succeed");
        assert_eq!(
            header_after_full_read,
            Some((full.object_type, full.body.len() as u64))
        );
    }

    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_header = db
        .read_object_header(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_header, None);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5169
/// Checks `object_storage_info` for four cases: a loose object (disk size equals
/// the file size, zero delta base), a packed base (zero delta base), a packed
/// child (delta base is the base object's id), and an unknown id (`None`).
#[test]
fn object_storage_info_reports_loose_packed_and_delta_metadata() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-object-storage-info");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // Loose object: disk size must equal the size of the file on disk.
    let loose_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"loose storage object\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let loose_path = db
        .loose()
        .object_path(&loose_oid)
        .expect("test operation should succeed");
    let loose_size = fs::metadata(loose_path)
        .expect("test operation should succeed")
        .len();
    let loose_info = db
        .object_storage_info(&loose_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert_eq!(loose_info.disk_size, loose_size);
    let no_base = zero_oid(format).expect("test operation should succeed");
    assert_eq!(loose_info.deltabase, no_base);

    // Packed pair: `child` extends `base` by a short tail.
    let shared_prefix = vec![b'a'; 4096];
    let base = EncodedObject::new(ObjectType::Blob, shared_prefix.clone());
    let mut child_body = shared_prefix;
    child_body.extend_from_slice(b" changed tail\n");
    let child = EncodedObject::new(ObjectType::Blob, child_body);
    let base_oid = base
        .object_id(format)
        .expect("test operation should succeed");
    let child_oid = child
        .object_id(format)
        .expect("test operation should succeed");
    let options = PackWriteOptions::new()
        .with_prefer_ofs_delta(true)
        .with_reorder(false);
    let pack = PackFile::write_packed_with_options(&[base, child], format, &options)
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    let base_info = db
        .object_storage_info(&base_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(base_info.disk_size > 0);
    let no_base = zero_oid(format).expect("test operation should succeed");
    assert_eq!(base_info.deltabase, no_base);

    let child_info = db
        .object_storage_info(&child_oid)
        .expect("test operation should succeed")
        .expect("test operation should succeed");
    assert!(child_info.disk_size > 0);
    assert_eq!(child_info.deltabase, base_oid);

    let missing = ObjectId::from_hex(format, "0000000000000000000000000000000000000001")
        .expect("test operation should succeed");
    let missing_info = db
        .object_storage_info(&missing)
        .expect("test operation should succeed");
    assert_eq!(missing_info, None);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5243
/// An 8-hex-digit prefix of the only loose object resolves to that object, and
/// the object also shows up in `object_ids`.
#[test]
fn file_database_resolves_unique_loose_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-loose");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"prefix loose\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let hex = oid.to_hex();
    let prefix = &hex[..8];

    let resolution = db
        .resolve_prefix(prefix)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    let all_ids = db.object_ids().expect("test operation should succeed");
    assert!(all_ids.contains(&oid));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5268
/// An 8-hex-digit prefix of an object that exists only in an installed pack
/// resolves to that object.
#[test]
fn file_database_resolves_unique_packed_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-packed");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let object = EncodedObject::new(ObjectType::Blob, b"prefix packed\n".to_vec());
    let oid = object
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
        .expect("test operation should succeed");
    db.install_pack(&pack)
        .expect("test operation should succeed");

    let hex = oid.to_hex();
    let prefix = &hex[..8];
    let resolution = db
        .resolve_prefix(prefix)
        .expect("test operation should succeed");
    assert_eq!(resolution, ObjectPrefixResolution::Unique(oid));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5292
/// Writes blobs until two of them share their first four hex digits, then
/// checks that resolving that prefix reports exactly those two ids as ambiguous.
#[test]
fn file_database_reports_ambiguous_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-ambiguous");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    // Map each 4-hex prefix to the first id seen with it; stop at the first repeat.
    let mut seen: HashMap<String, ObjectId> = HashMap::new();
    let mut collision = None;
    for idx in 0..10_000 {
        let object =
            EncodedObject::new(ObjectType::Blob, format!("ambiguous {idx}\n").into_bytes());
        let oid = db
            .write_object(object)
            .expect("test operation should succeed");
        let prefix = oid.to_hex()[..4].to_string();
        if let Some(first) = seen.insert(prefix.clone(), oid) {
            collision = Some((prefix, first, oid));
            break;
        }
    }
    let (prefix, first, second) = collision.expect("test should find a 4-hex collision");

    let resolution = db
        .resolve_prefix(&prefix)
        .expect("test operation should succeed");
    let mut matches = match resolution {
        ObjectPrefixResolution::Ambiguous(found) => found,
        _ => panic!("expected ambiguous prefix {prefix}"),
    };

    // Compare order-independently by sorting both sides on the hex form.
    matches.sort_by_key(|oid| oid.to_hex());
    let mut expected = vec![first, second];
    expected.sort_by_key(|oid| oid.to_hex());
    assert_eq!(matches, expected);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5325
/// Resolving the three-character prefix `abc` must fail with
/// `GitError::InvalidObjectId`.
#[test]
fn file_database_rejects_too_short_object_prefix() {
    let root = temp_root("sley-file-odb-prefix-short");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);

    let outcome = db.resolve_prefix("abc");
    assert!(matches!(outcome, Err(GitError::InvalidObjectId(_))));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5339
/// SHA-256 variant of the hand-written pack test: drops a one-object pack and
/// its index into `objects/pack` and reads the object back through the database.
#[test]
fn file_database_reads_sha256_object_from_pack_index() {
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-pack-sha256");
    let git_dir = root.join(".git");
    let pack_dir = git_dir.join("objects").join("pack");
    fs::create_dir_all(&pack_dir).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"packed sha256\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let written = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");

    let pack_name = written.checksum.to_hex();
    let pack_path = pack_dir.join(format!("pack-{pack_name}.pack"));
    let index_path = pack_dir.join(format!("pack-{pack_name}.idx"));
    fs::write(pack_path, written.pack).expect("test operation should succeed");
    fs::write(index_path, written.index).expect("test operation should succeed");

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    let read_back = read_object_for_assert(&db, &oid);
    assert_eq!(read_back, object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5370
/// `install_pack` on a SHA-256 pack must report the pack name and object ids,
/// create the pack and index files, write no promisor file and no loose
/// object, and leave the object readable through the database.
#[test]
fn file_database_installs_sha256_pack_without_loose_objects() {
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-install-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed sha256 pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let result = db
        .install_pack(&pack)
        .expect("test operation should succeed");

    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    // The object must live only in the pack, not as a loose file.
    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());

    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5403
/// Same expectations as the `install_pack` test, but the pack is handed over
/// as raw bytes via `install_raw_pack`.
#[test]
fn file_database_installs_raw_sha256_pack_without_loose_objects() {
    let format = ObjectFormat::Sha256;
    let root = temp_root("sley-file-odb-install-raw-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed raw sha256 pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let result = db
        .install_raw_pack(&pack.pack)
        .expect("test operation should succeed");

    let expected_name = format!("pack-{}", pack.checksum.to_hex());
    assert_eq!(result.pack_name, expected_name);
    assert_eq!(result.object_ids, vec![oid]);
    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());
    assert_eq!(result.promisor_path, None);

    // The object must live only in the pack, not as a loose file.
    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());

    let present = db.contains(&oid).expect("test operation should succeed");
    assert!(present);
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5436
/// A pack whose index was rewritten with one CRC32 bit flipped in its first
/// entry must be refused by `install_pack`.
#[test]
fn file_database_rejects_noncanonical_pack_index() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-install-bad-index");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"bad index crc\n".to_vec());
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");

    // Rebuild the index from entries with a tampered CRC on the first entry.
    let mut entries = pack.entries.clone();
    let tampered = &mut entries[0];
    tampered.crc32 ^= 1;
    let bad_index = PackIndex::write_v2(format, &entries, &pack.checksum)
        .expect("test operation should succeed");
    let mut bad_pack = pack.clone();
    bad_pack.index = bad_index;

    let db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let outcome = db.install_pack(&bad_pack);
    assert!(outcome.is_err());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5456
/// Installing a raw pack with `promisor: true` must create an empty
/// `.promisor` file with the same stem as the pack, alongside the pack and
/// index, without writing a loose copy of the object.
#[test]
fn file_database_installs_raw_promisor_pack_with_sidecar() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-file-odb-install-raw-promisor-pack");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");

    let object = EncodedObject::new(ObjectType::Blob, b"installed promisor pack\n".to_vec());
    let oid = object
        .object_id(format)
        .expect("test operation should succeed");
    let pack = PackFile::write_undeltified(std::slice::from_ref(&object), format)
        .expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let install_options = RawPackInstallOptions { promisor: true };
    let result = db
        .install_raw_pack_with_options(&pack.pack, install_options)
        .expect("test operation should succeed");

    // Sidecar: same stem as the pack, `.promisor` extension, zero bytes long.
    let promisor_path = result.promisor_path.expect("promisor sidecar");
    assert_eq!(promisor_path.file_stem(), result.pack_path.file_stem());
    let sidecar_extension = promisor_path.extension().and_then(|ext| ext.to_str());
    assert_eq!(sidecar_extension, Some("promisor"));
    assert!(promisor_path.exists());
    let sidecar_bytes = fs::read(&promisor_path).expect("test operation should succeed");
    assert_eq!(sidecar_bytes, b"");

    assert!(result.pack_path.exists());
    assert!(result.index_path.exists());

    let loose_path = db
        .loose()
        .object_path(&oid)
        .expect("test operation should succeed");
    assert!(!loose_path.exists());
    assert_eq!(read_object_for_assert(&db, &oid), object);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5496
/// For an admin directory `.git/worktrees/linked` whose `commondir` file holds
/// `../..`, the common dir must be the canonicalized `.git` directory and the
/// objects dir must be its `objects` child.
#[test]
fn repository_objects_dir_uses_linked_worktree_common_dir() {
    let root = temp_root("sley-odb-common-dir");
    let dot_git = root.join(".git");
    let admin = dot_git.join("worktrees").join("linked");
    fs::create_dir_all(&admin).expect("test operation should succeed");
    let commondir_file = admin.join("commondir");
    fs::write(commondir_file, "../..\n").expect("test operation should succeed");

    let common = fs::canonicalize(dot_git).expect("test operation should succeed");
    let expected_objects = common.join("objects");
    assert_eq!(repository_common_dir(&admin), common);
    assert_eq!(repository_objects_dir(&admin), expected_objects);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5511
/// Builds a commit -> tree -> blob chain in a source database, checks that the
/// reachability walk from the commit finds all three, then installs them into
/// a destination database as a pack and confirms none was written loose.
#[test]
fn reachable_object_helpers_walk_graph_and_install_pack() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-pack");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let blob = EncodedObject::new(ObjectType::Blob, b"reachable payload\n".to_vec());
    let blob_oid = source
        .write_object(blob.clone())
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_body);
    let tree_oid = source
        .write_object(tree.clone())
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_body);
    let commit_oid = source
        .write_object(commit.clone())
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&source, format, std::iter::once(commit_oid))
        .expect("test operation should succeed");
    for oid in [&commit_oid, &tree_oid, &blob_oid] {
        assert!(reachable.contains(oid));
    }

    let install =
        install_reachable_pack(&source, &destination, format, std::iter::once(commit_oid))
            .expect("test operation should succeed")
            .expect("reachable pack should be written");
    assert_eq!(install.object_ids.len(), 3);

    let expectations = [
        (&commit_oid, &commit),
        (&tree_oid, &tree),
        (&blob_oid, &blob),
    ];
    for (oid, object) in expectations {
        // Present in the destination, but not as a loose file.
        let loose_path = destination
            .loose()
            .object_path(oid)
            .expect("test operation should succeed");
        assert!(!loose_path.exists());
        let present = destination
            .contains(oid)
            .expect("test operation should succeed");
        assert!(present);
        assert_eq!(read_object_for_assert(&destination, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5591
/// Starting the walk from the same commit twice while excluding its tree must
/// yield exactly one object: the commit itself.
#[test]
fn reachable_object_helpers_respect_exclusions_and_duplicate_starts() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-exclusions");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"excluded payload\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let tree_body = Tree {
        entries: vec![TreeEntry {
            mode: 0o100644,
            name: BString::from(b"payload.txt"),
            oid: blob_oid,
        }],
    }
    .write();
    let tree_oid = db
        .write_object(EncodedObject::new(ObjectType::Tree, tree_body))
        .expect("test operation should succeed");

    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_body = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"initial\n".to_vec(),
    }
    .write();
    let commit_oid = db
        .write_object(EncodedObject::new(ObjectType::Commit, commit_body))
        .expect("test operation should succeed");

    let excluded = HashSet::from([tree_oid]);
    let starts = [commit_oid, commit_oid];
    let objects = collect_reachable_objects(&db, format, starts, &excluded)
        .expect("test operation should succeed");

    assert_eq!(objects.len(), 1);
    let only_oid = objects[0]
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(only_oid, commit_oid);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5648
/// With no exclusions `build_reachable_pack` returns a pack starting with
/// `PACK` that holds the single start object; with that object excluded it
/// returns `None`.
#[test]
fn build_reachable_pack_returns_raw_pack_and_respects_empty_exclusions() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-build-reachable-pack");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let object = EncodedObject::new(ObjectType::Blob, b"raw reachable pack\n".to_vec());
    let oid = db
        .write_object(object.clone())
        .expect("test operation should succeed");

    let nothing_excluded = HashSet::new();
    let pack = build_reachable_pack(&db, format, std::iter::once(oid), &nothing_excluded)
        .expect("test operation should succeed")
        .expect("reachable pack should be built");
    assert!(pack.pack.starts_with(b"PACK"));
    assert_eq!(pack.entries.len(), 1);
    assert_eq!(pack.entries[0].oid, oid);

    // Second pass: reuse the first pack's entry ids as starts, all excluded.
    let excluded = HashSet::from([oid]);
    let starts = pack.entries.into_iter().map(|entry| entry.oid);
    let second = build_reachable_pack(&db, format, starts, &excluded)
        .expect("test operation should succeed");
    assert!(second.is_none());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5681
/// A walk that starts at a tag must reach the tagged blob; a walk that starts
/// at an id that was never written must fail with a not-found error carrying
/// that id and the `Traversal` context.
#[test]
fn reachable_object_helpers_follow_tags_and_report_missing_objects() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-tags");
    let git_dir = root.join("repo.git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    let blob_oid = db
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"tagged payload\n".to_vec(),
        ))
        .expect("test operation should succeed");

    let tag_body = Tag {
        object: blob_oid,
        object_type: ObjectType::Blob,
        name: b"v1".to_vec(),
        tagger: Some(b"Example <example@example.invalid> 0 +0000".to_vec()),
        message: b"tag message\n".to_vec(),
        raw_body: None,
    }
    .write();
    let tag_oid = db
        .write_object(EncodedObject::new(ObjectType::Tag, tag_body))
        .expect("test operation should succeed");

    let reachable = collect_reachable_object_ids(&db, format, std::iter::once(tag_oid))
        .expect("test operation should succeed");
    for oid in [&tag_oid, &blob_oid] {
        assert!(reachable.contains(oid));
    }

    let missing = ObjectId::from_hex(format, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
        .expect("test operation should succeed");
    let err = collect_reachable_object_ids(&db, format, std::iter::once(missing))
        .expect_err("missing traversal root should error");
    let kind = err.not_found_kind().expect("typed not found");
    assert_eq!(kind.object_id(), Some(missing));
    let context = kind.missing_object_context();
    assert_eq!(context, Some(MissingObjectContext::Traversal));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5725
/// With an empty list of start ids, `install_reachable_pack` returns `None`
/// and the destination's `objects/pack` directory is never created.
#[test]
fn install_reachable_pack_empty_starts_create_no_pack() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-empty");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let no_starts: Vec<ObjectId> = Vec::new();
    let result = install_reachable_pack(&source, &destination, format, no_starts)
        .expect("test operation should succeed");
    assert!(result.is_none());

    let pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5745
/// When the only start id is also in the exclusion set,
/// `install_reachable_pack_excluding` returns `None` and creates no
/// `objects/pack` directory in the destination.
#[test]
fn install_reachable_pack_excluding_skips_fully_excluded_starts() {
    let format = ObjectFormat::Sha1;
    let root = temp_root("sley-reachable-install-excluding");
    let source_git_dir = root.join("source.git");
    let destination_git_dir = root.join("destination.git");
    for git_dir in [&source_git_dir, &destination_git_dir] {
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    }
    let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
    let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);

    let oid = source
        .write_object(EncodedObject::new(
            ObjectType::Blob,
            b"excluded install\n".to_vec(),
        ))
        .expect("test operation should succeed");
    let excluded = HashSet::from([oid]);
    let starts = std::iter::once(oid);

    let result =
        install_reachable_pack_excluding(&source, &destination, format, starts, &excluded)
            .expect("test operation should succeed");
    assert!(result.is_none());

    let pack_dir = destination_git_dir.join("objects").join("pack");
    assert!(!pack_dir.exists());

    fs::remove_dir_all(root).expect("test operation should succeed");
}
5776
5777 #[test]
5778 fn install_reachable_pack_supports_sha256() {
5779 let root = temp_root("sley-reachable-pack-sha256");
5780 let source_git_dir = root.join("source.git");
5781 let destination_git_dir = root.join("destination.git");
5782 fs::create_dir_all(source_git_dir.join("objects")).expect("test operation should succeed");
5783 fs::create_dir_all(destination_git_dir.join("objects"))
5784 .expect("test operation should succeed");
5785 let format = ObjectFormat::Sha256;
5786 let source = FileObjectDatabase::from_git_dir(&source_git_dir, format);
5787 let destination = FileObjectDatabase::from_git_dir(&destination_git_dir, format);
5788 let object = EncodedObject::new(ObjectType::Blob, b"sha256 reachable pack\n".to_vec());
5789 let oid = source
5790 .write_object(object.clone())
5791 .expect("test operation should succeed");
5792
5793 let pack = build_reachable_pack(&source, format, std::iter::once(oid), &HashSet::new())
5794 .expect("test operation should succeed")
5795 .expect("sha256 reachable pack should be built");
5796 assert!(pack.pack.starts_with(b"PACK"));
5797 assert_eq!(pack.entries[0].oid, oid);
5798
5799 let result = install_reachable_pack(&source, &destination, format, std::iter::once(oid))
5800 .expect("test operation should succeed")
5801 .expect("sha256 reachable pack should be written");
5802
5803 assert_eq!(result.object_ids, vec![oid]);
5804 assert!(
5805 !destination
5806 .loose()
5807 .object_path(&oid)
5808 .expect("test operation should succeed")
5809 .exists()
5810 );
5811 assert_eq!(read_object_for_assert(&destination, &oid), object);
5812 fs::remove_dir_all(root).expect("test operation should succeed");
5813 }
5814
5815 #[test]
5816 fn install_helpers_accept_custom_raw_pack_installer() {
5817 #[derive(Default)]
5818 struct RecordingInstaller {
5819 packs: std::cell::RefCell<Vec<Vec<u8>>>,
5820 installed: std::cell::RefCell<Vec<ObjectId>>,
5821 }
5822
5823 impl RawPackInstaller for RecordingInstaller {
5824 fn install_raw_pack(&self, pack_bytes: &[u8]) -> Result<RawPackInstallResult> {
5825 self.packs.borrow_mut().push(pack_bytes.to_vec());
5826 let object_ids = self.installed.borrow().clone();
5827 Ok(RawPackInstallResult { object_ids })
5828 }
5829 }
5830
5831 let format = ObjectFormat::Sha1;
5832 let source = ObjectDatabase::new(format);
5833 let object = EncodedObject::new(ObjectType::Blob, b"custom raw installer\n".to_vec());
5834 let oid = source
5835 .write_object(object)
5836 .expect("test operation should succeed");
5837 let installer = RecordingInstaller::default();
5838 installer.installed.borrow_mut().push(oid);
5839
5840 let result = install_reachable_pack(&source, &installer, format, std::iter::once(oid))
5841 .expect("test operation should succeed")
5842 .expect("custom installer should receive pack");
5843
5844 assert_eq!(result.object_ids, installer.installed.into_inner());
5845 let packs = installer.packs.into_inner();
5846 assert_eq!(packs.len(), 1);
5847 assert!(packs[0].starts_with(b"PACK"));
5848 }
5849
5850 #[test]
5851 fn file_database_reads_object_from_multi_pack_index() {
5852 let root = temp_root("sley-file-odb-midx");
5853 let git_dir = root.join(".git");
5854 let pack_dir = git_dir.join("objects").join("pack");
5855 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
5856 let first = EncodedObject::new(ObjectType::Blob, b"first packed\n".to_vec());
5857 let second = EncodedObject::new(ObjectType::Blob, b"second packed\n".to_vec());
5858 let first_oid = first
5859 .object_id(ObjectFormat::Sha1)
5860 .expect("test operation should succeed");
5861 let second_oid = second
5862 .object_id(ObjectFormat::Sha1)
5863 .expect("test operation should succeed");
5864 let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5865 .expect("test operation should succeed");
5866 let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5867 .expect("test operation should succeed");
5868 let first_pack_name = format!("pack-{}.idx", first_pack.checksum.to_hex());
5869 let second_pack_name = format!("pack-{}.idx", second_pack.checksum.to_hex());
5870 fs::write(
5871 pack_dir.join(first_pack_name.replace(".idx", ".pack")),
5872 first_pack.pack,
5873 )
5874 .expect("test operation should succeed");
5875 fs::write(
5876 pack_dir.join(second_pack_name.replace(".idx", ".pack")),
5877 second_pack.pack,
5878 )
5879 .expect("test operation should succeed");
5880 let midx = MultiPackIndex::write(
5881 ObjectFormat::Sha1,
5882 2,
5883 &[first_pack_name, second_pack_name],
5884 &[
5885 sley_pack::MultiPackIndexEntry {
5886 oid: first_oid,
5887 pack_int_id: 0,
5888 offset: first_pack.entries[0].offset,
5889 },
5890 sley_pack::MultiPackIndexEntry {
5891 oid: second_oid,
5892 pack_int_id: 1,
5893 offset: second_pack.entries[0].offset,
5894 },
5895 ],
5896 )
5897 .expect("test operation should succeed");
5898 fs::write(pack_dir.join("multi-pack-index"), midx).expect("test operation should succeed");
5899
5900 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5901 assert!(
5902 db.contains(&second_oid)
5903 .expect("test operation should succeed")
5904 );
5905 assert_eq!(
5906 db.resolve_prefix(&second_oid.to_hex()[..8])
5907 .expect("test operation should succeed"),
5908 ObjectPrefixResolution::Unique(second_oid)
5909 );
5910 assert_eq!(read_object_for_assert(&db, &second_oid), second);
5911 assert_eq!(read_object_for_assert(&db, &first_oid), first);
5912 fs::remove_dir_all(root).expect("test operation should succeed");
5913 }
5914
5915 #[test]
5916 fn file_database_finds_pack_added_after_registry_was_cached() {
5917 let root = temp_root("sley-file-odb-pack-added-late");
5921 let git_dir = root.join(".git");
5922 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5923 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5924
5925 let first = EncodedObject::new(ObjectType::Blob, b"first late\n".to_vec());
5927 let first_oid = first
5928 .object_id(ObjectFormat::Sha1)
5929 .expect("test operation should succeed");
5930 let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5931 .expect("test operation should succeed");
5932 db.install_pack(&first_pack)
5933 .expect("test operation should succeed");
5934 assert_eq!(read_object_for_assert(&db, &first_oid), first);
5935
5936 let second = EncodedObject::new(ObjectType::Blob, b"second late\n".to_vec());
5938 let second_oid = second
5939 .object_id(ObjectFormat::Sha1)
5940 .expect("test operation should succeed");
5941 assert!(matches!(
5943 db.read_object(&second_oid),
5944 Err(GitError::NotFound(_))
5945 ));
5946
5947 let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5950 .expect("test operation should succeed");
5951 db.install_pack(&second_pack)
5952 .expect("test operation should succeed");
5953 assert!(
5954 db.contains(&second_oid)
5955 .expect("test operation should succeed")
5956 );
5957 assert_eq!(read_object_for_assert(&db, &second_oid), second);
5958 assert_eq!(read_object_for_assert(&db, &first_oid), first);
5960
5961 fs::remove_dir_all(root).expect("test operation should succeed");
5962 }
5963
5964 #[test]
5965 fn object_presence_checker_finds_pack_added_after_registry_was_cached() {
5966 let root = temp_root("sley-presence-checker-pack-added-late");
5967 let git_dir = root.join(".git");
5968 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
5969 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
5970
5971 let first = EncodedObject::new(ObjectType::Blob, b"checker first late\n".to_vec());
5972 let first_oid = first
5973 .object_id(ObjectFormat::Sha1)
5974 .expect("test operation should succeed");
5975 let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
5976 .expect("test operation should succeed");
5977 db.install_pack(&first_pack)
5978 .expect("test operation should succeed");
5979
5980 let second = EncodedObject::new(ObjectType::Blob, b"checker second late\n".to_vec());
5981 let second_oid = second
5982 .object_id(ObjectFormat::Sha1)
5983 .expect("test operation should succeed");
5984 let mut checker = db.presence_checker();
5985 assert!(
5986 checker
5987 .contains(&first_oid)
5988 .expect("test operation should succeed")
5989 );
5990 assert!(
5991 !checker
5992 .contains(&second_oid)
5993 .expect("test operation should succeed")
5994 );
5995
5996 let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
5997 .expect("test operation should succeed");
5998 db.install_pack(&second_pack)
5999 .expect("test operation should succeed");
6000
6001 assert!(
6002 checker
6003 .contains(&second_oid)
6004 .expect("test operation should succeed")
6005 );
6006 fs::remove_dir_all(root).expect("test operation should succeed");
6007 }
6008
6009 #[test]
6010 fn file_database_pack_registry_loads_indexes_lazily_and_refreshes_after_count_change() {
6011 let root = temp_root("sley-file-odb-pack-registry-refresh");
6012 let git_dir = root.join(".git");
6013 let pack_dir = git_dir.join("objects").join("pack");
6014 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6015 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6016
6017 let first = EncodedObject::new(ObjectType::Blob, b"registry first\n".to_vec());
6018 let first_oid = first
6019 .object_id(ObjectFormat::Sha1)
6020 .expect("test operation should succeed");
6021 let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6022 .expect("test operation should succeed");
6023 db.install_pack(&first_pack)
6024 .expect("test operation should succeed");
6025
6026 let first_registry = db
6027 .cached_pack_registry(&pack_dir, false)
6028 .expect("test operation should succeed");
6029 assert_eq!(first_registry.fingerprint.idx_count, 1);
6030 assert_eq!(first_registry.fingerprint.pack_count, 1);
6031 assert_eq!(first_registry.packs.len(), 1);
6032 assert!(
6033 first_registry.packs[0]
6034 .index
6035 .lock()
6036 .expect("test operation should succeed")
6037 .is_none()
6038 );
6039 assert!(
6040 first_registry.packs[0]
6041 .data
6042 .lock()
6043 .expect("test operation should succeed")
6044 .is_none()
6045 );
6046
6047 assert!(
6050 db.contains(&first_oid)
6051 .expect("test operation should succeed")
6052 );
6053 assert!(
6054 first_registry.packs[0]
6055 .index
6056 .lock()
6057 .expect("test operation should succeed")
6058 .is_some()
6059 );
6060 assert!(
6061 first_registry.packs[0]
6062 .data
6063 .lock()
6064 .expect("test operation should succeed")
6065 .is_none()
6066 );
6067 assert_eq!(read_object_for_assert(&db, &first_oid), first);
6068 assert!(
6069 first_registry.packs[0]
6070 .data
6071 .lock()
6072 .expect("test operation should succeed")
6073 .is_some()
6074 );
6075
6076 let second = EncodedObject::new(ObjectType::Blob, b"registry second\n".to_vec());
6077 let second_oid = second
6078 .object_id(ObjectFormat::Sha1)
6079 .expect("test operation should succeed");
6080 let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6081 .expect("test operation should succeed");
6082 db.install_pack(&second_pack)
6083 .expect("test operation should succeed");
6084
6085 let refreshed = db
6086 .cached_pack_registry(&pack_dir, true)
6087 .expect("test operation should succeed");
6088 assert!(!Arc::ptr_eq(&first_registry, &refreshed));
6089 assert_eq!(refreshed.fingerprint.idx_count, 2);
6090 assert_eq!(refreshed.fingerprint.pack_count, 2);
6091 assert_eq!(refreshed.packs.len(), 2);
6092 assert_eq!(read_object_for_assert(&db, &second_oid), second);
6093
6094 fs::remove_dir_all(root).expect("test operation should succeed");
6095 }
6096
6097 #[test]
6098 fn file_database_pack_search_hint_rebuilds_after_pack_added() {
6099 let root = temp_root("sley-file-odb-pack-lookup-added-late");
6103 let git_dir = root.join(".git");
6104 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6105 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6106
6107 let first = EncodedObject::new(ObjectType::Blob, b"first lookup\n".to_vec());
6108 let second = EncodedObject::new(ObjectType::Blob, b"second lookup\n".to_vec());
6109 let third = EncodedObject::new(ObjectType::Blob, b"third lookup\n".to_vec());
6110 let first_oid = first
6111 .object_id(ObjectFormat::Sha1)
6112 .expect("test operation should succeed");
6113 let second_oid = second
6114 .object_id(ObjectFormat::Sha1)
6115 .expect("test operation should succeed");
6116 let third_oid = third
6117 .object_id(ObjectFormat::Sha1)
6118 .expect("test operation should succeed");
6119
6120 let first_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&first))
6121 .expect("test operation should succeed");
6122 let second_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&second))
6123 .expect("test operation should succeed");
6124 db.install_pack(&first_pack)
6125 .expect("test operation should succeed");
6126 db.install_pack(&second_pack)
6127 .expect("test operation should succeed");
6128
6129 assert_eq!(read_object_for_assert(&db, &first_oid), first);
6131 assert_eq!(read_object_for_assert(&db, &second_oid), second);
6132 assert!(matches!(
6133 db.read_object(&third_oid),
6134 Err(GitError::NotFound(_))
6135 ));
6136
6137 let third_pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&third))
6138 .expect("test operation should succeed");
6139 db.install_pack(&third_pack)
6140 .expect("test operation should succeed");
6141
6142 assert_eq!(read_object_for_assert(&db, &third_oid), third);
6143 assert_eq!(read_object_for_assert(&db, &first_oid), first);
6144
6145 fs::remove_dir_all(root).expect("test operation should succeed");
6146 }
6147
6148 #[test]
6149 fn file_database_prefers_loose_object_over_packed_object() {
6150 let root = temp_root("sley-file-odb-prefer-loose");
6151 let git_dir = root.join(".git");
6152 let pack_dir = git_dir.join("objects").join("pack");
6153 fs::create_dir_all(&pack_dir).expect("test operation should succeed");
6154 let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
6155 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6156 .expect("test operation should succeed");
6157 let pack_name = written.checksum.to_hex();
6158 fs::write(
6159 pack_dir.join(format!("pack-{pack_name}.pack")),
6160 written.pack,
6161 )
6162 .expect("test operation should succeed");
6163 fs::write(
6164 pack_dir.join(format!("pack-{pack_name}.idx")),
6165 written.index,
6166 )
6167 .expect("test operation should succeed");
6168
6169 let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6170 let oid = db
6171 .write_object(object.clone())
6172 .expect("test operation should succeed");
6173 assert_eq!(read_object_for_assert(&db, &oid), object);
6174 fs::remove_dir_all(root).expect("test operation should succeed");
6175 }
6176
6177 #[test]
6178 fn bundle_prerequisite_verification_reads_existing_objects() {
6179 let db = ObjectDatabase::new(ObjectFormat::Sha1);
6180 let oid = db
6181 .write_object(EncodedObject::new(ObjectType::Blob, b"base\n".to_vec()))
6182 .expect("test operation should succeed");
6183 let bundle_bytes = format!("# v2 git bundle\n-{oid} base\n\n").into_bytes();
6184 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6185 .expect("test operation should succeed");
6186
6187 verify_bundle_prerequisites(&bundle, &db).expect("test operation should succeed");
6188 }
6189
6190 #[test]
6191 fn bundle_prerequisite_verification_reports_missing_objects() {
6192 let db = ObjectDatabase::new(ObjectFormat::Sha1);
6193 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6194 .expect("test operation should succeed");
6195 let bundle_bytes = format!("# v2 git bundle\n-{missing} missing\n\n").into_bytes();
6196 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6197 .expect("test operation should succeed");
6198
6199 assert!(verify_bundle_prerequisites(&bundle, &db).is_err());
6200 }
6201
6202 #[test]
6203 fn unbundle_objects_writes_pack_entries_and_returns_refs() {
6204 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6205 let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6206 let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6207 let oid = object
6208 .object_id(ObjectFormat::Sha1)
6209 .expect("test operation should succeed");
6210 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6211 .expect("test operation should succeed");
6212 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6213 .into_bytes()
6214 .into_iter()
6215 .chain(pack.pack)
6216 .collect::<Vec<_>>();
6217 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6218 .expect("test operation should succeed");
6219
6220 let result = unbundle_objects(&bundle, &prerequisite_reader, &mut writer)
6221 .expect("test operation should succeed");
6222 assert_eq!(result.written_objects, vec![oid]);
6223 assert_eq!(result.references, bundle.references);
6224 assert_eq!(read_object_for_assert(&writer, &oid), object);
6225 }
6226
6227 #[test]
6228 fn install_bundle_pack_writes_pack_and_returns_refs() {
6229 let root = temp_root("sley-install-bundle-pack");
6230 let git_dir = root.join(".git");
6231 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6232 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6233 let database = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
6234 let object = EncodedObject::new(ObjectType::Blob, b"bundle pack object\n".to_vec());
6235 let oid = object
6236 .object_id(ObjectFormat::Sha1)
6237 .expect("test operation should succeed");
6238 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6239 .expect("test operation should succeed");
6240 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
6241 .into_bytes()
6242 .into_iter()
6243 .chain(pack.pack)
6244 .collect::<Vec<_>>();
6245 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6246 .expect("test operation should succeed");
6247
6248 let result = install_bundle_pack(&bundle, &prerequisite_reader, &database)
6249 .expect("test operation should succeed");
6250
6251 assert_eq!(result.written_objects, vec![oid]);
6252 assert_eq!(result.references, bundle.references);
6253 assert!(
6254 database
6255 .contains(&oid)
6256 .expect("test operation should succeed")
6257 );
6258 assert_eq!(read_object_for_assert(&database, &oid), object);
6259 assert!(
6260 !database
6261 .loose()
6262 .object_path(&oid)
6263 .expect("test operation should succeed")
6264 .exists()
6265 );
6266 fs::remove_dir_all(root).expect("test operation should succeed");
6267 }
6268
6269 #[test]
6270 fn unpack_packfile_objects_writes_sha256_pack_entries() {
6271 let writer = ObjectDatabase::new(ObjectFormat::Sha256);
6272 let object = EncodedObject::new(ObjectType::Blob, b"transport pack object\n".to_vec());
6273 let oid = object
6274 .object_id(ObjectFormat::Sha256)
6275 .expect("test operation should succeed");
6276 let pack = PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
6277 .expect("test operation should succeed");
6278
6279 let result = unpack_packfile_objects(&pack.pack, ObjectFormat::Sha256, &writer)
6280 .expect("test operation should succeed");
6281
6282 assert_eq!(result.written_objects, vec![oid]);
6283 assert_eq!(read_object_for_assert(&writer, &oid), object);
6284 }
6285
6286 #[test]
6287 fn unbundle_objects_rejects_missing_prerequisites_before_writing() {
6288 let prerequisite_reader = ObjectDatabase::new(ObjectFormat::Sha1);
6289 let mut writer = ObjectDatabase::new(ObjectFormat::Sha1);
6290 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
6291 .expect("test operation should succeed");
6292 let object = EncodedObject::new(ObjectType::Blob, b"bundle object\n".to_vec());
6293 let oid = object
6294 .object_id(ObjectFormat::Sha1)
6295 .expect("test operation should succeed");
6296 let pack = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
6297 .expect("test operation should succeed");
6298 let bundle_bytes =
6299 format!("# v2 git bundle\n-{missing} missing\n{oid} refs/heads/main\n\n")
6300 .into_bytes()
6301 .into_iter()
6302 .chain(pack.pack)
6303 .collect::<Vec<_>>();
6304 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
6305 .expect("test operation should succeed");
6306
6307 assert!(unbundle_objects(&bundle, &prerequisite_reader, &mut writer).is_err());
6308 assert!(!writer.contains(&oid));
6309 }
6310
6311 fn write_commit_graph(
6314 db: &mut FileObjectDatabase,
6315 payload: &[u8],
6316 ) -> Vec<(ObjectId, EncodedObject)> {
6317 let blob = EncodedObject::new(ObjectType::Blob, payload.to_vec());
6318 let blob_oid = db
6319 .write_object(blob.clone())
6320 .expect("test operation should succeed");
6321 let tree = EncodedObject::new(
6322 ObjectType::Tree,
6323 Tree {
6324 entries: vec![TreeEntry {
6325 mode: 0o100644,
6326 name: BString::from(b"payload.txt"),
6327 oid: blob_oid,
6328 }],
6329 }
6330 .write(),
6331 );
6332 let tree_oid = db
6333 .write_object(tree.clone())
6334 .expect("test operation should succeed");
6335 let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
6336 let commit = EncodedObject::new(
6337 ObjectType::Commit,
6338 Commit {
6339 tree: tree_oid,
6340 parents: Vec::new(),
6341 author: identity.clone(),
6342 committer: identity,
6343 encoding: None,
6344 message: b"initial\n".to_vec(),
6345 }
6346 .write(),
6347 );
6348 let commit_oid = db
6349 .write_object(commit.clone())
6350 .expect("test operation should succeed");
6351 vec![(commit_oid, commit), (tree_oid, tree), (blob_oid, blob)]
6352 }
6353
6354 fn repack_all_objects_consolidates_loose_and_pack(format: ObjectFormat) {
6355 let root = temp_root("sley-repack-all");
6356 let git_dir = root.join(".git");
6357 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6358 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6359
6360 let packed_blob = EncodedObject::new(ObjectType::Blob, b"already packed\n".to_vec());
6362 let packed_oid = packed_blob
6363 .object_id(format)
6364 .expect("test operation should succeed");
6365 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
6366 .expect("test operation should succeed");
6367 let existing = db
6368 .install_pack(&existing_pack)
6369 .expect("test operation should succeed");
6370
6371 let graph = write_commit_graph(&mut db, b"repack payload\n");
6372
6373 let mut expected: HashMap<ObjectId, EncodedObject> = graph.iter().cloned().collect();
6374 expected.insert(packed_oid, packed_blob.clone());
6375
6376 let result = repack_all_objects(&git_dir, format)
6377 .expect("test operation should succeed")
6378 .expect("repository has objects");
6379
6380 assert_eq!(result.object_count, expected.len());
6382 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
6383 assert_eq!(parsed.entries.len(), expected.len());
6384 for entry in &parsed.entries {
6385 let want = expected
6386 .get(&entry.entry.oid)
6387 .expect("packed object was in the repository");
6388 assert_eq!(&entry.object, want);
6389 assert_eq!(
6390 entry
6391 .object
6392 .object_id(format)
6393 .expect("test operation should succeed"),
6394 entry.entry.oid
6395 );
6396 }
6397 let idx = PackIndex::parse(&result.idx, format).expect("test operation should succeed");
6399 assert_eq!(idx.pack_checksum, parsed.checksum);
6400 assert_eq!(idx.entries.len(), expected.len());
6401
6402 assert_eq!(result.obsolete_packs, vec![existing.pack_path.clone()]);
6404 let mut want_loose: Vec<ObjectId> = graph.iter().map(|(oid, _)| *oid).collect();
6406 want_loose.sort_by_key(ObjectId::to_hex);
6407 assert_eq!(result.packed_loose, want_loose);
6408 assert!(!result.packed_loose.contains(&packed_oid));
6409
6410 fs::remove_dir_all(root).expect("test operation should succeed");
6411 }
6412
6413 #[test]
6414 fn repack_all_objects_consolidates_loose_and_pack_sha1() {
6415 repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha1);
6416 }
6417
6418 #[test]
6419 fn repack_all_objects_consolidates_loose_and_pack_sha256() {
6420 repack_all_objects_consolidates_loose_and_pack(ObjectFormat::Sha256);
6421 }
6422
6423 #[test]
6424 fn repack_all_objects_returns_none_for_empty_repository() {
6425 let root = temp_root("sley-repack-empty");
6426 let git_dir = root.join(".git");
6427 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6428
6429 assert!(
6430 repack_all_objects(&git_dir, ObjectFormat::Sha1)
6431 .expect("test operation should succeed")
6432 .is_none()
6433 );
6434
6435 fs::remove_dir_all(root).expect("test operation should succeed");
6436 }
6437
6438 #[test]
6439 fn install_repack_result_writes_pack_without_pruning_by_default() {
6440 let root = temp_root("sley-repack-install-nodelete");
6441 let git_dir = root.join(".git");
6442 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6443 let format = ObjectFormat::Sha1;
6444 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6445 let graph = write_commit_graph(&mut db, b"install no prune\n");
6446
6447 let result = repack_all_objects(&git_dir, format)
6448 .expect("test operation should succeed")
6449 .expect("test operation should succeed");
6450 install_repack_result(&git_dir, format, &result, false)
6451 .expect("test operation should succeed");
6452
6453 let parsed = PackFile::parse(&result.pack, format).expect("test operation should succeed");
6455 let pack_dir = git_dir.join("objects").join("pack");
6456 let pack_path = pack_dir.join(format!("pack-{}.pack", parsed.checksum.to_hex()));
6457 let idx_path = pack_dir.join(format!("pack-{}.idx", parsed.checksum.to_hex()));
6458 assert!(pack_path.exists());
6459 assert!(idx_path.exists());
6460 for (oid, object) in &graph {
6462 assert!(
6463 db.loose()
6464 .object_path(oid)
6465 .expect("test operation should succeed")
6466 .exists()
6467 );
6468 assert_eq!(read_object_for_assert(&db, oid), *object);
6469 }
6470
6471 fs::remove_dir_all(root).expect("test operation should succeed");
6472 }
6473
6474 #[test]
6475 fn install_repack_result_prunes_obsolete_packs_and_loose_objects() {
6476 let root = temp_root("sley-repack-install-prune");
6477 let git_dir = root.join(".git");
6478 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6479 let format = ObjectFormat::Sha1;
6480 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6481
6482 let packed_blob = EncodedObject::new(ObjectType::Blob, b"prune packed\n".to_vec());
6483 let existing_pack = PackFile::write_undeltified(std::slice::from_ref(&packed_blob), format)
6484 .expect("test operation should succeed");
6485 let existing = db
6486 .install_pack(&existing_pack)
6487 .expect("test operation should succeed");
6488 let graph = write_commit_graph(&mut db, b"prune payload\n");
6489
6490 let result = repack_all_objects(&git_dir, format)
6491 .expect("test operation should succeed")
6492 .expect("test operation should succeed");
6493 let new_pack_checksum = PackFile::parse(&result.pack, format)
6494 .expect("test operation should succeed")
6495 .checksum;
6496 install_repack_result(&git_dir, format, &result, true)
6497 .expect("test operation should succeed");
6498
6499 assert!(!existing.pack_path.exists());
6501 assert!(!existing.index_path.exists());
6502 for (oid, _) in &graph {
6504 assert!(
6505 !db.loose()
6506 .object_path(oid)
6507 .expect("test operation should succeed")
6508 .exists()
6509 );
6510 }
6511 let pack_dir = git_dir.join("objects").join("pack");
6513 assert!(
6514 pack_dir
6515 .join(format!("pack-{}.pack", new_pack_checksum.to_hex()))
6516 .exists()
6517 );
6518 let reopened = FileObjectDatabase::from_git_dir(&git_dir, format);
6519 for (oid, object) in &graph {
6520 assert!(
6521 reopened
6522 .contains(oid)
6523 .expect("test operation should succeed")
6524 );
6525 assert_eq!(read_object_for_assert(&reopened, oid), *object);
6526 }
6527 let packed_oid = packed_blob
6528 .object_id(format)
6529 .expect("test operation should succeed");
6530 assert_eq!(read_object_for_assert(&reopened, &packed_oid), packed_blob);
6531
6532 fs::remove_dir_all(root).expect("test operation should succeed");
6533 }
6534
6535 #[test]
6536 fn install_repack_result_preserves_keep_and_promisor_packs() {
6537 let root = temp_root("sley-repack-install-keep-promisor");
6538 let git_dir = root.join(".git");
6539 fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
6540 let format = ObjectFormat::Sha1;
6541 let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
6542
6543 let keep_blob = EncodedObject::new(ObjectType::Blob, b"keep protected\n".to_vec());
6544 let keep_pack = PackFile::write_undeltified(std::slice::from_ref(&keep_blob), format)
6545 .expect("test operation should succeed");
6546 let keep_install = db
6547 .install_pack(&keep_pack)
6548 .expect("test operation should succeed");
6549 let keep_sidecar = keep_install.pack_path.with_extension("keep");
6550 fs::write(&keep_sidecar, b"").expect("test operation should succeed");
6551
6552 let promisor_blob = EncodedObject::new(ObjectType::Blob, b"promisor protected\n".to_vec());
6553 let promisor_pack =
6554 PackFile::write_undeltified(std::slice::from_ref(&promisor_blob), format)
6555 .expect("test operation should succeed");
6556 let promisor_install = db
6557 .install_pack_with_options(&promisor_pack, RawPackInstallOptions { promisor: true })
6558 .expect("test operation should succeed");
6559 let promisor_sidecar = promisor_install
6560 .promisor_path
6561 .clone()
6562 .expect("promisor sidecar");
6563
6564 let graph = write_commit_graph(&mut db, b"new consolidated payload\n");
6565 let result = repack_all_objects(&git_dir, format)
6566 .expect("test operation should succeed")
6567 .expect("test operation should succeed");
6568 assert!(result.obsolete_packs.contains(&keep_install.pack_path));
6569 assert!(result.obsolete_packs.contains(&promisor_install.pack_path));
6570
6571 install_repack_result(&git_dir, format, &result, true)
6572 .expect("test operation should succeed");
6573
6574 for path in [
6575 &keep_install.pack_path,
6576 &keep_install.index_path,
6577 &keep_sidecar,
6578 &promisor_install.pack_path,
6579 &promisor_install.index_path,
6580 &promisor_sidecar,
6581 ] {
6582 assert!(path.exists(), "{} should be preserved", path.display());
6583 }
6584 for (oid, _) in &graph {
6585 assert!(
6586 !db.loose()
6587 .object_path(oid)
6588 .expect("test operation should succeed")
6589 .exists()
6590 );
6591 }
6592
6593 fs::remove_dir_all(root).expect("test operation should succeed");
6594 }
6595
/// Installing a repack result must leave a loose object on disk when that object
/// was written after the repack ran, even if `packed_loose` lists it.
#[test]
fn install_repack_result_keeps_loose_object_absent_from_new_pack() {
    let root = temp_root("sley-repack-install-safety");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"safety packed\n");

    // Run the repack before the stray object is written, so the result cannot
    // legitimately cover it.
    let repack_outcome =
        repack_all_objects(&git_dir, format).expect("test operation should succeed");
    let mut result = repack_outcome.expect("test operation should succeed");

    let stray_payload = b"never packed\n".to_vec();
    let stray = EncodedObject::new(ObjectType::Blob, stray_payload);
    let stray_oid = db
        .write_object(stray.clone())
        .expect("test operation should succeed");
    let already_listed = result.packed_loose.iter().any(|oid| *oid == stray_oid);
    assert!(!already_listed);
    // Forge the bookkeeping: claim the stray object was packed although it was not.
    result.packed_loose.push(stray_oid);

    // NOTE(review): the trailing `true` presumably enables removal of packed loose
    // objects — confirm against `install_repack_result`.
    let install_outcome = install_repack_result(&git_dir, format, &result, true);
    install_outcome.expect("test operation should succeed");

    // The stray object has to survive as a loose file and stay readable.
    let stray_still_loose = db
        .loose()
        .object_path(&stray_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(stray_still_loose);
    let reread = read_object_for_assert(&db, &stray_oid);
    assert_eq!(reread, stray);

    // Objects from the commit graph are expected to have lost their loose copies.
    for (packed_oid, _) in &graph {
        let still_loose = db
            .loose()
            .object_path(packed_oid)
            .expect("test operation should succeed")
            .exists();
        assert!(!still_loose);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6642
/// Exercises `prune_unreachable_loose` in both modes against one reachable commit
/// graph plus one dangling blob.
///
/// With the final argument `false` the dangling object must be reported but left on
/// disk; with `true` it must be reported and removed. In both cases every object of
/// the reachable graph has to remain present and readable.
#[test]
fn prune_unreachable_loose_reports_and_deletes_only_unreachable() {
    let root = temp_root("sley-prune-unreachable");
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let mut db = FileObjectDatabase::from_git_dir(&git_dir, format);
    let graph = write_commit_graph(&mut db, b"reachable payload\n");
    // `ObjectId` is `Copy` (it is passed by value several times below), so a plain
    // copy is enough here; no `.clone()` needed.
    let commit_oid = graph[0].0;

    let dangling = EncodedObject::new(ObjectType::Blob, b"dangling\n".to_vec());
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    // Dry run: report the unreachable object without touching it.
    let reported = prune_unreachable_loose(&git_dir, format, [commit_oid], false)
        .expect("test operation should succeed");
    assert_eq!(
        reported,
        vec![dangling_oid],
        "dry run should report exactly the dangling object"
    );
    assert!(
        db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists(),
        "dry run must leave the dangling object on disk"
    );

    // Real run: the same object is reported again and this time deleted.
    let deleted = prune_unreachable_loose(&git_dir, format, [commit_oid], true)
        .expect("test operation should succeed");
    assert_eq!(
        deleted,
        vec![dangling_oid],
        "prune should delete exactly the dangling object"
    );
    assert!(
        !db.loose()
            .object_path(&dangling_oid)
            .expect("test operation should succeed")
            .exists(),
        "prune should have removed the dangling object"
    );

    // Everything written by `write_commit_graph` must be untouched.
    for (oid, object) in &graph {
        let path = db
            .loose()
            .object_path(oid)
            .expect("test operation should succeed");
        assert!(
            path.exists(),
            "{} should survive pruning",
            path.display()
        );
        assert_eq!(read_object_for_assert(&db, oid), *object);
    }

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6692
/// Pruning must succeed when a reachable tree contains a gitlink entry (mode
/// `0o160000`) whose target oid was never written to the object database, and it
/// must still delete the unrelated dangling blob.
#[test]
fn prune_unreachable_loose_ignores_gitlink_targets() {
    let root = temp_root("sley-prune-gitlink");
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let format = ObjectFormat::Sha1;
    let db = FileObjectDatabase::from_git_dir(&git_dir, format);

    // This oid is only referenced from the tree below; no object is stored for it.
    let submodule_oid = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let gitlink_entry = TreeEntry {
        mode: 0o160000,
        name: BString::from(b"submodule"),
        oid: submodule_oid,
    };
    let tree_payload = Tree {
        entries: vec![gitlink_entry],
    }
    .write();
    let tree = EncodedObject::new(ObjectType::Tree, tree_payload);
    let tree_oid = db
        .write_object(tree)
        .expect("test operation should succeed");

    // A root commit pointing at the tree; it serves as the reachability tip.
    let identity = b"Example <example@example.invalid> 0 +0000".to_vec();
    let commit_payload = Commit {
        tree: tree_oid,
        parents: Vec::new(),
        author: identity.clone(),
        committer: identity,
        encoding: None,
        message: b"gitlink\n".to_vec(),
    }
    .write();
    let commit = EncodedObject::new(ObjectType::Commit, commit_payload);
    let commit_oid = db
        .write_object(commit)
        .expect("test operation should succeed");

    let dangling_payload = b"dangling with gitlink\n".to_vec();
    let dangling = EncodedObject::new(ObjectType::Blob, dangling_payload);
    let dangling_oid = db
        .write_object(dangling)
        .expect("test operation should succeed");

    let prune_outcome = prune_unreachable_loose(&git_dir, format, [commit_oid], true);
    let deleted = prune_outcome.expect("test operation should succeed");

    // Only the dangling blob is reported, and its loose file is gone afterwards.
    assert_eq!(deleted, vec![dangling_oid]);
    let dangling_still_loose = db
        .loose()
        .object_path(&dangling_oid)
        .expect("test operation should succeed")
        .exists();
    assert!(!dangling_still_loose);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
6751
6752 fn temp_root(prefix: &str) -> PathBuf {
6753 std::env::temp_dir().join(format!(
6754 "{prefix}-{}-{}",
6755 std::process::id(),
6756 TEMPFILE_COUNTER.fetch_add(1, Ordering::Relaxed)
6757 ))
6758 }
6759}