1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result, StreamingDigest};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
12use std::fmt;
13use std::fs::File;
14use std::io::{Read, Seek, SeekFrom, Write};
15use std::ops::Range;
16use std::path::Path;
17use std::sync::Arc;
18
/// Location and size metadata for one object stored in a pack.
///
/// For non-delta entries the parser in this file fills the fields as
/// described below; how resolved delta entries populate them is decided by
/// `resolve_pack_entries` (NOTE(review): not visible here — confirm).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Number of compressed bytes the payload occupies in the pack.
    pub compressed_size: u64,
    /// Size declared in the entry header.
    pub uncompressed_size: u64,
    /// Byte offset of the entry header, counted from the start of the pack.
    pub offset: u64,
}
26
/// Default for [`PackWriteOptions::window`].
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Default for [`PackWriteOptions::depth`].
pub const DEFAULT_PACK_DEPTH: usize = 50;

// NOTE(review): not referenced in this part of the file. By name, the minimum
// number of objects before compression is parallelized — confirm at the use site.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

// NOTE(review): not referenced in this part of the file. By name, an upper
// bound on compression worker threads — confirm at the use site.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;

/// Batch size used by the streaming pack writers: they split their object
/// list with `chunks(..)` of this length and compress one batch at a time.
const PACK_STREAM_COMPRESSION_WINDOW_OBJECTS: usize = 256;
55
/// Tunables for the pack writers on [`PackFile`].
///
/// Build with [`PackWriteOptions::new`] and the `with_*` methods; all fields
/// are also public.
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Passed to the delta planner; defaults to [`DEFAULT_PACK_WINDOW`].
    /// NOTE(review): presumably the delta search window — confirm in the planner.
    pub window: usize,
    /// Passed to the delta planner; defaults to [`DEFAULT_PACK_DEPTH`].
    /// `PackFile::write_undeltified` sets it to 0.
    pub depth: usize,
    /// When `true`, deltas against in-pack bases are written as offset deltas
    /// (entry kind 6); otherwise as ref deltas (kind 7) carrying the base id.
    pub prefer_ofs_delta: bool,
    /// Objects available as delta bases that are not part of the pack itself.
    /// Every key must use the same object format as the pack being written.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// Passed to the delta planner. NOTE(review): presumably allows the
    /// planner to reorder objects — confirm in the planner.
    pub reorder: bool,
    /// Compression level handed to the payload compressors.
    /// `with_compression_level` clamps it to at most 9.
    pub compression_level: u32,
}
86
87impl Default for PackWriteOptions {
88 fn default() -> Self {
89 Self::new()
90 }
91}
92
93impl PackWriteOptions {
94 pub fn new() -> Self {
98 Self {
99 window: DEFAULT_PACK_WINDOW,
100 depth: DEFAULT_PACK_DEPTH,
101 prefer_ofs_delta: true,
102 thin_bases: HashMap::new(),
103 reorder: true,
104 compression_level: 6,
105 }
106 }
107
108 pub fn with_window(mut self, window: usize) -> Self {
110 self.window = window;
111 self
112 }
113
114 pub fn with_depth(mut self, depth: usize) -> Self {
116 self.depth = depth;
117 self
118 }
119
120 pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
123 self.prefer_ofs_delta = prefer_ofs_delta;
124 self
125 }
126
127 pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
129 self.thin_bases = thin_bases;
130 self
131 }
132
133 pub fn with_reorder(mut self, reorder: bool) -> Self {
136 self.reorder = reorder;
137 self
138 }
139
140 pub fn with_compression_level(mut self, level: u32) -> Self {
142 self.compression_level = level.min(9);
143 self
144 }
145}
146
/// Settings describing how a repack should be performed.
///
/// NOTE(review): nothing in this part of the file reads these fields; the
/// descriptions below are inferred from the names — confirm against the caller.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably: whether bitmap indexes should be written.
    pub write_bitmaps: bool,
    /// Presumably: whether cruft packs should be produced.
    pub cruft_packs: bool,
    /// Presumably: the factor for geometric repacking, if enabled.
    pub geometric_factor: Option<u8>,
}
153
/// A fully parsed pack: every entry decoded and every delta resolved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version from the header; the parser accepts 2 and 3.
    pub version: u32,
    /// Objects produced by resolving the parsed entries.
    pub entries: Vec<PackObject>,
    /// Digest of all pack bytes preceding the trailer (verified against the
    /// trailer during parsing).
    pub checksum: ObjectId,
}
160
/// One decoded object together with its pack bookkeeping.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes and offset of the entry inside the pack.
    pub entry: PackEntry,
    /// The decoded object (type and body).
    pub object: EncodedObject,
}
166
/// Per-object statistics produced by `PackFile::verify_pack_stats`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStat {
    /// Id of the resolved object.
    pub oid: ObjectId,
    /// Type of the resolved object.
    pub object_type: ObjectType,
    /// Size declared in the on-disk entry header. For delta entries this is
    /// the header size of the delta entry, not of the resolved object.
    pub size: u64,
    /// Bytes the entry occupies in the pack: distance from its offset to the
    /// next entry's offset (or to the trailer for the last entry).
    pub size_in_pack: u64,
    /// Byte offset of the entry within the pack.
    pub offset: u64,
    /// Number of delta links between this entry and a non-delta base; 0 for
    /// non-delta entries. A delta whose base is not in the pack counts as 1.
    pub delta_depth: u32,
    /// Id of the delta base, when the entry is a delta and the base is known.
    pub base_oid: Option<ObjectId>,
}
188
/// Result of `PackFile::verify_pack_stats`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackVerifyStats {
    /// One record per pack entry, sorted by ascending pack offset.
    pub objects: Vec<PackVerifyStat>,
    /// Checksum of the verified pack.
    pub checksum: ObjectId,
}
196
/// Output of the in-memory pack writers: the pack bytes plus its index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Complete pack bytes, including the trailing checksum.
    pub pack: Vec<u8>,
    /// Serialized version-2 pack index for `pack`.
    pub index: Vec<u8>,
    /// Pack checksum (digest of the pack bytes before the trailer).
    pub checksum: ObjectId,
    /// Index entries for the objects in the pack.
    pub entries: Vec<PackIndexEntry>,
    /// Number of entries written as deltas. `PackFile::index_pack` always
    /// reports 0 here.
    pub delta_count: u32,
}
205
/// Output of the streaming pack writers, which send pack bytes to a
/// caller-supplied writer instead of returning them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWriteSummary {
    /// Serialized version-2 pack index for the written pack.
    pub index: Vec<u8>,
    /// Pack checksum as reported by the digest writer on finish.
    pub checksum: ObjectId,
    /// Index entries in the order the objects were written.
    pub entries: Vec<PackIndexEntry>,
    /// Number of entries written as deltas.
    pub delta_count: u32,
    /// Pack size as reported by the digest writer on finish.
    /// NOTE(review): presumably includes the trailer — confirm in `PackDigestWriter`.
    pub pack_size: u64,
}
214
/// A borrowed object paired with an id the caller already knows.
///
/// The `write_packed_with_known_ids*` writers only check that `oid` uses the
/// pack's object format; they do not recompute it from `object`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Id to record for the object.
    pub oid: &'a ObjectId,
    /// The object to pack.
    pub object: &'a EncodedObject,
}
220
/// Result of building an index for an existing pack (as returned to
/// `PackFile::index_pack` by `PackIndex::write_v2_for_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the indexed pack.
    pub pack_checksum: ObjectId,
    /// Entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
}
227
/// Like [`PackIndexBuild`], with an additional per-object summary.
///
/// NOTE(review): the producer is outside this part of the file; by name it is
/// the result of indexing a pack read from a stream — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackStreamIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the indexed pack.
    pub pack_checksum: ObjectId,
    /// Entries recorded in the index.
    pub entries: Vec<PackIndexEntry>,
    /// Type, size and offset of each indexed object.
    pub objects: Vec<PackIndexedObject>,
}
235
/// Summary of one object seen while indexing a pack.
///
/// NOTE(review): populated outside this part of the file; whether `size` is
/// the resolved or the on-disk size must be confirmed there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexedObject {
    /// Id of the object.
    pub oid: ObjectId,
    /// Type of the object.
    pub object_type: ObjectType,
    /// Size of the object.
    pub size: u64,
    /// Offset of the object's entry in the pack.
    pub offset: u64,
}
243
/// Owned, fully decoded pack index.
///
/// NOTE(review): the parser and serializer live outside this part of the
/// file; field meanings below follow the names — confirm against them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Entries (object id, CRC-32, pack offset).
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
252
/// View over a pack index that borrows the underlying bytes.
///
/// NOTE(review): the constructor and accessors are outside this part of the
/// file; `tables` presumably locates the lookup tables inside `bytes` — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Borrowed index bytes backing the view.
    bytes: &'a [u8],
    // Object format associated with the index.
    format: ObjectFormat,
    // Table locations, per index version.
    tables: PackIndexViewTables,
}
264
/// Storage that can expose index bytes as one contiguous slice.
///
/// Implementors must be `Debug + Send + Sync`.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Returns the stored bytes.
    fn as_bytes(&self) -> &[u8];
}

/// Every `AsRef<[u8]>` type that is `Debug + Send + Sync` (sized or not) is a
/// byte source; the bytes are whatever `as_ref` yields.
impl<T> PackIndexByteSource for T
where
    T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
{
    fn as_bytes(&self) -> &[u8] {
        AsRef::<[u8]>::as_ref(self)
    }
}
277
/// Newtype around reference-counted index bytes (`Arc<[u8]>`).
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);
280
281impl PackIndexByteSource for SharedIndexBytes {
282 fn as_bytes(&self) -> &[u8] {
283 self.0.as_ref()
284 }
285}
286
/// Counterpart of [`PackIndexView`] that holds its bytes through a shared
/// `Arc<dyn PackIndexByteSource>` instead of borrowing them.
///
/// NOTE(review): the constructor and accessors are outside this part of the
/// file; `tables` presumably locates the lookup tables inside `bytes` — confirm.
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version.
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Shared storage holding the index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
    // Object format associated with the index.
    format: ObjectFormat,
    // Table locations, per index version.
    tables: PackIndexViewTables,
}
298
/// One record of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// CRC-32 of the entry as stored in the pack. The writers in this file
    /// compute it over the entry header followed by the compressed payload.
    pub crc32: u32,
    /// Byte offset of the entry from the start of the pack.
    pub offset: u64,
}
305
/// The CRC-32 and pack offset of an index record, without the object id.
///
/// NOTE(review): presumably the result of looking an id up in an index —
/// the lookup code is outside this part of the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 recorded for the entry.
    pub crc32: u32,
    /// Byte offset of the entry in the pack.
    pub offset: u64,
}
311
/// Table locations for the index views, one variant per index version.
///
/// NOTE(review): the ranges are presumably byte ranges into the index bytes;
/// the code that builds and reads them is outside this part of the file.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version-1 layout: a single entry table.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version-2 layout: separate id, CRC, and offset tables.
    V2 {
        oid_table: Range<usize>,
        crc_table: Range<usize>,
        small_offset_table: Range<usize>,
        large_offset_table: Range<usize>,
    },
}
324
/// Decoded pack reverse index.
///
/// NOTE(review): parsed and written outside this part of the file; the exact
/// meaning of `positions` should be confirmed there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// Format version.
    pub version: u32,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// Position table.
    pub positions: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
333
/// Decoded per-object mtime table for a pack.
///
/// NOTE(review): parsed and written outside this part of the file; ordering
/// and units of `mtimes` should be confirmed there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// Format version.
    pub version: u32,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// One mtime value per object.
    pub mtimes: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
342
/// Decoded pack bitmap index.
///
/// NOTE(review): parsed and written outside this part of the file; confirm
/// field semantics there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// Format version.
    pub version: u16,
    /// Object format of the associated pack.
    pub format: ObjectFormat,
    /// Option flags from the bitmap header.
    pub options: u16,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
    /// One bitmap per object type.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Stored bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Pseudo-merge bitmaps, if any.
    pub pseudo_merges: Vec<PackBitmapPseudoMerge>,
    /// Optional name-hash cache.
    pub name_hash_cache: Option<Vec<u32>>,
}
355
/// The four per-type bitmaps of a bitmap index, one for each object type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commits.
    pub commits: EwahBitmap,
    /// Bitmap for trees.
    pub trees: EwahBitmap,
    /// Bitmap for blobs.
    pub blobs: EwahBitmap,
    /// Bitmap for tags.
    pub tags: EwahBitmap,
}
363
/// One bitmap stored in a bitmap index.
///
/// NOTE(review): consumers are outside this part of the file; the notes
/// below follow the field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// Position of the object the bitmap belongs to.
    pub object_position: u32,
    /// XOR offset (presumably relative to an earlier entry).
    pub xor_offset: u8,
    /// Entry flags.
    pub flags: u8,
    /// The bitmap itself.
    pub bitmap: EwahBitmap,
}
377
/// A pseudo-merge record of a bitmap index: two bitmaps.
///
/// NOTE(review): consumers are outside this part of the file; confirm the
/// roles of the two bitmaps there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapPseudoMerge {
    /// Bitmap named `commits`.
    pub commits: EwahBitmap,
    /// Bitmap named `bitmap`.
    pub bitmap: EwahBitmap,
}
387
/// Raw fields of an EWAH bitmap.
///
/// NOTE(review): encoding and decoding are outside this part of the file;
/// the notes below follow the field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Bit size of the bitmap.
    pub bit_size: u32,
    /// 64-bit words of the bitmap.
    pub words: Vec<u64>,
    /// Position value named `rlw_position`.
    pub rlw_position: u32,
}
394
/// Owned, decoded multi-pack index.
///
/// NOTE(review): parsed and written outside this part of the file; confirm
/// field semantics there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// Format version.
    pub version: u8,
    /// Object format of the indexed packs.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Object records (id, pack, offset).
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional per-pack bitmap ranges.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunk table (id, offset, length per chunk).
    pub chunks: Vec<MultiPackIndexChunk>,
    /// Checksum of the multi-pack index.
    pub checksum: ObjectId,
}
409
/// Lookup structure over multi-pack-index bytes held in shared storage.
///
/// NOTE(review): construction and lookups are outside this part of the file;
/// the `*_offset` fields are presumably byte positions inside `bytes` — confirm.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    // Object format of the indexed packs.
    format: ObjectFormat,
    // Number of packs covered.
    pack_count: u32,
    // Names of the covered packs.
    pack_names: Vec<String>,
    // 256-entry fanout table.
    fanout: [u32; 256],
    // Number of objects covered.
    object_count: usize,
    // Location of the object-id lookup table.
    oid_lookup_offset: usize,
    // Location of the object-offsets table.
    object_offsets_offset: usize,
    // Location of the large-offsets table, when one exists.
    large_offsets_offset: Option<usize>,
    // Length associated with the large-offsets table.
    large_offsets_len: usize,
    // Shared storage holding the index bytes.
    bytes: Arc<dyn PackIndexByteSource>,
}
423
/// One object record of a multi-pack index.
///
/// NOTE(review): consumers are outside this part of the file; confirm the
/// effect of `force_large_offset` in the writer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Integer id of the pack holding the object.
    pub pack_int_id: u32,
    /// Offset of the object within that pack.
    pub offset: u64,
    /// Flag named `force_large_offset`.
    pub force_large_offset: bool,
}
431
/// Per-pack bitmap range record of a multi-pack index.
///
/// NOTE(review): presumably the first bitmap position and the number of
/// bitmapped objects for one pack — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Value named `bitmap_pos`.
    pub bitmap_pos: u32,
    /// Value named `bitmap_nr`.
    pub bitmap_nr: u32,
}
437
/// One chunk-table record of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk. NOTE(review): presumably from the start of the file — confirm.
    pub offset: u64,
    /// Length of the chunk.
    pub len: u64,
}
444
/// Kind of a pack entry as read from its header.
///
/// The first four variants are whole objects and map one-to-one onto
/// `ObjectType`; the last two are deltas that need a base to be resolved.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    Commit,
    Tree,
    Blob,
    Tag,
    /// Delta whose base is identified by a pack offset.
    OfsDelta,
    /// Delta whose base is identified by an object id.
    RefDelta,
}
454
/// Intermediate parse result for one pack entry, before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// A non-delta entry: already a complete object with a computed id.
    Resolved(PackObject),
    /// A delta entry awaiting its base.
    Delta {
        /// How the base is identified.
        base: DeltaBase,
        /// Compressed bytes consumed by the delta payload.
        compressed_size: u64,
        /// Size declared in the entry header (length of the inflated delta).
        delta_size: u64,
        /// Byte offset of the entry header within the pack.
        offset: u64,
        /// Inflated delta bytes.
        delta: Vec<u8>,
    },
}
466
/// Reference from a delta entry to its base.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Base identified by pack offset, as returned by
    /// `parse_ofs_delta_base_offset`. `verify_pack_stats` looks the value up
    /// among entry offsets, i.e. treats it as an absolute pack offset.
    Offset(u64),
    /// Base identified by object id.
    Ref(ObjectId),
}
472
/// Header-level facts about one entry as stored in the pack, collected by
/// `PackFile::verify_pack_stats`.
struct OnDiskEntry {
    /// Byte offset of the entry header within the pack.
    offset: u64,
    /// Delta base reference; `None` for non-delta entries.
    base: Option<DeltaBase>,
    /// Size declared in the entry header.
    stream_size: u64,
}
481
482impl PackFile {
483 pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
484 Self::parse(bytes, ObjectFormat::Sha1)
485 }
486
487 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
488 Self::parse_with_base(bytes, format, |_| Ok(None))
489 }
490
491 pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
492 Self::parse(&bundle.pack, bundle.format)
493 }
494
495 pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
496 let PackIndexBuild {
497 index,
498 pack_checksum,
499 entries,
500 } = PackIndex::write_v2_for_pack(bytes, format)?;
501 Ok(PackWrite {
502 pack: bytes.to_vec(),
503 index,
504 checksum: pack_checksum,
505 entries,
506 delta_count: 0,
507 })
508 }
509
510 pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
511 where
512 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
513 {
514 Self::parse_with_base(bytes, format, external_base)
515 }
516
517 fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
518 where
519 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
520 {
521 let trailer_len = format.raw_len();
522 if bytes.len() < 12 + trailer_len {
523 return Err(GitError::InvalidFormat("pack file too short".into()));
524 }
525 let trailer_offset = bytes.len() - trailer_len;
526 let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
527 let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
528 if checksum != expected {
529 return Err(GitError::InvalidFormat(format!(
530 "pack checksum mismatch: expected {expected}, got {checksum}"
531 )));
532 }
533
534 if &bytes[..4] != b"PACK" {
535 return Err(GitError::InvalidFormat("missing PACK signature".into()));
536 }
537 let version = u32_be(&bytes[4..8]);
538 if version != 2 && version != 3 {
539 return Err(GitError::Unsupported(format!("pack version {version}")));
540 }
541 let count = u32_be(&bytes[8..12]) as usize;
542 let mut offset = 12usize;
543 let mut entries = Vec::with_capacity(count);
544 for _ in 0..count {
545 let entry_offset = offset;
546 let header = parse_entry_header(bytes, &mut offset)?;
547 let base =
548 match header.kind {
549 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
550 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
551 )),
552 PackObjectKind::RefDelta => {
553 let hash_len = format.raw_len();
554 if offset + hash_len > trailer_offset {
555 return Err(GitError::InvalidFormat(
556 "truncated ref-delta base object id".into(),
557 ));
558 }
559 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
560 offset += hash_len;
561 Some(DeltaBase::Ref(oid))
562 }
563 _ => None,
564 };
565 let mut body = Vec::new();
566 let consumed = inflate_into(
567 &bytes[offset..trailer_offset],
568 &mut body,
569 header.size.min(usize::MAX as u64) as usize,
570 )?;
571 if body.len() as u64 != header.size {
572 return Err(GitError::InvalidObject(format!(
573 "pack object declared {} bytes, decoded {}",
574 header.size,
575 body.len()
576 )));
577 }
578 if consumed == 0 {
579 return Err(GitError::InvalidFormat(
580 "empty compressed pack entry".into(),
581 ));
582 }
583 offset = offset
584 .checked_add(consumed)
585 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
586 if offset > trailer_offset {
587 return Err(GitError::InvalidFormat(
588 "pack entry extends past checksum".into(),
589 ));
590 }
591 if let Some(base) = base {
592 entries.push(ParsedPackEntry::Delta {
593 base,
594 compressed_size: consumed as u64,
595 delta_size: header.size,
596 offset: entry_offset as u64,
597 delta: body,
598 });
599 } else {
600 let object_type = match header.kind {
601 PackObjectKind::Commit => ObjectType::Commit,
602 PackObjectKind::Tree => ObjectType::Tree,
603 PackObjectKind::Blob => ObjectType::Blob,
604 PackObjectKind::Tag => ObjectType::Tag,
605 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
606 };
607 let object = EncodedObject::new(object_type, body);
608 let oid = object.object_id(format)?;
609 entries.push(ParsedPackEntry::Resolved(PackObject {
610 entry: PackEntry {
611 oid,
612 compressed_size: consumed as u64,
613 uncompressed_size: header.size,
614 offset: entry_offset as u64,
615 },
616 object,
617 }));
618 }
619 }
620 if offset != trailer_offset {
621 return Err(GitError::InvalidFormat(format!(
622 "pack has {} trailing bytes before checksum",
623 trailer_offset - offset
624 )));
625 }
626 Ok(Self {
627 version,
628 entries: resolve_pack_entries(entries, format, &mut external_base)?,
629 checksum,
630 })
631 }
632
633 pub fn verify_pack_stats(bytes: &[u8], format: ObjectFormat) -> Result<PackVerifyStats> {
644 let pack = Self::parse(bytes, format)?;
648
649 let trailer_len = format.raw_len();
653 let trailer_offset = bytes.len() - trailer_len;
654 let count = u32_be(&bytes[8..12]) as usize;
655 let mut offset = 12usize;
656 let mut on_disk: Vec<OnDiskEntry> = Vec::with_capacity(count);
662 for _ in 0..count {
663 let entry_offset = offset as u64;
664 let header = parse_entry_header(bytes, &mut offset)?;
665 let stream_size = header.size;
666 let base =
667 match header.kind {
668 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
669 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset)?,
670 )),
671 PackObjectKind::RefDelta => {
672 let hash_len = format.raw_len();
673 if offset + hash_len > trailer_offset {
674 return Err(GitError::InvalidFormat(
675 "truncated ref-delta base object id".into(),
676 ));
677 }
678 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
679 offset += hash_len;
680 Some(DeltaBase::Ref(oid))
681 }
682 _ => None,
683 };
684 let mut body = Vec::new();
686 let consumed = inflate_into(
687 &bytes[offset..trailer_offset],
688 &mut body,
689 header.size.min(usize::MAX as u64) as usize,
690 )?;
691 offset = offset
692 .checked_add(consumed)
693 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
694 on_disk.push(OnDiskEntry {
695 offset: entry_offset,
696 base,
697 stream_size,
698 });
699 }
700
701 let mut resolved_by_offset: HashMap<u64, &PackObject> =
703 HashMap::with_capacity(pack.entries.len());
704 for object in &pack.entries {
705 resolved_by_offset.insert(object.entry.offset, object);
706 }
707 let mut oid_by_offset: HashMap<u64, ObjectId> = HashMap::with_capacity(on_disk.len());
709 for entry in &on_disk {
710 if let Some(object) = resolved_by_offset.get(&entry.offset) {
711 oid_by_offset.insert(entry.offset, object.entry.oid);
712 }
713 }
714 let mut index_by_offset: HashMap<u64, usize> = HashMap::with_capacity(on_disk.len());
716 for (idx, entry) in on_disk.iter().enumerate() {
717 index_by_offset.insert(entry.offset, idx);
718 }
719
720 let mut sorted_offsets: Vec<u64> = on_disk.iter().map(|entry| entry.offset).collect();
723 sorted_offsets.sort_unstable();
724 let mut next_offset: HashMap<u64, u64> = HashMap::with_capacity(sorted_offsets.len());
725 for window in sorted_offsets.windows(2) {
726 next_offset.insert(window[0], window[1]);
727 }
728 if let Some(last) = sorted_offsets.last() {
729 next_offset.insert(*last, trailer_offset as u64);
730 }
731
732 let mut depth = vec![None; on_disk.len()];
738 fn resolve_depth(
739 idx: usize,
740 on_disk: &[OnDiskEntry],
741 index_by_offset: &HashMap<u64, usize>,
742 offset_of_oid: &HashMap<ObjectId, u64>,
743 depth: &mut [Option<u32>],
744 ) -> u32 {
745 if let Some(d) = depth[idx] {
746 return d;
747 }
748 let computed = match &on_disk[idx].base {
749 None => 0,
750 Some(base) => {
751 let base_idx = match base {
752 DeltaBase::Offset(off) => index_by_offset.get(off).copied(),
753 DeltaBase::Ref(oid) => offset_of_oid
754 .get(oid)
755 .and_then(|off| index_by_offset.get(off).copied()),
756 };
757 match base_idx {
758 Some(bi) => {
759 resolve_depth(bi, on_disk, index_by_offset, offset_of_oid, depth) + 1
760 }
761 None => 1,
763 }
764 }
765 };
766 depth[idx] = Some(computed);
767 computed
768 }
769 let mut offset_of_oid: HashMap<ObjectId, u64> = HashMap::with_capacity(oid_by_offset.len());
770 for (off, oid) in &oid_by_offset {
771 offset_of_oid.insert(*oid, *off);
772 }
773 for idx in 0..on_disk.len() {
774 resolve_depth(idx, &on_disk, &index_by_offset, &offset_of_oid, &mut depth);
775 }
776
777 let mut stats = Vec::with_capacity(on_disk.len());
778 for (idx, entry) in on_disk.iter().enumerate() {
779 let off = entry.offset;
780 let object = resolved_by_offset.get(&off).ok_or_else(|| {
781 GitError::InvalidFormat("pack offset missing from resolved set".into())
782 })?;
783 let size_in_pack = next_offset
784 .get(&off)
785 .copied()
786 .unwrap_or(trailer_offset as u64)
787 .saturating_sub(off);
788 let base_oid = match &entry.base {
789 None => None,
790 Some(DeltaBase::Offset(base_off)) => oid_by_offset.get(base_off).copied(),
791 Some(DeltaBase::Ref(oid)) => Some(*oid),
792 };
793 stats.push(PackVerifyStat {
794 oid: object.entry.oid,
795 object_type: object.object.object_type,
796 size: entry.stream_size,
799 size_in_pack,
800 offset: off,
801 delta_depth: depth[idx].unwrap_or(0),
802 base_oid,
803 });
804 }
805 stats.sort_by_key(|stat| stat.offset);
807
808 Ok(PackVerifyStats {
809 objects: stats,
810 checksum: pack.checksum,
811 })
812 }
813
814 pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
815 where
816 T: Borrow<EncodedObject>,
817 {
818 Self::write_undeltified(objects, ObjectFormat::Sha1)
819 }
820
821 pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
827 where
828 T: Borrow<EncodedObject>,
829 {
830 let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
831 Self::write_packed_impl(objects, format, &options)
832 }
833
834 pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
843 where
844 T: Borrow<EncodedObject>,
845 {
846 Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
847 }
848
849 pub fn write_packed_with_options<T>(
853 objects: &[T],
854 format: ObjectFormat,
855 options: &PackWriteOptions,
856 ) -> Result<PackWrite>
857 where
858 T: Borrow<EncodedObject>,
859 {
860 Self::write_packed_impl(objects, format, options)
861 }
862
863 pub fn write_packed_with_known_ids(
872 inputs: &[PackInput<'_>],
873 format: ObjectFormat,
874 ) -> Result<PackWrite> {
875 Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
876 }
877
878 pub fn write_packed_with_known_ids_and_options(
881 inputs: &[PackInput<'_>],
882 format: ObjectFormat,
883 options: &PackWriteOptions,
884 ) -> Result<PackWrite> {
885 if inputs.len() > u32::MAX as usize {
886 return Err(GitError::InvalidFormat("too many pack objects".into()));
887 }
888 let mut objects = Vec::with_capacity(inputs.len());
889 let mut object_ids = Vec::with_capacity(inputs.len());
890 for input in inputs {
891 if input.oid.format() != format {
892 return Err(GitError::InvalidObjectId(format!(
893 "pack object id {} uses {}, pack uses {}",
894 input.oid,
895 input.oid.format().name(),
896 format.name()
897 )));
898 }
899 objects.push(input.object);
900 object_ids.push(*input.oid);
901 }
902 Self::write_packed_from_parts(objects, object_ids, format, options)
903 }
904
905 pub fn write_packed_with_known_ids_to_writer<W>(
906 inputs: &[PackInput<'_>],
907 format: ObjectFormat,
908 options: &PackWriteOptions,
909 writer: &mut W,
910 ) -> Result<PackWriteSummary>
911 where
912 W: Write,
913 {
914 if inputs.len() > u32::MAX as usize {
915 return Err(GitError::InvalidFormat("too many pack objects".into()));
916 }
917 let mut objects = Vec::with_capacity(inputs.len());
918 let mut object_ids = Vec::with_capacity(inputs.len());
919 for input in inputs {
920 if input.oid.format() != format {
921 return Err(GitError::InvalidObjectId(format!(
922 "pack object id {} uses {}, pack uses {}",
923 input.oid,
924 input.oid.format().name(),
925 format.name()
926 )));
927 }
928 objects.push(input.object);
929 object_ids.push(*input.oid);
930 }
931 Self::write_packed_from_parts_to_writer(objects, object_ids, format, options, writer)
932 }
933
934 pub fn write_thin<T>(
943 objects: &[T],
944 format: ObjectFormat,
945 external_bases: HashMap<ObjectId, EncodedObject>,
946 ) -> Result<PackWrite>
947 where
948 T: Borrow<EncodedObject>,
949 {
950 let options = PackWriteOptions::new().with_thin_bases(external_bases);
951 Self::write_packed_impl(objects, format, &options)
952 }
953
954 fn write_packed_impl<T>(
955 objects: &[T],
956 format: ObjectFormat,
957 options: &PackWriteOptions,
958 ) -> Result<PackWrite>
959 where
960 T: Borrow<EncodedObject>,
961 {
962 if objects.len() > u32::MAX as usize {
963 return Err(GitError::InvalidFormat("too many pack objects".into()));
964 }
965 let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
966
967 let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
970 for object in &objects {
971 object_ids.push(object.object_id(format)?);
972 }
973 Self::write_packed_from_parts(objects, object_ids, format, options)
974 }
975
976 fn write_packed_from_parts(
977 objects: Vec<&EncodedObject>,
978 object_ids: Vec<ObjectId>,
979 format: ObjectFormat,
980 options: &PackWriteOptions,
981 ) -> Result<PackWrite> {
982 let mut seen = HashSet::with_capacity(object_ids.len());
983 for oid in &object_ids {
984 if !seen.insert(oid) {
985 return Err(GitError::InvalidFormat(format!(
986 "pack contains duplicate object id {oid}"
987 )));
988 }
989 }
990
991 for oid in options.thin_bases.keys() {
993 if oid.format() != format {
994 return Err(GitError::InvalidObjectId(
995 "thin pack base object id format does not match pack format".into(),
996 ));
997 }
998 }
999
1000 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
1006
1007 let mut pack = Vec::new();
1008 pack.extend_from_slice(b"PACK");
1009 pack.extend_from_slice(&2u32.to_be_bytes());
1010 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
1011
1012 let mut index_entries = Vec::with_capacity(objects.len());
1013 let mut delta_count = 0u32;
1014 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1017
1018 let compressed_payloads =
1019 compress_planned_payloads(&objects, &plan, &order, options.compression_level)?;
1020
1021 for (order_pos, &idx) in order.iter().enumerate() {
1022 let offset = pack.len() as u64;
1023 let mut entry_bytes = Vec::new();
1024 match &plan[idx].base {
1025 PlannedBase::None => {
1026 write_entry_header(
1027 &mut entry_bytes,
1028 objects[idx].object_type,
1029 objects[idx].body.len() as u64,
1030 );
1031 }
1032 PlannedBase::InPack { base_idx, delta } => {
1033 delta_count += 1;
1034 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1035 GitError::InvalidFormat(
1036 "in-pack delta base emitted after dependent object".into(),
1037 )
1038 })?;
1039 if options.prefer_ofs_delta {
1040 write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
1041 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1042 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
1043 })?;
1044 write_ofs_delta_offset(&mut entry_bytes, relative)?;
1045 } else {
1046 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1047 entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
1048 }
1049 }
1050 PlannedBase::External { base_oid, delta } => {
1051 delta_count += 1;
1052 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
1053 entry_bytes.extend_from_slice(base_oid.as_bytes());
1054 }
1055 }
1056 entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
1057 let crc32 = crc32fast::hash(&entry_bytes);
1058 pack.extend_from_slice(&entry_bytes);
1059 written_offsets[idx] = Some(offset);
1060 index_entries.push(PackIndexEntry {
1061 oid: object_ids[idx].clone(),
1062 crc32,
1063 offset,
1064 });
1065 }
1066
1067 let checksum = sley_core::digest_bytes(format, &pack)?;
1068 pack.extend_from_slice(checksum.as_bytes());
1069 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1070 Ok(PackWrite {
1071 pack,
1072 index,
1073 checksum,
1074 entries: index_entries,
1075 delta_count,
1076 })
1077 }
1078
1079 fn write_packed_from_parts_to_writer<W>(
1080 objects: Vec<&EncodedObject>,
1081 object_ids: Vec<ObjectId>,
1082 format: ObjectFormat,
1083 options: &PackWriteOptions,
1084 writer: &mut W,
1085 ) -> Result<PackWriteSummary>
1086 where
1087 W: Write,
1088 {
1089 let mut seen = HashSet::with_capacity(object_ids.len());
1090 for oid in &object_ids {
1091 if !seen.insert(oid) {
1092 return Err(GitError::InvalidFormat(format!(
1093 "pack contains duplicate object id {oid}"
1094 )));
1095 }
1096 }
1097
1098 for oid in options.thin_bases.keys() {
1099 if oid.format() != format {
1100 return Err(GitError::InvalidObjectId(
1101 "thin pack base object id format does not match pack format".into(),
1102 ));
1103 }
1104 }
1105
1106 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
1107 let mut output = PackDigestWriter::new(writer, format);
1108 output.write_pack_bytes(b"PACK")?;
1109 output.write_pack_bytes(&2u32.to_be_bytes())?;
1110 output.write_pack_bytes(&(objects.len() as u32).to_be_bytes())?;
1111
1112 let mut index_entries = Vec::with_capacity(objects.len());
1113 let mut delta_count = 0u32;
1114 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1115
1116 for order_window in order.chunks(PACK_STREAM_COMPRESSION_WINDOW_OBJECTS) {
1117 let compressed_payloads = compress_planned_payloads(
1118 &objects,
1119 &plan,
1120 order_window,
1121 options.compression_level,
1122 )?;
1123 for (&idx, compressed_payload) in order_window.iter().zip(&compressed_payloads) {
1124 let offset = output.position();
1125 let mut entry_header = Vec::new();
1126 match &plan[idx].base {
1127 PlannedBase::None => {
1128 write_entry_header(
1129 &mut entry_header,
1130 objects[idx].object_type,
1131 objects[idx].body.len() as u64,
1132 );
1133 }
1134 PlannedBase::InPack { base_idx, delta } => {
1135 delta_count += 1;
1136 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1137 GitError::InvalidFormat(
1138 "in-pack delta base emitted after dependent object".into(),
1139 )
1140 })?;
1141 if options.prefer_ofs_delta {
1142 write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
1143 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1144 GitError::InvalidFormat(
1145 "ofs-delta base offset is after delta".into(),
1146 )
1147 })?;
1148 write_ofs_delta_offset(&mut entry_header, relative)?;
1149 } else {
1150 write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1151 entry_header.extend_from_slice(object_ids[*base_idx].as_bytes());
1152 }
1153 }
1154 PlannedBase::External { base_oid, delta } => {
1155 delta_count += 1;
1156 write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1157 entry_header.extend_from_slice(base_oid.as_bytes());
1158 }
1159 }
1160 let mut crc32 = crc32fast::Hasher::new();
1161 crc32.update(&entry_header);
1162 crc32.update(compressed_payload);
1163 output.write_pack_bytes(&entry_header)?;
1164 output.write_pack_bytes(compressed_payload)?;
1165 written_offsets[idx] = Some(offset);
1166 index_entries.push(PackIndexEntry {
1167 oid: object_ids[idx],
1168 crc32: crc32.finalize(),
1169 offset,
1170 });
1171 }
1172 }
1173
1174 let (checksum, pack_size) = output.finish()?;
1175 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1176 Ok(PackWriteSummary {
1177 index,
1178 checksum,
1179 entries: index_entries,
1180 delta_count,
1181 pack_size,
1182 })
1183 }
1184
1185 pub fn write_undeltified_from_source_to_writer<W, F>(
1186 object_ids: &[ObjectId],
1187 format: ObjectFormat,
1188 options: &PackWriteOptions,
1189 mut read_object: F,
1190 writer: &mut W,
1191 ) -> Result<PackWriteSummary>
1192 where
1193 W: Write,
1194 F: FnMut(&ObjectId) -> Result<Arc<EncodedObject>>,
1195 {
1196 let mut seen = HashSet::with_capacity(object_ids.len());
1197 for oid in object_ids {
1198 if oid.format() != format {
1199 return Err(GitError::InvalidObjectId(
1200 "pack object id format does not match pack format".into(),
1201 ));
1202 }
1203 if !seen.insert(oid) {
1204 return Err(GitError::InvalidFormat(format!(
1205 "pack contains duplicate object id {oid}"
1206 )));
1207 }
1208 }
1209
1210 let mut output = PackDigestWriter::new(writer, format);
1211 output.write_pack_bytes(b"PACK")?;
1212 output.write_pack_bytes(&2u32.to_be_bytes())?;
1213 output.write_pack_bytes(&(object_ids.len() as u32).to_be_bytes())?;
1214
1215 let mut index_entries = Vec::with_capacity(object_ids.len());
1216 for oid_window in object_ids.chunks(PACK_STREAM_COMPRESSION_WINDOW_OBJECTS) {
1217 let mut objects = Vec::with_capacity(oid_window.len());
1218 for oid in oid_window {
1219 objects.push(read_object(oid)?);
1220 }
1221 let compressed_payloads =
1222 compress_undeltified_payloads(&objects, options.compression_level)?;
1223 for ((oid, object), compressed_payload) in
1224 oid_window.iter().zip(&objects).zip(&compressed_payloads)
1225 {
1226 let offset = output.position();
1227 let mut entry_header = Vec::new();
1228 write_entry_header(
1229 &mut entry_header,
1230 object.object_type,
1231 object.body.len() as u64,
1232 );
1233 let mut crc32 = crc32fast::Hasher::new();
1234 crc32.update(&entry_header);
1235 crc32.update(compressed_payload);
1236 output.write_pack_bytes(&entry_header)?;
1237 output.write_pack_bytes(compressed_payload)?;
1238 index_entries.push(PackIndexEntry {
1239 oid: *oid,
1240 crc32: crc32.finalize(),
1241 offset,
1242 });
1243 }
1244 }
1245
1246 let (checksum, pack_size) = output.finish()?;
1247 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1248 Ok(PackWriteSummary {
1249 index,
1250 checksum,
1251 entries: index_entries,
1252 delta_count: 0,
1253 pack_size,
1254 })
1255 }
1256
1257 pub fn write_packed_from_source_to_writer<W, F>(
1258 object_ids: &[ObjectId],
1259 format: ObjectFormat,
1260 options: &PackWriteOptions,
1261 mut read_object: F,
1262 writer: &mut W,
1263 ) -> Result<PackWriteSummary>
1264 where
1265 W: Write,
1266 F: FnMut(&ObjectId) -> Result<Arc<EncodedObject>>,
1267 {
1268 if object_ids.len() > u32::MAX as usize {
1269 return Err(GitError::InvalidFormat("too many pack objects".into()));
1270 }
1271
1272 let mut seen = HashSet::with_capacity(object_ids.len());
1273 for oid in object_ids {
1274 if oid.format() != format {
1275 return Err(GitError::InvalidObjectId(
1276 "pack object id format does not match pack format".into(),
1277 ));
1278 }
1279 if !seen.insert(*oid) {
1280 return Err(GitError::InvalidFormat(format!(
1281 "pack contains duplicate object id {oid}"
1282 )));
1283 }
1284 }
1285
1286 for oid in options.thin_bases.keys() {
1287 if oid.format() != format {
1288 return Err(GitError::InvalidObjectId(
1289 "thin pack base object id format does not match pack format".into(),
1290 ));
1291 }
1292 }
1293
1294 let mut output = PackDigestWriter::new(writer, format);
1295 output.write_pack_bytes(b"PACK")?;
1296 output.write_pack_bytes(&2u32.to_be_bytes())?;
1297 output.write_pack_bytes(&(object_ids.len() as u32).to_be_bytes())?;
1298
1299 let mut index_entries = Vec::with_capacity(object_ids.len());
1300 let mut delta_count = 0u32;
1301 let mut base_horizon: VecDeque<StreamingDeltaBase> = VecDeque::new();
1302
1303 for oid_window in object_ids.chunks(PACK_STREAM_COMPRESSION_WINDOW_OBJECTS) {
1304 let mut objects = Vec::with_capacity(oid_window.len());
1305 for oid in oid_window {
1306 objects.push(read_object(oid)?);
1307 }
1308
1309 let (plan, order) =
1310 plan_streaming_window_deltas(&objects, oid_window, &base_horizon, options);
1311 let compressed_payloads = compress_streaming_planned_payloads(
1312 &objects,
1313 &plan,
1314 &order,
1315 options.compression_level,
1316 )?;
1317 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
1318
1319 for (&idx, compressed_payload) in order.iter().zip(&compressed_payloads) {
1320 let offset = output.position();
1321 let mut entry_header = Vec::new();
1322 match &plan[idx].base {
1323 StreamingPlannedBase::None => {
1324 write_entry_header(
1325 &mut entry_header,
1326 objects[idx].object_type,
1327 objects[idx].body.len() as u64,
1328 );
1329 }
1330 StreamingPlannedBase::Current { base_idx, delta } => {
1331 delta_count += 1;
1332 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
1333 GitError::InvalidFormat(
1334 "in-pack delta base emitted after dependent object".into(),
1335 )
1336 })?;
1337 if options.prefer_ofs_delta {
1338 write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
1339 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
1340 GitError::InvalidFormat(
1341 "ofs-delta base offset is after delta".into(),
1342 )
1343 })?;
1344 write_ofs_delta_offset(&mut entry_header, relative)?;
1345 } else {
1346 write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1347 entry_header.extend_from_slice(oid_window[*base_idx].as_bytes());
1348 }
1349 }
1350 StreamingPlannedBase::Previous {
1351 base_oid,
1352 base_offset,
1353 delta,
1354 } => {
1355 delta_count += 1;
1356 if options.prefer_ofs_delta {
1357 write_pack_entry_header_kind(&mut entry_header, 6, delta.len() as u64);
1358 let relative = offset.checked_sub(*base_offset).ok_or_else(|| {
1359 GitError::InvalidFormat(
1360 "ofs-delta base offset is after delta".into(),
1361 )
1362 })?;
1363 write_ofs_delta_offset(&mut entry_header, relative)?;
1364 } else {
1365 write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1366 entry_header.extend_from_slice(base_oid.as_bytes());
1367 }
1368 }
1369 StreamingPlannedBase::External { base_oid, delta } => {
1370 delta_count += 1;
1371 write_pack_entry_header_kind(&mut entry_header, 7, delta.len() as u64);
1372 entry_header.extend_from_slice(base_oid.as_bytes());
1373 }
1374 }
1375
1376 let mut crc32 = crc32fast::Hasher::new();
1377 crc32.update(&entry_header);
1378 crc32.update(compressed_payload);
1379 output.write_pack_bytes(&entry_header)?;
1380 output.write_pack_bytes(compressed_payload)?;
1381 written_offsets[idx] = Some(offset);
1382 index_entries.push(PackIndexEntry {
1383 oid: oid_window[idx],
1384 crc32: crc32.finalize(),
1385 offset,
1386 });
1387
1388 if options.depth > 0 && options.window > 0 {
1389 base_horizon.push_back(StreamingDeltaBase {
1390 oid: oid_window[idx],
1391 object: Arc::clone(&objects[idx]),
1392 offset,
1393 depth: plan[idx].depth,
1394 });
1395 while base_horizon.len() > options.window {
1396 base_horizon.pop_front();
1397 }
1398 }
1399 }
1400 }
1401
1402 let (checksum, pack_size) = output.finish()?;
1403 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
1404 Ok(PackWriteSummary {
1405 index,
1406 checksum,
1407 entries: index_entries,
1408 delta_count,
1409 pack_size,
1410 })
1411 }
1412}
1413
1414struct PackDigestWriter<'a, W> {
1415 writer: &'a mut W,
1416 digest: StreamingDigest,
1417 position: u64,
1418}
1419
1420impl<'a, W> PackDigestWriter<'a, W>
1421where
1422 W: Write,
1423{
1424 fn new(writer: &'a mut W, format: ObjectFormat) -> Self {
1425 Self {
1426 writer,
1427 digest: StreamingDigest::new(format),
1428 position: 0,
1429 }
1430 }
1431
1432 fn position(&self) -> u64 {
1433 self.position
1434 }
1435
1436 fn write_pack_bytes(&mut self, bytes: &[u8]) -> Result<()> {
1437 self.writer.write_all(bytes)?;
1438 self.digest.update(bytes);
1439 self.position = self
1440 .position
1441 .checked_add(bytes.len() as u64)
1442 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1443 Ok(())
1444 }
1445
1446 fn finish(mut self) -> Result<(ObjectId, u64)> {
1447 let checksum = self.digest.finalize()?;
1448 self.writer.write_all(checksum.as_bytes())?;
1449 self.position = self
1450 .position
1451 .checked_add(checksum.as_bytes().len() as u64)
1452 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1453 Ok((checksum, self.position))
1454 }
1455}
1456
1457impl<'a> PackIndexView<'a> {
1458 pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
1459 Self::parse(bytes, ObjectFormat::Sha1)
1460 }
1461
1462 pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1463 Self::parse_impl(bytes, format, true, true)
1464 }
1465
1466 pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1470 Self::parse_impl(bytes, format, false, true)
1471 }
1472
1473 pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
1480 Self::parse_impl(bytes, format, false, false)
1481 }
1482
1483 pub fn count(&self) -> usize {
1484 self.count
1485 }
1486
1487 pub fn fanout(&self) -> &[u32; 256] {
1488 &self.fanout
1489 }
1490
1491 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1492 if oid.format() != self.format {
1493 return None;
1494 }
1495 let bucket = usize::from(oid.as_bytes()[0]);
1496 let mut start = if bucket == 0 {
1497 0
1498 } else {
1499 self.fanout[bucket - 1] as usize
1500 };
1501 let mut end = self.fanout[bucket] as usize;
1502 let target = oid.as_bytes();
1503
1504 while start < end {
1505 let mid = start + (end - start) / 2;
1506 match self.oid_bytes_at(mid).cmp(target) {
1507 std::cmp::Ordering::Less => start = mid + 1,
1508 std::cmp::Ordering::Equal => return self.lookup_at(mid),
1509 std::cmp::Ordering::Greater => end = mid,
1510 }
1511 }
1512 None
1513 }
1514
1515 fn parse_impl(
1516 bytes: &'a [u8],
1517 format: ObjectFormat,
1518 verify_checksum: bool,
1519 validate_entries: bool,
1520 ) -> Result<Self> {
1521 let hash_len = format.raw_len();
1522 if bytes.len() < 4 {
1523 return Err(GitError::InvalidFormat("pack index too short".into()));
1524 }
1525 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1526 return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
1527 }
1528 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1529 return Err(GitError::InvalidFormat("pack index too short".into()));
1530 }
1531 let version = u32_be(&bytes[4..8]);
1532 if version != 2 {
1533 return Err(GitError::Unsupported(format!(
1534 "pack index version {version}"
1535 )));
1536 }
1537 let index_checksum_offset = bytes.len() - hash_len;
1538 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1539 if verify_checksum {
1540 let actual_index_checksum =
1541 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1542 if actual_index_checksum != index_checksum {
1543 return Err(GitError::InvalidFormat(format!(
1544 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1545 )));
1546 }
1547 }
1548
1549 let mut offset = 8usize;
1550 let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1551 let count = fanout[255] as usize;
1552 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1553 offset = oid_table.end;
1554 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1555 offset = crc_table.end;
1556 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1557 offset = small_offset_table.end;
1558
1559 let large_offset_count = (0..count)
1560 .filter(|idx| {
1561 let start = small_offset_table.start + idx * 4;
1562 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1563 })
1564 .count();
1565 let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1566 offset = large_offset_table.end;
1567
1568 let expected_trailer_offset = bytes.len() - hash_len * 2;
1569 if offset != expected_trailer_offset {
1570 if !verify_checksum && offset < expected_trailer_offset {
1571 large_offset_table = large_offset_table.start..expected_trailer_offset;
1572 offset = expected_trailer_offset;
1573 } else {
1574 return Err(GitError::InvalidFormat(format!(
1575 "pack index has {} unexpected bytes before trailer",
1576 expected_trailer_offset.saturating_sub(offset)
1577 )));
1578 }
1579 }
1580 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1581
1582 let view = Self {
1583 version,
1584 count,
1585 fanout,
1586 pack_checksum,
1587 index_checksum,
1588 bytes,
1589 format,
1590 tables: PackIndexViewTables::V2 {
1591 oid_table,
1592 crc_table,
1593 small_offset_table,
1594 large_offset_table,
1595 },
1596 };
1597 if validate_entries {
1598 view.validate_v2_entries()?;
1599 }
1600 Ok(view)
1601 }
1602
1603 fn parse_v1_impl(
1604 bytes: &'a [u8],
1605 format: ObjectFormat,
1606 verify_checksum: bool,
1607 validate_entries: bool,
1608 ) -> Result<Self> {
1609 let hash_len = format.raw_len();
1610 if bytes.len() < 256 * 4 + 2 * hash_len {
1611 return Err(GitError::InvalidFormat("pack index too short".into()));
1612 }
1613 let index_checksum_offset = bytes.len() - hash_len;
1614 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1615 if verify_checksum {
1616 let actual_index_checksum =
1617 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1618 if actual_index_checksum != index_checksum {
1619 return Err(GitError::InvalidFormat(format!(
1620 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1621 )));
1622 }
1623 }
1624
1625 let mut offset = 0usize;
1626 let fanout = read_pack_index_fanout(bytes, &mut offset)?;
1627 let count = fanout[255] as usize;
1628 let entry_len = hash_len
1629 .checked_add(4)
1630 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1631 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1632 offset = entry_table.end;
1633 let expected_trailer_offset = bytes.len() - hash_len * 2;
1634 if offset != expected_trailer_offset {
1635 return Err(GitError::InvalidFormat(format!(
1636 "pack index has {} unexpected bytes before trailer",
1637 expected_trailer_offset.saturating_sub(offset)
1638 )));
1639 }
1640 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1641
1642 let view = Self {
1643 version: 1,
1644 count,
1645 fanout,
1646 pack_checksum,
1647 index_checksum,
1648 bytes,
1649 format,
1650 tables: PackIndexViewTables::V1 { entry_table },
1651 };
1652 if validate_entries {
1653 view.validate_v1_entries()?;
1654 }
1655 Ok(view)
1656 }
1657
1658 fn validate_v2_entries(&self) -> Result<()> {
1659 let PackIndexViewTables::V2 {
1660 oid_table,
1661 small_offset_table,
1662 large_offset_table,
1663 ..
1664 } = &self.tables
1665 else {
1666 unreachable!("v2 validation only runs for v2 views");
1667 };
1668 let oid_table = self.slice(oid_table.clone());
1669 let small_offset_table = self.slice(small_offset_table.clone());
1670 let large_offset_table = self.slice(large_offset_table.clone());
1671 let hash_len = self.format.raw_len();
1672 for idx in 0..self.count {
1673 let oid_start = idx * hash_len;
1674 let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
1675 if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
1676 return Err(GitError::InvalidFormat(
1677 "pack index object ids are not strictly ascending".into(),
1678 ));
1679 }
1680 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1681
1682 let offset_start = idx * 4;
1683 let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
1684 pack_index_v2_offset(raw_offset, large_offset_table)?;
1685 }
1686 Ok(())
1687 }
1688
1689 fn validate_v1_entries(&self) -> Result<()> {
1690 let PackIndexViewTables::V1 { entry_table } = &self.tables else {
1691 unreachable!("v1 validation only runs for v1 views");
1692 };
1693 let entry_table = self.slice(entry_table.clone());
1694 let hash_len = self.format.raw_len();
1695 let entry_len = hash_len
1696 .checked_add(4)
1697 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1698 for idx in 0..self.count {
1699 let start = idx * entry_len;
1700 let oid_start = start + 4;
1701 let oid_bytes = &entry_table[oid_start..start + entry_len];
1702 if idx > 0 {
1703 let previous_oid_start = oid_start - entry_len;
1704 let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1705 if previous_oid >= oid_bytes {
1706 return Err(GitError::InvalidFormat(
1707 "pack index object ids are not strictly sorted".into(),
1708 ));
1709 }
1710 }
1711 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1712 }
1713 Ok(())
1714 }
1715
1716 fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1717 let hash_len = self.format.raw_len();
1718 match &self.tables {
1719 PackIndexViewTables::V1 { entry_table } => {
1720 let entry_table = self.slice(entry_table.clone());
1721 let entry_len = hash_len + 4;
1722 let start = idx * entry_len + 4;
1723 &entry_table[start..start + hash_len]
1724 }
1725 PackIndexViewTables::V2 { oid_table, .. } => {
1726 let oid_table = self.slice(oid_table.clone());
1727 let start = idx * hash_len;
1728 &oid_table[start..start + hash_len]
1729 }
1730 }
1731 }
1732
1733 fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1734 if idx >= self.count {
1735 return None;
1736 }
1737 let hash_len = self.format.raw_len();
1738 match &self.tables {
1739 PackIndexViewTables::V1 { entry_table } => {
1740 let entry_table = self.slice(entry_table.clone());
1741 let entry_len = hash_len + 4;
1742 let start = idx * entry_len;
1743 Some(PackIndexLookup {
1744 crc32: 0,
1745 offset: u64::from(u32_be(&entry_table[start..start + 4])),
1746 })
1747 }
1748 PackIndexViewTables::V2 {
1749 crc_table,
1750 small_offset_table,
1751 large_offset_table,
1752 ..
1753 } => {
1754 let crc_table = self.slice(crc_table.clone());
1755 let small_offset_table = self.slice(small_offset_table.clone());
1756 let large_offset_table = self.slice(large_offset_table.clone());
1757 let crc_start = idx * 4;
1758 let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1759 Some(PackIndexLookup {
1760 crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1761 offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1762 })
1763 }
1764 }
1765 }
1766
1767 fn slice(&self, range: Range<usize>) -> &'a [u8] {
1768 &self.bytes[range]
1769 }
1770}
1771
1772impl PackIndexViewData {
1773 pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1774 Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1775 }
1776
1777 pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1781 Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1782 }
1783
1784 pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1787 Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1788 }
1789
1790 pub fn parse_source(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
1791 Self::parse_impl(bytes, format, true, true)
1792 }
1793
1794 pub fn parse_source_without_checksum(
1795 bytes: Arc<dyn PackIndexByteSource>,
1796 format: ObjectFormat,
1797 ) -> Result<Self> {
1798 Self::parse_impl(bytes, format, false, true)
1799 }
1800
1801 pub fn parse_trusted_source_without_checksum(
1802 bytes: Arc<dyn PackIndexByteSource>,
1803 format: ObjectFormat,
1804 ) -> Result<Self> {
1805 Self::parse_impl(bytes, format, false, false)
1806 }
1807
1808 pub fn count(&self) -> usize {
1809 self.count
1810 }
1811
1812 pub fn fanout(&self) -> &[u32; 256] {
1813 &self.fanout
1814 }
1815
1816 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1817 self.as_view().find(oid)
1818 }
1819
1820 pub fn as_view(&self) -> PackIndexView<'_> {
1821 PackIndexView {
1822 version: self.version,
1823 count: self.count,
1824 fanout: self.fanout,
1825 pack_checksum: self.pack_checksum,
1826 index_checksum: self.index_checksum,
1827 bytes: self.bytes.as_bytes(),
1828 format: self.format,
1829 tables: self.tables.clone(),
1830 }
1831 }
1832
1833 fn parse_impl(
1834 bytes: Arc<dyn PackIndexByteSource>,
1835 format: ObjectFormat,
1836 verify_checksum: bool,
1837 validate_entries: bool,
1838 ) -> Result<Self> {
1839 let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1840 let view = PackIndexView::parse_impl(
1841 bytes.as_bytes(),
1842 format,
1843 verify_checksum,
1844 validate_entries,
1845 )?;
1846 (
1847 view.version,
1848 view.count,
1849 view.fanout,
1850 view.pack_checksum,
1851 view.index_checksum,
1852 view.tables,
1853 )
1854 };
1855 Ok(Self {
1856 version,
1857 count,
1858 fanout,
1859 pack_checksum,
1860 index_checksum,
1861 bytes,
1862 format,
1863 tables,
1864 })
1865 }
1866}
1867
1868impl PackIndex {
1869 pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
1870 Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
1871 }
1872
1873 pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1874 let trailer_len = format.raw_len();
1875 if pack_bytes.len() < 12 + trailer_len {
1876 return Err(GitError::InvalidFormat("pack file too short".into()));
1877 }
1878 let trailer_offset = pack_bytes.len() - trailer_len;
1879 let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1880 let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1881 if pack_checksum != expected {
1882 return Err(GitError::InvalidFormat(format!(
1883 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1884 )));
1885 }
1886
1887 if &pack_bytes[..4] != b"PACK" {
1888 return Err(GitError::InvalidFormat("missing PACK signature".into()));
1889 }
1890 let version = u32_be(&pack_bytes[4..8]);
1891 if version != 2 && version != 3 {
1892 return Err(GitError::Unsupported(format!("pack version {version}")));
1893 }
1894 let count = u32_be(&pack_bytes[8..12]) as usize;
1895 let mut offset = 12usize;
1896 let mut parsed_entries = Vec::with_capacity(count);
1897 let mut raw_entries = Vec::with_capacity(count);
1898 for _ in 0..count {
1899 let entry_offset = offset;
1900 let header = parse_entry_header(pack_bytes, &mut offset)?;
1901 let base = match header.kind {
1902 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1903 pack_bytes,
1904 &mut offset,
1905 entry_offset as u64,
1906 )?)),
1907 PackObjectKind::RefDelta => {
1908 let hash_len = format.raw_len();
1909 if offset + hash_len > trailer_offset {
1910 return Err(GitError::InvalidFormat(
1911 "truncated ref-delta base object id".into(),
1912 ));
1913 }
1914 let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1915 offset += hash_len;
1916 Some(DeltaBase::Ref(oid))
1917 }
1918 _ => None,
1919 };
1920 let mut body = Vec::new();
1921 let consumed = inflate_into(
1922 &pack_bytes[offset..trailer_offset],
1923 &mut body,
1924 header.size.min(usize::MAX as u64) as usize,
1925 )?;
1926 if body.len() as u64 != header.size {
1927 return Err(GitError::InvalidObject(format!(
1928 "pack object declared {} bytes, decoded {}",
1929 header.size,
1930 body.len()
1931 )));
1932 }
1933 if consumed == 0 {
1934 return Err(GitError::InvalidFormat(
1935 "empty compressed pack entry".into(),
1936 ));
1937 }
1938 offset = offset
1939 .checked_add(consumed)
1940 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1941 if offset > trailer_offset {
1942 return Err(GitError::InvalidFormat(
1943 "pack entry extends past checksum".into(),
1944 ));
1945 }
1946 raw_entries.push((
1947 entry_offset as u64,
1948 crc32fast::hash(&pack_bytes[entry_offset..offset]),
1949 ));
1950 if let Some(base) = base {
1951 parsed_entries.push(ParsedPackEntry::Delta {
1952 base,
1953 compressed_size: consumed as u64,
1954 delta_size: header.size,
1955 offset: entry_offset as u64,
1956 delta: body,
1957 });
1958 } else {
1959 let object_type = match header.kind {
1960 PackObjectKind::Commit => ObjectType::Commit,
1961 PackObjectKind::Tree => ObjectType::Tree,
1962 PackObjectKind::Blob => ObjectType::Blob,
1963 PackObjectKind::Tag => ObjectType::Tag,
1964 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1965 };
1966 let object = EncodedObject::new(object_type, body);
1967 let oid = object.object_id(format)?;
1968 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1969 entry: PackEntry {
1970 oid,
1971 compressed_size: consumed as u64,
1972 uncompressed_size: header.size,
1973 offset: entry_offset as u64,
1974 },
1975 object,
1976 }));
1977 }
1978 }
1979 if offset != trailer_offset {
1980 return Err(GitError::InvalidFormat(format!(
1981 "pack has {} trailing bytes before checksum",
1982 trailer_offset - offset
1983 )));
1984 }
1985
1986 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1987 let entries = resolved
1988 .iter()
1989 .zip(raw_entries)
1990 .map(|(object, (offset, crc32))| PackIndexEntry {
1991 oid: object.entry.oid,
1992 crc32,
1993 offset,
1994 })
1995 .collect::<Vec<_>>();
1996 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1997 Ok(PackIndexBuild {
1998 index,
1999 pack_checksum,
2000 entries,
2001 })
2002 }
2003
2004 pub fn write_v2_for_pack_reader<R>(
2011 reader: &mut R,
2012 format: ObjectFormat,
2013 ) -> Result<PackStreamIndexBuild>
2014 where
2015 R: Read + Seek,
2016 {
2017 let start = reader.stream_position()?;
2018 let end = reader.seek(SeekFrom::End(0))?;
2019 let pack_len = end
2020 .checked_sub(start)
2021 .ok_or_else(|| GitError::InvalidFormat("pack stream position overflow".into()))?;
2022 reader.seek(SeekFrom::Start(start))?;
2023 index_pack_from_reader(reader, format, pack_len)
2024 }
2025
2026 pub fn write_v2_for_pack_reader_to_trailer<R>(
2033 reader: &mut R,
2034 format: ObjectFormat,
2035 ) -> Result<PackStreamIndexBuild>
2036 where
2037 R: Read,
2038 {
2039 index_pack_from_reader_to_trailer(reader, format)
2040 }
2041
2042 pub fn write_v2_for_pack_reader_with_len<R>(
2043 reader: &mut R,
2044 format: ObjectFormat,
2045 pack_len: u64,
2046 ) -> Result<PackStreamIndexBuild>
2047 where
2048 R: Read,
2049 {
2050 index_pack_from_reader(reader, format, pack_len)
2051 }
2052
2053 pub fn write_v2_for_pack_path(
2056 path: impl AsRef<Path>,
2057 format: ObjectFormat,
2058 ) -> Result<PackStreamIndexBuild> {
2059 let mut file = File::open(path)?;
2060 Self::write_v2_for_pack_reader(&mut file, format)
2061 }
2062
2063 pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
2064 Self::parse(bytes, ObjectFormat::Sha1)
2065 }
2066
2067 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2068 Self::parse_impl(bytes, format, true)
2069 }
2070
2071 pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2072 Self::parse_impl(bytes, format, false)
2073 }
2074
2075 fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
2076 let hash_len = format.raw_len();
2077 if bytes.len() < 4 {
2078 return Err(GitError::InvalidFormat("pack index too short".into()));
2079 }
2080 if bytes[..4] != [0xff, b't', b'O', b'c'] {
2081 return Self::parse_v1_impl(bytes, format, verify_checksum);
2082 }
2083 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
2084 return Err(GitError::InvalidFormat("pack index too short".into()));
2085 }
2086 let version = u32_be(&bytes[4..8]);
2087 if version != 2 {
2088 return Err(GitError::Unsupported(format!(
2089 "pack index version {version}"
2090 )));
2091 }
2092 let index_checksum_offset = bytes.len() - hash_len;
2093 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2094 if verify_checksum {
2095 let actual_index_checksum =
2096 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2097 if actual_index_checksum != index_checksum {
2098 return Err(GitError::InvalidFormat(format!(
2099 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2100 )));
2101 }
2102 }
2103
2104 let mut offset = 8usize;
2105 let mut fanout = [0u32; 256];
2106 let mut previous = 0u32;
2107 for slot in &mut fanout {
2108 *slot = u32_be(&bytes[offset..offset + 4]);
2109 if *slot < previous {
2110 return Err(GitError::InvalidFormat(
2111 "pack index fanout is not monotonic".into(),
2112 ));
2113 }
2114 previous = *slot;
2115 offset += 4;
2116 }
2117 let count = fanout[255] as usize;
2118 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
2119 offset = oid_table.end;
2120 let crc_table = checked_range(offset, count, 4, bytes.len())?;
2121 offset = crc_table.end;
2122 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
2123 offset = small_offset_table.end;
2124
2125 let large_offset_count = (0..count)
2126 .filter(|idx| {
2127 let start = small_offset_table.start + idx * 4;
2128 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
2129 })
2130 .count();
2131 let mut large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
2132 offset = large_offset_table.end;
2133
2134 let expected_trailer_offset = bytes.len() - hash_len * 2;
2135 if offset != expected_trailer_offset {
2136 if !verify_checksum && offset < expected_trailer_offset {
2137 large_offset_table = large_offset_table.start..expected_trailer_offset;
2138 offset = expected_trailer_offset;
2139 } else {
2140 return Err(GitError::InvalidFormat(format!(
2141 "pack index has {} unexpected bytes before trailer",
2142 expected_trailer_offset.saturating_sub(offset)
2143 )));
2144 }
2145 }
2146 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
2147
2148 let mut entries = Vec::with_capacity(count);
2149 for idx in 0..count {
2150 let oid_start = oid_table.start + idx * hash_len;
2151 let crc_start = crc_table.start + idx * 4;
2152 let offset_start = small_offset_table.start + idx * 4;
2153 let oid_bytes = &bytes[oid_start..oid_start + hash_len];
2154 if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
2158 return Err(GitError::InvalidFormat(
2159 "pack index object ids are not strictly ascending".into(),
2160 ));
2161 }
2162 let expected_min = if oid_bytes[0] == 0 {
2163 0
2164 } else {
2165 fanout[usize::from(oid_bytes[0] - 1)]
2166 };
2167 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
2168 return Err(GitError::InvalidFormat(
2169 "pack index object id is outside its fanout bucket".into(),
2170 ));
2171 }
2172 let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
2173 let offset = if raw_offset & 0x8000_0000 == 0 {
2174 u64::from(raw_offset)
2175 } else {
2176 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2177 let large_start = large_offset_table.start + large_idx * 8;
2178 if large_idx >= large_offset_table.len() / 8 {
2179 return Err(GitError::InvalidFormat(
2180 "pack index large offset points past table".into(),
2181 ));
2182 }
2183 u64_be(&bytes[large_start..large_start + 8])
2184 };
2185 entries.push(PackIndexEntry {
2186 oid: ObjectId::from_raw(format, oid_bytes)?,
2187 crc32: u32_be(&bytes[crc_start..crc_start + 4]),
2188 offset,
2189 });
2190 }
2191 Ok(Self {
2192 version,
2193 fanout,
2194 entries,
2195 pack_checksum,
2196 index_checksum,
2197 })
2198 }
2199
2200 fn parse_v1_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
2201 let hash_len = format.raw_len();
2202 if bytes.len() < 256 * 4 + 2 * hash_len {
2203 return Err(GitError::InvalidFormat("pack index too short".into()));
2204 }
2205 let index_checksum_offset = bytes.len() - hash_len;
2206 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2207 if verify_checksum {
2208 let actual_index_checksum =
2209 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2210 if actual_index_checksum != index_checksum {
2211 return Err(GitError::InvalidFormat(format!(
2212 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2213 )));
2214 }
2215 }
2216
2217 let mut offset = 0usize;
2218 let mut fanout = [0u32; 256];
2219 let mut previous = 0u32;
2220 for slot in &mut fanout {
2221 *slot = u32_be(&bytes[offset..offset + 4]);
2222 if *slot < previous {
2223 return Err(GitError::InvalidFormat(
2224 "pack index fanout is not monotonic".into(),
2225 ));
2226 }
2227 previous = *slot;
2228 offset += 4;
2229 }
2230 let count = fanout[255] as usize;
2231 let entry_len = hash_len
2232 .checked_add(4)
2233 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
2234 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
2235 offset = entry_table.end;
2236 let expected_trailer_offset = bytes.len() - hash_len * 2;
2237 if offset != expected_trailer_offset {
2238 return Err(GitError::InvalidFormat(format!(
2239 "pack index has {} unexpected bytes before trailer",
2240 expected_trailer_offset.saturating_sub(offset)
2241 )));
2242 }
2243 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
2244
2245 let mut entries = Vec::with_capacity(count);
2246 let mut previous_oid: Option<ObjectId> = None;
2247 for idx in 0..count {
2248 let start = entry_table.start + idx * entry_len;
2249 let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
2250 if let Some(previous) = &previous_oid
2251 && previous.as_bytes() >= oid.as_bytes()
2252 {
2253 return Err(GitError::InvalidFormat(
2254 "pack index object ids are not strictly sorted".into(),
2255 ));
2256 }
2257 previous_oid = Some(oid);
2258 entries.push(PackIndexEntry {
2259 oid,
2260 crc32: 0,
2261 offset: u64::from(u32_be(&bytes[start..start + 4])),
2262 });
2263 }
2264 Ok(Self {
2265 version: 1,
2266 fanout,
2267 entries,
2268 pack_checksum,
2269 index_checksum,
2270 })
2271 }
2272
2273 pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
2274 self.entries
2275 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2276 .ok()
2277 .map(|idx| &self.entries[idx])
2278 }
2279
2280 pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
2281 Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
2282 }
2283
2284 pub fn write_v2(
2285 format: ObjectFormat,
2286 entries: &[PackIndexEntry],
2287 pack_checksum: &ObjectId,
2288 ) -> Result<Vec<u8>> {
2289 if pack_checksum.format() != format {
2290 return Err(GitError::InvalidObjectId(
2291 "pack checksum format does not match index format".into(),
2292 ));
2293 }
2294 let mut entries = entries.iter().collect::<Vec<_>>();
2295 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2296 for pair in entries.windows(2) {
2297 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2298 return Err(GitError::InvalidFormat(format!(
2299 "pack index contains duplicate object id {}",
2300 pair[0].oid
2301 )));
2302 }
2303 }
2304 let mut fanout = [0u32; 256];
2305 for entry in &entries {
2306 if entry.oid.format() != format {
2307 return Err(GitError::InvalidObjectId(
2308 "pack index entry format does not match index format".into(),
2309 ));
2310 }
2311 let first = entry.oid.as_bytes()[0] as usize;
2312 fanout[first] = fanout[first]
2313 .checked_add(1)
2314 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2315 }
2316 let mut running = 0u32;
2317 for slot in &mut fanout {
2318 running = running
2319 .checked_add(*slot)
2320 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2321 *slot = running;
2322 }
2323
2324 let mut index = Vec::new();
2325 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
2326 index.extend_from_slice(&2u32.to_be_bytes());
2327 for count in fanout {
2328 index.extend_from_slice(&count.to_be_bytes());
2329 }
2330 for entry in &entries {
2331 index.extend_from_slice(entry.oid.as_bytes());
2332 }
2333 for entry in &entries {
2334 index.extend_from_slice(&entry.crc32.to_be_bytes());
2335 }
2336
2337 let mut large_offsets = Vec::new();
2338 for entry in &entries {
2339 if entry.offset < 0x8000_0000 {
2340 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2341 } else {
2342 if large_offsets.len() > 0x7fff_ffff {
2343 return Err(GitError::InvalidFormat(
2344 "too many large pack offsets".into(),
2345 ));
2346 }
2347 let large_idx = large_offsets.len() as u32;
2348 index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
2349 large_offsets.push(entry.offset);
2350 }
2351 }
2352 for offset in large_offsets {
2353 index.extend_from_slice(&offset.to_be_bytes());
2354 }
2355 index.extend_from_slice(pack_checksum.as_bytes());
2356 let index_checksum = sley_core::digest_bytes(format, &index)?;
2357 index.extend_from_slice(index_checksum.as_bytes());
2358 Ok(index)
2359 }
2360
2361 pub fn write_v1(
2367 format: ObjectFormat,
2368 entries: &[PackIndexEntry],
2369 pack_checksum: &ObjectId,
2370 ) -> Result<Vec<u8>> {
2371 if pack_checksum.format() != format {
2372 return Err(GitError::InvalidObjectId(
2373 "pack checksum format does not match index format".into(),
2374 ));
2375 }
2376 let mut entries = entries.iter().collect::<Vec<_>>();
2377 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2378 for pair in entries.windows(2) {
2379 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
2380 return Err(GitError::InvalidFormat(format!(
2381 "pack index contains duplicate object id {}",
2382 pair[0].oid
2383 )));
2384 }
2385 }
2386 let mut fanout = [0u32; 256];
2387 for entry in &entries {
2388 if entry.oid.format() != format {
2389 return Err(GitError::InvalidObjectId(
2390 "pack index entry format does not match index format".into(),
2391 ));
2392 }
2393 if entry.offset > 0xffff_ffff {
2394 return Err(GitError::InvalidFormat(
2395 "pack offset too large for a version-1 index".into(),
2396 ));
2397 }
2398 let first = entry.oid.as_bytes()[0] as usize;
2399 fanout[first] = fanout[first]
2400 .checked_add(1)
2401 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2402 }
2403 let mut running = 0u32;
2404 for slot in &mut fanout {
2405 running = running
2406 .checked_add(*slot)
2407 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
2408 *slot = running;
2409 }
2410
2411 let mut index = Vec::new();
2412 for count in fanout {
2413 index.extend_from_slice(&count.to_be_bytes());
2414 }
2415 for entry in &entries {
2416 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
2417 index.extend_from_slice(entry.oid.as_bytes());
2418 }
2419 index.extend_from_slice(pack_checksum.as_bytes());
2420 let index_checksum = sley_core::digest_bytes(format, &index)?;
2421 index.extend_from_slice(index_checksum.as_bytes());
2422 Ok(index)
2423 }
2424}
2425
2426fn index_pack_from_reader<R>(
2427 reader: &mut R,
2428 format: ObjectFormat,
2429 pack_len: u64,
2430) -> Result<PackStreamIndexBuild>
2431where
2432 R: Read,
2433{
2434 index_pack_from_stream(PackReadStream::new(reader, format, Some(pack_len))?, format)
2435}
2436
2437fn index_pack_from_reader_to_trailer<R>(
2438 reader: &mut R,
2439 format: ObjectFormat,
2440) -> Result<PackStreamIndexBuild>
2441where
2442 R: Read,
2443{
2444 index_pack_from_stream(PackReadStream::new(reader, format, None)?, format)
2445}
2446
2447fn index_pack_from_stream<R>(
2448 mut stream: PackReadStream<'_, R>,
2449 format: ObjectFormat,
2450) -> Result<PackStreamIndexBuild>
2451where
2452 R: Read,
2453{
2454 let mut header = [0u8; 12];
2455 stream.read_pack_bytes(&mut header)?;
2456 if &header[..4] != b"PACK" {
2457 return Err(GitError::InvalidFormat("missing PACK signature".into()));
2458 }
2459 let version = u32_be(&header[4..8]);
2460 if version != 2 && version != 3 {
2461 return Err(GitError::Unsupported(format!("pack version {version}")));
2462 }
2463 let count = u32_be(&header[8..12]) as usize;
2464 let mut parsed_entries = Vec::with_capacity(count);
2465 let mut raw_entries = Vec::with_capacity(count);
2466 for _ in 0..count {
2467 let entry_offset = stream.pack_offset();
2468 let mut entry_crc = crc32fast::Hasher::new();
2469 let header = parse_entry_header_from_stream(&mut stream, &mut entry_crc)?;
2470 let base = match header.kind {
2471 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
2472 parse_ofs_delta_base_offset_from_stream(&mut stream, &mut entry_crc, entry_offset)?,
2473 )),
2474 PackObjectKind::RefDelta => {
2475 let mut raw = vec![0u8; format.raw_len()];
2476 stream.read_entry_bytes(&mut raw, &mut entry_crc)?;
2477 Some(DeltaBase::Ref(ObjectId::from_raw(format, &raw)?))
2478 }
2479 _ => None,
2480 };
2481 let (body, consumed) = inflate_entry_from_stream(
2482 &mut stream,
2483 &mut entry_crc,
2484 header.size.min(usize::MAX as u64) as usize,
2485 )?;
2486 if body.len() as u64 != header.size {
2487 return Err(GitError::InvalidObject(format!(
2488 "pack object declared {} bytes, decoded {}",
2489 header.size,
2490 body.len()
2491 )));
2492 }
2493 if consumed == 0 {
2494 return Err(GitError::InvalidFormat(
2495 "empty compressed pack entry".into(),
2496 ));
2497 }
2498 raw_entries.push((entry_offset, entry_crc.finalize()));
2499 if let Some(base) = base {
2500 parsed_entries.push(ParsedPackEntry::Delta {
2501 base,
2502 compressed_size: consumed as u64,
2503 delta_size: header.size,
2504 offset: entry_offset,
2505 delta: body,
2506 });
2507 } else {
2508 let object_type = pack_object_kind_to_object_type(header.kind)?;
2509 let object = EncodedObject::new(object_type, body);
2510 let oid = object.object_id(format)?;
2511 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
2512 entry: PackEntry {
2513 oid,
2514 compressed_size: consumed as u64,
2515 uncompressed_size: header.size,
2516 offset: entry_offset,
2517 },
2518 object,
2519 }));
2520 }
2521 }
2522 if stream.pack_offset() != stream.trailer_pack_offset() {
2523 return Err(GitError::InvalidFormat(format!(
2524 "pack has {} trailing bytes before checksum",
2525 stream.trailer_pack_offset() - stream.pack_offset()
2526 )));
2527 }
2528 let expected = stream.read_trailer_oid()?;
2529 let pack_checksum = stream.finish_digest()?;
2530 if pack_checksum != expected {
2531 return Err(GitError::InvalidFormat(format!(
2532 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
2533 )));
2534 }
2535
2536 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
2537 let entries = resolved
2538 .iter()
2539 .zip(raw_entries)
2540 .map(|(object, (offset, crc32))| PackIndexEntry {
2541 oid: object.entry.oid,
2542 crc32,
2543 offset,
2544 })
2545 .collect::<Vec<_>>();
2546 let objects = resolved
2547 .iter()
2548 .map(|object| PackIndexedObject {
2549 oid: object.entry.oid,
2550 object_type: object.object.object_type,
2551 size: object.object.body.len() as u64,
2552 offset: object.entry.offset,
2553 })
2554 .collect::<Vec<_>>();
2555 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
2556 Ok(PackStreamIndexBuild {
2557 index,
2558 pack_checksum,
2559 entries,
2560 objects,
2561 })
2562}
2563
2564fn pack_object_kind_to_object_type(kind: PackObjectKind) -> Result<ObjectType> {
2565 match kind {
2566 PackObjectKind::Commit => Ok(ObjectType::Commit),
2567 PackObjectKind::Tree => Ok(ObjectType::Tree),
2568 PackObjectKind::Blob => Ok(ObjectType::Blob),
2569 PackObjectKind::Tag => Ok(ObjectType::Tag),
2570 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => Err(GitError::InvalidFormat(
2571 "delta entry cannot be used as an object type".into(),
2572 )),
2573 }
2574}
2575
/// Sequential reader over a pack that tracks the logical pack offset and a
/// running digest of the pack content, with support for pushing back bytes
/// that were read ahead of the current entry.
struct PackReadStream<'a, R> {
    /// Underlying byte source.
    reader: &'a mut R,
    /// Pack offset of the next byte to be handed out; advanced by reads and
    /// rewound when bytes are pushed back.
    position: u64,
    /// Total pack length when the caller knows it, otherwise `None`.
    pack_len: Option<u64>,
    /// Offset at which the trailing checksum starts (`pack_len` minus the
    /// hash length); `None` when the pack length is unknown.
    trailer_position: Option<u64>,
    /// Digest over the bytes accepted as pack content so far.
    digest: StreamingDigest,
    /// Object format, used for the hash length and for building object ids.
    format: ObjectFormat,
    /// Bytes already pulled from `reader` but pushed back; they are served
    /// before `reader` is consulted again.
    pending: VecDeque<u8>,
}
2585
2586impl<'a, R> PackReadStream<'a, R>
2587where
2588 R: Read,
2589{
2590 fn new(reader: &'a mut R, format: ObjectFormat, pack_len: Option<u64>) -> Result<Self> {
2591 let trailer_len = format.raw_len() as u64;
2592 let trailer_position = pack_len
2593 .map(|pack_len| {
2594 if pack_len < 12 + trailer_len {
2595 return Err(GitError::InvalidFormat("pack file too short".into()));
2596 }
2597 Ok(pack_len - trailer_len)
2598 })
2599 .transpose()?;
2600 Ok(Self {
2601 reader,
2602 position: 0,
2603 pack_len,
2604 trailer_position,
2605 digest: StreamingDigest::new(format),
2606 format,
2607 pending: VecDeque::new(),
2608 })
2609 }
2610
2611 fn pack_offset(&self) -> u64 {
2612 self.position
2613 }
2614
2615 fn trailer_pack_offset(&self) -> u64 {
2616 self.trailer_position.unwrap_or(self.position)
2617 }
2618
2619 fn read_pack_bytes(&mut self, bytes: &mut [u8]) -> Result<()> {
2620 let end = self
2621 .position
2622 .checked_add(bytes.len() as u64)
2623 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2624 if self
2625 .trailer_position
2626 .is_some_and(|trailer_position| end > trailer_position)
2627 {
2628 return Err(GitError::InvalidFormat(
2629 "pack entry extends past checksum".into(),
2630 ));
2631 }
2632 self.read_exact_raw(bytes)?;
2633 self.position = end;
2634 self.digest.update(bytes);
2635 Ok(())
2636 }
2637
2638 fn read_exact_raw(&mut self, bytes: &mut [u8]) -> Result<()> {
2639 let mut written = 0usize;
2640 while written < bytes.len() {
2641 if let Some(byte) = self.pending.pop_front() {
2642 bytes[written] = byte;
2643 written += 1;
2644 continue;
2645 }
2646 self.reader.read_exact(&mut bytes[written..])?;
2647 break;
2648 }
2649 Ok(())
2650 }
2651
2652 fn read_entry_bytes(&mut self, bytes: &mut [u8], crc: &mut crc32fast::Hasher) -> Result<()> {
2653 self.read_pack_bytes(bytes)?;
2654 crc.update(bytes);
2655 Ok(())
2656 }
2657
2658 fn read_entry_byte(&mut self, crc: &mut crc32fast::Hasher) -> Result<u8> {
2659 let mut byte = [0u8; 1];
2660 self.read_entry_bytes(&mut byte, crc)?;
2661 Ok(byte[0])
2662 }
2663
2664 fn read_compressed_chunk(&mut self, bytes: &mut [u8]) -> Result<usize> {
2665 let len = if let Some(trailer_position) = self.trailer_position {
2666 if self.position >= trailer_position {
2667 return Ok(0);
2668 }
2669 let remaining = trailer_position - self.position;
2670 if remaining < bytes.len() as u64 {
2671 remaining as usize
2672 } else {
2673 bytes.len()
2674 }
2675 } else {
2676 bytes.len()
2677 };
2678 let mut read = 0usize;
2679 while read < len {
2680 let Some(byte) = self.pending.pop_front() else {
2681 break;
2682 };
2683 bytes[read] = byte;
2684 read += 1;
2685 }
2686 if read < len {
2687 read += self.reader.read(&mut bytes[read..len])?;
2688 }
2689 self.position = self
2690 .position
2691 .checked_add(read as u64)
2692 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2693 Ok(read)
2694 }
2695
2696 fn accept_compressed_bytes(&mut self, bytes: &[u8], crc: &mut crc32fast::Hasher) {
2697 self.digest.update(bytes);
2698 crc.update(bytes);
2699 }
2700
2701 fn push_back_compressed_bytes(&mut self, bytes: &[u8]) -> Result<()> {
2702 if bytes.is_empty() {
2703 return Ok(());
2704 }
2705 self.position = self
2706 .position
2707 .checked_sub(bytes.len() as u64)
2708 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2709 for byte in bytes.iter().rev() {
2710 self.pending.push_front(*byte);
2711 }
2712 Ok(())
2713 }
2714
2715 fn read_trailer_oid(&mut self) -> Result<ObjectId> {
2716 let mut raw = vec![0u8; self.format.raw_len()];
2717 self.read_exact_raw(&mut raw)?;
2718 self.position = self
2719 .position
2720 .checked_add(raw.len() as u64)
2721 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2722 if let Some(pack_len) = self.pack_len
2723 && self.position != pack_len
2724 {
2725 return Err(GitError::InvalidFormat(format!(
2726 "pack has {} trailing bytes after checksum",
2727 pack_len - self.position
2728 )));
2729 }
2730 if self.pack_len.is_none() && !self.pending.is_empty() {
2731 return Err(GitError::InvalidFormat(
2732 "pack has trailing bytes after checksum".into(),
2733 ));
2734 }
2735 ObjectId::from_raw(self.format, &raw)
2736 }
2737
2738 fn finish_digest(self) -> Result<ObjectId> {
2739 self.digest.finalize()
2740 }
2741}
2742
2743const STREAM_INFLATE_CHUNK: usize = 32 * 1024;
2744
2745fn inflate_entry_from_stream<R>(
2746 stream: &mut PackReadStream<'_, R>,
2747 crc: &mut crc32fast::Hasher,
2748 size_hint: usize,
2749) -> Result<(Vec<u8>, usize)>
2750where
2751 R: Read,
2752{
2753 INFLATE.with(|cell| {
2754 let mut decompress = cell.borrow_mut();
2755 decompress.reset(true);
2756 let mut out = Vec::with_capacity(bounded_inflate_reserve(size_hint, STREAM_INFLATE_CHUNK));
2757 let mut compressed_total = 0usize;
2758 let mut input = [0u8; STREAM_INFLATE_CHUNK];
2759 loop {
2760 let read = stream.read_compressed_chunk(&mut input)?;
2761 if read == 0 {
2762 return Err(GitError::InvalidObject("truncated zlib stream".into()));
2763 }
2764 let mut cursor = 0usize;
2765 while cursor < read {
2766 if out.len() == out.capacity() {
2767 out.reserve(out.len().max(64));
2768 }
2769 let before_in = decompress.total_in();
2770 let before_out = decompress.total_out();
2771 let status = decompress
2772 .decompress_vec(
2773 &input[cursor..read],
2774 &mut out,
2775 flate2::FlushDecompress::None,
2776 )
2777 .map_err(|err| {
2778 GitError::InvalidObject(format!("zlib inflate failed: {err}"))
2779 })?;
2780 let consumed = (decompress.total_in() - before_in) as usize;
2781 let produced = decompress.total_out() - before_out;
2782 if consumed > 0 {
2783 let consumed_end = cursor + consumed;
2784 stream.accept_compressed_bytes(&input[cursor..consumed_end], crc);
2785 compressed_total = compressed_total
2786 .checked_add(consumed)
2787 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
2788 cursor = consumed_end;
2789 }
2790 match status {
2791 flate2::Status::StreamEnd => {
2792 stream.push_back_compressed_bytes(&input[cursor..read])?;
2793 return Ok((out, compressed_total));
2794 }
2795 _ if consumed == 0 && produced == 0 => {
2796 return Err(GitError::InvalidObject("truncated zlib stream".into()));
2797 }
2798 _ => {}
2799 }
2800 }
2801 }
2802 })
2803}
2804
2805fn parse_entry_header_from_stream<R>(
2806 stream: &mut PackReadStream<'_, R>,
2807 crc: &mut crc32fast::Hasher,
2808) -> Result<EntryHeader>
2809where
2810 R: Read,
2811{
2812 let first = stream.read_entry_byte(crc)?;
2813 let mut size = u64::from(first & 0x0f);
2814 let kind = match (first >> 4) & 0x07 {
2815 1 => PackObjectKind::Commit,
2816 2 => PackObjectKind::Tree,
2817 3 => PackObjectKind::Blob,
2818 4 => PackObjectKind::Tag,
2819 6 => PackObjectKind::OfsDelta,
2820 7 => PackObjectKind::RefDelta,
2821 other => {
2822 return Err(GitError::InvalidFormat(format!(
2823 "invalid pack object type {other}"
2824 )));
2825 }
2826 };
2827 let mut shift = 4;
2828 let mut byte = first;
2829 while byte & 0x80 != 0 {
2830 byte = stream.read_entry_byte(crc)?;
2831 let part = u64::from(byte & 0x7f);
2832 size = size
2833 .checked_add(
2834 part.checked_shl(shift)
2835 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
2836 )
2837 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
2838 shift += 7;
2839 }
2840 Ok(EntryHeader { kind, size })
2841}
2842
2843fn parse_ofs_delta_base_offset_from_stream<R>(
2844 stream: &mut PackReadStream<'_, R>,
2845 crc: &mut crc32fast::Hasher,
2846 entry_offset: u64,
2847) -> Result<u64>
2848where
2849 R: Read,
2850{
2851 let mut byte = stream.read_entry_byte(crc)?;
2852 let mut relative = u64::from(byte & 0x7f);
2853 while byte & 0x80 != 0 {
2854 byte = stream.read_entry_byte(crc)?;
2855 relative = relative
2856 .checked_add(1)
2857 .and_then(|value| value.checked_shl(7))
2858 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
2859 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
2860 }
2861 entry_offset
2862 .checked_sub(relative)
2863 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
2864}
2865
2866pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
2871 let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
2872 oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
2873 let mut index_position = vec![0u32; entries.len()];
2874 for (position, &entry) in oid_sorted.iter().enumerate() {
2875 index_position[entry] = position as u32;
2876 }
2877 let mut by_offset: Vec<usize> = (0..entries.len()).collect();
2878 by_offset.sort_by_key(|&entry| entries[entry].offset);
2879 by_offset
2880 .into_iter()
2881 .map(|entry| index_position[entry])
2882 .collect()
2883}
2884
2885impl PackReverseIndex {
2886 pub fn write(
2887 format: ObjectFormat,
2888 positions: &[u32],
2889 pack_checksum: &ObjectId,
2890 ) -> Result<Vec<u8>> {
2891 if pack_checksum.format() != format {
2892 return Err(GitError::InvalidObjectId(
2893 "pack checksum format does not match reverse index format".into(),
2894 ));
2895 }
2896 validate_position_permutation(positions)?;
2897
2898 let mut out = Vec::new();
2899 out.extend_from_slice(b"RIDX");
2900 out.extend_from_slice(&1u32.to_be_bytes());
2901 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2902 for position in positions {
2903 out.extend_from_slice(&position.to_be_bytes());
2904 }
2905 out.extend_from_slice(pack_checksum.as_bytes());
2906 let checksum = sley_core::digest_bytes(format, &out)?;
2907 out.extend_from_slice(checksum.as_bytes());
2908 Ok(out)
2909 }
2910
2911 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
2912 let hash_len = format.raw_len();
2913 let table_len = object_count
2914 .checked_mul(4)
2915 .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
2916 let min_len = 12usize
2917 .checked_add(table_len)
2918 .and_then(|len| len.checked_add(hash_len * 2))
2919 .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
2920 if bytes.len() < min_len {
2921 return Err(GitError::InvalidFormat("reverse index too short".into()));
2922 }
2923 if bytes.len() != min_len {
2924 return Err(GitError::InvalidFormat(format!(
2925 "reverse index has {} trailing bytes",
2926 bytes.len() - min_len
2927 )));
2928 }
2929 if &bytes[..4] != b"RIDX" {
2930 return Err(GitError::InvalidFormat(
2931 "missing reverse index signature".into(),
2932 ));
2933 }
2934 let version = u32_be(&bytes[4..8]);
2935 if version != 1 {
2936 return Err(GitError::Unsupported(format!(
2937 "reverse index version {version}"
2938 )));
2939 }
2940 let hash_id = u32_be(&bytes[8..12]);
2941 if hash_id != hash_function_id(format) {
2942 return Err(GitError::InvalidFormat(format!(
2943 "reverse index hash id {hash_id} does not match {}",
2944 format.name()
2945 )));
2946 }
2947
2948 let index_checksum_offset = bytes.len() - hash_len;
2949 let actual_index_checksum =
2950 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
2951 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
2952 if actual_index_checksum != index_checksum {
2953 return Err(GitError::InvalidFormat(format!(
2954 "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
2955 )));
2956 }
2957
2958 let pack_checksum_offset = index_checksum_offset - hash_len;
2959 let pack_checksum =
2960 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
2961 let mut positions = Vec::with_capacity(object_count);
2962 let mut offset = 12usize;
2963 for _ in 0..object_count {
2964 let position = u32_be(&bytes[offset..offset + 4]);
2965 positions.push(position);
2966 offset += 4;
2967 }
2968 validate_position_permutation(&positions)?;
2969
2970 Ok(Self {
2971 version,
2972 format,
2973 positions,
2974 pack_checksum,
2975 index_checksum,
2976 })
2977 }
2978}
2979
2980impl PackMtimes {
2981 pub fn write(
2982 format: ObjectFormat,
2983 mtimes: &[u32],
2984 pack_checksum: &ObjectId,
2985 ) -> Result<Vec<u8>> {
2986 if pack_checksum.format() != format {
2987 return Err(GitError::InvalidObjectId(
2988 "pack checksum format does not match mtimes format".into(),
2989 ));
2990 }
2991
2992 let mut out = Vec::new();
2993 out.extend_from_slice(b"MTME");
2994 out.extend_from_slice(&1u32.to_be_bytes());
2995 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
2996 for mtime in mtimes {
2997 out.extend_from_slice(&mtime.to_be_bytes());
2998 }
2999 out.extend_from_slice(pack_checksum.as_bytes());
3000 let checksum = sley_core::digest_bytes(format, &out)?;
3001 out.extend_from_slice(checksum.as_bytes());
3002 Ok(out)
3003 }
3004
3005 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
3006 let hash_len = format.raw_len();
3007 let table_len = object_count
3008 .checked_mul(4)
3009 .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
3010 let expected_len = 12usize
3011 .checked_add(table_len)
3012 .and_then(|len| len.checked_add(hash_len * 2))
3013 .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
3014 if bytes.len() < expected_len {
3015 return Err(GitError::InvalidFormat("mtimes file too short".into()));
3016 }
3017 if bytes.len() != expected_len {
3018 return Err(GitError::InvalidFormat(format!(
3019 "mtimes file has {} trailing bytes",
3020 bytes.len() - expected_len
3021 )));
3022 }
3023 if &bytes[..4] != b"MTME" {
3024 return Err(GitError::InvalidFormat("missing mtimes signature".into()));
3025 }
3026 let version = u32_be(&bytes[4..8]);
3027 if version != 1 {
3028 return Err(GitError::Unsupported(format!("mtimes version {version}")));
3029 }
3030 let hash_id = u32_be(&bytes[8..12]);
3031 if hash_id != hash_function_id(format) {
3032 return Err(GitError::InvalidFormat(format!(
3033 "mtimes hash id {hash_id} does not match {}",
3034 format.name()
3035 )));
3036 }
3037
3038 let index_checksum_offset = bytes.len() - hash_len;
3039 let actual_index_checksum =
3040 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
3041 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
3042 if actual_index_checksum != index_checksum {
3043 return Err(GitError::InvalidFormat(format!(
3044 "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
3045 )));
3046 }
3047
3048 let pack_checksum_offset = index_checksum_offset - hash_len;
3049 let pack_checksum =
3050 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
3051 let mut mtimes = Vec::with_capacity(object_count);
3052 let mut offset = 12usize;
3053 for _ in 0..object_count {
3054 mtimes.push(u32_be(&bytes[offset..offset + 4]));
3055 offset += 4;
3056 }
3057
3058 Ok(Self {
3059 version,
3060 format,
3061 mtimes,
3062 pack_checksum,
3063 index_checksum,
3064 })
3065 }
3066}
3067
3068impl PackBitmapIndex {
3069 pub const OPTION_FULL_DAG: u16 = 0x0001;
3070 pub const OPTION_HASH_CACHE: u16 = 0x0004;
3071 pub const OPTION_PSEUDO_MERGES: u16 = 0x0020;
3072
3073 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
3074 let hash_len = format.raw_len();
3075 let min_len = 12usize
3076 .checked_add(hash_len * 2)
3077 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
3078 if bytes.len() < min_len {
3079 return Err(GitError::InvalidFormat("bitmap index too short".into()));
3080 }
3081 if &bytes[..4] != b"BITM" {
3082 return Err(GitError::InvalidFormat(
3083 "missing bitmap index signature".into(),
3084 ));
3085 }
3086 let version = u16_be(&bytes[4..6]);
3087 if version != 1 {
3088 return Err(GitError::Unsupported(format!(
3089 "bitmap index version {version}"
3090 )));
3091 }
3092 let options = u16_be(&bytes[6..8]);
3093 let known_options =
3094 Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE | Self::OPTION_PSEUDO_MERGES;
3095 if options & !known_options != 0 {
3096 return Err(GitError::Unsupported(format!(
3097 "bitmap index options {:#06x}",
3098 options & !known_options
3099 )));
3100 }
3101 let entry_count = u32_be(&bytes[8..12]) as usize;
3102 let checksum_offset = bytes.len() - hash_len;
3103 let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
3104 let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
3105 if actual_index_checksum != index_checksum {
3106 return Err(GitError::InvalidFormat(format!(
3107 "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
3108 )));
3109 }
3110 let mut extras_end = checksum_offset;
3111 let hash_cache_range = if options & Self::OPTION_HASH_CACHE != 0 {
3112 let cache_len = object_count
3113 .checked_mul(4)
3114 .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
3115 if cache_len > extras_end {
3116 return Err(GitError::InvalidFormat(
3117 "truncated bitmap hash cache".into(),
3118 ));
3119 }
3120 extras_end -= cache_len;
3121 Some(extras_end..extras_end + cache_len)
3122 } else {
3123 None
3124 };
3125 let pseudo_merge_range = if options & Self::OPTION_PSEUDO_MERGES != 0 {
3126 if extras_end < 24 {
3127 return Err(GitError::InvalidFormat(
3128 "truncated bitmap pseudo-merge extension".into(),
3129 ));
3130 }
3131 let extension_size = u64_be(&bytes[extras_end - 8..extras_end]) as usize;
3132 if extension_size > extras_end {
3133 return Err(GitError::InvalidFormat(
3134 "bitmap pseudo-merge extension points before file start".into(),
3135 ));
3136 }
3137 let start = extras_end - extension_size;
3138 Some(start..extras_end)
3139 } else {
3140 None
3141 };
3142 let entries_end = pseudo_merge_range
3143 .as_ref()
3144 .map(|range| range.start)
3145 .unwrap_or(extras_end);
3146
3147 let pack_checksum_end = 12usize
3148 .checked_add(hash_len)
3149 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
3150 let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
3151 let mut offset = pack_checksum_end;
3152 let commits = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3153 let trees = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3154 let blobs = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3155 let tags = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3156
3157 let mut entries = Vec::with_capacity(entry_count);
3158 for idx in 0..entry_count {
3159 if entries_end.saturating_sub(offset) < 6 {
3160 return Err(GitError::InvalidFormat(
3161 "truncated bitmap index entry".into(),
3162 ));
3163 }
3164 let object_position = u32_be(&bytes[offset..offset + 4]);
3165 offset += 4;
3166 if object_position as usize >= object_count {
3167 return Err(GitError::InvalidFormat(
3168 "bitmap index entry points past object table".into(),
3169 ));
3170 }
3171 let xor_offset = bytes[offset];
3172 offset += 1;
3173 if xor_offset as usize > idx || xor_offset > 160 {
3174 return Err(GitError::InvalidFormat(
3175 "bitmap index entry has invalid XOR offset".into(),
3176 ));
3177 }
3178 let flags = bytes[offset];
3179 offset += 1;
3180 let bitmap = parse_bitmap_ewah(bytes, &mut offset, entries_end, object_count)?;
3181 entries.push(PackBitmapEntry {
3182 object_position,
3183 xor_offset,
3184 flags,
3185 bitmap,
3186 });
3187 }
3188
3189 if offset != entries_end {
3190 return Err(GitError::InvalidFormat(format!(
3191 "bitmap index has {} trailing entry bytes",
3192 entries_end - offset
3193 )));
3194 }
3195
3196 let pseudo_merges = if let Some(range) = pseudo_merge_range {
3197 parse_bitmap_pseudo_merges(bytes, range, object_count)?
3198 } else {
3199 Vec::new()
3200 };
3201
3202 let name_hash_cache = if let Some(range) = hash_cache_range {
3203 let mut cache = Vec::with_capacity(object_count);
3204 let mut offset = range.start;
3205 for _ in 0..object_count {
3206 cache.push(u32_be(&bytes[offset..offset + 4]));
3207 offset += 4;
3208 }
3209 Some(cache)
3210 } else {
3211 None
3212 };
3213
3214 Ok(Self {
3215 version,
3216 format,
3217 options,
3218 pack_checksum,
3219 index_checksum,
3220 type_bitmaps: PackBitmapTypeBitmaps {
3221 commits,
3222 trees,
3223 blobs,
3224 tags,
3225 },
3226 entries,
3227 pseudo_merges,
3228 name_hash_cache,
3229 })
3230 }
3231
3232 pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
3235 self.entries
3236 .iter()
3237 .find(|entry| entry.object_position == position)
3238 }
3239}
3240
3241fn parse_bitmap_pseudo_merges(
3242 bytes: &[u8],
3243 range: std::ops::Range<usize>,
3244 object_count: usize,
3245) -> Result<Vec<PackBitmapPseudoMerge>> {
3246 if range.end < range.start || range.end > bytes.len() || range.end - range.start < 24 {
3247 return Err(GitError::InvalidFormat(
3248 "truncated bitmap pseudo-merge extension".into(),
3249 ));
3250 }
3251 let trailer_start = range.end - 24;
3252 let pseudo_merge_count = u32_be(&bytes[trailer_start..trailer_start + 4]) as usize;
3253 let commit_count = u32_be(&bytes[trailer_start + 4..trailer_start + 8]) as usize;
3254 let lookup_offset = u64_be(&bytes[trailer_start + 8..trailer_start + 16]) as usize;
3255 let extension_size = u64_be(&bytes[trailer_start + 16..trailer_start + 24]) as usize;
3256 if extension_size != range.end - range.start {
3257 return Err(GitError::InvalidFormat(
3258 "bitmap pseudo-merge extension size mismatch".into(),
3259 ));
3260 }
3261 let lookup_start = range
3262 .start
3263 .checked_add(lookup_offset)
3264 .ok_or_else(|| GitError::InvalidFormat("bitmap pseudo-merge lookup overflow".into()))?;
3265 if lookup_start > trailer_start {
3266 return Err(GitError::InvalidFormat(
3267 "bitmap pseudo-merge lookup points past extension".into(),
3268 ));
3269 }
3270 let lookup_len = commit_count
3271 .checked_mul(12)
3272 .ok_or_else(|| GitError::InvalidFormat("bitmap pseudo-merge lookup overflow".into()))?;
3273 if lookup_start
3274 .checked_add(lookup_len)
3275 .is_none_or(|end| end > trailer_start)
3276 {
3277 return Err(GitError::InvalidFormat(
3278 "truncated bitmap pseudo-merge lookup".into(),
3279 ));
3280 }
3281 let position_table_len = pseudo_merge_count.checked_mul(8).ok_or_else(|| {
3282 GitError::InvalidFormat("bitmap pseudo-merge position table overflow".into())
3283 })?;
3284 let position_table_start = trailer_start
3285 .checked_sub(position_table_len)
3286 .filter(|start| *start >= range.start)
3287 .ok_or_else(|| {
3288 GitError::InvalidFormat("truncated bitmap pseudo-merge position table".into())
3289 })?;
3290
3291 let mut pseudo_merges = Vec::with_capacity(pseudo_merge_count);
3292 let mut cursor = position_table_start;
3293 for _ in 0..pseudo_merge_count {
3294 let pseudo_offset = u64_be(&bytes[cursor..cursor + 8]) as usize;
3295 cursor += 8;
3296 if pseudo_offset < range.start || pseudo_offset >= position_table_start {
3297 return Err(GitError::InvalidFormat(
3298 "bitmap pseudo-merge offset out of range".into(),
3299 ));
3300 }
3301 let mut offset = pseudo_offset;
3302 let commits = parse_bitmap_ewah(bytes, &mut offset, range.end, object_count)?;
3303 let bitmap = parse_bitmap_ewah(bytes, &mut offset, range.end, object_count)?;
3304 pseudo_merges.push(PackBitmapPseudoMerge { commits, bitmap });
3305 }
3306 Ok(pseudo_merges)
3307}
3308
3309fn parse_bitmap_ewah(
3310 bytes: &[u8],
3311 offset: &mut usize,
3312 checksum_offset: usize,
3313 _object_count: usize,
3314) -> Result<EwahBitmap> {
3315 if checksum_offset.saturating_sub(*offset) < 12 {
3316 return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
3317 }
3318 let bit_size = u32_be(&bytes[*offset..*offset + 4]);
3319 *offset += 4;
3320 let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
3321 *offset += 4;
3322 let words_len = word_count
3323 .checked_mul(8)
3324 .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
3325 if checksum_offset.saturating_sub(*offset) < words_len + 4 {
3326 return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
3327 }
3328 let mut words = Vec::with_capacity(word_count);
3329 for _ in 0..word_count {
3330 words.push(u64_be(&bytes[*offset..*offset + 8]));
3331 *offset += 8;
3332 }
3333 let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
3334 *offset += 4;
3335 validate_ewah_words(bit_size, &words, rlw_position)?;
3336 Ok(EwahBitmap {
3337 bit_size,
3338 words,
3339 rlw_position,
3340 })
3341}
3342
3343fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
3344 if words.is_empty() {
3345 if rlw_position != 0 || bit_size != 0 {
3346 return Err(GitError::InvalidFormat(
3347 "EWAH bitmap has invalid empty RLW".into(),
3348 ));
3349 }
3350 return Ok(());
3351 }
3352 if rlw_position as usize >= words.len() {
3353 return Err(GitError::InvalidFormat(
3354 "EWAH RLW position points past word table".into(),
3355 ));
3356 }
3357 let mut word_idx = 0usize;
3358 let mut decoded_words = 0u64;
3359 while word_idx < words.len() {
3360 let rlw = words[word_idx];
3361 let run_words = (rlw >> 1) & 0xffff_ffff;
3362 let literal_words = (rlw >> 33) as usize;
3363 word_idx += 1;
3364 word_idx = word_idx
3365 .checked_add(literal_words)
3366 .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
3367 if word_idx > words.len() {
3368 return Err(GitError::InvalidFormat(
3369 "EWAH literal words extend past word table".into(),
3370 ));
3371 }
3372 decoded_words = decoded_words
3373 .checked_add(run_words)
3374 .and_then(|value| value.checked_add(literal_words as u64))
3375 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
3376 }
3377 let decoded_bits = decoded_words
3378 .checked_mul(64)
3379 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
3380 if decoded_bits < u64::from(bit_size) {
3381 return Err(GitError::InvalidFormat(
3382 "EWAH bitmap decodes fewer bits than declared".into(),
3383 ));
3384 }
3385 Ok(())
3386}
3387
3388impl MultiPackIndex {
3389 pub fn write(
3390 format: ObjectFormat,
3391 version: u8,
3392 pack_names: &[String],
3393 objects: &[MultiPackIndexEntry],
3394 ) -> Result<Vec<u8>> {
3395 Self::write_with_reverse_index(format, version, pack_names, objects, None)
3396 }
3397
3398 pub fn write_with_reverse_index(
3407 format: ObjectFormat,
3408 version: u8,
3409 pack_names: &[String],
3410 objects: &[MultiPackIndexEntry],
3411 preferred_pack: Option<u32>,
3412 ) -> Result<Vec<u8>> {
3413 Self::write_with_bitmap_packs(format, version, pack_names, objects, preferred_pack, None)
3414 }
3415
3416 pub fn write_with_bitmap_packs(
3417 format: ObjectFormat,
3418 version: u8,
3419 pack_names: &[String],
3420 objects: &[MultiPackIndexEntry],
3421 preferred_pack: Option<u32>,
3422 bitmapped_packs: Option<&[MultiPackBitmapPack]>,
3423 ) -> Result<Vec<u8>> {
3424 if let Some(preferred) = preferred_pack
3425 && preferred as usize >= pack_names.len()
3426 {
3427 return Err(GitError::InvalidFormat(format!(
3428 "preferred pack {preferred} out of range for {} packs",
3429 pack_names.len()
3430 )));
3431 }
3432 if version != 1 && version != 2 {
3433 return Err(GitError::Unsupported(format!(
3434 "multi-pack-index version {version}"
3435 )));
3436 }
3437 if pack_names.len() > u32::MAX as usize {
3438 return Err(GitError::InvalidFormat(
3439 "too many multi-pack-index packs".into(),
3440 ));
3441 }
3442 if objects.len() > u32::MAX as usize {
3443 return Err(GitError::InvalidFormat(
3444 "too many multi-pack-index objects".into(),
3445 ));
3446 }
3447 if let Some(bitmapped_packs) = bitmapped_packs {
3448 if bitmapped_packs.len() != pack_names.len() {
3449 return Err(GitError::InvalidFormat(
3450 "multi-pack-index BTMP pack count mismatch".into(),
3451 ));
3452 }
3453 for pack in bitmapped_packs {
3454 let bitmap_end = u64::from(pack.bitmap_pos)
3455 .checked_add(u64::from(pack.bitmap_nr))
3456 .ok_or_else(|| {
3457 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
3458 })?;
3459 if bitmap_end > objects.len() as u64 {
3460 return Err(GitError::InvalidFormat(
3461 "multi-pack-index BTMP range points past object table".into(),
3462 ));
3463 }
3464 }
3465 }
3466 validate_midx_pack_names(pack_names)?;
3467 if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
3468 return Err(GitError::InvalidFormat(
3469 "multi-pack-index v1 pack names must be sorted".into(),
3470 ));
3471 }
3472
3473 let mut objects = objects.iter().collect::<Vec<_>>();
3474 objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
3475 let mut previous_oid: Option<&ObjectId> = None;
3476 for object in &objects {
3477 if object.oid.format() != format {
3478 return Err(GitError::InvalidObjectId(
3479 "multi-pack-index object format does not match index format".into(),
3480 ));
3481 }
3482 if let Some(previous) = previous_oid
3483 && previous.as_bytes() == object.oid.as_bytes()
3484 {
3485 return Err(GitError::InvalidFormat(
3486 "multi-pack-index contains duplicate object ids".into(),
3487 ));
3488 }
3489 if object.pack_int_id as usize >= pack_names.len() {
3490 return Err(GitError::InvalidFormat(
3491 "multi-pack-index object points past pack table".into(),
3492 ));
3493 }
3494 previous_oid = Some(&object.oid);
3495 }
3496
3497 let mut large_offsets = Vec::new();
3498 let mut chunks = vec![
3499 (*b"PNAM", write_midx_pack_names(pack_names)),
3500 (*b"OIDF", write_midx_oid_fanout(&objects)?),
3501 (*b"OIDL", write_midx_oid_lookup(&objects)),
3502 (
3503 *b"OOFF",
3504 write_midx_object_offsets(&objects, &mut large_offsets)?,
3505 ),
3506 ];
3507 if !large_offsets.is_empty() {
3508 chunks.push((*b"LOFF", large_offsets));
3509 }
3510 if let Some(preferred) = preferred_pack {
3511 let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
3514 pseudo.sort_by_key(|&midx_pos| {
3515 let object = objects[midx_pos as usize];
3516 (
3517 object.pack_int_id != preferred,
3518 object.pack_int_id,
3519 object.offset,
3520 )
3521 });
3522 let mut ridx = Vec::with_capacity(pseudo.len() * 4);
3523 for midx_pos in pseudo {
3524 ridx.extend_from_slice(&midx_pos.to_be_bytes());
3525 }
3526 chunks.push((*b"RIDX", ridx));
3527 }
3528 if let Some(bitmapped_packs) = bitmapped_packs {
3529 let mut btmp = Vec::with_capacity(bitmapped_packs.len() * 8);
3530 for pack in bitmapped_packs {
3531 btmp.extend_from_slice(&pack.bitmap_pos.to_be_bytes());
3532 btmp.extend_from_slice(&pack.bitmap_nr.to_be_bytes());
3533 }
3534 chunks.push((*b"BTMP", btmp));
3535 }
3536 write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
3537 }
3538
3539 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
3540 Self::parse_impl(bytes, format, true)
3541 }
3542
3543 pub fn parse_without_checksum(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
3544 Self::parse_impl(bytes, format, false)
3545 }
3546
3547 fn parse_impl(bytes: &[u8], format: ObjectFormat, verify_checksum: bool) -> Result<Self> {
3548 let hash_len = format.raw_len();
3549 if bytes.len() < 12 + 12 + hash_len {
3550 return Err(GitError::InvalidFormat(
3551 "multi-pack-index file too short".into(),
3552 ));
3553 }
3554 if &bytes[..4] != b"MIDX" {
3555 return Err(GitError::InvalidFormat(
3556 "missing multi-pack-index signature".into(),
3557 ));
3558 }
3559 let version = bytes[4];
3560 if version != 1 && version != 2 {
3561 return Err(GitError::Unsupported(format!(
3562 "multi-pack-index version {version}"
3563 )));
3564 }
3565 let hash_id = bytes[5];
3566 if u32::from(hash_id) != hash_function_id(format) {
3567 return Err(GitError::InvalidFormat(format!(
3568 "multi-pack-index hash id {hash_id} does not match {}",
3569 format.name()
3570 )));
3571 }
3572 let chunk_count = bytes[6] as usize;
3573 let base_midx_count = bytes[7];
3574 if base_midx_count != 0 {
3575 return Err(GitError::Unsupported(format!(
3576 "multi-pack-index base count {base_midx_count}"
3577 )));
3578 }
3579 let pack_count = u32_be(&bytes[8..12]);
3580 let lookup_len = (chunk_count + 1)
3581 .checked_mul(12)
3582 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3583 let data_start = 12usize
3584 .checked_add(lookup_len)
3585 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3586 let checksum_offset = bytes.len() - hash_len;
3587 if data_start > checksum_offset {
3588 return Err(GitError::InvalidFormat(
3589 "truncated multi-pack-index chunk lookup".into(),
3590 ));
3591 }
3592
3593 let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
3594 if verify_checksum {
3595 let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
3596 if actual_checksum != checksum {
3597 return Err(GitError::InvalidFormat(format!(
3598 "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
3599 )));
3600 }
3601 }
3602
3603 let mut entries = Vec::with_capacity(chunk_count + 1);
3604 let mut offset = 12usize;
3605 for _ in 0..=chunk_count {
3606 let id = [
3607 bytes[offset],
3608 bytes[offset + 1],
3609 bytes[offset + 2],
3610 bytes[offset + 3],
3611 ];
3612 let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
3613 entries.push((id, chunk_offset));
3614 offset += 12;
3615 }
3616 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
3617 return Err(GitError::InvalidFormat(
3618 "multi-pack-index chunk lookup is empty".into(),
3619 ));
3620 };
3621 if terminator_id != [0, 0, 0, 0] {
3622 return Err(GitError::InvalidFormat(
3623 "multi-pack-index chunk lookup missing terminator".into(),
3624 ));
3625 }
3626 if terminator_offset != checksum_offset as u64 {
3627 return Err(GitError::InvalidFormat(
3628 "multi-pack-index terminator does not point at checksum".into(),
3629 ));
3630 }
3631
3632 let mut chunks = Vec::with_capacity(chunk_count);
3633 let mut previous_offset = data_start as u64;
3634 let mut reported_unaligned = false;
3635 for pair in entries.windows(2) {
3636 let (id, chunk_offset) = pair[0];
3637 let (_next_id, next_offset) = pair[1];
3638 if id == [0, 0, 0, 0] {
3639 return Err(GitError::InvalidFormat(
3640 "multi-pack-index chunk id is zero before terminator".into(),
3641 ));
3642 }
3643 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
3644 return Err(GitError::InvalidFormat(
3645 "multi-pack-index chunk offsets are not monotonic".into(),
3646 ));
3647 }
3648 if chunk_offset % 4 != 0 && !reported_unaligned {
3649 eprintln!(
3650 "error: chunk id {:08x} not 4-byte aligned",
3651 u32::from_be_bytes(id)
3652 );
3653 reported_unaligned = true;
3654 }
3655 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
3656 return Err(GitError::InvalidFormat(
3657 "multi-pack-index chunk length is invalid".into(),
3658 ));
3659 }
3660 chunks.push(MultiPackIndexChunk {
3661 id,
3662 offset: chunk_offset,
3663 len: next_offset - chunk_offset,
3664 });
3665 previous_offset = chunk_offset;
3666 }
3667
3668 let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
3669 let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
3670 let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
3671 let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
3672 let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
3673 let bitmapped_packs =
3674 parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
3675
3676 Ok(Self {
3677 version,
3678 format,
3679 pack_count,
3680 pack_names,
3681 object_count: object_count as u32,
3682 fanout,
3683 objects,
3684 reverse_index,
3685 bitmapped_packs,
3686 chunks,
3687 checksum,
3688 })
3689 }
3690
3691 pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
3692 self.objects
3693 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
3694 .ok()
3695 .map(|idx| &self.objects[idx])
3696 }
3697}
3698
3699impl MultiPackIndexOidLookup {
3700 pub fn parse(bytes: Arc<dyn PackIndexByteSource>, format: ObjectFormat) -> Result<Self> {
3701 let raw = bytes.as_bytes();
3702 let hash_len = format.raw_len();
3703 if raw.len() < 12 + 12 + hash_len {
3704 return Err(GitError::InvalidFormat(
3705 "multi-pack-index file too short".into(),
3706 ));
3707 }
3708 if &raw[..4] != b"MIDX" {
3709 return Err(GitError::InvalidFormat(
3710 "missing multi-pack-index signature".into(),
3711 ));
3712 }
3713 let version = raw[4];
3714 if version != 1 && version != 2 {
3715 return Err(GitError::Unsupported(format!(
3716 "multi-pack-index version {version}"
3717 )));
3718 }
3719 let hash_id = raw[5];
3720 if u32::from(hash_id) != hash_function_id(format) {
3721 return Err(GitError::InvalidFormat(format!(
3722 "multi-pack-index hash id {hash_id} does not match {}",
3723 format.name()
3724 )));
3725 }
3726 let chunk_count = raw[6] as usize;
3727 let base_midx_count = raw[7];
3728 if base_midx_count != 0 {
3729 return Err(GitError::Unsupported(format!(
3730 "multi-pack-index base count {base_midx_count}"
3731 )));
3732 }
3733 let pack_count = u32_be(&raw[8..12]);
3734 let lookup_len = (chunk_count + 1)
3735 .checked_mul(12)
3736 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3737 let data_start = 12usize
3738 .checked_add(lookup_len)
3739 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
3740 let checksum_offset = raw.len() - hash_len;
3741 if data_start > checksum_offset {
3742 return Err(GitError::InvalidFormat(
3743 "truncated multi-pack-index chunk lookup".into(),
3744 ));
3745 }
3746
3747 let mut entries = Vec::with_capacity(chunk_count + 1);
3748 let mut offset = 12usize;
3749 for _ in 0..=chunk_count {
3750 let id = [
3751 raw[offset],
3752 raw[offset + 1],
3753 raw[offset + 2],
3754 raw[offset + 3],
3755 ];
3756 let chunk_offset = u64_be(&raw[offset + 4..offset + 12]);
3757 entries.push((id, chunk_offset));
3758 offset += 12;
3759 }
3760 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
3761 return Err(GitError::InvalidFormat(
3762 "multi-pack-index chunk lookup is empty".into(),
3763 ));
3764 };
3765 if terminator_id != [0, 0, 0, 0] {
3766 return Err(GitError::InvalidFormat(
3767 "multi-pack-index chunk lookup missing terminator".into(),
3768 ));
3769 }
3770 if terminator_offset != checksum_offset as u64 {
3771 return Err(GitError::InvalidFormat(
3772 "multi-pack-index terminator does not point at checksum".into(),
3773 ));
3774 }
3775
3776 let mut chunks = Vec::with_capacity(chunk_count);
3777 let mut previous_offset = data_start as u64;
3778 let mut reported_unaligned = false;
3779 for pair in entries.windows(2) {
3780 let (id, chunk_offset) = pair[0];
3781 let (_next_id, next_offset) = pair[1];
3782 if id == [0, 0, 0, 0] {
3783 return Err(GitError::InvalidFormat(
3784 "multi-pack-index chunk id is zero before terminator".into(),
3785 ));
3786 }
3787 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
3788 return Err(GitError::InvalidFormat(
3789 "multi-pack-index chunk offsets are not monotonic".into(),
3790 ));
3791 }
3792 if chunk_offset % 4 != 0 && !reported_unaligned {
3793 eprintln!(
3794 "error: chunk id {:08x} not 4-byte aligned",
3795 u32::from_be_bytes(id)
3796 );
3797 reported_unaligned = true;
3798 }
3799 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
3800 return Err(GitError::InvalidFormat(
3801 "multi-pack-index chunk length is invalid".into(),
3802 ));
3803 }
3804 chunks.push(MultiPackIndexChunk {
3805 id,
3806 offset: chunk_offset,
3807 len: next_offset - chunk_offset,
3808 });
3809 previous_offset = chunk_offset;
3810 }
3811
3812 let pack_names = parse_midx_pack_names(raw, &chunks, pack_count as usize, version)?;
3813 let (fanout, object_count) = parse_midx_oid_fanout(raw, &chunks)?;
3814 let oid_lookup = midx_chunk_data(raw, &chunks, *b"OIDL", true)?
3815 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
3816 let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
3817 GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
3818 })?;
3819 if oid_lookup.len() != expected_len {
3820 return Err(GitError::InvalidFormat(
3821 "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
3822 ));
3823 }
3824 let object_offsets = midx_chunk_data(raw, &chunks, *b"OOFF", true)?
3825 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
3826 let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
3827 GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
3828 })?;
3829 if object_offsets.len() != expected_offsets_len {
3830 return Err(GitError::InvalidFormat(
3831 "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
3832 ));
3833 }
3834 let large_offsets = midx_chunk_data(raw, &chunks, *b"LOFF", false)?;
3835 if let Some(large_offsets) = large_offsets
3836 && large_offsets.len() % 8 != 0
3837 {
3838 return Err(GitError::InvalidFormat(
3839 "multi-pack-index LOFF chunk has invalid length".into(),
3840 ));
3841 }
3842 let oid_lookup_offset = oid_lookup.as_ptr() as usize - raw.as_ptr() as usize;
3843 let object_offsets_offset = object_offsets.as_ptr() as usize - raw.as_ptr() as usize;
3844 let (large_offsets_offset, large_offsets_len) = match large_offsets {
3845 Some(large_offsets) => (
3846 Some(large_offsets.as_ptr() as usize - raw.as_ptr() as usize),
3847 large_offsets.len(),
3848 ),
3849 None => (None, 0),
3850 };
3851 Ok(Self {
3852 format,
3853 pack_count,
3854 pack_names,
3855 fanout,
3856 object_count,
3857 oid_lookup_offset,
3858 object_offsets_offset,
3859 large_offsets_offset,
3860 large_offsets_len,
3861 bytes,
3862 })
3863 }
3864
3865 pub fn contains(&self, oid: &ObjectId) -> bool {
3866 self.find_position(oid).is_some()
3867 }
3868
3869 pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
3870 let Some(position) = self.find_position(oid) else {
3871 return Ok(None);
3872 };
3873 let bytes = self.bytes.as_bytes();
3874 let hash_len = self.format.raw_len();
3875 let oid_start = self
3876 .oid_lookup_offset
3877 .checked_add(position * hash_len)
3878 .ok_or_else(|| {
3879 GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
3880 })?;
3881 let oid = ObjectId::from_raw(self.format, &bytes[oid_start..oid_start + hash_len])?;
3882 let offset_start = self
3883 .object_offsets_offset
3884 .checked_add(position * 8)
3885 .ok_or_else(|| {
3886 GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
3887 })?;
3888 let data = &bytes[offset_start..offset_start + 8];
3889 let pack_int_id = u32_be(&data[..4]);
3890 if pack_int_id >= self.pack_count {
3891 return Err(GitError::InvalidFormat(
3892 "multi-pack-index object points past pack table".into(),
3893 ));
3894 }
3895 let raw_offset = u32_be(&data[4..8]);
3896 let offset = if raw_offset & 0x8000_0000 == 0 {
3897 u64::from(raw_offset)
3898 } else {
3899 let Some(large_offsets_offset) = self.large_offsets_offset else {
3900 return Err(GitError::InvalidFormat(
3901 "multi-pack-index large offset missing LOFF chunk".into(),
3902 ));
3903 };
3904 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
3905 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
3906 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3907 })?;
3908 let large_end = large_start.checked_add(8).ok_or_else(|| {
3909 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
3910 })?;
3911 if large_end > self.large_offsets_len {
3912 return Err(GitError::InvalidFormat(
3913 "fatal: multi-pack-index large offset out of bounds".into(),
3914 ));
3915 }
3916 let start = large_offsets_offset + large_start;
3917 u64_be(&bytes[start..start + 8])
3918 };
3919 Ok(Some(MultiPackIndexEntry {
3920 oid,
3921 pack_int_id,
3922 offset,
3923 force_large_offset: raw_offset & 0x8000_0000 != 0,
3924 }))
3925 }
3926
3927 pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
3928 self.pack_names
3929 .get(pack_int_id as usize)
3930 .map(String::as_str)
3931 }
3932
3933 fn find_position(&self, oid: &ObjectId) -> Option<usize> {
3934 if oid.format() != self.format || self.object_count == 0 {
3935 return None;
3936 }
3937 let first = oid.as_bytes()[0] as usize;
3938 let start = if first == 0 {
3939 0
3940 } else {
3941 self.fanout[first - 1] as usize
3942 };
3943 let end = self.fanout[first] as usize;
3944 if start >= end || end > self.object_count {
3945 return None;
3946 }
3947 let hash_len = self.format.raw_len();
3948 let table_start = self.oid_lookup_offset;
3949 let table_end = table_start + self.object_count * hash_len;
3950 let bytes = self.bytes.as_bytes();
3951 let table = &bytes[table_start..table_end];
3952 let needle = oid.as_bytes();
3953 let mut low = start;
3954 let mut high = end;
3955 while low < high {
3956 let mid = low + (high - low) / 2;
3957 let raw = &table[mid * hash_len..(mid + 1) * hash_len];
3958 match raw.cmp(needle) {
3959 std::cmp::Ordering::Less => low = mid + 1,
3960 std::cmp::Ordering::Equal => return Some(mid),
3961 std::cmp::Ordering::Greater => high = mid,
3962 }
3963 }
3964 None
3965 }
3966}
3967
3968fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
3969 for name in pack_names {
3970 if name.is_empty() {
3971 return Err(GitError::InvalidFormat(
3972 "multi-pack-index pack name is empty".into(),
3973 ));
3974 }
3975 if name
3976 .bytes()
3977 .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
3978 {
3979 return Err(GitError::InvalidFormat(
3980 "multi-pack-index pack name contains an invalid byte".into(),
3981 ));
3982 }
3983 }
3984 Ok(())
3985}
3986
/// Encodes the `PNAM` payload: every pack name followed by a NUL byte, then
/// zero padding up to the next multiple of four bytes.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let unpadded: usize = pack_names.iter().map(|name| name.len() + 1).sum();
    let padded = (unpadded + 3) / 4 * 4;

    let mut payload = Vec::with_capacity(padded);
    for name in pack_names {
        payload.extend_from_slice(name.as_bytes());
        payload.push(0);
    }
    payload.resize(padded, 0);
    payload
}
3998
3999fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
4000 let mut counts = [0u32; 256];
4001 for object in objects {
4002 let first = object.oid.as_bytes()[0] as usize;
4003 counts[first] = counts[first]
4004 .checked_add(1)
4005 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4006 }
4007 let mut running = 0u32;
4008 let mut out = Vec::with_capacity(256 * 4);
4009 for count in counts {
4010 running = running
4011 .checked_add(count)
4012 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4013 out.extend_from_slice(&running.to_be_bytes());
4014 }
4015 Ok(out)
4016}
4017
4018fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
4019 let mut out = Vec::new();
4020 for object in objects {
4021 out.extend_from_slice(object.oid.as_bytes());
4022 }
4023 out
4024}
4025
4026fn write_midx_object_offsets(
4027 objects: &[&MultiPackIndexEntry],
4028 large_offsets: &mut Vec<u8>,
4029) -> Result<Vec<u8>> {
4030 let mut out = Vec::new();
4031 for object in objects {
4032 out.extend_from_slice(&object.pack_int_id.to_be_bytes());
4033 if object.offset < 0x8000_0000 && !object.force_large_offset {
4034 out.extend_from_slice(&(object.offset as u32).to_be_bytes());
4035 } else {
4036 let large_idx = large_offsets.len() / 8;
4037 if large_idx > 0x7fff_ffff {
4038 return Err(GitError::InvalidFormat(
4039 "too many multi-pack-index large offsets".into(),
4040 ));
4041 }
4042 out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
4043 large_offsets.extend_from_slice(&object.offset.to_be_bytes());
4044 }
4045 }
4046 Ok(out)
4047}
4048
4049fn write_multi_pack_index_chunks(
4050 format: ObjectFormat,
4051 version: u8,
4052 pack_count: u32,
4053 chunks: &[([u8; 4], Vec<u8>)],
4054) -> Result<Vec<u8>> {
4055 if chunks.len() > u8::MAX as usize {
4056 return Err(GitError::InvalidFormat(
4057 "too many multi-pack-index chunks".into(),
4058 ));
4059 }
4060 let lookup_len = (chunks.len() + 1)
4061 .checked_mul(12)
4062 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
4063 let mut out = Vec::new();
4064 out.extend_from_slice(b"MIDX");
4065 out.push(version);
4066 out.push(hash_function_id(format) as u8);
4067 out.push(chunks.len() as u8);
4068 out.push(0);
4069 out.extend_from_slice(&pack_count.to_be_bytes());
4070 let mut chunk_offset = (12usize)
4071 .checked_add(lookup_len)
4072 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
4073 as u64;
4074 for (id, data) in chunks {
4075 out.extend_from_slice(id);
4076 out.extend_from_slice(&chunk_offset.to_be_bytes());
4077 chunk_offset = chunk_offset
4078 .checked_add(data.len() as u64)
4079 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
4080 }
4081 out.extend_from_slice(&[0, 0, 0, 0]);
4082 out.extend_from_slice(&chunk_offset.to_be_bytes());
4083 for (_id, data) in chunks {
4084 out.extend_from_slice(data);
4085 }
4086 let checksum = sley_core::digest_bytes(format, &out)?;
4087 out.extend_from_slice(checksum.as_bytes());
4088 Ok(out)
4089}
4090
4091#[derive(Debug, Clone, Copy, PartialEq, Eq)]
4092struct EntryHeader {
4093 kind: PackObjectKind,
4094 size: u64,
4095}
4096
4097pub trait PackDeltaCache {
4111 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
4113 fn insert(&self, offset: u64, object: Arc<EncodedObject>);
4115}
4116
4117struct NoopDeltaCache;
4120
4121impl PackDeltaCache for NoopDeltaCache {
4122 fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
4123 None
4124 }
4125 fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
4126}
4127
4128thread_local! {
4134 static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
4135}
4136
/// Assumed upper bound on how many output bytes a single compressed input byte
/// can inflate to. `bounded_inflate_reserve` multiplies the compressed length
/// by this factor to cap the buffer space reserved before inflating.
///
/// NOTE(review): 1032 matches the maximum deflate expansion ratio quoted in
/// zlib's technical notes (about 1032:1) — confirm that is the intended source.
const MAX_INFLATE_EXPANSION: usize = 1_032;
4148
/// Largest buffer space, in bytes (64 MiB), that `bounded_inflate_reserve`
/// will ask for up front. Larger outputs still inflate; the buffer simply
/// grows while decompressing.
const MAX_INFLATE_RESERVE: usize = 64 << 20;
4155
4156fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
4164 let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
4165 size_hint.min(input_ceiling).clamp(64, MAX_INFLATE_RESERVE)
4167}
4168
4169fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
4178 INFLATE.with(|cell| {
4179 let mut decompress = cell.borrow_mut();
4180 decompress.reset(true);
4181 out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
4182 let mut input = compressed;
4183 let mut consumed_total = 0usize;
4184 loop {
4185 if out.len() == out.capacity() {
4188 out.reserve(out.len().max(64));
4189 }
4190 let before_in = decompress.total_in();
4191 let before_out = decompress.total_out();
4192 let status = decompress
4193 .decompress_vec(input, out, flate2::FlushDecompress::None)
4194 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
4195 let consumed = (decompress.total_in() - before_in) as usize;
4196 let produced = decompress.total_out() - before_out;
4197 input = &input[consumed..];
4198 consumed_total += consumed;
4199 match status {
4200 flate2::Status::StreamEnd => return Ok(consumed_total),
4201 _ if consumed == 0 && produced == 0 => {
4202 return Err(GitError::InvalidObject("truncated zlib stream".into()));
4203 }
4204 _ => {}
4205 }
4206 }
4207 })
4208}
4209
4210fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
4214 INFLATE.with(|cell| {
4215 let mut decompress = cell.borrow_mut();
4216 decompress.reset(true);
4217 out.reserve(max_out.max(16));
4218 let mut input = compressed;
4219 while out.len() < max_out {
4220 if out.len() == out.capacity() {
4221 out.reserve(out.len().max(16));
4222 }
4223 let before_in = decompress.total_in();
4224 let before_out = decompress.total_out();
4225 let status = decompress
4226 .decompress_vec(input, out, flate2::FlushDecompress::None)
4227 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
4228 let consumed = (decompress.total_in() - before_in) as usize;
4229 let produced = decompress.total_out() - before_out;
4230 input = &input[consumed..];
4231 if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
4232 break;
4233 }
4234 }
4235 Ok(())
4236 })
4237}
4238
4239pub fn read_object_at_arc<F>(
4247 pack_bytes: &[u8],
4248 offset: u64,
4249 format: ObjectFormat,
4250 resolve_ref_base: F,
4251) -> Result<Arc<EncodedObject>>
4252where
4253 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
4254{
4255 read_object_at_with_cache_arc(
4256 pack_bytes,
4257 offset,
4258 format,
4259 resolve_ref_base,
4260 &NoopDeltaCache,
4261 )
4262}
4263
4264pub fn read_object_at_with_cache_arc<F, C>(
4273 pack_bytes: &[u8],
4274 offset: u64,
4275 format: ObjectFormat,
4276 mut resolve_ref_base: F,
4277 cache: &C,
4278) -> Result<Arc<EncodedObject>>
4279where
4280 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
4281 C: PackDeltaCache + ?Sized,
4282{
4283 read_object_at_with_cache_and_ofs_base_arc(
4284 pack_bytes,
4285 offset,
4286 format,
4287 &mut resolve_ref_base,
4288 |_offset| Ok(None),
4289 cache,
4290 )
4291}
4292
4293pub fn read_object_at_with_cache_and_ofs_base_arc<F, G, C>(
4299 pack_bytes: &[u8],
4300 offset: u64,
4301 format: ObjectFormat,
4302 mut resolve_ref_base: F,
4303 mut resolve_ofs_base: G,
4304 cache: &C,
4305) -> Result<Arc<EncodedObject>>
4306where
4307 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
4308 G: FnMut(u64) -> Result<Option<Arc<EncodedObject>>>,
4309 C: PackDeltaCache + ?Sized,
4310{
4311 read_object_at_inner(
4312 pack_bytes,
4313 offset,
4314 format,
4315 &mut resolve_ref_base,
4316 &mut resolve_ofs_base,
4317 cache,
4318 )
4319}
4320
4321pub fn read_object_at_with_ofs_base_arc<F, G>(
4323 pack_bytes: &[u8],
4324 offset: u64,
4325 format: ObjectFormat,
4326 resolve_ref_base: F,
4327 resolve_ofs_base: G,
4328) -> Result<Arc<EncodedObject>>
4329where
4330 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
4331 G: FnMut(u64) -> Result<Option<Arc<EncodedObject>>>,
4332{
4333 read_object_at_with_cache_and_ofs_base_arc(
4334 pack_bytes,
4335 offset,
4336 format,
4337 resolve_ref_base,
4338 resolve_ofs_base,
4339 &NoopDeltaCache,
4340 )
4341}
4342
4343fn read_object_at_inner<F, G, C>(
4344 pack_bytes: &[u8],
4345 offset: u64,
4346 format: ObjectFormat,
4347 resolve_ref_base: &mut F,
4348 resolve_ofs_base: &mut G,
4349 cache: &C,
4350) -> Result<Arc<EncodedObject>>
4351where
4352 F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
4353 G: FnMut(u64) -> Result<Option<Arc<EncodedObject>>>,
4354 C: PackDeltaCache + ?Sized,
4355{
4356 if let Some(object) = cache.get(offset) {
4359 return Ok(object);
4360 }
4361 let trailer_offset = pack_bytes
4362 .len()
4363 .checked_sub(format.raw_len())
4364 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
4365 let mut cursor = usize::try_from(offset)
4366 .ok()
4367 .filter(|&value| value < trailer_offset)
4368 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
4369 let header = parse_entry_header(pack_bytes, &mut cursor)?;
4370 let base = match header.kind {
4371 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
4372 pack_bytes,
4373 &mut cursor,
4374 offset,
4375 )?)),
4376 PackObjectKind::RefDelta => {
4377 let hash_len = format.raw_len();
4378 if cursor + hash_len > trailer_offset {
4379 return Err(GitError::InvalidFormat(
4380 "truncated ref-delta base object id".into(),
4381 ));
4382 }
4383 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
4384 cursor += hash_len;
4385 Some(DeltaBase::Ref(oid))
4386 }
4387 _ => None,
4388 };
4389 let mut body = Vec::new();
4390 inflate_into(
4391 &pack_bytes[cursor..trailer_offset],
4392 &mut body,
4393 header.size.min(usize::MAX as u64) as usize,
4394 )?;
4395 if body.len() as u64 != header.size {
4396 return Err(GitError::InvalidObject(format!(
4397 "pack object declared {} bytes, decoded {}",
4398 header.size,
4399 body.len()
4400 )));
4401 }
4402 let object = match base {
4403 None => {
4404 let object_type = match header.kind {
4405 PackObjectKind::Commit => ObjectType::Commit,
4406 PackObjectKind::Tree => ObjectType::Tree,
4407 PackObjectKind::Blob => ObjectType::Blob,
4408 PackObjectKind::Tag => ObjectType::Tag,
4409 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
4410 return Err(GitError::InvalidFormat(
4411 "delta pack entry decoded without a base".into(),
4412 ));
4413 }
4414 };
4415 Arc::new(EncodedObject::new(object_type, body))
4416 }
4417 Some(DeltaBase::Offset(base_offset)) => {
4418 let base = match read_object_at_inner(
4419 pack_bytes,
4420 base_offset,
4421 format,
4422 resolve_ref_base,
4423 resolve_ofs_base,
4424 cache,
4425 ) {
4426 Ok(base) => base,
4427 Err(pack_err) => match resolve_ofs_base(base_offset)? {
4428 Some(base) => base,
4429 None => return Err(pack_err),
4430 },
4431 };
4432 let resolved = apply_pack_delta(&base.body, &body)?;
4433 Arc::new(EncodedObject::new(base.object_type, resolved))
4434 }
4435 Some(DeltaBase::Ref(base_oid)) => {
4436 let base = resolve_ref_base(&base_oid)?
4437 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
4438 let resolved = apply_pack_delta(&base.body, &body)?;
4439 Arc::new(EncodedObject::new(base.object_type, resolved))
4440 }
4441 };
4442 cache.insert(offset, Arc::clone(&object));
4446 Ok(object)
4447}
4448
/// Resolves the object type and size of the pack entry at `offset` without
/// materialising the object.
///
/// For non-delta entries the size is the one declared in the entry header.
/// For delta entries the type is the type of the (transitive) base and the
/// size is the result size read from the start of the delta stream.
/// `resolve_ref_base_type` supplies the type of ref-delta bases.
///
/// No header memoisation is performed (a no-op cache is used).
///
/// # Errors
///
/// Propagates parsing/inflation errors and reports a not-found error when a
/// ref-delta base type cannot be resolved.
pub fn read_object_header_at<F>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base_type: F,
) -> Result<(ObjectType, u64)>
where
    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
{
    read_object_header_at_inner(
        pack_bytes,
        offset,
        format,
        &mut resolve_ref_base_type,
        &mut NoopHeaderTypeCache,
    )
}
4475
/// Memo of resolved `(object type, size)` headers keyed by pack offset.
///
/// The header reader looks up ofs-delta bases through [`HeaderTypeCache::get`]
/// before walking the chain and records every header it resolves through
/// [`HeaderTypeCache::put`].
pub trait HeaderTypeCache {
    /// Returns the previously recorded header for `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Records the resolved `header` for the entry at `pack_offset`.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
4498
/// [`HeaderTypeCache`] that remembers nothing: every lookup misses and every
/// store is discarded.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always misses.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
4507
4508pub fn read_object_header_at_with_cache<F, C>(
4514 pack_bytes: &[u8],
4515 offset: u64,
4516 format: ObjectFormat,
4517 mut resolve_ref_base_type: F,
4518 type_cache: &mut C,
4519) -> Result<(ObjectType, u64)>
4520where
4521 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4522 C: HeaderTypeCache + ?Sized,
4523{
4524 if let Some(header) = type_cache.get(offset) {
4525 return Ok(header);
4526 }
4527 read_object_header_at_inner(
4528 pack_bytes,
4529 offset,
4530 format,
4531 &mut resolve_ref_base_type,
4532 type_cache,
4533 )
4534}
4535
4536fn read_object_header_at_inner<F, C>(
4537 pack_bytes: &[u8],
4538 offset: u64,
4539 format: ObjectFormat,
4540 resolve_ref_base_type: &mut F,
4541 type_cache: &mut C,
4542) -> Result<(ObjectType, u64)>
4543where
4544 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
4545 C: HeaderTypeCache + ?Sized,
4546{
4547 let trailer_offset = pack_bytes
4548 .len()
4549 .checked_sub(format.raw_len())
4550 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
4551 let mut cursor = usize::try_from(offset)
4552 .ok()
4553 .filter(|&value| value < trailer_offset)
4554 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
4555 let header = parse_entry_header(pack_bytes, &mut cursor)?;
4556 let resolved = match header.kind {
4557 PackObjectKind::Commit => (ObjectType::Commit, header.size),
4558 PackObjectKind::Tree => (ObjectType::Tree, header.size),
4559 PackObjectKind::Blob => (ObjectType::Blob, header.size),
4560 PackObjectKind::Tag => (ObjectType::Tag, header.size),
4561 PackObjectKind::OfsDelta => {
4562 let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
4563 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4564 let base_type = match type_cache.get(base_offset) {
4567 Some((base_type, _)) => base_type,
4568 None => {
4569 let (base_type, _) = read_object_header_at_inner(
4570 pack_bytes,
4571 base_offset,
4572 format,
4573 resolve_ref_base_type,
4574 type_cache,
4575 )?;
4576 base_type
4577 }
4578 };
4579 (base_type, size)
4580 }
4581 PackObjectKind::RefDelta => {
4582 let hash_len = format.raw_len();
4583 if cursor + hash_len > trailer_offset {
4584 return Err(GitError::InvalidFormat(
4585 "truncated ref-delta base object id".into(),
4586 ));
4587 }
4588 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
4589 cursor += hash_len;
4590 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
4591 let base_type = resolve_ref_base_type(&oid)?
4592 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
4593 (base_type, size)
4594 }
4595 };
4596 type_cache.put(offset, resolved);
4599 Ok(resolved)
4600}
4601
/// Number of bytes inflated from the start of a delta stream when only its
/// header (the two size varints) is needed.
// NOTE(review): presumably chosen to comfortably hold both varints — confirm.
const DELTA_HEADER_PREFIX_LEN: usize = 32;

/// Reads the result size declared by a zlib-compressed delta without
/// inflating the whole stream: only a `DELTA_HEADER_PREFIX_LEN`-byte prefix is
/// requested from `inflate_prefix` and then parsed as a delta header.
///
/// # Errors
///
/// Propagates errors from `inflate_prefix` and from parsing the delta header.
fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
    let mut prefix = Vec::new();
    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
    decoded_delta_result_size(&prefix)
}
4614
4615fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
4616 let first = next_byte(bytes, offset)?;
4617 let mut size = u64::from(first & 0x0f);
4618 let kind = match (first >> 4) & 0x07 {
4619 1 => PackObjectKind::Commit,
4620 2 => PackObjectKind::Tree,
4621 3 => PackObjectKind::Blob,
4622 4 => PackObjectKind::Tag,
4623 6 => PackObjectKind::OfsDelta,
4624 7 => PackObjectKind::RefDelta,
4625 other => {
4626 return Err(GitError::InvalidFormat(format!(
4627 "invalid pack object type {other}"
4628 )));
4629 }
4630 };
4631 let mut shift = 4;
4632 let mut byte = first;
4633 while byte & 0x80 != 0 {
4634 byte = next_byte(bytes, offset)?;
4635 let part = u64::from(byte & 0x7f);
4636 size = size
4637 .checked_add(
4638 part.checked_shl(shift)
4639 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
4640 )
4641 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
4642 shift += 7;
4643 }
4644 Ok(EntryHeader { kind, size })
4645}
4646
4647fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
4648 let mut byte = next_byte(bytes, offset)?;
4649 let mut relative = u64::from(byte & 0x7f);
4650 while byte & 0x80 != 0 {
4651 byte = next_byte(bytes, offset)?;
4652 relative = relative
4653 .checked_add(1)
4654 .and_then(|value| value.checked_shl(7))
4655 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
4656 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
4657 }
4658 entry_offset
4659 .checked_sub(relative)
4660 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
4661}
4662
/// Turns a list of parsed pack entries into fully resolved objects by
/// repeatedly applying deltas whose base is already available.
///
/// Bases are found by pack offset (ofs-delta), by object id among the objects
/// resolved so far (ref-delta), or through `external_base` for ref-deltas
/// whose base is not in the pack. The result preserves the order of `parsed`.
///
/// # Errors
///
/// * `Unsupported("unresolved delta base")` when a full pass over the
///   remaining deltas resolves nothing.
/// * `InvalidObject` when a resolved object's length disagrees with the delta
///   header, or the decoded delta length disagrees with the declared one.
/// * Anything propagated from base lookup, delta application or hashing.
fn resolve_pack_entries<F>(
    parsed: Vec<ParsedPackEntry>,
    format: ObjectFormat,
    external_base: &mut F,
) -> Result<Vec<PackObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
{
    // Map every entry's pack offset to its position so ofs-deltas can find
    // their base.
    let mut offset_to_index = HashMap::with_capacity(parsed.len());
    for (idx, entry) in parsed.iter().enumerate() {
        offset_to_index.insert(parsed_entry_offset(entry), idx);
    }

    // Seed the result with the entries that were not deltas and count the
    // deltas that still need a base.
    let mut resolved = vec![None; parsed.len()];
    let mut oid_to_index = HashMap::new();
    let mut unresolved = 0usize;
    for (idx, entry) in parsed.iter().enumerate() {
        match entry {
            ParsedPackEntry::Resolved(object) => {
                oid_to_index.insert(object.entry.oid, idx);
                resolved[idx] = Some(object.clone());
            }
            ParsedPackEntry::Delta { .. } => unresolved += 1,
        }
    }

    // Fixpoint: each pass resolves every delta whose base is available now;
    // a pass without progress means the remaining bases can never appear.
    while unresolved != 0 {
        let mut progress = false;
        for idx in 0..parsed.len() {
            if resolved[idx].is_some() {
                continue;
            }
            let ParsedPackEntry::Delta {
                base,
                compressed_size,
                delta_size,
                offset,
                delta,
            } = &parsed[idx]
            else {
                continue;
            };
            // `None` here means "base not resolved yet"; retry on a later pass.
            let Some(base_object) = delta_base_object(
                base,
                &offset_to_index,
                &oid_to_index,
                &resolved,
                external_base,
            )?
            else {
                continue;
            };
            let body = apply_pack_delta(base_object.body(), delta)?;
            // A delta result always has the type of its base.
            let object = EncodedObject::new(base_object.object_type(), body);
            let oid = object.object_id(format)?;
            let pack_object = PackObject {
                entry: PackEntry {
                    oid,
                    compressed_size: *compressed_size,
                    uncompressed_size: object.body.len() as u64,
                    offset: *offset,
                },
                object,
            };
            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
                return Err(GitError::InvalidObject(
                    "resolved delta size does not match delta header".into(),
                ));
            }
            if *delta_size != delta.len() as u64 {
                return Err(GitError::InvalidObject(format!(
                    "pack delta declared {delta_size} bytes, decoded {}",
                    delta.len()
                )));
            }
            // Make the freshly resolved object available as a ref-delta base.
            oid_to_index.insert(oid, idx);
            resolved[idx] = Some(pack_object);
            unresolved -= 1;
            progress = true;
        }
        if !progress {
            return Err(GitError::Unsupported("unresolved delta base".into()));
        }
    }

    resolved
        .into_iter()
        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
        .collect()
}
4753
4754fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
4755 match entry {
4756 ParsedPackEntry::Resolved(object) => object.entry.offset,
4757 ParsedPackEntry::Delta { offset, .. } => *offset,
4758 }
4759}
4760
4761enum DeltaBaseObject<'a> {
4762 Borrowed(&'a EncodedObject),
4763 Owned(EncodedObject),
4764}
4765
4766impl DeltaBaseObject<'_> {
4767 fn object_type(&self) -> ObjectType {
4768 match self {
4769 Self::Borrowed(object) => object.object_type,
4770 Self::Owned(object) => object.object_type,
4771 }
4772 }
4773
4774 fn body(&self) -> &[u8] {
4775 match self {
4776 Self::Borrowed(object) => &object.body,
4777 Self::Owned(object) => &object.body,
4778 }
4779 }
4780}
4781
4782fn delta_base_object<'a, F>(
4783 base: &DeltaBase,
4784 offset_to_index: &HashMap<u64, usize>,
4785 oid_to_index: &HashMap<ObjectId, usize>,
4786 resolved: &'a [Option<PackObject>],
4787 external_base: &mut F,
4788) -> Result<Option<DeltaBaseObject<'a>>>
4789where
4790 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
4791{
4792 match base {
4793 DeltaBase::Offset(offset) => {
4794 let Some(index) = offset_to_index.get(offset).copied() else {
4795 return Err(GitError::InvalidFormat(format!(
4796 "ofs-delta base offset {offset} not found"
4797 )));
4798 };
4799 Ok(resolved[index]
4800 .as_ref()
4801 .map(|object| DeltaBaseObject::Borrowed(&object.object)))
4802 }
4803 DeltaBase::Ref(oid) => {
4804 if let Some(index) = oid_to_index.get(oid).copied() {
4805 return Ok(resolved[index]
4806 .as_ref()
4807 .map(|object| DeltaBaseObject::Borrowed(&object.object)));
4808 }
4809 external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
4810 }
4811 }
4812}
4813
4814fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
4815 let mut cursor = 0usize;
4816 let base_size = read_delta_varint(delta, &mut cursor)?;
4817 if base_size != base.len() as u64 {
4818 return Err(GitError::InvalidObject(format!(
4819 "delta base size mismatch: expected {base_size}, got {}",
4820 base.len()
4821 )));
4822 }
4823 let result_size = read_delta_varint(delta, &mut cursor)?;
4824 let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
4833 let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
4834 while cursor < delta.len() {
4835 let command = delta[cursor];
4836 cursor += 1;
4837 if command & 0x80 != 0 {
4838 let copy_offset =
4839 read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
4840 let mut copy_size =
4841 read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
4842 if copy_size == 0 {
4843 copy_size = 0x10000;
4844 }
4845 let start = usize::try_from(copy_offset)
4846 .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
4847 let len = usize::try_from(copy_size)
4848 .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
4849 let end = start
4850 .checked_add(len)
4851 .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
4852 let Some(slice) = base.get(start..end) else {
4853 return Err(GitError::InvalidObject(
4854 "delta copy range exceeds base object".into(),
4855 ));
4856 };
4857 result.extend_from_slice(slice);
4858 } else if command != 0 {
4859 let len = usize::from(command);
4860 let end = cursor
4861 .checked_add(len)
4862 .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
4863 let Some(slice) = delta.get(cursor..end) else {
4864 return Err(GitError::InvalidObject(
4865 "delta insert range exceeds delta data".into(),
4866 ));
4867 };
4868 result.extend_from_slice(slice);
4869 cursor = end;
4870 } else {
4871 return Err(GitError::InvalidObject(
4872 "delta contains reserved zero command".into(),
4873 ));
4874 }
4875 }
4876 if result.len() as u64 != result_size {
4877 return Err(GitError::InvalidObject(format!(
4878 "delta result size mismatch: expected {result_size}, got {}",
4879 result.len()
4880 )));
4881 }
4882 Ok(result)
4883}
4884
4885fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
4886 let mut cursor = 0usize;
4887 let _ = read_delta_varint(delta, &mut cursor)?;
4888 read_delta_varint(delta, &mut cursor)
4889}
4890
/// Length in bytes of the blocks that are hashed for delta matching; also the
/// minimum match length for which a copy command is emitted.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance in bytes between consecutive indexed anchors (one anchor per
/// block, so anchors do not overlap).
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Number of low hash bits used to select a bucket in the delta index.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of buckets in the delta index.
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask extracting the bucket number from a block hash.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
4906
/// Hash index over fixed-size blocks of a base buffer, used to find copyable
/// ranges when encoding a target as a delta against that base.
struct DeltaIndex<'a> {
    /// The indexed base bytes.
    base: &'a [u8],
    /// All anchors, grouped so that the anchors of one bucket are contiguous.
    blocks: Vec<DeltaBlock>,
    /// Bucket boundaries: bucket `b` owns `blocks[buckets[b]..buckets[b + 1]]`.
    buckets: Vec<usize>,
}

/// One indexed anchor: the hash of a `DELTA_BLOCK_SIZE`-byte block and the
/// offset in the base at which that block starts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DeltaBlock {
    hash: u32,
    offset: usize,
}
4924
impl<'a> DeltaIndex<'a> {
    /// Builds the index for `base` by hashing every anchor block and grouping
    /// the anchors by bucket with a counting sort.
    fn new(base: &'a [u8]) -> Self {
        // Pass 1: hash each anchor and count anchors per bucket (shifted by
        // one slot so the prefix sum below yields start offsets).
        let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
        let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
        for_each_delta_anchor(base.len(), |offset| {
            let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
            buckets[delta_bucket(hash) + 1] += 1;
            anchors.push(DeltaBlock { hash, offset });
        });
        // Prefix sum: `buckets[b]` becomes the start of bucket `b` and
        // `buckets[b + 1]` its end.
        for idx in 1..buckets.len() {
            buckets[idx] += buckets[idx - 1];
        }

        // Pass 2: scatter the anchors into their bucket ranges, keeping the
        // visiting order within each bucket.
        let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
        let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
        for anchor in anchors {
            let bucket = delta_bucket(anchor.hash);
            let next = &mut next_offsets[bucket];
            blocks[*next] = anchor;
            *next += 1;
        }

        Self {
            base,
            blocks,
            buckets,
        }
    }

    /// Iterates over the indexed anchors whose full hash equals `hash`.
    fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
        let bucket = delta_bucket(hash);
        let start = self.buckets[bucket];
        let end = self.buckets[bucket + 1];
        // Several hashes share a bucket, so filter on the complete hash.
        self.blocks[start..end]
            .iter()
            .filter(move |block| block.hash == hash)
    }

    /// Reports whether any indexed anchor has exactly this hash.
    fn has_hash(&self, hash: u32) -> bool {
        self.candidate_blocks(hash).next().is_some()
    }

    /// Cheap pre-check: probes `target` at the same stride used for indexing
    /// (plus its final block) and reports whether any probed block hash also
    /// occurs in the index.
    fn has_shared_anchor(&self, target: &[u8]) -> bool {
        if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
            return false;
        }
        let last = target.len() - DELTA_BLOCK_SIZE;
        for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
            let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
            if self.has_hash(hash) {
                return true;
            }
        }
        // The strided walk misses the final block unless it is aligned.
        if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
            let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
            if self.has_hash(hash) {
                return true;
            }
        }
        false
    }

    /// Encodes `target` as a delta against the indexed base.
    ///
    /// Returns `None` when the pre-check finds no shared anchor. Otherwise the
    /// delta consists of the base and target sizes followed by greedily chosen
    /// copy commands (matches of at least `DELTA_BLOCK_SIZE` bytes) and insert
    /// commands for everything in between.
    fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
        if !self.has_shared_anchor(target) {
            return None;
        }
        let base = self.base;
        let mut delta = Vec::new();
        write_delta_varint(&mut delta, base.len() as u64);
        write_delta_varint(&mut delta, target.len() as u64);

        // Bytes in `pending_insert_start..pos` have not been emitted yet and
        // will become a literal insert.
        let mut pending_insert_start = 0usize;
        let mut pos = 0usize;
        while pos < target.len() {
            let mut best_len = 0usize;
            let mut best_offset = 0usize;
            if pos + DELTA_BLOCK_SIZE <= target.len() {
                let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
                // Examine at most DELTA_MAX_CHAIN candidates per position.
                for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
                    let candidate = candidate.offset;
                    // Extend the match byte by byte; a hash collision simply
                    // yields a short match that is rejected below.
                    let max_len = (base.len() - candidate).min(target.len() - pos);
                    let mut len = 0usize;
                    while len < max_len && base[candidate + len] == target[pos + len] {
                        len += 1;
                    }
                    if len > best_len {
                        best_len = len;
                        best_offset = candidate;
                    }
                }
            }

            if best_len >= DELTA_BLOCK_SIZE {
                // Flush literals that precede the match, then emit the copy.
                if pending_insert_start < pos {
                    write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
                }
                write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
                pos += best_len;
                pending_insert_start = pos;
            } else {
                pos += 1;
            }
        }
        if pending_insert_start < target.len() {
            write_delta_insert(&mut delta, &target[pending_insert_start..]);
        }
        Some(delta)
    }
}
5037
5038fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
5039 if len < DELTA_BLOCK_SIZE {
5040 return;
5041 }
5042 len -= DELTA_BLOCK_SIZE;
5043 for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
5044 visit(offset);
5045 }
5046 if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
5047 visit(len);
5048 }
5049}
5050
5051fn delta_anchor_count(len: usize) -> usize {
5052 if len < DELTA_BLOCK_SIZE {
5053 return 0;
5054 }
5055 let last = len - DELTA_BLOCK_SIZE;
5056 (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
5057}
5058
/// Maps a block hash to its bucket number by keeping the low
/// `DELTA_BUCKET_BITS` bits.
fn delta_bucket(hash: u32) -> usize {
    (hash as usize) & DELTA_BUCKET_MASK
}

/// Upper bound on the number of same-hash candidates examined per target
/// position while searching for the longest match.
const DELTA_MAX_CHAIN: usize = 64;
5066
/// Hashes a block of bytes: starting from zero, each byte is folded in by
/// multiplying the running hash by `0x0100_0193` (wrapping) and XOR-ing the
/// byte.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |acc, &byte| {
        acc.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
5079
/// How a single object is planned to be stored in the pack being written.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored whole: the payload is the object's own body.
    None,
    /// Stored as `delta` against another object of the same pack.
    // NOTE(review): `base_idx` presumably indexes the caller's object slice — confirm.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Stored as `delta` against an object identified by `base_oid` that is
    /// not part of the pack.
    // NOTE(review): presumably one of the configured thin bases — confirm.
    External { base_oid: ObjectId, delta: Vec<u8> },
}

/// Planning result for one object.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    base: PlannedBase,
}
5097
/// An object written in an earlier streaming window that may still serve as a
/// delta base for later windows.
#[derive(Debug, Clone)]
struct StreamingDeltaBase {
    /// Id of the base object.
    oid: ObjectId,
    /// The base object itself.
    object: Arc<EncodedObject>,
    /// Offset recorded for the base.
    // NOTE(review): presumably its position in the pack being written — confirm.
    offset: u64,
    /// Delta-chain depth of the base (0 for an undeltified object).
    depth: usize,
}

/// How a single object of the current streaming window is planned to be
/// stored.
#[derive(Debug, Clone, PartialEq, Eq)]
enum StreamingPlannedBase {
    /// Stored whole: the payload is the object's own body.
    None,
    /// Delta against another object of the current window, identified by its
    /// index in that window's object slice.
    Current {
        base_idx: usize,
        delta: Vec<u8>,
    },
    /// Delta against an object carried over from an earlier window.
    Previous {
        base_oid: ObjectId,
        base_offset: u64,
        delta: Vec<u8>,
    },
    /// Delta against one of the thin bases supplied through the write options.
    External {
        base_oid: ObjectId,
        delta: Vec<u8>,
    },
}

/// Planning result for one object of a streaming window.
#[derive(Debug, Clone, PartialEq, Eq)]
struct StreamingPlannedEntry {
    base: StreamingPlannedBase,
    /// Delta-chain depth: 0 when stored whole, otherwise base depth + 1.
    depth: usize,
}

/// Identity of a candidate base inside the sliding delta window.
#[derive(Debug, Clone, PartialEq, Eq)]
enum StreamingCandidateBase {
    /// Candidate carried over from an earlier window.
    Previous {
        oid: ObjectId,
        offset: u64,
        depth: usize,
    },
    /// Candidate from the current window (index into its object slice).
    Current {
        idx: usize,
        depth: usize,
    },
}

/// One slot of the sliding delta window: a candidate base together with its
/// type and the block index built over its body.
struct StreamingDeltaWindowEntry<'a> {
    base: StreamingCandidateBase,
    object_type: ObjectType,
    index: DeltaIndex<'a>,
}
5148
5149fn compress_planned_payloads(
5150 objects: &[&EncodedObject],
5151 plan: &[PlannedEntry],
5152 order: &[usize],
5153 compression_level: u32,
5154) -> Result<Vec<Vec<u8>>> {
5155 if order.is_empty() {
5156 return Ok(Vec::new());
5157 }
5158
5159 let worker_count = std::thread::available_parallelism()
5160 .map(|threads| threads.get())
5161 .unwrap_or(1)
5162 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
5163 .min(order.len());
5164 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
5165 let mut payloads = Vec::with_capacity(order.len());
5166 for &idx in order {
5167 payloads.push(compressed_payload(
5168 planned_payload(objects, plan, idx),
5169 compression_level,
5170 )?);
5171 }
5172 return Ok(payloads);
5173 }
5174
5175 let chunk_len = order.len().div_ceil(worker_count);
5176 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
5177 std::thread::scope(|scope| {
5178 let mut handles = Vec::new();
5179 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
5180 let chunk_start = chunk_idx * chunk_len;
5181 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
5182 let mut chunk_payloads = Vec::with_capacity(chunk.len());
5183 for (offset, &idx) in chunk.iter().enumerate() {
5184 chunk_payloads.push((
5185 chunk_start + offset,
5186 compressed_payload(planned_payload(objects, plan, idx), compression_level)?,
5187 ));
5188 }
5189 Ok(chunk_payloads)
5190 }));
5191 }
5192
5193 let mut first_error = None;
5194 for handle in handles {
5195 match handle.join() {
5196 Ok(Ok(chunk_payloads)) => {
5197 if first_error.is_none() {
5198 for (pos, payload) in chunk_payloads {
5199 payloads[pos] = payload;
5200 }
5201 }
5202 }
5203 Ok(Err(err)) => {
5204 first_error.get_or_insert(err);
5205 }
5206 Err(_) => {
5207 first_error.get_or_insert_with(|| {
5208 GitError::InvalidObject("pack compression worker panicked".into())
5209 });
5210 }
5211 }
5212 }
5213
5214 match first_error {
5215 Some(err) => Err(err),
5216 None => Ok(()),
5217 }
5218 })?;
5219 Ok(payloads)
5220}
5221
5222fn compress_streaming_planned_payloads(
5223 objects: &[Arc<EncodedObject>],
5224 plan: &[StreamingPlannedEntry],
5225 order: &[usize],
5226 compression_level: u32,
5227) -> Result<Vec<Vec<u8>>> {
5228 if order.is_empty() {
5229 return Ok(Vec::new());
5230 }
5231
5232 let worker_count = std::thread::available_parallelism()
5233 .map(|threads| threads.get())
5234 .unwrap_or(1)
5235 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
5236 .min(order.len());
5237 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
5238 let mut payloads = Vec::with_capacity(order.len());
5239 for &idx in order {
5240 payloads.push(compressed_payload(
5241 streaming_planned_payload(objects, plan, idx),
5242 compression_level,
5243 )?);
5244 }
5245 return Ok(payloads);
5246 }
5247
5248 let chunk_len = order.len().div_ceil(worker_count);
5249 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
5250 std::thread::scope(|scope| {
5251 let mut handles = Vec::new();
5252 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
5253 let chunk_start = chunk_idx * chunk_len;
5254 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
5255 let mut chunk_payloads = Vec::with_capacity(chunk.len());
5256 for (offset, &idx) in chunk.iter().enumerate() {
5257 chunk_payloads.push((
5258 chunk_start + offset,
5259 compressed_payload(
5260 streaming_planned_payload(objects, plan, idx),
5261 compression_level,
5262 )?,
5263 ));
5264 }
5265 Ok(chunk_payloads)
5266 }));
5267 }
5268
5269 let mut first_error = None;
5270 for handle in handles {
5271 match handle.join() {
5272 Ok(Ok(chunk_payloads)) => {
5273 if first_error.is_none() {
5274 for (pos, payload) in chunk_payloads {
5275 payloads[pos] = payload;
5276 }
5277 }
5278 }
5279 Ok(Err(err)) => {
5280 first_error.get_or_insert(err);
5281 }
5282 Err(_) => {
5283 first_error.get_or_insert_with(|| {
5284 GitError::InvalidObject("pack compression worker panicked".into())
5285 });
5286 }
5287 }
5288 }
5289
5290 match first_error {
5291 Some(err) => Err(err),
5292 None => Ok(()),
5293 }
5294 })?;
5295 Ok(payloads)
5296}
5297
5298fn compress_undeltified_payloads(
5299 objects: &[Arc<EncodedObject>],
5300 compression_level: u32,
5301) -> Result<Vec<Vec<u8>>> {
5302 if objects.is_empty() {
5303 return Ok(Vec::new());
5304 }
5305
5306 let worker_count = std::thread::available_parallelism()
5307 .map(|threads| threads.get())
5308 .unwrap_or(1)
5309 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
5310 .min(objects.len());
5311 if worker_count <= 1 || objects.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
5312 let mut payloads = Vec::with_capacity(objects.len());
5313 for object in objects {
5314 payloads.push(compressed_payload(&object.body, compression_level)?);
5315 }
5316 return Ok(payloads);
5317 }
5318
5319 let chunk_len = objects.len().div_ceil(worker_count);
5320 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new)
5321 .take(objects.len())
5322 .collect();
5323 std::thread::scope(|scope| {
5324 let mut handles = Vec::new();
5325 for (chunk_idx, chunk) in objects.chunks(chunk_len).enumerate() {
5326 let chunk_start = chunk_idx * chunk_len;
5327 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
5328 let mut chunk_payloads = Vec::with_capacity(chunk.len());
5329 for (offset, object) in chunk.iter().enumerate() {
5330 chunk_payloads.push((
5331 chunk_start + offset,
5332 compressed_payload(&object.body, compression_level)?,
5333 ));
5334 }
5335 Ok(chunk_payloads)
5336 }));
5337 }
5338
5339 let mut first_error = None;
5340 for handle in handles {
5341 match handle.join() {
5342 Ok(Ok(chunk_payloads)) => {
5343 if first_error.is_none() {
5344 for (pos, payload) in chunk_payloads {
5345 payloads[pos] = payload;
5346 }
5347 }
5348 }
5349 Ok(Err(err)) => {
5350 first_error.get_or_insert(err);
5351 }
5352 Err(_) => {
5353 first_error.get_or_insert_with(|| {
5354 GitError::InvalidObject("pack compression worker panicked".into())
5355 });
5356 }
5357 }
5358 }
5359
5360 match first_error {
5361 Some(err) => Err(err),
5362 None => Ok(()),
5363 }
5364 })?;
5365 Ok(payloads)
5366}
5367
5368fn streaming_planned_payload<'a>(
5369 objects: &'a [Arc<EncodedObject>],
5370 plan: &'a [StreamingPlannedEntry],
5371 idx: usize,
5372) -> &'a [u8] {
5373 match &plan[idx].base {
5374 StreamingPlannedBase::None => &objects[idx].body,
5375 StreamingPlannedBase::Current { delta, .. }
5376 | StreamingPlannedBase::Previous { delta, .. }
5377 | StreamingPlannedBase::External { delta, .. } => delta,
5378 }
5379}
5380
5381fn planned_payload<'a>(
5382 objects: &'a [&'a EncodedObject],
5383 plan: &'a [PlannedEntry],
5384 idx: usize,
5385) -> &'a [u8] {
5386 match &plan[idx].base {
5387 PlannedBase::None => &objects[idx].body,
5388 PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
5389 }
5390}
5391
/// Compresses `body` at `compression_level` into a freshly allocated buffer
/// by delegating to `write_compressed_payload`.
///
/// # Errors
///
/// Propagates any error reported by `write_compressed_payload`.
fn compressed_payload(body: &[u8], compression_level: u32) -> Result<Vec<u8>> {
    let mut out = Vec::new();
    write_compressed_payload(&mut out, body, compression_level)?;
    Ok(out)
}
5397
/// Maximum number of external (thin) bases that are tried as delta bases for
/// each object.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;

/// One slot of a delta search window: an object index together with the block
/// index built over a base body.
// NOTE(review): `idx` presumably refers to the caller's object slice — confirm.
struct DeltaWindowEntry<'a> {
    idx: usize,
    index: DeltaIndex<'a>,
}
5406
/// Sort rank of an object type, used as the primary key when ordering objects
/// for delta planning: commits first, then trees, blobs and tags.
fn delta_type_rank(object_type: ObjectType) -> u8 {
    match object_type {
        ObjectType::Commit => 0,
        ObjectType::Tree => 1,
        ObjectType::Blob => 2,
        ObjectType::Tag => 3,
    }
}
5417
5418fn plan_streaming_window_deltas(
5419 objects: &[Arc<EncodedObject>],
5420 object_ids: &[ObjectId],
5421 base_horizon: &VecDeque<StreamingDeltaBase>,
5422 options: &PackWriteOptions,
5423) -> (Vec<StreamingPlannedEntry>, Vec<usize>) {
5424 let count = objects.len();
5425 let mut plan: Vec<StreamingPlannedEntry> = (0..count)
5426 .map(|_| StreamingPlannedEntry {
5427 base: StreamingPlannedBase::None,
5428 depth: 0,
5429 })
5430 .collect();
5431
5432 let mut order: Vec<usize> = (0..count).collect();
5433 if options.reorder && options.depth > 0 {
5434 order.sort_by(|&left, &right| {
5435 delta_type_rank(objects[left].object_type)
5436 .cmp(&delta_type_rank(objects[right].object_type))
5437 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
5438 .then_with(|| {
5439 object_ids[left]
5440 .as_bytes()
5441 .cmp(object_ids[right].as_bytes())
5442 })
5443 });
5444 }
5445
5446 if options.depth == 0 || options.window == 0 {
5447 return (plan, order);
5448 }
5449
5450 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
5451 Vec::with_capacity(options.thin_bases.len());
5452 let mut external_bases = options.thin_bases.iter().collect::<Vec<_>>();
5453 external_bases
5454 .sort_by(|(left_oid, _), (right_oid, _)| left_oid.as_bytes().cmp(right_oid.as_bytes()));
5455 for (oid, object) in external_bases {
5456 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
5457 }
5458
5459 let mut window: VecDeque<StreamingDeltaWindowEntry<'_>> =
5460 VecDeque::with_capacity(options.window.min(base_horizon.len() + count));
5461 for base in base_horizon {
5462 window.push_back(StreamingDeltaWindowEntry {
5463 base: StreamingCandidateBase::Previous {
5464 oid: base.oid,
5465 offset: base.offset,
5466 depth: base.depth,
5467 },
5468 object_type: base.object.object_type,
5469 index: DeltaIndex::new(&base.object.body),
5470 });
5471 }
5472 while window.len() > options.window {
5473 window.pop_front();
5474 }
5475
5476 for &idx in &order {
5477 let target = &objects[idx].body;
5478 let target_type = objects[idx].object_type;
5479
5480 let mut best_delta: Option<Vec<u8>> = None;
5481 let mut best_base = StreamingPlannedBase::None;
5482 let mut best_base_depth = 0usize;
5483
5484 for base_entry in window.iter().rev() {
5485 if base_entry.object_type != target_type {
5486 continue;
5487 }
5488 let base_depth = match &base_entry.base {
5489 StreamingCandidateBase::Previous { depth, .. }
5490 | StreamingCandidateBase::Current { depth, .. } => *depth,
5491 };
5492 if base_depth + 1 > options.depth {
5493 continue;
5494 }
5495 let Some(delta) = base_entry.index.delta(target) else {
5496 continue;
5497 };
5498 if !delta_is_acceptable(&delta, target.len()) {
5499 continue;
5500 }
5501 if best_delta
5502 .as_ref()
5503 .is_none_or(|current| delta.len() < current.len())
5504 {
5505 best_delta = Some(delta);
5506 best_base_depth = base_depth;
5507 best_base = match &base_entry.base {
5508 StreamingCandidateBase::Previous { oid, offset, .. } => {
5509 StreamingPlannedBase::Previous {
5510 base_oid: *oid,
5511 base_offset: *offset,
5512 delta: Vec::new(),
5513 }
5514 }
5515 StreamingCandidateBase::Current { idx: base_idx, .. } => {
5516 StreamingPlannedBase::Current {
5517 base_idx: *base_idx,
5518 delta: Vec::new(),
5519 }
5520 }
5521 };
5522 }
5523 }
5524
5525 for (base_oid, base_type, base_index) in
5526 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
5527 {
5528 if *base_type != target_type {
5529 continue;
5530 }
5531 let Some(delta) = base_index.delta(target) else {
5532 continue;
5533 };
5534 if !delta_is_acceptable(&delta, target.len()) {
5535 continue;
5536 }
5537 if best_delta
5538 .as_ref()
5539 .is_none_or(|current| delta.len() < current.len())
5540 {
5541 best_delta = Some(delta);
5542 best_base_depth = 0;
5543 best_base = StreamingPlannedBase::External {
5544 base_oid: *base_oid,
5545 delta: Vec::new(),
5546 };
5547 }
5548 }
5549
5550 if let Some(delta) = best_delta {
5551 plan[idx].depth = best_base_depth + 1;
5552 plan[idx].base = match best_base {
5553 StreamingPlannedBase::Current { base_idx, .. } => {
5554 StreamingPlannedBase::Current { base_idx, delta }
5555 }
5556 StreamingPlannedBase::Previous {
5557 base_oid,
5558 base_offset,
5559 ..
5560 } => StreamingPlannedBase::Previous {
5561 base_oid,
5562 base_offset,
5563 delta,
5564 },
5565 StreamingPlannedBase::External { base_oid, .. } => {
5566 StreamingPlannedBase::External { base_oid, delta }
5567 }
5568 StreamingPlannedBase::None => StreamingPlannedBase::None,
5569 };
5570 }
5571
5572 window.push_back(StreamingDeltaWindowEntry {
5573 base: StreamingCandidateBase::Current {
5574 idx,
5575 depth: plan[idx].depth,
5576 },
5577 object_type: objects[idx].object_type,
5578 index: DeltaIndex::new(&objects[idx].body),
5579 });
5580 while window.len() > options.window {
5581 window.pop_front();
5582 }
5583 }
5584
5585 (plan, order)
5586}
5587
5588fn plan_pack_deltas(
5618 objects: &[&EncodedObject],
5619 object_ids: &[ObjectId],
5620 options: &PackWriteOptions,
5621) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
5622 let count = objects.len();
5623 let mut plan: Vec<PlannedEntry> = (0..count)
5624 .map(|_| PlannedEntry {
5625 base: PlannedBase::None,
5626 })
5627 .collect();
5628
5629 let mut order: Vec<usize> = (0..count).collect();
5633 if options.reorder && options.depth > 0 {
5634 order.sort_by(|&left, &right| {
5635 delta_type_rank(objects[left].object_type)
5636 .cmp(&delta_type_rank(objects[right].object_type))
5637 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
5638 .then_with(|| {
5639 object_ids[left]
5640 .as_bytes()
5641 .cmp(object_ids[right].as_bytes())
5642 })
5643 });
5644 }
5645
5646 if options.depth == 0 {
5647 return Ok((plan, order));
5648 }
5649
5650 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
5653 Vec::with_capacity(options.thin_bases.len());
5654 for (oid, object) in &options.thin_bases {
5655 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
5656 }
5657
5658 let mut depth = vec![0usize; count];
5661 let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
5663 std::collections::VecDeque::new();
5664
5665 for &idx in &order {
5666 let target = &objects[idx].body;
5667 let target_type = objects[idx].object_type;
5668
5669 let mut best_delta: Option<Vec<u8>> = None;
5670 let mut best_base = PlannedBase::None;
5671
5672 for base_entry in window.iter().rev() {
5674 let base_idx = base_entry.idx;
5675 if objects[base_idx].object_type != target_type {
5676 continue;
5677 }
5678 if depth[base_idx] + 1 > options.depth {
5681 continue;
5682 }
5683 let Some(delta) = base_entry.index.delta(target) else {
5684 continue;
5685 };
5686 if !delta_is_acceptable(&delta, target.len()) {
5687 continue;
5688 }
5689 if best_delta
5690 .as_ref()
5691 .is_none_or(|current| delta.len() < current.len())
5692 {
5693 best_delta = Some(delta);
5694 best_base = PlannedBase::InPack {
5695 base_idx,
5696 delta: Vec::new(),
5697 };
5698 }
5699 }
5700
5701 for (base_oid, base_type, base_index) in
5704 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
5705 {
5706 if *base_type != target_type {
5707 continue;
5708 }
5709 let Some(delta) = base_index.delta(target) else {
5710 continue;
5711 };
5712 if !delta_is_acceptable(&delta, target.len()) {
5713 continue;
5714 }
5715 if best_delta
5716 .as_ref()
5717 .is_none_or(|current| delta.len() < current.len())
5718 {
5719 best_delta = Some(delta);
5720 best_base = PlannedBase::External {
5721 base_oid: *base_oid,
5722 delta: Vec::new(),
5723 };
5724 }
5725 }
5726
5727 if let Some(delta) = best_delta {
5728 match best_base {
5729 PlannedBase::InPack { base_idx, .. } => {
5730 depth[idx] = depth[base_idx] + 1;
5731 plan[idx].base = PlannedBase::InPack { base_idx, delta };
5732 }
5733 PlannedBase::External { base_oid, .. } => {
5734 depth[idx] = 1;
5735 plan[idx].base = PlannedBase::External { base_oid, delta };
5736 }
5737 PlannedBase::None => {}
5738 }
5739 }
5740
5741 window.push_back(DeltaWindowEntry {
5743 idx,
5744 index: DeltaIndex::new(&objects[idx].body),
5745 });
5746 while window.len() > options.window {
5747 window.pop_front();
5748 }
5749 }
5750
5751 Ok((plan, order))
5752}
5753
/// Reports whether `delta` is worth storing instead of the full object body.
///
/// A delta qualifies when it is non-empty and strictly shorter than the target
/// (`target_len` bytes).
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
5761
/// Appends `value` to `out` as a little-endian base-128 varint.
///
/// Each byte carries seven payload bits, least significant group first; the high
/// bit is set on every byte except the last. Zero is encoded as a single `0x00`.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value & 0x7f) as u8) | 0x80);
        value >>= 7;
    }
    // The remaining value fits in seven bits and terminates the sequence.
    out.push(value as u8);
}
5775
/// Appends delta "copy from base" instructions covering `size` bytes at `offset`.
///
/// The range is split into chunks of at most `0x10000` bytes. Each instruction is a
/// command byte with the high bit set, followed by the non-zero bytes of the offset
/// (low four bytes, little-endian, flagged by command bits 0..=3) and of the chunk
/// size (flagged by command bits 4..=6). A chunk of exactly `0x10000` bytes is encoded
/// with a size of zero, i.e. with no size bytes. Nothing is written when `size` is 0.
///
/// Only the low 32 bits of the offset are representable in an instruction.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_CHUNK: u64 = 0x10000;
    while size > 0 {
        let chunk = if size < MAX_CHUNK { size } else { MAX_CHUNK };
        let size_field = if chunk < MAX_CHUNK { chunk } else { 0 };

        // Reserve the command byte, then set its flag bits as operands are emitted.
        let command_at = out.len();
        out.push(0x80);

        let offset_bytes = offset.to_le_bytes();
        for (bit, &byte) in offset_bytes.iter().take(4).enumerate() {
            if byte != 0 {
                out[command_at] |= 1u8 << bit;
                out.push(byte);
            }
        }

        let size_bytes = size_field.to_le_bytes();
        for (bit, &byte) in size_bytes.iter().take(3).enumerate() {
            if byte != 0 {
                out[command_at] |= 0x10u8 << bit;
                out.push(byte);
            }
        }

        offset += chunk;
        size -= chunk;
    }
}
5805
/// Appends delta "insert literal" instructions carrying `bytes`.
///
/// The data is split into chunks of at most `0x7f` bytes; each chunk is written as
/// a length byte followed by the chunk itself. Empty input writes nothing.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for chunk in bytes.chunks(0x7f) {
        out.push(chunk.len() as u8);
        out.extend_from_slice(chunk);
    }
}
5814
5815fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
5816 let mut value = 0u64;
5817 let mut shift = 0u32;
5818 loop {
5819 let Some(byte) = delta.get(*cursor).copied() else {
5820 return Err(GitError::InvalidObject("truncated delta size".into()));
5821 };
5822 *cursor += 1;
5823 value = value
5824 .checked_add(
5825 u64::from(byte & 0x7f)
5826 .checked_shl(shift)
5827 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
5828 )
5829 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5830 if byte & 0x80 == 0 {
5831 return Ok(value);
5832 }
5833 shift = shift
5834 .checked_add(7)
5835 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
5836 }
5837}
5838
5839fn read_delta_copy_value(
5840 delta: &[u8],
5841 cursor: &mut usize,
5842 command: u8,
5843 masks: &[u8],
5844) -> Result<u64> {
5845 let mut value = 0u64;
5846 for (shift, mask) in masks.iter().enumerate() {
5847 if command & mask != 0 {
5848 let Some(byte) = delta.get(*cursor).copied() else {
5849 return Err(GitError::InvalidObject(
5850 "truncated delta copy command".into(),
5851 ));
5852 };
5853 *cursor += 1;
5854 value |= u64::from(byte) << (shift * 8);
5855 }
5856 }
5857 Ok(value)
5858}
5859
5860fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8], compression_level: u32) -> Result<()> {
5861 let mut compressor = Compress::new(Compression::new(compression_level.min(9)), true);
5862 out.reserve(zlib_compress_bound(body.len()));
5863 let status = compressor
5864 .compress_vec(body, out, FlushCompress::Finish)
5865 .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
5866 if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
5867 return Err(GitError::InvalidObject(
5868 "zlib compression did not finish pack entry".into(),
5869 ));
5870 }
5871 Ok(())
5872}
5873
/// Size estimate used to pre-reserve output space before compressing `len` bytes.
///
/// Computes `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at `usize::MAX`
/// instead of overflowing.
fn zlib_compress_bound(len: usize) -> usize {
    [len >> 12, len >> 14, len >> 25, 13]
        .iter()
        .fold(len, |total, extra| total.saturating_add(*extra))
}
5880
5881fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
5882 let type_code = match object_type {
5883 ObjectType::Commit => 1,
5884 ObjectType::Tree => 2,
5885 ObjectType::Blob => 3,
5886 ObjectType::Tag => 4,
5887 };
5888 write_pack_entry_header_kind(out, type_code, size);
5889}
5890
/// Appends a pack entry header with an explicit numeric `type_code`.
///
/// The first byte holds the type code in bits 4..=6 and the low four bits of `size`;
/// the remaining size bits follow in groups of seven, least significant first. The
/// high bit of every byte except the last is set to signal continuation.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    let continuation = |rest: u64| -> u8 { if rest != 0 { 0x80 } else { 0 } };

    let low_nibble = (size & 0x0f) as u8;
    let mut rest = size >> 4;
    out.push((type_code << 4) | low_nibble | continuation(rest));

    while rest != 0 {
        let group = (rest & 0x7f) as u8;
        rest >>= 7;
        out.push(group | continuation(rest));
    }
}
5907
5908fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
5909 if relative == 0 {
5910 return Err(GitError::InvalidFormat(
5911 "ofs-delta relative offset cannot be zero".into(),
5912 ));
5913 }
5914 let mut value = relative;
5915 let mut bytes = vec![(value & 0x7f) as u8];
5916 value >>= 7;
5917 while value != 0 {
5918 value -= 1;
5919 bytes.push(((value & 0x7f) as u8) | 0x80);
5920 value >>= 7;
5921 }
5922 bytes.reverse();
5923 out.extend_from_slice(&bytes);
5924 Ok(())
5925}
5926
5927fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
5928 let Some(byte) = bytes.get(*offset).copied() else {
5929 return Err(GitError::InvalidFormat(
5930 "truncated pack entry header".into(),
5931 ));
5932 };
5933 *offset += 1;
5934 Ok(byte)
5935}
5936
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// # Panics
///
/// Panics if `bytes` holds fewer than two bytes.
fn u16_be(bytes: &[u8]) -> u16 {
    let high = u16::from(bytes[0]);
    let low = u16::from(bytes[1]);
    (high << 8) | low
}
5940
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// # Panics
///
/// Panics if `bytes` holds fewer than four bytes.
fn u32_be(bytes: &[u8]) -> u32 {
    (0..4).fold(0u32, |value, index| (value << 8) | u32::from(bytes[index]))
}
5944
/// Decodes the first eight bytes of `bytes` as a big-endian `u64`.
///
/// # Panics
///
/// Panics if `bytes` holds fewer than eight bytes.
fn u64_be(bytes: &[u8]) -> u64 {
    (0..8).fold(0u64, |value, index| (value << 8) | u64::from(bytes[index]))
}
5950
5951fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
5952 let mut fanout = [0u32; 256];
5953 let mut previous = 0u32;
5954 for slot in &mut fanout {
5955 *slot = u32_be(&bytes[*offset..*offset + 4]);
5956 if *slot < previous {
5957 return Err(GitError::InvalidFormat(
5958 "pack index fanout is not monotonic".into(),
5959 ));
5960 }
5961 previous = *slot;
5962 *offset += 4;
5963 }
5964 Ok(fanout)
5965}
5966
5967fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
5968 let expected_min = if oid_bytes[0] == 0 {
5969 0
5970 } else {
5971 fanout[usize::from(oid_bytes[0] - 1)]
5972 };
5973 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
5974 return Err(GitError::InvalidFormat(
5975 "pack index object id is outside its fanout bucket".into(),
5976 ));
5977 }
5978 Ok(())
5979}
5980
5981fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
5982 if raw_offset & 0x8000_0000 == 0 {
5983 return Ok(u64::from(raw_offset));
5984 }
5985 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
5986 let large_start = large_idx
5987 .checked_mul(8)
5988 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5989 let large_end = large_start
5990 .checked_add(8)
5991 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
5992 if large_end > large_offset_table.len() {
5993 return Err(GitError::InvalidFormat(
5994 "pack index large offset points past table".into(),
5995 ));
5996 }
5997 Ok(u64_be(&large_offset_table[large_start..large_end]))
5998}
5999
6000fn checked_range(
6001 start: usize,
6002 count: usize,
6003 width: usize,
6004 total: usize,
6005) -> Result<std::ops::Range<usize>> {
6006 let len = count
6007 .checked_mul(width)
6008 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
6009 let end = start
6010 .checked_add(len)
6011 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
6012 if end > total {
6013 return Err(GitError::InvalidFormat("truncated pack index table".into()));
6014 }
6015 Ok(start..end)
6016}
6017
6018fn validate_position_permutation(positions: &[u32]) -> Result<()> {
6019 let mut seen = vec![false; positions.len()];
6020 for position in positions {
6021 let idx = *position as usize;
6022 if idx >= positions.len() {
6023 return Err(GitError::InvalidFormat(
6024 "reverse index position points past object table".into(),
6025 ));
6026 }
6027 if seen[idx] {
6028 return Err(GitError::InvalidFormat(
6029 "reverse index position is duplicated".into(),
6030 ));
6031 }
6032 seen[idx] = true;
6033 }
6034 Ok(())
6035}
6036
6037fn parse_midx_pack_names(
6038 bytes: &[u8],
6039 chunks: &[MultiPackIndexChunk],
6040 pack_count: usize,
6041 version: u8,
6042) -> Result<Vec<String>> {
6043 let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
6044 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
6045 let mut names = Vec::with_capacity(pack_count);
6046 let mut offset = 0usize;
6047 while names.len() < pack_count {
6048 let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
6049 return Err(GitError::InvalidFormat(
6050 "fatal: multi-pack-index pack-name chunk is too short".into(),
6051 ));
6052 };
6053 let name_bytes = &data[offset..offset + relative_end];
6054 if name_bytes.is_empty() {
6055 return Err(GitError::InvalidFormat(
6056 "multi-pack-index PNAM entry is empty".into(),
6057 ));
6058 }
6059 let name = std::str::from_utf8(name_bytes)
6060 .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
6061 if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
6062 return Err(GitError::InvalidFormat(
6063 "multi-pack-index PNAM entry contains a path separator".into(),
6064 ));
6065 }
6066 names.push(name.to_string());
6067 offset += relative_end + 1;
6068 }
6069 let padding = &data[offset..];
6070 if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
6071 return Err(GitError::InvalidFormat(
6072 "multi-pack-index PNAM padding is invalid".into(),
6073 ));
6074 }
6075 if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
6076 return Err(GitError::InvalidFormat(
6077 "multi-pack-index v1 PNAM entries are not sorted".into(),
6078 ));
6079 }
6080 Ok(names)
6081}
6082
6083fn parse_midx_oid_fanout(
6084 bytes: &[u8],
6085 chunks: &[MultiPackIndexChunk],
6086) -> Result<([u32; 256], usize)> {
6087 let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
6088 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
6089 if data.len() != 256 * 4 {
6090 return Err(GitError::InvalidFormat(
6091 "error: multi-pack-index OID fanout is of the wrong size\nfatal: multi-pack-index required OID fanout chunk missing or corrupted".into(),
6092 ));
6093 }
6094 let mut fanout = [0u32; 256];
6095 let mut previous = 0u32;
6096 for (idx, slot) in fanout.iter_mut().enumerate() {
6097 let start = idx * 4;
6098 *slot = u32_be(&data[start..start + 4]);
6099 if *slot < previous {
6100 return Err(GitError::InvalidFormat(format!(
6101 "error: oid fanout out of order: fanout[{}] = {:x} > {:x} = fanout[{idx}]\nfatal: multi-pack-index required OID fanout chunk missing or corrupted",
6102 idx - 1,
6103 previous,
6104 *slot
6105 )));
6106 }
6107 previous = *slot;
6108 }
6109 Ok((fanout, fanout[255] as usize))
6110}
6111
6112fn parse_midx_object_ids(
6113 bytes: &[u8],
6114 chunks: &[MultiPackIndexChunk],
6115 format: ObjectFormat,
6116 object_count: usize,
6117 fanout: &[u32; 256],
6118) -> Result<Vec<ObjectId>> {
6119 let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
6120 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
6121 let expected_len = object_count
6122 .checked_mul(format.raw_len())
6123 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
6124 if data.len() != expected_len {
6125 return Err(GitError::InvalidFormat(
6126 "error: multi-pack-index OID lookup chunk is the wrong size\nfatal: multi-pack-index required OID lookup chunk missing or corrupted".into(),
6127 ));
6128 }
6129
6130 let mut ids = Vec::with_capacity(object_count);
6131 let mut counts = [0u32; 256];
6132 let mut previous_oid: Option<ObjectId> = None;
6133 for idx in 0..object_count {
6134 let start = idx * format.raw_len();
6135 let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
6136 if let Some(previous) = &previous_oid
6137 && previous.as_bytes() >= oid.as_bytes()
6138 {
6139 return Err(GitError::InvalidFormat(
6140 "multi-pack-index OIDL object ids are not strictly sorted".into(),
6141 ));
6142 }
6143 counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
6144 .checked_add(1)
6145 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
6146 previous_oid = Some(oid);
6147 ids.push(oid);
6148 }
6149
6150 let mut running = 0u32;
6151 for (idx, count) in counts.iter().enumerate() {
6152 running = running
6153 .checked_add(*count)
6154 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
6155 if fanout[idx] != running {
6156 return Err(GitError::InvalidFormat(
6157 "multi-pack-index OIDF fanout does not match OIDL".into(),
6158 ));
6159 }
6160 }
6161 Ok(ids)
6162}
6163
6164fn parse_midx_object_offsets(
6165 bytes: &[u8],
6166 chunks: &[MultiPackIndexChunk],
6167 object_ids: Vec<ObjectId>,
6168 pack_count: u32,
6169) -> Result<Vec<MultiPackIndexEntry>> {
6170 let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
6171 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
6172 let expected_len = object_ids
6173 .len()
6174 .checked_mul(8)
6175 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
6176 if data.len() != expected_len {
6177 return Err(GitError::InvalidFormat(
6178 "error: multi-pack-index object offset chunk is the wrong size\nfatal: multi-pack-index required object offsets chunk missing or corrupted".into(),
6179 ));
6180 }
6181 let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
6182 if let Some(large_offsets) = large_offsets
6183 && large_offsets.len() % 8 != 0
6184 {
6185 return Err(GitError::InvalidFormat(
6186 "multi-pack-index LOFF chunk has invalid length".into(),
6187 ));
6188 }
6189
6190 let mut entries = Vec::with_capacity(object_ids.len());
6191 for (idx, oid) in object_ids.into_iter().enumerate() {
6192 let start = idx * 8;
6193 let pack_int_id = u32_be(&data[start..start + 4]);
6194 if pack_int_id >= pack_count {
6195 return Err(GitError::InvalidFormat(
6196 "multi-pack-index object points past pack table".into(),
6197 ));
6198 }
6199 let raw_offset = u32_be(&data[start + 4..start + 8]);
6200 let offset = if raw_offset & 0x8000_0000 == 0 {
6201 u64::from(raw_offset)
6202 } else {
6203 let Some(large_offsets) = large_offsets else {
6204 return Err(GitError::InvalidFormat(
6205 "multi-pack-index large offset missing LOFF chunk".into(),
6206 ));
6207 };
6208 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
6209 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
6210 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
6211 })?;
6212 let large_end = large_start.checked_add(8).ok_or_else(|| {
6213 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
6214 })?;
6215 if large_end > large_offsets.len() {
6216 return Err(GitError::InvalidFormat(
6217 "fatal: multi-pack-index large offset out of bounds".into(),
6218 ));
6219 }
6220 u64_be(&large_offsets[large_start..large_end])
6221 };
6222 entries.push(MultiPackIndexEntry {
6223 oid,
6224 pack_int_id,
6225 offset,
6226 force_large_offset: raw_offset & 0x8000_0000 != 0,
6227 });
6228 }
6229 Ok(entries)
6230}
6231
6232fn parse_midx_reverse_index(
6233 bytes: &[u8],
6234 chunks: &[MultiPackIndexChunk],
6235 object_count: usize,
6236) -> Result<Option<Vec<u32>>> {
6237 let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
6238 return Ok(None);
6239 };
6240 let expected_len = object_count
6241 .checked_mul(4)
6242 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
6243 if data.len() != expected_len {
6244 return Err(GitError::InvalidFormat(
6245 "multi-pack-index reverse-index chunk is the wrong size".into(),
6246 ));
6247 }
6248 let mut positions = Vec::with_capacity(object_count);
6249 for idx in 0..object_count {
6250 let start = idx * 4;
6251 positions.push(u32_be(&data[start..start + 4]));
6252 }
6253 validate_position_permutation(&positions)?;
6254 Ok(Some(positions))
6255}
6256
6257fn parse_midx_bitmapped_packs(
6258 bytes: &[u8],
6259 chunks: &[MultiPackIndexChunk],
6260 pack_count: usize,
6261 object_count: usize,
6262) -> Result<Option<Vec<MultiPackBitmapPack>>> {
6263 let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
6264 return Ok(None);
6265 };
6266 let expected_len = pack_count
6267 .checked_mul(8)
6268 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
6269 if data.len() != expected_len {
6270 return Err(GitError::InvalidFormat(
6271 "multi-pack-index BTMP chunk has invalid length".into(),
6272 ));
6273 }
6274 let mut entries = Vec::with_capacity(pack_count);
6275 for idx in 0..pack_count {
6276 let start = idx * 8;
6277 let bitmap_pos = u32_be(&data[start..start + 4]);
6278 let bitmap_nr = u32_be(&data[start + 4..start + 8]);
6279 let bitmap_end = u64::from(bitmap_pos)
6280 .checked_add(u64::from(bitmap_nr))
6281 .ok_or_else(|| {
6282 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
6283 })?;
6284 if bitmap_end > object_count as u64 {
6285 return Err(GitError::InvalidFormat(
6286 "multi-pack-index BTMP range points past object table".into(),
6287 ));
6288 }
6289 entries.push(MultiPackBitmapPack {
6290 bitmap_pos,
6291 bitmap_nr,
6292 });
6293 }
6294 Ok(Some(entries))
6295}
6296
6297fn midx_chunk_data<'a>(
6298 bytes: &'a [u8],
6299 chunks: &[MultiPackIndexChunk],
6300 id: [u8; 4],
6301 required: bool,
6302) -> Result<Option<&'a [u8]>> {
6303 let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
6304 if required {
6305 return Err(GitError::InvalidFormat(format!(
6306 "multi-pack-index missing {} chunk",
6307 std::str::from_utf8(&id).unwrap_or("required")
6308 )));
6309 }
6310 return Ok(None);
6311 };
6312 let start = usize::try_from(chunk.offset)
6313 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
6314 let len = usize::try_from(chunk.len)
6315 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
6316 let end = start
6317 .checked_add(len)
6318 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
6319 let Some(data) = bytes.get(start..end) else {
6320 return Err(GitError::InvalidFormat(
6321 "multi-pack-index chunk extends past file".into(),
6322 ));
6323 };
6324 Ok(Some(data))
6325}
6326
/// Maps an object format to its numeric hash-function identifier:
/// 1 for SHA-1 and 2 for SHA-256.
///
/// NOTE(review): presumably the value written to on-disk headers that record the
/// hash algorithm; confirm against the callers.
fn hash_function_id(format: ObjectFormat) -> u32 {
    match format {
        ObjectFormat::Sha1 => 1,
        ObjectFormat::Sha256 => 2,
    }
}
6333
/// Largest run length a single EWAH run-length word (RLW) can hold; also used as the
/// mask for the 32-bit run-length field stored in bits 1..=32 of an RLW.
const EWAH_MAX_RUNNING_LEN: u64 = 0xffff_ffff;

/// Largest literal-word count a single RLW can hold; also used as the mask for the
/// 31-bit literal-count field stored in bits 33..=63 of an RLW.
const EWAH_MAX_LITERAL_LEN: u64 = 0x7fff_ffff;

/// A 64-bit word with every bit set; such words are stored as runs of ones.
const EWAH_ALL_ONES: u64 = u64::MAX;
6344
6345impl EwahBitmap {
6346 pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
6360 let required_words = bit_size.div_ceil(64) as usize;
6361 if required_words > words.len() {
6362 return Err(GitError::InvalidFormat(format!(
6363 "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
6364 words.len()
6365 )));
6366 }
6367 let significant = &words[..required_words];
6370 let mut builder = EwahBuilder::new(bit_size);
6371 for &word in significant {
6372 if word == 0 {
6373 builder.add_empty_words(false, 1);
6374 } else if word == EWAH_ALL_ONES {
6375 builder.add_empty_words(true, 1);
6376 } else {
6377 builder.add_literal(word);
6378 }
6379 }
6380 builder.finish()
6381 }
6382
6383 pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
6389 let word_count = bit_size.div_ceil(64) as usize;
6390 let mut words = vec![0u64; word_count];
6391 for &position in positions {
6392 if position >= bit_size {
6393 return Err(GitError::InvalidFormat(format!(
6394 "EWAH bit position {position} out of range for bit_size {bit_size}"
6395 )));
6396 }
6397 let word_index = (position / 64) as usize;
6398 let bit_index = position % 64;
6399 words[word_index] |= 1u64 << bit_index;
6400 }
6401 Self::from_words(bit_size, &words)
6402 }
6403
6404 pub fn empty() -> Self {
6407 Self {
6408 bit_size: 0,
6409 words: Vec::new(),
6410 rlw_position: 0,
6411 }
6412 }
6413
6414 pub fn to_words(&self) -> Result<Vec<u64>> {
6420 let mut out = Vec::new();
6421 let mut word_idx = 0usize;
6422 while word_idx < self.words.len() {
6423 let rlw = self.words[word_idx];
6424 let run_bit = rlw & 1;
6425 let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
6426 let literal_words = (rlw >> 33) as usize;
6427 word_idx += 1;
6428 let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
6429 for _ in 0..run_words {
6430 out.push(fill);
6431 }
6432 let literal_end = word_idx
6433 .checked_add(literal_words)
6434 .filter(|end| *end <= self.words.len())
6435 .ok_or_else(|| {
6436 GitError::InvalidFormat("EWAH literal words extend past word table".into())
6437 })?;
6438 out.extend_from_slice(&self.words[word_idx..literal_end]);
6439 word_idx = literal_end;
6440 }
6441 let required_words = (self.bit_size as usize).div_ceil(64);
6442 if out.len() < required_words {
6443 out.resize(required_words, 0);
6444 }
6445 out.truncate(required_words);
6446 Ok(out)
6447 }
6448
6449 pub fn to_positions(&self) -> Result<Vec<u32>> {
6451 let words = self.to_words()?;
6452 let mut positions = Vec::new();
6453 for (word_index, word) in words.iter().enumerate() {
6454 let mut remaining = *word;
6455 while remaining != 0 {
6456 let bit = remaining.trailing_zeros();
6457 let position = (word_index as u64) * 64 + u64::from(bit);
6458 if position < u64::from(self.bit_size) {
6459 positions.push(position as u32);
6461 }
6462 remaining &= remaining - 1;
6463 }
6464 }
6465 Ok(positions)
6466 }
6467
6468 pub fn to_bytes(&self) -> Vec<u8> {
6472 let mut out = Vec::with_capacity(12 + self.words.len() * 8);
6473 self.append_bytes(&mut out);
6474 out
6475 }
6476
6477 fn append_bytes(&self, out: &mut Vec<u8>) {
6478 out.extend_from_slice(&self.bit_size.to_be_bytes());
6479 out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
6480 for word in &self.words {
6481 out.extend_from_slice(&word.to_be_bytes());
6482 }
6483 out.extend_from_slice(&self.rlw_position.to_be_bytes());
6484 }
6485}
6486
/// Incremental encoder that produces the compressed word stream of an `EwahBitmap`.
struct EwahBuilder {
    /// Logical size of the bitmap in bits; copied unchanged into the finished bitmap.
    bit_size: u32,
    /// Compressed stream built so far: run-length words interleaved with the literal
    /// words they announce. Starts out holding a single zeroed run-length word.
    words: Vec<u64>,
    /// Index into `words` of the run-length word currently being filled.
    rlw_position: usize,
}
6499
6500impl EwahBuilder {
6501 fn new(bit_size: u32) -> Self {
6502 Self {
6504 bit_size,
6505 words: vec![0u64],
6506 rlw_position: 0,
6507 }
6508 }
6509
6510 fn rlw(&self) -> u64 {
6511 self.words[self.rlw_position]
6512 }
6513
6514 fn set_rlw(&mut self, value: u64) {
6515 self.words[self.rlw_position] = value;
6516 }
6517
6518 fn rlw_running_len(&self) -> u64 {
6519 (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
6520 }
6521
6522 fn rlw_running_bit(&self) -> bool {
6523 self.rlw() & 1 == 1
6524 }
6525
6526 fn rlw_literal_len(&self) -> u64 {
6527 self.rlw() >> 33
6528 }
6529
6530 fn set_running_bit(&mut self, bit: bool) {
6531 let mut value = self.rlw();
6532 value &= !1;
6533 value |= u64::from(bit);
6534 self.set_rlw(value);
6535 }
6536
6537 fn set_running_len(&mut self, len: u64) {
6538 let mut value = self.rlw();
6539 value &= !(EWAH_MAX_RUNNING_LEN << 1);
6540 value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
6541 self.set_rlw(value);
6542 }
6543
6544 fn set_literal_len(&mut self, len: u64) {
6545 let mut value = self.rlw();
6546 value &= (1u64 << 33) - 1;
6547 value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
6548 self.set_rlw(value);
6549 }
6550
6551 fn push_rlw(&mut self) {
6553 self.rlw_position = self.words.len();
6554 self.words.push(0);
6555 }
6556
6557 fn add_empty_words(&mut self, value: bool, mut number: u64) {
6565 while number > 0 {
6566 let can_extend = self.rlw_literal_len() == 0
6570 && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
6571 && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
6572 if !can_extend {
6573 self.push_rlw();
6574 }
6575 if self.rlw_running_len() == 0 {
6576 self.set_running_bit(value);
6577 }
6578 let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
6579 let take = available.min(number);
6580 self.set_running_len(self.rlw_running_len() + take);
6581 number -= take;
6582 }
6583 }
6584
6585 fn add_literal(&mut self, word: u64) {
6588 if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
6589 self.push_rlw();
6590 }
6591 let literal_len = self.rlw_literal_len();
6592 self.set_literal_len(literal_len + 1);
6593 self.words.push(word);
6594 }
6595
6596 fn finish(self) -> Result<EwahBitmap> {
6597 let rlw_position = u32::try_from(self.rlw_position)
6598 .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
6599 if self.words.len() > u32::MAX as usize {
6600 return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
6601 }
6602 Ok(EwahBitmap {
6603 bit_size: self.bit_size,
6604 words: self.words,
6605 rlw_position,
6606 })
6607 }
6608}
6609
/// Collects the inputs for a pack bitmap: the per-type object positions of a pack
/// plus the commits (and pseudo-merges) selected to receive bitmaps.
#[derive(Debug, Clone)]
pub struct PackBitmapWriter {
    /// Object format the bitmap is written for; must match `pack_checksum`'s format.
    format: ObjectFormat,
    /// Checksum of the pack this bitmap belongs to.
    pack_checksum: ObjectId,
    /// Number of objects in the pack; every position handled here must be below it.
    object_count: u32,
    /// Positions of all commit objects, in ascending order as built by `new`.
    commit_positions: Vec<u32>,
    /// Positions of all tree objects, in ascending order as built by `new`.
    tree_positions: Vec<u32>,
    /// Positions of all blob objects, in ascending order as built by `new`.
    blob_positions: Vec<u32>,
    /// Positions of all tag objects, in ascending order as built by `new`.
    tag_positions: Vec<u32>,
    /// Optional name-hash cache with exactly one entry per object.
    name_hash_cache: Option<Vec<u32>>,
    /// Commits registered through `add_commit`, in insertion order.
    selected: Vec<SelectedCommit>,
    /// Registered pseudo-merges.
    /// NOTE(review): presumably filled by `add_pseudo_merge`; confirm.
    pseudo_merges: Vec<PackBitmapPseudoMerge>,
}
6635
/// One commit selected to receive a reachability bitmap.
#[derive(Debug, Clone)]
struct SelectedCommit {
    /// The `commit_index_position` argument passed to `add_commit`.
    /// NOTE(review): presumably the commit's position in the pack index order; confirm.
    commit_index_position: u32,
    /// Per-entry flag byte; `add_commit` stores `PackBitmapWriter::FLAG_NONE`.
    flags: u8,
    /// Positions of the objects recorded as reachable, with the commit's own position
    /// appended last by `add_commit`.
    reachable: Vec<u32>,
}
6645
6646impl PackBitmapWriter {
    /// Flag value meaning "no flags set"; stored for every commit added through
    /// `add_commit`.
    pub const FLAG_NONE: u8 = 0;
6651
6652 pub fn new(
6659 format: ObjectFormat,
6660 pack_checksum: ObjectId,
6661 object_types: &[ObjectType],
6662 ) -> Result<Self> {
6663 if object_types.len() > u32::MAX as usize {
6664 return Err(GitError::InvalidFormat(
6665 "too many objects for a pack bitmap".into(),
6666 ));
6667 }
6668 if pack_checksum.format() != format {
6669 return Err(GitError::InvalidObjectId(
6670 "pack checksum format does not match bitmap format".into(),
6671 ));
6672 }
6673 let object_count = object_types.len() as u32;
6674 let mut commit_positions = Vec::new();
6675 let mut tree_positions = Vec::new();
6676 let mut blob_positions = Vec::new();
6677 let mut tag_positions = Vec::new();
6678 for (index, object_type) in object_types.iter().enumerate() {
6679 let position = index as u32;
6680 match object_type {
6681 ObjectType::Commit => commit_positions.push(position),
6682 ObjectType::Tree => tree_positions.push(position),
6683 ObjectType::Blob => blob_positions.push(position),
6684 ObjectType::Tag => tag_positions.push(position),
6685 }
6686 }
6687 Ok(Self {
6688 format,
6689 pack_checksum,
6690 object_count,
6691 commit_positions,
6692 tree_positions,
6693 blob_positions,
6694 tag_positions,
6695 name_hash_cache: None,
6696 selected: Vec::new(),
6697 pseudo_merges: Vec::new(),
6698 })
6699 }
6700
6701 pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
6707 if cache.len() != self.object_count as usize {
6708 return Err(GitError::InvalidFormat(format!(
6709 "name hash cache has {} entries but pack has {} objects",
6710 cache.len(),
6711 self.object_count
6712 )));
6713 }
6714 self.name_hash_cache = Some(cache);
6715 Ok(self)
6716 }
6717
6718 pub fn add_commit(
6730 &mut self,
6731 commit_position: u32,
6732 commit_index_position: u32,
6733 reachable: &[u32],
6734 ) -> Result<()> {
6735 if commit_position >= self.object_count {
6736 return Err(GitError::InvalidFormat(format!(
6737 "commit position {commit_position} out of range for {} objects",
6738 self.object_count
6739 )));
6740 }
6741 if commit_index_position >= self.object_count {
6742 return Err(GitError::InvalidFormat(format!(
6743 "commit index position {commit_index_position} out of range for {} objects",
6744 self.object_count
6745 )));
6746 }
6747 if !self.commit_positions.contains(&commit_position) {
6748 return Err(GitError::InvalidFormat(format!(
6749 "bitmap commit position {commit_position} is not a commit object"
6750 )));
6751 }
6752 for &position in reachable {
6753 if position >= self.object_count {
6754 return Err(GitError::InvalidFormat(format!(
6755 "reachable position {position} out of range for {} objects",
6756 self.object_count
6757 )));
6758 }
6759 }
6760 let mut reachable = reachable.to_vec();
6761 reachable.push(commit_position);
6762 self.selected.push(SelectedCommit {
6763 commit_index_position,
6764 flags: Self::FLAG_NONE,
6765 reachable,
6766 });
6767 Ok(())
6768 }
6769
6770 pub fn add_pseudo_merge(&mut self, commits: &[u32], reachable: &[u32]) -> Result<()> {
6775 if commits.is_empty() {
6776 return Err(GitError::InvalidFormat(
6777 "pseudo-merge must contain at least one commit".into(),
6778 ));
6779 }
6780 for &position in commits {
6781 if position >= self.object_count {
6782 return Err(GitError::InvalidFormat(format!(
6783 "pseudo-merge commit position {position} out of range for {} objects",
6784 self.object_count
6785 )));
6786 }
6787 if !self.commit_positions.contains(&position) {
6788 return Err(GitError::InvalidFormat(format!(
6789 "pseudo-merge commit position {position} is not a commit object"
6790 )));
6791 }
6792 }
6793 for &position in reachable {
6794 if position >= self.object_count {
6795 return Err(GitError::InvalidFormat(format!(
6796 "pseudo-merge reachable position {position} out of range for {} objects",
6797 self.object_count
6798 )));
6799 }
6800 }
6801 self.pseudo_merges.push(PackBitmapPseudoMerge {
6802 commits: EwahBitmap::from_positions(self.object_count, commits)?,
6803 bitmap: EwahBitmap::from_positions(self.object_count, reachable)?,
6804 });
6805 Ok(())
6806 }
6807
6808 pub fn build(&self) -> Result<PackBitmapIndex> {
6815 let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
6816 let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
6817 let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
6818 let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
6819
6820 let mut entries = Vec::with_capacity(self.selected.len());
6821 for selected in &self.selected {
6822 let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
6823 entries.push(PackBitmapEntry {
6824 object_position: selected.commit_index_position,
6825 xor_offset: 0,
6826 flags: selected.flags,
6827 bitmap,
6828 });
6829 }
6830
6831 let mut options = PackBitmapIndex::OPTION_FULL_DAG;
6832 if self.name_hash_cache.is_some() {
6833 options |= PackBitmapIndex::OPTION_HASH_CACHE;
6834 }
6835 if !self.pseudo_merges.is_empty() {
6836 options |= PackBitmapIndex::OPTION_PSEUDO_MERGES;
6837 }
6838
6839 let placeholder_checksum = ObjectId::null(self.format);
6844 Ok(PackBitmapIndex {
6845 version: 1,
6846 format: self.format,
6847 options,
6848 pack_checksum: self.pack_checksum.clone(),
6849 index_checksum: placeholder_checksum,
6850 type_bitmaps: PackBitmapTypeBitmaps {
6851 commits,
6852 trees,
6853 blobs,
6854 tags,
6855 },
6856 entries,
6857 pseudo_merges: self.pseudo_merges.clone(),
6858 name_hash_cache: self.name_hash_cache.clone(),
6859 })
6860 }
6861
6862 pub fn write(&self) -> Result<Vec<u8>> {
6865 self.build()?.write()
6866 }
6867}
6868
6869impl PackBitmapIndex {
6870 pub fn write(&self) -> Result<Vec<u8>> {
6884 if self.version != 1 {
6885 return Err(GitError::Unsupported(format!(
6886 "bitmap index version {}",
6887 self.version
6888 )));
6889 }
6890 let mut options = self.options;
6891 if !self.pseudo_merges.is_empty() {
6892 options |= Self::OPTION_PSEUDO_MERGES;
6893 }
6894 let known_options =
6895 Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE | Self::OPTION_PSEUDO_MERGES;
6896 if options & !known_options != 0 {
6897 return Err(GitError::Unsupported(format!(
6898 "bitmap index options {:#06x}",
6899 options & !known_options
6900 )));
6901 }
6902 if self.pack_checksum.format() != self.format {
6903 return Err(GitError::InvalidObjectId(
6904 "bitmap pack checksum format does not match index format".into(),
6905 ));
6906 }
6907 if self.entries.len() > u32::MAX as usize {
6908 return Err(GitError::InvalidFormat(
6909 "too many bitmap index entries".into(),
6910 ));
6911 }
6912 if options & Self::OPTION_PSEUDO_MERGES != 0 && self.pseudo_merges.is_empty() {
6913 return Err(GitError::InvalidFormat(
6914 "OPTION_PSEUDO_MERGES set without pseudo-merge records".into(),
6915 ));
6916 }
6917 let want_cache = options & Self::OPTION_HASH_CACHE != 0;
6918 match (&self.name_hash_cache, want_cache) {
6919 (Some(_), false) => {
6920 return Err(GitError::InvalidFormat(
6921 "name hash cache present without OPTION_HASH_CACHE".into(),
6922 ));
6923 }
6924 (None, true) => {
6925 return Err(GitError::InvalidFormat(
6926 "OPTION_HASH_CACHE set without a name hash cache".into(),
6927 ));
6928 }
6929 _ => {}
6930 }
6931
6932 let mut out = Vec::new();
6933 out.extend_from_slice(b"BITM");
6934 out.extend_from_slice(&self.version.to_be_bytes());
6935 out.extend_from_slice(&options.to_be_bytes());
6936 out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
6937 out.extend_from_slice(self.pack_checksum.as_bytes());
6938
6939 self.type_bitmaps.commits.append_bytes(&mut out);
6940 self.type_bitmaps.trees.append_bytes(&mut out);
6941 self.type_bitmaps.blobs.append_bytes(&mut out);
6942 self.type_bitmaps.tags.append_bytes(&mut out);
6943
6944 for (idx, entry) in self.entries.iter().enumerate() {
6945 if entry.xor_offset as usize > idx {
6946 return Err(GitError::InvalidFormat(
6947 "bitmap index entry has invalid XOR offset".into(),
6948 ));
6949 }
6950 out.extend_from_slice(&entry.object_position.to_be_bytes());
6951 out.push(entry.xor_offset);
6952 out.push(entry.flags);
6953 entry.bitmap.append_bytes(&mut out);
6954 }
6955
6956 if !self.pseudo_merges.is_empty() {
6957 append_bitmap_pseudo_merges(&mut out, &self.pseudo_merges)?;
6958 }
6959
6960 if let Some(cache) = &self.name_hash_cache {
6961 for value in cache {
6962 out.extend_from_slice(&value.to_be_bytes());
6963 }
6964 }
6965
6966 let checksum = sley_core::digest_bytes(self.format, &out)?;
6967 out.extend_from_slice(checksum.as_bytes());
6968 Ok(out)
6969 }
6970}
6971
/// Appends the pseudo-merge extension for `pseudo_merges` to `out`.
///
/// Written in order: each pseudo-merge's `commits` and `bitmap` EWAH data; a
/// lookup table with one 12-byte row per distinct commit position (ascending,
/// because the rows come from a `BTreeMap`); extended records for commits that
/// belong to more than one pseudo-merge; the offset of every pseudo-merge; and
/// a trailer holding the pseudo-merge count, the lookup row count, the lookup
/// table's offset relative to the start of this section, and the section's
/// total size.
///
/// All stored pseudo-merge and extended-record offsets are `out.len()` values,
/// i.e. measured from the start of `out`, not from the start of this section.
///
/// # Errors
///
/// `GitError::InvalidFormat` when a count exceeds `u32::MAX` or an offset or
/// size computation overflows. Errors from `to_positions` are propagated.
fn append_bitmap_pseudo_merges(
    out: &mut Vec<u8>,
    pseudo_merges: &[PackBitmapPseudoMerge],
) -> Result<()> {
    if pseudo_merges.len() > u32::MAX as usize {
        return Err(GitError::InvalidFormat(
            "too many pseudo-merge bitmap records".into(),
        ));
    }
    // Where this section begins; used for the two section-relative trailer fields.
    let start = out.len();
    let mut pseudo_offsets = Vec::with_capacity(pseudo_merges.len());
    // commit position -> offsets of every pseudo-merge that contains it.
    let mut commit_to_offsets: BTreeMap<u32, Vec<u64>> = BTreeMap::new();
    for merge in pseudo_merges {
        let offset = u64::try_from(out.len())
            .map_err(|_| GitError::InvalidFormat("bitmap file offset overflow".into()))?;
        pseudo_offsets.push(offset);
        for commit_pos in merge.commits.to_positions()? {
            commit_to_offsets
                .entry(commit_pos)
                .or_default()
                .push(offset);
        }
        merge.commits.append_bytes(out);
        merge.bitmap.append_bytes(out);
    }
    if commit_to_offsets.len() > u32::MAX as usize {
        return Err(GitError::InvalidFormat(
            "too many pseudo-merge commits".into(),
        ));
    }

    let lookup_start = out.len();
    // Each lookup row is 4 bytes of commit position plus 8 bytes of offset.
    let lookup_len = commit_to_offsets
        .len()
        .checked_mul(12)
        .ok_or_else(|| GitError::InvalidFormat("pseudo-merge lookup overflow".into()))?;
    // Extended records are laid out directly after the lookup table, so their
    // offsets can be computed before any row is written.
    let mut next_extended = u64::try_from(
        lookup_start
            .checked_add(lookup_len)
            .ok_or_else(|| GitError::InvalidFormat("pseudo-merge lookup overflow".into()))?,
    )
    .map_err(|_| GitError::InvalidFormat("bitmap file offset overflow".into()))?;
    let mut rows = Vec::with_capacity(commit_to_offsets.len());
    for (commit_pos, offsets) in commit_to_offsets {
        let extended_offset = if offsets.len() > 1 {
            // Bit 63 is reserved as the "extended" marker, so a real offset
            // must not already have it set.
            if next_extended & (1u64 << 63) != 0 {
                return Err(GitError::InvalidFormat(
                    "pseudo-merge extended offset overflow".into(),
                ));
            }
            let offset = next_extended;
            // An extended record is a 4-byte count followed by 8 bytes per offset.
            let ext_len = offsets
                .len()
                .checked_mul(8)
                .and_then(|len| len.checked_add(4))
                .ok_or_else(|| {
                    GitError::InvalidFormat("pseudo-merge extended lookup overflow".into())
                })?;
            next_extended = next_extended.checked_add(ext_len as u64).ok_or_else(|| {
                GitError::InvalidFormat("pseudo-merge extended lookup overflow".into())
            })?;
            Some(offset)
        } else {
            None
        };
        rows.push((commit_pos, offsets, extended_offset));
    }

    // Lookup table: a commit in several pseudo-merges points at its extended
    // record (bit 63 set); otherwise the row holds the single pseudo-merge offset.
    for (commit_pos, offsets, extended_offset) in &rows {
        out.extend_from_slice(&commit_pos.to_be_bytes());
        match extended_offset {
            Some(offset) => out.extend_from_slice(&(offset | (1u64 << 63)).to_be_bytes()),
            None => out.extend_from_slice(&offsets[0].to_be_bytes()),
        }
    }

    // Extended records, in the same row order used to assign their offsets above.
    for (_commit_pos, offsets, extended_offset) in &rows {
        if extended_offset.is_none() {
            continue;
        }
        let count = u32::try_from(offsets.len())
            .map_err(|_| GitError::InvalidFormat("pseudo-merge extended lookup overflow".into()))?;
        out.extend_from_slice(&count.to_be_bytes());
        for offset in offsets {
            out.extend_from_slice(&offset.to_be_bytes());
        }
    }

    // Offset of every pseudo-merge, in input order.
    for offset in &pseudo_offsets {
        out.extend_from_slice(&offset.to_be_bytes());
    }
    // Both counts were checked against u32::MAX above, so the casts are lossless.
    out.extend_from_slice(&(pseudo_merges.len() as u32).to_be_bytes());
    out.extend_from_slice(&(rows.len() as u32).to_be_bytes());
    let lookup_relative = lookup_start
        .checked_sub(start)
        .ok_or_else(|| GitError::InvalidFormat("pseudo-merge lookup underflow".into()))?;
    out.extend_from_slice(&(lookup_relative as u64).to_be_bytes());
    // The size field counts itself, hence the extra 8 bytes.
    let extension_size = out
        .len()
        .checked_sub(start)
        .and_then(|len| len.checked_add(8))
        .ok_or_else(|| GitError::InvalidFormat("pseudo-merge extension overflow".into()))?;
    out.extend_from_slice(&(extension_size as u64).to_be_bytes());
    Ok(())
}
7077
7078pub fn write_bitmap(
7087 format: ObjectFormat,
7088 pack_checksum: ObjectId,
7089 object_types: &[ObjectType],
7090 commits: &[(u32, u32, Vec<u32>)],
7091 name_hash_cache: Option<Vec<u32>>,
7092) -> Result<Vec<u8>> {
7093 let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
7094 if let Some(cache) = name_hash_cache {
7095 writer = writer.with_name_hash_cache(cache)?;
7096 }
7097 for (commit_position, commit_index_position, reachable) in commits {
7098 writer.add_commit(*commit_position, *commit_index_position, reachable)?;
7099 }
7100 writer.write()
7101}
7102
7103#[cfg(test)]
7104mod tests {
7105 use super::*;
7106 use flate2::Compression;
7107 use flate2::read::ZlibDecoder;
7108 use flate2::write::ZlibEncoder;
7109 use std::fs;
7110 use std::io::Read;
7111 use std::io::Write;
7112 use std::path::{Path, PathBuf};
7113 use std::process::Command;
7114 use std::time::{SystemTime, UNIX_EPOCH};
7115
7116 fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
7117 PackWriteOptions::new()
7118 .with_prefer_ofs_delta(prefer_ofs_delta)
7119 .with_reorder(false)
7120 }
7121
/// A one-blob SHA-1 pack parses to a single entry with the expected type,
/// body and object id.
#[test]
fn parses_single_blob_pack() {
    let bytes = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
    let pack = PackFile::parse_sha1(&bytes).expect("test operation should succeed");

    assert_eq!(pack.version, 2);
    assert_eq!(pack.entries.len(), 1);

    let only = &pack.entries[0];
    assert_eq!(only.object.object_type, ObjectType::Blob);
    assert_eq!(only.object.body, b"hello\n");
    assert_eq!(
        only.entry.oid.to_hex(),
        "ce013625030ba8dba906f756967f9e9ca394464a"
    );
}
7136
/// A one-blob SHA-256 pack parses to a single entry whose id equals the
/// object's own SHA-256 id.
#[test]
fn parses_single_blob_pack_sha256() {
    let format = ObjectFormat::Sha256;
    let bytes = single_object_pack(format, ObjectType::Blob, b"hello\n");
    let pack = PackFile::parse(&bytes, format).expect("test operation should succeed");

    assert_eq!(pack.version, 2);
    assert_eq!(pack.entries.len(), 1);

    let only = &pack.entries[0];
    let blob = &only.object;
    assert_eq!(blob.object_type, ObjectType::Blob);
    assert_eq!(blob.body, b"hello\n");

    let recomputed = blob
        .object_id(format)
        .expect("test operation should succeed");
    assert_eq!(only.entry.oid, recomputed);
}
7154
/// A v2 bundle header followed by a SHA-1 pack parses through
/// `PackFile::parse_bundle` and yields the embedded blob.
#[test]
fn parses_bundle_pack_payload_with_bundle_format() {
    let payload = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
        .expect("test operation should succeed");

    // Header text first, then the raw pack bytes.
    let mut bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n").into_bytes();
    bundle_bytes.extend(payload);

    let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let pack = PackFile::parse_bundle(&bundle).expect("test operation should succeed");

    assert_eq!(pack.entries.len(), 1);
    let blob = &pack.entries[0].object;
    assert_eq!(blob.object_type, ObjectType::Blob);
    assert_eq!(blob.body, b"bundle\n");
}
7173
7174 fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
7180 let mut pack = Vec::new();
7181 pack.extend_from_slice(b"PACK");
7182 pack.extend_from_slice(&2u32.to_be_bytes());
7183 pack.extend_from_slice(&1u32.to_be_bytes());
7184 write_pack_entry_header_kind(&mut pack, 3, declared_size);
7186 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7187 encoder
7188 .write_all(real_body)
7189 .expect("test operation should succeed");
7190 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7191 let checksum =
7192 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7193 pack.extend_from_slice(checksum.as_bytes());
7194 pack
7195 }
7196
/// Packs whose entry header declares an absurd size must be rejected with an
/// error. Parsing runs on a separate thread so a panic shows up as a failed
/// join instead of tearing down the test.
#[test]
fn rejects_decompression_bomb_header_without_oom() {
    let bomb_sizes: [u64; 3] = [u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4];
    for declared in bomb_sizes {
        let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
        let joined = std::thread::spawn(move || PackFile::parse_sha1(&pack)).join();
        assert!(
            joined.is_ok(),
            "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
        );
        let outcome = joined.expect("parse thread should not panic on a bomb header");
        assert!(
            outcome.is_err(),
            "bomb header (declared={declared}) should be rejected as invalid"
        );
    }
}
7228
7229 fn lying_result_size_delta_pack(
7236 format: ObjectFormat,
7237 declared_result_size: u64,
7238 delta_kind: DeltaKind,
7239 ) -> Vec<u8> {
7240 let base = b"hello";
7241 let result = b"hello world"; let mut delta = Vec::new();
7245 write_delta_varint(&mut delta, base.len() as u64);
7246 write_delta_varint(&mut delta, declared_result_size);
7247 let suffix = &result[base.len()..];
7249 delta.push(0x90); delta.push(base.len() as u8);
7251 delta.push(suffix.len() as u8);
7252 delta.extend_from_slice(suffix);
7253
7254 let mut pack = Vec::new();
7255 pack.extend_from_slice(b"PACK");
7256 pack.extend_from_slice(&2u32.to_be_bytes());
7257 pack.extend_from_slice(&2u32.to_be_bytes());
7258
7259 let base_offset = pack.len();
7260 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7261 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7262 encoder
7263 .write_all(base)
7264 .expect("test operation should succeed");
7265 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7266
7267 let delta_offset = pack.len();
7268 write_pack_entry_header_kind(
7269 &mut pack,
7270 match delta_kind {
7271 DeltaKind::Offset => 6,
7272 DeltaKind::Ref => 7,
7273 },
7274 delta.len() as u64,
7275 );
7276 match delta_kind {
7277 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7278 DeltaKind::Ref => {
7279 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7280 .expect("test operation should succeed");
7281 pack.extend_from_slice(base_oid.as_bytes());
7282 }
7283 }
7284 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7285 encoder
7286 .write_all(&delta)
7287 .expect("test operation should succeed");
7288 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7289
7290 let checksum =
7291 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7292 pack.extend_from_slice(checksum.as_bytes());
7293 pack
7294 }
7295
/// Deltas whose header declares a huge result size must be rejected with an
/// error for both delta kinds. Parsing runs on a separate thread so a panic
/// surfaces as a failed join.
#[test]
fn rejects_delta_result_size_bomb_without_oom() {
    const BOMB_SIZES: [u64; 2] = [u64::MAX, 1024 * 1024 * 1024 * 1024];
    for declared in BOMB_SIZES {
        for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
            let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
            let joined = std::thread::spawn(move || PackFile::parse_sha1(&pack)).join();
            assert!(
                joined.is_ok(),
                "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
                 instead of erroring cleanly"
            );
            let outcome = joined.expect("parse thread should not panic on a delta bomb");
            assert!(
                outcome.is_err(),
                "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
                 as invalid (result.len() != declared)"
            );
        }
    }
}
7328
/// An honest delta pack still resolves to both bodies, for each delta kind.
#[test]
fn applies_legitimate_delta_after_result_size_bound() {
    let source = b"hello";
    let target = b"hello world";
    for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
        let bytes = two_object_delta_pack(ObjectFormat::Sha1, source, target, delta_kind);
        let pack = PackFile::parse_sha1(&bytes).expect("legitimate delta should resolve");
        assert_eq!(pack.entries.len(), 2);
        assert_eq!(pack.entries[0].object.body, source);
        assert_eq!(pack.entries[1].object.body, target);
    }
}
7344
/// Pins `bounded_inflate_reserve` for four (declared, compressed) input pairs.
#[test]
fn bounded_inflate_reserve_caps_attacker_declared_size() {
    // (declared size, compressed length, expected reservation)
    let cases = [
        (u64::MAX as usize, 10, 10 * 1032),
        (usize::MAX, usize::MAX, MAX_INFLATE_RESERVE),
        (1000, 500, 1000),
        (0, 0, 64),
    ];
    for (declared, compressed, expected) in cases {
        assert_eq!(bounded_inflate_reserve(declared, compressed), expected);
    }
}
7361
/// A v3 bundle that announces `sha256` but carries a SHA-1 pack must fail in
/// `PackFile::parse_bundle`.
#[test]
fn rejects_bundle_pack_payload_with_wrong_object_format() {
    let payload = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
        .expect("test operation should succeed");

    // Header text first, then the mismatching pack bytes.
    let mut bundle_bytes =
        format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
            .into_bytes();
    bundle_bytes.extend(payload);

    let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let outcome = PackFile::parse_bundle(&bundle);
    assert!(outcome.is_err());
}
7378
7379 fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
7380 let owned = PackIndex::parse(index, format).expect("test operation should succeed");
7381 let view = PackIndexView::parse(index, format).expect("test operation should succeed");
7382 let owned_view =
7383 PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
7384 .expect("test operation should succeed");
7385
7386 assert_eq!(view.version, owned.version);
7387 assert_eq!(view.count, owned.entries.len());
7388 assert_eq!(view.count(), owned.entries.len());
7389 assert_eq!(view.fanout(), &owned.fanout);
7390 assert_eq!(view.pack_checksum, owned.pack_checksum);
7391 assert_eq!(view.index_checksum, owned.index_checksum);
7392 assert_eq!(owned_view.version, owned.version);
7393 assert_eq!(owned_view.count(), owned.entries.len());
7394 assert_eq!(owned_view.fanout(), &owned.fanout);
7395 assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
7396 assert_eq!(owned_view.index_checksum, owned.index_checksum);
7397 for entry in &owned.entries {
7398 let owned_found = owned
7399 .find(&entry.oid)
7400 .expect("test operation should succeed");
7401 let expected = Some(PackIndexLookup {
7402 crc32: owned_found.crc32,
7403 offset: owned_found.offset,
7404 });
7405 assert_eq!(view.find(&entry.oid), expected);
7406 assert_eq!(owned_view.find(&entry.oid), expected);
7407 }
7408 }
7409
/// Writing one undeltified SHA-1 blob yields a pack and index that parse back
/// consistently; the index locates the blob at offset 12.
#[test]
fn writes_pack_and_index_that_round_trip() {
    let blob = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
    let blob_oid = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&blob))
        .expect("test operation should succeed");
    let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, blob);
    assert_eq!(index.pack_checksum, pack.checksum);

    let located = index
        .find(&blob_oid)
        .expect("test operation should succeed");
    assert_eq!(located.offset, 12);
}
7431
/// The index views agree with the owned index for an eight-blob SHA-1 pack,
/// and looking up an absent id through the view returns `None`.
#[test]
fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
    let mut blobs = Vec::with_capacity(8);
    for idx in 0..8 {
        let body = format!("borrowed pack index view sha1 object {idx}\n").into_bytes();
        blobs.push(EncodedObject::new(ObjectType::Blob, body));
    }
    let written = PackFile::write_packed(&blobs, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);

    let absent = sley_core::object_id_for_bytes(
        ObjectFormat::Sha1,
        "blob",
        b"not present in borrowed index\n",
    )
    .expect("test operation should succeed");
    let view =
        PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
    assert_eq!(view.find(&absent), None);
}
7457
/// Writing one undeltified SHA-256 blob yields a pack and index that parse
/// back consistently, carry SHA-256 checksums, and locate the blob at offset 12.
#[test]
fn writes_sha256_pack_and_index_that_round_trip() {
    let format = ObjectFormat::Sha256;
    let blob = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
    let blob_oid = blob
        .object_id(format)
        .expect("test operation should succeed");

    let written = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");
    let pack = PackFile::parse(&written.pack, format).expect("test operation should succeed");
    let index = PackIndex::parse(&written.index, format).expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, blob);
    assert_eq!(index.pack_checksum, pack.checksum);
    assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
    assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);

    let located = index
        .find(&blob_oid)
        .expect("test operation should succeed");
    assert_eq!(located.offset, 12);
}
7483
/// The index views agree with the owned index for a four-blob SHA-256 pack.
#[test]
fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
    let mut blobs = Vec::with_capacity(4);
    for idx in 0..4 {
        let body = format!("borrowed pack index view sha256 object {idx}\n").into_bytes();
        blobs.push(EncodedObject::new(ObjectType::Blob, body));
    }
    let written = PackFile::write_undeltified(&blobs, ObjectFormat::Sha256)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
}
7499
/// Re-indexing raw SHA-256 pack bytes reproduces the checksum and entry list
/// reported when the pack was written.
#[test]
fn indexes_existing_sha256_pack_bytes() {
    let format = ObjectFormat::Sha256;
    let blob = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
    let written = PackFile::write_undeltified(std::slice::from_ref(&blob), format)
        .expect("test operation should succeed");

    let reindexed = PackIndex::write_v2_for_pack(&written.pack, format)
        .expect("test operation should succeed");
    let parsed = PackIndex::parse(&reindexed.index, format)
        .expect("test operation should succeed");

    // Result of the indexing call itself.
    assert_eq!(reindexed.pack_checksum, written.checksum);
    assert_eq!(reindexed.entries, written.entries);
    // The index bytes it produced, parsed back.
    assert_eq!(parsed.pack_checksum, written.checksum);
    assert_eq!(parsed.entries, written.entries);
}
7517
/// Re-indexing a delta pack reproduces the written checksum and entries, and
/// the index reports the second entry's offset and CRC for the changed blob.
#[test]
fn indexes_existing_delta_pack_bytes() {
    let (base, changed) = similar_blob_objects();
    let changed_oid = changed
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    let options = delta_pack_options(true);
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");

    let reindexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
        .expect("test operation should succeed");
    let parsed =
        PackIndex::parse_v2_sha1(&reindexed.index).expect("test operation should succeed");

    assert_eq!(reindexed.pack_checksum, written.checksum);
    assert_eq!(reindexed.entries, written.entries);

    let expected = &written.entries[1];
    let by_offset = parsed
        .find(&changed_oid)
        .expect("test operation should succeed");
    assert_eq!(by_offset.offset, expected.offset);
    let by_crc = parsed
        .find(&changed_oid)
        .expect("test operation should succeed");
    assert_eq!(by_crc.crc32, expected.crc32);
}
7554
/// With OFS_DELTA preference off, the second entry is written as a REF_DELTA
/// and the pack and index still round-trip.
#[test]
fn writes_ref_delta_pack_and_index_that_round_trip() {
    let (base, changed) = similar_blob_objects();
    let changed_oid = changed
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    let options = delta_pack_options(false);
    let written = PackFile::write_packed_with_options(
        &[base.clone(), changed.clone()],
        ObjectFormat::Sha1,
        &options,
    )
    .expect("test operation should succeed");

    // Inspect the raw header of the second entry.
    let mut cursor = written.entries[1].offset as usize;
    let second_header = parse_entry_header(&written.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::RefDelta);

    let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
    assert_eq!(pack.entries[0].object, base);
    assert_eq!(pack.entries[1].object, changed);
    assert_eq!(index.pack_checksum, pack.checksum);

    let located = index
        .find(&changed_oid)
        .expect("test operation should succeed");
    assert_eq!(located.offset, written.entries[1].offset);
}
7587
/// For an OFS_DELTA pack, `read_object_at_arc` at every entry offset returns
/// the same object as a full parse, with a base resolver that always
/// answers `Ok(None)`.
#[test]
fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(true);
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");

    // Make sure the writer really produced an offset delta.
    let mut cursor = written.entries[1].offset as usize;
    let second_header = parse_entry_header(&written.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let full = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    for po in &full.entries {
        let at = po.entry.offset;
        let fetched = read_object_at_arc(&written.pack, at, ObjectFormat::Sha1, |_| Ok(None))
            .expect("test operation should succeed");
        assert_eq!(*fetched, po.object, "offset {}", po.entry.offset);
    }
}
7617
/// Test-only `HeaderTypeCache` backed by a `HashMap` from pack offset to the
/// cached `(ObjectType, u64)` header pair.
#[derive(Default)]
struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
7622
7623 impl HeaderTypeCache for MapHeaderTypeCache {
7624 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
7625 self.0.get(&pack_offset).copied()
7626 }
7627 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
7628 self.0.insert(pack_offset, header);
7629 }
7630 }
7631
/// Header lookups through `read_object_header_at_with_cache` match the
/// uncached reader on a first (cold) pass, and on a second (warm) pass return
/// the same values without ever invoking the base resolver.
#[test]
fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(true);
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");

    // Make sure the writer really produced an offset delta.
    let mut cursor = written.entries[1].offset as usize;
    let second_header = parse_entry_header(&written.pack, &mut cursor)
        .expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let full = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let mut header_cache = MapHeaderTypeCache::default();

    // Cold pass: compare uncached and cached readers while filling the cache.
    for po in &full.entries {
        let truth = (po.object.object_type, po.object.body.len() as u64);
        let plain =
            read_object_header_at(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
                Ok(None)
            })
            .expect("test operation should succeed");
        assert_eq!(
            plain, truth,
            "uncached header at offset {}",
            po.entry.offset
        );
        let cold = read_object_header_at_with_cache(
            &written.pack,
            po.entry.offset,
            ObjectFormat::Sha1,
            |_| Ok(None),
            &mut header_cache,
        )
        .expect("test operation should succeed");
        assert_eq!(cold, plain, "cold cache at offset {}", po.entry.offset);
    }

    // Warm pass: the resolver panics if it is ever consulted.
    for po in &full.entries {
        let truth = (po.object.object_type, po.object.body.len() as u64);
        let warm = read_object_header_at_with_cache(
            &written.pack,
            po.entry.offset,
            ObjectFormat::Sha1,
            |_| panic!("warm cache must not re-walk the chain"),
            &mut header_cache,
        )
        .expect("test operation should succeed");
        assert_eq!(warm, truth, "warm cache at offset {}", po.entry.offset);
    }
}
7693
/// For a REF_DELTA pack, `read_object_at_arc` at every entry offset returns
/// the same object as a full parse when bases are resolved from an id-keyed map.
#[test]
fn read_object_at_matches_full_parse_for_ref_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(false);
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");
    let full = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    // Resolver backing store: every parsed object keyed by its id.
    let mut by_oid: HashMap<ObjectId, Arc<EncodedObject>> = HashMap::new();
    for po in &full.entries {
        by_oid.insert(po.entry.oid, Arc::new(po.object.clone()));
    }

    for po in &full.entries {
        let fetched =
            read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
                Ok(by_oid.get(oid).cloned())
            })
            .expect("test operation should succeed");
        assert_eq!(*fetched, po.object);
    }
}
7719
/// Test-only `PackDeltaCache` that counts how often it serves and stores
/// objects. Interior mutability is used because the trait methods take `&self`.
#[derive(Default)]
struct CountingDeltaCache {
    /// Cached objects keyed by the offset passed to `insert`.
    map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
    /// Number of `get` calls that found an entry.
    hits: std::cell::Cell<usize>,
    /// Number of `insert` calls.
    inserts: std::cell::Cell<usize>,
}
7729
7730 impl PackDeltaCache for CountingDeltaCache {
7731 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
7732 let hit = self.map.borrow().get(&offset).cloned();
7733 if hit.is_some() {
7734 self.hits.set(self.hits.get() + 1);
7735 }
7736 hit
7737 }
7738 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
7739 self.inserts.set(self.inserts.get() + 1);
7740 self.map.borrow_mut().insert(offset, object);
7741 }
7742 }
7743
7744 #[test]
7745 fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
7746 let mut objects = Vec::new();
7749 for idx in 0..8u32 {
7750 let mut body = vec![b'x'; 4096];
7751 body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
7752 objects.push(EncodedObject::new(ObjectType::Blob, body));
7753 }
7754 let options = delta_pack_options(true);
7755 let written = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
7756 .expect("test operation should succeed");
7757 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7758
7759 let cache = CountingDeltaCache::default();
7760 for _ in 0..2 {
7763 for po in &parsed.entries {
7764 let got = read_object_at_with_cache_arc(
7765 &written.pack,
7766 po.entry.offset,
7767 ObjectFormat::Sha1,
7768 |_| Ok(None),
7769 &cache,
7770 )
7771 .expect("test operation should succeed");
7772 assert_eq!(*got, po.object, "offset {}", po.entry.offset);
7773 }
7774 }
7775 assert!(cache.hits.get() > 0, "cache never served a warm object");
7778 }
7779
7780 #[test]
7781 fn writes_ofs_delta_pack_and_index_that_round_trip() {
7782 let (base, changed) = similar_blob_objects();
7783 let options = delta_pack_options(true);
7784 let written = PackFile::write_packed_with_options(
7785 &[base.clone(), changed.clone()],
7786 ObjectFormat::Sha1,
7787 &options,
7788 )
7789 .expect("test operation should succeed");
7790 let mut second_offset = written.entries[1].offset as usize;
7791 let header = parse_entry_header(&written.pack, &mut second_offset)
7792 .expect("test operation should succeed");
7793 assert_eq!(header.kind, PackObjectKind::OfsDelta);
7794
7795 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
7796 let index =
7797 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
7798 let oid = changed
7799 .object_id(ObjectFormat::Sha1)
7800 .expect("test operation should succeed");
7801 assert_eq!(pack.entries[0].object, base);
7802 assert_eq!(pack.entries[1].object, changed);
7803 assert_eq!(index.pack_checksum, pack.checksum);
7804 assert_eq!(
7805 index
7806 .find(&oid)
7807 .expect("test operation should succeed")
7808 .offset,
7809 written.entries[1].offset
7810 );
7811 }
7812
7813 #[test]
7814 fn resolves_ofs_delta_pack_entry() {
7815 let base = b"hello";
7816 let result = b"hello world";
7817 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Offset);
7818 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
7819 assert_eq!(parsed.entries.len(), 2);
7820 assert_eq!(parsed.entries[0].object.body, base);
7821 assert_eq!(parsed.entries[1].object.body, result);
7822 assert_eq!(
7823 parsed.entries[1].entry.oid,
7824 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
7825 .expect("test operation should succeed")
7826 );
7827 }
7828
7829 #[test]
7830 fn resolves_ref_delta_pack_entry() {
7831 let base = b"hello";
7832 let result = b"hello world";
7833 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, DeltaKind::Ref);
7834 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
7835 assert_eq!(parsed.entries.len(), 2);
7836 assert_eq!(parsed.entries[0].object.body, base);
7837 assert_eq!(parsed.entries[1].object.body, result);
7838 assert_eq!(
7839 parsed.entries[1].entry.oid,
7840 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
7841 .expect("test operation should succeed")
7842 );
7843 }
7844
7845 #[test]
7846 fn resolves_thin_ref_delta_pack_entry_with_external_base() {
7847 let base = b"hello";
7848 let result = b"hello world";
7849 let pack = thin_ref_delta_pack(ObjectFormat::Sha1, base, result);
7850 assert!(PackFile::parse_sha1(&pack).is_err());
7851
7852 let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base)
7853 .expect("test operation should succeed");
7854 let parsed = PackFile::parse_thin(&pack, ObjectFormat::Sha1, |oid| {
7855 if oid == &base_oid {
7856 Ok(Some(EncodedObject::new(ObjectType::Blob, base.to_vec())))
7857 } else {
7858 Ok(None)
7859 }
7860 })
7861 .expect("test operation should succeed");
7862 assert_eq!(parsed.entries.len(), 1);
7863 assert_eq!(parsed.entries[0].object.body, result);
7864 assert_eq!(
7865 parsed.entries[0].entry.oid,
7866 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result)
7867 .expect("test operation should succeed")
7868 );
7869 }
7870
7871 #[test]
7872 fn rejects_bad_pack_checksum() {
7873 let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
7874 let last = pack.len() - 1;
7875 pack[last] ^= 1;
7876 assert!(PackFile::parse_sha1(&pack).is_err());
7877 }
7878
7879 #[test]
7880 fn raw_pack_index_rejects_bad_pack_checksum() {
7881 let mut pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
7882 let last = pack.len() - 1;
7883 pack[last] ^= 1;
7884 assert!(PackIndex::write_v2_for_pack_sha1(&pack).is_err());
7885 }
7886
7887 #[test]
7888 fn pack_index_writer_rejects_duplicate_object_ids() {
7889 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
7890 .expect("test operation should succeed");
7891 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7892 .expect("test operation should succeed");
7893 let entries = vec![
7894 PackIndexEntry {
7895 oid,
7896 crc32: 1,
7897 offset: 12,
7898 },
7899 PackIndexEntry {
7900 oid,
7901 crc32: 2,
7902 offset: 24,
7903 },
7904 ];
7905 assert!(PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum).is_err());
7906 }
7907
7908 #[test]
7909 fn parses_single_entry_pack_index() {
7910 let oid = ObjectId::from_hex(
7911 ObjectFormat::Sha1,
7912 "ce013625030ba8dba906f756967f9e9ca394464a",
7913 )
7914 .expect("test operation should succeed");
7915 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7916 .expect("test operation should succeed");
7917 let index = single_entry_index(
7918 ObjectFormat::Sha1,
7919 oid,
7920 0x1234_5678,
7921 12,
7922 pack_checksum.clone(),
7923 );
7924 let parsed = PackIndex::parse_v2_sha1(&index).expect("test operation should succeed");
7925 assert_eq!(parsed.version, 2);
7926 assert_eq!(parsed.pack_checksum, pack_checksum);
7927 assert_eq!(parsed.entries.len(), 1);
7928 assert_eq!(
7929 parsed
7930 .find(&oid)
7931 .expect("test operation should succeed")
7932 .offset,
7933 12
7934 );
7935 assert_eq!(
7936 parsed
7937 .find(&oid)
7938 .expect("test operation should succeed")
7939 .crc32,
7940 0x1234_5678
7941 );
7942 assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
7943 }
7944
7945 #[test]
7946 fn parses_single_entry_pack_index_v1() {
7947 let oid = ObjectId::from_hex(
7948 ObjectFormat::Sha1,
7949 "ce013625030ba8dba906f756967f9e9ca394464a",
7950 )
7951 .expect("test operation should succeed");
7952 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7953 .expect("test operation should succeed");
7954 let index =
7955 single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());
7956 let parsed =
7957 PackIndex::parse(&index, ObjectFormat::Sha1).expect("test operation should succeed");
7958 assert_eq!(parsed.version, 1);
7959 assert_eq!(parsed.pack_checksum, pack_checksum);
7960 assert_eq!(parsed.entries.len(), 1);
7961 assert_eq!(
7962 parsed
7963 .find(&oid)
7964 .expect("test operation should succeed")
7965 .offset,
7966 0x1234_5678
7967 );
7968 assert_eq!(
7969 parsed
7970 .find(&oid)
7971 .expect("test operation should succeed")
7972 .crc32,
7973 0
7974 );
7975 assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
7976 }
7977
7978 #[test]
7979 fn rejects_bad_pack_index_v1_checksum() {
7980 let oid = ObjectId::from_hex(
7981 ObjectFormat::Sha1,
7982 "ce013625030ba8dba906f756967f9e9ca394464a",
7983 )
7984 .expect("test operation should succeed");
7985 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
7986 .expect("test operation should succeed");
7987 let mut index = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
7988 let last = index.len() - 1;
7989 index[last] ^= 1;
7990 assert!(PackIndex::parse(&index, ObjectFormat::Sha1).is_err());
7991 }
7992
7993 #[test]
7994 fn pack_index_view_reads_v2_large_offsets() {
7995 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
7996 .expect("test operation should succeed");
7997 let second =
7998 sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
7999 .expect("test operation should succeed");
8000 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8001 .expect("test operation should succeed");
8002 let entries = vec![
8003 PackIndexEntry {
8004 oid: first,
8005 crc32: 0x1111_2222,
8006 offset: 0x8000_0000,
8007 },
8008 PackIndexEntry {
8009 oid: second,
8010 crc32: 0x3333_4444,
8011 offset: 0x1_0000_0042,
8012 },
8013 ];
8014 let index = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
8015 .expect("test operation should succeed");
8016
8017 assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha1);
8018 let view = PackIndexView::parse(&index, ObjectFormat::Sha1)
8019 .expect("test operation should succeed");
8020 for entry in entries {
8021 assert_eq!(
8022 view.find(&entry.oid),
8023 Some(PackIndexLookup {
8024 crc32: entry.crc32,
8025 offset: entry.offset,
8026 })
8027 );
8028 }
8029 }
8030
8031 #[test]
8032 fn pack_index_view_default_parse_checks_index_checksum() {
8033 let oid = ObjectId::from_hex(
8034 ObjectFormat::Sha1,
8035 "ce013625030ba8dba906f756967f9e9ca394464a",
8036 )
8037 .expect("test operation should succeed");
8038 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8039 .expect("test operation should succeed");
8040 let mut index = single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
8041 let last = index.len() - 1;
8042 index[last] ^= 1;
8043
8044 assert!(PackIndexView::parse(&index, ObjectFormat::Sha1).is_err());
8045 let view = PackIndexView::parse_without_checksum(&index, ObjectFormat::Sha1)
8046 .expect("test operation should succeed");
8047 let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
8048 Arc::from(index.clone().into_boxed_slice()),
8049 ObjectFormat::Sha1,
8050 )
8051 .expect("test operation should succeed");
8052 assert_eq!(
8053 view.find(&oid),
8054 Some(PackIndexLookup {
8055 crc32: 0x1234_5678,
8056 offset: 12,
8057 })
8058 );
8059 assert_eq!(
8060 trusted_view.find(&oid),
8061 Some(PackIndexLookup {
8062 crc32: 0x1234_5678,
8063 offset: 12,
8064 })
8065 );
8066 }
8067
8068 #[test]
8069 fn parses_pack_reverse_index() {
8070 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8071 .expect("test operation should succeed");
8072 let reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
8073 .expect("test operation should succeed");
8074 let parsed = PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 3)
8075 .expect("test operation should succeed");
8076 assert_eq!(parsed.version, 1);
8077 assert_eq!(parsed.format, ObjectFormat::Sha1);
8078 assert_eq!(parsed.positions, vec![2, 0, 1]);
8079 assert_eq!(parsed.pack_checksum, pack_checksum);
8080 assert_eq!(
8081 PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
8082 .expect("test operation should succeed"),
8083 reverse_index
8084 );
8085 }
8086
8087 #[test]
8088 fn rejects_bad_pack_reverse_index_checksum() {
8089 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8090 .expect("test operation should succeed");
8091 let mut reverse_index = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
8092 .expect("test operation should succeed");
8093 let last = reverse_index.len() - 1;
8094 reverse_index[last] ^= 1;
8095 assert!(PackReverseIndex::parse(&reverse_index, ObjectFormat::Sha1, 1).is_err());
8096 }
8097
8098 #[test]
8099 fn rejects_bad_pack_reverse_index_positions() {
8100 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8101 .expect("test operation should succeed");
8102 let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], pack_checksum.clone());
8103 assert!(PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2).is_err());
8104 let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], pack_checksum);
8105 assert!(PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2).is_err());
8106 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8107 .expect("test operation should succeed");
8108 assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &pack_checksum).is_err());
8109 assert!(PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &pack_checksum).is_err());
8110 }
8111
8112 #[test]
8113 fn parses_pack_mtimes() {
8114 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8115 .expect("test operation should succeed");
8116 let mtimes = PackMtimes::write(
8117 ObjectFormat::Sha1,
8118 &[1, 1_700_000_000, u32::MAX],
8119 &pack_checksum,
8120 )
8121 .expect("test operation should succeed");
8122 let parsed = PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 3)
8123 .expect("test operation should succeed");
8124 assert_eq!(parsed.version, 1);
8125 assert_eq!(parsed.format, ObjectFormat::Sha1);
8126 assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
8127 assert_eq!(parsed.pack_checksum, pack_checksum);
8128 assert_eq!(
8129 PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
8130 .expect("test operation should succeed"),
8131 mtimes
8132 );
8133 }
8134
8135 #[test]
8136 fn rejects_bad_pack_mtimes_checksum() {
8137 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8138 .expect("test operation should succeed");
8139 let mut mtimes = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
8140 .expect("test operation should succeed");
8141 let last = mtimes.len() - 1;
8142 mtimes[last] ^= 1;
8143 assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
8144 }
8145
8146 #[test]
8147 fn rejects_bad_pack_mtimes_shape() {
8148 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8149 .expect("test operation should succeed");
8150 let mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
8151 assert!(PackMtimes::parse(&mtimes, ObjectFormat::Sha1, 1).is_err());
8152
8153 let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
8154 wrong_hash[11] = 2;
8155 let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
8156 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
8157 .expect("test operation should succeed");
8158 wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
8159 assert!(PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1).is_err());
8160 }
8161
8162 #[test]
8163 fn parses_multi_pack_index_header_and_chunk_lookup() {
8164 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
8165 .expect("test operation should succeed");
8166 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
8167 .expect("test operation should succeed");
8168 let chunks = midx_chunks_with_pack_names(
8169 ObjectFormat::Sha1,
8170 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
8171 &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
8172 );
8173 let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
8174 let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
8175 .expect("test operation should succeed");
8176 assert_eq!(parsed.version, 2);
8177 assert_eq!(parsed.format, ObjectFormat::Sha1);
8178 assert_eq!(parsed.pack_count, 2);
8179 assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
8180 assert_eq!(parsed.object_count, 2);
8181 assert_eq!(parsed.objects.len(), 2);
8182 assert_eq!(
8183 parsed
8184 .find(&first)
8185 .expect("test operation should succeed")
8186 .pack_int_id,
8187 0
8188 );
8189 assert_eq!(
8190 parsed
8191 .find(&first)
8192 .expect("test operation should succeed")
8193 .offset,
8194 12
8195 );
8196 assert_eq!(
8197 parsed
8198 .find(&second)
8199 .expect("test operation should succeed")
8200 .pack_int_id,
8201 1
8202 );
8203 assert_eq!(
8204 parsed
8205 .find(&second)
8206 .expect("test operation should succeed")
8207 .offset,
8208 0x1_0000_0000
8209 );
8210 assert_eq!(parsed.reverse_index, None);
8211 assert_eq!(parsed.bitmapped_packs, None);
8212 assert_eq!(parsed.chunks.len(), 5);
8213 assert_eq!(parsed.chunks[0].id, *b"PNAM");
8214 assert_eq!(parsed.chunks[0].offset, 84);
8215 assert_eq!(parsed.chunks[0].len, 24);
8216 assert_eq!(parsed.chunks[1].id, *b"OIDF");
8217 assert_eq!(parsed.chunks[1].offset, 108);
8218 assert_eq!(parsed.chunks[1].len, 1024);
8219 }
8220
8221 #[test]
8222 fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
8223 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
8224 .expect("test operation should succeed");
8225 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
8226 .expect("test operation should succeed");
8227 let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
8228 .expect("test operation should succeed");
8229 let chunks = midx_chunks_with_pack_names(
8230 ObjectFormat::Sha1,
8231 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
8232 &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
8233 );
8234 let midx = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
8235 let lookup = MultiPackIndexOidLookup::parse(midx, ObjectFormat::Sha1)
8236 .expect("test operation should succeed");
8237
8238 assert!(lookup.contains(&first));
8239 assert!(lookup.contains(&second));
8240 assert!(!lookup.contains(&missing));
8241
8242 let first_entry = lookup
8243 .find(&first)
8244 .expect("test operation should succeed")
8245 .expect("object should be present");
8246 assert_eq!(
8247 lookup.pack_name(first_entry.pack_int_id),
8248 Some("pack-a.idx")
8249 );
8250 assert_eq!(first_entry.offset, 12);
8251
8252 let second_entry = lookup
8253 .find(&second)
8254 .expect("test operation should succeed")
8255 .expect("object should be present");
8256 assert_eq!(
8257 lookup.pack_name(second_entry.pack_int_id),
8258 Some("pack-b.idx")
8259 );
8260 assert_eq!(second_entry.offset, 0x1_0000_0000);
8261 assert!(
8262 lookup
8263 .find(&missing)
8264 .expect("test operation should succeed")
8265 .is_none()
8266 );
8267 }
8268
8269 #[test]
8270 fn rejects_bad_multi_pack_index_checksum() {
8271 let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
8272 let mut midx = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
8273 let last = midx.len() - 1;
8274 midx[last] ^= 1;
8275 assert!(MultiPackIndex::parse(&midx, ObjectFormat::Sha1).is_err());
8276 }
8277
8278 #[test]
8279 fn rejects_bad_multi_pack_index_shape() {
8280 let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
8281 let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
8282 wrong_hash[5] = 2;
8283 let checksum_offset = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
8284 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..checksum_offset])
8285 .expect("test operation should succeed");
8286 wrong_hash[checksum_offset..].copy_from_slice(checksum.as_bytes());
8287 assert!(MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1).is_err());
8288
8289 let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
8290 missing_terminator[12] = b'B';
8291 let checksum_offset = missing_terminator.len() - ObjectFormat::Sha1.raw_len();
8292 let checksum =
8293 sley_core::digest_bytes(ObjectFormat::Sha1, &missing_terminator[..checksum_offset])
8294 .expect("test operation should succeed");
8295 missing_terminator[checksum_offset..].copy_from_slice(checksum.as_bytes());
8296 assert!(MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1).is_err());
8297
8298 let mut bad_offset = multi_pack_index(
8299 ObjectFormat::Sha1,
8300 2,
8301 0,
8302 &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
8303 );
8304 bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
8305 let checksum_offset = bad_offset.len() - ObjectFormat::Sha1.raw_len();
8306 let checksum = sley_core::digest_bytes(ObjectFormat::Sha1, &bad_offset[..checksum_offset])
8307 .expect("test operation should succeed");
8308 bad_offset[checksum_offset..].copy_from_slice(checksum.as_bytes());
8309 assert!(MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1).is_err());
8310 }
8311
8312 #[test]
8313 fn rejects_bad_multi_pack_index_pack_names() {
8314 let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
8315 assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());
8316
8317 let too_few = multi_pack_index(
8318 ObjectFormat::Sha1,
8319 2,
8320 2,
8321 &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]),
8322 );
8323 assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());
8324
8325 let bad_padding = multi_pack_index(
8326 ObjectFormat::Sha1,
8327 2,
8328 1,
8329 &midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]),
8330 );
8331 assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());
8332
8333 let unsorted_v1 = multi_pack_index(
8334 ObjectFormat::Sha1,
8335 1,
8336 2,
8337 &midx_chunks_with_pack_names(
8338 ObjectFormat::Sha1,
8339 b"pack-b.idx\0pack-a.idx\0".to_vec(),
8340 &[],
8341 ),
8342 );
8343 assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());
8344
8345 let unsorted_v2 = multi_pack_index(
8346 ObjectFormat::Sha1,
8347 2,
8348 2,
8349 &midx_chunks_with_pack_names(
8350 ObjectFormat::Sha1,
8351 b"pack-b.idx\0pack-a.idx\0".to_vec(),
8352 &[],
8353 ),
8354 );
8355 let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
8356 .expect("test operation should succeed");
8357 assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
8358 }
8359
8360 #[test]
8361 fn rejects_bad_multi_pack_index_object_tables() {
8362 let oid_a = ObjectId::from_hex(
8363 ObjectFormat::Sha1,
8364 "1111111111111111111111111111111111111111",
8365 )
8366 .expect("test operation should succeed");
8367 let oid_b = ObjectId::from_hex(
8368 ObjectFormat::Sha1,
8369 "2222222222222222222222222222222222222222",
8370 )
8371 .expect("test operation should succeed");
8372
8373 let missing_oidf = multi_pack_index(
8374 ObjectFormat::Sha1,
8375 2,
8376 1,
8377 &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
8378 );
8379 assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());
8380
8381 let bad_fanout = vec![
8382 (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
8383 (*b"OIDF", vec![0; 256 * 4]),
8384 (*b"OIDL", oid_a.as_bytes().to_vec()),
8385 (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
8386 ];
8387 let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout);
8388 assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());
8389
8390 let mut unsorted = Vec::new();
8391 unsorted.push((*b"PNAM", b"pack-a.idx\0\0".to_vec()));
8392 unsorted.push((*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])));
8393 let mut oid_lookup = Vec::new();
8394 oid_lookup.extend_from_slice(oid_b.as_bytes());
8395 oid_lookup.extend_from_slice(oid_a.as_bytes());
8396 unsorted.push((*b"OIDL", oid_lookup));
8397 unsorted.push((
8398 *b"OOFF",
8399 midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
8400 ));
8401 let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted);
8402 assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());
8403
8404 let bad_pack = multi_pack_index(
8405 ObjectFormat::Sha1,
8406 2,
8407 1,
8408 &midx_chunks_with_pack_names(
8409 ObjectFormat::Sha1,
8410 b"pack-a.idx\0\0".to_vec(),
8411 &[(oid_a.clone(), 1, 12)],
8412 ),
8413 );
8414 assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());
8415
8416 let mut large_offsets = Vec::new();
8417 let missing_loff = vec![
8418 (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
8419 (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
8420 (*b"OIDL", oid_a.as_bytes().to_vec()),
8421 (
8422 *b"OOFF",
8423 midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
8424 ),
8425 ];
8426 let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff);
8427 assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());
8428
8429 let mut bad_loff =
8430 midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
8431 bad_loff.push((*b"LOFF", vec![0]));
8432 let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff);
8433 assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
8434 }
8435
8436 #[test]
8437 fn parses_multi_pack_index_bitmap_chunks() {
8438 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
8439 .expect("test operation should succeed");
8440 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
8441 .expect("test operation should succeed");
8442 let mut chunks = midx_chunks_with_pack_names(
8443 ObjectFormat::Sha1,
8444 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
8445 &[(first, 0, 12), (second, 1, 24)],
8446 );
8447 chunks.push((*b"RIDX", midx_u32_table(&[1, 0])));
8448 chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])));
8449 let midx = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
8450
8451 let parsed = MultiPackIndex::parse(&midx, ObjectFormat::Sha1)
8452 .expect("test operation should succeed");
8453 assert_eq!(parsed.reverse_index, Some(vec![1, 0]));
8454 assert_eq!(
8455 parsed.bitmapped_packs,
8456 Some(vec![
8457 MultiPackBitmapPack {
8458 bitmap_pos: 0,
8459 bitmap_nr: 1,
8460 },
8461 MultiPackBitmapPack {
8462 bitmap_pos: 1,
8463 bitmap_nr: 1,
8464 },
8465 ])
8466 );
8467 }
8468
8469 #[test]
8470 fn writes_multi_pack_index_that_round_trips() {
8471 let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
8472 .expect("test operation should succeed");
8473 let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
8474 .expect("test operation should succeed");
8475 let bytes = MultiPackIndex::write(
8476 ObjectFormat::Sha1,
8477 2,
8478 &["pack-b.idx".into(), "pack-a.idx".into()],
8479 &[
8480 MultiPackIndexEntry {
8481 oid: second.clone(),
8482 pack_int_id: 0,
8483 offset: 0x1_0000_0000,
8484 force_large_offset: false,
8485 },
8486 MultiPackIndexEntry {
8487 oid: first.clone(),
8488 pack_int_id: 1,
8489 offset: 12,
8490 force_large_offset: false,
8491 },
8492 ],
8493 )
8494 .expect("test operation should succeed");
8495
8496 let parsed = MultiPackIndex::parse(&bytes, ObjectFormat::Sha1)
8497 .expect("test operation should succeed");
8498 assert_eq!(parsed.version, 2);
8499 assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
8500 assert_eq!(parsed.object_count, 2);
8501 assert_eq!(
8502 parsed
8503 .find(&first)
8504 .expect("test operation should succeed")
8505 .pack_int_id,
8506 1
8507 );
8508 assert_eq!(
8509 parsed
8510 .find(&first)
8511 .expect("test operation should succeed")
8512 .offset,
8513 12
8514 );
8515 assert_eq!(
8516 parsed
8517 .find(&second)
8518 .expect("test operation should succeed")
8519 .pack_int_id,
8520 0
8521 );
8522 assert_eq!(
8523 parsed
8524 .find(&second)
8525 .expect("test operation should succeed")
8526 .offset,
8527 0x1_0000_0000
8528 );
8529 assert!(parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF"));
8530 }
8531
8532 #[test]
8533 fn write_multi_pack_index_rejects_invalid_inputs() {
8534 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
8535 .expect("test operation should succeed");
8536 assert!(MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]).is_err());
8537 assert!(
8538 MultiPackIndex::write(
8539 ObjectFormat::Sha1,
8540 1,
8541 &["pack-b.idx".into(), "pack-a.idx".into()],
8542 &[],
8543 )
8544 .is_err()
8545 );
8546 assert!(MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]).is_err());
8547 assert!(
8548 MultiPackIndex::write(
8549 ObjectFormat::Sha1,
8550 2,
8551 &["pack-a.idx".into()],
8552 &[MultiPackIndexEntry {
8553 oid,
8554 pack_int_id: 1,
8555 offset: 12,
8556 force_large_offset: false,
8557 }],
8558 )
8559 .is_err()
8560 );
8561 assert!(
8562 MultiPackIndex::write(
8563 ObjectFormat::Sha1,
8564 2,
8565 &["pack-a.idx".into()],
8566 &[
8567 MultiPackIndexEntry {
8568 oid,
8569 pack_int_id: 0,
8570 offset: 12,
8571 force_large_offset: false,
8572 },
8573 MultiPackIndexEntry {
8574 oid,
8575 pack_int_id: 0,
8576 offset: 24,
8577 force_large_offset: false,
8578 },
8579 ],
8580 )
8581 .is_err()
8582 );
8583 }
8584
8585 #[test]
8586 fn rejects_bad_multi_pack_index_bitmap_chunks() {
8587 let oid_a = ObjectId::from_hex(
8588 ObjectFormat::Sha1,
8589 "1111111111111111111111111111111111111111",
8590 )
8591 .expect("test operation should succeed");
8592 let oid_b = ObjectId::from_hex(
8593 ObjectFormat::Sha1,
8594 "2222222222222222222222222222222222222222",
8595 )
8596 .expect("test operation should succeed");
8597
8598 let mut duplicate_ridx = midx_chunks_with_pack_names(
8599 ObjectFormat::Sha1,
8600 b"pack-a.idx\0\0".to_vec(),
8601 &[(oid_a.clone(), 0, 12), (oid_b.clone(), 0, 24)],
8602 );
8603 duplicate_ridx.push((*b"RIDX", midx_u32_table(&[0, 0])));
8604 let duplicate_ridx = multi_pack_index(ObjectFormat::Sha1, 2, 1, &duplicate_ridx);
8605 assert!(MultiPackIndex::parse(&duplicate_ridx, ObjectFormat::Sha1).is_err());
8606
8607 let mut short_btmp = midx_chunks_with_pack_names(
8608 ObjectFormat::Sha1,
8609 b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
8610 &[(oid_a.clone(), 0, 12), (oid_b.clone(), 1, 24)],
8611 );
8612 short_btmp.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
8613 let short_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 2, &short_btmp);
8614 assert!(MultiPackIndex::parse(&short_btmp, ObjectFormat::Sha1).is_err());
8615
8616 let mut out_of_range_btmp = midx_chunks_with_pack_names(
8617 ObjectFormat::Sha1,
8618 b"pack-a.idx\0\0".to_vec(),
8619 &[(oid_a, 0, 12), (oid_b, 0, 24)],
8620 );
8621 out_of_range_btmp.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
8622 let out_of_range_btmp = multi_pack_index(ObjectFormat::Sha1, 2, 1, &out_of_range_btmp);
8623 assert!(MultiPackIndex::parse(&out_of_range_btmp, ObjectFormat::Sha1).is_err());
8624 }
8625
8626 #[test]
8627 fn parses_pack_bitmap_index_with_hash_cache() {
8628 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8629 .expect("test operation should succeed");
8630 let bitmap = pack_bitmap_index(
8631 ObjectFormat::Sha1,
8632 3,
8633 PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE,
8634 &pack_checksum,
8635 &[(2, 0, 1, &[0b101])],
8636 Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
8637 );
8638
8639 let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha1, 3)
8640 .expect("test operation should succeed");
8641 assert_eq!(parsed.version, 1);
8642 assert_eq!(parsed.format, ObjectFormat::Sha1);
8643 assert_eq!(
8644 parsed.options,
8645 PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
8646 );
8647 assert_eq!(parsed.pack_checksum, pack_checksum);
8648 assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
8649 assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);
8650 assert_eq!(parsed.entries.len(), 1);
8651 let entry = parsed
8652 .entry_for_index_position(2)
8653 .expect("test operation should succeed");
8654 assert_eq!(entry.xor_offset, 0);
8655 assert_eq!(entry.flags, 1);
8656 assert_eq!(entry.bitmap.words, ewah_literal_words(&[0b101]));
8657 assert_eq!(
8658 parsed.name_hash_cache,
8659 Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
8660 );
8661 }
8662
8663 #[test]
8664 fn parses_pack_bitmap_index_sha256() {
8665 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8666 .expect("test operation should succeed");
8667 let bitmap = pack_bitmap_index(
8668 ObjectFormat::Sha256,
8669 2,
8670 PackBitmapIndex::OPTION_FULL_DAG,
8671 &pack_checksum,
8672 &[(0, 0, 0, &[0b11])],
8673 None,
8674 );
8675
8676 let parsed = PackBitmapIndex::parse(&bitmap, ObjectFormat::Sha256, 2)
8677 .expect("test operation should succeed");
8678 assert_eq!(parsed.version, 1);
8679 assert_eq!(parsed.format, ObjectFormat::Sha256);
8680 assert_eq!(parsed.pack_checksum, pack_checksum);
8681 assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
8682 assert_eq!(parsed.entries[0].object_position, 0);
8683 assert_eq!(parsed.name_hash_cache, None);
8684 }
8685
8686 #[test]
8687 fn parses_upstream_git_written_pack_bitmap_index() {
8688 let root = unique_temp_dir("git-pack-bitmap-upstream");
8689 fs::create_dir_all(&root).expect("test operation should succeed");
8690 {
8691 run_git_success(&root, &["init", "-q", "-b", "main"]);
8692 run_git_success(
8693 &root,
8694 &[
8695 "-c",
8696 "user.name=Example User",
8697 "-c",
8698 "user.email=example@example.invalid",
8699 "commit",
8700 "--allow-empty",
8701 "-q",
8702 "-m",
8703 "one",
8704 ],
8705 );
8706 run_git_success(
8707 &root,
8708 &[
8709 "-c",
8710 "user.name=Example User",
8711 "-c",
8712 "user.email=example@example.invalid",
8713 "commit",
8714 "--allow-empty",
8715 "-q",
8716 "-m",
8717 "two",
8718 ],
8719 );
8720 run_git_success(&root, &["repack", "-adb"]);
8721 let pack_dir = root.join(".git").join("objects").join("pack");
8722 let idx_path = single_path_with_extension(&pack_dir, "idx");
8723 let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");
8724 let index = PackIndex::parse(
8725 &fs::read(idx_path).expect("test operation should succeed"),
8726 ObjectFormat::Sha1,
8727 )
8728 .expect("test operation should succeed");
8729 let bitmap = PackBitmapIndex::parse(
8730 &fs::read(bitmap_path).expect("test operation should succeed"),
8731 ObjectFormat::Sha1,
8732 index.entries.len(),
8733 )
8734 .expect("test operation should succeed");
8735 assert_eq!(bitmap.pack_checksum, index.pack_checksum);
8736 assert!(!bitmap.entries.is_empty());
8737 };
8738 let _ = fs::remove_dir_all(&root);
8739 }
8740
8741 #[test]
8742 fn rejects_bad_pack_bitmap_index_header_and_checksum() {
8743 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8744 .expect("test operation should succeed");
8745 let bitmap = pack_bitmap_index(
8746 ObjectFormat::Sha1,
8747 1,
8748 PackBitmapIndex::OPTION_FULL_DAG,
8749 &pack_checksum,
8750 &[(0, 0, 0, &[1])],
8751 None,
8752 );
8753
8754 let mut bad_signature = bitmap.clone();
8755 bad_signature[0] = b'X';
8756 assert!(PackBitmapIndex::parse(&bad_signature, ObjectFormat::Sha1, 1).is_err());
8757
8758 let mut bad_version = bitmap.clone();
8759 bad_version[5] = 2;
8760 refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_version);
8761 assert!(PackBitmapIndex::parse(&bad_version, ObjectFormat::Sha1, 1).is_err());
8762
8763 let mut bad_option = bitmap.clone();
8764 bad_option[7] = 0x20;
8765 refresh_trailing_checksum(ObjectFormat::Sha1, &mut bad_option);
8766 assert!(PackBitmapIndex::parse(&bad_option, ObjectFormat::Sha1, 1).is_err());
8767
8768 let mut bad_checksum = bitmap;
8769 let last = bad_checksum.len() - 1;
8770 bad_checksum[last] ^= 1;
8771 assert!(PackBitmapIndex::parse(&bad_checksum, ObjectFormat::Sha1, 1).is_err());
8772 }
8773
8774 #[test]
8775 fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
8776 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
8777 .expect("test operation should succeed");
8778 let bitmap = pack_bitmap_index(
8779 ObjectFormat::Sha1,
8780 2,
8781 PackBitmapIndex::OPTION_FULL_DAG,
8782 &pack_checksum,
8783 &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
8784 None,
8785 );
8786
8787 let mut truncated = bitmap.clone();
8788 truncated.truncate(truncated.len() - ObjectFormat::Sha1.raw_len() - 1);
8789 refresh_trailing_checksum(ObjectFormat::Sha1, &mut truncated);
8790 assert!(PackBitmapIndex::parse(&truncated, ObjectFormat::Sha1, 2).is_err());
8791
8792 let mut out_of_range_position = pack_bitmap_index(
8793 ObjectFormat::Sha1,
8794 2,
8795 PackBitmapIndex::OPTION_FULL_DAG,
8796 &pack_checksum,
8797 &[(2, 0, 0, &[0b01])],
8798 None,
8799 );
8800 assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
8801 refresh_trailing_checksum(ObjectFormat::Sha1, &mut out_of_range_position);
8802 assert!(PackBitmapIndex::parse(&out_of_range_position, ObjectFormat::Sha1, 2).is_err());
8803
8804 let invalid_xor = pack_bitmap_index(
8805 ObjectFormat::Sha1,
8806 2,
8807 PackBitmapIndex::OPTION_FULL_DAG,
8808 &pack_checksum,
8809 &[(0, 1, 0, &[0b01])],
8810 None,
8811 );
8812 assert!(PackBitmapIndex::parse(&invalid_xor, ObjectFormat::Sha1, 2).is_err());
8813 }
8814
8815 #[test]
8816 fn parses_single_entry_pack_index_sha256() {
8817 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"hello sha256\n")
8818 .expect("test operation should succeed");
8819 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
8820 .expect("test operation should succeed");
8821 let index = single_entry_index(
8822 ObjectFormat::Sha256,
8823 oid,
8824 0x1234_5678,
8825 12,
8826 pack_checksum.clone(),
8827 );
8828 let parsed =
8829 PackIndex::parse(&index, ObjectFormat::Sha256).expect("test operation should succeed");
8830 assert_eq!(parsed.version, 2);
8831 assert_eq!(parsed.pack_checksum, pack_checksum);
8832 assert_eq!(parsed.entries.len(), 1);
8833 assert_eq!(
8834 parsed
8835 .find(&oid)
8836 .expect("test operation should succeed")
8837 .offset,
8838 12
8839 );
8840 assert_eq!(
8841 parsed
8842 .find(&oid)
8843 .expect("test operation should succeed")
8844 .crc32,
8845 0x1234_5678
8846 );
8847 assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
8848 assert_pack_index_view_matches_owned(&index, ObjectFormat::Sha256);
8849 }
8850
8851 #[test]
8852 fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
8853 write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha1);
8854 }
8855
8856 #[test]
8857 fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
8858 write_packed_deltifies_similar_blobs_and_round_trips(ObjectFormat::Sha256);
8859 }
8860
8861 #[test]
8862 fn write_packed_rejects_duplicate_objects() {
8863 let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
8864 assert!(PackFile::write_packed(&[object.clone(), object], ObjectFormat::Sha1,).is_err());
8865 }
8866
8867 #[test]
8868 fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
8869 let object = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
8870 let sha1 = object
8871 .object_id(ObjectFormat::Sha1)
8872 .expect("test operation should succeed");
8873 let sha256 = object
8874 .object_id(ObjectFormat::Sha256)
8875 .expect("test operation should succeed");
8876 let duplicate = [
8877 PackInput {
8878 oid: &sha1,
8879 object: &object,
8880 },
8881 PackInput {
8882 oid: &sha1,
8883 object: &object,
8884 },
8885 ];
8886 assert!(PackFile::write_packed_with_known_ids(&duplicate, ObjectFormat::Sha1).is_err());
8887
8888 let wrong_format = [PackInput {
8889 oid: &sha256,
8890 object: &object,
8891 }];
8892 assert!(PackFile::write_packed_with_known_ids(&wrong_format, ObjectFormat::Sha1).is_err());
8893 }
8894
8895 #[test]
8896 fn write_packed_with_known_ids_to_writer_matches_in_memory_pack() {
8897 let objects = similar_blob_family(6);
8898 let object_ids = objects
8899 .iter()
8900 .map(|object| {
8901 object
8902 .object_id(ObjectFormat::Sha1)
8903 .expect("test operation should succeed")
8904 })
8905 .collect::<Vec<_>>();
8906 let inputs = objects
8907 .iter()
8908 .zip(&object_ids)
8909 .map(|(object, oid)| PackInput { oid, object })
8910 .collect::<Vec<_>>();
8911 let options = PackWriteOptions::new();
8912 let in_memory = PackFile::write_packed_with_known_ids_and_options(
8913 &inputs,
8914 ObjectFormat::Sha1,
8915 &options,
8916 )
8917 .expect("test operation should succeed");
8918 let mut written = Vec::new();
8919 let streamed = PackFile::write_packed_with_known_ids_to_writer(
8920 &inputs,
8921 ObjectFormat::Sha1,
8922 &options,
8923 &mut written,
8924 )
8925 .expect("test operation should succeed");
8926
8927 assert_eq!(written, in_memory.pack);
8928 assert_eq!(streamed.index, in_memory.index);
8929 assert_eq!(streamed.checksum, in_memory.checksum);
8930 assert_eq!(streamed.entries, in_memory.entries);
8931 assert_eq!(streamed.delta_count, in_memory.delta_count);
8932 assert_eq!(streamed.pack_size, in_memory.pack.len() as u64);
8933 }
8934
8935 #[test]
8936 fn write_packed_from_source_to_writer_deltifies_across_windows() {
8937 let format = ObjectFormat::Sha1;
8938 let mut objects = Vec::new();
8939 for idx in 0..PACK_STREAM_COMPRESSION_WINDOW_OBJECTS - 1 {
8940 objects.push(EncodedObject::new(
8941 ObjectType::Blob,
8942 format!("unrelated streamed source object {idx:04}\n").into_bytes(),
8943 ));
8944 }
8945 let base_body = b"cross-window base payload with enough shared anchors\nbase\n".to_vec();
8946 let target_body =
8947 b"cross-window base payload with enough shared anchors\ntarget\n".to_vec();
8948 objects.push(EncodedObject::new(ObjectType::Blob, base_body));
8949 objects.push(EncodedObject::new(ObjectType::Blob, target_body));
8950
8951 let object_ids = objects
8952 .iter()
8953 .map(|object| {
8954 object
8955 .object_id(format)
8956 .expect("test operation should succeed")
8957 })
8958 .collect::<Vec<_>>();
8959 let base_oid = object_ids[PACK_STREAM_COMPRESSION_WINDOW_OBJECTS - 1];
8960 let target_oid = object_ids[PACK_STREAM_COMPRESSION_WINDOW_OBJECTS];
8961 let object_map = object_ids
8962 .iter()
8963 .copied()
8964 .zip(objects.into_iter().map(Arc::new))
8965 .collect::<HashMap<_, _>>();
8966
8967 let options = PackWriteOptions::new().with_reorder(false).with_window(10);
8968 let mut written = Vec::new();
8969 let summary = PackFile::write_packed_from_source_to_writer(
8970 &object_ids,
8971 format,
8972 &options,
8973 |oid| {
8974 object_map
8975 .get(oid)
8976 .cloned()
8977 .ok_or_else(|| GitError::not_found(format!("missing test object {oid}")))
8978 },
8979 &mut written,
8980 )
8981 .expect("test operation should succeed");
8982
8983 assert!(
8984 summary.delta_count > 0,
8985 "expected source-backed streaming writer to find deltas"
8986 );
8987 let stats =
8988 PackFile::verify_pack_stats(&written, format).expect("test operation should succeed");
8989 let target = stats
8990 .objects
8991 .iter()
8992 .find(|entry| entry.oid == target_oid)
8993 .expect("target object should be present");
8994 assert_eq!(target.base_oid, Some(base_oid));
8995 }
8996
8997 fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
8998 let objects = similar_blob_family(8);
8999 let packed =
9000 PackFile::write_packed(&objects, format).expect("test operation should succeed");
9001 let undeltified =
9002 PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
9003
9004 assert!(
9007 packed.pack.len() < undeltified.pack.len(),
9008 "expected delta pack ({}) smaller than undeltified pack ({})",
9009 packed.pack.len(),
9010 undeltified.pack.len()
9011 );
9012
9013 let kinds = pack_entry_kinds(&packed.pack, format);
9015 let delta_count = kinds
9016 .iter()
9017 .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
9018 .count();
9019 assert!(
9020 delta_count >= 1,
9021 "expected at least one delta entry, found kinds {kinds:?}"
9022 );
9023
9024 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
9026 assert_eq!(parsed.entries.len(), objects.len());
9027 for object in &objects {
9028 let oid = object
9029 .object_id(format)
9030 .expect("test operation should succeed");
9031 let found = parsed
9032 .entries
9033 .iter()
9034 .find(|entry| entry.entry.oid == oid)
9035 .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
9036 assert_eq!(&found.object, object, "object {oid} did not round-trip");
9037 }
9038
9039 let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
9041 assert_eq!(index.pack_checksum, packed.checksum);
9042 for object in &objects {
9043 let oid = object
9044 .object_id(format)
9045 .expect("test operation should succeed");
9046 assert!(index.find(&oid).is_some(), "index missing {oid}");
9047 }
9048 }
9049
9050 #[test]
9051 fn write_packed_emits_ofs_delta_by_default() {
9052 let objects = similar_blob_family(6);
9053 let packed = PackFile::write_packed(&objects, ObjectFormat::Sha1)
9054 .expect("test operation should succeed");
9055 let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
9056 assert!(
9057 kinds.contains(&PackObjectKind::OfsDelta),
9058 "expected an ofs-delta entry by default, found {kinds:?}"
9059 );
9060 assert!(
9061 !kinds.contains(&PackObjectKind::RefDelta),
9062 "default self-contained pack must not use ref-delta, found {kinds:?}"
9063 );
9064 assert!(PackFile::parse(&packed.pack, ObjectFormat::Sha1).is_ok());
9066 }
9067
9068 #[test]
9069 fn write_packed_can_emit_ref_delta() {
9070 let objects = similar_blob_family(6);
9071 let options = PackWriteOptions::new().with_prefer_ofs_delta(false);
9072 let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
9073 .expect("test operation should succeed");
9074 let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
9075 assert!(
9076 kinds.contains(&PackObjectKind::RefDelta),
9077 "expected a ref-delta entry, found {kinds:?}"
9078 );
9079 assert!(
9080 !kinds.contains(&PackObjectKind::OfsDelta),
9081 "ref-delta mode must not emit ofs-delta, found {kinds:?}"
9082 );
9083
9084 let parsed = PackFile::parse(&packed.pack, ObjectFormat::Sha1)
9087 .expect("test operation should succeed");
9088 assert_eq!(parsed.entries.len(), objects.len());
9089 }
9090
9091 #[test]
9092 fn write_packed_bounds_delta_chain_depth() {
9093 let objects = incremental_blob_chain(20);
9097 let format = ObjectFormat::Sha1;
9098
9099 for max_depth in [1usize, 2, 5] {
9100 let options = PackWriteOptions::new()
9101 .with_window(20)
9102 .with_depth(max_depth);
9103 let packed = PackFile::write_packed_with_options(&objects, format, &options)
9104 .expect("test operation should succeed");
9105
9106 let depths = pack_entry_depths(&packed.pack, format);
9107 let observed = depths.iter().copied().max().unwrap_or(0);
9108 assert!(
9109 observed <= max_depth,
9110 "max chain depth {observed} exceeded bound {max_depth}"
9111 );
9112
9113 let parsed =
9115 PackFile::parse(&packed.pack, format).expect("test operation should succeed");
9116 for object in &objects {
9117 let oid = object
9118 .object_id(format)
9119 .expect("test operation should succeed");
9120 let found = parsed
9121 .entries
9122 .iter()
9123 .find(|entry| entry.entry.oid == oid)
9124 .expect("test operation should succeed");
9125 assert_eq!(&found.object, object);
9126 }
9127 }
9128 }
9129
9130 #[test]
9131 fn write_packed_depth_zero_stores_everything_undeltified() {
9132 let objects = similar_blob_family(5);
9133 let options = PackWriteOptions::new().with_depth(0);
9134 let packed = PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &options)
9135 .expect("test operation should succeed");
9136 let kinds = pack_entry_kinds(&packed.pack, ObjectFormat::Sha1);
9137 assert!(
9138 kinds
9139 .iter()
9140 .all(|kind| !matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta)),
9141 "depth 0 must disable deltas, found {kinds:?}"
9142 );
9143 }
9144
9145 #[test]
9146 fn write_thin_uses_external_base_and_round_trips_sha1() {
9147 write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha1);
9148 }
9149
9150 #[test]
9151 fn write_thin_uses_external_base_and_round_trips_sha256() {
9152 write_thin_uses_external_base_and_round_trips(ObjectFormat::Sha256);
9153 }
9154
9155 fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
9156 let base = blob_with_marker("EXTERNAL-BASE");
9159 let target = blob_with_marker("EXTERNAL-TARGET");
9160 let base_oid = base
9161 .object_id(format)
9162 .expect("test operation should succeed");
9163
9164 let mut external = HashMap::new();
9165 external.insert(base_oid, base.clone());
9166 let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
9167 .expect("test operation should succeed");
9168
9169 let kinds = pack_entry_kinds(&packed.pack, format);
9171 assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
9172
9173 let mut offset = 12usize;
9175 let header =
9176 parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
9177 assert_eq!(header.kind, PackObjectKind::RefDelta);
9178 let referenced =
9179 ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
9180 .expect("test operation should succeed");
9181 assert_eq!(referenced, base_oid);
9182
9183 assert!(PackFile::parse(&packed.pack, format).is_err());
9185
9186 let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
9188 if oid == &base_oid {
9189 Ok(Some(base.clone()))
9190 } else {
9191 Ok(None)
9192 }
9193 })
9194 .expect("test operation should succeed");
9195 assert_eq!(parsed.entries.len(), 1);
9196 assert_eq!(parsed.entries[0].object, target);
9197 }
9198
9199 #[test]
9200 fn write_packed_preserves_distinct_objects_with_no_similarity() {
9201 let objects = vec![
9204 EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
9205 EncodedObject::new(ObjectType::Tree, vec![0u8; 0]),
9206 EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
9207 ];
9208 let format = ObjectFormat::Sha1;
9209 let packed =
9210 PackFile::write_packed(&objects, format).expect("test operation should succeed");
9211 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
9212 assert_eq!(parsed.entries.len(), objects.len());
9213 for object in &objects {
9214 let oid = object
9215 .object_id(format)
9216 .expect("test operation should succeed");
9217 assert!(parsed.entries.iter().any(|entry| entry.entry.oid == oid));
9218 }
9219 }
9220
9221 fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
9225 let mut common_head = Vec::new();
9226 for _ in 0..200 {
9227 common_head.extend_from_slice(b"shared header line for delta testing\n");
9228 }
9229 let mut common_tail = Vec::new();
9230 for _ in 0..200 {
9231 common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
9232 }
9233 (0..count)
9234 .map(|idx| {
9235 let mut body = common_head.clone();
9236 body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
9237 body.extend_from_slice(&common_tail);
9238 EncodedObject::new(ObjectType::Blob, body)
9239 })
9240 .collect()
9241 }
9242
9243 fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
9246 let mut body = Vec::new();
9247 for _ in 0..100 {
9248 body.extend_from_slice(b"baseline content shared across the whole chain\n");
9249 }
9250 let mut objects = Vec::with_capacity(count);
9251 for idx in 0..count {
9252 body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
9253 objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
9254 }
9255 objects
9256 }
9257
9258 fn blob_with_marker(marker: &str) -> EncodedObject {
9259 let mut body = Vec::new();
9260 for _ in 0..150 {
9261 body.extend_from_slice(b"common body shared between base and target\n");
9262 }
9263 body.extend_from_slice(marker.as_bytes());
9264 body.push(b'\n');
9265 for _ in 0..150 {
9266 body.extend_from_slice(b"more common body shared between objects\n");
9267 }
9268 EncodedObject::new(ObjectType::Blob, body)
9269 }
9270
9271 fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
9273 pack_entry_descriptors(pack, format)
9274 .into_iter()
9275 .map(|descriptor| descriptor.kind)
9276 .collect()
9277 }
9278
9279 fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
9283 let descriptors = pack_entry_descriptors(pack, format);
9284 let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
9285 let mut depths = Vec::with_capacity(descriptors.len());
9286 for descriptor in &descriptors {
9287 let depth = match &descriptor.base {
9288 EntryBase::None => 0,
9289 EntryBase::Offset(base_offset) => {
9290 depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
9291 }
9292 EntryBase::Ref => 1,
9296 };
9297 depth_by_offset.insert(descriptor.offset, depth);
9298 depths.push(depth);
9299 }
9300 depths
9301 }
9302
    /// What the test walker records about one pack entry.
    struct EntryDescriptor {
        /// Byte offset of the entry's header within the pack.
        offset: u64,
        /// Kind read from the entry header.
        kind: PackObjectKind,
        /// How the entry refers to a delta base, if it does.
        base: EntryBase,
    }
9308
    /// Delta-base reference recorded for a pack entry by the test walker.
    enum EntryBase {
        /// The entry is not a delta.
        None,
        /// Ofs-delta: the base offset as returned by `parse_ofs_delta_base_offset`.
        Offset(u64),
        /// Ref-delta: the base is named by object id, which the walker skips over.
        Ref,
    }
9314
9315 fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
9316 let trailer_offset = pack.len() - format.raw_len();
9317 let count = u32_be(&pack[8..12]) as usize;
9318 let mut offset = 12usize;
9319 let mut descriptors = Vec::with_capacity(count);
9320 for _ in 0..count {
9321 let entry_offset = offset as u64;
9322 let header =
9323 parse_entry_header(pack, &mut offset).expect("test operation should succeed");
9324 let base = match header.kind {
9325 PackObjectKind::OfsDelta => {
9326 let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
9327 .expect("test operation should succeed");
9328 EntryBase::Offset(base_offset)
9329 }
9330 PackObjectKind::RefDelta => {
9331 offset += format.raw_len();
9332 EntryBase::Ref
9333 }
9334 _ => EntryBase::None,
9335 };
9336 let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
9337 let mut body = Vec::new();
9338 decoder
9339 .read_to_end(&mut body)
9340 .expect("test operation should succeed");
9341 offset += decoder.total_in() as usize;
9342 descriptors.push(EntryDescriptor {
9343 offset: entry_offset,
9344 kind: header.kind,
9345 base,
9346 });
9347 }
9348 descriptors
9349 }
9350
9351 fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
9352 let mut base = Vec::new();
9353 for _ in 0..300 {
9354 base.extend_from_slice(b"common payload\n");
9355 }
9356 base.extend_from_slice(b"base\n");
9357 let mut changed = Vec::new();
9358 for _ in 0..300 {
9359 changed.extend_from_slice(b"common payload\n");
9360 }
9361 changed.extend_from_slice(b"changed\n");
9362 (
9363 EncodedObject::new(ObjectType::Blob, base),
9364 EncodedObject::new(ObjectType::Blob, changed),
9365 )
9366 }
9367
9368 fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
9369 let mut pack = Vec::new();
9370 pack.extend_from_slice(b"PACK");
9371 pack.extend_from_slice(&2u32.to_be_bytes());
9372 pack.extend_from_slice(&1u32.to_be_bytes());
9373 write_entry_header(&mut pack, object_type, body.len() as u64);
9374 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9375 encoder
9376 .write_all(body)
9377 .expect("test operation should succeed");
9378 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9379 let checksum =
9380 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
9381 pack.extend_from_slice(checksum.as_bytes());
9382 pack
9383 }
9384
    /// Selects how `two_object_delta_pack` makes its delta entry refer to the base.
    #[derive(Clone, Copy, Debug)]
    enum DeltaKind {
        /// Written with type code 6 and a relative offset back to the base entry.
        Offset,
        /// Written with type code 7 and the base blob's object id.
        Ref,
    }
9390
9391 fn two_object_delta_pack(
9392 format: ObjectFormat,
9393 base: &[u8],
9394 result: &[u8],
9395 delta_kind: DeltaKind,
9396 ) -> Vec<u8> {
9397 let mut pack = Vec::new();
9398 pack.extend_from_slice(b"PACK");
9399 pack.extend_from_slice(&2u32.to_be_bytes());
9400 pack.extend_from_slice(&2u32.to_be_bytes());
9401
9402 let base_offset = pack.len();
9403 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
9404 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9405 encoder
9406 .write_all(base)
9407 .expect("test operation should succeed");
9408 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9409
9410 let delta = append_suffix_delta(base, result);
9411 let delta_offset = pack.len();
9412 write_pack_entry_header_kind(
9413 &mut pack,
9414 match delta_kind {
9415 DeltaKind::Offset => 6,
9416 DeltaKind::Ref => 7,
9417 },
9418 delta.len() as u64,
9419 );
9420 match delta_kind {
9421 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
9422 DeltaKind::Ref => {
9423 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
9424 .expect("test operation should succeed");
9425 pack.extend_from_slice(base_oid.as_bytes());
9426 }
9427 }
9428 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9429 encoder
9430 .write_all(&delta)
9431 .expect("test operation should succeed");
9432 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9433
9434 let checksum =
9435 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
9436 pack.extend_from_slice(checksum.as_bytes());
9437 pack
9438 }
9439
9440 fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
9441 let mut pack = Vec::new();
9442 pack.extend_from_slice(b"PACK");
9443 pack.extend_from_slice(&2u32.to_be_bytes());
9444 pack.extend_from_slice(&1u32.to_be_bytes());
9445
9446 let delta = append_suffix_delta(base, result);
9447 write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
9448 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
9449 .expect("test operation should succeed");
9450 pack.extend_from_slice(base_oid.as_bytes());
9451 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
9452 encoder
9453 .write_all(&delta)
9454 .expect("test operation should succeed");
9455 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
9456
9457 let checksum =
9458 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
9459 pack.extend_from_slice(checksum.as_bytes());
9460 pack
9461 }
9462
9463 fn unique_temp_dir(name: &str) -> PathBuf {
9464 let nanos = SystemTime::now()
9465 .duration_since(UNIX_EPOCH)
9466 .expect("test operation should succeed")
9467 .as_nanos();
9468 std::env::temp_dir().join(format!("sley-{name}-{}-{nanos}", std::process::id()))
9469 }
9470
9471 fn run_git_success(cwd: &Path, args: &[&str]) {
9472 let output = Command::new("git")
9473 .current_dir(cwd)
9474 .args(args)
9475 .output()
9476 .unwrap_or_else(|err| panic!("failed to run git {args:?}: {err}"));
9477 assert!(
9478 output.status.success(),
9479 "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
9480 output.status.code(),
9481 String::from_utf8_lossy(&output.stdout),
9482 String::from_utf8_lossy(&output.stderr)
9483 );
9484 }
9485
9486 fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
9487 let mut paths = fs::read_dir(dir)
9488 .expect("test operation should succeed")
9489 .map(|entry| entry.expect("test operation should succeed").path())
9490 .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some(extension))
9491 .collect::<Vec<_>>();
9492 assert_eq!(paths.len(), 1, "expected one .{extension} file");
9493 paths.remove(0)
9494 }
9495
9496 fn pack_bitmap_index(
9497 format: ObjectFormat,
9498 object_count: u32,
9499 options: u16,
9500 pack_checksum: &ObjectId,
9501 entries: &[(u32, u8, u8, &[u64])],
9502 name_hash_cache: Option<&[u32]>,
9503 ) -> Vec<u8> {
9504 let mut out = Vec::new();
9505 out.extend_from_slice(b"BITM");
9506 out.extend_from_slice(&1u16.to_be_bytes());
9507 out.extend_from_slice(&options.to_be_bytes());
9508 out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
9509 out.extend_from_slice(pack_checksum.as_bytes());
9510 write_test_ewah(&mut out, object_count, &[0b001]);
9511 write_test_ewah(&mut out, object_count, &[0b010]);
9512 write_test_ewah(&mut out, object_count, &[0b100]);
9513 write_test_ewah(&mut out, object_count, &[0]);
9514 for (position, xor_offset, flags, words) in entries {
9515 out.extend_from_slice(&position.to_be_bytes());
9516 out.push(*xor_offset);
9517 out.push(*flags);
9518 write_test_ewah(&mut out, object_count, words);
9519 }
9520 if let Some(cache) = name_hash_cache {
9521 for value in cache {
9522 out.extend_from_slice(&value.to_be_bytes());
9523 }
9524 }
9525 let checksum =
9526 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
9527 out.extend_from_slice(checksum.as_bytes());
9528 out
9529 }
9530
9531 fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
9532 out.extend_from_slice(&bit_size.to_be_bytes());
9533 let words = ewah_literal_words(literals);
9534 out.extend_from_slice(&(words.len() as u32).to_be_bytes());
9535 for word in words {
9536 out.extend_from_slice(&word.to_be_bytes());
9537 }
9538 out.extend_from_slice(&0u32.to_be_bytes());
9539 }
9540
9541 fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
9542 let rlw = (literals.len() as u64) << 33;
9543 let mut words = vec![rlw];
9544 words.extend_from_slice(literals);
9545 words
9546 }
9547
9548 fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
9549 let checksum_offset = bytes.len() - format.raw_len();
9550 let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
9551 .expect("test operation should succeed");
9552 bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
9553 }
9554
9555 fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
9556 assert!(result.starts_with(base));
9557 let suffix = &result[base.len()..];
9558 assert!(base.len() < 0x10000);
9559 assert!(suffix.len() < 0x80);
9560 let mut delta = Vec::new();
9561 write_delta_varint(&mut delta, base.len() as u64);
9562 write_delta_varint(&mut delta, result.len() as u64);
9563 delta.push(0x90);
9564 delta.push(base.len() as u8);
9565 delta.push(suffix.len() as u8);
9566 delta.extend_from_slice(suffix);
9567 delta
9568 }
9569
9570 fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
9571 loop {
9572 let mut byte = (value as u8) & 0x7f;
9573 value >>= 7;
9574 if value != 0 {
9575 byte |= 0x80;
9576 }
9577 out.push(byte);
9578 if value == 0 {
9579 break;
9580 }
9581 }
9582 }
9583
9584 fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
9585 let mut byte = (type_code << 4) | ((size as u8) & 0x0f);
9586 size >>= 4;
9587 if size != 0 {
9588 byte |= 0x80;
9589 }
9590 out.push(byte);
9591 while size != 0 {
9592 let mut byte = (size as u8) & 0x7f;
9593 size >>= 7;
9594 if size != 0 {
9595 byte |= 0x80;
9596 }
9597 out.push(byte);
9598 }
9599 }
9600
9601 fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
9602 assert!(relative < 0x80);
9603 out.push(relative as u8);
9604 }
9605
9606 fn single_entry_index(
9607 format: ObjectFormat,
9608 oid: ObjectId,
9609 crc32: u32,
9610 offset: u32,
9611 pack_checksum: ObjectId,
9612 ) -> Vec<u8> {
9613 let mut index = Vec::new();
9614 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
9615 index.extend_from_slice(&2u32.to_be_bytes());
9616 for idx in 0..256 {
9617 let count = if idx >= usize::from(oid.as_bytes()[0]) {
9618 1u32
9619 } else {
9620 0u32
9621 };
9622 index.extend_from_slice(&count.to_be_bytes());
9623 }
9624 index.extend_from_slice(oid.as_bytes());
9625 index.extend_from_slice(&crc32.to_be_bytes());
9626 index.extend_from_slice(&offset.to_be_bytes());
9627 index.extend_from_slice(pack_checksum.as_bytes());
9628 let checksum =
9629 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
9630 index.extend_from_slice(checksum.as_bytes());
9631 index
9632 }
9633
9634 fn single_entry_index_v1(
9635 format: ObjectFormat,
9636 oid: ObjectId,
9637 offset: u32,
9638 pack_checksum: ObjectId,
9639 ) -> Vec<u8> {
9640 let mut index = Vec::new();
9641 for idx in 0..256 {
9642 let count = if idx >= usize::from(oid.as_bytes()[0]) {
9643 1u32
9644 } else {
9645 0u32
9646 };
9647 index.extend_from_slice(&count.to_be_bytes());
9648 }
9649 index.extend_from_slice(&offset.to_be_bytes());
9650 index.extend_from_slice(oid.as_bytes());
9651 index.extend_from_slice(pack_checksum.as_bytes());
9652 let checksum =
9653 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
9654 index.extend_from_slice(checksum.as_bytes());
9655 index
9656 }
9657
9658 fn pack_reverse_index(
9659 format: ObjectFormat,
9660 positions: &[u32],
9661 pack_checksum: ObjectId,
9662 ) -> Vec<u8> {
9663 let mut reverse_index = Vec::new();
9664 reverse_index.extend_from_slice(b"RIDX");
9665 reverse_index.extend_from_slice(&1u32.to_be_bytes());
9666 reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
9667 for position in positions {
9668 reverse_index.extend_from_slice(&position.to_be_bytes());
9669 }
9670 reverse_index.extend_from_slice(pack_checksum.as_bytes());
9671 let checksum =
9672 sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
9673 reverse_index.extend_from_slice(checksum.as_bytes());
9674 reverse_index
9675 }
9676
9677 fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
9678 let mut out = Vec::new();
9679 out.extend_from_slice(b"MTME");
9680 out.extend_from_slice(&1u32.to_be_bytes());
9681 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
9682 for mtime in mtimes {
9683 out.extend_from_slice(&mtime.to_be_bytes());
9684 }
9685 out.extend_from_slice(pack_checksum.as_bytes());
9686 let checksum =
9687 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
9688 out.extend_from_slice(checksum.as_bytes());
9689 out
9690 }
9691
9692 fn midx_chunks_with_pack_names(
9693 _format: ObjectFormat,
9694 pack_names: Vec<u8>,
9695 entries: &[(ObjectId, u32, u64)],
9696 ) -> Vec<([u8; 4], Vec<u8>)> {
9697 let mut entries = entries.to_vec();
9698 entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
9699 let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
9700 let mut large_offsets = Vec::new();
9701 let mut chunks = vec![
9702 (*b"PNAM", pack_names),
9703 (*b"OIDF", midx_oid_fanout(&object_ids)),
9704 (*b"OIDL", midx_oid_lookup(&object_ids)),
9705 (
9706 *b"OOFF",
9707 midx_ooff_entries(
9708 &entries
9709 .iter()
9710 .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
9711 .collect::<Vec<_>>(),
9712 &mut large_offsets,
9713 ),
9714 ),
9715 ];
9716 if !large_offsets.is_empty() {
9717 chunks.push((*b"LOFF", large_offsets));
9718 }
9719 chunks
9720 }
9721
9722 fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
9723 let mut counts = [0u32; 256];
9724 for oid in object_ids {
9725 counts[oid.as_bytes()[0] as usize] += 1;
9726 }
9727 let mut running = 0u32;
9728 let mut out = Vec::new();
9729 for count in counts {
9730 running += count;
9731 out.extend_from_slice(&running.to_be_bytes());
9732 }
9733 out
9734 }
9735
9736 fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
9737 let mut out = Vec::new();
9738 for oid in object_ids {
9739 out.extend_from_slice(oid.as_bytes());
9740 }
9741 out
9742 }
9743
9744 fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
9745 let mut out = Vec::new();
9746 for (pack_int_id, offset) in entries {
9747 out.extend_from_slice(&pack_int_id.to_be_bytes());
9748 if *offset < 0x8000_0000 {
9749 out.extend_from_slice(&(*offset as u32).to_be_bytes());
9750 } else {
9751 let large_idx = (large_offsets.len() / 8) as u32;
9752 out.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
9753 large_offsets.extend_from_slice(&offset.to_be_bytes());
9754 }
9755 }
9756 out
9757 }
9758
9759 fn midx_u32_table(values: &[u32]) -> Vec<u8> {
9760 let mut out = Vec::new();
9761 for value in values {
9762 out.extend_from_slice(&value.to_be_bytes());
9763 }
9764 out
9765 }
9766
9767 fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
9768 let mut out = Vec::new();
9769 for (bitmap_pos, bitmap_nr) in entries {
9770 out.extend_from_slice(&bitmap_pos.to_be_bytes());
9771 out.extend_from_slice(&bitmap_nr.to_be_bytes());
9772 }
9773 out
9774 }
9775
9776 fn multi_pack_index(
9777 format: ObjectFormat,
9778 version: u8,
9779 pack_count: u32,
9780 chunks: &[([u8; 4], Vec<u8>)],
9781 ) -> Vec<u8> {
9782 let lookup_len = (chunks.len() + 1) * 12;
9783 let mut out = Vec::new();
9784 out.extend_from_slice(b"MIDX");
9785 out.push(version);
9786 out.push(hash_function_id(format) as u8);
9787 out.push(chunks.len() as u8);
9788 out.push(0);
9789 out.extend_from_slice(&pack_count.to_be_bytes());
9790 let mut chunk_offset = (12 + lookup_len) as u64;
9791 for (id, data) in chunks {
9792 out.extend_from_slice(id);
9793 out.extend_from_slice(&chunk_offset.to_be_bytes());
9794 chunk_offset += data.len() as u64;
9795 }
9796 out.extend_from_slice(&[0, 0, 0, 0]);
9797 out.extend_from_slice(&chunk_offset.to_be_bytes());
9798 for (_id, data) in chunks {
9799 out.extend_from_slice(data);
9800 }
9801 let checksum =
9802 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
9803 out.extend_from_slice(checksum.as_bytes());
9804 out
9805 }
9806
9807 fn pack_checksum_sha1() -> ObjectId {
9810 sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
9811 }
9812
9813 fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
9814 let mut offset = 0usize;
9817 let checksum_offset = bytes.len();
9818 parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
9819 .expect("test operation should succeed")
9820 }
9821
9822 #[test]
9823 fn ewah_encodes_single_literal_word_matching_helper() {
9824 let ewah = EwahBitmap::from_words(64, &[0b101]).expect("test operation should succeed");
9828 assert_eq!(ewah.words, ewah_literal_words(&[0b101]));
9829 assert_eq!(ewah.rlw_position, 0);
9830 assert_eq!(ewah.bit_size, 64);
9831 }
9832
9833 #[test]
9834 fn ewah_byte_layout_is_big_endian() {
9835 let ewah = EwahBitmap::from_words(64, &[0x0102_0304_0506_0708])
9836 .expect("test operation should succeed");
9837 let bytes = ewah.to_bytes();
9838 let mut expected = Vec::new();
9839 expected.extend_from_slice(&64u32.to_be_bytes()); expected.extend_from_slice(&2u32.to_be_bytes()); expected.extend_from_slice(&(1u64 << 33).to_be_bytes()); expected.extend_from_slice(&0x0102_0304_0506_0708u64.to_be_bytes());
9843 expected.extend_from_slice(&0u32.to_be_bytes()); assert_eq!(bytes, expected);
9845 }
9846
9847 #[test]
9848 fn ewah_empty_bitmap_serialises_like_git() {
9849 let ewah = EwahBitmap::empty();
9850 let bytes = ewah.to_bytes();
9851 assert_eq!(bytes, vec![0u8; 12]);
9853 let parsed = parse_ewah_bytes(&bytes);
9855 assert_eq!(parsed, ewah);
9856 assert!(
9857 parsed
9858 .to_positions()
9859 .expect("test operation should succeed")
9860 .is_empty()
9861 );
9862 }
9863
9864 #[test]
9865 fn ewah_compresses_clean_zero_run() {
9866 let ewah =
9869 EwahBitmap::from_words(256, &[0, 0, 0, 0b1]).expect("test operation should succeed");
9870 assert_eq!(ewah.words.len(), 2, "expected one RLW plus one literal");
9871 let rlw = ewah.words[0];
9872 assert_eq!(rlw & 1, 0, "run bit should be zero");
9873 assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
9874 assert_eq!(rlw >> 33, 1, "literal length should be 1");
9875 assert_eq!(ewah.words[1], 0b1);
9876 }
9877
9878 #[test]
9879 fn ewah_compresses_clean_ones_run() {
9880 let ewah = EwahBitmap::from_words(192, &[u64::MAX, u64::MAX, u64::MAX])
9881 .expect("test operation should succeed");
9882 assert_eq!(ewah.words.len(), 1);
9884 let rlw = ewah.words[0];
9885 assert_eq!(rlw & 1, 1, "run bit should be one");
9886 assert_eq!((rlw >> 1) & 0xffff_ffff, 3, "run length should be 3");
9887 assert_eq!(rlw >> 33, 0, "no literals");
9888 }
9889
9890 #[test]
9891 fn ewah_run_then_literal_then_run_roundtrips() {
9892 let words = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
9893 let bit_size = (words.len() * 64) as u32;
9894 let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
9895 assert_eq!(
9896 ewah.to_words().expect("test operation should succeed"),
9897 words
9898 );
9899 }
9900
9901 #[test]
9902 fn ewah_drops_trailing_clean_zero_words() {
9903 let words = vec![0b1, 0, 0, 0];
9906 let ewah = EwahBitmap::from_words(1, &words).expect("test operation should succeed");
9907 assert_eq!(ewah.bit_size, 1);
9909 assert_eq!(
9910 ewah.to_words().expect("test operation should succeed"),
9911 vec![0b1]
9912 );
9913 }
9914
9915 #[test]
9916 fn ewah_from_positions_roundtrips_via_positions() {
9917 let positions = [0u32, 1, 63, 64, 65, 200, 511];
9918 let ewah =
9919 EwahBitmap::from_positions(512, &positions).expect("test operation should succeed");
9920 let mut decoded = ewah.to_positions().expect("test operation should succeed");
9921 decoded.sort_unstable();
9922 assert_eq!(decoded, positions);
9923 }
9924
9925 #[test]
9926 fn ewah_from_positions_dedupes_and_orders() {
9927 let ewah = EwahBitmap::from_positions(128, &[100, 5, 100, 5, 5])
9928 .expect("test operation should succeed");
9929 assert_eq!(
9930 ewah.to_positions().expect("test operation should succeed"),
9931 vec![5, 100]
9932 );
9933 }
9934
9935 #[test]
9936 fn ewah_huge_zero_run_spans_multiple_rlws() {
9937 let mut builder = EwahBuilder::new(0);
9942 builder.add_empty_words(false, 0xffff_ffff);
9943 builder.add_empty_words(false, 5);
9944 let ewah = builder.finish().expect("test operation should succeed");
9945 assert_eq!(ewah.words.len(), 2, "run split across two RLWs");
9946 assert_eq!((ewah.words[0] >> 1) & 0xffff_ffff, 0xffff_ffff);
9947 assert_eq!(ewah.words[1] & 1, 0);
9948 assert_eq!((ewah.words[1] >> 1) & 0xffff_ffff, 5);
9949 assert_eq!(ewah.rlw_position, 1);
9950 }
9951
9952 #[test]
9953 fn ewah_from_words_rejects_oversized_bit_size() {
9954 assert!(EwahBitmap::from_words(65, &[0]).is_err());
9956 }
9957
9958 #[test]
9959 fn ewah_from_positions_rejects_out_of_range() {
9960 assert!(EwahBitmap::from_positions(64, &[64]).is_err());
9961 }
9962
9963 #[test]
9964 fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
9965 let words = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
9968 let bit_size = (words.len() * 64) as u32;
9969 let ewah = EwahBitmap::from_words(bit_size, &words).expect("test operation should succeed");
9970 let bytes = ewah.to_bytes();
9971 let parsed = parse_ewah_bytes(&bytes);
9972 assert_eq!(parsed, ewah);
9973 assert_eq!(
9974 parsed.to_words().expect("test operation should succeed"),
9975 words
9976 );
9977 }
9978
9979 #[test]
9980 fn pack_bitmap_index_write_parse_roundtrip_sha1() {
9981 let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
9983 let bytes = write_bitmap(
9984 ObjectFormat::Sha1,
9985 pack_checksum_sha1(),
9986 &object_types,
9987 &[(0u32, 0u32, vec![1u32, 2u32])],
9988 None,
9989 )
9990 .expect("test operation should succeed");
9991 assert_eq!(&bytes[..4], b"BITM");
9992
9993 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
9994 .expect("test operation should succeed");
9995 assert_eq!(parsed.version, 1);
9996 assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
9997 assert_eq!(parsed.pack_checksum, pack_checksum_sha1());
9998 assert_eq!(
9999 parsed
10000 .type_bitmaps
10001 .commits
10002 .to_positions()
10003 .expect("test operation should succeed"),
10004 vec![0]
10005 );
10006 assert_eq!(
10007 parsed
10008 .type_bitmaps
10009 .trees
10010 .to_positions()
10011 .expect("test operation should succeed"),
10012 vec![1]
10013 );
10014 assert_eq!(
10015 parsed
10016 .type_bitmaps
10017 .blobs
10018 .to_positions()
10019 .expect("test operation should succeed"),
10020 vec![2]
10021 );
10022 assert!(
10023 parsed
10024 .type_bitmaps
10025 .tags
10026 .to_positions()
10027 .expect("test operation should succeed")
10028 .is_empty()
10029 );
10030 assert_eq!(parsed.entries.len(), 1);
10031 let entry = parsed
10032 .entry_for_index_position(0)
10033 .expect("test operation should succeed");
10034 assert_eq!(entry.xor_offset, 0);
10035 assert_eq!(entry.flags, 0);
10036 assert_eq!(
10037 entry
10038 .bitmap
10039 .to_positions()
10040 .expect("test operation should succeed"),
10041 vec![0, 1, 2]
10042 );
10043 assert_eq!(parsed.name_hash_cache, None);
10044 }
10045
10046 #[test]
10047 fn pack_bitmap_index_write_parse_roundtrip_sha256() {
10048 let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
10049 .expect("test operation should succeed");
10050 let object_types = [ObjectType::Commit, ObjectType::Tree];
10051 let bytes = write_bitmap(
10052 ObjectFormat::Sha256,
10053 pack_checksum.clone(),
10054 &object_types,
10055 &[(0u32, 0u32, vec![1u32])],
10056 None,
10057 )
10058 .expect("test operation should succeed");
10059 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
10060 .expect("test operation should succeed");
10061 assert_eq!(parsed.format, ObjectFormat::Sha256);
10062 assert_eq!(parsed.pack_checksum, pack_checksum);
10063 assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
10064 assert_eq!(
10065 parsed.entries[0]
10066 .bitmap
10067 .to_positions()
10068 .expect("test operation should succeed"),
10069 vec![0, 1]
10070 );
10071 }
10072
10073 #[test]
10074 fn pack_bitmap_index_write_includes_name_hash_cache() {
10075 let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
10076 let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
10077 let bytes = write_bitmap(
10078 ObjectFormat::Sha1,
10079 pack_checksum_sha1(),
10080 &object_types,
10081 &[(0u32, 0u32, vec![1u32, 2u32])],
10082 Some(cache.clone()),
10083 )
10084 .expect("test operation should succeed");
10085 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
10086 .expect("test operation should succeed");
10087 assert_eq!(
10088 parsed.options,
10089 PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE
10090 );
10091 assert_eq!(parsed.name_hash_cache, Some(cache));
10092 }
10093
10094 #[test]
10095 fn pack_bitmap_writer_supports_multiple_commits() {
10096 let object_types = [
10097 ObjectType::Commit,
10098 ObjectType::Commit,
10099 ObjectType::Tree,
10100 ObjectType::Blob,
10101 ];
10102 let mut writer =
10103 PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
10104 .expect("test operation should succeed");
10105 writer
10106 .add_commit(0, 0, &[2, 3])
10107 .expect("test operation should succeed");
10108 writer
10109 .add_commit(1, 1, &[2])
10110 .expect("test operation should succeed");
10111 let bytes = writer.write().expect("test operation should succeed");
10112 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
10113 .expect("test operation should succeed");
10114 assert_eq!(parsed.entries.len(), 2);
10115 assert_eq!(
10116 parsed
10117 .type_bitmaps
10118 .commits
10119 .to_positions()
10120 .expect("test operation should succeed"),
10121 vec![0, 1]
10122 );
10123 let first = parsed
10124 .entry_for_index_position(0)
10125 .expect("test operation should succeed");
10126 assert_eq!(
10127 first
10128 .bitmap
10129 .to_positions()
10130 .expect("test operation should succeed"),
10131 vec![0, 2, 3]
10132 );
10133 let second = parsed
10134 .entry_for_index_position(1)
10135 .expect("test operation should succeed");
10136 assert_eq!(
10137 second
10138 .bitmap
10139 .to_positions()
10140 .expect("test operation should succeed"),
10141 vec![1, 2]
10142 );
10143 }
10144
10145 #[test]
10146 fn pack_bitmap_index_recomputes_checksum_on_write() {
10147 let object_types = [ObjectType::Commit, ObjectType::Blob];
10150 let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
10151 .expect("test operation should succeed");
10152 let mut index = writer.build().expect("test operation should succeed");
10153 assert_eq!(index.index_checksum.as_bytes(), [0u8; 20]);
10155 index.entries.clear(); index.entries.push(PackBitmapEntry {
10157 object_position: 0,
10158 xor_offset: 0,
10159 flags: 0,
10160 bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
10161 });
10162 let bytes = index.write().expect("test operation should succeed");
10163 let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
10165 .expect("test operation should succeed");
10166 assert_ne!(parsed.index_checksum.as_bytes(), [0u8; 20]);
10167 }
10168
10169 #[test]
10170 fn pack_bitmap_writer_rejects_non_commit_selection() {
10171 let object_types = [ObjectType::Commit, ObjectType::Blob];
10172 let mut writer =
10173 PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
10174 .expect("test operation should succeed");
10175 assert!(writer.add_commit(1, 1, &[]).is_err());
10177 assert!(writer.add_commit(5, 5, &[]).is_err());
10179 assert!(writer.add_commit(0, 5, &[]).is_err());
10181 assert!(writer.add_commit(0, 0, &[9]).is_err());
10183 }
10184
10185 #[test]
10186 fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
10187 let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
10188 .expect("test operation should succeed");
10189 assert!(
10190 PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit])
10191 .is_err()
10192 );
10193 }
10194
10195 #[test]
10196 fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
10197 let writer = PackBitmapWriter::new(
10198 ObjectFormat::Sha1,
10199 pack_checksum_sha1(),
10200 &[ObjectType::Commit],
10201 )
10202 .expect("test operation should succeed");
10203 assert!(writer.with_name_hash_cache(vec![1, 2]).is_err());
10204 }
10205
10206 #[test]
10207 fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
10208 let mut index = PackBitmapWriter::new(
10209 ObjectFormat::Sha1,
10210 pack_checksum_sha1(),
10211 &[ObjectType::Commit],
10212 )
10213 .expect("test operation should succeed")
10214 .build()
10215 .expect("test operation should succeed");
10216 index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
10218 assert!(index.write().is_err());
10219 index.options = PackBitmapIndex::OPTION_FULL_DAG;
10221 index.name_hash_cache = Some(vec![0]);
10222 assert!(index.write().is_err());
10223 }
10224
    /// Builds a bitmap index for a pack produced by a real `git` binary and
    /// checks that it parses back with the expected checksum and coverage.
    ///
    /// Requires `git` to be runnable through `run_git_success`.
    ///
    /// NOTE(review): despite the name, the written bytes are only re-parsed
    /// with `PackBitmapIndex::parse` here; git is used to create the pack and
    /// its index, not to read the bitmap back.
    #[test]
    fn write_bitmap_roundtrips_through_upstream_git_parser() {
        let root = unique_temp_dir("git-pack-bitmap-writer");
        fs::create_dir_all(&root).expect("test operation should succeed");
        {
            // Create a repository with one empty commit, then repack it.
            run_git_success(&root, &["init", "-q", "-b", "main"]);
            run_git_success(
                &root,
                &[
                    "-c",
                    "user.name=Example User",
                    "-c",
                    "user.email=example@example.invalid",
                    "commit",
                    "--allow-empty",
                    "-q",
                    "-m",
                    "one",
                ],
            );
            run_git_success(&root, &["repack", "-adb"]);
            // Load the `.idx` and `.pack` files from the pack directory.
            let pack_dir = root.join(".git").join("objects").join("pack");
            let idx_path = single_path_with_extension(&pack_dir, "idx");
            let index = PackIndex::parse(
                &fs::read(idx_path).expect("test operation should succeed"),
                ObjectFormat::Sha1,
            )
            .expect("test operation should succeed");
            let pack_path = single_path_with_extension(&pack_dir, "pack");
            let pack =
                PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
                    .expect("test operation should succeed");
            // Bitmap positions used below are ranks in ascending pack-offset
            // order, so sort the offsets and look positions up by offset.
            let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
            offsets.sort_unstable();
            let position_of = |offset: u64| -> u32 {
                offsets
                    .iter()
                    .position(|value| *value == offset)
                    .expect("test operation should succeed") as u32
            };
            // Record each object's type at its offset-order position. Entries
            // with no matching parsed pack object keep the `Blob` default.
            let mut object_types = vec![ObjectType::Blob; index.entries.len()];
            for entry in &index.entries {
                let position = position_of(entry.offset) as usize;
                if let Some(parsed) = pack
                    .entries
                    .iter()
                    .find(|po| po.entry.offset == entry.offset)
                {
                    object_types[position] = parsed.object.object_type;
                }
            }
            // First commit in offset order ...
            let commit_position = object_types
                .iter()
                .position(|ty| *ty == ObjectType::Commit)
                .expect("test operation should succeed") as u32;
            // ... and where that same object sits within `index.entries`.
            let commit_index_position = index
                .entries
                .iter()
                .position(|entry| position_of(entry.offset) == commit_position)
                .expect("test operation should succeed")
                as u32;
            // Mark every object in the pack as reachable from the commit.
            let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
            let bytes = write_bitmap(
                ObjectFormat::Sha1,
                index.pack_checksum.clone(),
                &object_types,
                &[(commit_position, commit_index_position, reachable)],
                None,
            )
            .expect("test operation should succeed");
            let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
                .expect("test operation should succeed");
            assert_eq!(parsed.pack_checksum, index.pack_checksum);
            assert_eq!(parsed.entries.len(), 1);
            // The single entry's bitmap must cover every object in the pack.
            assert_eq!(
                parsed.entries[0]
                    .bitmap
                    .to_positions()
                    .expect("test operation should succeed")
                    .len(),
                index.entries.len()
            );
        };
        // Best-effort cleanup: a removal failure is ignored. This line is not
        // reached when an assertion above panics.
        let _ = fs::remove_dir_all(&root);
    }
10319}