1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
4
5use flate2::{Compress, Compression, FlushCompress, Status};
6use sley_core::{GitError, ObjectFormat, ObjectId, Result};
7use sley_formats::Bundle;
8use sley_object::{EncodedObject, ObjectType};
9use std::borrow::Borrow;
10use std::cell::RefCell;
11use std::collections::{HashMap, HashSet};
12use std::fmt;
13use std::ops::Range;
14use std::sync::Arc;
15
/// Location and size metadata recorded for one object stored in a pack.
///
/// The field descriptions below reflect how the pack parser in this file
/// fills the struct for non-delta entries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackEntry {
    /// Id of the object this entry describes.
    pub oid: ObjectId,
    /// Number of compressed input bytes the entry's payload occupied in the pack.
    pub compressed_size: u64,
    /// Payload size declared in the entry header.
    pub uncompressed_size: u64,
    /// Byte offset of the entry header, measured from the start of the pack.
    pub offset: u64,
}
23
/// Value [`PackWriteOptions::new`] assigns to `window`.
pub const DEFAULT_PACK_WINDOW: usize = 10;

/// Value [`PackWriteOptions::new`] assigns to `depth`.
pub const DEFAULT_PACK_DEPTH: usize = 50;

// NOTE(review): presumably the minimum object count before payload compression
// is spread over several threads; the consumer is not visible here — confirm.
const PACK_PARALLEL_COMPRESSION_MIN_OBJECTS: usize = 64;

// NOTE(review): presumably an upper bound on compression worker threads; the
// consumer is not visible here — confirm.
const PACK_PARALLEL_COMPRESSION_MAX_THREADS: usize = 4;
47
/// Tunables consumed by the `PackFile::write_*` functions when building a pack.
#[derive(Debug, Clone)]
pub struct PackWriteOptions {
    /// Delta window handed to the delta planner; defaults to [`DEFAULT_PACK_WINDOW`].
    // NOTE(review): the planner is not visible here; exact semantics to be confirmed.
    pub window: usize,
    /// Delta depth handed to the delta planner; defaults to [`DEFAULT_PACK_DEPTH`].
    /// `PackFile::write_undeltified` sets this to 0.
    pub depth: usize,
    /// When true, a delta whose base is inside the pack is written with entry
    /// kind 6 and a relative offset; when false it is written with entry kind 7
    /// and the base object id.
    pub prefer_ofs_delta: bool,
    /// Objects that are not part of the pack but are made available as
    /// external delta bases. Every key must use the pack's object format.
    pub thin_bases: HashMap<ObjectId, EncodedObject>,
    /// Defaults to true; `PackFile::write_undeltified` sets it to false.
    // NOTE(review): presumably allows the planner to change the write order — confirm.
    pub reorder: bool,
}
76
/// `Default` yields exactly the same options as [`PackWriteOptions::new`].
impl Default for PackWriteOptions {
    fn default() -> Self {
        Self::new()
    }
}
82
83impl PackWriteOptions {
84 pub fn new() -> Self {
88 Self {
89 window: DEFAULT_PACK_WINDOW,
90 depth: DEFAULT_PACK_DEPTH,
91 prefer_ofs_delta: true,
92 thin_bases: HashMap::new(),
93 reorder: true,
94 }
95 }
96
97 pub fn with_window(mut self, window: usize) -> Self {
99 self.window = window;
100 self
101 }
102
103 pub fn with_depth(mut self, depth: usize) -> Self {
105 self.depth = depth;
106 self
107 }
108
109 pub fn with_prefer_ofs_delta(mut self, prefer_ofs_delta: bool) -> Self {
112 self.prefer_ofs_delta = prefer_ofs_delta;
113 self
114 }
115
116 pub fn with_thin_bases(mut self, thin_bases: HashMap<ObjectId, EncodedObject>) -> Self {
118 self.thin_bases = thin_bases;
119 self
120 }
121
122 pub fn with_reorder(mut self, reorder: bool) -> Self {
125 self.reorder = reorder;
126 self
127 }
128}
129
/// Switches describing how a repack should behave.
// NOTE(review): no consumer of this type is visible in this part of the file;
// the field notes below are inferred from the names only — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepackPolicy {
    /// Presumably requests bitmap generation — confirm against the consumer.
    pub write_bitmaps: bool,
    /// Presumably requests cruft packs — confirm against the consumer.
    pub cruft_packs: bool,
    /// Optional geometric repack factor; `None` presumably disables it — confirm.
    pub geometric_factor: Option<u8>,
}
136
/// A fully parsed pack.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackFile {
    /// Pack format version read from the header (the parser accepts 2 and 3).
    pub version: u32,
    /// The pack's objects after delta resolution.
    pub entries: Vec<PackObject>,
    /// Digest of every pack byte preceding the trailer; the parser verifies it
    /// against the trailer before accepting the pack.
    pub checksum: ObjectId,
}
143
/// One object from a pack together with its placement metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackObject {
    /// Id, sizes and offset of the entry inside the pack.
    pub entry: PackEntry,
    /// The object's type and body.
    pub object: EncodedObject,
}
149
/// Result of writing (or indexing) a pack: the pack bytes plus a matching index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackWrite {
    /// Complete pack bytes, including the trailing checksum.
    pub pack: Vec<u8>,
    /// Serialized version-2 pack index for `pack`.
    pub index: Vec<u8>,
    /// Checksum of the pack (the value stored in the pack trailer).
    pub checksum: ObjectId,
    /// One index entry per object; the pack writer pushes them in the order
    /// the objects were written.
    pub entries: Vec<PackIndexEntry>,
}
157
/// A borrowed object paired with an object id the caller already knows.
///
/// Used by `PackFile::write_packed_with_known_ids*`, which check that the id
/// uses the pack's object format but do not recompute it from the object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackInput<'a> {
    /// Caller-supplied id of `object`.
    pub oid: &'a ObjectId,
    /// The object to write.
    pub object: &'a EncodedObject,
}
163
/// Output of building a version-2 index for existing pack bytes
/// (see `PackIndex::write_v2_for_pack`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexBuild {
    /// Serialized index bytes.
    pub index: Vec<u8>,
    /// Checksum of the pack the index was built for.
    pub pack_checksum: ObjectId,
    /// The index entries that were written.
    pub entries: Vec<PackIndexEntry>,
}
170
/// Owned, fully decoded pack index.
// NOTE(review): the parser that fills this struct is not visible in this part
// of the file; the field notes mirror `PackIndexView` — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndex {
    /// Index format version.
    pub version: u32,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Decoded entries.
    pub entries: Vec<PackIndexEntry>,
    /// Checksum of the pack this index describes.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
}
179
/// Borrowed view over serialized pack index bytes.
///
/// Parsing records byte ranges of the on-disk tables instead of decoding every
/// entry; lookups read directly from the borrowed buffer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexView<'a> {
    /// Index format version: 2 when the v2 magic is present, otherwise 1.
    pub version: u32,
    /// Number of objects in the index (the last fanout entry).
    pub count: usize,
    /// 256-entry fanout table; entry `b` bounds the lookup range for ids whose
    /// first byte is `b`.
    pub fanout: [u32; 256],
    /// Pack checksum stored in the index trailer.
    pub pack_checksum: ObjectId,
    /// Index checksum stored in the last `raw_len` bytes of the index.
    pub index_checksum: ObjectId,
    // The complete index buffer the table ranges refer to.
    bytes: &'a [u8],
    // Object format the index was parsed with.
    format: ObjectFormat,
    // Byte ranges of the version-specific tables inside `bytes`.
    tables: PackIndexViewTables,
}
191
/// A thread-shareable, debuggable owner of pack index bytes.
///
/// Lets [`PackIndexViewData`] hold its backing storage behind an
/// `Arc<dyn PackIndexByteSource>` regardless of the concrete buffer type.
pub trait PackIndexByteSource: fmt::Debug + Send + Sync {
    /// Returns the index bytes.
    fn as_bytes(&self) -> &[u8];
}
195
/// Every `AsRef<[u8]>` type that is also `Debug + Send + Sync` (sized or not)
/// is a byte source; the bytes are whatever `as_ref` returns.
impl<T> PackIndexByteSource for T
where
    T: AsRef<[u8]> + fmt::Debug + Send + Sync + ?Sized,
{
    fn as_bytes(&self) -> &[u8] {
        self.as_ref()
    }
}
204
/// Wrapper that lets an `Arc<[u8]>` be used as a [`PackIndexByteSource`].
#[derive(Debug)]
struct SharedIndexBytes(Arc<[u8]>);

impl PackIndexByteSource for SharedIndexBytes {
    /// Returns the wrapped shared slice.
    fn as_bytes(&self) -> &[u8] {
        self.0.as_ref()
    }
}
213
/// Owning counterpart of [`PackIndexView`]: the same parsed metadata, but the
/// index bytes are held through a shared [`PackIndexByteSource`].
#[derive(Debug, Clone)]
pub struct PackIndexViewData {
    /// Index format version (1 or 2).
    pub version: u32,
    /// Number of objects in the index.
    pub count: usize,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Pack checksum stored in the index trailer.
    pub pack_checksum: ObjectId,
    /// Checksum of the index itself.
    pub index_checksum: ObjectId,
    // Shared owner of the index buffer the table ranges refer to.
    bytes: Arc<dyn PackIndexByteSource>,
    // Object format the index was parsed with.
    format: ObjectFormat,
    // Byte ranges of the version-specific tables inside the buffer.
    tables: PackIndexViewTables,
}
225
/// One record of a pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackIndexEntry {
    /// Id of the indexed object.
    pub oid: ObjectId,
    /// CRC-32 of the object's pack entry bytes (entry header, any delta base
    /// reference, and the compressed payload), as computed by the pack writer.
    pub crc32: u32,
    /// Byte offset of the entry from the start of the pack.
    pub offset: u64,
}
232
/// Result of looking up an object id in a pack index view.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PackIndexLookup {
    /// CRC-32 stored for the entry; reported as 0 for version-1 indexes.
    pub crc32: u32,
    /// Byte offset of the entry from the start of the pack.
    pub offset: u64,
}
238
/// Byte ranges of the tables inside a serialized pack index buffer.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PackIndexViewTables {
    /// Version-1 layout: a single table of fixed-size records, each a 4-byte
    /// big-endian pack offset followed by the object id.
    V1 {
        entry_table: Range<usize>,
    },
    /// Version-2 layout with separate per-column tables.
    V2 {
        /// Object ids, `raw_len` bytes each.
        oid_table: Range<usize>,
        /// CRC-32 values, 4 bytes each.
        crc_table: Range<usize>,
        /// 4-byte offsets; the parser counts entries with the top bit set to
        /// size the large-offset table.
        small_offset_table: Range<usize>,
        /// 8-byte entries for offsets that do not fit the small table.
        large_offset_table: Range<usize>,
    },
}
251
/// Decoded pack reverse index.
// NOTE(review): the parser/writer for this type is outside the visible part of
// the file; field notes are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackReverseIndex {
    /// File format version.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Position table (presumably index positions ordered by pack offset).
    pub positions: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
260
/// Decoded pack mtimes file.
// NOTE(review): the parser/writer for this type is outside the visible part of
// the file; field notes are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackMtimes {
    /// File format version.
    pub version: u32,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// One modification time per object (presumably in index order).
    pub mtimes: Vec<u32>,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
}
269
/// Decoded pack bitmap index.
// NOTE(review): the parser/writer for this type is outside the visible part of
// the file; field notes are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapIndex {
    /// File format version.
    pub version: u16,
    /// Object format of the checksums.
    pub format: ObjectFormat,
    /// Raw option flags from the header.
    pub options: u16,
    /// Checksum of the associated pack.
    pub pack_checksum: ObjectId,
    /// Checksum of this file.
    pub index_checksum: ObjectId,
    /// The four per-object-type bitmaps.
    pub type_bitmaps: PackBitmapTypeBitmaps,
    /// Bitmap entries.
    pub entries: Vec<PackBitmapEntry>,
    /// Optional name-hash cache; `None` when the file carries none.
    pub name_hash_cache: Option<Vec<u32>>,
}
281
/// One bitmap per object type, as stored in a pack bitmap index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapTypeBitmaps {
    /// Bitmap for commits.
    pub commits: EwahBitmap,
    /// Bitmap for trees.
    pub trees: EwahBitmap,
    /// Bitmap for blobs.
    pub blobs: EwahBitmap,
    /// Bitmap for tags.
    pub tags: EwahBitmap,
}
289
/// One entry of a pack bitmap index.
// NOTE(review): field semantics are inferred from names; the parser is not
// visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackBitmapEntry {
    /// Position of the entry's object.
    pub object_position: u32,
    /// XOR offset (presumably relative to an earlier entry).
    pub xor_offset: u8,
    /// Raw entry flags.
    pub flags: u8,
    /// The entry's bitmap.
    pub bitmap: EwahBitmap,
}
303
/// Raw representation of an EWAH bitmap.
// NOTE(review): field semantics are inferred from names; the codec is not
// visible here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EwahBitmap {
    /// Number of bits the bitmap represents.
    pub bit_size: u32,
    /// Bitmap words.
    pub words: Vec<u64>,
    /// Position of the current run-length word.
    pub rlw_position: u32,
}
310
/// Decoded multi-pack index.
// NOTE(review): the parser/writer for this type is outside the visible part of
// the file; field notes are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndex {
    /// File format version.
    pub version: u8,
    /// Object format of the ids.
    pub format: ObjectFormat,
    /// Number of packs covered.
    pub pack_count: u32,
    /// Names of the covered packs.
    pub pack_names: Vec<String>,
    /// Number of objects covered.
    pub object_count: u32,
    /// 256-entry fanout table.
    pub fanout: [u32; 256],
    /// Object entries.
    pub objects: Vec<MultiPackIndexEntry>,
    /// Optional reverse index; `None` when absent.
    pub reverse_index: Option<Vec<u32>>,
    /// Optional bitmapped-pack table; `None` when absent.
    pub bitmapped_packs: Option<Vec<MultiPackBitmapPack>>,
    /// Chunk descriptors.
    pub chunks: Vec<MultiPackIndexChunk>,
    /// File checksum.
    pub checksum: ObjectId,
}
325
/// Lookup structure over shared multi-pack index bytes.
// NOTE(review): construction and lookup code are outside the visible part of
// the file; field notes are inferred from names — confirm.
#[derive(Debug, Clone)]
pub struct MultiPackIndexOidLookup {
    // Object format of the ids.
    format: ObjectFormat,
    // Number of packs covered.
    pack_count: u32,
    // Names of the covered packs.
    pack_names: Vec<String>,
    // 256-entry fanout table.
    fanout: [u32; 256],
    // Number of objects covered.
    object_count: usize,
    // Presumably byte offsets of the respective chunks inside `bytes`.
    oid_lookup_offset: usize,
    object_offsets_offset: usize,
    // `None` presumably means the file has no large-offsets chunk.
    large_offsets_offset: Option<usize>,
    large_offsets_len: usize,
    // Shared backing buffer.
    bytes: Arc<Vec<u8>>,
}
339
/// One object record of a multi-pack index.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexEntry {
    /// Id of the object.
    pub oid: ObjectId,
    /// Integer id of the pack holding the object (presumably an index into
    /// `MultiPackIndex::pack_names` — confirm).
    pub pack_int_id: u32,
    /// Offset of the object inside that pack.
    pub offset: u64,
}
346
/// Per-pack bitmap range record of a multi-pack index.
// NOTE(review): field semantics are inferred from names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackBitmapPack {
    /// Starting bitmap position for the pack.
    pub bitmap_pos: u32,
    /// Number of bitmap positions belonging to the pack.
    pub bitmap_nr: u32,
}
352
/// Descriptor of one chunk of a multi-pack index file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MultiPackIndexChunk {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Offset of the chunk (presumably from the start of the file — confirm).
    pub offset: u64,
    /// Length of the chunk in bytes.
    pub len: u64,
}
359
/// Kind of a pack entry as read from its header.
///
/// The four object kinds map one-to-one onto `ObjectType`; the two delta kinds
/// carry a delta against a base. The pack writer in this file emits kind code
/// 6 for offset deltas and 7 for ref deltas.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PackObjectKind {
    Commit,
    Tree,
    Blob,
    Tag,
    /// Delta whose base is identified by an offset inside the same pack.
    OfsDelta,
    /// Delta whose base is identified by object id.
    RefDelta,
}
369
/// Intermediate form of a pack entry after inflation but before delta resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedPackEntry {
    /// A non-delta entry whose object and id are already known.
    Resolved(PackObject),
    /// A delta entry that still has to be applied to its base.
    Delta {
        /// How the base object is identified.
        base: DeltaBase,
        /// Compressed bytes consumed by the delta payload.
        compressed_size: u64,
        /// Inflated size of the delta as declared in the entry header.
        delta_size: u64,
        /// Byte offset of the entry header from the start of the pack.
        offset: u64,
        /// Inflated delta instructions.
        delta: Vec<u8>,
    },
}
381
/// Identifies the base object of a delta entry.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DeltaBase {
    /// Base given by pack offset, as produced by `parse_ofs_delta_base_offset`.
    // NOTE(review): presumably the absolute offset of the base entry — confirm.
    Offset(u64),
    /// Base given by object id (it may live outside the pack for thin packs).
    Ref(ObjectId),
}
387
388impl PackFile {
    /// Parses a self-contained pack whose object ids are SHA-1.
    ///
    /// Shorthand for [`PackFile::parse`] with `ObjectFormat::Sha1`.
    pub fn parse_sha1(bytes: &[u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
392
    /// Parses a self-contained pack in the given object format.
    ///
    /// No external delta bases are offered: the base lookup always answers
    /// `Ok(None)`.
    pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_with_base(bytes, format, |_| Ok(None))
    }
396
    /// Parses the pack embedded in `bundle`, using the bundle's object format.
    pub fn parse_bundle(bundle: &Bundle) -> Result<Self> {
        Self::parse(&bundle.pack, bundle.format)
    }
400
401 pub fn index_pack(bytes: &[u8], format: ObjectFormat) -> Result<PackWrite> {
402 let PackIndexBuild {
403 index,
404 pack_checksum,
405 entries,
406 } = PackIndex::write_v2_for_pack(bytes, format)?;
407 Ok(PackWrite {
408 pack: bytes.to_vec(),
409 index,
410 checksum: pack_checksum,
411 entries,
412 })
413 }
414
    /// Parses a pack that may reference delta bases outside the pack.
    ///
    /// `external_base` is passed on to delta resolution; it answers
    /// `Ok(Some(object))` for a known id, `Ok(None)` for an unknown one, or an
    /// error.
    pub fn parse_thin<F>(bytes: &[u8], format: ObjectFormat, external_base: F) -> Result<Self>
    where
        F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
    {
        Self::parse_with_base(bytes, format, external_base)
    }
421
422 fn parse_with_base<F>(bytes: &[u8], format: ObjectFormat, mut external_base: F) -> Result<Self>
423 where
424 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
425 {
426 let trailer_len = format.raw_len();
427 if bytes.len() < 12 + trailer_len {
428 return Err(GitError::InvalidFormat("pack file too short".into()));
429 }
430 let trailer_offset = bytes.len() - trailer_len;
431 let checksum = sley_core::digest_bytes(format, &bytes[..trailer_offset])?;
432 let expected = ObjectId::from_raw(format, &bytes[trailer_offset..])?;
433 if checksum != expected {
434 return Err(GitError::InvalidFormat(format!(
435 "pack checksum mismatch: expected {expected}, got {checksum}"
436 )));
437 }
438
439 if &bytes[..4] != b"PACK" {
440 return Err(GitError::InvalidFormat("missing PACK signature".into()));
441 }
442 let version = u32_be(&bytes[4..8]);
443 if version != 2 && version != 3 {
444 return Err(GitError::Unsupported(format!("pack version {version}")));
445 }
446 let count = u32_be(&bytes[8..12]) as usize;
447 let mut offset = 12usize;
448 let mut entries = Vec::with_capacity(count);
449 for _ in 0..count {
450 let entry_offset = offset;
451 let header = parse_entry_header(bytes, &mut offset)?;
452 let base =
453 match header.kind {
454 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(
455 parse_ofs_delta_base_offset(bytes, &mut offset, entry_offset as u64)?,
456 )),
457 PackObjectKind::RefDelta => {
458 let hash_len = format.raw_len();
459 if offset + hash_len > trailer_offset {
460 return Err(GitError::InvalidFormat(
461 "truncated ref-delta base object id".into(),
462 ));
463 }
464 let oid = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
465 offset += hash_len;
466 Some(DeltaBase::Ref(oid))
467 }
468 _ => None,
469 };
470 let mut body = Vec::new();
471 let consumed = inflate_into(
472 &bytes[offset..trailer_offset],
473 &mut body,
474 header.size.min(usize::MAX as u64) as usize,
475 )?;
476 if body.len() as u64 != header.size {
477 return Err(GitError::InvalidObject(format!(
478 "pack object declared {} bytes, decoded {}",
479 header.size,
480 body.len()
481 )));
482 }
483 if consumed == 0 {
484 return Err(GitError::InvalidFormat(
485 "empty compressed pack entry".into(),
486 ));
487 }
488 offset = offset
489 .checked_add(consumed)
490 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
491 if offset > trailer_offset {
492 return Err(GitError::InvalidFormat(
493 "pack entry extends past checksum".into(),
494 ));
495 }
496 if let Some(base) = base {
497 entries.push(ParsedPackEntry::Delta {
498 base,
499 compressed_size: consumed as u64,
500 delta_size: header.size,
501 offset: entry_offset as u64,
502 delta: body,
503 });
504 } else {
505 let object_type = match header.kind {
506 PackObjectKind::Commit => ObjectType::Commit,
507 PackObjectKind::Tree => ObjectType::Tree,
508 PackObjectKind::Blob => ObjectType::Blob,
509 PackObjectKind::Tag => ObjectType::Tag,
510 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
511 };
512 let object = EncodedObject::new(object_type, body);
513 let oid = object.object_id(format)?;
514 entries.push(ParsedPackEntry::Resolved(PackObject {
515 entry: PackEntry {
516 oid,
517 compressed_size: consumed as u64,
518 uncompressed_size: header.size,
519 offset: entry_offset as u64,
520 },
521 object,
522 }));
523 }
524 }
525 if offset != trailer_offset {
526 return Err(GitError::InvalidFormat(format!(
527 "pack has {} trailing bytes before checksum",
528 trailer_offset - offset
529 )));
530 }
531 Ok(Self {
532 version,
533 entries: resolve_pack_entries(entries, format, &mut external_base)?,
534 checksum,
535 })
536 }
537
    /// Writes `objects` without deltas into a SHA-1 pack.
    ///
    /// Shorthand for [`PackFile::write_undeltified`] with `ObjectFormat::Sha1`.
    pub fn write_undeltified_sha1<T>(objects: &[T]) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_undeltified(objects, ObjectFormat::Sha1)
    }
544
    /// Writes `objects` into a pack using default options with the delta depth
    /// set to 0 and reordering turned off.
    pub fn write_undeltified<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        let options = PackWriteOptions::new().with_depth(0).with_reorder(false);
        Self::write_packed_impl(objects, format, &options)
    }
557
    /// Writes `objects` into a pack using the default [`PackWriteOptions`].
    pub fn write_packed<T>(objects: &[T], format: ObjectFormat) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_with_options(objects, format, &PackWriteOptions::new())
    }
572
    /// Writes `objects` into a pack using caller-supplied options.
    ///
    /// Object ids are computed from the objects themselves.
    pub fn write_packed_with_options<T>(
        objects: &[T],
        format: ObjectFormat,
        options: &PackWriteOptions,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        Self::write_packed_impl(objects, format, options)
    }
586
    /// Writes a pack from objects whose ids the caller already knows, using the
    /// default [`PackWriteOptions`].
    pub fn write_packed_with_known_ids(
        inputs: &[PackInput<'_>],
        format: ObjectFormat,
    ) -> Result<PackWrite> {
        Self::write_packed_with_known_ids_and_options(inputs, format, &PackWriteOptions::new())
    }
601
602 pub fn write_packed_with_known_ids_and_options(
605 inputs: &[PackInput<'_>],
606 format: ObjectFormat,
607 options: &PackWriteOptions,
608 ) -> Result<PackWrite> {
609 if inputs.len() > u32::MAX as usize {
610 return Err(GitError::InvalidFormat("too many pack objects".into()));
611 }
612 let mut objects = Vec::with_capacity(inputs.len());
613 let mut object_ids = Vec::with_capacity(inputs.len());
614 for input in inputs {
615 if input.oid.format() != format {
616 return Err(GitError::InvalidObjectId(format!(
617 "pack object id {} uses {}, pack uses {}",
618 input.oid,
619 input.oid.format().name(),
620 format.name()
621 )));
622 }
623 objects.push(input.object);
624 object_ids.push(*input.oid);
625 }
626 Self::write_packed_from_parts(objects, object_ids, format, options)
627 }
628
    /// Writes `objects` into a pack while offering `external_bases` as delta
    /// bases that are not themselves stored in the pack.
    ///
    /// All other options keep their defaults.
    pub fn write_thin<T>(
        objects: &[T],
        format: ObjectFormat,
        external_bases: HashMap<ObjectId, EncodedObject>,
    ) -> Result<PackWrite>
    where
        T: Borrow<EncodedObject>,
    {
        let options = PackWriteOptions::new().with_thin_bases(external_bases);
        Self::write_packed_impl(objects, format, &options)
    }
648
649 fn write_packed_impl<T>(
650 objects: &[T],
651 format: ObjectFormat,
652 options: &PackWriteOptions,
653 ) -> Result<PackWrite>
654 where
655 T: Borrow<EncodedObject>,
656 {
657 if objects.len() > u32::MAX as usize {
658 return Err(GitError::InvalidFormat("too many pack objects".into()));
659 }
660 let objects: Vec<&EncodedObject> = objects.iter().map(Borrow::borrow).collect();
661
662 let mut object_ids: Vec<ObjectId> = Vec::with_capacity(objects.len());
665 for object in &objects {
666 object_ids.push(object.object_id(format)?);
667 }
668 Self::write_packed_from_parts(objects, object_ids, format, options)
669 }
670
671 fn write_packed_from_parts(
672 objects: Vec<&EncodedObject>,
673 object_ids: Vec<ObjectId>,
674 format: ObjectFormat,
675 options: &PackWriteOptions,
676 ) -> Result<PackWrite> {
677 let mut seen = HashSet::with_capacity(object_ids.len());
678 for oid in &object_ids {
679 if !seen.insert(oid) {
680 return Err(GitError::InvalidFormat(format!(
681 "pack contains duplicate object id {oid}"
682 )));
683 }
684 }
685
686 for oid in options.thin_bases.keys() {
688 if oid.format() != format {
689 return Err(GitError::InvalidObjectId(
690 "thin pack base object id format does not match pack format".into(),
691 ));
692 }
693 }
694
695 let (plan, order) = plan_pack_deltas(&objects, &object_ids, options)?;
701
702 let mut pack = Vec::new();
703 pack.extend_from_slice(b"PACK");
704 pack.extend_from_slice(&2u32.to_be_bytes());
705 pack.extend_from_slice(&(objects.len() as u32).to_be_bytes());
706
707 let mut index_entries = Vec::with_capacity(objects.len());
708 let mut written_offsets: Vec<Option<u64>> = vec![None; objects.len()];
711
712 let compressed_payloads = compress_planned_payloads(&objects, &plan, &order)?;
713
714 for (order_pos, &idx) in order.iter().enumerate() {
715 let offset = pack.len() as u64;
716 let mut entry_bytes = Vec::new();
717 match &plan[idx].base {
718 PlannedBase::None => {
719 write_entry_header(
720 &mut entry_bytes,
721 objects[idx].object_type,
722 objects[idx].body.len() as u64,
723 );
724 }
725 PlannedBase::InPack { base_idx, delta } => {
726 let base_offset = written_offsets[*base_idx].ok_or_else(|| {
727 GitError::InvalidFormat(
728 "in-pack delta base emitted after dependent object".into(),
729 )
730 })?;
731 if options.prefer_ofs_delta {
732 write_pack_entry_header_kind(&mut entry_bytes, 6, delta.len() as u64);
733 let relative = offset.checked_sub(base_offset).ok_or_else(|| {
734 GitError::InvalidFormat("ofs-delta base offset is after delta".into())
735 })?;
736 write_ofs_delta_offset(&mut entry_bytes, relative)?;
737 } else {
738 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
739 entry_bytes.extend_from_slice(object_ids[*base_idx].as_bytes());
740 }
741 }
742 PlannedBase::External { base_oid, delta } => {
743 write_pack_entry_header_kind(&mut entry_bytes, 7, delta.len() as u64);
744 entry_bytes.extend_from_slice(base_oid.as_bytes());
745 }
746 }
747 entry_bytes.extend_from_slice(&compressed_payloads[order_pos]);
748 let crc32 = crc32fast::hash(&entry_bytes);
749 pack.extend_from_slice(&entry_bytes);
750 written_offsets[idx] = Some(offset);
751 index_entries.push(PackIndexEntry {
752 oid: object_ids[idx].clone(),
753 crc32,
754 offset,
755 });
756 }
757
758 let checksum = sley_core::digest_bytes(format, &pack)?;
759 pack.extend_from_slice(checksum.as_bytes());
760 let index = PackIndex::write_v2(format, &index_entries, &checksum)?;
761 Ok(PackWrite {
762 pack,
763 index,
764 checksum,
765 entries: index_entries,
766 })
767 }
768}
769
770impl<'a> PackIndexView<'a> {
    /// Parses a SHA-1 pack index with full verification.
    ///
    /// Despite the name this simply calls [`PackIndexView::parse`], which also
    /// accepts a version-1 index when the v2 magic is absent.
    pub fn parse_v2_sha1(bytes: &'a [u8]) -> Result<Self> {
        Self::parse(bytes, ObjectFormat::Sha1)
    }
774
    /// Parses a pack index, verifying the index checksum and validating every
    /// entry.
    pub fn parse(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, true, true)
    }
778
    /// Parses a pack index without recomputing the index checksum; entries are
    /// still validated.
    pub fn parse_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, true)
    }
785
    /// Parses a pack index while skipping both the checksum verification and
    /// the per-entry validation pass.
    ///
    /// Structural checks (lengths, version, table sizes) still run. Intended
    /// for bytes the caller already trusts.
    pub fn parse_trusted_without_checksum(bytes: &'a [u8], format: ObjectFormat) -> Result<Self> {
        Self::parse_impl(bytes, format, false, false)
    }
795
    /// Number of objects in the index.
    pub fn count(&self) -> usize {
        self.count
    }
799
    /// The 256-entry fanout table.
    pub fn fanout(&self) -> &[u32; 256] {
        &self.fanout
    }
803
804 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
805 if oid.format() != self.format {
806 return None;
807 }
808 let bucket = usize::from(oid.as_bytes()[0]);
809 let mut start = if bucket == 0 {
810 0
811 } else {
812 self.fanout[bucket - 1] as usize
813 };
814 let mut end = self.fanout[bucket] as usize;
815 let target = oid.as_bytes();
816
817 while start < end {
818 let mid = start + (end - start) / 2;
819 match self.oid_bytes_at(mid).cmp(target) {
820 std::cmp::Ordering::Less => start = mid + 1,
821 std::cmp::Ordering::Equal => return self.lookup_at(mid),
822 std::cmp::Ordering::Greater => end = mid,
823 }
824 }
825 None
826 }
827
    /// Shared parser behind the public `parse*` constructors.
    ///
    /// Input without the v2 magic (`0xff 't' 'O' 'c'`) is delegated to
    /// `parse_v1_impl`. For v2 it records the byte ranges of the oid, CRC,
    /// small-offset and large-offset tables and checks that they end exactly
    /// where the two trailing checksums begin.
    ///
    /// * `verify_checksum` — recompute the index digest and compare it with
    ///   the trailer.
    /// * `validate_entries` — additionally run `validate_v2_entries`.
    ///
    /// # Errors
    ///
    /// `GitError::InvalidFormat` for short input, checksum mismatch or a table
    /// layout that does not line up with the trailer; `GitError::Unsupported`
    /// for a v2-magic file whose version is not 2; plus any error from the
    /// helpers it calls.
    fn parse_impl(
        bytes: &'a [u8],
        format: ObjectFormat,
        verify_checksum: bool,
        validate_entries: bool,
    ) -> Result<Self> {
        let hash_len = format.raw_len();
        if bytes.len() < 4 {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        // No v2 magic: treat the buffer as a version-1 index.
        if bytes[..4] != [0xff, b't', b'O', b'c'] {
            return Self::parse_v1_impl(bytes, format, verify_checksum, validate_entries);
        }
        // Magic + version, the fanout table, and the two trailing checksums.
        if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        let version = u32_be(&bytes[4..8]);
        if version != 2 {
            return Err(GitError::Unsupported(format!(
                "pack index version {version}"
            )));
        }
        let index_checksum_offset = bytes.len() - hash_len;
        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
        if verify_checksum {
            let actual_index_checksum =
                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
            if actual_index_checksum != index_checksum {
                return Err(GitError::InvalidFormat(format!(
                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
                )));
            }
        }

        let mut offset = 8usize;
        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
        // The last fanout entry is the total object count.
        let count = fanout[255] as usize;
        let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
        offset = oid_table.end;
        let crc_table = checked_range(offset, count, 4, bytes.len())?;
        offset = crc_table.end;
        let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
        offset = small_offset_table.end;

        // Entries with the top bit set refer to the large-offset table; their
        // number determines how many 8-byte entries that table must hold.
        let large_offset_count = (0..count)
            .filter(|idx| {
                let start = small_offset_table.start + idx * 4;
                u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
            })
            .count();
        let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
        offset = large_offset_table.end;

        // The tables must end exactly where the pack and index checksums start.
        let expected_trailer_offset = bytes.len() - hash_len * 2;
        if offset != expected_trailer_offset {
            return Err(GitError::InvalidFormat(format!(
                "pack index has {} unexpected bytes before trailer",
                expected_trailer_offset.saturating_sub(offset)
            )));
        }
        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;

        let view = Self {
            version,
            count,
            fanout,
            pack_checksum,
            index_checksum,
            bytes,
            format,
            tables: PackIndexViewTables::V2 {
                oid_table,
                crc_table,
                small_offset_table,
                large_offset_table,
            },
        };
        if validate_entries {
            view.validate_v2_entries()?;
        }
        Ok(view)
    }
910
    /// Parses a version-1 pack index (no magic, fanout table first).
    ///
    /// Records the byte range of the single entry table, whose records are
    /// `4 + raw_len` bytes each, and checks that it ends exactly where the two
    /// trailing checksums begin.
    ///
    /// * `verify_checksum` — recompute the index digest and compare it with
    ///   the trailer.
    /// * `validate_entries` — additionally run `validate_v1_entries`.
    ///
    /// # Errors
    ///
    /// `GitError::InvalidFormat` for short input, checksum mismatch or an entry
    /// table that does not line up with the trailer; plus any error from the
    /// helpers it calls.
    fn parse_v1_impl(
        bytes: &'a [u8],
        format: ObjectFormat,
        verify_checksum: bool,
        validate_entries: bool,
    ) -> Result<Self> {
        let hash_len = format.raw_len();
        // Fanout table plus the two trailing checksums.
        if bytes.len() < 256 * 4 + 2 * hash_len {
            return Err(GitError::InvalidFormat("pack index too short".into()));
        }
        let index_checksum_offset = bytes.len() - hash_len;
        let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
        if verify_checksum {
            let actual_index_checksum =
                sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
            if actual_index_checksum != index_checksum {
                return Err(GitError::InvalidFormat(format!(
                    "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
                )));
            }
        }

        let mut offset = 0usize;
        let fanout = read_pack_index_fanout(bytes, &mut offset)?;
        // The last fanout entry is the total object count.
        let count = fanout[255] as usize;
        let entry_len = hash_len
            .checked_add(4)
            .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
        let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
        offset = entry_table.end;
        // The entry table must end exactly where the two checksums start.
        let expected_trailer_offset = bytes.len() - hash_len * 2;
        if offset != expected_trailer_offset {
            return Err(GitError::InvalidFormat(format!(
                "pack index has {} unexpected bytes before trailer",
                expected_trailer_offset.saturating_sub(offset)
            )));
        }
        let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;

        let view = Self {
            version: 1,
            count,
            fanout,
            pack_checksum,
            index_checksum,
            bytes,
            format,
            tables: PackIndexViewTables::V1 { entry_table },
        };
        if validate_entries {
            view.validate_v1_entries()?;
        }
        Ok(view)
    }
965
966 fn validate_v2_entries(&self) -> Result<()> {
967 let PackIndexViewTables::V2 {
968 oid_table,
969 small_offset_table,
970 large_offset_table,
971 ..
972 } = &self.tables
973 else {
974 unreachable!("v2 validation only runs for v2 views");
975 };
976 let oid_table = self.slice(oid_table.clone());
977 let small_offset_table = self.slice(small_offset_table.clone());
978 let large_offset_table = self.slice(large_offset_table.clone());
979 let hash_len = self.format.raw_len();
980 for idx in 0..self.count {
981 let oid_start = idx * hash_len;
982 let oid_bytes = &oid_table[oid_start..oid_start + hash_len];
983 if idx > 0 && oid_bytes <= &oid_table[oid_start - hash_len..oid_start] {
984 return Err(GitError::InvalidFormat(
985 "pack index object ids are not strictly ascending".into(),
986 ));
987 }
988 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
989
990 let offset_start = idx * 4;
991 let raw_offset = u32_be(&small_offset_table[offset_start..offset_start + 4]);
992 pack_index_v2_offset(raw_offset, large_offset_table)?;
993 }
994 Ok(())
995 }
996
997 fn validate_v1_entries(&self) -> Result<()> {
998 let PackIndexViewTables::V1 { entry_table } = &self.tables else {
999 unreachable!("v1 validation only runs for v1 views");
1000 };
1001 let entry_table = self.slice(entry_table.clone());
1002 let hash_len = self.format.raw_len();
1003 let entry_len = hash_len
1004 .checked_add(4)
1005 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1006 for idx in 0..self.count {
1007 let start = idx * entry_len;
1008 let oid_start = start + 4;
1009 let oid_bytes = &entry_table[oid_start..start + entry_len];
1010 if idx > 0 {
1011 let previous_oid_start = oid_start - entry_len;
1012 let previous_oid = &entry_table[previous_oid_start..previous_oid_start + hash_len];
1013 if previous_oid >= oid_bytes {
1014 return Err(GitError::InvalidFormat(
1015 "pack index object ids are not strictly sorted".into(),
1016 ));
1017 }
1018 }
1019 validate_pack_index_oid_fanout(idx, oid_bytes, &self.fanout)?;
1020 }
1021 Ok(())
1022 }
1023
1024 fn oid_bytes_at(&self, idx: usize) -> &'a [u8] {
1025 let hash_len = self.format.raw_len();
1026 match &self.tables {
1027 PackIndexViewTables::V1 { entry_table } => {
1028 let entry_table = self.slice(entry_table.clone());
1029 let entry_len = hash_len + 4;
1030 let start = idx * entry_len + 4;
1031 &entry_table[start..start + hash_len]
1032 }
1033 PackIndexViewTables::V2 { oid_table, .. } => {
1034 let oid_table = self.slice(oid_table.clone());
1035 let start = idx * hash_len;
1036 &oid_table[start..start + hash_len]
1037 }
1038 }
1039 }
1040
1041 fn lookup_at(&self, idx: usize) -> Option<PackIndexLookup> {
1042 if idx >= self.count {
1043 return None;
1044 }
1045 let hash_len = self.format.raw_len();
1046 match &self.tables {
1047 PackIndexViewTables::V1 { entry_table } => {
1048 let entry_table = self.slice(entry_table.clone());
1049 let entry_len = hash_len + 4;
1050 let start = idx * entry_len;
1051 Some(PackIndexLookup {
1052 crc32: 0,
1053 offset: u64::from(u32_be(&entry_table[start..start + 4])),
1054 })
1055 }
1056 PackIndexViewTables::V2 {
1057 crc_table,
1058 small_offset_table,
1059 large_offset_table,
1060 ..
1061 } => {
1062 let crc_table = self.slice(crc_table.clone());
1063 let small_offset_table = self.slice(small_offset_table.clone());
1064 let large_offset_table = self.slice(large_offset_table.clone());
1065 let crc_start = idx * 4;
1066 let raw_offset = u32_be(&small_offset_table[crc_start..crc_start + 4]);
1067 Some(PackIndexLookup {
1068 crc32: u32_be(&crc_table[crc_start..crc_start + 4]),
1069 offset: pack_index_v2_offset(raw_offset, large_offset_table).ok()?,
1070 })
1071 }
1072 }
1073 }
1074
    /// Returns the part of the index buffer covered by `range`, with the
    /// buffer's lifetime rather than the view's. Panics if `range` is out of
    /// bounds.
    fn slice(&self, range: Range<usize>) -> &'a [u8] {
        &self.bytes[range]
    }
1078}
1079
1080impl PackIndexViewData {
1081 pub fn parse(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1082 Self::parse_source(Arc::new(SharedIndexBytes(bytes)), format)
1083 }
1084
1085 pub fn parse_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1089 Self::parse_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1090 }
1091
1092 pub fn parse_trusted_without_checksum(bytes: Arc<[u8]>, format: ObjectFormat) -> Result<Self> {
1095 Self::parse_trusted_source_without_checksum(Arc::new(SharedIndexBytes(bytes)), format)
1096 }
1097
1098 pub fn parse_source(
1099 bytes: Arc<dyn PackIndexByteSource>,
1100 format: ObjectFormat,
1101 ) -> Result<Self> {
1102 Self::parse_impl(bytes, format, true, true)
1103 }
1104
1105 pub fn parse_source_without_checksum(
1106 bytes: Arc<dyn PackIndexByteSource>,
1107 format: ObjectFormat,
1108 ) -> Result<Self> {
1109 Self::parse_impl(bytes, format, false, true)
1110 }
1111
1112 pub fn parse_trusted_source_without_checksum(
1113 bytes: Arc<dyn PackIndexByteSource>,
1114 format: ObjectFormat,
1115 ) -> Result<Self> {
1116 Self::parse_impl(bytes, format, false, false)
1117 }
1118
1119 pub fn count(&self) -> usize {
1120 self.count
1121 }
1122
1123 pub fn fanout(&self) -> &[u32; 256] {
1124 &self.fanout
1125 }
1126
1127 pub fn find(&self, oid: &ObjectId) -> Option<PackIndexLookup> {
1128 self.as_view().find(oid)
1129 }
1130
1131 pub fn as_view(&self) -> PackIndexView<'_> {
1132 PackIndexView {
1133 version: self.version,
1134 count: self.count,
1135 fanout: self.fanout,
1136 pack_checksum: self.pack_checksum,
1137 index_checksum: self.index_checksum,
1138 bytes: self.bytes.as_bytes(),
1139 format: self.format,
1140 tables: self.tables.clone(),
1141 }
1142 }
1143
1144 fn parse_impl(
1145 bytes: Arc<dyn PackIndexByteSource>,
1146 format: ObjectFormat,
1147 verify_checksum: bool,
1148 validate_entries: bool,
1149 ) -> Result<Self> {
1150 let (version, count, fanout, pack_checksum, index_checksum, tables) = {
1151 let view = PackIndexView::parse_impl(
1152 bytes.as_bytes(),
1153 format,
1154 verify_checksum,
1155 validate_entries,
1156 )?;
1157 (
1158 view.version,
1159 view.count,
1160 view.fanout,
1161 view.pack_checksum,
1162 view.index_checksum,
1163 view.tables,
1164 )
1165 };
1166 Ok(Self {
1167 version,
1168 count,
1169 fanout,
1170 pack_checksum,
1171 index_checksum,
1172 bytes,
1173 format,
1174 tables,
1175 })
1176 }
1177}
1178
1179impl PackIndex {
    /// Builds a version-2 index for a SHA-1 pack.
    ///
    /// Shorthand for `write_v2_for_pack` with `ObjectFormat::Sha1`.
    pub fn write_v2_for_pack_sha1(pack_bytes: &[u8]) -> Result<PackIndexBuild> {
        Self::write_v2_for_pack(pack_bytes, ObjectFormat::Sha1)
    }
1183
1184 pub fn write_v2_for_pack(pack_bytes: &[u8], format: ObjectFormat) -> Result<PackIndexBuild> {
1185 let trailer_len = format.raw_len();
1186 if pack_bytes.len() < 12 + trailer_len {
1187 return Err(GitError::InvalidFormat("pack file too short".into()));
1188 }
1189 let trailer_offset = pack_bytes.len() - trailer_len;
1190 let pack_checksum = sley_core::digest_bytes(format, &pack_bytes[..trailer_offset])?;
1191 let expected = ObjectId::from_raw(format, &pack_bytes[trailer_offset..])?;
1192 if pack_checksum != expected {
1193 return Err(GitError::InvalidFormat(format!(
1194 "pack checksum mismatch: expected {expected}, got {pack_checksum}"
1195 )));
1196 }
1197
1198 if &pack_bytes[..4] != b"PACK" {
1199 return Err(GitError::InvalidFormat("missing PACK signature".into()));
1200 }
1201 let version = u32_be(&pack_bytes[4..8]);
1202 if version != 2 && version != 3 {
1203 return Err(GitError::Unsupported(format!("pack version {version}")));
1204 }
1205 let count = u32_be(&pack_bytes[8..12]) as usize;
1206 let mut offset = 12usize;
1207 let mut parsed_entries = Vec::with_capacity(count);
1208 let mut raw_entries = Vec::with_capacity(count);
1209 for _ in 0..count {
1210 let entry_offset = offset;
1211 let header = parse_entry_header(pack_bytes, &mut offset)?;
1212 let base = match header.kind {
1213 PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
1214 pack_bytes,
1215 &mut offset,
1216 entry_offset as u64,
1217 )?)),
1218 PackObjectKind::RefDelta => {
1219 let hash_len = format.raw_len();
1220 if offset + hash_len > trailer_offset {
1221 return Err(GitError::InvalidFormat(
1222 "truncated ref-delta base object id".into(),
1223 ));
1224 }
1225 let oid = ObjectId::from_raw(format, &pack_bytes[offset..offset + hash_len])?;
1226 offset += hash_len;
1227 Some(DeltaBase::Ref(oid))
1228 }
1229 _ => None,
1230 };
1231 let mut body = Vec::new();
1232 let consumed = inflate_into(
1233 &pack_bytes[offset..trailer_offset],
1234 &mut body,
1235 header.size.min(usize::MAX as u64) as usize,
1236 )?;
1237 if body.len() as u64 != header.size {
1238 return Err(GitError::InvalidObject(format!(
1239 "pack object declared {} bytes, decoded {}",
1240 header.size,
1241 body.len()
1242 )));
1243 }
1244 if consumed == 0 {
1245 return Err(GitError::InvalidFormat(
1246 "empty compressed pack entry".into(),
1247 ));
1248 }
1249 offset = offset
1250 .checked_add(consumed)
1251 .ok_or_else(|| GitError::InvalidFormat("pack offset overflow".into()))?;
1252 if offset > trailer_offset {
1253 return Err(GitError::InvalidFormat(
1254 "pack entry extends past checksum".into(),
1255 ));
1256 }
1257 raw_entries.push((
1258 entry_offset as u64,
1259 crc32fast::hash(&pack_bytes[entry_offset..offset]),
1260 ));
1261 if let Some(base) = base {
1262 parsed_entries.push(ParsedPackEntry::Delta {
1263 base,
1264 compressed_size: consumed as u64,
1265 delta_size: header.size,
1266 offset: entry_offset as u64,
1267 delta: body,
1268 });
1269 } else {
1270 let object_type = match header.kind {
1271 PackObjectKind::Commit => ObjectType::Commit,
1272 PackObjectKind::Tree => ObjectType::Tree,
1273 PackObjectKind::Blob => ObjectType::Blob,
1274 PackObjectKind::Tag => ObjectType::Tag,
1275 PackObjectKind::OfsDelta | PackObjectKind::RefDelta => unreachable!(),
1276 };
1277 let object = EncodedObject::new(object_type, body);
1278 let oid = object.object_id(format)?;
1279 parsed_entries.push(ParsedPackEntry::Resolved(PackObject {
1280 entry: PackEntry {
1281 oid,
1282 compressed_size: consumed as u64,
1283 uncompressed_size: header.size,
1284 offset: entry_offset as u64,
1285 },
1286 object,
1287 }));
1288 }
1289 }
1290 if offset != trailer_offset {
1291 return Err(GitError::InvalidFormat(format!(
1292 "pack has {} trailing bytes before checksum",
1293 trailer_offset - offset
1294 )));
1295 }
1296
1297 let resolved = resolve_pack_entries(parsed_entries, format, &mut |_| Ok(None))?;
1298 let entries = resolved
1299 .iter()
1300 .zip(raw_entries)
1301 .map(|(object, (offset, crc32))| PackIndexEntry {
1302 oid: object.entry.oid,
1303 crc32,
1304 offset,
1305 })
1306 .collect::<Vec<_>>();
1307 let index = PackIndex::write_v2(format, &entries, &pack_checksum)?;
1308 Ok(PackIndexBuild {
1309 index,
1310 pack_checksum,
1311 entries,
1312 })
1313 }
1314
1315 pub fn parse_v2_sha1(bytes: &[u8]) -> Result<Self> {
1316 Self::parse(bytes, ObjectFormat::Sha1)
1317 }
1318
1319 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1320 let hash_len = format.raw_len();
1321 if bytes.len() < 4 {
1322 return Err(GitError::InvalidFormat("pack index too short".into()));
1323 }
1324 if bytes[..4] != [0xff, b't', b'O', b'c'] {
1325 return Self::parse_v1(bytes, format);
1326 }
1327 if bytes.len() < 8 + 256 * 4 + 2 * hash_len {
1328 return Err(GitError::InvalidFormat("pack index too short".into()));
1329 }
1330 let version = u32_be(&bytes[4..8]);
1331 if version != 2 {
1332 return Err(GitError::Unsupported(format!(
1333 "pack index version {version}"
1334 )));
1335 }
1336 let index_checksum_offset = bytes.len() - hash_len;
1337 let actual_index_checksum =
1338 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1339 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1340 if actual_index_checksum != index_checksum {
1341 return Err(GitError::InvalidFormat(format!(
1342 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1343 )));
1344 }
1345
1346 let mut offset = 8usize;
1347 let mut fanout = [0u32; 256];
1348 let mut previous = 0u32;
1349 for slot in &mut fanout {
1350 *slot = u32_be(&bytes[offset..offset + 4]);
1351 if *slot < previous {
1352 return Err(GitError::InvalidFormat(
1353 "pack index fanout is not monotonic".into(),
1354 ));
1355 }
1356 previous = *slot;
1357 offset += 4;
1358 }
1359 let count = fanout[255] as usize;
1360 let oid_table = checked_range(offset, count, hash_len, bytes.len())?;
1361 offset = oid_table.end;
1362 let crc_table = checked_range(offset, count, 4, bytes.len())?;
1363 offset = crc_table.end;
1364 let small_offset_table = checked_range(offset, count, 4, bytes.len())?;
1365 offset = small_offset_table.end;
1366
1367 let large_offset_count = (0..count)
1368 .filter(|idx| {
1369 let start = small_offset_table.start + idx * 4;
1370 u32_be(&bytes[start..start + 4]) & 0x8000_0000 != 0
1371 })
1372 .count();
1373 let large_offset_table = checked_range(offset, large_offset_count, 8, bytes.len())?;
1374 offset = large_offset_table.end;
1375
1376 let expected_trailer_offset = bytes.len() - hash_len * 2;
1377 if offset != expected_trailer_offset {
1378 return Err(GitError::InvalidFormat(format!(
1379 "pack index has {} unexpected bytes before trailer",
1380 expected_trailer_offset.saturating_sub(offset)
1381 )));
1382 }
1383 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1384
1385 let mut entries = Vec::with_capacity(count);
1386 for idx in 0..count {
1387 let oid_start = oid_table.start + idx * hash_len;
1388 let crc_start = crc_table.start + idx * 4;
1389 let offset_start = small_offset_table.start + idx * 4;
1390 let oid_bytes = &bytes[oid_start..oid_start + hash_len];
1391 if idx > 0 && oid_bytes <= &bytes[oid_start - hash_len..oid_start] {
1395 return Err(GitError::InvalidFormat(
1396 "pack index object ids are not strictly ascending".into(),
1397 ));
1398 }
1399 let expected_min = if oid_bytes[0] == 0 {
1400 0
1401 } else {
1402 fanout[usize::from(oid_bytes[0] - 1)]
1403 };
1404 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
1405 return Err(GitError::InvalidFormat(
1406 "pack index object id is outside its fanout bucket".into(),
1407 ));
1408 }
1409 let raw_offset = u32_be(&bytes[offset_start..offset_start + 4]);
1410 let offset = if raw_offset & 0x8000_0000 == 0 {
1411 u64::from(raw_offset)
1412 } else {
1413 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
1414 let large_start = large_offset_table.start + large_idx * 8;
1415 if large_idx >= large_offset_count {
1416 return Err(GitError::InvalidFormat(
1417 "pack index large offset points past table".into(),
1418 ));
1419 }
1420 u64_be(&bytes[large_start..large_start + 8])
1421 };
1422 entries.push(PackIndexEntry {
1423 oid: ObjectId::from_raw(format, oid_bytes)?,
1424 crc32: u32_be(&bytes[crc_start..crc_start + 4]),
1425 offset,
1426 });
1427 }
1428 Ok(Self {
1429 version,
1430 fanout,
1431 entries,
1432 pack_checksum,
1433 index_checksum,
1434 })
1435 }
1436
1437 fn parse_v1(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
1438 let hash_len = format.raw_len();
1439 if bytes.len() < 256 * 4 + 2 * hash_len {
1440 return Err(GitError::InvalidFormat("pack index too short".into()));
1441 }
1442 let index_checksum_offset = bytes.len() - hash_len;
1443 let actual_index_checksum =
1444 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1445 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1446 if actual_index_checksum != index_checksum {
1447 return Err(GitError::InvalidFormat(format!(
1448 "pack index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1449 )));
1450 }
1451
1452 let mut offset = 0usize;
1453 let mut fanout = [0u32; 256];
1454 let mut previous = 0u32;
1455 for slot in &mut fanout {
1456 *slot = u32_be(&bytes[offset..offset + 4]);
1457 if *slot < previous {
1458 return Err(GitError::InvalidFormat(
1459 "pack index fanout is not monotonic".into(),
1460 ));
1461 }
1462 previous = *slot;
1463 offset += 4;
1464 }
1465 let count = fanout[255] as usize;
1466 let entry_len = hash_len
1467 .checked_add(4)
1468 .ok_or_else(|| GitError::InvalidFormat("pack index entry length overflow".into()))?;
1469 let entry_table = checked_range(offset, count, entry_len, bytes.len())?;
1470 offset = entry_table.end;
1471 let expected_trailer_offset = bytes.len() - hash_len * 2;
1472 if offset != expected_trailer_offset {
1473 return Err(GitError::InvalidFormat(format!(
1474 "pack index has {} unexpected bytes before trailer",
1475 expected_trailer_offset.saturating_sub(offset)
1476 )));
1477 }
1478 let pack_checksum = ObjectId::from_raw(format, &bytes[offset..offset + hash_len])?;
1479
1480 let mut entries = Vec::with_capacity(count);
1481 let mut previous_oid: Option<ObjectId> = None;
1482 for idx in 0..count {
1483 let start = entry_table.start + idx * entry_len;
1484 let oid = ObjectId::from_raw(format, &bytes[start + 4..start + entry_len])?;
1485 if let Some(previous) = &previous_oid
1486 && previous.as_bytes() >= oid.as_bytes()
1487 {
1488 return Err(GitError::InvalidFormat(
1489 "pack index object ids are not strictly sorted".into(),
1490 ));
1491 }
1492 previous_oid = Some(oid);
1493 entries.push(PackIndexEntry {
1494 oid,
1495 crc32: 0,
1496 offset: u64::from(u32_be(&bytes[start..start + 4])),
1497 });
1498 }
1499 Ok(Self {
1500 version: 1,
1501 fanout,
1502 entries,
1503 pack_checksum,
1504 index_checksum,
1505 })
1506 }
1507
1508 pub fn find(&self, oid: &ObjectId) -> Option<&PackIndexEntry> {
1509 self.entries
1510 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
1511 .ok()
1512 .map(|idx| &self.entries[idx])
1513 }
1514
1515 pub fn write_v2_sha1(entries: &[PackIndexEntry], pack_checksum: &ObjectId) -> Result<Vec<u8>> {
1516 Self::write_v2(ObjectFormat::Sha1, entries, pack_checksum)
1517 }
1518
1519 pub fn write_v2(
1520 format: ObjectFormat,
1521 entries: &[PackIndexEntry],
1522 pack_checksum: &ObjectId,
1523 ) -> Result<Vec<u8>> {
1524 if pack_checksum.format() != format {
1525 return Err(GitError::InvalidObjectId(
1526 "pack checksum format does not match index format".into(),
1527 ));
1528 }
1529 let mut entries = entries.iter().collect::<Vec<_>>();
1530 entries.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
1531 for pair in entries.windows(2) {
1532 if pair[0].oid.as_bytes() == pair[1].oid.as_bytes() {
1533 return Err(GitError::InvalidFormat(format!(
1534 "pack index contains duplicate object id {}",
1535 pair[0].oid
1536 )));
1537 }
1538 }
1539 let mut fanout = [0u32; 256];
1540 for entry in &entries {
1541 if entry.oid.format() != format {
1542 return Err(GitError::InvalidObjectId(
1543 "pack index entry format does not match index format".into(),
1544 ));
1545 }
1546 let first = entry.oid.as_bytes()[0] as usize;
1547 fanout[first] = fanout[first]
1548 .checked_add(1)
1549 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1550 }
1551 let mut running = 0u32;
1552 for slot in &mut fanout {
1553 running = running
1554 .checked_add(*slot)
1555 .ok_or_else(|| GitError::InvalidFormat("pack index fanout overflow".into()))?;
1556 *slot = running;
1557 }
1558
1559 let mut index = Vec::new();
1560 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
1561 index.extend_from_slice(&2u32.to_be_bytes());
1562 for count in fanout {
1563 index.extend_from_slice(&count.to_be_bytes());
1564 }
1565 for entry in &entries {
1566 index.extend_from_slice(entry.oid.as_bytes());
1567 }
1568 for entry in &entries {
1569 index.extend_from_slice(&entry.crc32.to_be_bytes());
1570 }
1571
1572 let mut large_offsets = Vec::new();
1573 for entry in &entries {
1574 if entry.offset < 0x8000_0000 {
1575 index.extend_from_slice(&(entry.offset as u32).to_be_bytes());
1576 } else {
1577 if large_offsets.len() > 0x7fff_ffff {
1578 return Err(GitError::InvalidFormat(
1579 "too many large pack offsets".into(),
1580 ));
1581 }
1582 let large_idx = large_offsets.len() as u32;
1583 index.extend_from_slice(&(0x8000_0000 | large_idx).to_be_bytes());
1584 large_offsets.push(entry.offset);
1585 }
1586 }
1587 for offset in large_offsets {
1588 index.extend_from_slice(&offset.to_be_bytes());
1589 }
1590 index.extend_from_slice(pack_checksum.as_bytes());
1591 let index_checksum = sley_core::digest_bytes(format, &index)?;
1592 index.extend_from_slice(index_checksum.as_bytes());
1593 Ok(index)
1594 }
1595}
1596
1597pub fn pack_order_index_positions(entries: &[PackIndexEntry]) -> Vec<u32> {
1602 let mut oid_sorted: Vec<usize> = (0..entries.len()).collect();
1603 oid_sorted.sort_by(|&a, &b| entries[a].oid.as_bytes().cmp(entries[b].oid.as_bytes()));
1604 let mut index_position = vec![0u32; entries.len()];
1605 for (position, &entry) in oid_sorted.iter().enumerate() {
1606 index_position[entry] = position as u32;
1607 }
1608 let mut by_offset: Vec<usize> = (0..entries.len()).collect();
1609 by_offset.sort_by_key(|&entry| entries[entry].offset);
1610 by_offset
1611 .into_iter()
1612 .map(|entry| index_position[entry])
1613 .collect()
1614}
1615
1616impl PackReverseIndex {
1617 pub fn write(
1618 format: ObjectFormat,
1619 positions: &[u32],
1620 pack_checksum: &ObjectId,
1621 ) -> Result<Vec<u8>> {
1622 if pack_checksum.format() != format {
1623 return Err(GitError::InvalidObjectId(
1624 "pack checksum format does not match reverse index format".into(),
1625 ));
1626 }
1627 validate_position_permutation(positions)?;
1628
1629 let mut out = Vec::new();
1630 out.extend_from_slice(b"RIDX");
1631 out.extend_from_slice(&1u32.to_be_bytes());
1632 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1633 for position in positions {
1634 out.extend_from_slice(&position.to_be_bytes());
1635 }
1636 out.extend_from_slice(pack_checksum.as_bytes());
1637 let checksum = sley_core::digest_bytes(format, &out)?;
1638 out.extend_from_slice(checksum.as_bytes());
1639 Ok(out)
1640 }
1641
1642 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1643 let hash_len = format.raw_len();
1644 let table_len = object_count
1645 .checked_mul(4)
1646 .ok_or_else(|| GitError::InvalidFormat("reverse index table overflow".into()))?;
1647 let min_len = 12usize
1648 .checked_add(table_len)
1649 .and_then(|len| len.checked_add(hash_len * 2))
1650 .ok_or_else(|| GitError::InvalidFormat("reverse index length overflow".into()))?;
1651 if bytes.len() < min_len {
1652 return Err(GitError::InvalidFormat("reverse index too short".into()));
1653 }
1654 if bytes.len() != min_len {
1655 return Err(GitError::InvalidFormat(format!(
1656 "reverse index has {} trailing bytes",
1657 bytes.len() - min_len
1658 )));
1659 }
1660 if &bytes[..4] != b"RIDX" {
1661 return Err(GitError::InvalidFormat(
1662 "missing reverse index signature".into(),
1663 ));
1664 }
1665 let version = u32_be(&bytes[4..8]);
1666 if version != 1 {
1667 return Err(GitError::Unsupported(format!(
1668 "reverse index version {version}"
1669 )));
1670 }
1671 let hash_id = u32_be(&bytes[8..12]);
1672 if hash_id != hash_function_id(format) {
1673 return Err(GitError::InvalidFormat(format!(
1674 "reverse index hash id {hash_id} does not match {}",
1675 format.name()
1676 )));
1677 }
1678
1679 let index_checksum_offset = bytes.len() - hash_len;
1680 let actual_index_checksum =
1681 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1682 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1683 if actual_index_checksum != index_checksum {
1684 return Err(GitError::InvalidFormat(format!(
1685 "reverse index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1686 )));
1687 }
1688
1689 let pack_checksum_offset = index_checksum_offset - hash_len;
1690 let pack_checksum =
1691 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1692 let mut positions = Vec::with_capacity(object_count);
1693 let mut offset = 12usize;
1694 for _ in 0..object_count {
1695 let position = u32_be(&bytes[offset..offset + 4]);
1696 positions.push(position);
1697 offset += 4;
1698 }
1699 validate_position_permutation(&positions)?;
1700
1701 Ok(Self {
1702 version,
1703 format,
1704 positions,
1705 pack_checksum,
1706 index_checksum,
1707 })
1708 }
1709}
1710
1711impl PackMtimes {
1712 pub fn write(
1713 format: ObjectFormat,
1714 mtimes: &[u32],
1715 pack_checksum: &ObjectId,
1716 ) -> Result<Vec<u8>> {
1717 if pack_checksum.format() != format {
1718 return Err(GitError::InvalidObjectId(
1719 "pack checksum format does not match mtimes format".into(),
1720 ));
1721 }
1722
1723 let mut out = Vec::new();
1724 out.extend_from_slice(b"MTME");
1725 out.extend_from_slice(&1u32.to_be_bytes());
1726 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
1727 for mtime in mtimes {
1728 out.extend_from_slice(&mtime.to_be_bytes());
1729 }
1730 out.extend_from_slice(pack_checksum.as_bytes());
1731 let checksum = sley_core::digest_bytes(format, &out)?;
1732 out.extend_from_slice(checksum.as_bytes());
1733 Ok(out)
1734 }
1735
1736 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1737 let hash_len = format.raw_len();
1738 let table_len = object_count
1739 .checked_mul(4)
1740 .ok_or_else(|| GitError::InvalidFormat("mtimes table overflow".into()))?;
1741 let expected_len = 12usize
1742 .checked_add(table_len)
1743 .and_then(|len| len.checked_add(hash_len * 2))
1744 .ok_or_else(|| GitError::InvalidFormat("mtimes length overflow".into()))?;
1745 if bytes.len() < expected_len {
1746 return Err(GitError::InvalidFormat("mtimes file too short".into()));
1747 }
1748 if bytes.len() != expected_len {
1749 return Err(GitError::InvalidFormat(format!(
1750 "mtimes file has {} trailing bytes",
1751 bytes.len() - expected_len
1752 )));
1753 }
1754 if &bytes[..4] != b"MTME" {
1755 return Err(GitError::InvalidFormat("missing mtimes signature".into()));
1756 }
1757 let version = u32_be(&bytes[4..8]);
1758 if version != 1 {
1759 return Err(GitError::Unsupported(format!("mtimes version {version}")));
1760 }
1761 let hash_id = u32_be(&bytes[8..12]);
1762 if hash_id != hash_function_id(format) {
1763 return Err(GitError::InvalidFormat(format!(
1764 "mtimes hash id {hash_id} does not match {}",
1765 format.name()
1766 )));
1767 }
1768
1769 let index_checksum_offset = bytes.len() - hash_len;
1770 let actual_index_checksum =
1771 sley_core::digest_bytes(format, &bytes[..index_checksum_offset])?;
1772 let index_checksum = ObjectId::from_raw(format, &bytes[index_checksum_offset..])?;
1773 if actual_index_checksum != index_checksum {
1774 return Err(GitError::InvalidFormat(format!(
1775 "mtimes checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1776 )));
1777 }
1778
1779 let pack_checksum_offset = index_checksum_offset - hash_len;
1780 let pack_checksum =
1781 ObjectId::from_raw(format, &bytes[pack_checksum_offset..index_checksum_offset])?;
1782 let mut mtimes = Vec::with_capacity(object_count);
1783 let mut offset = 12usize;
1784 for _ in 0..object_count {
1785 mtimes.push(u32_be(&bytes[offset..offset + 4]));
1786 offset += 4;
1787 }
1788
1789 Ok(Self {
1790 version,
1791 format,
1792 mtimes,
1793 pack_checksum,
1794 index_checksum,
1795 })
1796 }
1797}
1798
1799impl PackBitmapIndex {
    /// Bit `0x0001` of the 16-bit options field in the bitmap index header.
    /// `parse` accepts this bit but does not otherwise act on it.
    /// NOTE(review): presumably Git's `BITMAP_OPT_FULL_DAG` — confirm against
    /// the bitmap format specification.
    pub const OPTION_FULL_DAG: u16 = 0x0001;
    /// Bit `0x0004` of the options field. When set, `parse` requires a table
    /// of one 32-bit value per object after the bitmap entries and returns it
    /// as `name_hash_cache`.
    pub const OPTION_HASH_CACHE: u16 = 0x0004;
1802
1803 pub fn parse(bytes: &[u8], format: ObjectFormat, object_count: usize) -> Result<Self> {
1804 let hash_len = format.raw_len();
1805 let min_len = 12usize
1806 .checked_add(hash_len * 2)
1807 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1808 if bytes.len() < min_len {
1809 return Err(GitError::InvalidFormat("bitmap index too short".into()));
1810 }
1811 if &bytes[..4] != b"BITM" {
1812 return Err(GitError::InvalidFormat(
1813 "missing bitmap index signature".into(),
1814 ));
1815 }
1816 let version = u16_be(&bytes[4..6]);
1817 if version != 1 {
1818 return Err(GitError::Unsupported(format!(
1819 "bitmap index version {version}"
1820 )));
1821 }
1822 let options = u16_be(&bytes[6..8]);
1823 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
1824 if options & !known_options != 0 {
1825 return Err(GitError::Unsupported(format!(
1826 "bitmap index options {:#06x}",
1827 options & !known_options
1828 )));
1829 }
1830 let entry_count = u32_be(&bytes[8..12]) as usize;
1831 let checksum_offset = bytes.len() - hash_len;
1832 let actual_index_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1833 let index_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1834 if actual_index_checksum != index_checksum {
1835 return Err(GitError::InvalidFormat(format!(
1836 "bitmap index checksum mismatch: expected {index_checksum}, got {actual_index_checksum}"
1837 )));
1838 }
1839
1840 let pack_checksum_end = 12usize
1841 .checked_add(hash_len)
1842 .ok_or_else(|| GitError::InvalidFormat("bitmap index length overflow".into()))?;
1843 let pack_checksum = ObjectId::from_raw(format, &bytes[12..pack_checksum_end])?;
1844 let mut offset = pack_checksum_end;
1845 let commits = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1846 let trees = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1847 let blobs = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1848 let tags = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1849
1850 let mut entries = Vec::with_capacity(entry_count);
1851 for idx in 0..entry_count {
1852 if checksum_offset.saturating_sub(offset) < 6 {
1853 return Err(GitError::InvalidFormat(
1854 "truncated bitmap index entry".into(),
1855 ));
1856 }
1857 let object_position = u32_be(&bytes[offset..offset + 4]);
1858 offset += 4;
1859 if object_position as usize >= object_count {
1860 return Err(GitError::InvalidFormat(
1861 "bitmap index entry points past object table".into(),
1862 ));
1863 }
1864 let xor_offset = bytes[offset];
1865 offset += 1;
1866 if xor_offset as usize > idx || xor_offset > 160 {
1867 return Err(GitError::InvalidFormat(
1868 "bitmap index entry has invalid XOR offset".into(),
1869 ));
1870 }
1871 let flags = bytes[offset];
1872 offset += 1;
1873 let bitmap = parse_bitmap_ewah(bytes, &mut offset, checksum_offset, object_count)?;
1874 entries.push(PackBitmapEntry {
1875 object_position,
1876 xor_offset,
1877 flags,
1878 bitmap,
1879 });
1880 }
1881
1882 let name_hash_cache = if options & Self::OPTION_HASH_CACHE != 0 {
1883 let cache_len = object_count
1884 .checked_mul(4)
1885 .ok_or_else(|| GitError::InvalidFormat("bitmap hash cache overflow".into()))?;
1886 if checksum_offset.saturating_sub(offset) < cache_len {
1887 return Err(GitError::InvalidFormat(
1888 "truncated bitmap hash cache".into(),
1889 ));
1890 }
1891 let mut cache = Vec::with_capacity(object_count);
1892 for _ in 0..object_count {
1893 cache.push(u32_be(&bytes[offset..offset + 4]));
1894 offset += 4;
1895 }
1896 Some(cache)
1897 } else {
1898 None
1899 };
1900
1901 if offset != checksum_offset {
1902 return Err(GitError::InvalidFormat(format!(
1903 "bitmap index has {} trailing bytes",
1904 checksum_offset - offset
1905 )));
1906 }
1907
1908 Ok(Self {
1909 version,
1910 format,
1911 options,
1912 pack_checksum,
1913 index_checksum,
1914 type_bitmaps: PackBitmapTypeBitmaps {
1915 commits,
1916 trees,
1917 blobs,
1918 tags,
1919 },
1920 entries,
1921 name_hash_cache,
1922 })
1923 }
1924
1925 pub fn entry_for_index_position(&self, position: u32) -> Option<&PackBitmapEntry> {
1928 self.entries
1929 .iter()
1930 .find(|entry| entry.object_position == position)
1931 }
1932}
1933
1934fn parse_bitmap_ewah(
1935 bytes: &[u8],
1936 offset: &mut usize,
1937 checksum_offset: usize,
1938 _object_count: usize,
1939) -> Result<EwahBitmap> {
1940 if checksum_offset.saturating_sub(*offset) < 12 {
1941 return Err(GitError::InvalidFormat("truncated EWAH bitmap".into()));
1942 }
1943 let bit_size = u32_be(&bytes[*offset..*offset + 4]);
1944 *offset += 4;
1945 let word_count = u32_be(&bytes[*offset..*offset + 4]) as usize;
1946 *offset += 4;
1947 let words_len = word_count
1948 .checked_mul(8)
1949 .ok_or_else(|| GitError::InvalidFormat("EWAH word table overflow".into()))?;
1950 if checksum_offset.saturating_sub(*offset) < words_len + 4 {
1951 return Err(GitError::InvalidFormat("truncated EWAH word table".into()));
1952 }
1953 let mut words = Vec::with_capacity(word_count);
1954 for _ in 0..word_count {
1955 words.push(u64_be(&bytes[*offset..*offset + 8]));
1956 *offset += 8;
1957 }
1958 let rlw_position = u32_be(&bytes[*offset..*offset + 4]);
1959 *offset += 4;
1960 validate_ewah_words(bit_size, &words, rlw_position)?;
1961 Ok(EwahBitmap {
1962 bit_size,
1963 words,
1964 rlw_position,
1965 })
1966}
1967
1968fn validate_ewah_words(bit_size: u32, words: &[u64], rlw_position: u32) -> Result<()> {
1969 if words.is_empty() {
1970 if rlw_position != 0 || bit_size != 0 {
1971 return Err(GitError::InvalidFormat(
1972 "EWAH bitmap has invalid empty RLW".into(),
1973 ));
1974 }
1975 return Ok(());
1976 }
1977 if rlw_position as usize >= words.len() {
1978 return Err(GitError::InvalidFormat(
1979 "EWAH RLW position points past word table".into(),
1980 ));
1981 }
1982 let mut word_idx = 0usize;
1983 let mut decoded_words = 0u64;
1984 while word_idx < words.len() {
1985 let rlw = words[word_idx];
1986 let run_words = (rlw >> 1) & 0xffff_ffff;
1987 let literal_words = (rlw >> 33) as usize;
1988 word_idx += 1;
1989 word_idx = word_idx
1990 .checked_add(literal_words)
1991 .ok_or_else(|| GitError::InvalidFormat("EWAH literal word overflow".into()))?;
1992 if word_idx > words.len() {
1993 return Err(GitError::InvalidFormat(
1994 "EWAH literal words extend past word table".into(),
1995 ));
1996 }
1997 decoded_words = decoded_words
1998 .checked_add(run_words)
1999 .and_then(|value| value.checked_add(literal_words as u64))
2000 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded size overflow".into()))?;
2001 }
2002 let decoded_bits = decoded_words
2003 .checked_mul(64)
2004 .ok_or_else(|| GitError::InvalidFormat("EWAH decoded bit size overflow".into()))?;
2005 if decoded_bits < u64::from(bit_size) {
2006 return Err(GitError::InvalidFormat(
2007 "EWAH bitmap decodes fewer bits than declared".into(),
2008 ));
2009 }
2010 Ok(())
2011}
2012
2013impl MultiPackIndex {
2014 pub fn write(
2015 format: ObjectFormat,
2016 version: u8,
2017 pack_names: &[String],
2018 objects: &[MultiPackIndexEntry],
2019 ) -> Result<Vec<u8>> {
2020 Self::write_with_reverse_index(format, version, pack_names, objects, None)
2021 }
2022
2023 pub fn write_with_reverse_index(
2032 format: ObjectFormat,
2033 version: u8,
2034 pack_names: &[String],
2035 objects: &[MultiPackIndexEntry],
2036 preferred_pack: Option<u32>,
2037 ) -> Result<Vec<u8>> {
2038 if let Some(preferred) = preferred_pack
2039 && preferred as usize >= pack_names.len()
2040 {
2041 return Err(GitError::InvalidFormat(format!(
2042 "preferred pack {preferred} out of range for {} packs",
2043 pack_names.len()
2044 )));
2045 }
2046 if version != 1 && version != 2 {
2047 return Err(GitError::Unsupported(format!(
2048 "multi-pack-index version {version}"
2049 )));
2050 }
2051 if pack_names.len() > u32::MAX as usize {
2052 return Err(GitError::InvalidFormat(
2053 "too many multi-pack-index packs".into(),
2054 ));
2055 }
2056 if objects.len() > u32::MAX as usize {
2057 return Err(GitError::InvalidFormat(
2058 "too many multi-pack-index objects".into(),
2059 ));
2060 }
2061 validate_midx_pack_names(pack_names)?;
2062 if version == 1 && pack_names.windows(2).any(|pair| pair[0] > pair[1]) {
2063 return Err(GitError::InvalidFormat(
2064 "multi-pack-index v1 pack names must be sorted".into(),
2065 ));
2066 }
2067
2068 let mut objects = objects.iter().collect::<Vec<_>>();
2069 objects.sort_by(|left, right| left.oid.as_bytes().cmp(right.oid.as_bytes()));
2070 let mut previous_oid: Option<&ObjectId> = None;
2071 for object in &objects {
2072 if object.oid.format() != format {
2073 return Err(GitError::InvalidObjectId(
2074 "multi-pack-index object format does not match index format".into(),
2075 ));
2076 }
2077 if let Some(previous) = previous_oid
2078 && previous.as_bytes() == object.oid.as_bytes()
2079 {
2080 return Err(GitError::InvalidFormat(
2081 "multi-pack-index contains duplicate object ids".into(),
2082 ));
2083 }
2084 if object.pack_int_id as usize >= pack_names.len() {
2085 return Err(GitError::InvalidFormat(
2086 "multi-pack-index object points past pack table".into(),
2087 ));
2088 }
2089 previous_oid = Some(&object.oid);
2090 }
2091
2092 let mut large_offsets = Vec::new();
2093 let mut chunks = vec![
2094 (*b"PNAM", write_midx_pack_names(pack_names)),
2095 (*b"OIDF", write_midx_oid_fanout(&objects)?),
2096 (*b"OIDL", write_midx_oid_lookup(&objects)),
2097 (
2098 *b"OOFF",
2099 write_midx_object_offsets(&objects, &mut large_offsets)?,
2100 ),
2101 ];
2102 if !large_offsets.is_empty() {
2103 chunks.push((*b"LOFF", large_offsets));
2104 }
2105 if let Some(preferred) = preferred_pack {
2106 let mut pseudo: Vec<u32> = (0..objects.len() as u32).collect();
2109 pseudo.sort_by_key(|&midx_pos| {
2110 let object = objects[midx_pos as usize];
2111 (
2112 object.pack_int_id != preferred,
2113 object.pack_int_id,
2114 object.offset,
2115 )
2116 });
2117 let mut ridx = Vec::with_capacity(pseudo.len() * 4);
2118 for midx_pos in pseudo {
2119 ridx.extend_from_slice(&midx_pos.to_be_bytes());
2120 }
2121 chunks.push((*b"RIDX", ridx));
2122 }
2123 write_multi_pack_index_chunks(format, version, pack_names.len() as u32, &chunks)
2124 }
2125
2126 pub fn parse(bytes: &[u8], format: ObjectFormat) -> Result<Self> {
2127 let hash_len = format.raw_len();
2128 if bytes.len() < 12 + 12 + hash_len {
2129 return Err(GitError::InvalidFormat(
2130 "multi-pack-index file too short".into(),
2131 ));
2132 }
2133 if &bytes[..4] != b"MIDX" {
2134 return Err(GitError::InvalidFormat(
2135 "missing multi-pack-index signature".into(),
2136 ));
2137 }
2138 let version = bytes[4];
2139 if version != 1 && version != 2 {
2140 return Err(GitError::Unsupported(format!(
2141 "multi-pack-index version {version}"
2142 )));
2143 }
2144 let hash_id = bytes[5];
2145 if u32::from(hash_id) != hash_function_id(format) {
2146 return Err(GitError::InvalidFormat(format!(
2147 "multi-pack-index hash id {hash_id} does not match {}",
2148 format.name()
2149 )));
2150 }
2151 let chunk_count = bytes[6] as usize;
2152 let base_midx_count = bytes[7];
2153 if base_midx_count != 0 {
2154 return Err(GitError::Unsupported(format!(
2155 "multi-pack-index base count {base_midx_count}"
2156 )));
2157 }
2158 let pack_count = u32_be(&bytes[8..12]);
2159 let lookup_len = (chunk_count + 1)
2160 .checked_mul(12)
2161 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2162 let data_start = 12usize
2163 .checked_add(lookup_len)
2164 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2165 let checksum_offset = bytes.len() - hash_len;
2166 if data_start > checksum_offset {
2167 return Err(GitError::InvalidFormat(
2168 "truncated multi-pack-index chunk lookup".into(),
2169 ));
2170 }
2171
2172 let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
2173 let checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
2174 if actual_checksum != checksum {
2175 return Err(GitError::InvalidFormat(format!(
2176 "multi-pack-index checksum mismatch: expected {checksum}, got {actual_checksum}"
2177 )));
2178 }
2179
2180 let mut entries = Vec::with_capacity(chunk_count + 1);
2181 let mut offset = 12usize;
2182 for _ in 0..=chunk_count {
2183 let id = [
2184 bytes[offset],
2185 bytes[offset + 1],
2186 bytes[offset + 2],
2187 bytes[offset + 3],
2188 ];
2189 let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
2190 entries.push((id, chunk_offset));
2191 offset += 12;
2192 }
2193 let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
2194 return Err(GitError::InvalidFormat(
2195 "multi-pack-index chunk lookup is empty".into(),
2196 ));
2197 };
2198 if terminator_id != [0, 0, 0, 0] {
2199 return Err(GitError::InvalidFormat(
2200 "multi-pack-index chunk lookup missing terminator".into(),
2201 ));
2202 }
2203 if terminator_offset != checksum_offset as u64 {
2204 return Err(GitError::InvalidFormat(
2205 "multi-pack-index terminator does not point at checksum".into(),
2206 ));
2207 }
2208
2209 let mut chunks = Vec::with_capacity(chunk_count);
2210 let mut previous_offset = data_start as u64;
2211 for pair in entries.windows(2) {
2212 let (id, chunk_offset) = pair[0];
2213 let (_next_id, next_offset) = pair[1];
2214 if id == [0, 0, 0, 0] {
2215 return Err(GitError::InvalidFormat(
2216 "multi-pack-index chunk id is zero before terminator".into(),
2217 ));
2218 }
2219 if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
2220 return Err(GitError::InvalidFormat(
2221 "multi-pack-index chunk offsets are not monotonic".into(),
2222 ));
2223 }
2224 if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
2225 return Err(GitError::InvalidFormat(
2226 "multi-pack-index chunk length is invalid".into(),
2227 ));
2228 }
2229 chunks.push(MultiPackIndexChunk {
2230 id,
2231 offset: chunk_offset,
2232 len: next_offset - chunk_offset,
2233 });
2234 previous_offset = chunk_offset;
2235 }
2236
2237 let pack_names = parse_midx_pack_names(bytes, &chunks, pack_count as usize, version)?;
2238 let (fanout, object_count) = parse_midx_oid_fanout(bytes, &chunks)?;
2239 let object_ids = parse_midx_object_ids(bytes, &chunks, format, object_count, &fanout)?;
2240 let objects = parse_midx_object_offsets(bytes, &chunks, object_ids, pack_count)?;
2241 let reverse_index = parse_midx_reverse_index(bytes, &chunks, object_count)?;
2242 let bitmapped_packs =
2243 parse_midx_bitmapped_packs(bytes, &chunks, pack_count as usize, object_count)?;
2244
2245 Ok(Self {
2246 version,
2247 format,
2248 pack_count,
2249 pack_names,
2250 object_count: object_count as u32,
2251 fanout,
2252 objects,
2253 reverse_index,
2254 bitmapped_packs,
2255 chunks,
2256 checksum,
2257 })
2258 }
2259
2260 pub fn find(&self, oid: &ObjectId) -> Option<&MultiPackIndexEntry> {
2261 self.objects
2262 .binary_search_by(|entry| entry.oid.as_bytes().cmp(oid.as_bytes()))
2263 .ok()
2264 .map(|idx| &self.objects[idx])
2265 }
2266}
2267
2268impl MultiPackIndexOidLookup {
/// Parses a multi-pack-index held in `bytes` into a lookup structure that
/// keeps the raw buffer and records where the `OIDL`, `OOFF` and optional
/// `LOFF` chunks live inside it.
///
/// The header, the chunk lookup table and the chunk lengths are validated,
/// and the pack names and fanout table are decoded up front; object ids and
/// offsets stay in `bytes` and are read on demand. This function does not
/// recompute or compare the trailing checksum.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` for a file that is too short, a wrong
/// signature or hash id, an inconsistent chunk lookup table, or a missing or
/// mis-sized `OIDL`/`OOFF`/`LOFF` chunk. Returns `GitError::Unsupported` for
/// a version other than 1 or 2 or a non-zero base count. Errors from the
/// `parse_midx_*` and `midx_chunk_data` helpers are propagated.
pub fn parse(bytes: Arc<Vec<u8>>, format: ObjectFormat) -> Result<Self> {
    let hash_len = format.raw_len();
    // Smallest acceptable file: 12-byte header, one 12-byte lookup entry
    // (the terminator) and the trailing hash.
    if bytes.len() < 12 + 12 + hash_len {
        return Err(GitError::InvalidFormat(
            "multi-pack-index file too short".into(),
        ));
    }
    if &bytes[..4] != b"MIDX" {
        return Err(GitError::InvalidFormat(
            "missing multi-pack-index signature".into(),
        ));
    }
    let version = bytes[4];
    if version != 1 && version != 2 {
        return Err(GitError::Unsupported(format!(
            "multi-pack-index version {version}"
        )));
    }
    let hash_id = bytes[5];
    if u32::from(hash_id) != hash_function_id(format) {
        return Err(GitError::InvalidFormat(format!(
            "multi-pack-index hash id {hash_id} does not match {}",
            format.name()
        )));
    }
    let chunk_count = bytes[6] as usize;
    let base_midx_count = bytes[7];
    if base_midx_count != 0 {
        return Err(GitError::Unsupported(format!(
            "multi-pack-index base count {base_midx_count}"
        )));
    }
    let pack_count = u32_be(&bytes[8..12]);
    // The lookup table has one 12-byte entry per chunk plus a terminator.
    let lookup_len = (chunk_count + 1)
        .checked_mul(12)
        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
    let data_start = 12usize
        .checked_add(lookup_len)
        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
    // Cannot underflow: the length check above guarantees at least `hash_len` bytes.
    let checksum_offset = bytes.len() - hash_len;
    if data_start > checksum_offset {
        return Err(GitError::InvalidFormat(
            "truncated multi-pack-index chunk lookup".into(),
        ));
    }

    // Read every (id, offset) pair, terminator included. The indexing is in
    // bounds because `data_start <= checksum_offset` was checked above.
    let mut entries = Vec::with_capacity(chunk_count + 1);
    let mut offset = 12usize;
    for _ in 0..=chunk_count {
        let id = [
            bytes[offset],
            bytes[offset + 1],
            bytes[offset + 2],
            bytes[offset + 3],
        ];
        let chunk_offset = u64_be(&bytes[offset + 4..offset + 12]);
        entries.push((id, chunk_offset));
        offset += 12;
    }
    let Some((terminator_id, terminator_offset)) = entries.last().copied() else {
        return Err(GitError::InvalidFormat(
            "multi-pack-index chunk lookup is empty".into(),
        ));
    };
    if terminator_id != [0, 0, 0, 0] {
        return Err(GitError::InvalidFormat(
            "multi-pack-index chunk lookup missing terminator".into(),
        ));
    }
    if terminator_offset != checksum_offset as u64 {
        return Err(GitError::InvalidFormat(
            "multi-pack-index terminator does not point at checksum".into(),
        ));
    }

    // Each chunk's length is the distance to the next lookup entry, so walk
    // adjacent pairs and require non-decreasing offsets inside the data area.
    let mut chunks = Vec::with_capacity(chunk_count);
    let mut previous_offset = data_start as u64;
    for pair in entries.windows(2) {
        let (id, chunk_offset) = pair[0];
        let (_next_id, next_offset) = pair[1];
        if id == [0, 0, 0, 0] {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk id is zero before terminator".into(),
            ));
        }
        if chunk_offset < data_start as u64 || chunk_offset < previous_offset {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk offsets are not monotonic".into(),
            ));
        }
        if next_offset < chunk_offset || next_offset > checksum_offset as u64 {
            return Err(GitError::InvalidFormat(
                "multi-pack-index chunk length is invalid".into(),
            ));
        }
        chunks.push(MultiPackIndexChunk {
            id,
            offset: chunk_offset,
            len: next_offset - chunk_offset,
        });
        previous_offset = chunk_offset;
    }

    let pack_names = parse_midx_pack_names(&bytes, &chunks, pack_count as usize, version)?;
    let (fanout, object_count) = parse_midx_oid_fanout(&bytes, &chunks)?;
    // OIDL must hold exactly one raw object id per object.
    let oid_lookup = midx_chunk_data(&bytes, &chunks, *b"OIDL", true)?
        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
    let expected_len = object_count.checked_mul(hash_len).ok_or_else(|| {
        GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into())
    })?;
    if oid_lookup.len() != expected_len {
        return Err(GitError::InvalidFormat(
            "multi-pack-index OIDL chunk has invalid length".into(),
        ));
    }
    // OOFF must hold exactly one 8-byte record per object.
    let object_offsets = midx_chunk_data(&bytes, &chunks, *b"OOFF", true)?
        .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
    let expected_offsets_len = object_count.checked_mul(8).ok_or_else(|| {
        GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into())
    })?;
    if object_offsets.len() != expected_offsets_len {
        return Err(GitError::InvalidFormat(
            "multi-pack-index OOFF chunk has invalid length".into(),
        ));
    }
    // LOFF is optional; when present it must be a whole number of 8-byte entries.
    let large_offsets = midx_chunk_data(&bytes, &chunks, *b"LOFF", false)?;
    if let Some(large_offsets) = large_offsets
        && large_offsets.len() % 8 != 0
    {
        return Err(GitError::InvalidFormat(
            "multi-pack-index LOFF chunk has invalid length".into(),
        ));
    }
    // Turn the borrowed chunk slices into offsets relative to the start of
    // `bytes` so `Self` can own the buffer without holding borrows into it.
    // NOTE(review): this assumes `midx_chunk_data` returns sub-slices of
    // `bytes` — confirm against that helper.
    let oid_lookup_offset = oid_lookup.as_ptr() as usize - bytes.as_ptr() as usize;
    let object_offsets_offset = object_offsets.as_ptr() as usize - bytes.as_ptr() as usize;
    let (large_offsets_offset, large_offsets_len) = match large_offsets {
        Some(large_offsets) => (
            Some(large_offsets.as_ptr() as usize - bytes.as_ptr() as usize),
            large_offsets.len(),
        ),
        None => (None, 0),
    };
    Ok(Self {
        format,
        pack_count,
        pack_names,
        fanout,
        object_count,
        oid_lookup_offset,
        object_offsets_offset,
        large_offsets_offset,
        large_offsets_len,
        bytes,
    })
}
2424
/// Returns `true` when `oid` is present in the index.
///
/// Only the object-id table is searched (via `find_position`); the object's
/// pack and offset record is not read.
pub fn contains(&self, oid: &ObjectId) -> bool {
    self.find_position(oid).is_some()
}
2428
2429 pub fn find(&self, oid: &ObjectId) -> Result<Option<MultiPackIndexEntry>> {
2430 let Some(position) = self.find_position(oid) else {
2431 return Ok(None);
2432 };
2433 let hash_len = self.format.raw_len();
2434 let oid_start = self
2435 .oid_lookup_offset
2436 .checked_add(position * hash_len)
2437 .ok_or_else(|| {
2438 GitError::InvalidFormat("multi-pack-index OIDL offset overflow".into())
2439 })?;
2440 let oid = ObjectId::from_raw(self.format, &self.bytes[oid_start..oid_start + hash_len])?;
2441 let offset_start = self
2442 .object_offsets_offset
2443 .checked_add(position * 8)
2444 .ok_or_else(|| {
2445 GitError::InvalidFormat("multi-pack-index OOFF offset overflow".into())
2446 })?;
2447 let data = &self.bytes[offset_start..offset_start + 8];
2448 let pack_int_id = u32_be(&data[..4]);
2449 if pack_int_id >= self.pack_count {
2450 return Err(GitError::InvalidFormat(
2451 "multi-pack-index object points past pack table".into(),
2452 ));
2453 }
2454 let raw_offset = u32_be(&data[4..8]);
2455 let offset = if raw_offset & 0x8000_0000 == 0 {
2456 u64::from(raw_offset)
2457 } else {
2458 let Some(large_offsets_offset) = self.large_offsets_offset else {
2459 return Err(GitError::InvalidFormat(
2460 "multi-pack-index large offset missing LOFF chunk".into(),
2461 ));
2462 };
2463 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
2464 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
2465 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2466 })?;
2467 let large_end = large_start.checked_add(8).ok_or_else(|| {
2468 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
2469 })?;
2470 if large_end > self.large_offsets_len {
2471 return Err(GitError::InvalidFormat(
2472 "multi-pack-index large offset points past LOFF chunk".into(),
2473 ));
2474 }
2475 let start = large_offsets_offset + large_start;
2476 u64_be(&self.bytes[start..start + 8])
2477 };
2478 Ok(Some(MultiPackIndexEntry {
2479 oid,
2480 pack_int_id,
2481 offset,
2482 }))
2483 }
2484
2485 pub fn pack_name(&self, pack_int_id: u32) -> Option<&str> {
2486 self.pack_names
2487 .get(pack_int_id as usize)
2488 .map(String::as_str)
2489 }
2490
2491 fn find_position(&self, oid: &ObjectId) -> Option<usize> {
2492 if oid.format() != self.format || self.object_count == 0 {
2493 return None;
2494 }
2495 let first = oid.as_bytes()[0] as usize;
2496 let start = if first == 0 {
2497 0
2498 } else {
2499 self.fanout[first - 1] as usize
2500 };
2501 let end = self.fanout[first] as usize;
2502 if start >= end || end > self.object_count {
2503 return None;
2504 }
2505 let hash_len = self.format.raw_len();
2506 let table_start = self.oid_lookup_offset;
2507 let table_end = table_start + self.object_count * hash_len;
2508 let table = &self.bytes[table_start..table_end];
2509 let needle = oid.as_bytes();
2510 let mut low = start;
2511 let mut high = end;
2512 while low < high {
2513 let mid = low + (high - low) / 2;
2514 let raw = &table[mid * hash_len..(mid + 1) * hash_len];
2515 match raw.cmp(needle) {
2516 std::cmp::Ordering::Less => low = mid + 1,
2517 std::cmp::Ordering::Equal => return Some(mid),
2518 std::cmp::Ordering::Greater => high = mid,
2519 }
2520 }
2521 None
2522 }
2523}
2524
2525fn validate_midx_pack_names(pack_names: &[String]) -> Result<()> {
2526 for name in pack_names {
2527 if name.is_empty() {
2528 return Err(GitError::InvalidFormat(
2529 "multi-pack-index pack name is empty".into(),
2530 ));
2531 }
2532 if name
2533 .bytes()
2534 .any(|byte| byte == 0 || matches!(byte, b'/' | b'\\'))
2535 {
2536 return Err(GitError::InvalidFormat(
2537 "multi-pack-index pack name contains an invalid byte".into(),
2538 ));
2539 }
2540 }
2541 Ok(())
2542}
2543
/// Serializes the pack-name chunk: each name followed by a NUL byte, then
/// zero padding up to the next multiple of four bytes.
fn write_midx_pack_names(pack_names: &[String]) -> Vec<u8> {
    let mut encoded: Vec<u8> = pack_names
        .iter()
        .flat_map(|name| name.bytes().chain([0u8]))
        .collect();
    // 0..=3 padding bytes bring the chunk to a 4-byte boundary.
    let padding = (4 - encoded.len() % 4) % 4;
    encoded.extend_from_slice(&[0u8; 3][..padding]);
    encoded
}
2555
2556fn write_midx_oid_fanout(objects: &[&MultiPackIndexEntry]) -> Result<Vec<u8>> {
2557 let mut counts = [0u32; 256];
2558 for object in objects {
2559 let first = object.oid.as_bytes()[0] as usize;
2560 counts[first] = counts[first]
2561 .checked_add(1)
2562 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2563 }
2564 let mut running = 0u32;
2565 let mut out = Vec::with_capacity(256 * 4);
2566 for count in counts {
2567 running = running
2568 .checked_add(count)
2569 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
2570 out.extend_from_slice(&running.to_be_bytes());
2571 }
2572 Ok(out)
2573}
2574
2575fn write_midx_oid_lookup(objects: &[&MultiPackIndexEntry]) -> Vec<u8> {
2576 let mut out = Vec::new();
2577 for object in objects {
2578 out.extend_from_slice(object.oid.as_bytes());
2579 }
2580 out
2581}
2582
2583fn write_midx_object_offsets(
2584 objects: &[&MultiPackIndexEntry],
2585 large_offsets: &mut Vec<u8>,
2586) -> Result<Vec<u8>> {
2587 let mut out = Vec::new();
2588 for object in objects {
2589 out.extend_from_slice(&object.pack_int_id.to_be_bytes());
2590 if object.offset < 0x8000_0000 {
2591 out.extend_from_slice(&(object.offset as u32).to_be_bytes());
2592 } else {
2593 let large_idx = large_offsets.len() / 8;
2594 if large_idx > 0x7fff_ffff {
2595 return Err(GitError::InvalidFormat(
2596 "too many multi-pack-index large offsets".into(),
2597 ));
2598 }
2599 out.extend_from_slice(&(0x8000_0000 | large_idx as u32).to_be_bytes());
2600 large_offsets.extend_from_slice(&object.offset.to_be_bytes());
2601 }
2602 }
2603 Ok(out)
2604}
2605
2606fn write_multi_pack_index_chunks(
2607 format: ObjectFormat,
2608 version: u8,
2609 pack_count: u32,
2610 chunks: &[([u8; 4], Vec<u8>)],
2611) -> Result<Vec<u8>> {
2612 if chunks.len() > u8::MAX as usize {
2613 return Err(GitError::InvalidFormat(
2614 "too many multi-pack-index chunks".into(),
2615 ));
2616 }
2617 let lookup_len = (chunks.len() + 1)
2618 .checked_mul(12)
2619 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?;
2620 let mut out = Vec::new();
2621 out.extend_from_slice(b"MIDX");
2622 out.push(version);
2623 out.push(hash_function_id(format) as u8);
2624 out.push(chunks.len() as u8);
2625 out.push(0);
2626 out.extend_from_slice(&pack_count.to_be_bytes());
2627 let mut chunk_offset = (12usize)
2628 .checked_add(lookup_len)
2629 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index lookup overflow".into()))?
2630 as u64;
2631 for (id, data) in chunks {
2632 out.extend_from_slice(id);
2633 out.extend_from_slice(&chunk_offset.to_be_bytes());
2634 chunk_offset = chunk_offset
2635 .checked_add(data.len() as u64)
2636 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index size overflow".into()))?;
2637 }
2638 out.extend_from_slice(&[0, 0, 0, 0]);
2639 out.extend_from_slice(&chunk_offset.to_be_bytes());
2640 for (_id, data) in chunks {
2641 out.extend_from_slice(data);
2642 }
2643 let checksum = sley_core::digest_bytes(format, &out)?;
2644 out.extend_from_slice(checksum.as_bytes());
2645 Ok(out)
2646}
2647
/// Decoded type-and-size header of a single pack entry, as produced by
/// `parse_entry_header`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EntryHeader {
    /// Entry kind taken from the type bits of the first header byte.
    kind: PackObjectKind,
    /// Size declared by the header. `read_object_at_inner` requires the
    /// inflated entry body to be exactly this long.
    size: u64,
}
2653
/// Cache of decoded pack objects, keyed by the entry's offset in the pack.
///
/// `read_object_at_inner` consults the cache before decoding an entry and
/// stores every object it decodes, including delta bases reached while
/// following an ofs-delta chain. Both methods take `&self`, so an
/// implementation that actually stores objects needs interior mutability.
pub trait PackDeltaCache {
    /// Returns the cached object for the entry at `offset`, if any.
    fn get(&self, offset: u64) -> Option<Arc<EncodedObject>>;
    /// Records `object` as the decoded form of the entry at `offset`.
    fn insert(&self, offset: u64, object: Arc<EncodedObject>);
}
2673
/// [`PackDeltaCache`] that stores nothing; used by `read_object_at_arc`
/// when the caller supplies no cache.
struct NoopDeltaCache;

impl PackDeltaCache for NoopDeltaCache {
    /// Always misses.
    fn get(&self, _offset: u64) -> Option<Arc<EncodedObject>> {
        None
    }
    /// Discards the object.
    fn insert(&self, _offset: u64, _object: Arc<EncodedObject>) {}
}
2684
thread_local! {
    /// Per-thread decompressor shared by `inflate_into` and `inflate_prefix`.
    /// Both call `reset(true)` on it before each use rather than
    /// constructing a new `flate2::Decompress`.
    static INFLATE: RefCell<flate2::Decompress> = RefCell::new(flate2::Decompress::new(true));
}
2693
/// Output bytes assumed per compressed input byte when sizing buffers.
/// NOTE(review): presumably deflate's worst-case expansion ratio — confirm.
const MAX_INFLATE_EXPANSION: usize = 1032;

/// Largest up-front reservation `bounded_inflate_reserve` will return (64 MiB).
const MAX_INFLATE_RESERVE: usize = 64 * 1024 * 1024;

/// Picks how many bytes to reserve before inflating.
///
/// The declared `size_hint` is capped by what `compressed_len` input bytes
/// could expand to (`compressed_len * MAX_INFLATE_EXPANSION`, saturating),
/// and the result is kept within `64..=MAX_INFLATE_RESERVE`, so an untrusted
/// size field cannot force a huge allocation on its own.
fn bounded_inflate_reserve(size_hint: usize, compressed_len: usize) -> usize {
    const MIN_RESERVE: usize = 64;
    let input_ceiling = compressed_len.saturating_mul(MAX_INFLATE_EXPANSION);
    match std::cmp::min(size_hint, input_ceiling) {
        wanted if wanted < MIN_RESERVE => MIN_RESERVE,
        wanted if wanted > MAX_INFLATE_RESERVE => MAX_INFLATE_RESERVE,
        wanted => wanted,
    }
}
2725
2726fn inflate_into(compressed: &[u8], out: &mut Vec<u8>, size_hint: usize) -> Result<usize> {
2735 INFLATE.with(|cell| {
2736 let mut decompress = cell.borrow_mut();
2737 decompress.reset(true);
2738 out.reserve(bounded_inflate_reserve(size_hint, compressed.len()));
2739 let mut input = compressed;
2740 let mut consumed_total = 0usize;
2741 loop {
2742 if out.len() == out.capacity() {
2745 out.reserve(out.len().max(64));
2746 }
2747 let before_in = decompress.total_in();
2748 let before_out = decompress.total_out();
2749 let status = decompress
2750 .decompress_vec(input, out, flate2::FlushDecompress::None)
2751 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
2752 let consumed = (decompress.total_in() - before_in) as usize;
2753 let produced = decompress.total_out() - before_out;
2754 input = &input[consumed..];
2755 consumed_total += consumed;
2756 match status {
2757 flate2::Status::StreamEnd => return Ok(consumed_total),
2758 _ if consumed == 0 && produced == 0 => {
2759 return Err(GitError::InvalidObject("truncated zlib stream".into()));
2760 }
2761 _ => {}
2762 }
2763 }
2764 })
2765}
2766
2767fn inflate_prefix(compressed: &[u8], max_out: usize, out: &mut Vec<u8>) -> Result<()> {
2771 INFLATE.with(|cell| {
2772 let mut decompress = cell.borrow_mut();
2773 decompress.reset(true);
2774 out.reserve(max_out.max(16));
2775 let mut input = compressed;
2776 while out.len() < max_out {
2777 if out.len() == out.capacity() {
2778 out.reserve(out.len().max(16));
2779 }
2780 let before_in = decompress.total_in();
2781 let before_out = decompress.total_out();
2782 let status = decompress
2783 .decompress_vec(input, out, flate2::FlushDecompress::None)
2784 .map_err(|err| GitError::InvalidObject(format!("zlib inflate failed: {err}")))?;
2785 let consumed = (decompress.total_in() - before_in) as usize;
2786 let produced = decompress.total_out() - before_out;
2787 input = &input[consumed..];
2788 if status == flate2::Status::StreamEnd || (consumed == 0 && produced == 0) {
2789 break;
2790 }
2791 }
2792 Ok(())
2793 })
2794}
2795
/// Decodes the object stored at `offset` in `pack_bytes` without caching.
///
/// Delegates to `read_object_at_with_cache_arc` with a cache that stores
/// nothing. `resolve_ref_base` is called to fetch the base of a ref-delta
/// entry by object id.
///
/// # Errors
///
/// Propagates every error of `read_object_at_with_cache_arc`.
pub fn read_object_at_arc<F>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    resolve_ref_base: F,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
{
    read_object_at_with_cache_arc(
        pack_bytes,
        offset,
        format,
        resolve_ref_base,
        &NoopDeltaCache,
    )
}
2820
/// Decodes the object stored at `offset` in `pack_bytes`, using `cache` to
/// reuse and record decoded objects by pack offset.
///
/// `resolve_ref_base` is called to fetch the base of a ref-delta entry by
/// object id; returning `Ok(None)` from it results in a not-found error.
///
/// # Errors
///
/// Propagates every error of `read_object_at_inner`.
pub fn read_object_at_with_cache_arc<F, C>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base: F,
    cache: &C,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
    C: PackDeltaCache + ?Sized,
{
    read_object_at_inner(pack_bytes, offset, format, &mut resolve_ref_base, cache)
}
2842
/// Decodes the pack entry at `offset`, resolving deltas, and returns the
/// resulting object.
///
/// A cache hit for `offset` is returned immediately. Otherwise the entry
/// header is parsed, the entry body is inflated and checked against the
/// declared size, and for delta entries the base is obtained (recursively
/// for ofs-delta, through `resolve_ref_base` for ref-delta) and the delta is
/// applied. The result is inserted into `cache` before being returned.
///
/// NOTE(review): ofs-delta chains are followed by recursion, so stack depth
/// grows with the chain length.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when the pack is shorter than its
/// trailer, `offset` is not before the trailer, or a ref-delta base id is
/// truncated; `GitError::InvalidObject` when the inflated size differs from
/// the header; and a not-found error when `resolve_ref_base` yields `None`.
/// Errors from header parsing, inflation, base resolution and delta
/// application are propagated.
fn read_object_at_inner<F, C>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    resolve_ref_base: &mut F,
    cache: &C,
) -> Result<Arc<EncodedObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<Arc<EncodedObject>>>,
    C: PackDeltaCache + ?Sized,
{
    if let Some(object) = cache.get(offset) {
        return Ok(object);
    }
    // Entry data must end before the trailing hash of the pack.
    let trailer_offset = pack_bytes
        .len()
        .checked_sub(format.raw_len())
        .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
    let mut cursor = usize::try_from(offset)
        .ok()
        .filter(|&value| value < trailer_offset)
        .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
    let header = parse_entry_header(pack_bytes, &mut cursor)?;
    // Delta entries carry a base reference between the header and the
    // compressed data.
    let base = match header.kind {
        PackObjectKind::OfsDelta => Some(DeltaBase::Offset(parse_ofs_delta_base_offset(
            pack_bytes,
            &mut cursor,
            offset,
        )?)),
        PackObjectKind::RefDelta => {
            let hash_len = format.raw_len();
            if cursor + hash_len > trailer_offset {
                return Err(GitError::InvalidFormat(
                    "truncated ref-delta base object id".into(),
                ));
            }
            let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
            cursor += hash_len;
            Some(DeltaBase::Ref(oid))
        }
        _ => None,
    };
    // For delta entries `body` is the delta instruction stream and
    // `header.size` is the size of that stream.
    let mut body = Vec::new();
    inflate_into(
        &pack_bytes[cursor..trailer_offset],
        &mut body,
        header.size.min(usize::MAX as u64) as usize,
    )?;
    if body.len() as u64 != header.size {
        return Err(GitError::InvalidObject(format!(
            "pack object declared {} bytes, decoded {}",
            header.size,
            body.len()
        )));
    }
    let object = match base {
        None => {
            let object_type = match header.kind {
                PackObjectKind::Commit => ObjectType::Commit,
                PackObjectKind::Tree => ObjectType::Tree,
                PackObjectKind::Blob => ObjectType::Blob,
                PackObjectKind::Tag => ObjectType::Tag,
                PackObjectKind::OfsDelta | PackObjectKind::RefDelta => {
                    return Err(GitError::InvalidFormat(
                        "delta pack entry decoded without a base".into(),
                    ));
                }
            };
            Arc::new(EncodedObject::new(object_type, body))
        }
        Some(DeltaBase::Offset(base_offset)) => {
            // The base lives in this pack; decode it the same way (it may
            // itself be a delta). The result takes the base's object type.
            let base =
                read_object_at_inner(pack_bytes, base_offset, format, resolve_ref_base, cache)?;
            let resolved = apply_pack_delta(&base.body, &body)?;
            Arc::new(EncodedObject::new(base.object_type, resolved))
        }
        Some(DeltaBase::Ref(base_oid)) => {
            let base = resolve_ref_base(&base_oid)?
                .ok_or_else(|| GitError::not_found(format!("ref-delta base object {base_oid}")))?;
            let resolved = apply_pack_delta(&base.body, &body)?;
            Arc::new(EncodedObject::new(base.object_type, resolved))
        }
    };
    cache.insert(offset, Arc::clone(&object));
    Ok(object)
}
2934
/// Returns the object type and size of the entry at `offset` without
/// caching intermediate results.
///
/// Delegates to `read_object_header_at_inner` with a type cache that stores
/// nothing. `resolve_ref_base_type` is called to obtain the type of a
/// ref-delta base by object id.
///
/// # Errors
///
/// Propagates every error of `read_object_header_at_inner`.
pub fn read_object_header_at<F>(
    pack_bytes: &[u8],
    offset: u64,
    format: ObjectFormat,
    mut resolve_ref_base_type: F,
) -> Result<(ObjectType, u64)>
where
    F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
{
    read_object_header_at_inner(
        pack_bytes,
        offset,
        format,
        &mut resolve_ref_base_type,
        &mut NoopHeaderTypeCache,
    )
}
2961
/// Cache of resolved `(object type, size)` headers keyed by pack offset.
///
/// `read_object_header_at_inner` looks up ofs-delta bases here before
/// recursing and records every header it resolves.
/// `read_object_header_at_with_cache` additionally checks it for the
/// requested offset first.
pub trait HeaderTypeCache {
    /// Returns the cached header for the entry at `pack_offset`, if any.
    fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)>;
    /// Records `header` as the resolved header of the entry at `pack_offset`.
    fn put(&mut self, pack_offset: u64, header: (ObjectType, u64));
}
2984
/// [`HeaderTypeCache`] that stores nothing; used by `read_object_header_at`.
struct NoopHeaderTypeCache;

impl HeaderTypeCache for NoopHeaderTypeCache {
    /// Always misses.
    fn get(&self, _pack_offset: u64) -> Option<(ObjectType, u64)> {
        None
    }
    /// Discards the header.
    fn put(&mut self, _pack_offset: u64, _header: (ObjectType, u64)) {}
}
2993
2994pub fn read_object_header_at_with_cache<F, C>(
3000 pack_bytes: &[u8],
3001 offset: u64,
3002 format: ObjectFormat,
3003 mut resolve_ref_base_type: F,
3004 type_cache: &mut C,
3005) -> Result<(ObjectType, u64)>
3006where
3007 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3008 C: HeaderTypeCache + ?Sized,
3009{
3010 if let Some(header) = type_cache.get(offset) {
3011 return Ok(header);
3012 }
3013 read_object_header_at_inner(
3014 pack_bytes,
3015 offset,
3016 format,
3017 &mut resolve_ref_base_type,
3018 type_cache,
3019 )
3020}
3021
3022fn read_object_header_at_inner<F, C>(
3023 pack_bytes: &[u8],
3024 offset: u64,
3025 format: ObjectFormat,
3026 resolve_ref_base_type: &mut F,
3027 type_cache: &mut C,
3028) -> Result<(ObjectType, u64)>
3029where
3030 F: FnMut(&ObjectId) -> Result<Option<ObjectType>>,
3031 C: HeaderTypeCache + ?Sized,
3032{
3033 let trailer_offset = pack_bytes
3034 .len()
3035 .checked_sub(format.raw_len())
3036 .ok_or_else(|| GitError::InvalidFormat("pack smaller than its trailer".into()))?;
3037 let mut cursor = usize::try_from(offset)
3038 .ok()
3039 .filter(|&value| value < trailer_offset)
3040 .ok_or_else(|| GitError::InvalidFormat("pack object offset out of range".into()))?;
3041 let header = parse_entry_header(pack_bytes, &mut cursor)?;
3042 let resolved = match header.kind {
3043 PackObjectKind::Commit => (ObjectType::Commit, header.size),
3044 PackObjectKind::Tree => (ObjectType::Tree, header.size),
3045 PackObjectKind::Blob => (ObjectType::Blob, header.size),
3046 PackObjectKind::Tag => (ObjectType::Tag, header.size),
3047 PackObjectKind::OfsDelta => {
3048 let base_offset = parse_ofs_delta_base_offset(pack_bytes, &mut cursor, offset)?;
3049 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3050 let base_type = match type_cache.get(base_offset) {
3053 Some((base_type, _)) => base_type,
3054 None => {
3055 let (base_type, _) = read_object_header_at_inner(
3056 pack_bytes,
3057 base_offset,
3058 format,
3059 resolve_ref_base_type,
3060 type_cache,
3061 )?;
3062 base_type
3063 }
3064 };
3065 (base_type, size)
3066 }
3067 PackObjectKind::RefDelta => {
3068 let hash_len = format.raw_len();
3069 if cursor + hash_len > trailer_offset {
3070 return Err(GitError::InvalidFormat(
3071 "truncated ref-delta base object id".into(),
3072 ));
3073 }
3074 let oid = ObjectId::from_raw(format, &pack_bytes[cursor..cursor + hash_len])?;
3075 cursor += hash_len;
3076 let size = delta_result_size_from_stream(&pack_bytes[cursor..trailer_offset])?;
3077 let base_type = resolve_ref_base_type(&oid)?
3078 .ok_or_else(|| GitError::not_found(format!("ref-delta base object {oid}")))?;
3079 (base_type, size)
3080 }
3081 };
3082 type_cache.put(offset, resolved);
3085 Ok(resolved)
3086}
3087
/// Number of decompressed bytes requested from a delta stream when only its
/// size header is needed.
const DELTA_HEADER_PREFIX_LEN: usize = 32;

/// Reads the result size declared at the start of a zlib-compressed delta.
///
/// Only a prefix of the stream is inflated (`DELTA_HEADER_PREFIX_LEN` bytes
/// are requested from `inflate_prefix`); the prefix is then handed to
/// `decoded_delta_result_size`.
///
/// # Errors
///
/// Propagates errors from `inflate_prefix` and `decoded_delta_result_size`.
fn delta_result_size_from_stream(compressed: &[u8]) -> Result<u64> {
    let mut prefix = Vec::new();
    inflate_prefix(compressed, DELTA_HEADER_PREFIX_LEN, &mut prefix)?;
    decoded_delta_result_size(&prefix)
}
3100
3101fn parse_entry_header(bytes: &[u8], offset: &mut usize) -> Result<EntryHeader> {
3102 let first = next_byte(bytes, offset)?;
3103 let mut size = u64::from(first & 0x0f);
3104 let kind = match (first >> 4) & 0x07 {
3105 1 => PackObjectKind::Commit,
3106 2 => PackObjectKind::Tree,
3107 3 => PackObjectKind::Blob,
3108 4 => PackObjectKind::Tag,
3109 6 => PackObjectKind::OfsDelta,
3110 7 => PackObjectKind::RefDelta,
3111 other => {
3112 return Err(GitError::InvalidFormat(format!(
3113 "invalid pack object type {other}"
3114 )));
3115 }
3116 };
3117 let mut shift = 4;
3118 let mut byte = first;
3119 while byte & 0x80 != 0 {
3120 byte = next_byte(bytes, offset)?;
3121 let part = u64::from(byte & 0x7f);
3122 size = size
3123 .checked_add(
3124 part.checked_shl(shift)
3125 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?,
3126 )
3127 .ok_or_else(|| GitError::InvalidFormat("pack size overflow".into()))?;
3128 shift += 7;
3129 }
3130 Ok(EntryHeader { kind, size })
3131}
3132
3133fn parse_ofs_delta_base_offset(bytes: &[u8], offset: &mut usize, entry_offset: u64) -> Result<u64> {
3134 let mut byte = next_byte(bytes, offset)?;
3135 let mut relative = u64::from(byte & 0x7f);
3136 while byte & 0x80 != 0 {
3137 byte = next_byte(bytes, offset)?;
3138 relative = relative
3139 .checked_add(1)
3140 .and_then(|value| value.checked_shl(7))
3141 .and_then(|value| value.checked_add(u64::from(byte & 0x7f)))
3142 .ok_or_else(|| GitError::InvalidFormat("ofs-delta offset overflow".into()))?;
3143 }
3144 entry_offset
3145 .checked_sub(relative)
3146 .ok_or_else(|| GitError::InvalidFormat("ofs-delta points before pack start".into()))
3147}
3148
/// Turns parsed pack entries into fully resolved [`PackObject`]s, applying
/// deltas until every entry has a concrete body.
///
/// Non-delta entries are taken as-is. Delta entries are resolved in
/// repeated passes over `parsed`: a delta is applied as soon as its base is
/// available, either from an already resolved entry of this pack (by offset
/// or object id) or from `external_base`. The returned vector has the same
/// order as `parsed`.
///
/// # Errors
///
/// Returns `GitError::Unsupported` when a full pass resolves nothing while
/// deltas remain; `GitError::InvalidObject` when a resolved body or delta
/// length disagrees with its declared size; and propagates errors from
/// `delta_base_object`, `apply_pack_delta`, `object_id` and
/// `decoded_delta_result_size`.
fn resolve_pack_entries<F>(
    parsed: Vec<ParsedPackEntry>,
    format: ObjectFormat,
    external_base: &mut F,
) -> Result<Vec<PackObject>>
where
    F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
{
    // Map each entry's pack offset to its index so ofs-delta bases can be found.
    let mut offset_to_index = HashMap::with_capacity(parsed.len());
    for (idx, entry) in parsed.iter().enumerate() {
        offset_to_index.insert(parsed_entry_offset(entry), idx);
    }

    // Seed the result with the entries that need no delta resolution and
    // count how many deltas remain.
    let mut resolved = vec![None; parsed.len()];
    let mut oid_to_index = HashMap::new();
    let mut unresolved = 0usize;
    for (idx, entry) in parsed.iter().enumerate() {
        match entry {
            ParsedPackEntry::Resolved(object) => {
                oid_to_index.insert(object.entry.oid, idx);
                resolved[idx] = Some(object.clone());
            }
            ParsedPackEntry::Delta { .. } => unresolved += 1,
        }
    }

    // Keep sweeping until every delta is resolved; a sweep without progress
    // means some base can never become available.
    while unresolved != 0 {
        let mut progress = false;
        for idx in 0..parsed.len() {
            if resolved[idx].is_some() {
                continue;
            }
            let ParsedPackEntry::Delta {
                base,
                compressed_size,
                delta_size,
                offset,
                delta,
            } = &parsed[idx]
            else {
                continue;
            };
            // `None` means the base exists in this pack but is not resolved
            // yet; retry on a later sweep.
            let Some(base_object) = delta_base_object(
                base,
                &offset_to_index,
                &oid_to_index,
                &resolved,
                external_base,
            )?
            else {
                continue;
            };
            let body = apply_pack_delta(base_object.body(), delta)?;
            let object = EncodedObject::new(base_object.object_type(), body);
            let oid = object.object_id(format)?;
            let pack_object = PackObject {
                entry: PackEntry {
                    oid,
                    compressed_size: *compressed_size,
                    uncompressed_size: object.body.len() as u64,
                    offset: *offset,
                },
                object,
            };
            if pack_object.entry.uncompressed_size != decoded_delta_result_size(delta)? {
                return Err(GitError::InvalidObject(
                    "resolved delta size does not match delta header".into(),
                ));
            }
            if *delta_size != delta.len() as u64 {
                return Err(GitError::InvalidObject(format!(
                    "pack delta declared {delta_size} bytes, decoded {}",
                    delta.len()
                )));
            }
            // Make the new object available as a base for later deltas.
            oid_to_index.insert(oid, idx);
            resolved[idx] = Some(pack_object);
            unresolved -= 1;
            progress = true;
        }
        if !progress {
            return Err(GitError::Unsupported("unresolved delta base".into()));
        }
    }

    resolved
        .into_iter()
        .map(|entry| entry.ok_or_else(|| GitError::InvalidFormat("unresolved pack entry".into())))
        .collect()
}
3239
3240fn parsed_entry_offset(entry: &ParsedPackEntry) -> u64 {
3241 match entry {
3242 ParsedPackEntry::Resolved(object) => object.entry.offset,
3243 ParsedPackEntry::Delta { offset, .. } => *offset,
3244 }
3245}
3246
3247enum DeltaBaseObject<'a> {
3248 Borrowed(&'a EncodedObject),
3249 Owned(EncodedObject),
3250}
3251
3252impl DeltaBaseObject<'_> {
3253 fn object_type(&self) -> ObjectType {
3254 match self {
3255 Self::Borrowed(object) => object.object_type,
3256 Self::Owned(object) => object.object_type,
3257 }
3258 }
3259
3260 fn body(&self) -> &[u8] {
3261 match self {
3262 Self::Borrowed(object) => &object.body,
3263 Self::Owned(object) => &object.body,
3264 }
3265 }
3266}
3267
3268fn delta_base_object<'a, F>(
3269 base: &DeltaBase,
3270 offset_to_index: &HashMap<u64, usize>,
3271 oid_to_index: &HashMap<ObjectId, usize>,
3272 resolved: &'a [Option<PackObject>],
3273 external_base: &mut F,
3274) -> Result<Option<DeltaBaseObject<'a>>>
3275where
3276 F: FnMut(&ObjectId) -> Result<Option<EncodedObject>>,
3277{
3278 match base {
3279 DeltaBase::Offset(offset) => {
3280 let Some(index) = offset_to_index.get(offset).copied() else {
3281 return Err(GitError::InvalidFormat(format!(
3282 "ofs-delta base offset {offset} not found"
3283 )));
3284 };
3285 Ok(resolved[index]
3286 .as_ref()
3287 .map(|object| DeltaBaseObject::Borrowed(&object.object)))
3288 }
3289 DeltaBase::Ref(oid) => {
3290 if let Some(index) = oid_to_index.get(oid).copied() {
3291 return Ok(resolved[index]
3292 .as_ref()
3293 .map(|object| DeltaBaseObject::Borrowed(&object.object)));
3294 }
3295 external_base(oid).map(|object| object.map(DeltaBaseObject::Owned))
3296 }
3297 }
3298}
3299
3300fn apply_pack_delta(base: &[u8], delta: &[u8]) -> Result<Vec<u8>> {
3301 let mut cursor = 0usize;
3302 let base_size = read_delta_varint(delta, &mut cursor)?;
3303 if base_size != base.len() as u64 {
3304 return Err(GitError::InvalidObject(format!(
3305 "delta base size mismatch: expected {base_size}, got {}",
3306 base.len()
3307 )));
3308 }
3309 let result_size = read_delta_varint(delta, &mut cursor)?;
3310 let result_size_hint = usize::try_from(result_size).unwrap_or(usize::MAX);
3319 let mut result = Vec::with_capacity(bounded_inflate_reserve(result_size_hint, delta.len()));
3320 while cursor < delta.len() {
3321 let command = delta[cursor];
3322 cursor += 1;
3323 if command & 0x80 != 0 {
3324 let copy_offset =
3325 read_delta_copy_value(delta, &mut cursor, command, &[0x01, 0x02, 0x04, 0x08])?;
3326 let mut copy_size =
3327 read_delta_copy_value(delta, &mut cursor, command, &[0x10, 0x20, 0x40])?;
3328 if copy_size == 0 {
3329 copy_size = 0x10000;
3330 }
3331 let start = usize::try_from(copy_offset)
3332 .map_err(|_| GitError::InvalidObject("delta copy offset overflows usize".into()))?;
3333 let len = usize::try_from(copy_size)
3334 .map_err(|_| GitError::InvalidObject("delta copy size overflows usize".into()))?;
3335 let end = start
3336 .checked_add(len)
3337 .ok_or_else(|| GitError::InvalidObject("delta copy range overflow".into()))?;
3338 let Some(slice) = base.get(start..end) else {
3339 return Err(GitError::InvalidObject(
3340 "delta copy range exceeds base object".into(),
3341 ));
3342 };
3343 result.extend_from_slice(slice);
3344 } else if command != 0 {
3345 let len = usize::from(command);
3346 let end = cursor
3347 .checked_add(len)
3348 .ok_or_else(|| GitError::InvalidObject("delta insert range overflow".into()))?;
3349 let Some(slice) = delta.get(cursor..end) else {
3350 return Err(GitError::InvalidObject(
3351 "delta insert range exceeds delta data".into(),
3352 ));
3353 };
3354 result.extend_from_slice(slice);
3355 cursor = end;
3356 } else {
3357 return Err(GitError::InvalidObject(
3358 "delta contains reserved zero command".into(),
3359 ));
3360 }
3361 }
3362 if result.len() as u64 != result_size {
3363 return Err(GitError::InvalidObject(format!(
3364 "delta result size mismatch: expected {result_size}, got {}",
3365 result.len()
3366 )));
3367 }
3368 Ok(result)
3369}
3370
3371fn decoded_delta_result_size(delta: &[u8]) -> Result<u64> {
3372 let mut cursor = 0usize;
3373 let _ = read_delta_varint(delta, &mut cursor)?;
3374 read_delta_varint(delta, &mut cursor)
3375}
3376
/// Length in bytes of every block that is hashed when indexing a delta base
/// or probing a target.
const DELTA_BLOCK_SIZE: usize = 16;

/// Distance in bytes between consecutive indexed block offsets; currently
/// equal to the block size, so indexed blocks do not overlap.
const DELTA_INDEX_STRIDE: usize = DELTA_BLOCK_SIZE;

/// Number of low hash bits used to pick a bucket.
const DELTA_BUCKET_BITS: usize = 12;
/// Number of hash buckets (`2^DELTA_BUCKET_BITS`).
const DELTA_BUCKET_COUNT: usize = 1 << DELTA_BUCKET_BITS;
/// Mask that reduces a block hash to its bucket number.
const DELTA_BUCKET_MASK: usize = DELTA_BUCKET_COUNT - 1;
3392
3393struct DeltaIndex<'a> {
3400 base: &'a [u8],
3401 blocks: Vec<DeltaBlock>,
3402 buckets: Vec<usize>,
3403}
3404
3405#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3406struct DeltaBlock {
3407 hash: u32,
3408 offset: usize,
3409}
3410
3411impl<'a> DeltaIndex<'a> {
3412 fn new(base: &'a [u8]) -> Self {
3413 let mut buckets = vec![0usize; DELTA_BUCKET_COUNT + 1];
3414 let mut anchors = Vec::with_capacity(delta_anchor_count(base.len()));
3415 for_each_delta_anchor(base.len(), |offset| {
3416 let hash = block_hash(&base[offset..offset + DELTA_BLOCK_SIZE]);
3417 buckets[delta_bucket(hash) + 1] += 1;
3418 anchors.push(DeltaBlock { hash, offset });
3419 });
3420 for idx in 1..buckets.len() {
3421 buckets[idx] += buckets[idx - 1];
3422 }
3423
3424 let mut next_offsets = buckets[..DELTA_BUCKET_COUNT].to_vec();
3425 let mut blocks = vec![DeltaBlock { hash: 0, offset: 0 }; anchors.len()];
3426 for anchor in anchors {
3427 let bucket = delta_bucket(anchor.hash);
3428 let next = &mut next_offsets[bucket];
3429 blocks[*next] = anchor;
3430 *next += 1;
3431 }
3432
3433 Self {
3434 base,
3435 blocks,
3436 buckets,
3437 }
3438 }
3439
3440 fn candidate_blocks(&self, hash: u32) -> impl Iterator<Item = &DeltaBlock> {
3441 let bucket = delta_bucket(hash);
3442 let start = self.buckets[bucket];
3443 let end = self.buckets[bucket + 1];
3444 self.blocks[start..end]
3445 .iter()
3446 .filter(move |block| block.hash == hash)
3447 }
3448
3449 fn has_hash(&self, hash: u32) -> bool {
3450 self.candidate_blocks(hash).next().is_some()
3451 }
3452
3453 fn has_shared_anchor(&self, target: &[u8]) -> bool {
3454 if target.len() < DELTA_BLOCK_SIZE || self.blocks.is_empty() {
3455 return false;
3456 }
3457 let last = target.len() - DELTA_BLOCK_SIZE;
3458 for offset in (0..=last).step_by(DELTA_INDEX_STRIDE) {
3459 let hash = block_hash(&target[offset..offset + DELTA_BLOCK_SIZE]);
3460 if self.has_hash(hash) {
3461 return true;
3462 }
3463 }
3464 if !last.is_multiple_of(DELTA_INDEX_STRIDE) {
3465 let hash = block_hash(&target[last..last + DELTA_BLOCK_SIZE]);
3466 if self.has_hash(hash) {
3467 return true;
3468 }
3469 }
3470 false
3471 }
3472
3473 fn delta(&self, target: &[u8]) -> Option<Vec<u8>> {
3475 if !self.has_shared_anchor(target) {
3476 return None;
3477 }
3478 let base = self.base;
3479 let mut delta = Vec::new();
3480 write_delta_varint(&mut delta, base.len() as u64);
3481 write_delta_varint(&mut delta, target.len() as u64);
3482
3483 let mut pending_insert_start = 0usize;
3484 let mut pos = 0usize;
3485 while pos < target.len() {
3486 let mut best_len = 0usize;
3487 let mut best_offset = 0usize;
3488 if pos + DELTA_BLOCK_SIZE <= target.len() {
3489 let hash = block_hash(&target[pos..pos + DELTA_BLOCK_SIZE]);
3490 for candidate in self.candidate_blocks(hash).take(DELTA_MAX_CHAIN) {
3491 let candidate = candidate.offset;
3494 let max_len = (base.len() - candidate).min(target.len() - pos);
3495 let mut len = 0usize;
3496 while len < max_len && base[candidate + len] == target[pos + len] {
3497 len += 1;
3498 }
3499 if len > best_len {
3500 best_len = len;
3501 best_offset = candidate;
3502 }
3503 }
3504 }
3505
3506 if best_len >= DELTA_BLOCK_SIZE {
3507 if pending_insert_start < pos {
3508 write_delta_insert(&mut delta, &target[pending_insert_start..pos]);
3509 }
3510 write_delta_copy(&mut delta, best_offset as u64, best_len as u64);
3511 pos += best_len;
3512 pending_insert_start = pos;
3513 } else {
3514 pos += 1;
3515 }
3516 }
3517 if pending_insert_start < target.len() {
3518 write_delta_insert(&mut delta, &target[pending_insert_start..]);
3519 }
3520 Some(delta)
3521 }
3522}
3523
3524fn for_each_delta_anchor(mut len: usize, mut visit: impl FnMut(usize)) {
3525 if len < DELTA_BLOCK_SIZE {
3526 return;
3527 }
3528 len -= DELTA_BLOCK_SIZE;
3529 for offset in (0..=len).step_by(DELTA_INDEX_STRIDE) {
3530 visit(offset);
3531 }
3532 if !len.is_multiple_of(DELTA_INDEX_STRIDE) {
3533 visit(len);
3534 }
3535}
3536
3537fn delta_anchor_count(len: usize) -> usize {
3538 if len < DELTA_BLOCK_SIZE {
3539 return 0;
3540 }
3541 let last = len - DELTA_BLOCK_SIZE;
3542 (last / DELTA_INDEX_STRIDE) + 1 + usize::from(!last.is_multiple_of(DELTA_INDEX_STRIDE))
3543}
3544
3545fn delta_bucket(hash: u32) -> usize {
3546 (hash as usize) & DELTA_BUCKET_MASK
3547}
3548
/// Upper bound on how many same-hash candidate blocks are tried for a single
/// target position while searching for the longest match.
const DELTA_MAX_CHAIN: usize = 64;
3552
/// Hashes a block by folding each byte into a 32-bit accumulator:
/// multiply (wrapping) by `0x0100_0193`, then XOR with the byte.
fn block_hash(block: &[u8]) -> u32 {
    block.iter().fold(0u32, |accumulator, &byte| {
        accumulator.wrapping_mul(0x0100_0193) ^ u32::from(byte)
    })
}
3565
/// How a planned pack entry is stored: whole, or as a delta against a base.
#[derive(Debug, Clone, PartialEq, Eq)]
enum PlannedBase {
    /// Stored whole; the payload is the object's own body.
    None,
    /// Delta against another object of the same input list; `base_idx` is
    /// that object's index and `delta` is the payload.
    InPack { base_idx: usize, delta: Vec<u8> },
    /// Delta against a base taken from the thin-base set, identified by
    /// `base_oid`; `delta` is the payload.
    External { base_oid: ObjectId, delta: Vec<u8> },
}
3578
/// Planning result for one input object.
#[derive(Debug, Clone, PartialEq, Eq)]
struct PlannedEntry {
    /// Chosen representation (whole object or delta) for this entry.
    base: PlannedBase,
}
3583
3584fn compress_planned_payloads(
3585 objects: &[&EncodedObject],
3586 plan: &[PlannedEntry],
3587 order: &[usize],
3588) -> Result<Vec<Vec<u8>>> {
3589 if order.is_empty() {
3590 return Ok(Vec::new());
3591 }
3592
3593 let worker_count = std::thread::available_parallelism()
3594 .map(|threads| threads.get())
3595 .unwrap_or(1)
3596 .min(PACK_PARALLEL_COMPRESSION_MAX_THREADS)
3597 .min(order.len());
3598 if worker_count <= 1 || order.len() < PACK_PARALLEL_COMPRESSION_MIN_OBJECTS {
3599 let mut payloads = Vec::with_capacity(order.len());
3600 for &idx in order {
3601 payloads.push(compressed_payload(planned_payload(objects, plan, idx))?);
3602 }
3603 return Ok(payloads);
3604 }
3605
3606 let chunk_len = order.len().div_ceil(worker_count);
3607 let mut payloads: Vec<Vec<u8>> = std::iter::repeat_with(Vec::new).take(order.len()).collect();
3608 std::thread::scope(|scope| {
3609 let mut handles = Vec::new();
3610 for (chunk_idx, chunk) in order.chunks(chunk_len).enumerate() {
3611 let chunk_start = chunk_idx * chunk_len;
3612 handles.push(scope.spawn(move || -> Result<Vec<(usize, Vec<u8>)>> {
3613 let mut chunk_payloads = Vec::with_capacity(chunk.len());
3614 for (offset, &idx) in chunk.iter().enumerate() {
3615 chunk_payloads.push((
3616 chunk_start + offset,
3617 compressed_payload(planned_payload(objects, plan, idx))?,
3618 ));
3619 }
3620 Ok(chunk_payloads)
3621 }));
3622 }
3623
3624 let mut first_error = None;
3625 for handle in handles {
3626 match handle.join() {
3627 Ok(Ok(chunk_payloads)) => {
3628 if first_error.is_none() {
3629 for (pos, payload) in chunk_payloads {
3630 payloads[pos] = payload;
3631 }
3632 }
3633 }
3634 Ok(Err(err)) => {
3635 first_error.get_or_insert(err);
3636 }
3637 Err(_) => {
3638 first_error.get_or_insert_with(|| {
3639 GitError::InvalidObject("pack compression worker panicked".into())
3640 });
3641 }
3642 }
3643 }
3644
3645 match first_error {
3646 Some(err) => Err(err),
3647 None => Ok(()),
3648 }
3649 })?;
3650 Ok(payloads)
3651}
3652
3653fn planned_payload<'a>(
3654 objects: &'a [&'a EncodedObject],
3655 plan: &'a [PlannedEntry],
3656 idx: usize,
3657) -> &'a [u8] {
3658 match &plan[idx].base {
3659 PlannedBase::None => &objects[idx].body,
3660 PlannedBase::InPack { delta, .. } | PlannedBase::External { delta, .. } => delta,
3661 }
3662}
3663
3664fn compressed_payload(body: &[u8]) -> Result<Vec<u8>> {
3665 let mut out = Vec::new();
3666 write_compressed_payload(&mut out, body)?;
3667 Ok(out)
3668}
3669
/// Maximum number of thin (out-of-pack) bases that are tried as delta bases
/// for each object during planning.
const DELTA_MAX_EXTERNAL_BASES: usize = 64;
3673
/// One slot of the sliding delta window: a previously planned object that may
/// serve as a base for later objects.
struct DeltaWindowEntry<'a> {
    /// Index of the object in the planner's input list.
    idx: usize,
    /// Block index built over that object's body.
    index: DeltaIndex<'a>,
}
3678
3679fn delta_type_rank(object_type: ObjectType) -> u8 {
3682 match object_type {
3683 ObjectType::Commit => 0,
3684 ObjectType::Tree => 1,
3685 ObjectType::Blob => 2,
3686 ObjectType::Tag => 3,
3687 }
3688}
3689
3690fn plan_pack_deltas(
3720 objects: &[&EncodedObject],
3721 object_ids: &[ObjectId],
3722 options: &PackWriteOptions,
3723) -> Result<(Vec<PlannedEntry>, Vec<usize>)> {
3724 let count = objects.len();
3725 let mut plan: Vec<PlannedEntry> = (0..count)
3726 .map(|_| PlannedEntry {
3727 base: PlannedBase::None,
3728 })
3729 .collect();
3730
3731 let mut order: Vec<usize> = (0..count).collect();
3735 if options.reorder && options.depth > 0 {
3736 order.sort_by(|&left, &right| {
3737 delta_type_rank(objects[left].object_type)
3738 .cmp(&delta_type_rank(objects[right].object_type))
3739 .then_with(|| objects[right].body.len().cmp(&objects[left].body.len()))
3740 .then_with(|| {
3741 object_ids[left]
3742 .as_bytes()
3743 .cmp(object_ids[right].as_bytes())
3744 })
3745 });
3746 }
3747
3748 if options.depth == 0 {
3749 return Ok((plan, order));
3750 }
3751
3752 let mut external_indexes: Vec<(ObjectId, ObjectType, DeltaIndex<'_>)> =
3755 Vec::with_capacity(options.thin_bases.len());
3756 for (oid, object) in &options.thin_bases {
3757 external_indexes.push((*oid, object.object_type, DeltaIndex::new(&object.body)));
3758 }
3759
3760 let mut depth = vec![0usize; count];
3763 let mut window: std::collections::VecDeque<DeltaWindowEntry<'_>> =
3765 std::collections::VecDeque::new();
3766
3767 for &idx in &order {
3768 let target = &objects[idx].body;
3769 let target_type = objects[idx].object_type;
3770
3771 let mut best_delta: Option<Vec<u8>> = None;
3772 let mut best_base = PlannedBase::None;
3773
3774 for base_entry in window.iter().rev() {
3776 let base_idx = base_entry.idx;
3777 if objects[base_idx].object_type != target_type {
3778 continue;
3779 }
3780 if depth[base_idx] + 1 > options.depth {
3783 continue;
3784 }
3785 let Some(delta) = base_entry.index.delta(target) else {
3786 continue;
3787 };
3788 if !delta_is_acceptable(&delta, target.len()) {
3789 continue;
3790 }
3791 if best_delta
3792 .as_ref()
3793 .is_none_or(|current| delta.len() < current.len())
3794 {
3795 best_delta = Some(delta);
3796 best_base = PlannedBase::InPack {
3797 base_idx,
3798 delta: Vec::new(),
3799 };
3800 }
3801 }
3802
3803 for (base_oid, base_type, base_index) in
3806 external_indexes.iter().take(DELTA_MAX_EXTERNAL_BASES)
3807 {
3808 if *base_type != target_type {
3809 continue;
3810 }
3811 let Some(delta) = base_index.delta(target) else {
3812 continue;
3813 };
3814 if !delta_is_acceptable(&delta, target.len()) {
3815 continue;
3816 }
3817 if best_delta
3818 .as_ref()
3819 .is_none_or(|current| delta.len() < current.len())
3820 {
3821 best_delta = Some(delta);
3822 best_base = PlannedBase::External {
3823 base_oid: *base_oid,
3824 delta: Vec::new(),
3825 };
3826 }
3827 }
3828
3829 if let Some(delta) = best_delta {
3830 match best_base {
3831 PlannedBase::InPack { base_idx, .. } => {
3832 depth[idx] = depth[base_idx] + 1;
3833 plan[idx].base = PlannedBase::InPack { base_idx, delta };
3834 }
3835 PlannedBase::External { base_oid, .. } => {
3836 depth[idx] = 1;
3837 plan[idx].base = PlannedBase::External { base_oid, delta };
3838 }
3839 PlannedBase::None => {}
3840 }
3841 }
3842
3843 window.push_back(DeltaWindowEntry {
3845 idx,
3846 index: DeltaIndex::new(&objects[idx].body),
3847 });
3848 while window.len() > options.window {
3849 window.pop_front();
3850 }
3851 }
3852
3853 Ok((plan, order))
3854}
3855
/// A delta is worth using only when it is non-empty and strictly smaller than
/// the object it encodes.
fn delta_is_acceptable(delta: &[u8], target_len: usize) -> bool {
    (1..target_len).contains(&delta.len())
}
3863
/// Appends `value` as a little-endian base-128 varint: seven payload bits per
/// byte, with the high bit set on every byte except the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    while value >= 0x80 {
        out.push(((value as u8) & 0x7f) | 0x80);
        value >>= 7;
    }
    // Final byte: fewer than eight significant bits remain, no continuation flag.
    out.push(value as u8);
}
3877
/// Appends copy commands that copy `size` bytes from the base starting at
/// `offset`.
///
/// A single command copies at most `0x10000` bytes, so larger copies are
/// split into consecutive commands. Each command byte has the high bit set;
/// bits 0-3 flag which of the four little-endian offset bytes follow and
/// bits 4-6 flag which of the three size bytes follow. Zero bytes are
/// omitted, and a full `0x10000` chunk is encoded as size zero. Only the low
/// 32 bits of `offset` are encoded.
fn write_delta_copy(out: &mut Vec<u8>, mut offset: u64, mut size: u64) {
    const MAX_CHUNK: u64 = 0x10000;
    while size != 0 {
        let chunk = size.min(MAX_CHUNK);
        let encoded_size = if chunk == MAX_CHUNK { 0 } else { chunk };

        // Reserve the command byte now and set its flag bits as operand
        // bytes are appended behind it.
        let command_at = out.len();
        out.push(0x80);
        for (bit, &byte) in offset.to_le_bytes().iter().take(4).enumerate() {
            if byte != 0 {
                out[command_at] |= 1u8 << bit;
                out.push(byte);
            }
        }
        for (bit, &byte) in encoded_size.to_le_bytes().iter().take(3).enumerate() {
            if byte != 0 {
                out[command_at] |= 0x10u8 << bit;
                out.push(byte);
            }
        }

        offset += chunk;
        size -= chunk;
    }
}
3907
/// Appends insert commands carrying `bytes` as literal data. One command
/// holds at most `0x7f` bytes, so longer input becomes several commands, each
/// prefixed by its length byte.
fn write_delta_insert(out: &mut Vec<u8>, bytes: &[u8]) {
    for piece in bytes.chunks(0x7f) {
        out.push(piece.len() as u8);
        out.extend_from_slice(piece);
    }
}
3916
3917fn read_delta_varint(delta: &[u8], cursor: &mut usize) -> Result<u64> {
3918 let mut value = 0u64;
3919 let mut shift = 0u32;
3920 loop {
3921 let Some(byte) = delta.get(*cursor).copied() else {
3922 return Err(GitError::InvalidObject("truncated delta size".into()));
3923 };
3924 *cursor += 1;
3925 value = value
3926 .checked_add(
3927 u64::from(byte & 0x7f)
3928 .checked_shl(shift)
3929 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?,
3930 )
3931 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3932 if byte & 0x80 == 0 {
3933 return Ok(value);
3934 }
3935 shift = shift
3936 .checked_add(7)
3937 .ok_or_else(|| GitError::InvalidObject("delta size overflow".into()))?;
3938 }
3939}
3940
3941fn read_delta_copy_value(
3942 delta: &[u8],
3943 cursor: &mut usize,
3944 command: u8,
3945 masks: &[u8],
3946) -> Result<u64> {
3947 let mut value = 0u64;
3948 for (shift, mask) in masks.iter().enumerate() {
3949 if command & mask != 0 {
3950 let Some(byte) = delta.get(*cursor).copied() else {
3951 return Err(GitError::InvalidObject(
3952 "truncated delta copy command".into(),
3953 ));
3954 };
3955 *cursor += 1;
3956 value |= u64::from(byte) << (shift * 8);
3957 }
3958 }
3959 Ok(value)
3960}
3961
// Per-thread compressor that is reset and reused for every pack entry, so no
// new deflate state is created per entry. Built with the default
// compression level; the `true` argument asks flate2 for a zlib-wrapped stream.
thread_local! {
    static DEFLATE: RefCell<Compress> = RefCell::new(Compress::new(Compression::default(), true));
}
3965
3966fn write_compressed_payload(out: &mut Vec<u8>, body: &[u8]) -> Result<()> {
3967 DEFLATE.with(|cell| {
3968 let mut compressor = cell.borrow_mut();
3969 compressor.reset();
3970 out.reserve(zlib_compress_bound(body.len()));
3971 let status = compressor
3972 .compress_vec(body, out, FlushCompress::Finish)
3973 .map_err(|err| GitError::InvalidObject(format!("zlib compression failed: {err}")))?;
3974 if status != Status::StreamEnd || compressor.total_in() != body.len() as u64 {
3975 return Err(GitError::InvalidObject(
3976 "zlib compression did not finish pack entry".into(),
3977 ));
3978 }
3979 Ok(())
3980 })
3981}
3982
/// Upper bound on the compressed size of `len` input bytes:
/// `len + len/4096 + len/16384 + len/2^25 + 13`, saturating at `usize::MAX`.
fn zlib_compress_bound(len: usize) -> usize {
    [len >> 12, len >> 14, len >> 25, 13]
        .into_iter()
        .fold(len, usize::saturating_add)
}
3989
3990fn write_entry_header(out: &mut Vec<u8>, object_type: ObjectType, size: u64) {
3991 let type_code = match object_type {
3992 ObjectType::Commit => 1,
3993 ObjectType::Tree => 2,
3994 ObjectType::Blob => 3,
3995 ObjectType::Tag => 4,
3996 };
3997 write_pack_entry_header_kind(out, type_code, size);
3998}
3999
/// Appends a pack entry header for a raw type code.
///
/// The first byte holds the type code in bits 4-6 and the low four size bits
/// in bits 0-3. Remaining size bits follow seven at a time, least significant
/// group first. Every byte except the last has its high bit set.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, size: u64) {
    // High bit to set when more size bits follow.
    let continuation = |remaining: u64| -> u8 {
        if remaining != 0 { 0x80 } else { 0 }
    };

    let low_nibble = (size as u8) & 0x0f;
    let mut remaining = size >> 4;
    out.push((type_code << 4) | low_nibble | continuation(remaining));

    while remaining != 0 {
        let septet = (remaining as u8) & 0x7f;
        remaining >>= 7;
        out.push(septet | continuation(remaining));
    }
}
4016
4017fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: u64) -> Result<()> {
4018 if relative == 0 {
4019 return Err(GitError::InvalidFormat(
4020 "ofs-delta relative offset cannot be zero".into(),
4021 ));
4022 }
4023 let mut value = relative;
4024 let mut bytes = vec![(value & 0x7f) as u8];
4025 value >>= 7;
4026 while value != 0 {
4027 value -= 1;
4028 bytes.push(((value & 0x7f) as u8) | 0x80);
4029 value >>= 7;
4030 }
4031 bytes.reverse();
4032 out.extend_from_slice(&bytes);
4033 Ok(())
4034}
4035
4036fn next_byte(bytes: &[u8], offset: &mut usize) -> Result<u8> {
4037 let Some(byte) = bytes.get(*offset).copied() else {
4038 return Err(GitError::InvalidFormat(
4039 "truncated pack entry header".into(),
4040 ));
4041 };
4042 *offset += 1;
4043 Ok(byte)
4044}
4045
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
/// Panics when fewer than two bytes are supplied.
fn u16_be(bytes: &[u8]) -> u16 {
    (u16::from(bytes[0]) << 8) | u16::from(bytes[1])
}
4049
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
/// Panics when fewer than four bytes are supplied.
fn u32_be(bytes: &[u8]) -> u32 {
    bytes[..4]
        .iter()
        .fold(0u32, |value, &byte| (value << 8) | u32::from(byte))
}
4053
/// Decodes the first eight bytes of `bytes` as a big-endian `u64`.
/// Panics when fewer than eight bytes are supplied.
fn u64_be(bytes: &[u8]) -> u64 {
    bytes[..8]
        .iter()
        .fold(0u64, |value, &byte| (value << 8) | u64::from(byte))
}
4059
4060fn read_pack_index_fanout(bytes: &[u8], offset: &mut usize) -> Result<[u32; 256]> {
4061 let mut fanout = [0u32; 256];
4062 let mut previous = 0u32;
4063 for slot in &mut fanout {
4064 *slot = u32_be(&bytes[*offset..*offset + 4]);
4065 if *slot < previous {
4066 return Err(GitError::InvalidFormat(
4067 "pack index fanout is not monotonic".into(),
4068 ));
4069 }
4070 previous = *slot;
4071 *offset += 4;
4072 }
4073 Ok(fanout)
4074}
4075
4076fn validate_pack_index_oid_fanout(idx: usize, oid_bytes: &[u8], fanout: &[u32; 256]) -> Result<()> {
4077 let expected_min = if oid_bytes[0] == 0 {
4078 0
4079 } else {
4080 fanout[usize::from(oid_bytes[0] - 1)]
4081 };
4082 if (idx as u32) < expected_min || (idx as u32) >= fanout[usize::from(oid_bytes[0])] {
4083 return Err(GitError::InvalidFormat(
4084 "pack index object id is outside its fanout bucket".into(),
4085 ));
4086 }
4087 Ok(())
4088}
4089
4090fn pack_index_v2_offset(raw_offset: u32, large_offset_table: &[u8]) -> Result<u64> {
4091 if raw_offset & 0x8000_0000 == 0 {
4092 return Ok(u64::from(raw_offset));
4093 }
4094 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4095 let large_start = large_idx
4096 .checked_mul(8)
4097 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4098 let large_end = large_start
4099 .checked_add(8)
4100 .ok_or_else(|| GitError::InvalidFormat("pack index large offset overflow".into()))?;
4101 if large_end > large_offset_table.len() {
4102 return Err(GitError::InvalidFormat(
4103 "pack index large offset points past table".into(),
4104 ));
4105 }
4106 Ok(u64_be(&large_offset_table[large_start..large_end]))
4107}
4108
4109fn checked_range(
4110 start: usize,
4111 count: usize,
4112 width: usize,
4113 total: usize,
4114) -> Result<std::ops::Range<usize>> {
4115 let len = count
4116 .checked_mul(width)
4117 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4118 let end = start
4119 .checked_add(len)
4120 .ok_or_else(|| GitError::InvalidFormat("pack index table overflow".into()))?;
4121 if end > total {
4122 return Err(GitError::InvalidFormat("truncated pack index table".into()));
4123 }
4124 Ok(start..end)
4125}
4126
4127fn validate_position_permutation(positions: &[u32]) -> Result<()> {
4128 let mut seen = vec![false; positions.len()];
4129 for position in positions {
4130 let idx = *position as usize;
4131 if idx >= positions.len() {
4132 return Err(GitError::InvalidFormat(
4133 "reverse index position points past object table".into(),
4134 ));
4135 }
4136 if seen[idx] {
4137 return Err(GitError::InvalidFormat(
4138 "reverse index position is duplicated".into(),
4139 ));
4140 }
4141 seen[idx] = true;
4142 }
4143 Ok(())
4144}
4145
4146fn parse_midx_pack_names(
4147 bytes: &[u8],
4148 chunks: &[MultiPackIndexChunk],
4149 pack_count: usize,
4150 version: u8,
4151) -> Result<Vec<String>> {
4152 let data = midx_chunk_data(bytes, chunks, *b"PNAM", true)?
4153 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing PNAM chunk".into()))?;
4154 let mut names = Vec::with_capacity(pack_count);
4155 let mut offset = 0usize;
4156 while names.len() < pack_count {
4157 let Some(relative_end) = data[offset..].iter().position(|byte| *byte == 0) else {
4158 return Err(GitError::InvalidFormat(
4159 "multi-pack-index PNAM entry is unterminated".into(),
4160 ));
4161 };
4162 let name_bytes = &data[offset..offset + relative_end];
4163 if name_bytes.is_empty() {
4164 return Err(GitError::InvalidFormat(
4165 "multi-pack-index PNAM entry is empty".into(),
4166 ));
4167 }
4168 let name = std::str::from_utf8(name_bytes)
4169 .map_err(|err| GitError::InvalidFormat(err.to_string()))?;
4170 if name.bytes().any(|byte| matches!(byte, b'/' | b'\\')) {
4171 return Err(GitError::InvalidFormat(
4172 "multi-pack-index PNAM entry contains a path separator".into(),
4173 ));
4174 }
4175 names.push(name.to_string());
4176 offset += relative_end + 1;
4177 }
4178 let padding = &data[offset..];
4179 if padding.len() > 3 || padding.iter().any(|byte| *byte != 0) {
4180 return Err(GitError::InvalidFormat(
4181 "multi-pack-index PNAM padding is invalid".into(),
4182 ));
4183 }
4184 if version == 1 && names.windows(2).any(|pair| pair[0] > pair[1]) {
4185 return Err(GitError::InvalidFormat(
4186 "multi-pack-index v1 PNAM entries are not sorted".into(),
4187 ));
4188 }
4189 Ok(names)
4190}
4191
4192fn parse_midx_oid_fanout(
4193 bytes: &[u8],
4194 chunks: &[MultiPackIndexChunk],
4195) -> Result<([u32; 256], usize)> {
4196 let data = midx_chunk_data(bytes, chunks, *b"OIDF", true)?
4197 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDF chunk".into()))?;
4198 if data.len() != 256 * 4 {
4199 return Err(GitError::InvalidFormat(
4200 "multi-pack-index OIDF chunk has invalid length".into(),
4201 ));
4202 }
4203 let mut fanout = [0u32; 256];
4204 let mut previous = 0u32;
4205 for (idx, slot) in fanout.iter_mut().enumerate() {
4206 let start = idx * 4;
4207 *slot = u32_be(&data[start..start + 4]);
4208 if *slot < previous {
4209 return Err(GitError::InvalidFormat(
4210 "multi-pack-index OIDF fanout is not monotonic".into(),
4211 ));
4212 }
4213 previous = *slot;
4214 }
4215 Ok((fanout, fanout[255] as usize))
4216}
4217
4218fn parse_midx_object_ids(
4219 bytes: &[u8],
4220 chunks: &[MultiPackIndexChunk],
4221 format: ObjectFormat,
4222 object_count: usize,
4223 fanout: &[u32; 256],
4224) -> Result<Vec<ObjectId>> {
4225 let data = midx_chunk_data(bytes, chunks, *b"OIDL", true)?
4226 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OIDL chunk".into()))?;
4227 let expected_len = object_count
4228 .checked_mul(format.raw_len())
4229 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OIDL chunk overflow".into()))?;
4230 if data.len() != expected_len {
4231 return Err(GitError::InvalidFormat(
4232 "multi-pack-index OIDL chunk has invalid length".into(),
4233 ));
4234 }
4235
4236 let mut ids = Vec::with_capacity(object_count);
4237 let mut counts = [0u32; 256];
4238 let mut previous_oid: Option<ObjectId> = None;
4239 for idx in 0..object_count {
4240 let start = idx * format.raw_len();
4241 let oid = ObjectId::from_raw(format, &data[start..start + format.raw_len()])?;
4242 if let Some(previous) = &previous_oid
4243 && previous.as_bytes() >= oid.as_bytes()
4244 {
4245 return Err(GitError::InvalidFormat(
4246 "multi-pack-index OIDL object ids are not strictly sorted".into(),
4247 ));
4248 }
4249 counts[oid.as_bytes()[0] as usize] = counts[oid.as_bytes()[0] as usize]
4250 .checked_add(1)
4251 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4252 previous_oid = Some(oid);
4253 ids.push(oid);
4254 }
4255
4256 let mut running = 0u32;
4257 for (idx, count) in counts.iter().enumerate() {
4258 running = running
4259 .checked_add(*count)
4260 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index fanout overflow".into()))?;
4261 if fanout[idx] != running {
4262 return Err(GitError::InvalidFormat(
4263 "multi-pack-index OIDF fanout does not match OIDL".into(),
4264 ));
4265 }
4266 }
4267 Ok(ids)
4268}
4269
4270fn parse_midx_object_offsets(
4271 bytes: &[u8],
4272 chunks: &[MultiPackIndexChunk],
4273 object_ids: Vec<ObjectId>,
4274 pack_count: u32,
4275) -> Result<Vec<MultiPackIndexEntry>> {
4276 let data = midx_chunk_data(bytes, chunks, *b"OOFF", true)?
4277 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index missing OOFF chunk".into()))?;
4278 let expected_len = object_ids
4279 .len()
4280 .checked_mul(8)
4281 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index OOFF chunk overflow".into()))?;
4282 if data.len() != expected_len {
4283 return Err(GitError::InvalidFormat(
4284 "multi-pack-index OOFF chunk has invalid length".into(),
4285 ));
4286 }
4287 let large_offsets = midx_chunk_data(bytes, chunks, *b"LOFF", false)?;
4288 if let Some(large_offsets) = large_offsets
4289 && large_offsets.len() % 8 != 0
4290 {
4291 return Err(GitError::InvalidFormat(
4292 "multi-pack-index LOFF chunk has invalid length".into(),
4293 ));
4294 }
4295
4296 let mut entries = Vec::with_capacity(object_ids.len());
4297 for (idx, oid) in object_ids.into_iter().enumerate() {
4298 let start = idx * 8;
4299 let pack_int_id = u32_be(&data[start..start + 4]);
4300 if pack_int_id >= pack_count {
4301 return Err(GitError::InvalidFormat(
4302 "multi-pack-index object points past pack table".into(),
4303 ));
4304 }
4305 let raw_offset = u32_be(&data[start + 4..start + 8]);
4306 let offset = if raw_offset & 0x8000_0000 == 0 {
4307 u64::from(raw_offset)
4308 } else {
4309 let Some(large_offsets) = large_offsets else {
4310 return Err(GitError::InvalidFormat(
4311 "multi-pack-index large offset missing LOFF chunk".into(),
4312 ));
4313 };
4314 let large_idx = (raw_offset & 0x7fff_ffff) as usize;
4315 let large_start = large_idx.checked_mul(8).ok_or_else(|| {
4316 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4317 })?;
4318 let large_end = large_start.checked_add(8).ok_or_else(|| {
4319 GitError::InvalidFormat("multi-pack-index LOFF index overflow".into())
4320 })?;
4321 if large_end > large_offsets.len() {
4322 return Err(GitError::InvalidFormat(
4323 "multi-pack-index large offset points past LOFF chunk".into(),
4324 ));
4325 }
4326 u64_be(&large_offsets[large_start..large_end])
4327 };
4328 entries.push(MultiPackIndexEntry {
4329 oid,
4330 pack_int_id,
4331 offset,
4332 });
4333 }
4334 Ok(entries)
4335}
4336
4337fn parse_midx_reverse_index(
4338 bytes: &[u8],
4339 chunks: &[MultiPackIndexChunk],
4340 object_count: usize,
4341) -> Result<Option<Vec<u32>>> {
4342 let Some(data) = midx_chunk_data(bytes, chunks, *b"RIDX", false)? else {
4343 return Ok(None);
4344 };
4345 let expected_len = object_count
4346 .checked_mul(4)
4347 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index RIDX chunk overflow".into()))?;
4348 if data.len() != expected_len {
4349 return Err(GitError::InvalidFormat(
4350 "multi-pack-index RIDX chunk has invalid length".into(),
4351 ));
4352 }
4353 let mut positions = Vec::with_capacity(object_count);
4354 for idx in 0..object_count {
4355 let start = idx * 4;
4356 positions.push(u32_be(&data[start..start + 4]));
4357 }
4358 validate_position_permutation(&positions)?;
4359 Ok(Some(positions))
4360}
4361
4362fn parse_midx_bitmapped_packs(
4363 bytes: &[u8],
4364 chunks: &[MultiPackIndexChunk],
4365 pack_count: usize,
4366 object_count: usize,
4367) -> Result<Option<Vec<MultiPackBitmapPack>>> {
4368 let Some(data) = midx_chunk_data(bytes, chunks, *b"BTMP", false)? else {
4369 return Ok(None);
4370 };
4371 let expected_len = pack_count
4372 .checked_mul(8)
4373 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index BTMP chunk overflow".into()))?;
4374 if data.len() != expected_len {
4375 return Err(GitError::InvalidFormat(
4376 "multi-pack-index BTMP chunk has invalid length".into(),
4377 ));
4378 }
4379 let mut entries = Vec::with_capacity(pack_count);
4380 for idx in 0..pack_count {
4381 let start = idx * 8;
4382 let bitmap_pos = u32_be(&data[start..start + 4]);
4383 let bitmap_nr = u32_be(&data[start + 4..start + 8]);
4384 let bitmap_end = u64::from(bitmap_pos)
4385 .checked_add(u64::from(bitmap_nr))
4386 .ok_or_else(|| {
4387 GitError::InvalidFormat("multi-pack-index BTMP range overflow".into())
4388 })?;
4389 if bitmap_end > object_count as u64 {
4390 return Err(GitError::InvalidFormat(
4391 "multi-pack-index BTMP range points past object table".into(),
4392 ));
4393 }
4394 entries.push(MultiPackBitmapPack {
4395 bitmap_pos,
4396 bitmap_nr,
4397 });
4398 }
4399 Ok(Some(entries))
4400}
4401
4402fn midx_chunk_data<'a>(
4403 bytes: &'a [u8],
4404 chunks: &[MultiPackIndexChunk],
4405 id: [u8; 4],
4406 required: bool,
4407) -> Result<Option<&'a [u8]>> {
4408 let Some(chunk) = chunks.iter().find(|chunk| chunk.id == id) else {
4409 if required {
4410 return Err(GitError::InvalidFormat(format!(
4411 "multi-pack-index missing {} chunk",
4412 std::str::from_utf8(&id).unwrap_or("required")
4413 )));
4414 }
4415 return Ok(None);
4416 };
4417 let start = usize::try_from(chunk.offset)
4418 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk offset overflow".into()))?;
4419 let len = usize::try_from(chunk.len)
4420 .map_err(|_| GitError::InvalidFormat("multi-pack-index chunk length overflow".into()))?;
4421 let end = start
4422 .checked_add(len)
4423 .ok_or_else(|| GitError::InvalidFormat("multi-pack-index chunk range overflow".into()))?;
4424 let Some(data) = bytes.get(start..end) else {
4425 return Err(GitError::InvalidFormat(
4426 "multi-pack-index chunk extends past file".into(),
4427 ));
4428 };
4429 Ok(Some(data))
4430}
4431
4432fn hash_function_id(format: ObjectFormat) -> u32 {
4433 match format {
4434 ObjectFormat::Sha1 => 1,
4435 ObjectFormat::Sha256 => 2,
4436 }
4437}
4438
/// Largest run length a single run-length word can carry (32-bit field).
const EWAH_MAX_RUNNING_LEN: u64 = (1 << 32) - 1;

/// Largest literal-word count a single run-length word can carry (31-bit field).
const EWAH_MAX_LITERAL_LEN: u64 = (1 << 31) - 1;

/// A 64-bit word with every bit set; stored as a run of ones rather than a literal.
const EWAH_ALL_ONES: u64 = !0;
4449
4450impl EwahBitmap {
4451 pub fn from_words(bit_size: u32, words: &[u64]) -> Result<Self> {
4465 let required_words = bit_size.div_ceil(64) as usize;
4466 if required_words > words.len() {
4467 return Err(GitError::InvalidFormat(format!(
4468 "EWAH bit_size {bit_size} requires {required_words} words but only {} supplied",
4469 words.len()
4470 )));
4471 }
4472 let significant = &words[..required_words];
4475 let mut builder = EwahBuilder::new(bit_size);
4476 for &word in significant {
4477 if word == 0 {
4478 builder.add_empty_words(false, 1);
4479 } else if word == EWAH_ALL_ONES {
4480 builder.add_empty_words(true, 1);
4481 } else {
4482 builder.add_literal(word);
4483 }
4484 }
4485 builder.finish()
4486 }
4487
4488 pub fn from_positions(bit_size: u32, positions: &[u32]) -> Result<Self> {
4494 let word_count = bit_size.div_ceil(64) as usize;
4495 let mut words = vec![0u64; word_count];
4496 for &position in positions {
4497 if position >= bit_size {
4498 return Err(GitError::InvalidFormat(format!(
4499 "EWAH bit position {position} out of range for bit_size {bit_size}"
4500 )));
4501 }
4502 let word_index = (position / 64) as usize;
4503 let bit_index = position % 64;
4504 words[word_index] |= 1u64 << bit_index;
4505 }
4506 Self::from_words(bit_size, &words)
4507 }
4508
4509 pub fn empty() -> Self {
4512 Self {
4513 bit_size: 0,
4514 words: Vec::new(),
4515 rlw_position: 0,
4516 }
4517 }
4518
4519 pub fn to_words(&self) -> Result<Vec<u64>> {
4525 let mut out = Vec::new();
4526 let mut word_idx = 0usize;
4527 while word_idx < self.words.len() {
4528 let rlw = self.words[word_idx];
4529 let run_bit = rlw & 1;
4530 let run_words = (rlw >> 1) & EWAH_MAX_RUNNING_LEN;
4531 let literal_words = (rlw >> 33) as usize;
4532 word_idx += 1;
4533 let fill = if run_bit == 1 { EWAH_ALL_ONES } else { 0 };
4534 for _ in 0..run_words {
4535 out.push(fill);
4536 }
4537 let literal_end = word_idx
4538 .checked_add(literal_words)
4539 .filter(|end| *end <= self.words.len())
4540 .ok_or_else(|| {
4541 GitError::InvalidFormat("EWAH literal words extend past word table".into())
4542 })?;
4543 out.extend_from_slice(&self.words[word_idx..literal_end]);
4544 word_idx = literal_end;
4545 }
4546 let required_words = (self.bit_size as usize).div_ceil(64);
4547 if out.len() < required_words {
4548 out.resize(required_words, 0);
4549 }
4550 out.truncate(required_words);
4551 Ok(out)
4552 }
4553
4554 pub fn to_positions(&self) -> Result<Vec<u32>> {
4556 let words = self.to_words()?;
4557 let mut positions = Vec::new();
4558 for (word_index, word) in words.iter().enumerate() {
4559 let mut remaining = *word;
4560 while remaining != 0 {
4561 let bit = remaining.trailing_zeros();
4562 let position = (word_index as u64) * 64 + u64::from(bit);
4563 if position < u64::from(self.bit_size) {
4564 positions.push(position as u32);
4566 }
4567 remaining &= remaining - 1;
4568 }
4569 }
4570 Ok(positions)
4571 }
4572
4573 pub fn to_bytes(&self) -> Vec<u8> {
4577 let mut out = Vec::with_capacity(12 + self.words.len() * 8);
4578 self.append_bytes(&mut out);
4579 out
4580 }
4581
4582 fn append_bytes(&self, out: &mut Vec<u8>) {
4583 out.extend_from_slice(&self.bit_size.to_be_bytes());
4584 out.extend_from_slice(&(self.words.len() as u32).to_be_bytes());
4585 for word in &self.words {
4586 out.extend_from_slice(&word.to_be_bytes());
4587 }
4588 out.extend_from_slice(&self.rlw_position.to_be_bytes());
4589 }
4590}
4591
4592struct EwahBuilder {
4600 bit_size: u32,
4601 words: Vec<u64>,
4602 rlw_position: usize,
4603}
4604
4605impl EwahBuilder {
4606 fn new(bit_size: u32) -> Self {
4607 Self {
4609 bit_size,
4610 words: vec![0u64],
4611 rlw_position: 0,
4612 }
4613 }
4614
4615 fn rlw(&self) -> u64 {
4616 self.words[self.rlw_position]
4617 }
4618
4619 fn set_rlw(&mut self, value: u64) {
4620 self.words[self.rlw_position] = value;
4621 }
4622
4623 fn rlw_running_len(&self) -> u64 {
4624 (self.rlw() >> 1) & EWAH_MAX_RUNNING_LEN
4625 }
4626
4627 fn rlw_running_bit(&self) -> bool {
4628 self.rlw() & 1 == 1
4629 }
4630
4631 fn rlw_literal_len(&self) -> u64 {
4632 self.rlw() >> 33
4633 }
4634
4635 fn set_running_bit(&mut self, bit: bool) {
4636 let mut value = self.rlw();
4637 value &= !1;
4638 value |= u64::from(bit);
4639 self.set_rlw(value);
4640 }
4641
4642 fn set_running_len(&mut self, len: u64) {
4643 let mut value = self.rlw();
4644 value &= !(EWAH_MAX_RUNNING_LEN << 1);
4645 value |= (len & EWAH_MAX_RUNNING_LEN) << 1;
4646 self.set_rlw(value);
4647 }
4648
4649 fn set_literal_len(&mut self, len: u64) {
4650 let mut value = self.rlw();
4651 value &= (1u64 << 33) - 1;
4652 value |= (len & EWAH_MAX_LITERAL_LEN) << 33;
4653 self.set_rlw(value);
4654 }
4655
4656 fn push_rlw(&mut self) {
4658 self.rlw_position = self.words.len();
4659 self.words.push(0);
4660 }
4661
4662 fn add_empty_words(&mut self, value: bool, mut number: u64) {
4670 while number > 0 {
4671 let can_extend = self.rlw_literal_len() == 0
4675 && (self.rlw_running_len() == 0 || self.rlw_running_bit() == value)
4676 && self.rlw_running_len() < EWAH_MAX_RUNNING_LEN;
4677 if !can_extend {
4678 self.push_rlw();
4679 }
4680 if self.rlw_running_len() == 0 {
4681 self.set_running_bit(value);
4682 }
4683 let available = EWAH_MAX_RUNNING_LEN - self.rlw_running_len();
4684 let take = available.min(number);
4685 self.set_running_len(self.rlw_running_len() + take);
4686 number -= take;
4687 }
4688 }
4689
4690 fn add_literal(&mut self, word: u64) {
4693 if self.rlw_literal_len() >= EWAH_MAX_LITERAL_LEN {
4694 self.push_rlw();
4695 }
4696 let literal_len = self.rlw_literal_len();
4697 self.set_literal_len(literal_len + 1);
4698 self.words.push(word);
4699 }
4700
4701 fn finish(self) -> Result<EwahBitmap> {
4702 let rlw_position = u32::try_from(self.rlw_position)
4703 .map_err(|_| GitError::InvalidFormat("EWAH RLW position overflow".into()))?;
4704 if self.words.len() > u32::MAX as usize {
4705 return Err(GitError::InvalidFormat("EWAH word table overflow".into()));
4706 }
4707 Ok(EwahBitmap {
4708 bit_size: self.bit_size,
4709 words: self.words,
4710 rlw_position,
4711 })
4712 }
4713}
4714
4715#[derive(Debug, Clone)]
4728pub struct PackBitmapWriter {
4729 format: ObjectFormat,
4730 pack_checksum: ObjectId,
4731 object_count: u32,
4732 commit_positions: Vec<u32>,
4733 tree_positions: Vec<u32>,
4734 blob_positions: Vec<u32>,
4735 tag_positions: Vec<u32>,
4736 name_hash_cache: Option<Vec<u32>>,
4737 selected: Vec<SelectedCommit>,
4738}
4739
4740#[derive(Debug, Clone)]
4741struct SelectedCommit {
4742 commit_index_position: u32,
4746 flags: u8,
4747 reachable: Vec<u32>,
4748}
4749
4750impl PackBitmapWriter {
4751 pub const FLAG_NONE: u8 = 0;
4755
4756 pub fn new(
4763 format: ObjectFormat,
4764 pack_checksum: ObjectId,
4765 object_types: &[ObjectType],
4766 ) -> Result<Self> {
4767 if object_types.len() > u32::MAX as usize {
4768 return Err(GitError::InvalidFormat(
4769 "too many objects for a pack bitmap".into(),
4770 ));
4771 }
4772 if pack_checksum.format() != format {
4773 return Err(GitError::InvalidObjectId(
4774 "pack checksum format does not match bitmap format".into(),
4775 ));
4776 }
4777 let object_count = object_types.len() as u32;
4778 let mut commit_positions = Vec::new();
4779 let mut tree_positions = Vec::new();
4780 let mut blob_positions = Vec::new();
4781 let mut tag_positions = Vec::new();
4782 for (index, object_type) in object_types.iter().enumerate() {
4783 let position = index as u32;
4784 match object_type {
4785 ObjectType::Commit => commit_positions.push(position),
4786 ObjectType::Tree => tree_positions.push(position),
4787 ObjectType::Blob => blob_positions.push(position),
4788 ObjectType::Tag => tag_positions.push(position),
4789 }
4790 }
4791 Ok(Self {
4792 format,
4793 pack_checksum,
4794 object_count,
4795 commit_positions,
4796 tree_positions,
4797 blob_positions,
4798 tag_positions,
4799 name_hash_cache: None,
4800 selected: Vec::new(),
4801 })
4802 }
4803
4804 pub fn with_name_hash_cache(mut self, cache: Vec<u32>) -> Result<Self> {
4810 if cache.len() != self.object_count as usize {
4811 return Err(GitError::InvalidFormat(format!(
4812 "name hash cache has {} entries but pack has {} objects",
4813 cache.len(),
4814 self.object_count
4815 )));
4816 }
4817 self.name_hash_cache = Some(cache);
4818 Ok(self)
4819 }
4820
4821 pub fn add_commit(
4833 &mut self,
4834 commit_position: u32,
4835 commit_index_position: u32,
4836 reachable: &[u32],
4837 ) -> Result<()> {
4838 if commit_position >= self.object_count {
4839 return Err(GitError::InvalidFormat(format!(
4840 "commit position {commit_position} out of range for {} objects",
4841 self.object_count
4842 )));
4843 }
4844 if commit_index_position >= self.object_count {
4845 return Err(GitError::InvalidFormat(format!(
4846 "commit index position {commit_index_position} out of range for {} objects",
4847 self.object_count
4848 )));
4849 }
4850 if !self.commit_positions.contains(&commit_position) {
4851 return Err(GitError::InvalidFormat(format!(
4852 "bitmap commit position {commit_position} is not a commit object"
4853 )));
4854 }
4855 for &position in reachable {
4856 if position >= self.object_count {
4857 return Err(GitError::InvalidFormat(format!(
4858 "reachable position {position} out of range for {} objects",
4859 self.object_count
4860 )));
4861 }
4862 }
4863 let mut reachable = reachable.to_vec();
4864 reachable.push(commit_position);
4865 self.selected.push(SelectedCommit {
4866 commit_index_position,
4867 flags: Self::FLAG_NONE,
4868 reachable,
4869 });
4870 Ok(())
4871 }
4872
4873 pub fn build(&self) -> Result<PackBitmapIndex> {
4880 let commits = EwahBitmap::from_positions(self.object_count, &self.commit_positions)?;
4881 let trees = EwahBitmap::from_positions(self.object_count, &self.tree_positions)?;
4882 let blobs = EwahBitmap::from_positions(self.object_count, &self.blob_positions)?;
4883 let tags = EwahBitmap::from_positions(self.object_count, &self.tag_positions)?;
4884
4885 let mut entries = Vec::with_capacity(self.selected.len());
4886 for selected in &self.selected {
4887 let bitmap = EwahBitmap::from_positions(self.object_count, &selected.reachable)?;
4888 entries.push(PackBitmapEntry {
4889 object_position: selected.commit_index_position,
4890 xor_offset: 0,
4891 flags: selected.flags,
4892 bitmap,
4893 });
4894 }
4895
4896 let mut options = PackBitmapIndex::OPTION_FULL_DAG;
4897 if self.name_hash_cache.is_some() {
4898 options |= PackBitmapIndex::OPTION_HASH_CACHE;
4899 }
4900
4901 let placeholder_checksum = ObjectId::null(self.format);
4906 Ok(PackBitmapIndex {
4907 version: 1,
4908 format: self.format,
4909 options,
4910 pack_checksum: self.pack_checksum.clone(),
4911 index_checksum: placeholder_checksum,
4912 type_bitmaps: PackBitmapTypeBitmaps {
4913 commits,
4914 trees,
4915 blobs,
4916 tags,
4917 },
4918 entries,
4919 name_hash_cache: self.name_hash_cache.clone(),
4920 })
4921 }
4922
4923 pub fn write(&self) -> Result<Vec<u8>> {
4926 self.build()?.write()
4927 }
4928}
4929
4930impl PackBitmapIndex {
4931 pub fn write(&self) -> Result<Vec<u8>> {
4945 if self.version != 1 {
4946 return Err(GitError::Unsupported(format!(
4947 "bitmap index version {}",
4948 self.version
4949 )));
4950 }
4951 let known_options = Self::OPTION_FULL_DAG | Self::OPTION_HASH_CACHE;
4952 if self.options & !known_options != 0 {
4953 return Err(GitError::Unsupported(format!(
4954 "bitmap index options {:#06x}",
4955 self.options & !known_options
4956 )));
4957 }
4958 if self.pack_checksum.format() != self.format {
4959 return Err(GitError::InvalidObjectId(
4960 "bitmap pack checksum format does not match index format".into(),
4961 ));
4962 }
4963 if self.entries.len() > u32::MAX as usize {
4964 return Err(GitError::InvalidFormat(
4965 "too many bitmap index entries".into(),
4966 ));
4967 }
4968 let want_cache = self.options & Self::OPTION_HASH_CACHE != 0;
4969 match (&self.name_hash_cache, want_cache) {
4970 (Some(_), false) => {
4971 return Err(GitError::InvalidFormat(
4972 "name hash cache present without OPTION_HASH_CACHE".into(),
4973 ));
4974 }
4975 (None, true) => {
4976 return Err(GitError::InvalidFormat(
4977 "OPTION_HASH_CACHE set without a name hash cache".into(),
4978 ));
4979 }
4980 _ => {}
4981 }
4982
4983 let mut out = Vec::new();
4984 out.extend_from_slice(b"BITM");
4985 out.extend_from_slice(&self.version.to_be_bytes());
4986 out.extend_from_slice(&self.options.to_be_bytes());
4987 out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
4988 out.extend_from_slice(self.pack_checksum.as_bytes());
4989
4990 self.type_bitmaps.commits.append_bytes(&mut out);
4991 self.type_bitmaps.trees.append_bytes(&mut out);
4992 self.type_bitmaps.blobs.append_bytes(&mut out);
4993 self.type_bitmaps.tags.append_bytes(&mut out);
4994
4995 for (idx, entry) in self.entries.iter().enumerate() {
4996 if entry.xor_offset as usize > idx {
4997 return Err(GitError::InvalidFormat(
4998 "bitmap index entry has invalid XOR offset".into(),
4999 ));
5000 }
5001 out.extend_from_slice(&entry.object_position.to_be_bytes());
5002 out.push(entry.xor_offset);
5003 out.push(entry.flags);
5004 entry.bitmap.append_bytes(&mut out);
5005 }
5006
5007 if let Some(cache) = &self.name_hash_cache {
5008 for value in cache {
5009 out.extend_from_slice(&value.to_be_bytes());
5010 }
5011 }
5012
5013 let checksum = sley_core::digest_bytes(self.format, &out)?;
5014 out.extend_from_slice(checksum.as_bytes());
5015 Ok(out)
5016 }
5017}
5018
5019pub fn write_bitmap(
5028 format: ObjectFormat,
5029 pack_checksum: ObjectId,
5030 object_types: &[ObjectType],
5031 commits: &[(u32, u32, Vec<u32>)],
5032 name_hash_cache: Option<Vec<u32>>,
5033) -> Result<Vec<u8>> {
5034 let mut writer = PackBitmapWriter::new(format, pack_checksum, object_types)?;
5035 if let Some(cache) = name_hash_cache {
5036 writer = writer.with_name_hash_cache(cache)?;
5037 }
5038 for (commit_position, commit_index_position, reachable) in commits {
5039 writer.add_commit(*commit_position, *commit_index_position, reachable)?;
5040 }
5041 writer.write()
5042}
5043
5044#[cfg(test)]
5045mod tests {
5046 use super::*;
5047 use flate2::Compression;
5048 use flate2::read::ZlibDecoder;
5049 use flate2::write::ZlibEncoder;
5050 use std::fs;
5051 use std::io::Read;
5052 use std::io::Write;
5053 use std::path::{Path, PathBuf};
5054 use std::process::Command;
5055 use std::time::{SystemTime, UNIX_EPOCH};
5056
5057 fn delta_pack_options(prefer_ofs_delta: bool) -> PackWriteOptions {
5058 PackWriteOptions::new()
5059 .with_prefer_ofs_delta(prefer_ofs_delta)
5060 .with_reorder(false)
5061 }
5062
5063 #[test]
5064 fn parses_single_blob_pack() {
5065 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
5066 let parsed = PackFile::parse_sha1(&pack).expect("test operation should succeed");
5067 assert_eq!(parsed.version, 2);
5068 assert_eq!(parsed.entries.len(), 1);
5069 let object = &parsed.entries[0].object;
5070 assert_eq!(object.object_type, ObjectType::Blob);
5071 assert_eq!(object.body, b"hello\n");
5072 assert_eq!(
5073 parsed.entries[0].entry.oid.to_hex(),
5074 "ce013625030ba8dba906f756967f9e9ca394464a"
5075 );
5076 }
5077
5078 #[test]
5079 fn parses_single_blob_pack_sha256() {
5080 let pack = single_object_pack(ObjectFormat::Sha256, ObjectType::Blob, b"hello\n");
5081 let parsed =
5082 PackFile::parse(&pack, ObjectFormat::Sha256).expect("test operation should succeed");
5083 assert_eq!(parsed.version, 2);
5084 assert_eq!(parsed.entries.len(), 1);
5085 let object = &parsed.entries[0].object;
5086 assert_eq!(object.object_type, ObjectType::Blob);
5087 assert_eq!(object.body, b"hello\n");
5088 assert_eq!(
5089 parsed.entries[0].entry.oid,
5090 object
5091 .object_id(ObjectFormat::Sha256)
5092 .expect("test operation should succeed")
5093 );
5094 }
5095
5096 #[test]
5097 fn parses_bundle_pack_payload_with_bundle_format() {
5098 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5099 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"bundle\n")
5100 .expect("test operation should succeed");
5101 let bundle_bytes = format!("# v2 git bundle\n{oid} refs/heads/main\n\n")
5102 .into_bytes()
5103 .into_iter()
5104 .chain(pack)
5105 .collect::<Vec<_>>();
5106 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5107 .expect("test operation should succeed");
5108
5109 let parsed = PackFile::parse_bundle(&bundle).expect("test operation should succeed");
5110 assert_eq!(parsed.entries.len(), 1);
5111 assert_eq!(parsed.entries[0].object.object_type, ObjectType::Blob);
5112 assert_eq!(parsed.entries[0].object.body, b"bundle\n");
5113 }
5114
5115 fn lying_size_blob_pack(format: ObjectFormat, declared_size: u64, real_body: &[u8]) -> Vec<u8> {
5121 let mut pack = Vec::new();
5122 pack.extend_from_slice(b"PACK");
5123 pack.extend_from_slice(&2u32.to_be_bytes());
5124 pack.extend_from_slice(&1u32.to_be_bytes());
5125 write_pack_entry_header_kind(&mut pack, 3, declared_size);
5127 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5128 encoder
5129 .write_all(real_body)
5130 .expect("test operation should succeed");
5131 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5132 let checksum =
5133 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5134 pack.extend_from_slice(checksum.as_bytes());
5135 pack
5136 }
5137
5138 #[test]
5151 fn rejects_decompression_bomb_header_without_oom() {
5152 for &declared in &[u64::MAX, 100 * 1024 * 1024 * 1024, u64::from(u32::MAX) * 4] {
5153 let pack = lying_size_blob_pack(ObjectFormat::Sha1, declared, b"tiny\n");
5154 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5155 let result = handle.join();
5156 assert!(
5158 result.is_ok(),
5159 "parsing a bomb header (declared={declared}) panicked instead of erroring cleanly"
5160 );
5161 let parse_result = result.expect("parse thread should not panic on a bomb header");
5163 assert!(
5164 parse_result.is_err(),
5165 "bomb header (declared={declared}) should be rejected as invalid"
5166 );
5167 }
5168 }
5169
5170 fn lying_result_size_delta_pack(
5177 format: ObjectFormat,
5178 declared_result_size: u64,
5179 delta_kind: DeltaKind,
5180 ) -> Vec<u8> {
5181 let base = b"hello";
5182 let result = b"hello world"; let mut delta = Vec::new();
5186 write_delta_varint(&mut delta, base.len() as u64);
5187 write_delta_varint(&mut delta, declared_result_size);
5188 let suffix = &result[base.len()..];
5190 delta.push(0x90); delta.push(base.len() as u8);
5192 delta.push(suffix.len() as u8);
5193 delta.extend_from_slice(suffix);
5194
5195 let mut pack = Vec::new();
5196 pack.extend_from_slice(b"PACK");
5197 pack.extend_from_slice(&2u32.to_be_bytes());
5198 pack.extend_from_slice(&2u32.to_be_bytes());
5199
5200 let base_offset = pack.len();
5201 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
5202 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5203 encoder
5204 .write_all(base)
5205 .expect("test operation should succeed");
5206 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5207
5208 let delta_offset = pack.len();
5209 write_pack_entry_header_kind(
5210 &mut pack,
5211 match delta_kind {
5212 DeltaKind::Offset => 6,
5213 DeltaKind::Ref => 7,
5214 },
5215 delta.len() as u64,
5216 );
5217 match delta_kind {
5218 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
5219 DeltaKind::Ref => {
5220 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
5221 .expect("test operation should succeed");
5222 pack.extend_from_slice(base_oid.as_bytes());
5223 }
5224 }
5225 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
5226 encoder
5227 .write_all(&delta)
5228 .expect("test operation should succeed");
5229 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
5230
5231 let checksum =
5232 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
5233 pack.extend_from_slice(checksum.as_bytes());
5234 pack
5235 }
5236
5237 #[test]
5247 fn rejects_delta_result_size_bomb_without_oom() {
5248 let bombs: &[u64] = &[u64::MAX, 1024 * 1024 * 1024 * 1024];
5249 for &declared in bombs {
5250 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5251 let pack = lying_result_size_delta_pack(ObjectFormat::Sha1, declared, delta_kind);
5252 let handle = std::thread::spawn(move || PackFile::parse_sha1(&pack));
5253 let join_result = handle.join();
5254 assert!(
5255 join_result.is_ok(),
5256 "delta bomb (declared={declared}, kind={delta_kind:?}) panicked/aborted \
5257 instead of erroring cleanly"
5258 );
5259 let parse_result =
5260 join_result.expect("parse thread should not panic on a delta bomb");
5261 assert!(
5262 parse_result.is_err(),
5263 "delta bomb (declared={declared}, kind={delta_kind:?}) should be rejected \
5264 as invalid (result.len() != declared)"
5265 );
5266 }
5267 }
5268 }
5269
5270 #[test]
5274 fn applies_legitimate_delta_after_result_size_bound() {
5275 for delta_kind in [DeltaKind::Ref, DeltaKind::Offset] {
5276 let base = b"hello";
5277 let result = b"hello world";
5278 let pack = two_object_delta_pack(ObjectFormat::Sha1, base, result, delta_kind);
5279 let parsed = PackFile::parse_sha1(&pack).expect("legitimate delta should resolve");
5280 assert_eq!(parsed.entries.len(), 2);
5281 assert_eq!(parsed.entries[0].object.body, base);
5282 assert_eq!(parsed.entries[1].object.body, result);
5283 }
5284 }
5285
5286 #[test]
5287 fn bounded_inflate_reserve_caps_attacker_declared_size() {
5288 assert_eq!(bounded_inflate_reserve(u64::MAX as usize, 10), 10 * 1032);
5290 assert_eq!(
5292 bounded_inflate_reserve(usize::MAX, usize::MAX),
5293 MAX_INFLATE_RESERVE
5294 );
5295 assert_eq!(bounded_inflate_reserve(1000, 500), 1000);
5299 assert_eq!(bounded_inflate_reserve(0, 0), 64);
5301 }
5302
5303 #[test]
5304 fn rejects_bundle_pack_payload_with_wrong_object_format() {
5305 let pack = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"bundle\n");
5306 let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha256, "blob", b"bundle\n")
5307 .expect("test operation should succeed");
5308 let bundle_bytes =
5309 format!("# v3 git bundle\n@object-format=sha256\n{oid} refs/heads/main\n\n")
5310 .into_bytes()
5311 .into_iter()
5312 .chain(pack)
5313 .collect::<Vec<_>>();
5314 let bundle = Bundle::parse(&bundle_bytes, ObjectFormat::Sha1)
5315 .expect("test operation should succeed");
5316
5317 assert!(PackFile::parse_bundle(&bundle).is_err());
5318 }
5319
5320 fn assert_pack_index_view_matches_owned(index: &[u8], format: ObjectFormat) {
5321 let owned = PackIndex::parse(index, format).expect("test operation should succeed");
5322 let view = PackIndexView::parse(index, format).expect("test operation should succeed");
5323 let owned_view =
5324 PackIndexViewData::parse(Arc::from(index.to_vec().into_boxed_slice()), format)
5325 .expect("test operation should succeed");
5326
5327 assert_eq!(view.version, owned.version);
5328 assert_eq!(view.count, owned.entries.len());
5329 assert_eq!(view.count(), owned.entries.len());
5330 assert_eq!(view.fanout(), &owned.fanout);
5331 assert_eq!(view.pack_checksum, owned.pack_checksum);
5332 assert_eq!(view.index_checksum, owned.index_checksum);
5333 assert_eq!(owned_view.version, owned.version);
5334 assert_eq!(owned_view.count(), owned.entries.len());
5335 assert_eq!(owned_view.fanout(), &owned.fanout);
5336 assert_eq!(owned_view.pack_checksum, owned.pack_checksum);
5337 assert_eq!(owned_view.index_checksum, owned.index_checksum);
5338 for entry in &owned.entries {
5339 let owned_found = owned
5340 .find(&entry.oid)
5341 .expect("test operation should succeed");
5342 let expected = Some(PackIndexLookup {
5343 crc32: owned_found.crc32,
5344 offset: owned_found.offset,
5345 });
5346 assert_eq!(view.find(&entry.oid), expected);
5347 assert_eq!(owned_view.find(&entry.oid), expected);
5348 }
5349 }
5350
5351 #[test]
5352 fn writes_pack_and_index_that_round_trip() {
5353 let object = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec());
5354 let written = PackFile::write_undeltified_sha1(std::slice::from_ref(&object))
5355 .expect("test operation should succeed");
5356 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5357 let index =
5358 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5359 let oid = object
5360 .object_id(ObjectFormat::Sha1)
5361 .expect("test operation should succeed");
5362 assert_eq!(pack.entries[0].object, object);
5363 assert_eq!(index.pack_checksum, pack.checksum);
5364 assert_eq!(
5365 index
5366 .find(&oid)
5367 .expect("test operation should succeed")
5368 .offset,
5369 12
5370 );
5371 }
5372
5373 #[test]
5374 fn pack_index_view_matches_owned_index_for_generated_sha1_pack() {
5375 let objects = (0..8)
5376 .map(|idx| {
5377 EncodedObject::new(
5378 ObjectType::Blob,
5379 format!("borrowed pack index view sha1 object {idx}\n").into_bytes(),
5380 )
5381 })
5382 .collect::<Vec<_>>();
5383 let written = PackFile::write_packed(&objects, ObjectFormat::Sha1)
5384 .expect("test operation should succeed");
5385
5386 assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha1);
5387
5388 let view =
5389 PackIndexView::parse_v2_sha1(&written.index).expect("test operation should succeed");
5390 let missing = sley_core::object_id_for_bytes(
5391 ObjectFormat::Sha1,
5392 "blob",
5393 b"not present in borrowed index\n",
5394 )
5395 .expect("test operation should succeed");
5396 assert_eq!(view.find(&missing), None);
5397 }
5398
5399 #[test]
5400 fn writes_sha256_pack_and_index_that_round_trip() {
5401 let object = EncodedObject::new(ObjectType::Blob, b"hello sha256\n".to_vec());
5402 let written =
5403 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5404 .expect("test operation should succeed");
5405 let pack = PackFile::parse(&written.pack, ObjectFormat::Sha256)
5406 .expect("test operation should succeed");
5407 let index = PackIndex::parse(&written.index, ObjectFormat::Sha256)
5408 .expect("test operation should succeed");
5409 let oid = object
5410 .object_id(ObjectFormat::Sha256)
5411 .expect("test operation should succeed");
5412 assert_eq!(pack.entries[0].object, object);
5413 assert_eq!(index.pack_checksum, pack.checksum);
5414 assert_eq!(index.pack_checksum.format(), ObjectFormat::Sha256);
5415 assert_eq!(index.index_checksum.format(), ObjectFormat::Sha256);
5416 assert_eq!(
5417 index
5418 .find(&oid)
5419 .expect("test operation should succeed")
5420 .offset,
5421 12
5422 );
5423 }
5424
5425 #[test]
5426 fn pack_index_view_matches_owned_index_for_generated_sha256_pack() {
5427 let objects = (0..4)
5428 .map(|idx| {
5429 EncodedObject::new(
5430 ObjectType::Blob,
5431 format!("borrowed pack index view sha256 object {idx}\n").into_bytes(),
5432 )
5433 })
5434 .collect::<Vec<_>>();
5435 let written = PackFile::write_undeltified(&objects, ObjectFormat::Sha256)
5436 .expect("test operation should succeed");
5437
5438 assert_pack_index_view_matches_owned(&written.index, ObjectFormat::Sha256);
5439 }
5440
5441 #[test]
5442 fn indexes_existing_sha256_pack_bytes() {
5443 let object = EncodedObject::new(ObjectType::Blob, b"index raw sha256 pack\n".to_vec());
5444 let written =
5445 PackFile::write_undeltified(std::slice::from_ref(&object), ObjectFormat::Sha256)
5446 .expect("test operation should succeed");
5447
5448 let indexed = PackIndex::write_v2_for_pack(&written.pack, ObjectFormat::Sha256)
5449 .expect("test operation should succeed");
5450 let index = PackIndex::parse(&indexed.index, ObjectFormat::Sha256)
5451 .expect("test operation should succeed");
5452
5453 assert_eq!(indexed.pack_checksum, written.checksum);
5454 assert_eq!(indexed.entries, written.entries);
5455 assert_eq!(index.pack_checksum, written.checksum);
5456 assert_eq!(index.entries, written.entries);
5457 }
5458
5459 #[test]
5460 fn indexes_existing_delta_pack_bytes() {
5461 let (base, changed) = similar_blob_objects();
5462 let options = delta_pack_options(true);
5463 let written = PackFile::write_packed_with_options(
5464 &[base, changed.clone()],
5465 ObjectFormat::Sha1,
5466 &options,
5467 )
5468 .expect("test operation should succeed");
5469
5470 let indexed = PackIndex::write_v2_for_pack_sha1(&written.pack)
5471 .expect("test operation should succeed");
5472 let index =
5473 PackIndex::parse_v2_sha1(&indexed.index).expect("test operation should succeed");
5474 let changed_oid = changed
5475 .object_id(ObjectFormat::Sha1)
5476 .expect("test operation should succeed");
5477
5478 assert_eq!(indexed.pack_checksum, written.checksum);
5479 assert_eq!(indexed.entries, written.entries);
5480 assert_eq!(
5481 index
5482 .find(&changed_oid)
5483 .expect("test operation should succeed")
5484 .offset,
5485 written.entries[1].offset
5486 );
5487 assert_eq!(
5488 index
5489 .find(&changed_oid)
5490 .expect("test operation should succeed")
5491 .crc32,
5492 written.entries[1].crc32
5493 );
5494 }
5495
5496 #[test]
5497 fn writes_ref_delta_pack_and_index_that_round_trip() {
5498 let (base, changed) = similar_blob_objects();
5499 let options = delta_pack_options(false);
5500 let written = PackFile::write_packed_with_options(
5501 &[base.clone(), changed.clone()],
5502 ObjectFormat::Sha1,
5503 &options,
5504 )
5505 .expect("test operation should succeed");
5506 let mut second_offset = written.entries[1].offset as usize;
5507 let header = parse_entry_header(&written.pack, &mut second_offset)
5508 .expect("test operation should succeed");
5509 assert_eq!(header.kind, PackObjectKind::RefDelta);
5510
5511 let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5512 let index =
5513 PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
5514 let oid = changed
5515 .object_id(ObjectFormat::Sha1)
5516 .expect("test operation should succeed");
5517 assert_eq!(pack.entries[0].object, base);
5518 assert_eq!(pack.entries[1].object, changed);
5519 assert_eq!(index.pack_checksum, pack.checksum);
5520 assert_eq!(
5521 index
5522 .find(&oid)
5523 .expect("test operation should succeed")
5524 .offset,
5525 written.entries[1].offset
5526 );
5527 }
5528
5529 #[test]
5530 fn read_object_at_matches_full_parse_for_ofs_delta_pack() {
5531 let (base, changed) = similar_blob_objects();
5532 let options = delta_pack_options(true);
5533 let written = PackFile::write_packed_with_options(
5534 &[base, changed.clone()],
5535 ObjectFormat::Sha1,
5536 &options,
5537 )
5538 .expect("test operation should succeed");
5539 let mut second = written.entries[1].offset as usize;
5541 assert_eq!(
5542 parse_entry_header(&written.pack, &mut second)
5543 .expect("test operation should succeed")
5544 .kind,
5545 PackObjectKind::OfsDelta
5546 );
5547 let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
5549 for po in &parsed.entries {
5550 let got =
5551 read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |_| {
5552 Ok(None)
5553 })
5554 .expect("test operation should succeed");
5555 assert_eq!(*got, po.object, "offset {}", po.entry.offset);
5556 }
5557 }
5558
5559 #[derive(Default)]
5562 struct MapHeaderTypeCache(HashMap<u64, (ObjectType, u64)>);
5563
5564 impl HeaderTypeCache for MapHeaderTypeCache {
5565 fn get(&self, pack_offset: u64) -> Option<(ObjectType, u64)> {
5566 self.0.get(&pack_offset).copied()
5567 }
5568 fn put(&mut self, pack_offset: u64, header: (ObjectType, u64)) {
5569 self.0.insert(pack_offset, header);
5570 }
5571 }
5572
/// The cached header reader must agree with the uncached one on an
/// OFS_DELTA pack, both while the cache is being filled and on a second
/// pass over the same offsets.
#[test]
fn read_object_header_at_cached_matches_uncached_cold_and_warm_for_ofs_delta() {
    let (base, changed) = similar_blob_objects();
    let write_options = delta_pack_options(true);
    let written = PackFile::write_packed_with_options(
        &[base, changed],
        ObjectFormat::Sha1,
        &write_options,
    )
    .expect("test operation should succeed");

    // Precondition: the second entry must have been stored as an OFS_DELTA.
    let mut cursor = written.entries[1].offset as usize;
    let second_header =
        parse_entry_header(&written.pack, &mut cursor).expect("test operation should succeed");
    assert_eq!(second_header.kind, PackObjectKind::OfsDelta);

    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let mut cache = MapHeaderTypeCache::default();

    // First pass: uncached read, then a read through the shared cache.
    for packed in &parsed.entries {
        let offset = packed.entry.offset;
        let uncached = read_object_header_at(&written.pack, offset, ObjectFormat::Sha1, |_| {
            Ok(None)
        })
        .expect("test operation should succeed");
        let expected = (packed.object.object_type, packed.object.body.len() as u64);
        assert_eq!(uncached, expected, "uncached header at offset {}", offset);

        let cold = read_object_header_at_with_cache(
            &written.pack,
            offset,
            ObjectFormat::Sha1,
            |_| Ok(None),
            &mut cache,
        )
        .expect("test operation should succeed");
        assert_eq!(cold, uncached, "cold cache at offset {}", offset);
    }

    // Second pass: the external-base resolver panics if it is ever invoked.
    for packed in &parsed.entries {
        let offset = packed.entry.offset;
        let warm = read_object_header_at_with_cache(
            &written.pack,
            offset,
            ObjectFormat::Sha1,
            |_| panic!("warm cache must not re-walk the chain"),
            &mut cache,
        )
        .expect("test operation should succeed");
        let expected = (packed.object.object_type, packed.object.body.len() as u64);
        assert_eq!(warm, expected, "warm cache at offset {}", offset);
    }
}
5634
/// Reads every entry of a pack written with `delta_pack_options(false)` by
/// offset and checks that the result equals the object produced by a full
/// `PackFile::parse_sha1`. Bases requested by object id are served from the
/// fully parsed pack.
#[test]
fn read_object_at_matches_full_parse_for_ref_delta_pack() {
    let (base, changed) = similar_blob_objects();
    let options = delta_pack_options(false);
    // Neither object is used after the write, so both are moved into the
    // input slice instead of cloning `changed`.
    let written =
        PackFile::write_packed_with_options(&[base, changed], ObjectFormat::Sha1, &options)
            .expect("test operation should succeed");
    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    // Lookup table used by the resolver closure below.
    let by_oid: HashMap<ObjectId, Arc<EncodedObject>> = parsed
        .entries
        .iter()
        .map(|po| (po.entry.oid, Arc::new(po.object.clone())))
        .collect();

    for po in &parsed.entries {
        let got =
            read_object_at_arc(&written.pack, po.entry.offset, ObjectFormat::Sha1, |oid| {
                Ok(by_oid.get(oid).cloned())
            })
            .expect("test operation should succeed");
        assert_eq!(*got, po.object);
    }
}
5660
5661 #[derive(Default)]
5665 struct CountingDeltaCache {
5666 map: std::cell::RefCell<HashMap<u64, Arc<EncodedObject>>>,
5667 hits: std::cell::Cell<usize>,
5668 inserts: std::cell::Cell<usize>,
5669 }
5670
5671 impl PackDeltaCache for CountingDeltaCache {
5672 fn get(&self, offset: u64) -> Option<Arc<EncodedObject>> {
5673 let hit = self.map.borrow().get(&offset).cloned();
5674 if hit.is_some() {
5675 self.hits.set(self.hits.get() + 1);
5676 }
5677 hit
5678 }
5679 fn insert(&self, offset: u64, object: Arc<EncodedObject>) {
5680 self.inserts.set(self.inserts.get() + 1);
5681 self.map.borrow_mut().insert(offset, object);
5682 }
5683 }
5684
/// Reads every entry of a delta pack twice through one shared
/// `CountingDeltaCache`, checks each result against the full parse, and
/// requires that the cache reported at least one hit.
#[test]
fn read_object_at_with_cache_matches_uncached_and_reuses_bases() {
    // Eight blobs sharing a 4096-byte `x` prefix and differing only in a
    // short trailer.
    let objects: Vec<EncodedObject> = (0..8u32)
        .map(|idx| {
            let mut body = vec![b'x'; 4096];
            body.extend_from_slice(format!("\nvariant {idx}\n").as_bytes());
            EncodedObject::new(ObjectType::Blob, body)
        })
        .collect();

    let write_options = delta_pack_options(true);
    let written =
        PackFile::write_packed_with_options(&objects, ObjectFormat::Sha1, &write_options)
            .expect("test operation should succeed");
    let parsed = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");

    let cache = CountingDeltaCache::default();
    for _pass in 0..2 {
        for packed in &parsed.entries {
            let offset = packed.entry.offset;
            let got = read_object_at_with_cache_arc(
                &written.pack,
                offset,
                ObjectFormat::Sha1,
                |_| Ok(None),
                &cache,
            )
            .expect("test operation should succeed");
            assert_eq!(*got, packed.object, "offset {}", offset);
        }
    }

    let hit_count = cache.hits.get();
    assert!(hit_count > 0, "cache never served a warm object");
}
5720
/// Writes a two-object OFS_DELTA pack together with its index and checks
/// that both parse back to the original objects, checksum and offset.
#[test]
fn writes_ofs_delta_pack_and_index_that_round_trip() {
    let (base, changed) = similar_blob_objects();
    let write_options = delta_pack_options(true);
    let written = PackFile::write_packed_with_options(
        &[base.clone(), changed.clone()],
        ObjectFormat::Sha1,
        &write_options,
    )
    .expect("test operation should succeed");

    // The second entry must have been stored as an OFS_DELTA.
    let mut cursor = written.entries[1].offset as usize;
    assert_eq!(
        parse_entry_header(&written.pack, &mut cursor)
            .expect("test operation should succeed")
            .kind,
        PackObjectKind::OfsDelta
    );

    let pack = PackFile::parse_sha1(&written.pack).expect("test operation should succeed");
    let index =
        PackIndex::parse_v2_sha1(&written.index).expect("test operation should succeed");
    let changed_oid = changed
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    assert_eq!(pack.entries[0].object, base);
    assert_eq!(pack.entries[1].object, changed);
    assert_eq!(index.pack_checksum, pack.checksum);

    let indexed = index
        .find(&changed_oid)
        .expect("test operation should succeed");
    assert_eq!(indexed.offset, written.entries[1].offset);
}
5753
/// A hand-built pack whose second entry is a `DeltaKind::Offset` delta must
/// parse into the base body and the reconstructed body, with the object id
/// of the reconstructed blob.
#[test]
fn resolves_ofs_delta_pack_entry() {
    let base_body = b"hello";
    let result_body = b"hello world";
    let pack_bytes =
        two_object_delta_pack(ObjectFormat::Sha1, base_body, result_body, DeltaKind::Offset);

    let parsed = PackFile::parse_sha1(&pack_bytes).expect("test operation should succeed");
    let entries = &parsed.entries;
    assert_eq!(entries.len(), 2);
    assert_eq!(entries[0].object.body, base_body);
    assert_eq!(entries[1].object.body, result_body);

    let expected_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result_body)
        .expect("test operation should succeed");
    assert_eq!(entries[1].entry.oid, expected_oid);
}
5769
/// A hand-built pack whose second entry is a `DeltaKind::Ref` delta must
/// parse into the base body and the reconstructed body, with the object id
/// of the reconstructed blob.
#[test]
fn resolves_ref_delta_pack_entry() {
    let base_body = b"hello";
    let result_body = b"hello world";
    let pack_bytes =
        two_object_delta_pack(ObjectFormat::Sha1, base_body, result_body, DeltaKind::Ref);

    let parsed = PackFile::parse_sha1(&pack_bytes).expect("test operation should succeed");
    let entries = &parsed.entries;
    assert_eq!(entries.len(), 2);
    assert_eq!(entries[0].object.body, base_body);
    assert_eq!(entries[1].object.body, result_body);

    let expected_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result_body)
        .expect("test operation should succeed");
    assert_eq!(entries[1].entry.oid, expected_oid);
}
5785
/// A thin pack must be rejected by the plain parser and accepted by
/// `PackFile::parse_thin` once the resolver supplies the missing base.
#[test]
fn resolves_thin_ref_delta_pack_entry_with_external_base() {
    let base_body = b"hello";
    let result_body = b"hello world";
    let thin_pack = thin_ref_delta_pack(ObjectFormat::Sha1, base_body, result_body);

    // Without access to the base the pack cannot be parsed.
    let plain_parse = PackFile::parse_sha1(&thin_pack);
    assert!(plain_parse.is_err());

    let base_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", base_body)
        .expect("test operation should succeed");
    // The resolver knows exactly one object: the base blob.
    let parsed = PackFile::parse_thin(&thin_pack, ObjectFormat::Sha1, |oid| {
        Ok((oid == &base_oid).then(|| EncodedObject::new(ObjectType::Blob, base_body.to_vec())))
    })
    .expect("test operation should succeed");

    let entries = &parsed.entries;
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].object.body, result_body);

    let expected_oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", result_body)
        .expect("test operation should succeed");
    assert_eq!(entries[0].entry.oid, expected_oid);
}
5811
/// Flipping the lowest bit of the pack's final byte must make
/// `PackFile::parse_sha1` fail.
#[test]
fn rejects_bad_pack_checksum() {
    let mut corrupted = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let outcome = PackFile::parse_sha1(&corrupted);
    assert!(outcome.is_err());
}
5819
/// Flipping the lowest bit of the pack's final byte must make
/// `PackIndex::write_v2_for_pack_sha1` fail.
#[test]
fn raw_pack_index_rejects_bad_pack_checksum() {
    let mut corrupted = single_object_pack(ObjectFormat::Sha1, ObjectType::Blob, b"hello\n");
    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let outcome = PackIndex::write_v2_for_pack_sha1(&corrupted);
    assert!(outcome.is_err());
}
5827
/// `PackIndex::write_v2` must fail when two entries carry the same object
/// id, even though their CRCs and offsets differ.
#[test]
fn pack_index_writer_rejects_duplicate_object_ids() {
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"same\n")
        .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    // (crc32, offset) pairs; every entry reuses the same `oid`.
    let entries: Vec<PackIndexEntry> = [(1, 12), (2, 24)]
        .iter()
        .map(|&(crc32, offset)| PackIndexEntry { oid, crc32, offset })
        .collect();

    let outcome = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum);
    assert!(outcome.is_err());
}
5848
/// A hand-built single-entry index must parse as version 2 and expose the
/// entry's offset and CRC through `find`.
#[test]
fn parses_single_entry_pack_index() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let index_bytes = single_entry_index(
        ObjectFormat::Sha1,
        oid,
        0x1234_5678,
        12,
        pack_checksum.clone(),
    );

    let parsed = PackIndex::parse_v2_sha1(&index_bytes).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let found = parsed.find(&oid).expect("test operation should succeed");
    assert_eq!(found.offset, 12);
    assert_eq!(found.crc32, 0x1234_5678);

    assert_pack_index_view_matches_owned(&index_bytes, ObjectFormat::Sha1);
}
5885
/// A hand-built single-entry v1 index must parse as version 1, report the
/// stored offset and a CRC of zero.
#[test]
fn parses_single_entry_pack_index_v1() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let index_bytes =
        single_entry_index_v1(ObjectFormat::Sha1, oid, 0x1234_5678, pack_checksum.clone());

    let parsed = PackIndex::parse(&index_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.entries.len(), 1);

    let found = parsed.find(&oid).expect("test operation should succeed");
    assert_eq!(found.offset, 0x1234_5678);
    assert_eq!(found.crc32, 0);

    assert_pack_index_view_matches_owned(&index_bytes, ObjectFormat::Sha1);
}
5918
/// Flipping the lowest bit of a v1 index's final byte must make
/// `PackIndex::parse` fail.
#[test]
fn rejects_bad_pack_index_v1_checksum() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let mut corrupted = single_entry_index_v1(ObjectFormat::Sha1, oid, 12, pack_checksum);
    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let outcome = PackIndex::parse(&corrupted, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
5933
/// `PackIndexView` must return the written CRC and offset for entries whose
/// offsets are `0x8000_0000` and above `u32::MAX`.
#[test]
fn pack_index_view_reads_v2_large_offsets() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset a\n")
        .expect("test operation should succeed");
    let second =
        sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"large offset b\n")
            .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let entries = vec![
        PackIndexEntry {
            oid: first,
            crc32: 0x1111_2222,
            offset: 0x8000_0000,
        },
        PackIndexEntry {
            oid: second,
            crc32: 0x3333_4444,
            offset: 0x1_0000_0042,
        },
    ];
    let index_bytes = PackIndex::write_v2(ObjectFormat::Sha1, &entries, &pack_checksum)
        .expect("test operation should succeed");

    assert_pack_index_view_matches_owned(&index_bytes, ObjectFormat::Sha1);

    let view = PackIndexView::parse(&index_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    for PackIndexEntry { oid, crc32, offset } in entries {
        let expected = Some(PackIndexLookup { crc32, offset });
        assert_eq!(view.find(&oid), expected);
    }
}
5971
/// With the index's final byte corrupted, `PackIndexView::parse` must fail
/// while `parse_without_checksum` and
/// `PackIndexViewData::parse_trusted_without_checksum` still parse the bytes
/// and find the entry.
#[test]
fn pack_index_view_default_parse_checks_index_checksum() {
    let oid = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "ce013625030ba8dba906f756967f9e9ca394464a",
    )
    .expect("test operation should succeed");
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    let mut corrupted =
        single_entry_index(ObjectFormat::Sha1, oid, 0x1234_5678, 12, pack_checksum);
    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let checked = PackIndexView::parse(&corrupted, ObjectFormat::Sha1);
    assert!(checked.is_err());

    let view = PackIndexView::parse_without_checksum(&corrupted, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let trusted_view = PackIndexViewData::parse_trusted_without_checksum(
        Arc::from(corrupted.clone().into_boxed_slice()),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");

    // Both unchecked parsers must resolve the entry identically.
    let expected = Some(PackIndexLookup {
        crc32: 0x1234_5678,
        offset: 12,
    });
    assert_eq!(view.find(&oid), expected);
    assert_eq!(trusted_view.find(&oid), expected);
}
6008
/// A written reverse index must parse back to the same fields, and writing
/// the parsed fields again must reproduce the original bytes.
#[test]
fn parses_pack_reverse_index() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let original = PackReverseIndex::write(ObjectFormat::Sha1, &[2, 0, 1], &pack_checksum)
        .expect("test operation should succeed");

    let parsed = PackReverseIndex::parse(&original, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.positions, vec![2, 0, 1]);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    let rewritten =
        PackReverseIndex::write(ObjectFormat::Sha1, &parsed.positions, &parsed.pack_checksum)
            .expect("test operation should succeed");
    assert_eq!(rewritten, original);
}
6027
/// Flipping the lowest bit of a reverse index's final byte must make
/// `PackReverseIndex::parse` fail.
#[test]
fn rejects_bad_pack_reverse_index_checksum() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let mut corrupted = PackReverseIndex::write(ObjectFormat::Sha1, &[0], &pack_checksum)
        .expect("test operation should succeed");
    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let outcome = PackReverseIndex::parse(&corrupted, ObjectFormat::Sha1, 1);
    assert!(outcome.is_err());
}
6038
/// Duplicate positions (`[0, 0]`) and out-of-range positions (`[0, 2]` for
/// two objects) must be rejected by both the parser and the writer.
#[test]
fn rejects_bad_pack_reverse_index_positions() {
    // Parser side: the inputs come from the raw `pack_reverse_index` helper.
    let raw_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let duplicate = pack_reverse_index(ObjectFormat::Sha1, &[0, 0], raw_checksum.clone());
    let duplicate_parse = PackReverseIndex::parse(&duplicate, ObjectFormat::Sha1, 2);
    assert!(duplicate_parse.is_err());

    let out_of_range = pack_reverse_index(ObjectFormat::Sha1, &[0, 2], raw_checksum);
    let out_of_range_parse = PackReverseIndex::parse(&out_of_range, ObjectFormat::Sha1, 2);
    assert!(out_of_range_parse.is_err());

    // Writer side: the first checksum was moved above, so compute it again.
    let writer_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let duplicate_write = PackReverseIndex::write(ObjectFormat::Sha1, &[0, 0], &writer_checksum);
    assert!(duplicate_write.is_err());
    let out_of_range_write =
        PackReverseIndex::write(ObjectFormat::Sha1, &[0, 2], &writer_checksum);
    assert!(out_of_range_write.is_err());
}
6052
/// A written mtimes file must parse back to the same fields, and writing
/// the parsed fields again must reproduce the original bytes.
#[test]
fn parses_pack_mtimes() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let original = PackMtimes::write(
        ObjectFormat::Sha1,
        &[1, 1_700_000_000, u32::MAX],
        &pack_checksum,
    )
    .expect("test operation should succeed");

    let parsed = PackMtimes::parse(&original, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.mtimes, vec![1, 1_700_000_000, u32::MAX]);
    assert_eq!(parsed.pack_checksum, pack_checksum);

    let rewritten = PackMtimes::write(ObjectFormat::Sha1, &parsed.mtimes, &parsed.pack_checksum)
        .expect("test operation should succeed");
    assert_eq!(rewritten, original);
}
6075
/// Flipping the lowest bit of an mtimes file's final byte must make
/// `PackMtimes::parse` fail.
#[test]
fn rejects_bad_pack_mtimes_checksum() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");
    let mut corrupted = PackMtimes::write(ObjectFormat::Sha1, &[1], &pack_checksum)
        .expect("test operation should succeed");
    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let outcome = PackMtimes::parse(&corrupted, ObjectFormat::Sha1, 1);
    assert!(outcome.is_err());
}
6086
/// `PackMtimes::parse` must reject a file holding more mtimes than the
/// expected object count, and a file whose byte 11 was changed even though
/// its trailing checksum was recomputed.
#[test]
fn rejects_bad_pack_mtimes_shape() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha1, b"pack")
        .expect("test operation should succeed");

    // Two mtimes, but the parser is told to expect one object.
    let two_mtimes = pack_mtimes(ObjectFormat::Sha1, &[1, 2], pack_checksum.clone());
    let count_mismatch = PackMtimes::parse(&two_mtimes, ObjectFormat::Sha1, 1);
    assert!(count_mismatch.is_err());

    // NOTE(review): byte 11 is presumably part of the hash-id header field;
    // confirm against the `pack_mtimes` helper.
    let mut wrong_hash = pack_mtimes(ObjectFormat::Sha1, &[1], pack_checksum);
    wrong_hash[11] = 2;
    // Recompute the trailing checksum so only the edited byte is at fault.
    let trailer_start = wrong_hash.len() - ObjectFormat::Sha1.raw_len();
    let trailer = sley_core::digest_bytes(ObjectFormat::Sha1, &wrong_hash[..trailer_start])
        .expect("test operation should succeed");
    wrong_hash[trailer_start..].copy_from_slice(trailer.as_bytes());

    let hash_mismatch = PackMtimes::parse(&wrong_hash, ObjectFormat::Sha1, 1);
    assert!(hash_mismatch.is_err());
}
6102
/// Parses a hand-built two-pack multi-pack index and checks the header
/// fields, per-object lookups and the first two chunk table entries.
#[test]
fn parses_multi_pack_index_header_and_chunk_lookup() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");

    let chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
    );
    let midx_bytes = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);
    let parsed = MultiPackIndex::parse(&midx_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Header fields.
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.pack_count, 2);
    assert_eq!(parsed.pack_names, vec!["pack-a.idx", "pack-b.idx"]);
    assert_eq!(parsed.object_count, 2);
    assert_eq!(parsed.objects.len(), 2);

    // Object lookups; the second offset does not fit in 32 bits.
    let first_hit = parsed.find(&first).expect("test operation should succeed");
    assert_eq!(first_hit.pack_int_id, 0);
    assert_eq!(first_hit.offset, 12);
    let second_hit = parsed.find(&second).expect("test operation should succeed");
    assert_eq!(second_hit.pack_int_id, 1);
    assert_eq!(second_hit.offset, 0x1_0000_0000);

    assert_eq!(parsed.reverse_index, None);
    assert_eq!(parsed.bitmapped_packs, None);

    // Chunk table.
    assert_eq!(parsed.chunks.len(), 5);
    let pnam = &parsed.chunks[0];
    assert_eq!(pnam.id, *b"PNAM");
    assert_eq!(pnam.offset, 84);
    assert_eq!(pnam.len, 24);
    let oidf = &parsed.chunks[1];
    assert_eq!(oidf.id, *b"OIDF");
    assert_eq!(oidf.offset, 108);
    assert_eq!(oidf.len, 1024);
}
6161
/// `MultiPackIndexOidLookup` must report membership, pack name and offset
/// for stored objects, and `None` for an object that was never added.
#[test]
fn raw_multi_pack_index_lookup_finds_pack_and_offset() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");
    let missing = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"missing\n")
        .expect("test operation should succeed");

    let chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first.clone(), 0, 12), (second.clone(), 1, 0x1_0000_0000)],
    );
    let midx_bytes = Arc::new(multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks));
    let lookup = MultiPackIndexOidLookup::parse(midx_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Membership.
    assert!(lookup.contains(&first));
    assert!(lookup.contains(&second));
    assert!(!lookup.contains(&missing));

    // Present objects resolve to their pack name and offset.
    let first_entry = lookup
        .find(&first)
        .expect("test operation should succeed")
        .expect("object should be present");
    let first_pack = lookup.pack_name(first_entry.pack_int_id);
    assert_eq!(first_pack, Some("pack-a.idx"));
    assert_eq!(first_entry.offset, 12);

    let second_entry = lookup
        .find(&second)
        .expect("test operation should succeed")
        .expect("object should be present");
    let second_pack = lookup.pack_name(second_entry.pack_int_id);
    assert_eq!(second_pack, Some("pack-b.idx"));
    assert_eq!(second_entry.offset, 0x1_0000_0000);

    // An absent object yields `Ok(None)`, not an error.
    let absent = lookup
        .find(&missing)
        .expect("test operation should succeed");
    assert!(absent.is_none());
}
6203
/// Flipping the lowest bit of a multi-pack index's final byte must make
/// `MultiPackIndex::parse` fail.
#[test]
fn rejects_bad_multi_pack_index_checksum() {
    let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);
    let mut corrupted = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    let tail = corrupted.len() - 1;
    corrupted[tail] ^= 1;

    let outcome = MultiPackIndex::parse(&corrupted, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
6212
/// `MultiPackIndex::parse` must reject three structurally broken files even
/// though each one carries a freshly recomputed trailing checksum.
#[test]
fn rejects_bad_multi_pack_index_shape() {
    /// Recomputes the trailing SHA-1 checksum over everything before it.
    fn reseal(bytes: &mut [u8]) {
        let trailer_start = bytes.len() - ObjectFormat::Sha1.raw_len();
        let trailer = sley_core::digest_bytes(ObjectFormat::Sha1, &bytes[..trailer_start])
            .expect("test operation should succeed");
        bytes[trailer_start..].copy_from_slice(trailer.as_bytes());
    }

    let chunks = midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]);

    // NOTE(review): byte 5 is presumably the hash-id header field; confirm
    // against the `multi_pack_index` helper.
    let mut wrong_hash = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    wrong_hash[5] = 2;
    reseal(&mut wrong_hash);
    let wrong_hash_parse = MultiPackIndex::parse(&wrong_hash, ObjectFormat::Sha1);
    assert!(wrong_hash_parse.is_err());

    // NOTE(review): byte 12 presumably falls inside the chunk table's
    // terminating entry for this empty chunk list; confirm.
    let mut missing_terminator = multi_pack_index(ObjectFormat::Sha1, 1, 0, &chunks);
    missing_terminator[12] = b'B';
    reseal(&mut missing_terminator);
    let missing_terminator_parse =
        MultiPackIndex::parse(&missing_terminator, ObjectFormat::Sha1);
    assert!(missing_terminator_parse.is_err());

    // Bytes 16..24 are overwritten with a big-endian zero.
    let mut bad_offset = multi_pack_index(
        ObjectFormat::Sha1,
        2,
        0,
        &midx_chunks_with_pack_names(ObjectFormat::Sha1, Vec::new(), &[]),
    );
    bad_offset[16..24].copy_from_slice(&0u64.to_be_bytes());
    reseal(&mut bad_offset);
    let bad_offset_parse = MultiPackIndex::parse(&bad_offset, ObjectFormat::Sha1);
    assert!(bad_offset_parse.is_err());
}
6246
/// Exercises pack-name validation: no chunks at all, too few names, bad
/// padding, and unsorted names. Unsorted names fail when the helper's second
/// argument is 1 and parse when it is 2.
// NOTE(review): that second argument is presumably the format version, as
// the `_v1`/`_v2` local names suggest; confirm against `multi_pack_index`.
#[test]
fn rejects_bad_multi_pack_index_pack_names() {
    // No chunks at all.
    let missing = multi_pack_index(ObjectFormat::Sha1, 2, 1, &[]);
    assert!(MultiPackIndex::parse(&missing, ObjectFormat::Sha1).is_err());

    // One name supplied while the helper is given a count of two.
    let too_few_chunks =
        midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0".to_vec(), &[]);
    let too_few = multi_pack_index(ObjectFormat::Sha1, 2, 2, &too_few_chunks);
    assert!(MultiPackIndex::parse(&too_few, ObjectFormat::Sha1).is_err());

    // Trailing bytes after the name are `xxxx` rather than NULs.
    let bad_padding_chunks =
        midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0xxxx".to_vec(), &[]);
    let bad_padding = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_padding_chunks);
    assert!(MultiPackIndex::parse(&bad_padding, ObjectFormat::Sha1).is_err());

    // Unsorted names with a second argument of 1: rejected.
    let unsorted_v1_chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-b.idx\0pack-a.idx\0".to_vec(),
        &[],
    );
    let unsorted_v1 = multi_pack_index(ObjectFormat::Sha1, 1, 2, &unsorted_v1_chunks);
    assert!(MultiPackIndex::parse(&unsorted_v1, ObjectFormat::Sha1).is_err());

    // Unsorted names with a second argument of 2: accepted, order preserved.
    let unsorted_v2_chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-b.idx\0pack-a.idx\0".to_vec(),
        &[],
    );
    let unsorted_v2 = multi_pack_index(ObjectFormat::Sha1, 2, 2, &unsorted_v2_chunks);
    let parsed = MultiPackIndex::parse(&unsorted_v2, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
}
6294
/// `MultiPackIndex::parse` must reject broken object tables: a missing OIDF
/// chunk, an all-zero fanout, an unsorted OIDL, a pack id with no matching
/// name, a large offset without a LOFF chunk, and a one-byte LOFF chunk.
#[test]
fn rejects_bad_multi_pack_index_object_tables() {
    let oid_a = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "1111111111111111111111111111111111111111",
    )
    .expect("test operation should succeed");
    let oid_b = ObjectId::from_hex(
        ObjectFormat::Sha1,
        "2222222222222222222222222222222222222222",
    )
    .expect("test operation should succeed");

    // Only a PNAM chunk: OIDF is missing.
    let missing_oidf = multi_pack_index(
        ObjectFormat::Sha1,
        2,
        1,
        &[(*b"PNAM", b"pack-a.idx\0\0".to_vec())],
    );
    assert!(MultiPackIndex::parse(&missing_oidf, ObjectFormat::Sha1).is_err());

    // The fanout is all zeros although OIDL holds one object id.
    let bad_fanout_chunks = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", vec![0; 256 * 4]),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (*b"OOFF", midx_ooff_entries(&[(0, 12)], &mut Vec::new())),
    ];
    let bad_fanout = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_fanout_chunks);
    assert!(MultiPackIndex::parse(&bad_fanout, ObjectFormat::Sha1).is_err());

    // OIDL lists `oid_b` before `oid_a`.
    let mut oid_lookup = oid_b.as_bytes().to_vec();
    oid_lookup.extend_from_slice(oid_a.as_bytes());
    let unsorted_chunks = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", midx_oid_fanout(&[oid_a.clone(), oid_b.clone()])),
        (*b"OIDL", oid_lookup),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 12), (0, 24)], &mut Vec::new()),
        ),
    ];
    let unsorted = multi_pack_index(ObjectFormat::Sha1, 2, 1, &unsorted_chunks);
    assert!(MultiPackIndex::parse(&unsorted, ObjectFormat::Sha1).is_err());

    // The object refers to pack id 1 while only one pack name exists.
    let bad_pack_chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0\0".to_vec(),
        &[(oid_a.clone(), 1, 12)],
    );
    let bad_pack = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_pack_chunks);
    assert!(MultiPackIndex::parse(&bad_pack, ObjectFormat::Sha1).is_err());

    // A 64-bit offset is recorded, but no LOFF chunk is emitted for it.
    let mut large_offsets = Vec::new();
    let missing_loff_chunks = vec![
        (*b"PNAM", b"pack-a.idx\0\0".to_vec()),
        (*b"OIDF", midx_oid_fanout(std::slice::from_ref(&oid_a))),
        (*b"OIDL", oid_a.as_bytes().to_vec()),
        (
            *b"OOFF",
            midx_ooff_entries(&[(0, 0x1_0000_0000)], &mut large_offsets),
        ),
    ];
    let missing_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &missing_loff_chunks);
    assert!(MultiPackIndex::parse(&missing_loff, ObjectFormat::Sha1).is_err());

    // A LOFF chunk holding a single byte.
    let mut bad_loff_chunks =
        midx_chunks_with_pack_names(ObjectFormat::Sha1, b"pack-a.idx\0\0".to_vec(), &[]);
    bad_loff_chunks.push((*b"LOFF", vec![0]));
    let bad_loff = multi_pack_index(ObjectFormat::Sha1, 2, 1, &bad_loff_chunks);
    assert!(MultiPackIndex::parse(&bad_loff, ObjectFormat::Sha1).is_err());
}
6370
/// A multi-pack index carrying RIDX and BTMP chunks must expose them as
/// `reverse_index` and `bitmapped_packs`.
#[test]
fn parses_multi_pack_index_bitmap_chunks() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");

    let mut chunks = midx_chunks_with_pack_names(
        ObjectFormat::Sha1,
        b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
        &[(first, 0, 12), (second, 1, 24)],
    );
    chunks.extend([
        (*b"RIDX", midx_u32_table(&[1, 0])),
        (*b"BTMP", midx_bitmap_packs(&[(0, 1), (1, 1)])),
    ]);
    let midx_bytes = multi_pack_index(ObjectFormat::Sha1, 2, 2, &chunks);

    let parsed = MultiPackIndex::parse(&midx_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.reverse_index, Some(vec![1, 0]));

    // Same (bitmap_pos, bitmap_nr) pairs that were fed to the BTMP chunk.
    let expected_packs: Vec<MultiPackBitmapPack> = [(0, 1), (1, 1)]
        .iter()
        .map(|&(bitmap_pos, bitmap_nr)| MultiPackBitmapPack {
            bitmap_pos,
            bitmap_nr,
        })
        .collect();
    assert_eq!(parsed.bitmapped_packs, Some(expected_packs));
}
6403
/// `MultiPackIndex::write` output must parse back with the same pack names
/// and per-object pack ids and offsets, and must contain a LOFF chunk for
/// the offset that exceeds 32 bits.
#[test]
fn writes_multi_pack_index_that_round_trips() {
    let first = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"first object\n")
        .expect("test operation should succeed");
    let second = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"second object\n")
        .expect("test operation should succeed");

    let midx_bytes = MultiPackIndex::write(
        ObjectFormat::Sha1,
        2,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[
            MultiPackIndexEntry {
                oid: second.clone(),
                pack_int_id: 0,
                offset: 0x1_0000_0000,
            },
            MultiPackIndexEntry {
                oid: first.clone(),
                pack_int_id: 1,
                offset: 12,
            },
        ],
    )
    .expect("test operation should succeed");

    let parsed = MultiPackIndex::parse(&midx_bytes, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_names, vec!["pack-b.idx", "pack-a.idx"]);
    assert_eq!(parsed.object_count, 2);

    let first_hit = parsed.find(&first).expect("test operation should succeed");
    assert_eq!(first_hit.pack_int_id, 1);
    assert_eq!(first_hit.offset, 12);

    let second_hit = parsed.find(&second).expect("test operation should succeed");
    assert_eq!(second_hit.pack_int_id, 0);
    assert_eq!(second_hit.offset, 0x1_0000_0000);

    let has_loff = parsed.chunks.iter().any(|chunk| chunk.id == *b"LOFF");
    assert!(has_loff);
}
6464
/// `MultiPackIndex::write` must fail for each invalid input below; the
/// cases differ in the second argument, the pack names, or the entries.
#[test]
fn write_multi_pack_index_rejects_invalid_inputs() {
    let oid = sley_core::object_id_for_bytes(ObjectFormat::Sha1, "blob", b"object\n")
        .expect("test operation should succeed");

    // Second argument of 3.
    let bad_version = MultiPackIndex::write(ObjectFormat::Sha1, 3, &["pack-a.idx".into()], &[]);
    assert!(bad_version.is_err());

    // Second argument of 1 with pack names out of sorted order.
    let unsorted_names = MultiPackIndex::write(
        ObjectFormat::Sha1,
        1,
        &["pack-b.idx".into(), "pack-a.idx".into()],
        &[],
    );
    assert!(unsorted_names.is_err());

    // Pack name containing a `/`.
    let slash_in_name =
        MultiPackIndex::write(ObjectFormat::Sha1, 2, &["pack/a.idx".into()], &[]);
    assert!(slash_in_name.is_err());

    // Entry referring to pack id 1 when only one name is supplied.
    let unknown_pack = MultiPackIndex::write(
        ObjectFormat::Sha1,
        2,
        &["pack-a.idx".into()],
        &[MultiPackIndexEntry {
            oid,
            pack_int_id: 1,
            offset: 12,
        }],
    );
    assert!(unknown_pack.is_err());

    // Two entries sharing one object id.
    let duplicate_oid = MultiPackIndex::write(
        ObjectFormat::Sha1,
        2,
        &["pack-a.idx".into()],
        &[
            MultiPackIndexEntry {
                oid,
                pack_int_id: 0,
                offset: 12,
            },
            MultiPackIndexEntry {
                oid,
                pack_int_id: 0,
                offset: 24,
            },
        ],
    );
    assert!(duplicate_oid.is_err());
}
6514
/// Hand-built multi-pack indexes with malformed `RIDX` / `BTMP` chunks must fail to parse.
#[test]
fn rejects_bad_multi_pack_index_bitmap_chunks() {
    let format = ObjectFormat::Sha1;
    let first = ObjectId::from_hex(format, "1111111111111111111111111111111111111111")
        .expect("test operation should succeed");
    let second = ObjectId::from_hex(format, "2222222222222222222222222222222222222222")
        .expect("test operation should succeed");

    // RIDX table that lists position 0 twice.
    let repeated_ridx = {
        let mut chunks = midx_chunks_with_pack_names(
            format,
            b"pack-a.idx\0\0".to_vec(),
            &[(first, 0, 12), (second, 0, 24)],
        );
        chunks.push((*b"RIDX", midx_u32_table(&[0, 0])));
        multi_pack_index(format, 2, 1, &chunks)
    };
    assert!(MultiPackIndex::parse(&repeated_ridx, format).is_err());

    // BTMP chunk with a single record although two packs are named.
    let undersized_btmp = {
        let mut chunks = midx_chunks_with_pack_names(
            format,
            b"pack-a.idx\0pack-b.idx\0\0\0".to_vec(),
            &[(first, 0, 12), (second, 1, 24)],
        );
        chunks.push((*b"BTMP", midx_bitmap_packs(&[(0, 1)])));
        multi_pack_index(format, 2, 2, &chunks)
    };
    assert!(MultiPackIndex::parse(&undersized_btmp, format).is_err());

    // BTMP record (1, 2) for an index that holds only two objects.
    let overflowing_btmp = {
        let mut chunks = midx_chunks_with_pack_names(
            format,
            b"pack-a.idx\0\0".to_vec(),
            &[(first, 0, 12), (second, 0, 24)],
        );
        chunks.push((*b"BTMP", midx_bitmap_packs(&[(1, 2)])));
        multi_pack_index(format, 2, 1, &chunks)
    };
    assert!(MultiPackIndex::parse(&overflowing_btmp, format).is_err());
}
6555
/// A bitmap index built with the hash-cache option parses back with every field intact.
#[test]
fn parses_pack_bitmap_index_with_hash_cache() {
    let format = ObjectFormat::Sha1;
    let option_bits = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    let pack_digest =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let raw = pack_bitmap_index(
        format,
        3,
        option_bits,
        &pack_digest,
        &[(2, 0, 1, &[0b101])],
        Some(&[0x1111_1111, 0x2222_2222, 0x3333_3333]),
    );

    let parsed =
        PackBitmapIndex::parse(&raw, format, 3).expect("test operation should succeed");

    // Header fields.
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha1);
    assert_eq!(parsed.options, option_bits);
    assert_eq!(parsed.pack_checksum, pack_digest);

    // Type bitmaps are sized by the object count handed to the builder.
    assert_eq!(parsed.type_bitmaps.commits.bit_size, 3);
    assert_eq!(parsed.type_bitmaps.trees.bit_size, 3);

    // The single bitmap entry is reachable through its index position.
    assert_eq!(parsed.entries.len(), 1);
    let only_entry = parsed
        .entry_for_index_position(2)
        .expect("test operation should succeed");
    assert_eq!(only_entry.xor_offset, 0);
    assert_eq!(only_entry.flags, 1);
    assert_eq!(only_entry.bitmap.words, ewah_literal_words(&[0b101]));

    // The trailing name-hash cache is preserved in order.
    assert_eq!(
        parsed.name_hash_cache,
        Some(vec![0x1111_1111, 0x2222_2222, 0x3333_3333])
    );
}
6592
/// The bitmap index parser also accepts SHA-256 checksums and no hash cache.
#[test]
fn parses_pack_bitmap_index_sha256() {
    let format = ObjectFormat::Sha256;
    let pack_digest =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let raw = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_digest,
        &[(0, 0, 0, &[0b11])],
        None,
    );

    let parsed =
        PackBitmapIndex::parse(&raw, format, 2).expect("test operation should succeed");

    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_digest);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    assert_eq!(parsed.entries[0].object_position, 0);
    assert_eq!(parsed.name_hash_cache, None);
}
6615
/// Parses a `.bitmap` file produced by the system `git` binary (`git repack -adb`)
/// and checks that it agrees with the pack index git wrote next to it.
///
/// Requires a `git` executable on `PATH`.
#[test]
fn parses_upstream_git_written_pack_bitmap_index() {
    /// Deletes the scratch repository when dropped, so the directory is cleaned up
    /// even when an assertion or git invocation panics part-way through the test.
    struct RemoveOnDrop(PathBuf);

    impl Drop for RemoveOnDrop {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the real test outcome.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    let scratch = RemoveOnDrop(unique_temp_dir("git-pack-bitmap-upstream"));
    let root = scratch.0.as_path();
    fs::create_dir_all(root).expect("test operation should succeed");

    run_git_success(root, &["init", "-q", "-b", "main"]);
    // Two empty commits, created with an inline identity so no git config is needed.
    for message in ["one", "two"] {
        run_git_success(
            root,
            &[
                "-c",
                "user.name=Example User",
                "-c",
                "user.email=example@example.invalid",
                "commit",
                "--allow-empty",
                "-q",
                "-m",
                message,
            ],
        );
    }
    run_git_success(root, &["repack", "-adb"]);

    let pack_dir = root.join(".git").join("objects").join("pack");
    let idx_path = single_path_with_extension(&pack_dir, "idx");
    let bitmap_path = single_path_with_extension(&pack_dir, "bitmap");

    let index = PackIndex::parse(
        &fs::read(idx_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
    )
    .expect("test operation should succeed");
    let bitmap = PackBitmapIndex::parse(
        &fs::read(bitmap_path).expect("test operation should succeed"),
        ObjectFormat::Sha1,
        index.entries.len(),
    )
    .expect("test operation should succeed");

    assert_eq!(bitmap.pack_checksum, index.pack_checksum);
    assert!(!bitmap.entries.is_empty());
}
6670
/// Corrupting the signature, version, option bits or trailing checksum of a valid
/// bitmap index must each make `PackBitmapIndex::parse` fail.
#[test]
fn rejects_bad_pack_bitmap_index_header_and_checksum() {
    let format = ObjectFormat::Sha1;
    let pack_digest =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let pristine = pack_bitmap_index(
        format,
        1,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_digest,
        &[(0, 0, 0, &[1])],
        None,
    );
    let is_rejected = |candidate: &Vec<u8>| PackBitmapIndex::parse(candidate, format, 1).is_err();

    // First signature byte overwritten (checksum left stale).
    {
        let mut bytes = pristine.clone();
        bytes[0] = b'X';
        assert!(is_rejected(&bytes));
    }

    // Byte 5 set to 2, with the trailing checksum recomputed.
    {
        let mut bytes = pristine.clone();
        bytes[5] = 2;
        refresh_trailing_checksum(format, &mut bytes);
        assert!(is_rejected(&bytes));
    }

    // Byte 7 set to 0x20, with the trailing checksum recomputed.
    {
        let mut bytes = pristine.clone();
        bytes[7] = 0x20;
        refresh_trailing_checksum(format, &mut bytes);
        assert!(is_rejected(&bytes));
    }

    // Lowest bit of the final checksum byte flipped.
    let mut bytes = pristine;
    let final_index = bytes.len() - 1;
    bytes[final_index] ^= 1;
    assert!(is_rejected(&bytes));
}
6703
/// Truncated data, an out-of-range object position and an invalid XOR offset must
/// each be rejected by `PackBitmapIndex::parse`.
#[test]
fn rejects_bad_pack_bitmap_index_ewah_and_entries() {
    let format = ObjectFormat::Sha1;
    let pack_digest =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let is_rejected = |candidate: &Vec<u8>| PackBitmapIndex::parse(candidate, format, 2).is_err();

    // Cut one byte more than a checksum's length off a valid two-entry index,
    // then re-seal it with a fresh trailing checksum.
    let mut shortened = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_digest,
        &[(0, 0, 0, &[0b01]), (1, 1, 0, &[0b11])],
        None,
    );
    let kept = shortened.len() - format.raw_len() - 1;
    shortened.truncate(kept);
    refresh_trailing_checksum(format, &mut shortened);
    assert!(is_rejected(&shortened));

    // Entry position 2 with only two objects; checked before and after the
    // trailing checksum is recomputed.
    let mut bad_position = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_digest,
        &[(2, 0, 0, &[0b01])],
        None,
    );
    assert!(is_rejected(&bad_position));
    refresh_trailing_checksum(format, &mut bad_position);
    assert!(is_rejected(&bad_position));

    // The first entry asks for XOR offset 1 although no earlier entry exists.
    let bad_xor = pack_bitmap_index(
        format,
        2,
        PackBitmapIndex::OPTION_FULL_DAG,
        &pack_digest,
        &[(0, 1, 0, &[0b01])],
        None,
    );
    assert!(is_rejected(&bad_xor));
}
6744
/// A one-object SHA-256 pack index parses and reports the stored offset and CRC.
#[test]
fn parses_single_entry_pack_index_sha256() {
    let format = ObjectFormat::Sha256;
    let blob_id = sley_core::object_id_for_bytes(format, "blob", b"hello sha256\n")
        .expect("test operation should succeed");
    let pack_digest =
        sley_core::digest_bytes(format, b"pack").expect("test operation should succeed");
    let raw_index = single_entry_index(format, blob_id, 0x1234_5678, 12, pack_digest);

    let parsed = PackIndex::parse(&raw_index, format).expect("test operation should succeed");
    assert_eq!(parsed.version, 2);
    assert_eq!(parsed.pack_checksum, pack_digest);
    assert_eq!(parsed.entries.len(), 1);

    let stored_offset = parsed
        .find(&blob_id)
        .expect("test operation should succeed")
        .offset;
    assert_eq!(stored_offset, 12);

    let stored_crc = parsed
        .find(&blob_id)
        .expect("test operation should succeed")
        .crc32;
    assert_eq!(stored_crc, 0x1234_5678);

    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    assert_pack_index_view_matches_owned(&raw_index, format);
}
6780
/// SHA-1 instance of the shared deltify-and-round-trip scenario.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
6785
/// SHA-256 instance of the shared deltify-and-round-trip scenario.
#[test]
fn write_packed_deltifies_similar_blobs_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_packed_deltifies_similar_blobs_and_round_trips(format);
}
6790
/// Packing the same object twice must be reported as an error.
#[test]
fn write_packed_rejects_duplicate_objects() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let twice = [blob.clone(), blob];
    let outcome = PackFile::write_packed(&twice, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
6796
/// Caller-supplied ids are still checked: a repeated id and an id of the wrong
/// hash format are both rejected.
#[test]
fn write_packed_with_known_ids_validates_ids_before_trusting_them() {
    let blob = EncodedObject::new(ObjectType::Blob, b"same\n".to_vec());
    let sha1_id = blob
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let sha256_id = blob
        .object_id(ObjectFormat::Sha256)
        .expect("test operation should succeed");

    // The same (id, object) pair listed twice.
    let repeated = [
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
        PackInput {
            oid: &sha1_id,
            object: &blob,
        },
    ];
    let outcome = PackFile::write_packed_with_known_ids(&repeated, ObjectFormat::Sha1);
    assert!(outcome.is_err());

    // A SHA-256 id handed to a SHA-1 pack write.
    let mismatched = [PackInput {
        oid: &sha256_id,
        object: &blob,
    }];
    let outcome = PackFile::write_packed_with_known_ids(&mismatched, ObjectFormat::Sha1);
    assert!(outcome.is_err());
}
6824
6825 fn write_packed_deltifies_similar_blobs_and_round_trips(format: ObjectFormat) {
6826 let objects = similar_blob_family(8);
6827 let packed =
6828 PackFile::write_packed(&objects, format).expect("test operation should succeed");
6829 let undeltified =
6830 PackFile::write_undeltified(&objects, format).expect("test operation should succeed");
6831
6832 assert!(
6835 packed.pack.len() < undeltified.pack.len(),
6836 "expected delta pack ({}) smaller than undeltified pack ({})",
6837 packed.pack.len(),
6838 undeltified.pack.len()
6839 );
6840
6841 let kinds = pack_entry_kinds(&packed.pack, format);
6843 let delta_count = kinds
6844 .iter()
6845 .filter(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta))
6846 .count();
6847 assert!(
6848 delta_count >= 1,
6849 "expected at least one delta entry, found kinds {kinds:?}"
6850 );
6851
6852 let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
6854 assert_eq!(parsed.entries.len(), objects.len());
6855 for object in &objects {
6856 let oid = object
6857 .object_id(format)
6858 .expect("test operation should succeed");
6859 let found = parsed
6860 .entries
6861 .iter()
6862 .find(|entry| entry.entry.oid == oid)
6863 .unwrap_or_else(|| panic!("object {oid} missing from parsed pack"));
6864 assert_eq!(&found.object, object, "object {oid} did not round-trip");
6865 }
6866
6867 let index = PackIndex::parse(&packed.index, format).expect("test operation should succeed");
6869 assert_eq!(index.pack_checksum, packed.checksum);
6870 for object in &objects {
6871 let oid = object
6872 .object_id(format)
6873 .expect("test operation should succeed");
6874 assert!(index.find(&oid).is_some(), "index missing {oid}");
6875 }
6876 }
6877
/// With default options the writer uses ofs-delta entries and never ref-delta.
#[test]
fn write_packed_emits_ofs_delta_by_default() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(6);
    let packed = PackFile::write_packed(&blobs, format).expect("test operation should succeed");
    let kinds = pack_entry_kinds(&packed.pack, format);

    let has_ofs = kinds.contains(&PackObjectKind::OfsDelta);
    assert!(
        has_ofs,
        "expected an ofs-delta entry by default, found {kinds:?}"
    );

    let has_ref = kinds.contains(&PackObjectKind::RefDelta);
    assert!(
        !has_ref,
        "default self-contained pack must not use ref-delta, found {kinds:?}"
    );

    // The pack must also parse without any external bases.
    assert!(PackFile::parse(&packed.pack, format).is_ok());
}
6895
/// With `prefer_ofs_delta` switched off the writer uses ref-delta entries only.
#[test]
fn write_packed_can_emit_ref_delta() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(6);
    let ref_delta_options = PackWriteOptions::new().with_prefer_ofs_delta(false);
    let packed = PackFile::write_packed_with_options(&blobs, format, &ref_delta_options)
        .expect("test operation should succeed");
    let kinds = pack_entry_kinds(&packed.pack, format);

    let has_ref = kinds.contains(&PackObjectKind::RefDelta);
    assert!(has_ref, "expected a ref-delta entry, found {kinds:?}");

    let has_ofs = kinds.contains(&PackObjectKind::OfsDelta);
    assert!(
        !has_ofs,
        "ref-delta mode must not emit ofs-delta, found {kinds:?}"
    );

    // The resulting pack still parses and holds every input object.
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), blobs.len());
}
6918
/// For several depth limits, no delta chain in the written pack may be longer than
/// the configured depth, and every object must still round-trip.
#[test]
fn write_packed_bounds_delta_chain_depth() {
    let format = ObjectFormat::Sha1;
    let chain = incremental_blob_chain(20);

    for max_depth in [1usize, 2, 5] {
        let options = PackWriteOptions::new()
            .with_window(20)
            .with_depth(max_depth);
        let packed = PackFile::write_packed_with_options(&chain, format, &options)
            .expect("test operation should succeed");

        // Deepest chain observed in the pack (0 when the pack has no entries).
        let mut observed = 0usize;
        for depth in pack_entry_depths(&packed.pack, format) {
            observed = observed.max(depth);
        }
        assert!(
            observed <= max_depth,
            "max chain depth {observed} exceeded bound {max_depth}"
        );

        let parsed =
            PackFile::parse(&packed.pack, format).expect("test operation should succeed");
        for expected in chain.iter() {
            let oid = expected
                .object_id(format)
                .expect("test operation should succeed");
            let found = parsed
                .entries
                .iter()
                .find(|entry| entry.entry.oid == oid)
                .expect("test operation should succeed");
            assert_eq!(&found.object, expected);
        }
    }
}
6957
/// A depth limit of zero must turn delta compression off entirely.
#[test]
fn write_packed_depth_zero_stores_everything_undeltified() {
    let format = ObjectFormat::Sha1;
    let blobs = similar_blob_family(5);
    let no_delta_options = PackWriteOptions::new().with_depth(0);
    let packed = PackFile::write_packed_with_options(&blobs, format, &no_delta_options)
        .expect("test operation should succeed");
    let kinds = pack_entry_kinds(&packed.pack, format);

    let any_delta = kinds
        .iter()
        .any(|kind| matches!(kind, PackObjectKind::OfsDelta | PackObjectKind::RefDelta));
    assert!(
        !any_delta,
        "depth 0 must disable deltas, found {kinds:?}"
    );
}
6972
/// SHA-1 instance of the shared thin-pack scenario.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha1() {
    let format = ObjectFormat::Sha1;
    write_thin_uses_external_base_and_round_trips(format);
}
6977
/// SHA-256 instance of the shared thin-pack scenario.
#[test]
fn write_thin_uses_external_base_and_round_trips_sha256() {
    let format = ObjectFormat::Sha256;
    write_thin_uses_external_base_and_round_trips(format);
}
6982
6983 fn write_thin_uses_external_base_and_round_trips(format: ObjectFormat) {
6984 let base = blob_with_marker("EXTERNAL-BASE");
6987 let target = blob_with_marker("EXTERNAL-TARGET");
6988 let base_oid = base
6989 .object_id(format)
6990 .expect("test operation should succeed");
6991
6992 let mut external = HashMap::new();
6993 external.insert(base_oid, base.clone());
6994 let packed = PackFile::write_thin(std::slice::from_ref(&target), format, external)
6995 .expect("test operation should succeed");
6996
6997 let kinds = pack_entry_kinds(&packed.pack, format);
6999 assert_eq!(kinds, vec![PackObjectKind::RefDelta]);
7000
7001 let mut offset = 12usize;
7003 let header =
7004 parse_entry_header(&packed.pack, &mut offset).expect("test operation should succeed");
7005 assert_eq!(header.kind, PackObjectKind::RefDelta);
7006 let referenced =
7007 ObjectId::from_raw(format, &packed.pack[offset..offset + format.raw_len()])
7008 .expect("test operation should succeed");
7009 assert_eq!(referenced, base_oid);
7010
7011 assert!(PackFile::parse(&packed.pack, format).is_err());
7013
7014 let parsed = PackFile::parse_thin(&packed.pack, format, |oid| {
7016 if oid == &base_oid {
7017 Ok(Some(base.clone()))
7018 } else {
7019 Ok(None)
7020 }
7021 })
7022 .expect("test operation should succeed");
7023 assert_eq!(parsed.entries.len(), 1);
7024 assert_eq!(parsed.entries[0].object, target);
7025 }
7026
/// Objects of different types with unrelated content all survive a pack round trip.
#[test]
fn write_packed_preserves_distinct_objects_with_no_similarity() {
    let format = ObjectFormat::Sha1;
    let inputs = vec![
        EncodedObject::new(ObjectType::Blob, b"alpha distinct\n".to_vec()),
        EncodedObject::new(ObjectType::Tree, Vec::<u8>::new()),
        EncodedObject::new(ObjectType::Commit, b"tree 0000\n".to_vec()),
    ];

    let packed = PackFile::write_packed(&inputs, format).expect("test operation should succeed");
    let parsed = PackFile::parse(&packed.pack, format).expect("test operation should succeed");

    assert_eq!(parsed.entries.len(), inputs.len());
    for input in inputs.iter() {
        let oid = input
            .object_id(format)
            .expect("test operation should succeed");
        let present = parsed.entries.iter().any(|entry| entry.entry.oid == oid);
        assert!(present);
    }
}
7048
7049 fn similar_blob_family(count: usize) -> Vec<EncodedObject> {
7053 let mut common_head = Vec::new();
7054 for _ in 0..200 {
7055 common_head.extend_from_slice(b"shared header line for delta testing\n");
7056 }
7057 let mut common_tail = Vec::new();
7058 for _ in 0..200 {
7059 common_tail.extend_from_slice(b"shared trailer line for delta testing\n");
7060 }
7061 (0..count)
7062 .map(|idx| {
7063 let mut body = common_head.clone();
7064 body.extend_from_slice(format!("UNIQUE MIDDLE MARKER NUMBER {idx}\n").as_bytes());
7065 body.extend_from_slice(&common_tail);
7066 EncodedObject::new(ObjectType::Blob, body)
7067 })
7068 .collect()
7069 }
7070
7071 fn incremental_blob_chain(count: usize) -> Vec<EncodedObject> {
7074 let mut body = Vec::new();
7075 for _ in 0..100 {
7076 body.extend_from_slice(b"baseline content shared across the whole chain\n");
7077 }
7078 let mut objects = Vec::with_capacity(count);
7079 for idx in 0..count {
7080 body.extend_from_slice(format!("appended unique line {idx}\n").as_bytes());
7081 objects.push(EncodedObject::new(ObjectType::Blob, body.clone()));
7082 }
7083 objects
7084 }
7085
7086 fn blob_with_marker(marker: &str) -> EncodedObject {
7087 let mut body = Vec::new();
7088 for _ in 0..150 {
7089 body.extend_from_slice(b"common body shared between base and target\n");
7090 }
7091 body.extend_from_slice(marker.as_bytes());
7092 body.push(b'\n');
7093 for _ in 0..150 {
7094 body.extend_from_slice(b"more common body shared between objects\n");
7095 }
7096 EncodedObject::new(ObjectType::Blob, body)
7097 }
7098
7099 fn pack_entry_kinds(pack: &[u8], format: ObjectFormat) -> Vec<PackObjectKind> {
7101 pack_entry_descriptors(pack, format)
7102 .into_iter()
7103 .map(|descriptor| descriptor.kind)
7104 .collect()
7105 }
7106
7107 fn pack_entry_depths(pack: &[u8], format: ObjectFormat) -> Vec<usize> {
7111 let descriptors = pack_entry_descriptors(pack, format);
7112 let mut depth_by_offset: HashMap<u64, usize> = HashMap::new();
7113 let mut depths = Vec::with_capacity(descriptors.len());
7114 for descriptor in &descriptors {
7115 let depth = match &descriptor.base {
7116 EntryBase::None => 0,
7117 EntryBase::Offset(base_offset) => {
7118 depth_by_offset.get(base_offset).copied().unwrap_or(0) + 1
7119 }
7120 EntryBase::Ref => 1,
7124 };
7125 depth_by_offset.insert(descriptor.offset, depth);
7126 depths.push(depth);
7127 }
7128 depths
7129 }
7130
7131 struct EntryDescriptor {
7132 offset: u64,
7133 kind: PackObjectKind,
7134 base: EntryBase,
7135 }
7136
7137 enum EntryBase {
7138 None,
7139 Offset(u64),
7140 Ref,
7141 }
7142
7143 fn pack_entry_descriptors(pack: &[u8], format: ObjectFormat) -> Vec<EntryDescriptor> {
7144 let trailer_offset = pack.len() - format.raw_len();
7145 let count = u32_be(&pack[8..12]) as usize;
7146 let mut offset = 12usize;
7147 let mut descriptors = Vec::with_capacity(count);
7148 for _ in 0..count {
7149 let entry_offset = offset as u64;
7150 let header =
7151 parse_entry_header(pack, &mut offset).expect("test operation should succeed");
7152 let base = match header.kind {
7153 PackObjectKind::OfsDelta => {
7154 let base_offset = parse_ofs_delta_base_offset(pack, &mut offset, entry_offset)
7155 .expect("test operation should succeed");
7156 EntryBase::Offset(base_offset)
7157 }
7158 PackObjectKind::RefDelta => {
7159 offset += format.raw_len();
7160 EntryBase::Ref
7161 }
7162 _ => EntryBase::None,
7163 };
7164 let mut decoder = ZlibDecoder::new(&pack[offset..trailer_offset]);
7165 let mut body = Vec::new();
7166 decoder
7167 .read_to_end(&mut body)
7168 .expect("test operation should succeed");
7169 offset += decoder.total_in() as usize;
7170 descriptors.push(EntryDescriptor {
7171 offset: entry_offset,
7172 kind: header.kind,
7173 base,
7174 });
7175 }
7176 descriptors
7177 }
7178
7179 fn similar_blob_objects() -> (EncodedObject, EncodedObject) {
7180 let mut base = Vec::new();
7181 for _ in 0..300 {
7182 base.extend_from_slice(b"common payload\n");
7183 }
7184 base.extend_from_slice(b"base\n");
7185 let mut changed = Vec::new();
7186 for _ in 0..300 {
7187 changed.extend_from_slice(b"common payload\n");
7188 }
7189 changed.extend_from_slice(b"changed\n");
7190 (
7191 EncodedObject::new(ObjectType::Blob, base),
7192 EncodedObject::new(ObjectType::Blob, changed),
7193 )
7194 }
7195
7196 fn single_object_pack(format: ObjectFormat, object_type: ObjectType, body: &[u8]) -> Vec<u8> {
7197 let mut pack = Vec::new();
7198 pack.extend_from_slice(b"PACK");
7199 pack.extend_from_slice(&2u32.to_be_bytes());
7200 pack.extend_from_slice(&1u32.to_be_bytes());
7201 write_entry_header(&mut pack, object_type, body.len() as u64);
7202 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7203 encoder
7204 .write_all(body)
7205 .expect("test operation should succeed");
7206 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7207 let checksum =
7208 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7209 pack.extend_from_slice(checksum.as_bytes());
7210 pack
7211 }
7212
7213 #[derive(Clone, Copy, Debug)]
7214 enum DeltaKind {
7215 Offset,
7216 Ref,
7217 }
7218
7219 fn two_object_delta_pack(
7220 format: ObjectFormat,
7221 base: &[u8],
7222 result: &[u8],
7223 delta_kind: DeltaKind,
7224 ) -> Vec<u8> {
7225 let mut pack = Vec::new();
7226 pack.extend_from_slice(b"PACK");
7227 pack.extend_from_slice(&2u32.to_be_bytes());
7228 pack.extend_from_slice(&2u32.to_be_bytes());
7229
7230 let base_offset = pack.len();
7231 write_entry_header(&mut pack, ObjectType::Blob, base.len() as u64);
7232 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7233 encoder
7234 .write_all(base)
7235 .expect("test operation should succeed");
7236 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7237
7238 let delta = append_suffix_delta(base, result);
7239 let delta_offset = pack.len();
7240 write_pack_entry_header_kind(
7241 &mut pack,
7242 match delta_kind {
7243 DeltaKind::Offset => 6,
7244 DeltaKind::Ref => 7,
7245 },
7246 delta.len() as u64,
7247 );
7248 match delta_kind {
7249 DeltaKind::Offset => write_ofs_delta_offset(&mut pack, delta_offset - base_offset),
7250 DeltaKind::Ref => {
7251 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7252 .expect("test operation should succeed");
7253 pack.extend_from_slice(base_oid.as_bytes());
7254 }
7255 }
7256 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7257 encoder
7258 .write_all(&delta)
7259 .expect("test operation should succeed");
7260 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7261
7262 let checksum =
7263 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7264 pack.extend_from_slice(checksum.as_bytes());
7265 pack
7266 }
7267
7268 fn thin_ref_delta_pack(format: ObjectFormat, base: &[u8], result: &[u8]) -> Vec<u8> {
7269 let mut pack = Vec::new();
7270 pack.extend_from_slice(b"PACK");
7271 pack.extend_from_slice(&2u32.to_be_bytes());
7272 pack.extend_from_slice(&1u32.to_be_bytes());
7273
7274 let delta = append_suffix_delta(base, result);
7275 write_pack_entry_header_kind(&mut pack, 7, delta.len() as u64);
7276 let base_oid = sley_core::object_id_for_bytes(format, "blob", base)
7277 .expect("test operation should succeed");
7278 pack.extend_from_slice(base_oid.as_bytes());
7279 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
7280 encoder
7281 .write_all(&delta)
7282 .expect("test operation should succeed");
7283 pack.extend_from_slice(&encoder.finish().expect("test operation should succeed"));
7284
7285 let checksum =
7286 sley_core::digest_bytes(format, &pack).expect("test operation should succeed");
7287 pack.extend_from_slice(checksum.as_bytes());
7288 pack
7289 }
7290
/// Returns a path under the system temp directory that no other call in this
/// process has been handed. The directory itself is not created.
///
/// The name combines `name`, the process id, the current time in nanoseconds and
/// a process-wide sequence number. The sequence number keeps two calls with the
/// same `name` distinct even when the clock returns the same reading for both.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn unique_temp_dir(name: &str) -> PathBuf {
    static NEXT_SEQUENCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("test operation should succeed")
        .as_nanos();
    // Only uniqueness of the counter value matters, so relaxed ordering suffices.
    let sequence = NEXT_SEQUENCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    std::env::temp_dir().join(format!(
        "sley-{name}-{}-{nanos}-{sequence}",
        std::process::id()
    ))
}
7298
/// Runs `git` with `args` inside `cwd` and panics, including the captured stdout
/// and stderr in the message, unless the command starts and exits successfully.
fn run_git_success(cwd: &Path, args: &[&str]) {
    let mut command = Command::new("git");
    command.current_dir(cwd).args(args);

    let output = match command.output() {
        Ok(output) => output,
        Err(err) => panic!("failed to run git {args:?}: {err}"),
    };

    if !output.status.success() {
        panic!(
            "git {args:?} failed with status {:?}\nstdout:\n{}\nstderr:\n{}",
            output.status.code(),
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        );
    }
}
7313
/// Returns the one entry of `dir` whose file extension equals `extension`.
///
/// Panics if the directory cannot be read or if the number of matching entries
/// is anything other than one.
fn single_path_with_extension(dir: &Path, extension: &str) -> PathBuf {
    let listing = fs::read_dir(dir).expect("test operation should succeed");

    let mut matches = Vec::new();
    for entry in listing {
        let candidate = entry.expect("test operation should succeed").path();
        let has_extension = matches!(
            candidate.extension().and_then(|ext| ext.to_str()),
            Some(found) if found == extension
        );
        if has_extension {
            matches.push(candidate);
        }
    }

    assert_eq!(matches.len(), 1, "expected one .{extension} file");
    matches.swap_remove(0)
}
7323
7324 fn pack_bitmap_index(
7325 format: ObjectFormat,
7326 object_count: u32,
7327 options: u16,
7328 pack_checksum: &ObjectId,
7329 entries: &[(u32, u8, u8, &[u64])],
7330 name_hash_cache: Option<&[u32]>,
7331 ) -> Vec<u8> {
7332 let mut out = Vec::new();
7333 out.extend_from_slice(b"BITM");
7334 out.extend_from_slice(&1u16.to_be_bytes());
7335 out.extend_from_slice(&options.to_be_bytes());
7336 out.extend_from_slice(&(entries.len() as u32).to_be_bytes());
7337 out.extend_from_slice(pack_checksum.as_bytes());
7338 write_test_ewah(&mut out, object_count, &[0b001]);
7339 write_test_ewah(&mut out, object_count, &[0b010]);
7340 write_test_ewah(&mut out, object_count, &[0b100]);
7341 write_test_ewah(&mut out, object_count, &[0]);
7342 for (position, xor_offset, flags, words) in entries {
7343 out.extend_from_slice(&position.to_be_bytes());
7344 out.push(*xor_offset);
7345 out.push(*flags);
7346 write_test_ewah(&mut out, object_count, words);
7347 }
7348 if let Some(cache) = name_hash_cache {
7349 for value in cache {
7350 out.extend_from_slice(&value.to_be_bytes());
7351 }
7352 }
7353 let checksum =
7354 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7355 out.extend_from_slice(checksum.as_bytes());
7356 out
7357 }
7358
/// Appends one serialized EWAH bitmap to `out`: the big-endian `bit_size`, the
/// word count, the words produced by `ewah_literal_words(literals)`, and a
/// trailing 32-bit zero.
fn write_test_ewah(out: &mut Vec<u8>, bit_size: u32, literals: &[u64]) {
    let words = ewah_literal_words(literals);
    let word_count = words.len() as u32;

    out.extend_from_slice(&bit_size.to_be_bytes());
    out.extend_from_slice(&word_count.to_be_bytes());
    out.extend(words.iter().flat_map(|word| word.to_be_bytes()));
    out.extend_from_slice(&0u32.to_be_bytes());
}

/// Returns `literals` prefixed by one header word that carries the literal count
/// shifted left by 33 bits, with all lower bits zero.
fn ewah_literal_words(literals: &[u64]) -> Vec<u64> {
    let header = (literals.len() as u64) << 33;
    std::iter::once(header)
        .chain(literals.iter().copied())
        .collect()
}
7375
7376 fn refresh_trailing_checksum(format: ObjectFormat, bytes: &mut [u8]) {
7377 let checksum_offset = bytes.len() - format.raw_len();
7378 let checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])
7379 .expect("test operation should succeed");
7380 bytes[checksum_offset..].copy_from_slice(checksum.as_bytes());
7381 }
7382
/// Builds git delta instructions that turn `base` into `result`, where `result`
/// is `base` followed by a short suffix.
///
/// The delta consists of the two size varints, one copy instruction covering the
/// whole base (offset 0), and one insert instruction carrying the suffix.
///
/// The copy size is written with one or two size bytes as needed, so any base
/// shorter than 0x10000 bytes is encoded correctly. Zero-valued size bytes are
/// omitted. The copy instruction is left out for an empty base, because an
/// encoded size of 0 means 0x10000. The insert instruction is left out for an
/// empty suffix, because command byte 0x00 is reserved.
///
/// # Panics
///
/// Panics if `result` does not start with `base`, if `base` is 0x10000 bytes or
/// longer, or if the suffix is 0x80 bytes or longer.
fn append_suffix_delta(base: &[u8], result: &[u8]) -> Vec<u8> {
    assert!(result.starts_with(base));
    let suffix = &result[base.len()..];
    assert!(base.len() < 0x10000);
    assert!(suffix.len() < 0x80);

    let mut delta = Vec::new();
    write_delta_varint(&mut delta, base.len() as u64);
    write_delta_varint(&mut delta, result.len() as u64);

    if !base.is_empty() {
        // The assert above guarantees the length fits in 16 bits.
        let [size_low, size_high] = (base.len() as u16).to_le_bytes();
        // 0x80 marks a copy; 0x10 / 0x20 announce the low / high size byte.
        let mut command = 0x80u8;
        if size_low != 0 {
            command |= 0x10;
        }
        if size_high != 0 {
            command |= 0x20;
        }
        delta.push(command);
        if size_low != 0 {
            delta.push(size_low);
        }
        if size_high != 0 {
            delta.push(size_high);
        }
    }

    if !suffix.is_empty() {
        // Insert instruction: the command byte is the literal length (1..=0x7f).
        delta.push(suffix.len() as u8);
        delta.extend_from_slice(suffix);
    }
    delta
}

/// Appends `value` to `out` as a little-endian base-128 varint: seven bits per
/// byte, least significant group first, high bit set on every byte but the last.
fn write_delta_varint(out: &mut Vec<u8>, mut value: u64) {
    loop {
        let mut byte = (value as u8) & 0x7f;
        value >>= 7;
        if value != 0 {
            byte |= 0x80;
        }
        out.push(byte);
        if value == 0 {
            break;
        }
    }
}
7411
/// Appends a pack entry header for a raw numeric `type_code` and `size`.
///
/// The first byte holds the type code in bits 4..=6 and the low four bits of the
/// size. Remaining size bits follow seven at a time, least significant group
/// first. Bit 7 of a byte is set whenever another byte follows.
fn write_pack_entry_header_kind(out: &mut Vec<u8>, type_code: u8, mut size: u64) {
    const CONTINUE: u8 = 0x80;

    let low_nibble = (size as u8) & 0x0f;
    size >>= 4;
    let first = (type_code << 4) | low_nibble;
    out.push(if size == 0 { first } else { first | CONTINUE });

    while size != 0 {
        let group = (size as u8) & 0x7f;
        size >>= 7;
        out.push(if size == 0 { group } else { group | CONTINUE });
    }
}
7428
/// Appends the ofs-delta base distance `relative` to `out` using git's offset
/// encoding: big-endian groups of seven bits, with the continuation bit set on
/// every byte except the last, and each continued prefix stored minus one.
///
/// Values below 0x80 are written as a single byte. Larger distances, which
/// occur when the base entry is more than 127 bytes before the delta, are
/// written with as many bytes as needed.
fn write_ofs_delta_offset(out: &mut Vec<u8>, relative: usize) {
    // Ten bytes of seven bits each are enough for any 64-bit value.
    let mut encoded = [0u8; 10];
    let mut position = encoded.len() - 1;
    let mut remaining = relative;

    // The final (least significant) byte has no continuation bit.
    encoded[position] = (remaining & 0x7f) as u8;
    remaining >>= 7;

    // Earlier bytes are filled from the back. The decrement implements the
    // format's "+1" bias, which removes redundant encodings.
    while remaining != 0 {
        remaining -= 1;
        position -= 1;
        encoded[position] = 0x80 | (remaining & 0x7f) as u8;
        remaining >>= 7;
    }

    out.extend_from_slice(&encoded[position..]);
}
7433
7434 fn single_entry_index(
7435 format: ObjectFormat,
7436 oid: ObjectId,
7437 crc32: u32,
7438 offset: u32,
7439 pack_checksum: ObjectId,
7440 ) -> Vec<u8> {
7441 let mut index = Vec::new();
7442 index.extend_from_slice(&[0xff, b't', b'O', b'c']);
7443 index.extend_from_slice(&2u32.to_be_bytes());
7444 for idx in 0..256 {
7445 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7446 1u32
7447 } else {
7448 0u32
7449 };
7450 index.extend_from_slice(&count.to_be_bytes());
7451 }
7452 index.extend_from_slice(oid.as_bytes());
7453 index.extend_from_slice(&crc32.to_be_bytes());
7454 index.extend_from_slice(&offset.to_be_bytes());
7455 index.extend_from_slice(pack_checksum.as_bytes());
7456 let checksum =
7457 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7458 index.extend_from_slice(checksum.as_bytes());
7459 index
7460 }
7461
7462 fn single_entry_index_v1(
7463 format: ObjectFormat,
7464 oid: ObjectId,
7465 offset: u32,
7466 pack_checksum: ObjectId,
7467 ) -> Vec<u8> {
7468 let mut index = Vec::new();
7469 for idx in 0..256 {
7470 let count = if idx >= usize::from(oid.as_bytes()[0]) {
7471 1u32
7472 } else {
7473 0u32
7474 };
7475 index.extend_from_slice(&count.to_be_bytes());
7476 }
7477 index.extend_from_slice(&offset.to_be_bytes());
7478 index.extend_from_slice(oid.as_bytes());
7479 index.extend_from_slice(pack_checksum.as_bytes());
7480 let checksum =
7481 sley_core::digest_bytes(format, &index).expect("test operation should succeed");
7482 index.extend_from_slice(checksum.as_bytes());
7483 index
7484 }
7485
7486 fn pack_reverse_index(
7487 format: ObjectFormat,
7488 positions: &[u32],
7489 pack_checksum: ObjectId,
7490 ) -> Vec<u8> {
7491 let mut reverse_index = Vec::new();
7492 reverse_index.extend_from_slice(b"RIDX");
7493 reverse_index.extend_from_slice(&1u32.to_be_bytes());
7494 reverse_index.extend_from_slice(&hash_function_id(format).to_be_bytes());
7495 for position in positions {
7496 reverse_index.extend_from_slice(&position.to_be_bytes());
7497 }
7498 reverse_index.extend_from_slice(pack_checksum.as_bytes());
7499 let checksum =
7500 sley_core::digest_bytes(format, &reverse_index).expect("test operation should succeed");
7501 reverse_index.extend_from_slice(checksum.as_bytes());
7502 reverse_index
7503 }
7504
7505 fn pack_mtimes(format: ObjectFormat, mtimes: &[u32], pack_checksum: ObjectId) -> Vec<u8> {
7506 let mut out = Vec::new();
7507 out.extend_from_slice(b"MTME");
7508 out.extend_from_slice(&1u32.to_be_bytes());
7509 out.extend_from_slice(&hash_function_id(format).to_be_bytes());
7510 for mtime in mtimes {
7511 out.extend_from_slice(&mtime.to_be_bytes());
7512 }
7513 out.extend_from_slice(pack_checksum.as_bytes());
7514 let checksum =
7515 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7516 out.extend_from_slice(checksum.as_bytes());
7517 out
7518 }
7519
7520 fn midx_chunks_with_pack_names(
7521 _format: ObjectFormat,
7522 pack_names: Vec<u8>,
7523 entries: &[(ObjectId, u32, u64)],
7524 ) -> Vec<([u8; 4], Vec<u8>)> {
7525 let mut entries = entries.to_vec();
7526 entries.sort_by(|left, right| left.0.as_bytes().cmp(right.0.as_bytes()));
7527 let object_ids: Vec<ObjectId> = entries.iter().map(|entry| entry.0).collect();
7528 let mut large_offsets = Vec::new();
7529 let mut chunks = vec![
7530 (*b"PNAM", pack_names),
7531 (*b"OIDF", midx_oid_fanout(&object_ids)),
7532 (*b"OIDL", midx_oid_lookup(&object_ids)),
7533 (
7534 *b"OOFF",
7535 midx_ooff_entries(
7536 &entries
7537 .iter()
7538 .map(|(_oid, pack_int_id, offset)| (*pack_int_id, *offset))
7539 .collect::<Vec<_>>(),
7540 &mut large_offsets,
7541 ),
7542 ),
7543 ];
7544 if !large_offsets.is_empty() {
7545 chunks.push((*b"LOFF", large_offsets));
7546 }
7547 chunks
7548 }
7549
7550 fn midx_oid_fanout(object_ids: &[ObjectId]) -> Vec<u8> {
7551 let mut counts = [0u32; 256];
7552 for oid in object_ids {
7553 counts[oid.as_bytes()[0] as usize] += 1;
7554 }
7555 let mut running = 0u32;
7556 let mut out = Vec::new();
7557 for count in counts {
7558 running += count;
7559 out.extend_from_slice(&running.to_be_bytes());
7560 }
7561 out
7562 }
7563
7564 fn midx_oid_lookup(object_ids: &[ObjectId]) -> Vec<u8> {
7565 let mut out = Vec::new();
7566 for oid in object_ids {
7567 out.extend_from_slice(oid.as_bytes());
7568 }
7569 out
7570 }
7571
/// Serialises `(pack id, offset)` pairs into object-offset records.
///
/// Each record is the pack id followed by a 32-bit offset field, both
/// big-endian. Offsets below `0x8000_0000` are stored directly. Larger ones
/// are appended to `large_offsets` as big-endian `u64`s, and the field
/// instead holds `0x8000_0000 | index`, where `index` is the position of
/// that value within `large_offsets` (counted in 8-byte slots).
fn midx_ooff_entries(entries: &[(u32, u64)], large_offsets: &mut Vec<u8>) -> Vec<u8> {
    let mut table = Vec::new();
    for &(pack_int_id, offset) in entries {
        table.extend_from_slice(&pack_int_id.to_be_bytes());
        let field: u32 = if offset >= 0x8000_0000 {
            let slot = (large_offsets.len() / 8) as u32;
            large_offsets.extend_from_slice(&offset.to_be_bytes());
            0x8000_0000 | slot
        } else {
            offset as u32
        };
        table.extend_from_slice(&field.to_be_bytes());
    }
    table
}
7586
/// Serialises `values` as consecutive big-endian `u32`s.
fn midx_u32_table(values: &[u32]) -> Vec<u8> {
    values.iter().copied().flat_map(u32::to_be_bytes).collect()
}
7594
/// Serialises `(bitmap_pos, bitmap_nr)` pairs as two consecutive big-endian
/// `u32`s per pair, in the given order.
fn midx_bitmap_packs(entries: &[(u32, u32)]) -> Vec<u8> {
    entries
        .iter()
        .flat_map(|&(bitmap_pos, bitmap_nr)| [bitmap_pos, bitmap_nr])
        .flat_map(u32::to_be_bytes)
        .collect()
}
7603
7604 fn multi_pack_index(
7605 format: ObjectFormat,
7606 version: u8,
7607 pack_count: u32,
7608 chunks: &[([u8; 4], Vec<u8>)],
7609 ) -> Vec<u8> {
7610 let lookup_len = (chunks.len() + 1) * 12;
7611 let mut out = Vec::new();
7612 out.extend_from_slice(b"MIDX");
7613 out.push(version);
7614 out.push(hash_function_id(format) as u8);
7615 out.push(chunks.len() as u8);
7616 out.push(0);
7617 out.extend_from_slice(&pack_count.to_be_bytes());
7618 let mut chunk_offset = (12 + lookup_len) as u64;
7619 for (id, data) in chunks {
7620 out.extend_from_slice(id);
7621 out.extend_from_slice(&chunk_offset.to_be_bytes());
7622 chunk_offset += data.len() as u64;
7623 }
7624 out.extend_from_slice(&[0, 0, 0, 0]);
7625 out.extend_from_slice(&chunk_offset.to_be_bytes());
7626 for (_id, data) in chunks {
7627 out.extend_from_slice(data);
7628 }
7629 let checksum =
7630 sley_core::digest_bytes(format, &out).expect("test operation should succeed");
7631 out.extend_from_slice(checksum.as_bytes());
7632 out
7633 }
7634
7635 fn pack_checksum_sha1() -> ObjectId {
7638 sley_core::digest_bytes(ObjectFormat::Sha1, b"pack").expect("test operation should succeed")
7639 }
7640
7641 fn parse_ewah_bytes(bytes: &[u8]) -> EwahBitmap {
7642 let mut offset = 0usize;
7645 let checksum_offset = bytes.len();
7646 parse_bitmap_ewah(bytes, &mut offset, checksum_offset, 0)
7647 .expect("test operation should succeed")
7648 }
7649
/// A single non-clean word must be stored exactly the way the
/// `ewah_literal_words` test helper lays it out.
#[test]
fn ewah_encodes_single_literal_word_matching_helper() {
    let literal = [0b101u64];
    let bitmap = EwahBitmap::from_words(64, &literal).expect("test operation should succeed");
    assert_eq!(bitmap.words, ewah_literal_words(&literal));
    assert_eq!(bitmap.rlw_position, 0);
    assert_eq!(bitmap.bit_size, 64);
}
7660
/// Pins the serialised layout of a one-literal bitmap: every field is
/// written big-endian.
#[test]
fn ewah_byte_layout_is_big_endian() {
    let literal = 0x0102_0304_0506_0708u64;
    let bitmap = EwahBitmap::from_words(64, &[literal]).expect("test operation should succeed");

    let expected: Vec<u8> = [
        &64u32.to_be_bytes()[..],         // bit size
        &2u32.to_be_bytes()[..],          // number of 64-bit words that follow
        &(1u64 << 33).to_be_bytes()[..],  // running-length word: one literal, no run
        &literal.to_be_bytes()[..],       // the literal word itself
        &0u32.to_be_bytes()[..],          // trailing u32 (presumably the RLW position)
    ]
    .concat();

    assert_eq!(bitmap.to_bytes(), expected);
}
7674
/// An empty bitmap serialises to twelve zero bytes and parses back to an
/// equal, position-free bitmap.
#[test]
fn ewah_empty_bitmap_serialises_like_git() {
    let empty = EwahBitmap::empty();
    let serialised = empty.to_bytes();
    assert_eq!(serialised, vec![0u8; 12]);

    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, empty);
    let positions = reparsed
        .to_positions()
        .expect("test operation should succeed");
    assert!(positions.is_empty());
}
7691
/// Three all-zero words followed by one literal collapse into a single
/// running-length word plus the literal.
#[test]
fn ewah_compresses_clean_zero_run() {
    let input = [0u64, 0, 0, 0b1];
    let bitmap = EwahBitmap::from_words(256, &input).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 2, "expected one RLW plus one literal");

    // Running-length word fields: bit 0 = run bit, bits 1..=32 = run length,
    // bits 33.. = number of literal words that follow.
    let header = bitmap.words[0];
    let run_bit = header & 1;
    let run_length = (header >> 1) & 0xffff_ffff;
    let literal_count = header >> 33;
    assert_eq!(run_bit, 0, "run bit should be zero");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 1, "literal length should be 1");
    assert_eq!(bitmap.words[1], 0b1);
}
7705
/// Three all-ones words collapse into one running-length word with the run
/// bit set and no literals.
#[test]
fn ewah_compresses_clean_ones_run() {
    let input = [u64::MAX; 3];
    let bitmap = EwahBitmap::from_words(192, &input).expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 1);

    let header = bitmap.words[0];
    let run_bit = header & 1;
    let run_length = (header >> 1) & 0xffff_ffff;
    let literal_count = header >> 33;
    assert_eq!(run_bit, 1, "run bit should be one");
    assert_eq!(run_length, 3, "run length should be 3");
    assert_eq!(literal_count, 0, "no literals");
}
7717
/// A mix of zero runs, literals and a ones run decodes back to the exact
/// input words.
#[test]
fn ewah_run_then_literal_then_run_roundtrips() {
    let input = vec![0, 0, 0xdead_beef, u64::MAX, u64::MAX, 0, 0xabc];
    let total_bits = (input.len() * 64) as u32;
    let bitmap = EwahBitmap::from_words(total_bits, &input).expect("test operation should succeed");
    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, input);
}
7728
/// With a bit size of 1, the zero words after the first word do not survive
/// the round trip: only the first word is decoded.
#[test]
fn ewah_drops_trailing_clean_zero_words() {
    let padded = vec![0b1, 0, 0, 0];
    let bitmap = EwahBitmap::from_words(1, &padded).expect("test operation should succeed");
    assert_eq!(bitmap.bit_size, 1);

    let decoded = bitmap.to_words().expect("test operation should succeed");
    assert_eq!(decoded, vec![0b1]);
}
7742
/// Positions spread over several words, including word boundaries, come
/// back unchanged from `to_positions`.
#[test]
fn ewah_from_positions_roundtrips_via_positions() {
    let wanted = [0u32, 1, 63, 64, 65, 200, 511];
    let bitmap = EwahBitmap::from_positions(512, &wanted).expect("test operation should succeed");

    let mut recovered = bitmap.to_positions().expect("test operation should succeed");
    recovered.sort_unstable();
    assert_eq!(recovered, wanted);
}
7752
/// Duplicate, unsorted input positions yield each position once, in
/// ascending order.
#[test]
fn ewah_from_positions_dedupes_and_orders() {
    let noisy = [100, 5, 100, 5, 5];
    let bitmap = EwahBitmap::from_positions(128, &noisy).expect("test operation should succeed");
    let recovered = bitmap.to_positions().expect("test operation should succeed");
    assert_eq!(recovered, vec![5, 100]);
}
7762
/// A zero run longer than `0xffff_ffff` words cannot fit one running-length
/// word: the builder must start a second one for the remainder.
#[test]
fn ewah_huge_zero_run_spans_multiple_rlws() {
    let mut builder = EwahBuilder::new(0);
    builder.add_empty_words(false, 0xffff_ffff);
    builder.add_empty_words(false, 5);
    let bitmap = builder.finish().expect("test operation should succeed");
    assert_eq!(bitmap.words.len(), 2, "run split across two RLWs");

    let full = bitmap.words[0];
    let overflow = bitmap.words[1];
    assert_eq!((full >> 1) & 0xffff_ffff, 0xffff_ffff);
    assert_eq!(overflow & 1, 0);
    assert_eq!((overflow >> 1) & 0xffff_ffff, 5);
    // The last running-length word is the second stored word.
    assert_eq!(bitmap.rlw_position, 1);
}
7779
/// A bit size of 65 cannot be backed by a single 64-bit word, so
/// construction must fail.
#[test]
fn ewah_from_words_rejects_oversized_bit_size() {
    let outcome = EwahBitmap::from_words(65, &[0]);
    assert!(outcome.is_err());
}
7785
/// Position 64 is outside a 64-bit bitmap (valid positions are 0..=63), so
/// construction must fail.
#[test]
fn ewah_from_positions_rejects_out_of_range() {
    let outcome = EwahBitmap::from_positions(64, &[64]);
    assert!(outcome.is_err());
}
7790
/// Serialising a mixed bitmap and parsing the bytes back yields an equal
/// bitmap that decodes to the original words.
#[test]
fn ewah_serialised_bytes_reparse_to_equal_bitmap() {
    let input = vec![0, u64::MAX, 0x1234_5678_9abc_def0, 0, 0, 0xff];
    let total_bits = (input.len() * 64) as u32;
    let original =
        EwahBitmap::from_words(total_bits, &input).expect("test operation should succeed");

    let serialised = original.to_bytes();
    let reparsed = parse_ewah_bytes(&serialised);
    assert_eq!(reparsed, original);

    let decoded = reparsed.to_words().expect("test operation should succeed");
    assert_eq!(decoded, input);
}
7806
/// Writes a SHA-1 bitmap index for a three-object pack (commit, tree, blob)
/// with one selected commit and checks every parsed field.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha1() {
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let selected = [(0u32, 0u32, vec![1u32, 2u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selected,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(&bytes[..4], b"BITM");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    assert_eq!(parsed.version, 1);
    assert_eq!(parsed.options, PackBitmapIndex::OPTION_FULL_DAG);
    assert_eq!(parsed.pack_checksum, pack_checksum_sha1());

    // Each per-type bitmap marks exactly the positions holding that type.
    let commits = parsed
        .type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0]);
    let trees = parsed
        .type_bitmaps
        .trees
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(trees, vec![1]);
    let blobs = parsed
        .type_bitmaps
        .blobs
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(blobs, vec![2]);
    let tags = parsed
        .type_bitmaps
        .tags
        .to_positions()
        .expect("test operation should succeed");
    assert!(tags.is_empty());

    assert_eq!(parsed.entries.len(), 1);
    let entry = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    assert_eq!(entry.xor_offset, 0);
    assert_eq!(entry.flags, 0);
    // The stored bitmap covers the commit itself plus the listed positions.
    let covered = entry
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(covered, vec![0, 1, 2]);
    assert_eq!(parsed.name_hash_cache, None);
}
7873
/// Writes a SHA-256 bitmap index for a two-object pack (commit, tree) and
/// checks that the format, both checksums and the commit bitmap survive
/// parsing.
#[test]
fn pack_bitmap_index_write_parse_roundtrip_sha256() {
    let pack_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let object_types = [ObjectType::Commit, ObjectType::Tree];
    // `ObjectId` is `Copy`, so the checksum is passed by value without an
    // explicit clone and remains usable for the assertion below.
    let bytes = write_bitmap(
        ObjectFormat::Sha256,
        pack_checksum,
        &object_types,
        &[(0u32, 0u32, vec![1u32])],
        None,
    )
    .expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha256, 2)
        .expect("test operation should succeed");
    assert_eq!(parsed.format, ObjectFormat::Sha256);
    assert_eq!(parsed.pack_checksum, pack_checksum);
    assert_eq!(parsed.index_checksum.format(), ObjectFormat::Sha256);
    assert_eq!(
        parsed.entries[0]
            .bitmap
            .to_positions()
            .expect("test operation should succeed"),
        vec![0, 1]
    );
}
7900
/// Supplying a name-hash cache sets the hash-cache option bit and the cache
/// is returned unchanged by the parser.
#[test]
fn pack_bitmap_index_write_includes_name_hash_cache() {
    let cache = vec![0x1111_1111u32, 0x2222_2222, 0x3333_3333];
    let object_types = [ObjectType::Commit, ObjectType::Tree, ObjectType::Blob];
    let selected = [(0u32, 0u32, vec![1u32, 2u32])];
    let bytes = write_bitmap(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &object_types,
        &selected,
        Some(cache.clone()),
    )
    .expect("test operation should succeed");

    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 3)
        .expect("test operation should succeed");
    let expected_options = PackBitmapIndex::OPTION_FULL_DAG | PackBitmapIndex::OPTION_HASH_CACHE;
    assert_eq!(parsed.options, expected_options);
    assert_eq!(parsed.name_hash_cache, Some(cache));
}
7921
/// Two commits selected on the same writer produce two independent entries,
/// each covering its own commit plus the positions passed to `add_commit`.
#[test]
fn pack_bitmap_writer_supports_multiple_commits() {
    let object_types = [
        ObjectType::Commit,
        ObjectType::Commit,
        ObjectType::Tree,
        ObjectType::Blob,
    ];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");
    writer
        .add_commit(0, 0, &[2, 3])
        .expect("test operation should succeed");
    writer
        .add_commit(1, 1, &[2])
        .expect("test operation should succeed");

    let bytes = writer.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 4)
        .expect("test operation should succeed");
    assert_eq!(parsed.entries.len(), 2);

    let commits = parsed
        .type_bitmaps
        .commits
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(commits, vec![0, 1]);

    let first = parsed
        .entry_for_index_position(0)
        .expect("test operation should succeed");
    let first_covered = first
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(first_covered, vec![0, 2, 3]);

    let second = parsed
        .entry_for_index_position(1)
        .expect("test operation should succeed");
    let second_covered = second
        .bitmap
        .to_positions()
        .expect("test operation should succeed");
    assert_eq!(second_covered, vec![1, 2]);
}
7972
/// An index obtained from `build()` carries an all-zero checksum; writing it
/// out (after editing its entries by hand) must produce bytes whose parsed
/// checksum is no longer all zeros.
#[test]
fn pack_bitmap_index_recomputes_checksum_on_write() {
    let zero_checksum = [0u8; 20];
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let mut index = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed")
        .build()
        .expect("test operation should succeed");
    assert_eq!(index.index_checksum.as_bytes(), zero_checksum);

    // Replace whatever entries the writer produced with a single hand-made one.
    let replacement = PackBitmapEntry {
        object_position: 0,
        xor_offset: 0,
        flags: 0,
        bitmap: EwahBitmap::from_positions(2, &[0, 1]).expect("test operation should succeed"),
    };
    index.entries.clear();
    index.entries.push(replacement);

    let bytes = index.write().expect("test operation should succeed");
    let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, 2)
        .expect("test operation should succeed");
    assert_ne!(parsed.index_checksum.as_bytes(), zero_checksum);
}
7996
/// `add_commit` must fail for each invalid argument combination tried
/// against a two-object pack (`[Commit, Blob]`).
#[test]
fn pack_bitmap_writer_rejects_non_commit_selection() {
    let object_types = [ObjectType::Commit, ObjectType::Blob];
    let mut writer =
        PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
            .expect("test operation should succeed");

    // Position 1 holds a blob, not a commit.
    let blob_selected = writer.add_commit(1, 1, &[]);
    assert!(blob_selected.is_err());
    // First argument 5 is beyond the two known objects.
    let position_out_of_range = writer.add_commit(5, 5, &[]);
    assert!(position_out_of_range.is_err());
    // Valid first argument, but the second argument 5 is out of range.
    let index_position_out_of_range = writer.add_commit(0, 5, &[]);
    assert!(index_position_out_of_range.is_err());
    // Valid commit, but the extra position 9 is out of range.
    let reachable_out_of_range = writer.add_commit(0, 0, &[9]);
    assert!(reachable_out_of_range.is_err());
}
8012
/// A SHA-256 pack checksum handed to a SHA-1 writer must be rejected at
/// construction time.
#[test]
fn pack_bitmap_writer_rejects_checksum_format_mismatch() {
    let sha256_checksum = sley_core::digest_bytes(ObjectFormat::Sha256, b"pack")
        .expect("test operation should succeed");
    let outcome = PackBitmapWriter::new(ObjectFormat::Sha1, sha256_checksum, &[ObjectType::Commit]);
    assert!(outcome.is_err());
}
8022
/// A name-hash cache with two entries does not match a one-object pack and
/// must be rejected.
#[test]
fn pack_bitmap_writer_rejects_bad_name_hash_cache_len() {
    let object_types = [ObjectType::Commit];
    let writer = PackBitmapWriter::new(ObjectFormat::Sha1, pack_checksum_sha1(), &object_types)
        .expect("test operation should succeed");
    let outcome = writer.with_name_hash_cache(vec![1, 2]);
    assert!(outcome.is_err());
}
8033
/// `write()` must fail when the hash-cache option bit and the presence of a
/// name-hash cache disagree, in either direction.
#[test]
fn pack_bitmap_index_write_rejects_inconsistent_cache_flag() {
    let writer = PackBitmapWriter::new(
        ObjectFormat::Sha1,
        pack_checksum_sha1(),
        &[ObjectType::Commit],
    )
    .expect("test operation should succeed");
    let mut index = writer.build().expect("test operation should succeed");

    // Case 1: the flag is set on an index built without supplying a cache.
    index.options |= PackBitmapIndex::OPTION_HASH_CACHE;
    let flag_without_cache = index.write();
    assert!(flag_without_cache.is_err());

    // Case 2: a cache is present but the flag is cleared.
    index.options = PackBitmapIndex::OPTION_FULL_DAG;
    index.name_hash_cache = Some(vec![0]);
    let cache_without_flag = index.write();
    assert!(cache_without_flag.is_err());
}
8052
/// Builds a real one-commit repository with the `git` binary, repacks it,
/// then writes a bitmap index for that pack with `write_bitmap` and checks
/// that it parses back with the expected checksum and coverage.
///
/// Requires a `git` executable, since the repository is created through
/// `run_git_success`.
///
/// NOTE(review): despite the name, the generated bitmap bytes are parsed back
/// with `PackBitmapIndex::parse` in this block; no git command reads them
/// here. Also, the temporary directory is only removed when every step
/// before the final line succeeds, because a panic skips the cleanup.
#[test]
fn write_bitmap_roundtrips_through_upstream_git_parser() {
    let root = unique_temp_dir("git-pack-bitmap-writer");
    fs::create_dir_all(&root).expect("test operation should succeed");
    {
        // Create a repository holding a single empty commit.
        run_git_success(&root, &["init", "-q", "-b", "main"]);
        run_git_success(
            &root,
            &[
                "-c",
                "user.name=Example User",
                "-c",
                "user.email=example@example.invalid",
                "commit",
                "--allow-empty",
                "-q",
                "-m",
                "one",
            ],
        );
        // Repack everything into one pack (`-a`), drop redundant packs (`-d`)
        // and ask git to write a bitmap too (`-b`).
        run_git_success(&root, &["repack", "-adb"]);
        let pack_dir = root.join(".git").join("objects").join("pack");
        let idx_path = single_path_with_extension(&pack_dir, "idx");
        let index = PackIndex::parse(
            &fs::read(idx_path).expect("test operation should succeed"),
            ObjectFormat::Sha1,
        )
        .expect("test operation should succeed");
        let pack_path = single_path_with_extension(&pack_dir, "pack");
        let pack =
            PackFile::parse_sha1(&fs::read(pack_path).expect("test operation should succeed"))
                .expect("test operation should succeed");
        // An object's bitmap position is the rank of its offset among all
        // offsets sorted ascending.
        let mut offsets: Vec<u64> = index.entries.iter().map(|entry| entry.offset).collect();
        offsets.sort_unstable();
        let position_of = |offset: u64| -> u32 {
            offsets
                .iter()
                .position(|value| *value == offset)
                .expect("test operation should succeed") as u32
        };
        // Object types indexed by that position; slots default to `Blob` and
        // keep that value when no parsed pack entry matches the offset.
        let mut object_types = vec![ObjectType::Blob; index.entries.len()];
        for entry in &index.entries {
            let position = position_of(entry.offset) as usize;
            if let Some(parsed) = pack
                .entries
                .iter()
                .find(|po| po.entry.offset == entry.offset)
            {
                object_types[position] = parsed.object.object_type;
            }
        }
        // Locate the first commit both by offset-rank position and by its
        // position within `index.entries`.
        let commit_position = object_types
            .iter()
            .position(|ty| *ty == ObjectType::Commit)
            .expect("test operation should succeed") as u32;
        let commit_index_position = index
            .entries
            .iter()
            .position(|entry| position_of(entry.offset) == commit_position)
            .expect("test operation should succeed")
            as u32;
        // Mark every position in the pack as covered by the commit's bitmap.
        let reachable: Vec<u32> = (0..index.entries.len() as u32).collect();
        let bytes = write_bitmap(
            ObjectFormat::Sha1,
            index.pack_checksum.clone(),
            &object_types,
            &[(commit_position, commit_index_position, reachable)],
            None,
        )
        .expect("test operation should succeed");
        let parsed = PackBitmapIndex::parse(&bytes, ObjectFormat::Sha1, index.entries.len())
            .expect("test operation should succeed");
        assert_eq!(parsed.pack_checksum, index.pack_checksum);
        assert_eq!(parsed.entries.len(), 1);
        // The single stored bitmap must cover every object in the pack.
        assert_eq!(
            parsed.entries[0]
                .bitmap
                .to_positions()
                .expect("test operation should succeed")
                .len(),
            index.entries.len()
        );
    };
    // Best-effort cleanup; a failure to remove the directory is ignored.
    let _ = fs::remove_dir_all(&root);
}
8147}